xref: /openbmc/qemu/qga/commands-linux.c (revision bf0e7b068b7d156eb2009776c9b90afdf93abb2f)
1 /*
2  * QEMU Guest Agent Linux-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qga-qapi-commands.h"
17 #include "qapi/error.h"
18 #include "commands-common.h"
19 #include "cutils.h"
20 #include <mntent.h>
21 #include <sys/ioctl.h>
22 #include <mntent.h>
23 #include <linux/nvme_ioctl.h>
24 #include "block/nvme.h"
25 
26 #ifdef CONFIG_LIBUDEV
27 #include <libudev.h>
28 #endif
29 
30 #ifdef HAVE_GETIFADDRS
31 #include <net/if.h>
32 #endif
33 
34 #include <sys/statvfs.h>
35 
36 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/*
 * Look up the device numbers behind the device node @devpath.
 *
 * @devmajor and @devminor are always written: they are zero on every
 * failure path and hold the real numbers on success.
 *
 * Returns 0 if @devpath is a block device, -2 if it is a directory (which
 * the callers treat as a bind mount), and -1 if stat() fails or @devpath is
 * any other kind of file.
 */
static int dev_major_minor(const char *devpath,
                           unsigned int *devmajor, unsigned int *devminor)
{
    struct stat sb;
    int rc = -1;

    *devmajor = 0;
    *devminor = 0;

    if (stat(devpath, &sb) < 0) {
        slog("failed to stat device file '%s': %s", devpath, strerror(errno));
    } else if (S_ISDIR(sb.st_mode)) {
        /* It is bind mount */
        rc = -2;
    } else if (S_ISBLK(sb.st_mode)) {
        *devmajor = major(sb.st_rdev);
        *devminor = minor(sb.st_rdev);
        rc = 0;
    }

    return rc;
}
60 
61 /*
62  * Check if we already have the devmajor:devminor in the mounts
63  * If thats the case return true.
64  */
65 static bool dev_exists(FsMountList *mounts, unsigned int devmajor, unsigned int devminor)
66 {
67     FsMount *mount;
68 
69     QTAILQ_FOREACH(mount, mounts, next) {
70         if (mount->devmajor == devmajor && mount->devminor == devminor) {
71             return true;
72         }
73     }
74     return false;
75 }
76 
77 static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
78 {
79     struct mntent *ment;
80     FsMount *mount;
81     char const *mtab = "/proc/self/mounts";
82     FILE *fp;
83     unsigned int devmajor, devminor;
84 
85     fp = setmntent(mtab, "r");
86     if (!fp) {
87         error_setg(errp, "failed to open mtab file: '%s'", mtab);
88         return false;
89     }
90 
91     while ((ment = getmntent(fp))) {
92         /*
93          * An entry which device name doesn't start with a '/' is
94          * either a dummy file system or a network file system.
95          * Add special handling for smbfs and cifs as is done by
96          * coreutils as well.
97          */
98         if ((ment->mnt_fsname[0] != '/') ||
99             (strcmp(ment->mnt_type, "smbfs") == 0) ||
100             (strcmp(ment->mnt_type, "cifs") == 0)) {
101             continue;
102         }
103         if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
104             /* Skip bind mounts */
105             continue;
106         }
107         if (dev_exists(mounts, devmajor, devminor)) {
108             /* Skip already existing devices (bind mounts) */
109             continue;
110         }
111 
112         mount = g_new0(FsMount, 1);
113         mount->dirname = g_strdup(ment->mnt_dir);
114         mount->devtype = g_strdup(ment->mnt_type);
115         mount->devmajor = devmajor;
116         mount->devminor = devminor;
117 
118         QTAILQ_INSERT_TAIL(mounts, mount, next);
119     }
120 
121     endmntent(fp);
122     return true;
123 }
124 
/*
 * Decode, in place, the backslash escapes found in a mount table field.
 *
 * "\\" becomes a single backslash and "\NNN" (three octal digits, first
 * digit 0-3) becomes the byte with that value.  A backslash followed by
 * anything else is kept as is.
 *
 * @len is the length of @name; the byte at name[len] is processed as well,
 * so the terminating NUL moves along with the (possibly shorter) result.
 */
static void decode_mntname(char *name, int len)
{
    int rd = 0;
    int wr = 0;

    while (rd <= len) {
        char c = name[rd];

        if (c == '\\') {
            if (name[rd + 1] == '\\') {
                /* escaped backslash: emit one, swallow the other */
                rd++;
            } else if (name[rd + 1] >= '0' && name[rd + 1] <= '3' &&
                       name[rd + 2] >= '0' && name[rd + 2] <= '7' &&
                       name[rd + 3] >= '0' && name[rd + 3] <= '7') {
                /* three-digit octal escape */
                c = (name[rd + 1] - '0') * 64 +
                    (name[rd + 2] - '0') * 8 +
                    (name[rd + 3] - '0');
                rd += 3;
            }
            /* otherwise: lone backslash, copied through unchanged */
        }

        name[wr++] = c;
        rd++;
    }
}
146 
147 /*
148  * Walk the mount table and build a list of local file systems
149  */
150 bool build_fs_mount_list(FsMountList *mounts, Error **errp)
151 {
152     FsMount *mount;
153     char const *mountinfo = "/proc/self/mountinfo";
154     FILE *fp;
155     char *line = NULL, *dash;
156     size_t n;
157     char check;
158     unsigned int devmajor, devminor;
159     int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;
160 
161     fp = fopen(mountinfo, "r");
162     if (!fp) {
163         return build_fs_mount_list_from_mtab(mounts, errp);
164     }
165 
166     while (getline(&line, &n, fp) != -1) {
167         ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
168                      &devmajor, &devminor, &dir_s, &dir_e, &check);
169         if (ret < 3) {
170             continue;
171         }
172         dash = strstr(line + dir_e, " - ");
173         if (!dash) {
174             continue;
175         }
176         ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
177                      &type_s, &type_e, &dev_s, &dev_e, &check);
178         if (ret < 1) {
179             continue;
180         }
181         line[dir_e] = 0;
182         dash[type_e] = 0;
183         dash[dev_e] = 0;
184         decode_mntname(line + dir_s, dir_e - dir_s);
185         decode_mntname(dash + dev_s, dev_e - dev_s);
186         if (devmajor == 0) {
187             /* btrfs reports major number = 0 */
188             if (strcmp("btrfs", dash + type_s) != 0 ||
189                 dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
190                 continue;
191             }
192         }
193 
194         if (dev_exists(mounts, devmajor, devminor)) {
195             /* Skip already existing devices (bind mounts) */
196             continue;
197         }
198 
199         mount = g_new0(FsMount, 1);
200         mount->dirname = g_strdup(line + dir_s);
201         mount->devtype = g_strdup(dash + type_s);
202         mount->devmajor = devmajor;
203         mount->devminor = devminor;
204 
205         QTAILQ_INSERT_TAIL(mounts, mount, next);
206     }
207     free(line);
208 
209     fclose(fp);
210     return true;
211 }
212 #endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
213 
214 #ifdef CONFIG_FSFREEZE
215 /*
216  * Walk list of mounted file systems in the guest, and freeze the ones which
217  * are real local file systems.
218  */
219 int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
220                                           strList *mountpoints,
221                                           FsMountList mounts,
222                                           Error **errp)
223 {
224     struct FsMount *mount;
225     strList *list;
226     int fd, ret, i = 0;
227 
228     QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
229         /* To issue fsfreeze in the reverse order of mounts, check if the
230          * mount is listed in the list here */
231         if (has_mountpoints) {
232             for (list = mountpoints; list; list = list->next) {
233                 if (strcmp(list->value, mount->dirname) == 0) {
234                     break;
235                 }
236             }
237             if (!list) {
238                 continue;
239             }
240         }
241 
242         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
243         if (fd == -1) {
244             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
245             return -1;
246         }
247 
248         /* we try to cull filesystems we know won't work in advance, but other
249          * filesystems may not implement fsfreeze for less obvious reasons.
250          * these will report EOPNOTSUPP. we simply ignore these when tallying
251          * the number of frozen filesystems.
252          * if a filesystem is mounted more than once (aka bind mount) a
253          * consecutive attempt to freeze an already frozen filesystem will
254          * return EBUSY.
255          *
256          * any other error means a failure to freeze a filesystem we
257          * expect to be freezable, so return an error in those cases
258          * and return system to thawed state.
259          */
260         ret = ioctl(fd, FIFREEZE);
261         if (ret == -1) {
262             if (errno != EOPNOTSUPP && errno != EBUSY) {
263                 error_setg_errno(errp, errno, "failed to freeze %s",
264                                  mount->dirname);
265                 close(fd);
266                 return -1;
267             }
268         } else {
269             i++;
270         }
271         close(fd);
272     }
273     return i;
274 }
275 
276 int qmp_guest_fsfreeze_do_thaw(Error **errp)
277 {
278     int ret;
279     FsMountList mounts;
280     FsMount *mount;
281     int fd, i = 0, logged;
282     Error *local_err = NULL;
283 
284     QTAILQ_INIT(&mounts);
285     if (!build_fs_mount_list(&mounts, &local_err)) {
286         error_propagate(errp, local_err);
287         return -1;
288     }
289 
290     QTAILQ_FOREACH(mount, &mounts, next) {
291         logged = false;
292         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
293         if (fd == -1) {
294             continue;
295         }
296         /* we have no way of knowing whether a filesystem was actually unfrozen
297          * as a result of a successful call to FITHAW, only that if an error
298          * was returned the filesystem was *not* unfrozen by that particular
299          * call.
300          *
301          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
302          * to unfreeze, continuing issuing FITHAW until an error is returned,
303          * in which case either the filesystem is in an unfreezable state, or,
304          * more likely, it was thawed previously (and remains so afterward).
305          *
306          * also, since the most recent successful call is the one that did
307          * the actual unfreeze, we can use this to provide an accurate count
308          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
309          * may * be useful for determining whether a filesystem was unfrozen
310          * during the freeze/thaw phase by a process other than qemu-ga.
311          */
312         do {
313             ret = ioctl(fd, FITHAW);
314             if (ret == 0 && !logged) {
315                 i++;
316                 logged = true;
317             }
318         } while (ret == 0);
319         close(fd);
320     }
321 
322     free_fs_mount_list(&mounts);
323 
324     return i;
325 }
326 #endif /* CONFIG_FSFREEZE */
327 
328 #if defined(CONFIG_FSFREEZE)
329 
330 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
331 {
332     char *path;
333     char *dpath;
334     char *driver = NULL;
335     char buf[PATH_MAX];
336     ssize_t len;
337 
338     path = g_strndup(syspath, pathlen);
339     dpath = g_strdup_printf("%s/driver", path);
340     len = readlink(dpath, buf, sizeof(buf) - 1);
341     if (len != -1) {
342         buf[len] = 0;
343         driver = g_path_get_basename(buf);
344     }
345     g_free(dpath);
346     g_free(path);
347     return driver;
348 }
349 
/*
 * qsort()-style comparator for unsigned int values.
 * Returns -1, 0 or 1 when *_a is less than, equal to or greater than *_b.
 */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int *lhs = _a;
    const unsigned int *rhs = _b;

    if (*lhs < *rhs) {
        return -1;
    }
    if (*lhs > *rhs) {
        return 1;
    }
    return 0;
}
357 
358 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
359 static int build_hosts(char const *syspath, char const *host, bool ata,
360                        unsigned int *hosts, int hosts_max, Error **errp)
361 {
362     char *path;
363     DIR *dir;
364     struct dirent *entry;
365     int i = 0;
366 
367     path = g_strndup(syspath, host - syspath);
368     dir = opendir(path);
369     if (!dir) {
370         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
371         g_free(path);
372         return -1;
373     }
374 
375     while (i < hosts_max) {
376         entry = readdir(dir);
377         if (!entry) {
378             break;
379         }
380         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
381             ++i;
382         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
383             ++i;
384         }
385     }
386 
387     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
388 
389     g_free(path);
390     closedir(dir);
391     return i;
392 }
393 
394 /*
395  * Store disk device info for devices on the PCI bus.
396  * Returns true if information has been stored, or false for failure.
397  */
398 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
399                                            GuestDiskAddress *disk,
400                                            Error **errp)
401 {
402     unsigned int pci[4], host, hosts[8], tgt[3];
403     int i, offset, nhosts = 0, pcilen;
404     GuestPCIAddress *pciaddr = disk->pci_controller;
405     bool has_ata = false, has_host = false, has_tgt = false;
406     const char *p;
407     char *driver = NULL;
408     bool ret = false;
409 
410     p = strstr(syspath, "/devices/pci");
411     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
412                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
413         g_debug("only pci device is supported: sysfs path '%s'", syspath);
414         return false;
415     }
416 
417     p += 12 + pcilen;
418     while (true) {
419         driver = get_pci_driver(syspath, p - syspath, errp);
420         if (driver && (g_str_equal(driver, "ata_piix") ||
421                        g_str_equal(driver, "sym53c8xx") ||
422                        g_str_equal(driver, "virtio-pci") ||
423                        g_str_equal(driver, "ahci") ||
424                        g_str_equal(driver, "nvme") ||
425                        g_str_equal(driver, "xhci_hcd") ||
426                        g_str_equal(driver, "ehci-pci"))) {
427             break;
428         }
429 
430         g_free(driver);
431         if (sscanf(p, "/%x:%x:%x.%x%n",
432                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
433             p += pcilen;
434             continue;
435         }
436 
437         g_debug("unsupported driver or sysfs path '%s'", syspath);
438         return false;
439     }
440 
441     p = strstr(syspath, "/target");
442     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
443                     tgt, tgt + 1, tgt + 2) == 3) {
444         has_tgt = true;
445     }
446 
447     p = strstr(syspath, "/ata");
448     if (p) {
449         offset = 4;
450         has_ata = true;
451     } else {
452         p = strstr(syspath, "/host");
453         offset = 5;
454     }
455     if (p && sscanf(p + offset, "%u", &host) == 1) {
456         has_host = true;
457         nhosts = build_hosts(syspath, p, has_ata, hosts,
458                              ARRAY_SIZE(hosts), errp);
459         if (nhosts < 0) {
460             goto cleanup;
461         }
462     }
463 
464     pciaddr->domain = pci[0];
465     pciaddr->bus = pci[1];
466     pciaddr->slot = pci[2];
467     pciaddr->function = pci[3];
468 
469     if (strcmp(driver, "ata_piix") == 0) {
470         /* a host per ide bus, target*:0:<unit>:0 */
471         if (!has_host || !has_tgt) {
472             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
473             goto cleanup;
474         }
475         for (i = 0; i < nhosts; i++) {
476             if (host == hosts[i]) {
477                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
478                 disk->bus = i;
479                 disk->unit = tgt[1];
480                 break;
481             }
482         }
483         if (i >= nhosts) {
484             g_debug("no host for '%s' (driver '%s')", syspath, driver);
485             goto cleanup;
486         }
487     } else if (strcmp(driver, "sym53c8xx") == 0) {
488         /* scsi(LSI Logic): target*:0:<unit>:0 */
489         if (!has_tgt) {
490             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
491             goto cleanup;
492         }
493         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
494         disk->unit = tgt[1];
495     } else if (strcmp(driver, "virtio-pci") == 0) {
496         if (has_tgt) {
497             /* virtio-scsi: target*:0:0:<unit> */
498             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
499             disk->unit = tgt[2];
500         } else {
501             /* virtio-blk: 1 disk per 1 device */
502             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
503         }
504     } else if (strcmp(driver, "ahci") == 0) {
505         /* ahci: 1 host per 1 unit */
506         if (!has_host || !has_tgt) {
507             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
508             goto cleanup;
509         }
510         for (i = 0; i < nhosts; i++) {
511             if (host == hosts[i]) {
512                 disk->unit = i;
513                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
514                 break;
515             }
516         }
517         if (i >= nhosts) {
518             g_debug("no host for '%s' (driver '%s')", syspath, driver);
519             goto cleanup;
520         }
521     } else if (strcmp(driver, "nvme") == 0) {
522         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
523     } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
524         disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
525     } else {
526         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
527         goto cleanup;
528     }
529 
530     ret = true;
531 
532 cleanup:
533     g_free(driver);
534     return ret;
535 }
536 
537 /*
538  * Store disk device info for non-PCI virtio devices (for example s390x
539  * channel I/O devices). Returns true if information has been stored, or
540  * false for failure.
541  */
542 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
543                                                  GuestDiskAddress *disk,
544                                                  Error **errp)
545 {
546     unsigned int tgt[3];
547     const char *p;
548 
549     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
550         g_debug("Unsupported virtio device '%s'", syspath);
551         return false;
552     }
553 
554     p = strstr(syspath, "/target");
555     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
556                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
557         /* virtio-scsi: target*:0:<target>:<unit> */
558         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
559         disk->bus = tgt[0];
560         disk->target = tgt[1];
561         disk->unit = tgt[2];
562     } else {
563         /* virtio-blk: 1 disk per 1 device */
564         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
565     }
566 
567     return true;
568 }
569 
570 /*
571  * Store disk device info for CCW devices (s390x channel I/O devices).
572  * Returns true if information has been stored, or false for failure.
573  */
574 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
575                                            GuestDiskAddress *disk,
576                                            Error **errp)
577 {
578     unsigned int cssid, ssid, subchno, devno;
579     const char *p;
580 
581     p = strstr(syspath, "/devices/css");
582     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
583                      &cssid, &ssid, &subchno, &devno) < 4) {
584         g_debug("could not parse ccw device sysfs path: %s", syspath);
585         return false;
586     }
587 
588     disk->ccw_address = g_new0(GuestCCWAddress, 1);
589     disk->ccw_address->cssid = cssid;
590     disk->ccw_address->ssid = ssid;
591     disk->ccw_address->subchno = subchno;
592     disk->ccw_address->devno = devno;
593 
594     if (strstr(p, "/virtio")) {
595         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
596     }
597 
598     return true;
599 }
600 
601 /* Store disk device info specified by @sysfs into @fs */
602 static void build_guest_fsinfo_for_real_device(char const *syspath,
603                                                GuestFilesystemInfo *fs,
604                                                Error **errp)
605 {
606     GuestDiskAddress *disk;
607     GuestPCIAddress *pciaddr;
608     bool has_hwinf;
609 #ifdef CONFIG_LIBUDEV
610     struct udev *udev = NULL;
611     struct udev_device *udevice = NULL;
612 #endif
613 
614     pciaddr = g_new0(GuestPCIAddress, 1);
615     pciaddr->domain = -1;                       /* -1 means field is invalid */
616     pciaddr->bus = -1;
617     pciaddr->slot = -1;
618     pciaddr->function = -1;
619 
620     disk = g_new0(GuestDiskAddress, 1);
621     disk->pci_controller = pciaddr;
622     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
623 
624 #ifdef CONFIG_LIBUDEV
625     udev = udev_new();
626     udevice = udev_device_new_from_syspath(udev, syspath);
627     if (udev == NULL || udevice == NULL) {
628         g_debug("failed to query udev");
629     } else {
630         const char *devnode, *serial;
631         devnode = udev_device_get_devnode(udevice);
632         if (devnode != NULL) {
633             disk->dev = g_strdup(devnode);
634         }
635         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
636         if (serial != NULL && *serial != 0) {
637             disk->serial = g_strdup(serial);
638         }
639     }
640 
641     udev_unref(udev);
642     udev_device_unref(udevice);
643 #endif
644 
645     if (strstr(syspath, "/devices/pci")) {
646         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
647     } else if (strstr(syspath, "/devices/css")) {
648         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
649     } else if (strstr(syspath, "/virtio")) {
650         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
651     } else {
652         g_debug("Unsupported device type for '%s'", syspath);
653         has_hwinf = false;
654     }
655 
656     if (has_hwinf || disk->dev || disk->serial) {
657         QAPI_LIST_PREPEND(fs->disk, disk);
658     } else {
659         qapi_free_GuestDiskAddress(disk);
660     }
661 }
662 
/*
 * Forward declaration: build_guest_fsinfo_for_virtual_device() below calls
 * this function for each slave device, and this function in turn calls
 * build_guest_fsinfo_for_virtual_device() for virtual disks.
 */
static void build_guest_fsinfo_for_device(char const *devpath,
                                          GuestFilesystemInfo *fs,
                                          Error **errp);
666 
667 /* Store a list of slave devices of virtual volume specified by @syspath into
668  * @fs */
669 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
670                                                   GuestFilesystemInfo *fs,
671                                                   Error **errp)
672 {
673     Error *err = NULL;
674     DIR *dir;
675     char *dirpath;
676     struct dirent *entry;
677 
678     dirpath = g_strdup_printf("%s/slaves", syspath);
679     dir = opendir(dirpath);
680     if (!dir) {
681         if (errno != ENOENT) {
682             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
683         }
684         g_free(dirpath);
685         return;
686     }
687 
688     for (;;) {
689         errno = 0;
690         entry = readdir(dir);
691         if (entry == NULL) {
692             if (errno) {
693                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
694             }
695             break;
696         }
697 
698         if (entry->d_type == DT_LNK) {
699             char *path;
700 
701             g_debug(" slave device '%s'", entry->d_name);
702             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
703             build_guest_fsinfo_for_device(path, fs, &err);
704             g_free(path);
705 
706             if (err) {
707                 error_propagate(errp, err);
708                 break;
709             }
710         }
711     }
712 
713     g_free(dirpath);
714     closedir(dir);
715 }
716 
717 static bool is_disk_virtual(const char *devpath, Error **errp)
718 {
719     g_autofree char *syspath = realpath(devpath, NULL);
720 
721     if (!syspath) {
722         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
723         return false;
724     }
725     return strstr(syspath, "/devices/virtual/block/") != NULL;
726 }
727 
728 /* Dispatch to functions for virtual/real device */
729 static void build_guest_fsinfo_for_device(char const *devpath,
730                                           GuestFilesystemInfo *fs,
731                                           Error **errp)
732 {
733     ERRP_GUARD();
734     g_autofree char *syspath = NULL;
735     bool is_virtual = false;
736 
737     syspath = realpath(devpath, NULL);
738     if (!syspath) {
739         if (errno != ENOENT) {
740             error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
741             return;
742         }
743 
744         /* ENOENT: This devpath may not exist because of container config */
745         if (!fs->name) {
746             fs->name = g_path_get_basename(devpath);
747         }
748         return;
749     }
750 
751     if (!fs->name) {
752         fs->name = g_path_get_basename(syspath);
753     }
754 
755     g_debug("  parse sysfs path '%s'", syspath);
756     is_virtual = is_disk_virtual(syspath, errp);
757     if (*errp != NULL) {
758         return;
759     }
760     if (is_virtual) {
761         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
762     } else {
763         build_guest_fsinfo_for_real_device(syspath, fs, errp);
764     }
765 }
766 
767 #ifdef CONFIG_LIBUDEV
768 
769 /*
770  * Wrapper around build_guest_fsinfo_for_device() for getting just
771  * the disk address.
772  */
773 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
774 {
775     g_autoptr(GuestFilesystemInfo) fs = NULL;
776 
777     fs = g_new0(GuestFilesystemInfo, 1);
778     build_guest_fsinfo_for_device(syspath, fs, errp);
779     if (fs->disk != NULL) {
780         return g_steal_pointer(&fs->disk->value);
781     }
782     return NULL;
783 }
784 
/*
 * Return a newly allocated copy of the udev DM_NAME property of the device
 * at @syspath, or NULL if udev cannot be queried or the property is
 * missing or empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev *udev = udev_new();
    struct udev_device *udevice = NULL;
    char *result = NULL;

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            const char *dm_name = udev_device_get_property_value(
                udevice, "DM_NAME");
            /*
             * NULL means there was an error and empty string means there
             * is no alias. In case of no alias we return NULL instead of
             * empty string.
             */
            if (dm_name == NULL) {
                g_debug("failed to query udev for device alias for: %s",
                    syspath);
            } else if (*dm_name != 0) {
                result = g_strdup(dm_name);
            }
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return result;
}
820 
/*
 * Return a newly allocated copy of the udev device node of the device at
 * @syspath, or NULL if udev cannot be queried or reports no device node.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev *udev = udev_new();
    struct udev_device *udevice = NULL;
    char *devnode = NULL;

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            devnode = g_strdup(udev_device_get_devnode(udevice));
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return devnode;
}
845 
846 static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
847 {
848     g_autofree char *deps_dir = NULL;
849     const gchar *dep;
850     GDir *dp_deps = NULL;
851 
852     /* List dependent disks */
853     deps_dir = g_strdup_printf("%s/slaves", disk_dir);
854     g_debug("  listing entries in: %s", deps_dir);
855     dp_deps = g_dir_open(deps_dir, 0, NULL);
856     if (dp_deps == NULL) {
857         g_debug("failed to list entries in %s", deps_dir);
858         return;
859     }
860     disk->has_dependencies = true;
861     while ((dep = g_dir_read_name(dp_deps)) != NULL) {
862         g_autofree char *dep_dir = NULL;
863         char *dev_name;
864 
865         /* Add dependent disks */
866         dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
867         dev_name = get_device_for_syspath(dep_dir);
868         if (dev_name != NULL) {
869             g_debug("  adding dependent device: %s", dev_name);
870             QAPI_LIST_PREPEND(disk->dependencies, dev_name);
871         }
872     }
873     g_dir_close(dp_deps);
874 }
875 
876 /*
877  * Detect partitions subdirectory, name is "<disk_name><number>" or
878  * "<disk_name>p<number>"
879  *
880  * @disk_name -- last component of /sys path (e.g. sda)
881  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
882  * @disk_dev -- device node of the disk (e.g. /dev/sda)
883  */
884 static GuestDiskInfoList *get_disk_partitions(
885     GuestDiskInfoList *list,
886     const char *disk_name, const char *disk_dir,
887     const char *disk_dev)
888 {
889     GuestDiskInfoList *ret = list;
890     struct dirent *de_disk;
891     DIR *dp_disk = NULL;
892     size_t len = strlen(disk_name);
893 
894     dp_disk = opendir(disk_dir);
895     while ((de_disk = readdir(dp_disk)) != NULL) {
896         g_autofree char *partition_dir = NULL;
897         char *dev_name;
898         GuestDiskInfo *partition;
899 
900         if (!(de_disk->d_type & DT_DIR)) {
901             continue;
902         }
903 
904         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
905             ((*(de_disk->d_name + len) == 'p' &&
906             isdigit(*(de_disk->d_name + len + 1))) ||
907                 isdigit(*(de_disk->d_name + len))))) {
908             continue;
909         }
910 
911         partition_dir = g_strdup_printf("%s/%s",
912             disk_dir, de_disk->d_name);
913         dev_name = get_device_for_syspath(partition_dir);
914         if (dev_name == NULL) {
915             g_debug("Failed to get device name for syspath: %s",
916                 disk_dir);
917             continue;
918         }
919         partition = g_new0(GuestDiskInfo, 1);
920         partition->name = dev_name;
921         partition->partition = true;
922         partition->has_dependencies = true;
923         /* Add parent disk as dependent for easier tracking of hierarchy */
924         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
925 
926         QAPI_LIST_PREPEND(ret, partition);
927     }
928     closedir(dp_disk);
929 
930     return ret;
931 }
932 
933 static void get_nvme_smart(GuestDiskInfo *disk)
934 {
935     int fd;
936     GuestNVMeSmart *smart;
937     NvmeSmartLog log = {0};
938     struct nvme_admin_cmd cmd = {
939         .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
940         .nsid = NVME_NSID_BROADCAST,
941         .addr = (uintptr_t)&log,
942         .data_len = sizeof(log),
943         .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
944                  | (((sizeof(log) >> 2) - 1) << 16)
945     };
946 
947     fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
948     if (fd == -1) {
949         g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
950         return;
951     }
952 
953     if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
954         g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
955         close(fd);
956         return;
957     }
958 
959     disk->smart = g_new0(GuestDiskSmart, 1);
960     disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
961 
962     smart = &disk->smart->u.nvme;
963     smart->critical_warning = log.critical_warning;
964     smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
965     smart->available_spare = log.available_spare;
966     smart->available_spare_threshold = log.available_spare_threshold;
967     smart->percentage_used = log.percentage_used;
968     smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
969     smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
970     smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
971     smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
972     smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
973     smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
974     smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
975     smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
976     smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
977     smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
978     smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
979     smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
980     smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
981     smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
982     smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
983     smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
984     smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
985     smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
986     smart->number_of_error_log_entries_lo =
987         le64_to_cpu(log.number_of_error_log_entries[0]);
988     smart->number_of_error_log_entries_hi =
989         le64_to_cpu(log.number_of_error_log_entries[1]);
990 
991     close(fd);
992 }
993 
994 static void get_disk_smart(GuestDiskInfo *disk)
995 {
996     if (disk->address
997         && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
998         get_nvme_smart(disk);
999     }
1000 }
1001 
1002 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1003 {
1004     GuestDiskInfoList *ret = NULL;
1005     GuestDiskInfo *disk;
1006     DIR *dp = NULL;
1007     struct dirent *de = NULL;
1008 
1009     g_debug("listing /sys/block directory");
1010     dp = opendir("/sys/block");
1011     if (dp == NULL) {
1012         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
1013         return NULL;
1014     }
1015     while ((de = readdir(dp)) != NULL) {
1016         g_autofree char *disk_dir = NULL, *line = NULL,
1017             *size_path = NULL;
1018         char *dev_name;
1019         Error *local_err = NULL;
1020         if (de->d_type != DT_LNK) {
1021             g_debug("  skipping entry: %s", de->d_name);
1022             continue;
1023         }
1024 
1025         /* Check size and skip zero-sized disks */
1026         g_debug("  checking disk size");
1027         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1028         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1029             g_debug("  failed to read disk size");
1030             continue;
1031         }
1032         if (g_strcmp0(line, "0\n") == 0) {
1033             g_debug("  skipping zero-sized disk");
1034             continue;
1035         }
1036 
1037         g_debug("  adding %s", de->d_name);
1038         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1039         dev_name = get_device_for_syspath(disk_dir);
1040         if (dev_name == NULL) {
1041             g_debug("Failed to get device name for syspath: %s",
1042                 disk_dir);
1043             continue;
1044         }
1045         disk = g_new0(GuestDiskInfo, 1);
1046         disk->name = dev_name;
1047         disk->partition = false;
1048         disk->alias = get_alias_for_syspath(disk_dir);
1049         QAPI_LIST_PREPEND(ret, disk);
1050 
1051         /* Get address for non-virtual devices */
1052         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1053         if (local_err != NULL) {
1054             g_debug("  failed to check disk path, ignoring error: %s",
1055                 error_get_pretty(local_err));
1056             error_free(local_err);
1057             local_err = NULL;
1058             /* Don't try to get the address */
1059             is_virtual = true;
1060         }
1061         if (!is_virtual) {
1062             disk->address = get_disk_address(disk_dir, &local_err);
1063             if (local_err != NULL) {
1064                 g_debug("  failed to get device info, ignoring error: %s",
1065                     error_get_pretty(local_err));
1066                 error_free(local_err);
1067                 local_err = NULL;
1068             }
1069         }
1070 
1071         get_disk_deps(disk_dir, disk);
1072         get_disk_smart(disk);
1073         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1074     }
1075 
1076     closedir(dp);
1077 
1078     return ret;
1079 }
1080 
1081 #endif
1082 
1083 /* Return a list of the disk device(s)' info which @mount lies on */
1084 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1085                                                Error **errp)
1086 {
1087     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1088     struct statvfs buf;
1089     unsigned long used, nonroot_total, fr_size;
1090     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1091                                     mount->devmajor, mount->devminor);
1092 
1093     fs->mountpoint = g_strdup(mount->dirname);
1094     fs->type = g_strdup(mount->devtype);
1095     build_guest_fsinfo_for_device(devpath, fs, errp);
1096 
1097     if (statvfs(fs->mountpoint, &buf) == 0) {
1098         fr_size = buf.f_frsize;
1099         used = buf.f_blocks - buf.f_bfree;
1100         nonroot_total = used + buf.f_bavail;
1101         fs->used_bytes = used * fr_size;
1102         fs->total_bytes = nonroot_total * fr_size;
1103         fs->total_bytes_privileged = buf.f_blocks * fr_size;
1104 
1105         fs->has_total_bytes = true;
1106         fs->has_total_bytes_privileged = true;
1107         fs->has_used_bytes = true;
1108     }
1109 
1110     g_free(devpath);
1111 
1112     return fs;
1113 }
1114 
1115 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1116 {
1117     FsMountList mounts;
1118     struct FsMount *mount;
1119     GuestFilesystemInfoList *ret = NULL;
1120     Error *local_err = NULL;
1121 
1122     QTAILQ_INIT(&mounts);
1123     if (!build_fs_mount_list(&mounts, &local_err)) {
1124         error_propagate(errp, local_err);
1125         return NULL;
1126     }
1127 
1128     QTAILQ_FOREACH(mount, &mounts, next) {
1129         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1130 
1131         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1132         if (local_err) {
1133             error_propagate(errp, local_err);
1134             qapi_free_GuestFilesystemInfoList(ret);
1135             ret = NULL;
1136             break;
1137         }
1138     }
1139 
1140     free_fs_mount_list(&mounts);
1141     return ret;
1142 }
1143 #endif /* CONFIG_FSFREEZE */
1144 
1145 #if defined(CONFIG_FSTRIM)
1146 /*
1147  * Walk list of mounted file systems in the guest, and trim them.
1148  */
1149 GuestFilesystemTrimResponse *
1150 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1151 {
1152     GuestFilesystemTrimResponse *response;
1153     GuestFilesystemTrimResult *result;
1154     int ret = 0;
1155     FsMountList mounts;
1156     struct FsMount *mount;
1157     int fd;
1158     struct fstrim_range r;
1159 
1160     slog("guest-fstrim called");
1161 
1162     QTAILQ_INIT(&mounts);
1163     if (!build_fs_mount_list(&mounts, errp)) {
1164         return NULL;
1165     }
1166 
1167     response = g_malloc0(sizeof(*response));
1168 
1169     QTAILQ_FOREACH(mount, &mounts, next) {
1170         result = g_malloc0(sizeof(*result));
1171         result->path = g_strdup(mount->dirname);
1172 
1173         QAPI_LIST_PREPEND(response->paths, result);
1174 
1175         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1176         if (fd == -1) {
1177             result->error = g_strdup_printf("failed to open: %s",
1178                                             strerror(errno));
1179             continue;
1180         }
1181 
1182         /* We try to cull filesystems we know won't work in advance, but other
1183          * filesystems may not implement fstrim for less obvious reasons.
1184          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1185          * will be reported (e.g. CD-ROMs).
1186          * Any other error means an unexpected error.
1187          */
1188         r.start = 0;
1189         r.len = -1;
1190         r.minlen = has_minimum ? minimum : 0;
1191         ret = ioctl(fd, FITRIM, &r);
1192         if (ret == -1) {
1193             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1194                 result->error = g_strdup("trim not supported");
1195             } else {
1196                 result->error = g_strdup_printf("failed to trim: %s",
1197                                                 strerror(errno));
1198             }
1199             close(fd);
1200             continue;
1201         }
1202 
1203         result->has_minimum = true;
1204         result->minimum = r.minlen;
1205         result->has_trimmed = true;
1206         result->trimmed = r.len;
1207         close(fd);
1208     }
1209 
1210     free_fs_mount_list(&mounts);
1211     return response;
1212 }
1213 #endif /* CONFIG_FSTRIM */
1214 
/* sysfs file that is read to probe, and written to trigger, a suspend */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* Exit status of 'pm-is-supported' that means the mode is supported */
#define SUSPEND_SUPPORTED 0
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Suspend modes handled by the guest_suspend() helpers.
 *
 * The numeric values are used directly as indexes into the three-element
 * per-mode argument tables of the systemd, pm-utils and sysfs helpers, so
 * they must stay 0, 1 and 2 in this order.
 */
typedef enum {
    SUSPEND_MODE_DISK = 0,
    SUSPEND_MODE_RAM = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1224 
1225 /*
1226  * Executes a command in a child process using g_spawn_sync,
1227  * returning an int >= 0 representing the exit status of the
1228  * process.
1229  *
1230  * If the program wasn't found in path, returns -1.
1231  *
1232  * If a problem happened when creating the child process,
1233  * returns -1 and errp is set.
1234  */
1235 static int run_process_child(const char *command[], Error **errp)
1236 {
1237     int exit_status, spawn_flag;
1238     GError *g_err = NULL;
1239     bool success;
1240 
1241     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1242                  G_SPAWN_STDERR_TO_DEV_NULL;
1243 
1244     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1245                             NULL, NULL, NULL, NULL,
1246                             &exit_status, &g_err);
1247 
1248     if (success) {
1249         return WEXITSTATUS(exit_status);
1250     }
1251 
1252     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1253         error_setg(errp, "failed to create child process, error '%s'",
1254                    g_err->message);
1255     }
1256 
1257     g_error_free(g_err);
1258     return -1;
1259 }
1260 
1261 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1262 {
1263     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1264                                      "systemd-hybrid-sleep"};
1265     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1266     int status;
1267 
1268     status = run_process_child(cmd, errp);
1269 
1270     /*
1271      * systemctl status uses LSB return codes so we can expect
1272      * status > 0 and be ok. To assert if the guest has support
1273      * for the selected suspend mode, status should be < 4. 4 is
1274      * the code for unknown service status, the return value when
1275      * the service does not exist. A common value is status = 3
1276      * (program is not running).
1277      */
1278     if (status > 0 && status < 4) {
1279         return true;
1280     }
1281 
1282     return false;
1283 }
1284 
1285 static void systemd_suspend(SuspendMode mode, Error **errp)
1286 {
1287     Error *local_err = NULL;
1288     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1289     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1290     int status;
1291 
1292     status = run_process_child(cmd, &local_err);
1293 
1294     if (status == 0) {
1295         return;
1296     }
1297 
1298     if ((status == -1) && !local_err) {
1299         error_setg(errp, "the helper program 'systemctl %s' was not found",
1300                    systemctl_args[mode]);
1301         return;
1302     }
1303 
1304     if (local_err) {
1305         error_propagate(errp, local_err);
1306     } else {
1307         error_setg(errp, "the helper program 'systemctl %s' returned an "
1308                    "unexpected exit status code (%d)",
1309                    systemctl_args[mode], status);
1310     }
1311 }
1312 
1313 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1314 {
1315     Error *local_err = NULL;
1316     const char *pmutils_args[3] = {"--hibernate", "--suspend",
1317                                    "--suspend-hybrid"};
1318     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1319     int status;
1320 
1321     status = run_process_child(cmd, &local_err);
1322 
1323     if (status == SUSPEND_SUPPORTED) {
1324         return true;
1325     }
1326 
1327     if ((status == -1) && !local_err) {
1328         return false;
1329     }
1330 
1331     if (local_err) {
1332         error_propagate(errp, local_err);
1333     } else {
1334         error_setg(errp,
1335                    "the helper program '%s' returned an unexpected exit"
1336                    " status code (%d)", "pm-is-supported", status);
1337     }
1338 
1339     return false;
1340 }
1341 
1342 static void pmutils_suspend(SuspendMode mode, Error **errp)
1343 {
1344     Error *local_err = NULL;
1345     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1346                                        "pm-suspend-hybrid"};
1347     const char *cmd[2] = {pmutils_binaries[mode], NULL};
1348     int status;
1349 
1350     status = run_process_child(cmd, &local_err);
1351 
1352     if (status == 0) {
1353         return;
1354     }
1355 
1356     if ((status == -1) && !local_err) {
1357         error_setg(errp, "the helper program '%s' was not found",
1358                    pmutils_binaries[mode]);
1359         return;
1360     }
1361 
1362     if (local_err) {
1363         error_propagate(errp, local_err);
1364     } else {
1365         error_setg(errp,
1366                    "the helper program '%s' returned an unexpected exit"
1367                    " status code (%d)", pmutils_binaries[mode], status);
1368     }
1369 }
1370 
1371 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1372 {
1373     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1374     const char *sysfile_str = sysfile_strs[mode];
1375     char buf[32]; /* hopefully big enough */
1376     int fd;
1377     ssize_t ret;
1378 
1379     if (!sysfile_str) {
1380         error_setg(errp, "unknown guest suspend mode");
1381         return false;
1382     }
1383 
1384     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1385     if (fd < 0) {
1386         return false;
1387     }
1388 
1389     ret = read(fd, buf, sizeof(buf) - 1);
1390     close(fd);
1391     if (ret <= 0) {
1392         return false;
1393     }
1394     buf[ret] = '\0';
1395 
1396     if (strstr(buf, sysfile_str)) {
1397         return true;
1398     }
1399     return false;
1400 }
1401 
1402 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1403 {
1404     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1405     const char *sysfile_str = sysfile_strs[mode];
1406     int fd;
1407 
1408     if (!sysfile_str) {
1409         error_setg(errp, "unknown guest suspend mode");
1410         return;
1411     }
1412 
1413     fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1414     if (fd < 0 || write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1415         error_setg(errp, "suspend: cannot write to '%s': %m",
1416                    LINUX_SYS_STATE_FILE);
1417     }
1418     if (fd >= 0) {
1419         close(fd);
1420     }
1421 }
1422 
1423 static void guest_suspend(SuspendMode mode, Error **errp)
1424 {
1425     Error *local_err = NULL;
1426     bool mode_supported = false;
1427 
1428     if (systemd_supports_mode(mode, &local_err)) {
1429         mode_supported = true;
1430         systemd_suspend(mode, &local_err);
1431 
1432         if (!local_err) {
1433             return;
1434         }
1435     }
1436 
1437     error_free(local_err);
1438     local_err = NULL;
1439 
1440     if (pmutils_supports_mode(mode, &local_err)) {
1441         mode_supported = true;
1442         pmutils_suspend(mode, &local_err);
1443 
1444         if (!local_err) {
1445             return;
1446         }
1447     }
1448 
1449     error_free(local_err);
1450     local_err = NULL;
1451 
1452     if (linux_sys_state_supports_mode(mode, &local_err)) {
1453         mode_supported = true;
1454         linux_sys_state_suspend(mode, &local_err);
1455     }
1456 
1457     if (!mode_supported) {
1458         error_free(local_err);
1459         error_setg(errp,
1460                    "the requested suspend mode is not supported by the guest");
1461     } else {
1462         error_propagate(errp, local_err);
1463     }
1464 }
1465 
/* Suspend the guest in SUSPEND_MODE_DISK; failures are reported via @errp. */
void qmp_guest_suspend_disk(Error **errp)
{
    guest_suspend(SUSPEND_MODE_DISK, errp);
}
1470 
/* Suspend the guest in SUSPEND_MODE_RAM; failures are reported via @errp. */
void qmp_guest_suspend_ram(Error **errp)
{
    guest_suspend(SUSPEND_MODE_RAM, errp);
}
1475 
/* Suspend the guest in SUSPEND_MODE_HYBRID; failures are reported via @errp. */
void qmp_guest_suspend_hybrid(Error **errp)
{
    guest_suspend(SUSPEND_MODE_HYBRID, errp);
}
1480 
1481 /* Transfer online/offline status between @vcpu and the guest system.
1482  *
1483  * On input either @errp or *@errp must be NULL.
1484  *
1485  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1486  * - R: vcpu->logical_id
1487  * - W: vcpu->online
1488  * - W: vcpu->can_offline
1489  *
1490  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1491  * - R: vcpu->logical_id
1492  * - R: vcpu->online
1493  *
1494  * Written members remain unmodified on error.
1495  */
1496 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1497                           char *dirpath, Error **errp)
1498 {
1499     int fd;
1500     int res;
1501     int dirfd;
1502     static const char fn[] = "online";
1503 
1504     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1505     if (dirfd == -1) {
1506         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1507         return;
1508     }
1509 
1510     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1511     if (fd == -1) {
1512         if (errno != ENOENT) {
1513             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1514         } else if (sys2vcpu) {
1515             vcpu->online = true;
1516             vcpu->can_offline = false;
1517         } else if (!vcpu->online) {
1518             error_setg(errp, "logical processor #%" PRId64 " can't be "
1519                        "offlined", vcpu->logical_id);
1520         } /* otherwise pretend successful re-onlining */
1521     } else {
1522         unsigned char status;
1523 
1524         res = pread(fd, &status, 1, 0);
1525         if (res == -1) {
1526             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1527         } else if (res == 0) {
1528             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1529                        fn);
1530         } else if (sys2vcpu) {
1531             vcpu->online = (status != '0');
1532             vcpu->can_offline = true;
1533         } else if (vcpu->online != (status != '0')) {
1534             status = '0' + vcpu->online;
1535             if (pwrite(fd, &status, 1, 0) == -1) {
1536                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1537                                  fn);
1538             }
1539         } /* otherwise pretend successful re-(on|off)-lining */
1540 
1541         res = close(fd);
1542         g_assert(res == 0);
1543     }
1544 
1545     res = close(dirfd);
1546     g_assert(res == 0);
1547 }
1548 
1549 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1550 {
1551     GuestLogicalProcessorList *head, **tail;
1552     const char *cpu_dir = "/sys/devices/system/cpu";
1553     const gchar *line;
1554     g_autoptr(GDir) cpu_gdir = NULL;
1555     Error *local_err = NULL;
1556 
1557     head = NULL;
1558     tail = &head;
1559     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
1560 
1561     if (cpu_gdir == NULL) {
1562         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
1563         return NULL;
1564     }
1565 
1566     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
1567         GuestLogicalProcessor *vcpu;
1568         int64_t id;
1569         if (sscanf(line, "cpu%" PRId64, &id)) {
1570             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
1571                                                     "cpu%" PRId64 "/", id);
1572             vcpu = g_malloc0(sizeof *vcpu);
1573             vcpu->logical_id = id;
1574             vcpu->has_can_offline = true; /* lolspeak ftw */
1575             transfer_vcpu(vcpu, true, path, &local_err);
1576             QAPI_LIST_APPEND(tail, vcpu);
1577         }
1578     }
1579 
1580     if (local_err == NULL) {
1581         /* there's no guest with zero VCPUs */
1582         g_assert(head != NULL);
1583         return head;
1584     }
1585 
1586     qapi_free_GuestLogicalProcessorList(head);
1587     error_propagate(errp, local_err);
1588     return NULL;
1589 }
1590 
1591 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1592 {
1593     int64_t processed;
1594     Error *local_err = NULL;
1595 
1596     processed = 0;
1597     while (vcpus != NULL) {
1598         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1599                                      vcpus->value->logical_id);
1600 
1601         transfer_vcpu(vcpus->value, false, path, &local_err);
1602         g_free(path);
1603         if (local_err != NULL) {
1604             break;
1605         }
1606         ++processed;
1607         vcpus = vcpus->next;
1608     }
1609 
1610     if (local_err != NULL) {
1611         if (processed == 0) {
1612             error_propagate(errp, local_err);
1613         } else {
1614             error_free(local_err);
1615         }
1616     }
1617 
1618     return processed;
1619 }
1620 
1621 
1622 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
1623                                int size, Error **errp)
1624 {
1625     int fd;
1626     int res;
1627 
1628     errno = 0;
1629     fd = openat(dirfd, pathname, O_RDONLY);
1630     if (fd == -1) {
1631         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
1632         return;
1633     }
1634 
1635     res = pread(fd, buf, size, 0);
1636     if (res == -1) {
1637         error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
1638     } else if (res == 0) {
1639         error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
1640     }
1641     close(fd);
1642 }
1643 
1644 static void ga_write_sysfs_file(int dirfd, const char *pathname,
1645                                 const char *buf, int size, Error **errp)
1646 {
1647     int fd;
1648 
1649     errno = 0;
1650     fd = openat(dirfd, pathname, O_WRONLY);
1651     if (fd == -1) {
1652         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
1653         return;
1654     }
1655 
1656     if (pwrite(fd, buf, size, 0) == -1) {
1657         error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
1658     }
1659 
1660     close(fd);
1661 }
1662 
1663 /* Transfer online/offline status between @mem_blk and the guest system.
1664  *
1665  * On input either @errp or *@errp must be NULL.
1666  *
1667  * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
1668  * - R: mem_blk->phys_index
1669  * - W: mem_blk->online
1670  * - W: mem_blk->can_offline
1671  *
1672  * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
1673  * - R: mem_blk->phys_index
1674  * - R: mem_blk->online
1675  *-  R: mem_blk->can_offline
1676  * Written members remain unmodified on error.
1677  */
1678 static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
1679                                   GuestMemoryBlockResponse *result,
1680                                   Error **errp)
1681 {
1682     char *dirpath;
1683     int dirfd;
1684     char *status;
1685     Error *local_err = NULL;
1686 
1687     if (!sys2memblk) {
1688         DIR *dp;
1689 
1690         if (!result) {
1691             error_setg(errp, "Internal error, 'result' should not be NULL");
1692             return;
1693         }
1694         errno = 0;
1695         dp = opendir("/sys/devices/system/memory/");
1696          /* if there is no 'memory' directory in sysfs,
1697          * we think this VM does not support online/offline memory block,
1698          * any other solution?
1699          */
1700         if (!dp) {
1701             if (errno == ENOENT) {
1702                 result->response =
1703                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
1704             }
1705             goto out1;
1706         }
1707         closedir(dp);
1708     }
1709 
1710     dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
1711                               mem_blk->phys_index);
1712     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1713     if (dirfd == -1) {
1714         if (sys2memblk) {
1715             error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1716         } else {
1717             if (errno == ENOENT) {
1718                 result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
1719             } else {
1720                 result->response =
1721                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1722             }
1723         }
1724         g_free(dirpath);
1725         goto out1;
1726     }
1727     g_free(dirpath);
1728 
1729     status = g_malloc0(10);
1730     ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
1731     if (local_err) {
1732         /* treat with sysfs file that not exist in old kernel */
1733         if (errno == ENOENT) {
1734             error_free(local_err);
1735             if (sys2memblk) {
1736                 mem_blk->online = true;
1737                 mem_blk->can_offline = false;
1738             } else if (!mem_blk->online) {
1739                 result->response =
1740                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
1741             }
1742         } else {
1743             if (sys2memblk) {
1744                 error_propagate(errp, local_err);
1745             } else {
1746                 error_free(local_err);
1747                 result->response =
1748                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1749             }
1750         }
1751         goto out2;
1752     }
1753 
1754     if (sys2memblk) {
1755         char removable = '0';
1756 
1757         mem_blk->online = (strncmp(status, "online", 6) == 0);
1758 
1759         ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
1760         if (local_err) {
1761             /* if no 'removable' file, it doesn't support offline mem blk */
1762             if (errno == ENOENT) {
1763                 error_free(local_err);
1764                 mem_blk->can_offline = false;
1765             } else {
1766                 error_propagate(errp, local_err);
1767             }
1768         } else {
1769             mem_blk->can_offline = (removable != '0');
1770         }
1771     } else {
1772         if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
1773             const char *new_state = mem_blk->online ? "online" : "offline";
1774 
1775             ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
1776                                 &local_err);
1777             if (local_err) {
1778                 error_free(local_err);
1779                 result->response =
1780                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1781                 goto out2;
1782             }
1783 
1784             result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
1785             result->has_error_code = false;
1786         } /* otherwise pretend successful re-(on|off)-lining */
1787     }
1788     g_free(status);
1789     close(dirfd);
1790     return;
1791 
1792 out2:
1793     g_free(status);
1794     close(dirfd);
1795 out1:
1796     if (!sys2memblk) {
1797         result->has_error_code = true;
1798         result->error_code = errno;
1799     }
1800 }
1801 
1802 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
1803 {
1804     GuestMemoryBlockList *head, **tail;
1805     Error *local_err = NULL;
1806     struct dirent *de;
1807     DIR *dp;
1808 
1809     head = NULL;
1810     tail = &head;
1811 
1812     dp = opendir("/sys/devices/system/memory/");
1813     if (!dp) {
1814         /* it's ok if this happens to be a system that doesn't expose
1815          * memory blocks via sysfs, but otherwise we should report
1816          * an error
1817          */
1818         if (errno != ENOENT) {
1819             error_setg_errno(errp, errno, "Can't open directory"
1820                              "\"/sys/devices/system/memory/\"");
1821         }
1822         return NULL;
1823     }
1824 
1825     /* Note: the phys_index of memory block may be discontinuous,
1826      * this is because a memblk is the unit of the Sparse Memory design, which
1827      * allows discontinuous memory ranges (ex. NUMA), so here we should
1828      * traverse the memory block directory.
1829      */
1830     while ((de = readdir(dp)) != NULL) {
1831         GuestMemoryBlock *mem_blk;
1832 
1833         if ((strncmp(de->d_name, "memory", 6) != 0) ||
1834             !(de->d_type & DT_DIR)) {
1835             continue;
1836         }
1837 
1838         mem_blk = g_malloc0(sizeof *mem_blk);
1839         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
1840         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
1841         mem_blk->has_can_offline = true; /* lolspeak ftw */
1842         transfer_memory_block(mem_blk, true, NULL, &local_err);
1843         if (local_err) {
1844             break;
1845         }
1846 
1847         QAPI_LIST_APPEND(tail, mem_blk);
1848     }
1849 
1850     closedir(dp);
1851     if (local_err == NULL) {
1852         /* there's no guest with zero memory blocks */
1853         if (head == NULL) {
1854             error_setg(errp, "guest reported zero memory blocks!");
1855         }
1856         return head;
1857     }
1858 
1859     qapi_free_GuestMemoryBlockList(head);
1860     error_propagate(errp, local_err);
1861     return NULL;
1862 }
1863 
1864 GuestMemoryBlockResponseList *
1865 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
1866 {
1867     GuestMemoryBlockResponseList *head, **tail;
1868     Error *local_err = NULL;
1869 
1870     head = NULL;
1871     tail = &head;
1872 
1873     while (mem_blks != NULL) {
1874         GuestMemoryBlockResponse *result;
1875         GuestMemoryBlock *current_mem_blk = mem_blks->value;
1876 
1877         result = g_malloc0(sizeof(*result));
1878         result->phys_index = current_mem_blk->phys_index;
1879         transfer_memory_block(current_mem_blk, false, result, &local_err);
1880         if (local_err) { /* should never happen */
1881             goto err;
1882         }
1883 
1884         QAPI_LIST_APPEND(tail, result);
1885         mem_blks = mem_blks->next;
1886     }
1887 
1888     return head;
1889 err:
1890     qapi_free_GuestMemoryBlockResponseList(head);
1891     error_propagate(errp, local_err);
1892     return NULL;
1893 }
1894 
1895 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
1896 {
1897     Error *local_err = NULL;
1898     char *dirpath;
1899     int dirfd;
1900     char *buf;
1901     GuestMemoryBlockInfo *info;
1902 
1903     dirpath = g_strdup_printf("/sys/devices/system/memory/");
1904     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1905     if (dirfd == -1) {
1906         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1907         g_free(dirpath);
1908         return NULL;
1909     }
1910     g_free(dirpath);
1911 
1912     buf = g_malloc0(20);
1913     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
1914     close(dirfd);
1915     if (local_err) {
1916         g_free(buf);
1917         error_propagate(errp, local_err);
1918         return NULL;
1919     }
1920 
1921     info = g_new0(GuestMemoryBlockInfo, 1);
1922     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
1923 
1924     g_free(buf);
1925 
1926     return info;
1927 }
1928 
1929 #define MAX_NAME_LEN 128
1930 static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
1931 {
1932     GuestDiskStatsInfoList *head = NULL, **tail = &head;
1933     const char *diskstats = "/proc/diskstats";
1934     FILE *fp;
1935     size_t n;
1936     char *line = NULL;
1937 
1938     fp = fopen(diskstats, "r");
1939     if (fp  == NULL) {
1940         error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
1941         return NULL;
1942     }
1943 
1944     while (getline(&line, &n, fp) != -1) {
1945         g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
1946         g_autofree GuestDiskStats *diskstat = NULL;
1947         char dev_name[MAX_NAME_LEN];
1948         unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
1949         unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
1950         unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
1951         unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
1952         unsigned int major, minor;
1953         int i;
1954 
1955         i = sscanf(line, "%u %u %s %lu %lu %lu"
1956                    "%lu %lu %lu %lu %u %u %u %u"
1957                    "%lu %lu %lu %u %lu %u",
1958                    &major, &minor, dev_name,
1959                    &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
1960                    &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
1961                    &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
1962                    &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
1963                    &fl_ios, &fl_ticks);
1964 
1965         if (i < 7) {
1966             continue;
1967         }
1968 
1969         diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
1970         diskstatinfo->name = g_strdup(dev_name);
1971         diskstatinfo->major = major;
1972         diskstatinfo->minor = minor;
1973 
1974         diskstat = g_new0(GuestDiskStats, 1);
1975         if (i == 7) {
1976             diskstat->has_read_ios = true;
1977             diskstat->read_ios = rd_ios;
1978             diskstat->has_read_sectors = true;
1979             diskstat->read_sectors = rd_merges_or_rd_sec;
1980             diskstat->has_write_ios = true;
1981             diskstat->write_ios = rd_sec_or_wr_ios;
1982             diskstat->has_write_sectors = true;
1983             diskstat->write_sectors = rd_ticks_or_wr_sec;
1984         }
1985         if (i >= 14) {
1986             diskstat->has_read_ios = true;
1987             diskstat->read_ios = rd_ios;
1988             diskstat->has_read_sectors = true;
1989             diskstat->read_sectors = rd_sec_or_wr_ios;
1990             diskstat->has_read_merges = true;
1991             diskstat->read_merges = rd_merges_or_rd_sec;
1992             diskstat->has_read_ticks = true;
1993             diskstat->read_ticks = rd_ticks_or_wr_sec;
1994             diskstat->has_write_ios = true;
1995             diskstat->write_ios = wr_ios;
1996             diskstat->has_write_sectors = true;
1997             diskstat->write_sectors = wr_sec;
1998             diskstat->has_write_merges = true;
1999             diskstat->write_merges = wr_merges;
2000             diskstat->has_write_ticks = true;
2001             diskstat->write_ticks = wr_ticks;
2002             diskstat->has_ios_pgr = true;
2003             diskstat->ios_pgr = ios_pgr;
2004             diskstat->has_total_ticks = true;
2005             diskstat->total_ticks = tot_ticks;
2006             diskstat->has_weight_ticks = true;
2007             diskstat->weight_ticks = rq_ticks;
2008         }
2009         if (i >= 18) {
2010             diskstat->has_discard_ios = true;
2011             diskstat->discard_ios = dc_ios;
2012             diskstat->has_discard_merges = true;
2013             diskstat->discard_merges = dc_merges;
2014             diskstat->has_discard_sectors = true;
2015             diskstat->discard_sectors = dc_sec;
2016             diskstat->has_discard_ticks = true;
2017             diskstat->discard_ticks = dc_ticks;
2018         }
2019         if (i >= 20) {
2020             diskstat->has_flush_ios = true;
2021             diskstat->flush_ios = fl_ios;
2022             diskstat->has_flush_ticks = true;
2023             diskstat->flush_ticks = fl_ticks;
2024         }
2025 
2026         diskstatinfo->stats = g_steal_pointer(&diskstat);
2027         QAPI_LIST_APPEND(tail, diskstatinfo);
2028         diskstatinfo = NULL;
2029     }
2030     free(line);
2031     fclose(fp);
2032     return head;
2033 }
2034 
2035 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2036 {
2037     return guest_get_diskstats(errp);
2038 }
2039 
2040 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2041 {
2042     GuestCpuStatsList *head = NULL, **tail = &head;
2043     const char *cpustats = "/proc/stat";
2044     int clk_tck = sysconf(_SC_CLK_TCK);
2045     FILE *fp;
2046     size_t n;
2047     char *line = NULL;
2048 
2049     fp = fopen(cpustats, "r");
2050     if (fp  == NULL) {
2051         error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2052         return NULL;
2053     }
2054 
2055     while (getline(&line, &n, fp) != -1) {
2056         GuestCpuStats *cpustat = NULL;
2057         GuestLinuxCpuStats *linuxcpustat;
2058         int i;
2059         unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2060         unsigned long nice, guest_nice;
2061         char name[64];
2062 
2063         i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2064                    name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2065                    &steal, &guest, &guest_nice);
2066 
2067         /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2068         if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2069             continue;
2070         }
2071 
2072         if (i < 5) {
2073             slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2074             break;
2075         }
2076 
2077         cpustat = g_new0(GuestCpuStats, 1);
2078         cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2079 
2080         linuxcpustat = &cpustat->u.q_linux;
2081         linuxcpustat->cpu = atoi(&name[3]);
2082         linuxcpustat->user = user * 1000 / clk_tck;
2083         linuxcpustat->nice = nice * 1000 / clk_tck;
2084         linuxcpustat->system = system * 1000 / clk_tck;
2085         linuxcpustat->idle = idle * 1000 / clk_tck;
2086 
2087         if (i > 5) {
2088             linuxcpustat->has_iowait = true;
2089             linuxcpustat->iowait = iowait * 1000 / clk_tck;
2090         }
2091 
2092         if (i > 6) {
2093             linuxcpustat->has_irq = true;
2094             linuxcpustat->irq = irq * 1000 / clk_tck;
2095             linuxcpustat->has_softirq = true;
2096             linuxcpustat->softirq = softirq * 1000 / clk_tck;
2097         }
2098 
2099         if (i > 8) {
2100             linuxcpustat->has_steal = true;
2101             linuxcpustat->steal = steal * 1000 / clk_tck;
2102         }
2103 
2104         if (i > 9) {
2105             linuxcpustat->has_guest = true;
2106             linuxcpustat->guest = guest * 1000 / clk_tck;
2107         }
2108 
2109         if (i > 10) {
2110             linuxcpustat->has_guest = true;
2111             linuxcpustat->guest = guest * 1000 / clk_tck;
2112             linuxcpustat->has_guestnice = true;
2113             linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2114         }
2115 
2116         QAPI_LIST_APPEND(tail, cpustat);
2117     }
2118 
2119     free(line);
2120     fclose(fp);
2121     return head;
2122 }
2123 
/*
 * Convert a route-table address into its textual form.
 *
 * @hex_value: for IPv6 (@is_ipv6 != 0), a NUL-terminated string holding
 *             32 hexadecimal digits (two per address byte, in address
 *             order); for IPv4, a pointer to the 32-bit value that was
 *             scanned from the hexadecimal column of the route table.
 * @is_ipv6:   selects the interpretation of @hex_value.
 *
 * Returns: a newly allocated string the caller must g_free(), or NULL if
 * the IPv6 digits cannot be parsed or the address cannot be formatted.
 */
static char *hex_to_ip_address(const void *hex_value, int is_ipv6)
{
    if (is_ipv6) {
        char addr[INET6_ADDRSTRLEN];
        struct in6_addr in6;
        const char *hex_str = hex_value;
        int i;

        for (i = 0; i < 16; i++) {
            if (sscanf(&hex_str[i * 2], "%02hhx", &in6.s6_addr[i]) != 1) {
                return NULL;
            }
        }
        if (inet_ntop(AF_INET6, &in6, addr, sizeof(addr)) == NULL) {
            return NULL;
        }

        return g_strdup(addr);
    } else {
        char addr[INET_ADDRSTRLEN];
        struct in_addr in4;

        /*
         * The route table shows the network-byte-order word printed as a
         * native integer, so the bytes of the scanned value are already in
         * address order in memory.  Copying them verbatim (rather than
         * shifting the integer) yields the same result as before on
         * little-endian hosts and no longer reverses the octets on
         * big-endian ones.  memcpy() also avoids casting away const.
         */
        memcpy(&in4.s_addr, hex_value, sizeof(in4.s_addr));
        if (inet_ntop(AF_INET, &in4, addr, sizeof(addr)) == NULL) {
            return NULL;
        }

        return g_strdup(addr);
    }
}
2150 
2151 GuestNetworkRouteList *qmp_guest_network_get_route(Error **errp)
2152 {
2153     GuestNetworkRouteList *head = NULL, **tail = &head;
2154     const char *route_files[] = {"/proc/net/route", "/proc/net/ipv6_route"};
2155     FILE *fp;
2156     size_t n = 0;
2157     char *line = NULL;
2158     int firstLine;
2159     int is_ipv6;
2160     int i;
2161     char iface[IFNAMSIZ];
2162 
2163     for (i = 0; i < 2; i++) {
2164         firstLine = 1;
2165         is_ipv6 = (i == 1);
2166         fp = fopen(route_files[i], "r");
2167         if (fp == NULL) {
2168             error_setg_errno(errp, errno, "open(\"%s\")", route_files[i]);
2169             continue;
2170         }
2171 
2172         while (getline(&line, &n, fp) != -1) {
2173             if (firstLine && !is_ipv6) {
2174                 firstLine = 0;
2175                 continue;
2176             }
2177             g_autoptr(GuestNetworkRoute) route = g_new0(GuestNetworkRoute, 1);
2178 
2179             if (is_ipv6) {
2180                 char destination[33], source[33], next_hop[33];
2181                 int des_prefixlen, src_prefixlen, metric, refcnt, use, flags;
2182                 if (sscanf(line, "%32s %x %32s %x %32s %x %x %x %x %s",
2183                            destination, &des_prefixlen, source,
2184                            &src_prefixlen, next_hop, &metric, &refcnt,
2185                            &use, &flags, iface) != 10) {
2186                     continue;
2187                 }
2188 
2189                 route->destination = hex_to_ip_address(destination, 1);
2190                 if (route->destination == NULL) {
2191                     continue;
2192                 }
2193                 route->iface = g_strdup(iface);
2194                 route->source = hex_to_ip_address(source, 1);
2195                 route->nexthop = hex_to_ip_address(next_hop, 1);
2196                 route->desprefixlen = g_strdup_printf("%d", des_prefixlen);
2197                 route->srcprefixlen = g_strdup_printf("%d", src_prefixlen);
2198                 route->metric = metric;
2199                 route->has_flags = true;
2200                 route->flags = flags;
2201                 route->has_refcnt = true;
2202                 route->refcnt = refcnt;
2203                 route->has_use = true;
2204                 route->use = use;
2205                 route->version = 6;
2206             } else {
2207                 unsigned int destination, gateway, mask, flags;
2208                 int refcnt, use, metric, mtu, window, irtt;
2209                 if (sscanf(line, "%s %X %X %x %d %d %d %X %d %d %d",
2210                            iface, &destination, &gateway, &flags, &refcnt,
2211                            &use, &metric, &mask, &mtu, &window, &irtt) != 11) {
2212                     continue;
2213                 }
2214 
2215                 route->destination = hex_to_ip_address(&destination, 0);
2216                 if (route->destination == NULL) {
2217                     continue;
2218                 }
2219                 route->iface = g_strdup(iface);
2220                 route->gateway = hex_to_ip_address(&gateway, 0);
2221                 route->mask = hex_to_ip_address(&mask, 0);
2222                 route->metric = metric;
2223                 route->has_flags = true;
2224                 route->flags = flags;
2225                 route->has_refcnt = true;
2226                 route->refcnt = refcnt;
2227                 route->has_use = true;
2228                 route->use = use;
2229                 route->has_mtu = true;
2230                 route->mtu = mtu;
2231                 route->has_window = true;
2232                 route->window = window;
2233                 route->has_irtt = true;
2234                 route->irtt = irtt;
2235                 route->version = 4;
2236             }
2237 
2238             QAPI_LIST_APPEND(tail, route);
2239             route = NULL;
2240         }
2241 
2242         fclose(fp);
2243     }
2244 
2245     free(line);
2246     return head;
2247 }
2248