xref: /openbmc/qemu/qga/commands-posix.c (revision c79aa350)
1 /*
2  * QEMU Guest Agent POSIX-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include <sys/ioctl.h>
16 #include <sys/utsname.h>
17 #include <sys/wait.h>
18 #include <dirent.h>
19 #include "qga-qapi-commands.h"
20 #include "qapi/error.h"
21 #include "qapi/qmp/qerror.h"
22 #include "qemu/host-utils.h"
23 #include "qemu/sockets.h"
24 #include "qemu/base64.h"
25 #include "qemu/cutils.h"
26 #include "commands-common.h"
27 #include "block/nvme.h"
28 #include "cutils.h"
29 
30 #ifdef HAVE_UTMPX
31 #include <utmpx.h>
32 #endif
33 
34 #if defined(__linux__)
35 #include <mntent.h>
36 #include <sys/statvfs.h>
37 #include <linux/nvme_ioctl.h>
38 
39 #ifdef CONFIG_LIBUDEV
40 #include <libudev.h>
41 #endif
42 #endif
43 
44 #ifdef HAVE_GETIFADDRS
45 #include <arpa/inet.h>
46 #include <sys/socket.h>
47 #include <net/if.h>
48 #if defined(__NetBSD__) || defined(__OpenBSD__)
49 #include <net/if_arp.h>
50 #include <netinet/if_ether.h>
51 #else
52 #include <net/ethernet.h>
53 #endif
54 #include <sys/types.h>
55 #ifdef CONFIG_SOLARIS
56 #include <sys/sockio.h>
57 #endif
58 #endif
59 
60 static void ga_wait_child(pid_t pid, int *status, Error **errp)
61 {
62     pid_t rpid;
63 
64     *status = 0;
65 
66     rpid = RETRY_ON_EINTR(waitpid(pid, status, 0));
67 
68     if (rpid == -1) {
69         error_setg_errno(errp, errno, "failed to wait for child (pid: %d)",
70                          pid);
71         return;
72     }
73 
74     g_assert(rpid == pid);
75 }
76 
77 void qmp_guest_shutdown(const char *mode, Error **errp)
78 {
79     const char *shutdown_flag;
80     Error *local_err = NULL;
81     pid_t pid;
82     int status;
83 
84 #ifdef CONFIG_SOLARIS
85     const char *powerdown_flag = "-i5";
86     const char *halt_flag = "-i0";
87     const char *reboot_flag = "-i6";
88 #elif defined(CONFIG_BSD)
89     const char *powerdown_flag = "-p";
90     const char *halt_flag = "-h";
91     const char *reboot_flag = "-r";
92 #else
93     const char *powerdown_flag = "-P";
94     const char *halt_flag = "-H";
95     const char *reboot_flag = "-r";
96 #endif
97 
98     slog("guest-shutdown called, mode: %s", mode);
99     if (!mode || strcmp(mode, "powerdown") == 0) {
100         shutdown_flag = powerdown_flag;
101     } else if (strcmp(mode, "halt") == 0) {
102         shutdown_flag = halt_flag;
103     } else if (strcmp(mode, "reboot") == 0) {
104         shutdown_flag = reboot_flag;
105     } else {
106         error_setg(errp,
107                    "mode is invalid (valid values are: halt|powerdown|reboot");
108         return;
109     }
110 
111     pid = fork();
112     if (pid == 0) {
113         /* child, start the shutdown */
114         setsid();
115         reopen_fd_to_null(0);
116         reopen_fd_to_null(1);
117         reopen_fd_to_null(2);
118 
119 #ifdef CONFIG_SOLARIS
120         execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y",
121               "hypervisor initiated shutdown", (char *)NULL);
122 #elif defined(CONFIG_BSD)
123         execl("/sbin/shutdown", "shutdown", shutdown_flag, "+0",
124                "hypervisor initiated shutdown", (char *)NULL);
125 #else
126         execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
127                "hypervisor initiated shutdown", (char *)NULL);
128 #endif
129         _exit(EXIT_FAILURE);
130     } else if (pid < 0) {
131         error_setg_errno(errp, errno, "failed to create child process");
132         return;
133     }
134 
135     ga_wait_child(pid, &status, &local_err);
136     if (local_err) {
137         error_propagate(errp, local_err);
138         return;
139     }
140 
141     if (!WIFEXITED(status)) {
142         error_setg(errp, "child process has terminated abnormally");
143         return;
144     }
145 
146     if (WEXITSTATUS(status)) {
147         error_setg(errp, "child process has failed to shutdown");
148         return;
149     }
150 
151     /* succeeded */
152 }
153 
154 void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
155 {
156     int ret;
157     int status;
158     pid_t pid;
159     Error *local_err = NULL;
160     struct timeval tv;
161     static const char hwclock_path[] = "/sbin/hwclock";
162     static int hwclock_available = -1;
163 
164     if (hwclock_available < 0) {
165         hwclock_available = (access(hwclock_path, X_OK) == 0);
166     }
167 
168     if (!hwclock_available) {
169         error_setg(errp, QERR_UNSUPPORTED);
170         return;
171     }
172 
173     /* If user has passed a time, validate and set it. */
174     if (has_time) {
175         GDate date = { 0, };
176 
177         /* year-2038 will overflow in case time_t is 32bit */
178         if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
179             error_setg(errp, "Time %" PRId64 " is too large", time_ns);
180             return;
181         }
182 
183         tv.tv_sec = time_ns / 1000000000;
184         tv.tv_usec = (time_ns % 1000000000) / 1000;
185         g_date_set_time_t(&date, tv.tv_sec);
186         if (date.year < 1970 || date.year >= 2070) {
187             error_setg_errno(errp, errno, "Invalid time");
188             return;
189         }
190 
191         ret = settimeofday(&tv, NULL);
192         if (ret < 0) {
193             error_setg_errno(errp, errno, "Failed to set time to guest");
194             return;
195         }
196     }
197 
198     /* Now, if user has passed a time to set and the system time is set, we
199      * just need to synchronize the hardware clock. However, if no time was
200      * passed, user is requesting the opposite: set the system time from the
201      * hardware clock (RTC). */
202     pid = fork();
203     if (pid == 0) {
204         setsid();
205         reopen_fd_to_null(0);
206         reopen_fd_to_null(1);
207         reopen_fd_to_null(2);
208 
209         /* Use '/sbin/hwclock -w' to set RTC from the system time,
210          * or '/sbin/hwclock -s' to set the system time from RTC. */
211         execl(hwclock_path, "hwclock", has_time ? "-w" : "-s", NULL);
212         _exit(EXIT_FAILURE);
213     } else if (pid < 0) {
214         error_setg_errno(errp, errno, "failed to create child process");
215         return;
216     }
217 
218     ga_wait_child(pid, &status, &local_err);
219     if (local_err) {
220         error_propagate(errp, local_err);
221         return;
222     }
223 
224     if (!WIFEXITED(status)) {
225         error_setg(errp, "child process has terminated abnormally");
226         return;
227     }
228 
229     if (WEXITSTATUS(status)) {
230         error_setg(errp, "hwclock failed to set hardware clock to system time");
231         return;
232     }
233 }
234 
/*
 * Last stdio operation performed on a guest file handle.  The read and
 * write paths consult it so they can fflush()/fseek() when the direction
 * of I/O on the stream changes.
 */
typedef enum {
    RW_STATE_NEW,       /* set after a successful flush or seek */
    RW_STATE_READING,   /* set after a successful read */
    RW_STATE_WRITING,   /* set after a successful write */
} RwState;
240 
/* One open guest file, linked into guest_file_state.filehandles. */
struct GuestFileHandle {
    uint64_t id;        /* handle value obtained from ga_get_fd_handle() */
    FILE *fh;           /* stdio stream for the opened file */
    RwState state;      /* last I/O direction, see RwState */
    QTAILQ_ENTRY(GuestFileHandle) next;
};
247 
/* File-scope list of every GuestFileHandle currently registered. */
static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles;
} guest_file_state = {
    .filehandles = QTAILQ_HEAD_INITIALIZER(guest_file_state.filehandles),
};
253 
254 static int64_t guest_file_handle_add(FILE *fh, Error **errp)
255 {
256     GuestFileHandle *gfh;
257     int64_t handle;
258 
259     handle = ga_get_fd_handle(ga_state, errp);
260     if (handle < 0) {
261         return -1;
262     }
263 
264     gfh = g_new0(GuestFileHandle, 1);
265     gfh->id = handle;
266     gfh->fh = fh;
267     QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
268 
269     return handle;
270 }
271 
272 GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
273 {
274     GuestFileHandle *gfh;
275 
276     QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
277     {
278         if (gfh->id == id) {
279             return gfh;
280         }
281     }
282 
283     error_setg(errp, "handle '%" PRId64 "' has not been found", id);
284     return NULL;
285 }
286 
/* Constant pointer to constant string; element type of the tables below. */
typedef const char * const ccpc;

/* Platforms without O_BINARY get a no-op flag so the table stays uniform. */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/*
 * Maps fopen()-style mode strings to the open() flags used for them.
 * Each entry lists the accepted spellings of one mode (NULL-terminated)
 * together with the base flags for open().
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html
 */
static const struct {
    ccpc *forms;        /* NULL-terminated list of equivalent mode strings */
    int oflag_base;     /* open() flags for this mode */
} guest_file_open_modes[] = {
    { (ccpc[]){ "r",          NULL }, O_RDONLY                                 },
    { (ccpc[]){ "rb",         NULL }, O_RDONLY                      | O_BINARY },
    { (ccpc[]){ "w",          NULL }, O_WRONLY | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a",          NULL }, O_WRONLY | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND | O_BINARY },
    { (ccpc[]){ "r+",         NULL }, O_RDWR                                   },
    { (ccpc[]){ "rb+", "r+b", NULL }, O_RDWR                        | O_BINARY },
    { (ccpc[]){ "w+",         NULL }, O_RDWR   | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a+",         NULL }, O_RDWR   | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND | O_BINARY }
};
311 
312 static int
313 find_open_flag(const char *mode_str, Error **errp)
314 {
315     unsigned mode;
316 
317     for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
318         ccpc *form;
319 
320         form = guest_file_open_modes[mode].forms;
321         while (*form != NULL && strcmp(*form, mode_str) != 0) {
322             ++form;
323         }
324         if (*form != NULL) {
325             break;
326         }
327     }
328 
329     if (mode == ARRAY_SIZE(guest_file_open_modes)) {
330         error_setg(errp, "invalid file open mode '%s'", mode_str);
331         return -1;
332     }
333     return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
334 }
335 
/*
 * Permission bits applied with fchmod() to files newly created by
 * safe_open_or_create(): read and write for user, group and others.
 */
#define DEFAULT_NEW_FILE_MODE (S_IRUSR | S_IWUSR | \
                               S_IRGRP | S_IWGRP | \
                               S_IROTH | S_IWOTH)
339 
340 static FILE *
341 safe_open_or_create(const char *path, const char *mode, Error **errp)
342 {
343     int oflag;
344     int fd = -1;
345     FILE *f = NULL;
346 
347     oflag = find_open_flag(mode, errp);
348     if (oflag < 0) {
349         goto end;
350     }
351 
352     /* If the caller wants / allows creation of a new file, we implement it
353      * with a two step process: open() + (open() / fchmod()).
354      *
355      * First we insist on creating the file exclusively as a new file. If
356      * that succeeds, we're free to set any file-mode bits on it. (The
357      * motivation is that we want to set those file-mode bits independently
358      * of the current umask.)
359      *
360      * If the exclusive creation fails because the file already exists
361      * (EEXIST is not possible for any other reason), we just attempt to
362      * open the file, but in this case we won't be allowed to change the
363      * file-mode bits on the preexistent file.
364      *
365      * The pathname should never disappear between the two open()s in
366      * practice. If it happens, then someone very likely tried to race us.
367      * In this case just go ahead and report the ENOENT from the second
368      * open() to the caller.
369      *
370      * If the caller wants to open a preexistent file, then the first
371      * open() is decisive and its third argument is ignored, and the second
372      * open() and the fchmod() are never called.
373      */
374     fd = qga_open_cloexec(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
375     if (fd == -1 && errno == EEXIST) {
376         oflag &= ~(unsigned)O_CREAT;
377         fd = qga_open_cloexec(path, oflag, 0);
378     }
379     if (fd == -1) {
380         error_setg_errno(errp, errno,
381                          "failed to open file '%s' (mode: '%s')",
382                          path, mode);
383         goto end;
384     }
385 
386     if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
387         error_setg_errno(errp, errno, "failed to set permission "
388                          "0%03o on new file '%s' (mode: '%s')",
389                          (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
390         goto end;
391     }
392 
393     f = fdopen(fd, mode);
394     if (f == NULL) {
395         error_setg_errno(errp, errno, "failed to associate stdio stream with "
396                          "file descriptor %d, file '%s' (mode: '%s')",
397                          fd, path, mode);
398     }
399 
400 end:
401     if (f == NULL && fd != -1) {
402         close(fd);
403         if (oflag & O_CREAT) {
404             unlink(path);
405         }
406     }
407     return f;
408 }
409 
410 int64_t qmp_guest_file_open(const char *path, const char *mode,
411                             Error **errp)
412 {
413     FILE *fh;
414     Error *local_err = NULL;
415     int64_t handle;
416 
417     if (!mode) {
418         mode = "r";
419     }
420     slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
421     fh = safe_open_or_create(path, mode, &local_err);
422     if (local_err != NULL) {
423         error_propagate(errp, local_err);
424         return -1;
425     }
426 
427     /* set fd non-blocking to avoid common use cases (like reading from a
428      * named pipe) from hanging the agent
429      */
430     if (!g_unix_set_fd_nonblocking(fileno(fh), true, NULL)) {
431         fclose(fh);
432         error_setg_errno(errp, errno, "Failed to set FD nonblocking");
433         return -1;
434     }
435 
436     handle = guest_file_handle_add(fh, errp);
437     if (handle < 0) {
438         fclose(fh);
439         return -1;
440     }
441 
442     slog("guest-file-open, handle: %" PRId64, handle);
443     return handle;
444 }
445 
446 void qmp_guest_file_close(int64_t handle, Error **errp)
447 {
448     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
449     int ret;
450 
451     slog("guest-file-close called, handle: %" PRId64, handle);
452     if (!gfh) {
453         return;
454     }
455 
456     ret = fclose(gfh->fh);
457     if (ret == EOF) {
458         error_setg_errno(errp, errno, "failed to close handle");
459         return;
460     }
461 
462     QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
463     g_free(gfh);
464 }
465 
466 GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
467                                       int64_t count, Error **errp)
468 {
469     GuestFileRead *read_data = NULL;
470     guchar *buf;
471     FILE *fh = gfh->fh;
472     size_t read_count;
473 
474     /* explicitly flush when switching from writing to reading */
475     if (gfh->state == RW_STATE_WRITING) {
476         int ret = fflush(fh);
477         if (ret == EOF) {
478             error_setg_errno(errp, errno, "failed to flush file");
479             return NULL;
480         }
481         gfh->state = RW_STATE_NEW;
482     }
483 
484     buf = g_malloc0(count + 1);
485     read_count = fread(buf, 1, count, fh);
486     if (ferror(fh)) {
487         error_setg_errno(errp, errno, "failed to read file");
488     } else {
489         buf[read_count] = 0;
490         read_data = g_new0(GuestFileRead, 1);
491         read_data->count = read_count;
492         read_data->eof = feof(fh);
493         if (read_count) {
494             read_data->buf_b64 = g_base64_encode(buf, read_count);
495         }
496         gfh->state = RW_STATE_READING;
497     }
498     g_free(buf);
499     clearerr(fh);
500 
501     return read_data;
502 }
503 
504 GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
505                                      bool has_count, int64_t count,
506                                      Error **errp)
507 {
508     GuestFileWrite *write_data = NULL;
509     guchar *buf;
510     gsize buf_len;
511     int write_count;
512     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
513     FILE *fh;
514 
515     if (!gfh) {
516         return NULL;
517     }
518 
519     fh = gfh->fh;
520 
521     if (gfh->state == RW_STATE_READING) {
522         int ret = fseek(fh, 0, SEEK_CUR);
523         if (ret == -1) {
524             error_setg_errno(errp, errno, "failed to seek file");
525             return NULL;
526         }
527         gfh->state = RW_STATE_NEW;
528     }
529 
530     buf = qbase64_decode(buf_b64, -1, &buf_len, errp);
531     if (!buf) {
532         return NULL;
533     }
534 
535     if (!has_count) {
536         count = buf_len;
537     } else if (count < 0 || count > buf_len) {
538         error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
539                    count);
540         g_free(buf);
541         return NULL;
542     }
543 
544     write_count = fwrite(buf, 1, count, fh);
545     if (ferror(fh)) {
546         error_setg_errno(errp, errno, "failed to write to file");
547         slog("guest-file-write failed, handle: %" PRId64, handle);
548     } else {
549         write_data = g_new0(GuestFileWrite, 1);
550         write_data->count = write_count;
551         write_data->eof = feof(fh);
552         gfh->state = RW_STATE_WRITING;
553     }
554     g_free(buf);
555     clearerr(fh);
556 
557     return write_data;
558 }
559 
560 struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
561                                           GuestFileWhence *whence_code,
562                                           Error **errp)
563 {
564     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
565     GuestFileSeek *seek_data = NULL;
566     FILE *fh;
567     int ret;
568     int whence;
569     Error *err = NULL;
570 
571     if (!gfh) {
572         return NULL;
573     }
574 
575     /* We stupidly exposed 'whence':'int' in our qapi */
576     whence = ga_parse_whence(whence_code, &err);
577     if (err) {
578         error_propagate(errp, err);
579         return NULL;
580     }
581 
582     fh = gfh->fh;
583     ret = fseek(fh, offset, whence);
584     if (ret == -1) {
585         error_setg_errno(errp, errno, "failed to seek file");
586         if (errno == ESPIPE) {
587             /* file is non-seekable, stdio shouldn't be buffering anyways */
588             gfh->state = RW_STATE_NEW;
589         }
590     } else {
591         seek_data = g_new0(GuestFileSeek, 1);
592         seek_data->position = ftell(fh);
593         seek_data->eof = feof(fh);
594         gfh->state = RW_STATE_NEW;
595     }
596     clearerr(fh);
597 
598     return seek_data;
599 }
600 
601 void qmp_guest_file_flush(int64_t handle, Error **errp)
602 {
603     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
604     FILE *fh;
605     int ret;
606 
607     if (!gfh) {
608         return;
609     }
610 
611     fh = gfh->fh;
612     ret = fflush(fh);
613     if (ret == EOF) {
614         error_setg_errno(errp, errno, "failed to flush file");
615     } else {
616         gfh->state = RW_STATE_NEW;
617     }
618 }
619 
620 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
621 void free_fs_mount_list(FsMountList *mounts)
622 {
623      FsMount *mount, *temp;
624 
625      if (!mounts) {
626          return;
627      }
628 
629      QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
630          QTAILQ_REMOVE(mounts, mount, next);
631          g_free(mount->dirname);
632          g_free(mount->devtype);
633          g_free(mount);
634      }
635 }
636 #endif
637 
638 #if defined(CONFIG_FSFREEZE)
/* Which action the fsfreeze hook script is asked to perform. */
typedef enum {
    FSFREEZE_HOOK_THAW = 0,
    FSFREEZE_HOOK_FREEZE,
} FsfreezeHookArg;

/* Command-line argument passed to the hook, indexed by FsfreezeHookArg. */
static const char *fsfreeze_hook_arg_string[] = {
    [FSFREEZE_HOOK_THAW]   = "thaw",
    [FSFREEZE_HOOK_FREEZE] = "freeze",
};
648 
649 static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
650 {
651     int status;
652     pid_t pid;
653     const char *hook;
654     const char *arg_str = fsfreeze_hook_arg_string[arg];
655     Error *local_err = NULL;
656 
657     hook = ga_fsfreeze_hook(ga_state);
658     if (!hook) {
659         return;
660     }
661     if (access(hook, X_OK) != 0) {
662         error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
663         return;
664     }
665 
666     slog("executing fsfreeze hook with arg '%s'", arg_str);
667     pid = fork();
668     if (pid == 0) {
669         setsid();
670         reopen_fd_to_null(0);
671         reopen_fd_to_null(1);
672         reopen_fd_to_null(2);
673 
674         execl(hook, hook, arg_str, NULL);
675         _exit(EXIT_FAILURE);
676     } else if (pid < 0) {
677         error_setg_errno(errp, errno, "failed to create child process");
678         return;
679     }
680 
681     ga_wait_child(pid, &status, &local_err);
682     if (local_err) {
683         error_propagate(errp, local_err);
684         return;
685     }
686 
687     if (!WIFEXITED(status)) {
688         error_setg(errp, "fsfreeze hook has terminated abnormally");
689         return;
690     }
691 
692     status = WEXITSTATUS(status);
693     if (status) {
694         error_setg(errp, "fsfreeze hook has failed with status %d", status);
695         return;
696     }
697 }
698 
699 /*
700  * Return status of freeze/thaw
701  */
702 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
703 {
704     if (ga_is_frozen(ga_state)) {
705         return GUEST_FSFREEZE_STATUS_FROZEN;
706     }
707 
708     return GUEST_FSFREEZE_STATUS_THAWED;
709 }
710 
/*
 * Handle the guest-fsfreeze-freeze command: equivalent to
 * qmp_guest_fsfreeze_freeze_list() called without a mountpoint list.
 */
int64_t qmp_guest_fsfreeze_freeze(Error **errp)
{
    return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
}
715 
716 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
717                                        strList *mountpoints,
718                                        Error **errp)
719 {
720     int ret;
721     FsMountList mounts;
722     Error *local_err = NULL;
723 
724     slog("guest-fsfreeze called");
725 
726     execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
727     if (local_err) {
728         error_propagate(errp, local_err);
729         return -1;
730     }
731 
732     QTAILQ_INIT(&mounts);
733     if (!build_fs_mount_list(&mounts, &local_err)) {
734         error_propagate(errp, local_err);
735         return -1;
736     }
737 
738     /* cannot risk guest agent blocking itself on a write in this state */
739     ga_set_frozen(ga_state);
740 
741     ret = qmp_guest_fsfreeze_do_freeze_list(has_mountpoints, mountpoints,
742                                             mounts, errp);
743 
744     free_fs_mount_list(&mounts);
745     /* We may not issue any FIFREEZE here.
746      * Just unset ga_state here and ready for the next call.
747      */
748     if (ret == 0) {
749         ga_unset_frozen(ga_state);
750     } else if (ret < 0) {
751         qmp_guest_fsfreeze_thaw(NULL);
752     }
753     return ret;
754 }
755 
756 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
757 {
758     int ret;
759 
760     ret = qmp_guest_fsfreeze_do_thaw(errp);
761     if (ret >= 0) {
762         ga_unset_frozen(ga_state);
763         execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
764     } else {
765         ret = 0;
766     }
767 
768     return ret;
769 }
770 
771 static void guest_fsfreeze_cleanup(void)
772 {
773     Error *err = NULL;
774 
775     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
776         qmp_guest_fsfreeze_thaw(&err);
777         if (err) {
778             slog("failed to clean up frozen filesystems: %s",
779                  error_get_pretty(err));
780             error_free(err);
781         }
782     }
783 }
784 #endif
785 
786 /* linux-specific implementations. avoid this if at all possible. */
787 #if defined(__linux__)
788 #if defined(CONFIG_FSFREEZE)
789 
790 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
791 {
792     char *path;
793     char *dpath;
794     char *driver = NULL;
795     char buf[PATH_MAX];
796     ssize_t len;
797 
798     path = g_strndup(syspath, pathlen);
799     dpath = g_strdup_printf("%s/driver", path);
800     len = readlink(dpath, buf, sizeof(buf) - 1);
801     if (len != -1) {
802         buf[len] = 0;
803         driver = g_path_get_basename(buf);
804     }
805     g_free(dpath);
806     g_free(path);
807     return driver;
808 }
809 
/* qsort() comparator ordering unsigned int values ascending. */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int lhs = *(const unsigned int *)_a;
    const unsigned int rhs = *(const unsigned int *)_b;

    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
817 
818 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
819 static int build_hosts(char const *syspath, char const *host, bool ata,
820                        unsigned int *hosts, int hosts_max, Error **errp)
821 {
822     char *path;
823     DIR *dir;
824     struct dirent *entry;
825     int i = 0;
826 
827     path = g_strndup(syspath, host - syspath);
828     dir = opendir(path);
829     if (!dir) {
830         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
831         g_free(path);
832         return -1;
833     }
834 
835     while (i < hosts_max) {
836         entry = readdir(dir);
837         if (!entry) {
838             break;
839         }
840         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
841             ++i;
842         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
843             ++i;
844         }
845     }
846 
847     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
848 
849     g_free(path);
850     closedir(dir);
851     return i;
852 }
853 
854 /*
855  * Store disk device info for devices on the PCI bus.
856  * Returns true if information has been stored, or false for failure.
857  */
858 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
859                                            GuestDiskAddress *disk,
860                                            Error **errp)
861 {
862     unsigned int pci[4], host, hosts[8], tgt[3];
863     int i, nhosts = 0, pcilen;
864     GuestPCIAddress *pciaddr = disk->pci_controller;
865     bool has_ata = false, has_host = false, has_tgt = false;
866     char *p, *q, *driver = NULL;
867     bool ret = false;
868 
869     p = strstr(syspath, "/devices/pci");
870     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
871                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
872         g_debug("only pci device is supported: sysfs path '%s'", syspath);
873         return false;
874     }
875 
876     p += 12 + pcilen;
877     while (true) {
878         driver = get_pci_driver(syspath, p - syspath, errp);
879         if (driver && (g_str_equal(driver, "ata_piix") ||
880                        g_str_equal(driver, "sym53c8xx") ||
881                        g_str_equal(driver, "virtio-pci") ||
882                        g_str_equal(driver, "ahci") ||
883                        g_str_equal(driver, "nvme"))) {
884             break;
885         }
886 
887         g_free(driver);
888         if (sscanf(p, "/%x:%x:%x.%x%n",
889                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
890             p += pcilen;
891             continue;
892         }
893 
894         g_debug("unsupported driver or sysfs path '%s'", syspath);
895         return false;
896     }
897 
898     p = strstr(syspath, "/target");
899     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
900                     tgt, tgt + 1, tgt + 2) == 3) {
901         has_tgt = true;
902     }
903 
904     p = strstr(syspath, "/ata");
905     if (p) {
906         q = p + 4;
907         has_ata = true;
908     } else {
909         p = strstr(syspath, "/host");
910         q = p + 5;
911     }
912     if (p && sscanf(q, "%u", &host) == 1) {
913         has_host = true;
914         nhosts = build_hosts(syspath, p, has_ata, hosts,
915                              ARRAY_SIZE(hosts), errp);
916         if (nhosts < 0) {
917             goto cleanup;
918         }
919     }
920 
921     pciaddr->domain = pci[0];
922     pciaddr->bus = pci[1];
923     pciaddr->slot = pci[2];
924     pciaddr->function = pci[3];
925 
926     if (strcmp(driver, "ata_piix") == 0) {
927         /* a host per ide bus, target*:0:<unit>:0 */
928         if (!has_host || !has_tgt) {
929             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
930             goto cleanup;
931         }
932         for (i = 0; i < nhosts; i++) {
933             if (host == hosts[i]) {
934                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
935                 disk->bus = i;
936                 disk->unit = tgt[1];
937                 break;
938             }
939         }
940         if (i >= nhosts) {
941             g_debug("no host for '%s' (driver '%s')", syspath, driver);
942             goto cleanup;
943         }
944     } else if (strcmp(driver, "sym53c8xx") == 0) {
945         /* scsi(LSI Logic): target*:0:<unit>:0 */
946         if (!has_tgt) {
947             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
948             goto cleanup;
949         }
950         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
951         disk->unit = tgt[1];
952     } else if (strcmp(driver, "virtio-pci") == 0) {
953         if (has_tgt) {
954             /* virtio-scsi: target*:0:0:<unit> */
955             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
956             disk->unit = tgt[2];
957         } else {
958             /* virtio-blk: 1 disk per 1 device */
959             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
960         }
961     } else if (strcmp(driver, "ahci") == 0) {
962         /* ahci: 1 host per 1 unit */
963         if (!has_host || !has_tgt) {
964             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
965             goto cleanup;
966         }
967         for (i = 0; i < nhosts; i++) {
968             if (host == hosts[i]) {
969                 disk->unit = i;
970                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
971                 break;
972             }
973         }
974         if (i >= nhosts) {
975             g_debug("no host for '%s' (driver '%s')", syspath, driver);
976             goto cleanup;
977         }
978     } else if (strcmp(driver, "nvme") == 0) {
979         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
980     } else {
981         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
982         goto cleanup;
983     }
984 
985     ret = true;
986 
987 cleanup:
988     g_free(driver);
989     return ret;
990 }
991 
992 /*
993  * Store disk device info for non-PCI virtio devices (for example s390x
994  * channel I/O devices). Returns true if information has been stored, or
995  * false for failure.
996  */
997 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
998                                                  GuestDiskAddress *disk,
999                                                  Error **errp)
1000 {
1001     unsigned int tgt[3];
1002     char *p;
1003 
1004     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
1005         g_debug("Unsupported virtio device '%s'", syspath);
1006         return false;
1007     }
1008 
1009     p = strstr(syspath, "/target");
1010     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
1011                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
1012         /* virtio-scsi: target*:0:<target>:<unit> */
1013         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
1014         disk->bus = tgt[0];
1015         disk->target = tgt[1];
1016         disk->unit = tgt[2];
1017     } else {
1018         /* virtio-blk: 1 disk per 1 device */
1019         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
1020     }
1021 
1022     return true;
1023 }
1024 
1025 /*
1026  * Store disk device info for CCW devices (s390x channel I/O devices).
1027  * Returns true if information has been stored, or false for failure.
1028  */
1029 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
1030                                            GuestDiskAddress *disk,
1031                                            Error **errp)
1032 {
1033     unsigned int cssid, ssid, subchno, devno;
1034     char *p;
1035 
1036     p = strstr(syspath, "/devices/css");
1037     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
1038                      &cssid, &ssid, &subchno, &devno) < 4) {
1039         g_debug("could not parse ccw device sysfs path: %s", syspath);
1040         return false;
1041     }
1042 
1043     disk->ccw_address = g_new0(GuestCCWAddress, 1);
1044     disk->ccw_address->cssid = cssid;
1045     disk->ccw_address->ssid = ssid;
1046     disk->ccw_address->subchno = subchno;
1047     disk->ccw_address->devno = devno;
1048 
1049     if (strstr(p, "/virtio")) {
1050         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1051     }
1052 
1053     return true;
1054 }
1055 
1056 /* Store disk device info specified by @sysfs into @fs */
1057 static void build_guest_fsinfo_for_real_device(char const *syspath,
1058                                                GuestFilesystemInfo *fs,
1059                                                Error **errp)
1060 {
1061     GuestDiskAddress *disk;
1062     GuestPCIAddress *pciaddr;
1063     bool has_hwinf;
1064 #ifdef CONFIG_LIBUDEV
1065     struct udev *udev = NULL;
1066     struct udev_device *udevice = NULL;
1067 #endif
1068 
1069     pciaddr = g_new0(GuestPCIAddress, 1);
1070     pciaddr->domain = -1;                       /* -1 means field is invalid */
1071     pciaddr->bus = -1;
1072     pciaddr->slot = -1;
1073     pciaddr->function = -1;
1074 
1075     disk = g_new0(GuestDiskAddress, 1);
1076     disk->pci_controller = pciaddr;
1077     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
1078 
1079 #ifdef CONFIG_LIBUDEV
1080     udev = udev_new();
1081     udevice = udev_device_new_from_syspath(udev, syspath);
1082     if (udev == NULL || udevice == NULL) {
1083         g_debug("failed to query udev");
1084     } else {
1085         const char *devnode, *serial;
1086         devnode = udev_device_get_devnode(udevice);
1087         if (devnode != NULL) {
1088             disk->dev = g_strdup(devnode);
1089         }
1090         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
1091         if (serial != NULL && *serial != 0) {
1092             disk->serial = g_strdup(serial);
1093         }
1094     }
1095 
1096     udev_unref(udev);
1097     udev_device_unref(udevice);
1098 #endif
1099 
1100     if (strstr(syspath, "/devices/pci")) {
1101         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
1102     } else if (strstr(syspath, "/devices/css")) {
1103         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
1104     } else if (strstr(syspath, "/virtio")) {
1105         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1106     } else {
1107         g_debug("Unsupported device type for '%s'", syspath);
1108         has_hwinf = false;
1109     }
1110 
1111     if (has_hwinf || disk->dev || disk->serial) {
1112         QAPI_LIST_PREPEND(fs->disk, disk);
1113     } else {
1114         qapi_free_GuestDiskAddress(disk);
1115     }
1116 }
1117 
1118 static void build_guest_fsinfo_for_device(char const *devpath,
1119                                           GuestFilesystemInfo *fs,
1120                                           Error **errp);
1121 
1122 /* Store a list of slave devices of virtual volume specified by @syspath into
1123  * @fs */
1124 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
1125                                                   GuestFilesystemInfo *fs,
1126                                                   Error **errp)
1127 {
1128     Error *err = NULL;
1129     DIR *dir;
1130     char *dirpath;
1131     struct dirent *entry;
1132 
1133     dirpath = g_strdup_printf("%s/slaves", syspath);
1134     dir = opendir(dirpath);
1135     if (!dir) {
1136         if (errno != ENOENT) {
1137             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
1138         }
1139         g_free(dirpath);
1140         return;
1141     }
1142 
1143     for (;;) {
1144         errno = 0;
1145         entry = readdir(dir);
1146         if (entry == NULL) {
1147             if (errno) {
1148                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
1149             }
1150             break;
1151         }
1152 
1153         if (entry->d_type == DT_LNK) {
1154             char *path;
1155 
1156             g_debug(" slave device '%s'", entry->d_name);
1157             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
1158             build_guest_fsinfo_for_device(path, fs, &err);
1159             g_free(path);
1160 
1161             if (err) {
1162                 error_propagate(errp, err);
1163                 break;
1164             }
1165         }
1166     }
1167 
1168     g_free(dirpath);
1169     closedir(dir);
1170 }
1171 
1172 static bool is_disk_virtual(const char *devpath, Error **errp)
1173 {
1174     g_autofree char *syspath = realpath(devpath, NULL);
1175 
1176     if (!syspath) {
1177         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1178         return false;
1179     }
1180     return strstr(syspath, "/devices/virtual/block/") != NULL;
1181 }
1182 
1183 /* Dispatch to functions for virtual/real device */
1184 static void build_guest_fsinfo_for_device(char const *devpath,
1185                                           GuestFilesystemInfo *fs,
1186                                           Error **errp)
1187 {
1188     ERRP_GUARD();
1189     g_autofree char *syspath = NULL;
1190     bool is_virtual = false;
1191 
1192     syspath = realpath(devpath, NULL);
1193     if (!syspath) {
1194         if (errno != ENOENT) {
1195             error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1196             return;
1197         }
1198 
1199         /* ENOENT: This devpath may not exist because of container config */
1200         if (!fs->name) {
1201             fs->name = g_path_get_basename(devpath);
1202         }
1203         return;
1204     }
1205 
1206     if (!fs->name) {
1207         fs->name = g_path_get_basename(syspath);
1208     }
1209 
1210     g_debug("  parse sysfs path '%s'", syspath);
1211     is_virtual = is_disk_virtual(syspath, errp);
1212     if (*errp != NULL) {
1213         return;
1214     }
1215     if (is_virtual) {
1216         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
1217     } else {
1218         build_guest_fsinfo_for_real_device(syspath, fs, errp);
1219     }
1220 }
1221 
1222 #ifdef CONFIG_LIBUDEV
1223 
1224 /*
1225  * Wrapper around build_guest_fsinfo_for_device() for getting just
1226  * the disk address.
1227  */
1228 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
1229 {
1230     g_autoptr(GuestFilesystemInfo) fs = NULL;
1231 
1232     fs = g_new0(GuestFilesystemInfo, 1);
1233     build_guest_fsinfo_for_device(syspath, fs, errp);
1234     if (fs->disk != NULL) {
1235         return g_steal_pointer(&fs->disk->value);
1236     }
1237     return NULL;
1238 }
1239 
/*
 * Look up the udev property "DM_NAME" of the device at @syspath.
 *
 * Returns a newly allocated copy of the alias, or NULL if udev could not
 * be queried, the property is missing, or the property is empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev *udev_ctx = udev_new();
    struct udev_device *udev_dev = NULL;
    char *alias_copy = NULL;

    if (udev_ctx != NULL) {
        udev_dev = udev_device_new_from_syspath(udev_ctx, syspath);
        if (udev_dev != NULL) {
            const char *dm_name =
                udev_device_get_property_value(udev_dev, "DM_NAME");

            /*
             * NULL means there was an error and empty string means there is
             * no alias. In case of no alias we return NULL instead of empty
             * string.
             */
            if (dm_name == NULL) {
                g_debug("failed to query udev for device alias for: %s",
                        syspath);
            } else if (dm_name[0] != '\0') {
                alias_copy = g_strdup(dm_name);
            }
        } else {
            g_debug("failed to query udev for path: %s", syspath);
        }
    } else {
        g_debug("failed to query udev");
    }

    udev_unref(udev_ctx);
    udev_device_unref(udev_dev);
    return alias_copy;
}
1275 
/*
 * Ask udev for the device node of the device at @syspath.
 *
 * Returns a newly allocated copy of the device node path, or NULL if udev
 * could not be queried or reports no device node.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev *udev_ctx = udev_new();
    struct udev_device *udev_dev = NULL;
    char *node_copy = NULL;

    if (udev_ctx != NULL) {
        udev_dev = udev_device_new_from_syspath(udev_ctx, syspath);
        if (udev_dev != NULL) {
            node_copy = g_strdup(udev_device_get_devnode(udev_dev));
        } else {
            g_debug("failed to query udev for path: %s", syspath);
        }
    } else {
        g_debug("failed to query udev");
    }

    udev_unref(udev_ctx);
    udev_device_unref(udev_dev);
    return node_copy;
}
1300 
1301 static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
1302 {
1303     g_autofree char *deps_dir = NULL;
1304     const gchar *dep;
1305     GDir *dp_deps = NULL;
1306 
1307     /* List dependent disks */
1308     deps_dir = g_strdup_printf("%s/slaves", disk_dir);
1309     g_debug("  listing entries in: %s", deps_dir);
1310     dp_deps = g_dir_open(deps_dir, 0, NULL);
1311     if (dp_deps == NULL) {
1312         g_debug("failed to list entries in %s", deps_dir);
1313         return;
1314     }
1315     disk->has_dependencies = true;
1316     while ((dep = g_dir_read_name(dp_deps)) != NULL) {
1317         g_autofree char *dep_dir = NULL;
1318         char *dev_name;
1319 
1320         /* Add dependent disks */
1321         dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
1322         dev_name = get_device_for_syspath(dep_dir);
1323         if (dev_name != NULL) {
1324             g_debug("  adding dependent device: %s", dev_name);
1325             QAPI_LIST_PREPEND(disk->dependencies, dev_name);
1326         }
1327     }
1328     g_dir_close(dp_deps);
1329 }
1330 
1331 /*
1332  * Detect partitions subdirectory, name is "<disk_name><number>" or
1333  * "<disk_name>p<number>"
1334  *
1335  * @disk_name -- last component of /sys path (e.g. sda)
1336  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
1337  * @disk_dev -- device node of the disk (e.g. /dev/sda)
1338  */
1339 static GuestDiskInfoList *get_disk_partitions(
1340     GuestDiskInfoList *list,
1341     const char *disk_name, const char *disk_dir,
1342     const char *disk_dev)
1343 {
1344     GuestDiskInfoList *ret = list;
1345     struct dirent *de_disk;
1346     DIR *dp_disk = NULL;
1347     size_t len = strlen(disk_name);
1348 
1349     dp_disk = opendir(disk_dir);
1350     while ((de_disk = readdir(dp_disk)) != NULL) {
1351         g_autofree char *partition_dir = NULL;
1352         char *dev_name;
1353         GuestDiskInfo *partition;
1354 
1355         if (!(de_disk->d_type & DT_DIR)) {
1356             continue;
1357         }
1358 
1359         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
1360             ((*(de_disk->d_name + len) == 'p' &&
1361             isdigit(*(de_disk->d_name + len + 1))) ||
1362                 isdigit(*(de_disk->d_name + len))))) {
1363             continue;
1364         }
1365 
1366         partition_dir = g_strdup_printf("%s/%s",
1367             disk_dir, de_disk->d_name);
1368         dev_name = get_device_for_syspath(partition_dir);
1369         if (dev_name == NULL) {
1370             g_debug("Failed to get device name for syspath: %s",
1371                 disk_dir);
1372             continue;
1373         }
1374         partition = g_new0(GuestDiskInfo, 1);
1375         partition->name = dev_name;
1376         partition->partition = true;
1377         partition->has_dependencies = true;
1378         /* Add parent disk as dependent for easier tracking of hierarchy */
1379         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
1380 
1381         QAPI_LIST_PREPEND(ret, partition);
1382     }
1383     closedir(dp_disk);
1384 
1385     return ret;
1386 }
1387 
1388 static void get_nvme_smart(GuestDiskInfo *disk)
1389 {
1390     int fd;
1391     GuestNVMeSmart *smart;
1392     NvmeSmartLog log = {0};
1393     struct nvme_admin_cmd cmd = {
1394         .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
1395         .nsid = NVME_NSID_BROADCAST,
1396         .addr = (uintptr_t)&log,
1397         .data_len = sizeof(log),
1398         .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
1399                  | (((sizeof(log) >> 2) - 1) << 16)
1400     };
1401 
1402     fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
1403     if (fd == -1) {
1404         g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
1405         return;
1406     }
1407 
1408     if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
1409         g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
1410         close(fd);
1411         return;
1412     }
1413 
1414     disk->smart = g_new0(GuestDiskSmart, 1);
1415     disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
1416 
1417     smart = &disk->smart->u.nvme;
1418     smart->critical_warning = log.critical_warning;
1419     smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
1420     smart->available_spare = log.available_spare;
1421     smart->available_spare_threshold = log.available_spare_threshold;
1422     smart->percentage_used = log.percentage_used;
1423     smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
1424     smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
1425     smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
1426     smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
1427     smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
1428     smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
1429     smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
1430     smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
1431     smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
1432     smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
1433     smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
1434     smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
1435     smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
1436     smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
1437     smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
1438     smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
1439     smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
1440     smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
1441     smart->number_of_error_log_entries_lo =
1442         le64_to_cpu(log.number_of_error_log_entries[0]);
1443     smart->number_of_error_log_entries_hi =
1444         le64_to_cpu(log.number_of_error_log_entries[1]);
1445 
1446     close(fd);
1447 }
1448 
1449 static void get_disk_smart(GuestDiskInfo *disk)
1450 {
1451     if (disk->address
1452         && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
1453         get_nvme_smart(disk);
1454     }
1455 }
1456 
1457 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1458 {
1459     GuestDiskInfoList *ret = NULL;
1460     GuestDiskInfo *disk;
1461     DIR *dp = NULL;
1462     struct dirent *de = NULL;
1463 
1464     g_debug("listing /sys/block directory");
1465     dp = opendir("/sys/block");
1466     if (dp == NULL) {
1467         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
1468         return NULL;
1469     }
1470     while ((de = readdir(dp)) != NULL) {
1471         g_autofree char *disk_dir = NULL, *line = NULL,
1472             *size_path = NULL;
1473         char *dev_name;
1474         Error *local_err = NULL;
1475         if (de->d_type != DT_LNK) {
1476             g_debug("  skipping entry: %s", de->d_name);
1477             continue;
1478         }
1479 
1480         /* Check size and skip zero-sized disks */
1481         g_debug("  checking disk size");
1482         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1483         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1484             g_debug("  failed to read disk size");
1485             continue;
1486         }
1487         if (g_strcmp0(line, "0\n") == 0) {
1488             g_debug("  skipping zero-sized disk");
1489             continue;
1490         }
1491 
1492         g_debug("  adding %s", de->d_name);
1493         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1494         dev_name = get_device_for_syspath(disk_dir);
1495         if (dev_name == NULL) {
1496             g_debug("Failed to get device name for syspath: %s",
1497                 disk_dir);
1498             continue;
1499         }
1500         disk = g_new0(GuestDiskInfo, 1);
1501         disk->name = dev_name;
1502         disk->partition = false;
1503         disk->alias = get_alias_for_syspath(disk_dir);
1504         QAPI_LIST_PREPEND(ret, disk);
1505 
1506         /* Get address for non-virtual devices */
1507         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1508         if (local_err != NULL) {
1509             g_debug("  failed to check disk path, ignoring error: %s",
1510                 error_get_pretty(local_err));
1511             error_free(local_err);
1512             local_err = NULL;
1513             /* Don't try to get the address */
1514             is_virtual = true;
1515         }
1516         if (!is_virtual) {
1517             disk->address = get_disk_address(disk_dir, &local_err);
1518             if (local_err != NULL) {
1519                 g_debug("  failed to get device info, ignoring error: %s",
1520                     error_get_pretty(local_err));
1521                 error_free(local_err);
1522                 local_err = NULL;
1523             }
1524         }
1525 
1526         get_disk_deps(disk_dir, disk);
1527         get_disk_smart(disk);
1528         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1529     }
1530 
1531     closedir(dp);
1532 
1533     return ret;
1534 }
1535 
1536 #else
1537 
1538 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1539 {
1540     error_setg(errp, QERR_UNSUPPORTED);
1541     return NULL;
1542 }
1543 
1544 #endif
1545 
1546 /* Return a list of the disk device(s)' info which @mount lies on */
1547 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1548                                                Error **errp)
1549 {
1550     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1551     struct statvfs buf;
1552     unsigned long used, nonroot_total, fr_size;
1553     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1554                                     mount->devmajor, mount->devminor);
1555 
1556     fs->mountpoint = g_strdup(mount->dirname);
1557     fs->type = g_strdup(mount->devtype);
1558     build_guest_fsinfo_for_device(devpath, fs, errp);
1559 
1560     if (statvfs(fs->mountpoint, &buf) == 0) {
1561         fr_size = buf.f_frsize;
1562         used = buf.f_blocks - buf.f_bfree;
1563         nonroot_total = used + buf.f_bavail;
1564         fs->used_bytes = used * fr_size;
1565         fs->total_bytes = nonroot_total * fr_size;
1566 
1567         fs->has_total_bytes = true;
1568         fs->has_used_bytes = true;
1569     }
1570 
1571     g_free(devpath);
1572 
1573     return fs;
1574 }
1575 
1576 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1577 {
1578     FsMountList mounts;
1579     struct FsMount *mount;
1580     GuestFilesystemInfoList *ret = NULL;
1581     Error *local_err = NULL;
1582 
1583     QTAILQ_INIT(&mounts);
1584     if (!build_fs_mount_list(&mounts, &local_err)) {
1585         error_propagate(errp, local_err);
1586         return NULL;
1587     }
1588 
1589     QTAILQ_FOREACH(mount, &mounts, next) {
1590         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1591 
1592         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1593         if (local_err) {
1594             error_propagate(errp, local_err);
1595             qapi_free_GuestFilesystemInfoList(ret);
1596             ret = NULL;
1597             break;
1598         }
1599     }
1600 
1601     free_fs_mount_list(&mounts);
1602     return ret;
1603 }
1604 #endif /* CONFIG_FSFREEZE */
1605 
1606 #if defined(CONFIG_FSTRIM)
1607 /*
1608  * Walk list of mounted file systems in the guest, and trim them.
1609  */
1610 GuestFilesystemTrimResponse *
1611 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1612 {
1613     GuestFilesystemTrimResponse *response;
1614     GuestFilesystemTrimResult *result;
1615     int ret = 0;
1616     FsMountList mounts;
1617     struct FsMount *mount;
1618     int fd;
1619     struct fstrim_range r;
1620 
1621     slog("guest-fstrim called");
1622 
1623     QTAILQ_INIT(&mounts);
1624     if (!build_fs_mount_list(&mounts, errp)) {
1625         return NULL;
1626     }
1627 
1628     response = g_malloc0(sizeof(*response));
1629 
1630     QTAILQ_FOREACH(mount, &mounts, next) {
1631         result = g_malloc0(sizeof(*result));
1632         result->path = g_strdup(mount->dirname);
1633 
1634         QAPI_LIST_PREPEND(response->paths, result);
1635 
1636         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1637         if (fd == -1) {
1638             result->error = g_strdup_printf("failed to open: %s",
1639                                             strerror(errno));
1640             continue;
1641         }
1642 
1643         /* We try to cull filesystems we know won't work in advance, but other
1644          * filesystems may not implement fstrim for less obvious reasons.
1645          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1646          * will be reported (e.g. CD-ROMs).
1647          * Any other error means an unexpected error.
1648          */
1649         r.start = 0;
1650         r.len = -1;
1651         r.minlen = has_minimum ? minimum : 0;
1652         ret = ioctl(fd, FITRIM, &r);
1653         if (ret == -1) {
1654             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1655                 result->error = g_strdup("trim not supported");
1656             } else {
1657                 result->error = g_strdup_printf("failed to trim: %s",
1658                                                 strerror(errno));
1659             }
1660             close(fd);
1661             continue;
1662         }
1663 
1664         result->has_minimum = true;
1665         result->minimum = r.minlen;
1666         result->has_trimmed = true;
1667         result->trimmed = r.len;
1668         close(fd);
1669     }
1670 
1671     free_fs_mount_list(&mounts);
1672     return response;
1673 }
1674 #endif /* CONFIG_FSTRIM */
1675 
1676 
/* sysfs file listing, and accepting, the supported sleep states */
#define LINUX_SYS_STATE_FILE    "/sys/power/state"

/* Exit statuses of the 'pm-is-supported' helper */
#define SUSPEND_SUPPORTED       0
#define SUSPEND_NOT_SUPPORTED   1

/*
 * Suspend flavours.  The numeric values are used as indexes into the
 * per-back-end argument tables, so they must stay 0, 1 and 2.
 */
typedef enum {
    SUSPEND_MODE_DISK   = 0,
    SUSPEND_MODE_RAM    = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1686 
1687 /*
1688  * Executes a command in a child process using g_spawn_sync,
1689  * returning an int >= 0 representing the exit status of the
1690  * process.
1691  *
1692  * If the program wasn't found in path, returns -1.
1693  *
1694  * If a problem happened when creating the child process,
1695  * returns -1 and errp is set.
1696  */
1697 static int run_process_child(const char *command[], Error **errp)
1698 {
1699     int exit_status, spawn_flag;
1700     GError *g_err = NULL;
1701     bool success;
1702 
1703     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1704                  G_SPAWN_STDERR_TO_DEV_NULL;
1705 
1706     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1707                             NULL, NULL, NULL, NULL,
1708                             &exit_status, &g_err);
1709 
1710     if (success) {
1711         return WEXITSTATUS(exit_status);
1712     }
1713 
1714     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1715         error_setg(errp, "failed to create child process, error '%s'",
1716                    g_err->message);
1717     }
1718 
1719     g_error_free(g_err);
1720     return -1;
1721 }
1722 
1723 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1724 {
1725     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1726                                      "systemd-hybrid-sleep"};
1727     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1728     int status;
1729 
1730     status = run_process_child(cmd, errp);
1731 
1732     /*
1733      * systemctl status uses LSB return codes so we can expect
1734      * status > 0 and be ok. To assert if the guest has support
1735      * for the selected suspend mode, status should be < 4. 4 is
1736      * the code for unknown service status, the return value when
1737      * the service does not exist. A common value is status = 3
1738      * (program is not running).
1739      */
1740     if (status > 0 && status < 4) {
1741         return true;
1742     }
1743 
1744     return false;
1745 }
1746 
1747 static void systemd_suspend(SuspendMode mode, Error **errp)
1748 {
1749     Error *local_err = NULL;
1750     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1751     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1752     int status;
1753 
1754     status = run_process_child(cmd, &local_err);
1755 
1756     if (status == 0) {
1757         return;
1758     }
1759 
1760     if ((status == -1) && !local_err) {
1761         error_setg(errp, "the helper program 'systemctl %s' was not found",
1762                    systemctl_args[mode]);
1763         return;
1764     }
1765 
1766     if (local_err) {
1767         error_propagate(errp, local_err);
1768     } else {
1769         error_setg(errp, "the helper program 'systemctl %s' returned an "
1770                    "unexpected exit status code (%d)",
1771                    systemctl_args[mode], status);
1772     }
1773 }
1774 
1775 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1776 {
1777     Error *local_err = NULL;
1778     const char *pmutils_args[3] = {"--hibernate", "--suspend",
1779                                    "--suspend-hybrid"};
1780     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1781     int status;
1782 
1783     status = run_process_child(cmd, &local_err);
1784 
1785     if (status == SUSPEND_SUPPORTED) {
1786         return true;
1787     }
1788 
1789     if ((status == -1) && !local_err) {
1790         return false;
1791     }
1792 
1793     if (local_err) {
1794         error_propagate(errp, local_err);
1795     } else {
1796         error_setg(errp,
1797                    "the helper program '%s' returned an unexpected exit"
1798                    " status code (%d)", "pm-is-supported", status);
1799     }
1800 
1801     return false;
1802 }
1803 
1804 static void pmutils_suspend(SuspendMode mode, Error **errp)
1805 {
1806     Error *local_err = NULL;
1807     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1808                                        "pm-suspend-hybrid"};
1809     const char *cmd[2] = {pmutils_binaries[mode], NULL};
1810     int status;
1811 
1812     status = run_process_child(cmd, &local_err);
1813 
1814     if (status == 0) {
1815         return;
1816     }
1817 
1818     if ((status == -1) && !local_err) {
1819         error_setg(errp, "the helper program '%s' was not found",
1820                    pmutils_binaries[mode]);
1821         return;
1822     }
1823 
1824     if (local_err) {
1825         error_propagate(errp, local_err);
1826     } else {
1827         error_setg(errp,
1828                    "the helper program '%s' returned an unexpected exit"
1829                    " status code (%d)", pmutils_binaries[mode], status);
1830     }
1831 }
1832 
1833 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1834 {
1835     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1836     const char *sysfile_str = sysfile_strs[mode];
1837     char buf[32]; /* hopefully big enough */
1838     int fd;
1839     ssize_t ret;
1840 
1841     if (!sysfile_str) {
1842         error_setg(errp, "unknown guest suspend mode");
1843         return false;
1844     }
1845 
1846     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1847     if (fd < 0) {
1848         return false;
1849     }
1850 
1851     ret = read(fd, buf, sizeof(buf) - 1);
1852     close(fd);
1853     if (ret <= 0) {
1854         return false;
1855     }
1856     buf[ret] = '\0';
1857 
1858     if (strstr(buf, sysfile_str)) {
1859         return true;
1860     }
1861     return false;
1862 }
1863 
1864 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1865 {
1866     Error *local_err = NULL;
1867     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1868     const char *sysfile_str = sysfile_strs[mode];
1869     pid_t pid;
1870     int status;
1871 
1872     if (!sysfile_str) {
1873         error_setg(errp, "unknown guest suspend mode");
1874         return;
1875     }
1876 
1877     pid = fork();
1878     if (!pid) {
1879         /* child */
1880         int fd;
1881 
1882         setsid();
1883         reopen_fd_to_null(0);
1884         reopen_fd_to_null(1);
1885         reopen_fd_to_null(2);
1886 
1887         fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1888         if (fd < 0) {
1889             _exit(EXIT_FAILURE);
1890         }
1891 
1892         if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1893             _exit(EXIT_FAILURE);
1894         }
1895 
1896         _exit(EXIT_SUCCESS);
1897     } else if (pid < 0) {
1898         error_setg_errno(errp, errno, "failed to create child process");
1899         return;
1900     }
1901 
1902     ga_wait_child(pid, &status, &local_err);
1903     if (local_err) {
1904         error_propagate(errp, local_err);
1905         return;
1906     }
1907 
1908     if (WEXITSTATUS(status)) {
1909         error_setg(errp, "child process has failed to suspend");
1910     }
1911 
1912 }
1913 
1914 static void guest_suspend(SuspendMode mode, Error **errp)
1915 {
1916     Error *local_err = NULL;
1917     bool mode_supported = false;
1918 
1919     if (systemd_supports_mode(mode, &local_err)) {
1920         mode_supported = true;
1921         systemd_suspend(mode, &local_err);
1922     }
1923 
1924     if (!local_err) {
1925         return;
1926     }
1927 
1928     error_free(local_err);
1929     local_err = NULL;
1930 
1931     if (pmutils_supports_mode(mode, &local_err)) {
1932         mode_supported = true;
1933         pmutils_suspend(mode, &local_err);
1934     }
1935 
1936     if (!local_err) {
1937         return;
1938     }
1939 
1940     error_free(local_err);
1941     local_err = NULL;
1942 
1943     if (linux_sys_state_supports_mode(mode, &local_err)) {
1944         mode_supported = true;
1945         linux_sys_state_suspend(mode, &local_err);
1946     }
1947 
1948     if (!mode_supported) {
1949         error_free(local_err);
1950         error_setg(errp,
1951                    "the requested suspend mode is not supported by the guest");
1952     } else {
1953         error_propagate(errp, local_err);
1954     }
1955 }
1956 
1957 void qmp_guest_suspend_disk(Error **errp)
1958 {
1959     guest_suspend(SUSPEND_MODE_DISK, errp);
1960 }
1961 
1962 void qmp_guest_suspend_ram(Error **errp)
1963 {
1964     guest_suspend(SUSPEND_MODE_RAM, errp);
1965 }
1966 
1967 void qmp_guest_suspend_hybrid(Error **errp)
1968 {
1969     guest_suspend(SUSPEND_MODE_HYBRID, errp);
1970 }
1971 
1972 /* Transfer online/offline status between @vcpu and the guest system.
1973  *
1974  * On input either @errp or *@errp must be NULL.
1975  *
1976  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1977  * - R: vcpu->logical_id
1978  * - W: vcpu->online
1979  * - W: vcpu->can_offline
1980  *
1981  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1982  * - R: vcpu->logical_id
1983  * - R: vcpu->online
1984  *
1985  * Written members remain unmodified on error.
1986  */
1987 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1988                           char *dirpath, Error **errp)
1989 {
1990     int fd;
1991     int res;
1992     int dirfd;
1993     static const char fn[] = "online";
1994 
1995     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1996     if (dirfd == -1) {
1997         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1998         return;
1999     }
2000 
2001     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
2002     if (fd == -1) {
2003         if (errno != ENOENT) {
2004             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
2005         } else if (sys2vcpu) {
2006             vcpu->online = true;
2007             vcpu->can_offline = false;
2008         } else if (!vcpu->online) {
2009             error_setg(errp, "logical processor #%" PRId64 " can't be "
2010                        "offlined", vcpu->logical_id);
2011         } /* otherwise pretend successful re-onlining */
2012     } else {
2013         unsigned char status;
2014 
2015         res = pread(fd, &status, 1, 0);
2016         if (res == -1) {
2017             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
2018         } else if (res == 0) {
2019             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
2020                        fn);
2021         } else if (sys2vcpu) {
2022             vcpu->online = (status != '0');
2023             vcpu->can_offline = true;
2024         } else if (vcpu->online != (status != '0')) {
2025             status = '0' + vcpu->online;
2026             if (pwrite(fd, &status, 1, 0) == -1) {
2027                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
2028                                  fn);
2029             }
2030         } /* otherwise pretend successful re-(on|off)-lining */
2031 
2032         res = close(fd);
2033         g_assert(res == 0);
2034     }
2035 
2036     res = close(dirfd);
2037     g_assert(res == 0);
2038 }
2039 
2040 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2041 {
2042     GuestLogicalProcessorList *head, **tail;
2043     const char *cpu_dir = "/sys/devices/system/cpu";
2044     const gchar *line;
2045     g_autoptr(GDir) cpu_gdir = NULL;
2046     Error *local_err = NULL;
2047 
2048     head = NULL;
2049     tail = &head;
2050     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
2051 
2052     if (cpu_gdir == NULL) {
2053         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
2054         return NULL;
2055     }
2056 
2057     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
2058         GuestLogicalProcessor *vcpu;
2059         int64_t id;
2060         if (sscanf(line, "cpu%" PRId64, &id)) {
2061             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
2062                                                     "cpu%" PRId64 "/", id);
2063             vcpu = g_malloc0(sizeof *vcpu);
2064             vcpu->logical_id = id;
2065             vcpu->has_can_offline = true; /* lolspeak ftw */
2066             transfer_vcpu(vcpu, true, path, &local_err);
2067             QAPI_LIST_APPEND(tail, vcpu);
2068         }
2069     }
2070 
2071     if (local_err == NULL) {
2072         /* there's no guest with zero VCPUs */
2073         g_assert(head != NULL);
2074         return head;
2075     }
2076 
2077     qapi_free_GuestLogicalProcessorList(head);
2078     error_propagate(errp, local_err);
2079     return NULL;
2080 }
2081 
2082 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2083 {
2084     int64_t processed;
2085     Error *local_err = NULL;
2086 
2087     processed = 0;
2088     while (vcpus != NULL) {
2089         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
2090                                      vcpus->value->logical_id);
2091 
2092         transfer_vcpu(vcpus->value, false, path, &local_err);
2093         g_free(path);
2094         if (local_err != NULL) {
2095             break;
2096         }
2097         ++processed;
2098         vcpus = vcpus->next;
2099     }
2100 
2101     if (local_err != NULL) {
2102         if (processed == 0) {
2103             error_propagate(errp, local_err);
2104         } else {
2105             error_free(local_err);
2106         }
2107     }
2108 
2109     return processed;
2110 }
2111 #endif /* __linux__ */
2112 
2113 #if defined(__linux__) || defined(__FreeBSD__)
2114 void qmp_guest_set_user_password(const char *username,
2115                                  const char *password,
2116                                  bool crypted,
2117                                  Error **errp)
2118 {
2119     Error *local_err = NULL;
2120     char *passwd_path = NULL;
2121     pid_t pid;
2122     int status;
2123     int datafd[2] = { -1, -1 };
2124     char *rawpasswddata = NULL;
2125     size_t rawpasswdlen;
2126     char *chpasswddata = NULL;
2127     size_t chpasswdlen;
2128 
2129     rawpasswddata = (char *)qbase64_decode(password, -1, &rawpasswdlen, errp);
2130     if (!rawpasswddata) {
2131         return;
2132     }
2133     rawpasswddata = g_renew(char, rawpasswddata, rawpasswdlen + 1);
2134     rawpasswddata[rawpasswdlen] = '\0';
2135 
2136     if (strchr(rawpasswddata, '\n')) {
2137         error_setg(errp, "forbidden characters in raw password");
2138         goto out;
2139     }
2140 
2141     if (strchr(username, '\n') ||
2142         strchr(username, ':')) {
2143         error_setg(errp, "forbidden characters in username");
2144         goto out;
2145     }
2146 
2147 #ifdef __FreeBSD__
2148     chpasswddata = g_strdup(rawpasswddata);
2149     passwd_path = g_find_program_in_path("pw");
2150 #else
2151     chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata);
2152     passwd_path = g_find_program_in_path("chpasswd");
2153 #endif
2154 
2155     chpasswdlen = strlen(chpasswddata);
2156 
2157     if (!passwd_path) {
2158         error_setg(errp, "cannot find 'passwd' program in PATH");
2159         goto out;
2160     }
2161 
2162     if (!g_unix_open_pipe(datafd, FD_CLOEXEC, NULL)) {
2163         error_setg(errp, "cannot create pipe FDs");
2164         goto out;
2165     }
2166 
2167     pid = fork();
2168     if (pid == 0) {
2169         close(datafd[1]);
2170         /* child */
2171         setsid();
2172         dup2(datafd[0], 0);
2173         reopen_fd_to_null(1);
2174         reopen_fd_to_null(2);
2175 
2176 #ifdef __FreeBSD__
2177         const char *h_arg;
2178         h_arg = (crypted) ? "-H" : "-h";
2179         execl(passwd_path, "pw", "usermod", "-n", username, h_arg, "0", NULL);
2180 #else
2181         if (crypted) {
2182             execl(passwd_path, "chpasswd", "-e", NULL);
2183         } else {
2184             execl(passwd_path, "chpasswd", NULL);
2185         }
2186 #endif
2187         _exit(EXIT_FAILURE);
2188     } else if (pid < 0) {
2189         error_setg_errno(errp, errno, "failed to create child process");
2190         goto out;
2191     }
2192     close(datafd[0]);
2193     datafd[0] = -1;
2194 
2195     if (qemu_write_full(datafd[1], chpasswddata, chpasswdlen) != chpasswdlen) {
2196         error_setg_errno(errp, errno, "cannot write new account password");
2197         goto out;
2198     }
2199     close(datafd[1]);
2200     datafd[1] = -1;
2201 
2202     ga_wait_child(pid, &status, &local_err);
2203     if (local_err) {
2204         error_propagate(errp, local_err);
2205         goto out;
2206     }
2207 
2208     if (!WIFEXITED(status)) {
2209         error_setg(errp, "child process has terminated abnormally");
2210         goto out;
2211     }
2212 
2213     if (WEXITSTATUS(status)) {
2214         error_setg(errp, "child process has failed to set user password");
2215         goto out;
2216     }
2217 
2218 out:
2219     g_free(chpasswddata);
2220     g_free(rawpasswddata);
2221     g_free(passwd_path);
2222     if (datafd[0] != -1) {
2223         close(datafd[0]);
2224     }
2225     if (datafd[1] != -1) {
2226         close(datafd[1]);
2227     }
2228 }
2229 #else /* __linux__ || __FreeBSD__ */
2230 void qmp_guest_set_user_password(const char *username,
2231                                  const char *password,
2232                                  bool crypted,
2233                                  Error **errp)
2234 {
2235     error_setg(errp, QERR_UNSUPPORTED);
2236 }
2237 #endif /* __linux__ || __FreeBSD__ */
2238 
2239 #ifdef __linux__
2240 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
2241                                int size, Error **errp)
2242 {
2243     int fd;
2244     int res;
2245 
2246     errno = 0;
2247     fd = openat(dirfd, pathname, O_RDONLY);
2248     if (fd == -1) {
2249         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2250         return;
2251     }
2252 
2253     res = pread(fd, buf, size, 0);
2254     if (res == -1) {
2255         error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
2256     } else if (res == 0) {
2257         error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
2258     }
2259     close(fd);
2260 }
2261 
2262 static void ga_write_sysfs_file(int dirfd, const char *pathname,
2263                                 const char *buf, int size, Error **errp)
2264 {
2265     int fd;
2266 
2267     errno = 0;
2268     fd = openat(dirfd, pathname, O_WRONLY);
2269     if (fd == -1) {
2270         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2271         return;
2272     }
2273 
2274     if (pwrite(fd, buf, size, 0) == -1) {
2275         error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
2276     }
2277 
2278     close(fd);
2279 }
2280 
2281 /* Transfer online/offline status between @mem_blk and the guest system.
2282  *
2283  * On input either @errp or *@errp must be NULL.
2284  *
2285  * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
2286  * - R: mem_blk->phys_index
2287  * - W: mem_blk->online
2288  * - W: mem_blk->can_offline
2289  *
2290  * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
2291  * - R: mem_blk->phys_index
2292  * - R: mem_blk->online
2293  *-  R: mem_blk->can_offline
2294  * Written members remain unmodified on error.
2295  */
2296 static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
2297                                   GuestMemoryBlockResponse *result,
2298                                   Error **errp)
2299 {
2300     char *dirpath;
2301     int dirfd;
2302     char *status;
2303     Error *local_err = NULL;
2304 
2305     if (!sys2memblk) {
2306         DIR *dp;
2307 
2308         if (!result) {
2309             error_setg(errp, "Internal error, 'result' should not be NULL");
2310             return;
2311         }
2312         errno = 0;
2313         dp = opendir("/sys/devices/system/memory/");
2314          /* if there is no 'memory' directory in sysfs,
2315          * we think this VM does not support online/offline memory block,
2316          * any other solution?
2317          */
2318         if (!dp) {
2319             if (errno == ENOENT) {
2320                 result->response =
2321                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2322             }
2323             goto out1;
2324         }
2325         closedir(dp);
2326     }
2327 
2328     dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
2329                               mem_blk->phys_index);
2330     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2331     if (dirfd == -1) {
2332         if (sys2memblk) {
2333             error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2334         } else {
2335             if (errno == ENOENT) {
2336                 result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
2337             } else {
2338                 result->response =
2339                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2340             }
2341         }
2342         g_free(dirpath);
2343         goto out1;
2344     }
2345     g_free(dirpath);
2346 
2347     status = g_malloc0(10);
2348     ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
2349     if (local_err) {
2350         /* treat with sysfs file that not exist in old kernel */
2351         if (errno == ENOENT) {
2352             error_free(local_err);
2353             if (sys2memblk) {
2354                 mem_blk->online = true;
2355                 mem_blk->can_offline = false;
2356             } else if (!mem_blk->online) {
2357                 result->response =
2358                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2359             }
2360         } else {
2361             if (sys2memblk) {
2362                 error_propagate(errp, local_err);
2363             } else {
2364                 error_free(local_err);
2365                 result->response =
2366                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2367             }
2368         }
2369         goto out2;
2370     }
2371 
2372     if (sys2memblk) {
2373         char removable = '0';
2374 
2375         mem_blk->online = (strncmp(status, "online", 6) == 0);
2376 
2377         ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
2378         if (local_err) {
2379             /* if no 'removable' file, it doesn't support offline mem blk */
2380             if (errno == ENOENT) {
2381                 error_free(local_err);
2382                 mem_blk->can_offline = false;
2383             } else {
2384                 error_propagate(errp, local_err);
2385             }
2386         } else {
2387             mem_blk->can_offline = (removable != '0');
2388         }
2389     } else {
2390         if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
2391             const char *new_state = mem_blk->online ? "online" : "offline";
2392 
2393             ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
2394                                 &local_err);
2395             if (local_err) {
2396                 error_free(local_err);
2397                 result->response =
2398                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2399                 goto out2;
2400             }
2401 
2402             result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
2403             result->has_error_code = false;
2404         } /* otherwise pretend successful re-(on|off)-lining */
2405     }
2406     g_free(status);
2407     close(dirfd);
2408     return;
2409 
2410 out2:
2411     g_free(status);
2412     close(dirfd);
2413 out1:
2414     if (!sys2memblk) {
2415         result->has_error_code = true;
2416         result->error_code = errno;
2417     }
2418 }
2419 
2420 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2421 {
2422     GuestMemoryBlockList *head, **tail;
2423     Error *local_err = NULL;
2424     struct dirent *de;
2425     DIR *dp;
2426 
2427     head = NULL;
2428     tail = &head;
2429 
2430     dp = opendir("/sys/devices/system/memory/");
2431     if (!dp) {
2432         /* it's ok if this happens to be a system that doesn't expose
2433          * memory blocks via sysfs, but otherwise we should report
2434          * an error
2435          */
2436         if (errno != ENOENT) {
2437             error_setg_errno(errp, errno, "Can't open directory"
2438                              "\"/sys/devices/system/memory/\"");
2439         }
2440         return NULL;
2441     }
2442 
2443     /* Note: the phys_index of memory block may be discontinuous,
2444      * this is because a memblk is the unit of the Sparse Memory design, which
2445      * allows discontinuous memory ranges (ex. NUMA), so here we should
2446      * traverse the memory block directory.
2447      */
2448     while ((de = readdir(dp)) != NULL) {
2449         GuestMemoryBlock *mem_blk;
2450 
2451         if ((strncmp(de->d_name, "memory", 6) != 0) ||
2452             !(de->d_type & DT_DIR)) {
2453             continue;
2454         }
2455 
2456         mem_blk = g_malloc0(sizeof *mem_blk);
2457         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
2458         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
2459         mem_blk->has_can_offline = true; /* lolspeak ftw */
2460         transfer_memory_block(mem_blk, true, NULL, &local_err);
2461         if (local_err) {
2462             break;
2463         }
2464 
2465         QAPI_LIST_APPEND(tail, mem_blk);
2466     }
2467 
2468     closedir(dp);
2469     if (local_err == NULL) {
2470         /* there's no guest with zero memory blocks */
2471         if (head == NULL) {
2472             error_setg(errp, "guest reported zero memory blocks!");
2473         }
2474         return head;
2475     }
2476 
2477     qapi_free_GuestMemoryBlockList(head);
2478     error_propagate(errp, local_err);
2479     return NULL;
2480 }
2481 
2482 GuestMemoryBlockResponseList *
2483 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2484 {
2485     GuestMemoryBlockResponseList *head, **tail;
2486     Error *local_err = NULL;
2487 
2488     head = NULL;
2489     tail = &head;
2490 
2491     while (mem_blks != NULL) {
2492         GuestMemoryBlockResponse *result;
2493         GuestMemoryBlock *current_mem_blk = mem_blks->value;
2494 
2495         result = g_malloc0(sizeof(*result));
2496         result->phys_index = current_mem_blk->phys_index;
2497         transfer_memory_block(current_mem_blk, false, result, &local_err);
2498         if (local_err) { /* should never happen */
2499             goto err;
2500         }
2501 
2502         QAPI_LIST_APPEND(tail, result);
2503         mem_blks = mem_blks->next;
2504     }
2505 
2506     return head;
2507 err:
2508     qapi_free_GuestMemoryBlockResponseList(head);
2509     error_propagate(errp, local_err);
2510     return NULL;
2511 }
2512 
2513 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2514 {
2515     Error *local_err = NULL;
2516     char *dirpath;
2517     int dirfd;
2518     char *buf;
2519     GuestMemoryBlockInfo *info;
2520 
2521     dirpath = g_strdup_printf("/sys/devices/system/memory/");
2522     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2523     if (dirfd == -1) {
2524         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2525         g_free(dirpath);
2526         return NULL;
2527     }
2528     g_free(dirpath);
2529 
2530     buf = g_malloc0(20);
2531     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
2532     close(dirfd);
2533     if (local_err) {
2534         g_free(buf);
2535         error_propagate(errp, local_err);
2536         return NULL;
2537     }
2538 
2539     info = g_new0(GuestMemoryBlockInfo, 1);
2540     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
2541 
2542     g_free(buf);
2543 
2544     return info;
2545 }
2546 
2547 #define MAX_NAME_LEN 128
2548 static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
2549 {
2550 #ifdef CONFIG_LINUX
2551     GuestDiskStatsInfoList *head = NULL, **tail = &head;
2552     const char *diskstats = "/proc/diskstats";
2553     FILE *fp;
2554     size_t n;
2555     char *line = NULL;
2556 
2557     fp = fopen(diskstats, "r");
2558     if (fp  == NULL) {
2559         error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
2560         return NULL;
2561     }
2562 
2563     while (getline(&line, &n, fp) != -1) {
2564         g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
2565         g_autofree GuestDiskStats *diskstat = NULL;
2566         char dev_name[MAX_NAME_LEN];
2567         unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
2568         unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
2569         unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
2570         unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
2571         unsigned int major, minor;
2572         int i;
2573 
2574         i = sscanf(line, "%u %u %s %lu %lu %lu"
2575                    "%lu %lu %lu %lu %u %u %u %u"
2576                    "%lu %lu %lu %u %lu %u",
2577                    &major, &minor, dev_name,
2578                    &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
2579                    &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
2580                    &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
2581                    &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
2582                    &fl_ios, &fl_ticks);
2583 
2584         if (i < 7) {
2585             continue;
2586         }
2587 
2588         diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
2589         diskstatinfo->name = g_strdup(dev_name);
2590         diskstatinfo->major = major;
2591         diskstatinfo->minor = minor;
2592 
2593         diskstat = g_new0(GuestDiskStats, 1);
2594         if (i == 7) {
2595             diskstat->has_read_ios = true;
2596             diskstat->read_ios = rd_ios;
2597             diskstat->has_read_sectors = true;
2598             diskstat->read_sectors = rd_merges_or_rd_sec;
2599             diskstat->has_write_ios = true;
2600             diskstat->write_ios = rd_sec_or_wr_ios;
2601             diskstat->has_write_sectors = true;
2602             diskstat->write_sectors = rd_ticks_or_wr_sec;
2603         }
2604         if (i >= 14) {
2605             diskstat->has_read_ios = true;
2606             diskstat->read_ios = rd_ios;
2607             diskstat->has_read_sectors = true;
2608             diskstat->read_sectors = rd_sec_or_wr_ios;
2609             diskstat->has_read_merges = true;
2610             diskstat->read_merges = rd_merges_or_rd_sec;
2611             diskstat->has_read_ticks = true;
2612             diskstat->read_ticks = rd_ticks_or_wr_sec;
2613             diskstat->has_write_ios = true;
2614             diskstat->write_ios = wr_ios;
2615             diskstat->has_write_sectors = true;
2616             diskstat->write_sectors = wr_sec;
2617             diskstat->has_write_merges = true;
2618             diskstat->write_merges = wr_merges;
2619             diskstat->has_write_ticks = true;
2620             diskstat->write_ticks = wr_ticks;
2621             diskstat->has_ios_pgr = true;
2622             diskstat->ios_pgr = ios_pgr;
2623             diskstat->has_total_ticks = true;
2624             diskstat->total_ticks = tot_ticks;
2625             diskstat->has_weight_ticks = true;
2626             diskstat->weight_ticks = rq_ticks;
2627         }
2628         if (i >= 18) {
2629             diskstat->has_discard_ios = true;
2630             diskstat->discard_ios = dc_ios;
2631             diskstat->has_discard_merges = true;
2632             diskstat->discard_merges = dc_merges;
2633             diskstat->has_discard_sectors = true;
2634             diskstat->discard_sectors = dc_sec;
2635             diskstat->has_discard_ticks = true;
2636             diskstat->discard_ticks = dc_ticks;
2637         }
2638         if (i >= 20) {
2639             diskstat->has_flush_ios = true;
2640             diskstat->flush_ios = fl_ios;
2641             diskstat->has_flush_ticks = true;
2642             diskstat->flush_ticks = fl_ticks;
2643         }
2644 
2645         diskstatinfo->stats = g_steal_pointer(&diskstat);
2646         QAPI_LIST_APPEND(tail, diskstatinfo);
2647         diskstatinfo = NULL;
2648     }
2649     free(line);
2650     fclose(fp);
2651     return head;
2652 #else
2653     g_debug("disk stats reporting available only for Linux");
2654     return NULL;
2655 #endif
2656 }
2657 
2658 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2659 {
2660     return guest_get_diskstats(errp);
2661 }
2662 
2663 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2664 {
2665     GuestCpuStatsList *head = NULL, **tail = &head;
2666     const char *cpustats = "/proc/stat";
2667     int clk_tck = sysconf(_SC_CLK_TCK);
2668     FILE *fp;
2669     size_t n;
2670     char *line = NULL;
2671 
2672     fp = fopen(cpustats, "r");
2673     if (fp  == NULL) {
2674         error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2675         return NULL;
2676     }
2677 
2678     while (getline(&line, &n, fp) != -1) {
2679         GuestCpuStats *cpustat = NULL;
2680         GuestLinuxCpuStats *linuxcpustat;
2681         int i;
2682         unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2683         unsigned long nice, guest_nice;
2684         char name[64];
2685 
2686         i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2687                    name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2688                    &steal, &guest, &guest_nice);
2689 
2690         /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2691         if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2692             continue;
2693         }
2694 
2695         if (i < 5) {
2696             slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2697             break;
2698         }
2699 
2700         cpustat = g_new0(GuestCpuStats, 1);
2701         cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2702 
2703         linuxcpustat = &cpustat->u.q_linux;
2704         linuxcpustat->cpu = atoi(&name[3]);
2705         linuxcpustat->user = user * 1000 / clk_tck;
2706         linuxcpustat->nice = nice * 1000 / clk_tck;
2707         linuxcpustat->system = system * 1000 / clk_tck;
2708         linuxcpustat->idle = idle * 1000 / clk_tck;
2709 
2710         if (i > 5) {
2711             linuxcpustat->has_iowait = true;
2712             linuxcpustat->iowait = iowait * 1000 / clk_tck;
2713         }
2714 
2715         if (i > 6) {
2716             linuxcpustat->has_irq = true;
2717             linuxcpustat->irq = irq * 1000 / clk_tck;
2718             linuxcpustat->has_softirq = true;
2719             linuxcpustat->softirq = softirq * 1000 / clk_tck;
2720         }
2721 
2722         if (i > 8) {
2723             linuxcpustat->has_steal = true;
2724             linuxcpustat->steal = steal * 1000 / clk_tck;
2725         }
2726 
2727         if (i > 9) {
2728             linuxcpustat->has_guest = true;
2729             linuxcpustat->guest = guest * 1000 / clk_tck;
2730         }
2731 
2732         if (i > 10) {
2733             linuxcpustat->has_guest = true;
2734             linuxcpustat->guest = guest * 1000 / clk_tck;
2735             linuxcpustat->has_guestnice = true;
2736             linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2737         }
2738 
2739         QAPI_LIST_APPEND(tail, cpustat);
2740     }
2741 
2742     free(line);
2743     fclose(fp);
2744     return head;
2745 }
2746 
2747 #else /* defined(__linux__) */
2748 
2749 void qmp_guest_suspend_disk(Error **errp)
2750 {
2751     error_setg(errp, QERR_UNSUPPORTED);
2752 }
2753 
2754 void qmp_guest_suspend_ram(Error **errp)
2755 {
2756     error_setg(errp, QERR_UNSUPPORTED);
2757 }
2758 
2759 void qmp_guest_suspend_hybrid(Error **errp)
2760 {
2761     error_setg(errp, QERR_UNSUPPORTED);
2762 }
2763 
2764 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2765 {
2766     error_setg(errp, QERR_UNSUPPORTED);
2767     return NULL;
2768 }
2769 
2770 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2771 {
2772     error_setg(errp, QERR_UNSUPPORTED);
2773     return -1;
2774 }
2775 
2776 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2777 {
2778     error_setg(errp, QERR_UNSUPPORTED);
2779     return NULL;
2780 }
2781 
2782 GuestMemoryBlockResponseList *
2783 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2784 {
2785     error_setg(errp, QERR_UNSUPPORTED);
2786     return NULL;
2787 }
2788 
2789 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2790 {
2791     error_setg(errp, QERR_UNSUPPORTED);
2792     return NULL;
2793 }
2794 
2795 #endif
2796 
2797 #ifdef HAVE_GETIFADDRS
2798 static GuestNetworkInterface *
2799 guest_find_interface(GuestNetworkInterfaceList *head,
2800                      const char *name)
2801 {
2802     for (; head; head = head->next) {
2803         if (strcmp(head->value->name, name) == 0) {
2804             return head->value;
2805         }
2806     }
2807 
2808     return NULL;
2809 }
2810 
2811 static int guest_get_network_stats(const char *name,
2812                        GuestNetworkInterfaceStat *stats)
2813 {
2814 #ifdef CONFIG_LINUX
2815     int name_len;
2816     char const *devinfo = "/proc/net/dev";
2817     FILE *fp;
2818     char *line = NULL, *colon;
2819     size_t n = 0;
2820     fp = fopen(devinfo, "r");
2821     if (!fp) {
2822         g_debug("failed to open network stats %s: %s", devinfo,
2823                 g_strerror(errno));
2824         return -1;
2825     }
2826     name_len = strlen(name);
2827     while (getline(&line, &n, fp) != -1) {
2828         long long dummy;
2829         long long rx_bytes;
2830         long long rx_packets;
2831         long long rx_errs;
2832         long long rx_dropped;
2833         long long tx_bytes;
2834         long long tx_packets;
2835         long long tx_errs;
2836         long long tx_dropped;
2837         char *trim_line;
2838         trim_line = g_strchug(line);
2839         if (trim_line[0] == '\0') {
2840             continue;
2841         }
2842         colon = strchr(trim_line, ':');
2843         if (!colon) {
2844             continue;
2845         }
2846         if (colon - name_len  == trim_line &&
2847            strncmp(trim_line, name, name_len) == 0) {
2848             if (sscanf(colon + 1,
2849                 "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld",
2850                   &rx_bytes, &rx_packets, &rx_errs, &rx_dropped,
2851                   &dummy, &dummy, &dummy, &dummy,
2852                   &tx_bytes, &tx_packets, &tx_errs, &tx_dropped,
2853                   &dummy, &dummy, &dummy, &dummy) != 16) {
2854                 continue;
2855             }
2856             stats->rx_bytes = rx_bytes;
2857             stats->rx_packets = rx_packets;
2858             stats->rx_errs = rx_errs;
2859             stats->rx_dropped = rx_dropped;
2860             stats->tx_bytes = tx_bytes;
2861             stats->tx_packets = tx_packets;
2862             stats->tx_errs = tx_errs;
2863             stats->tx_dropped = tx_dropped;
2864             fclose(fp);
2865             g_free(line);
2866             return 0;
2867         }
2868     }
2869     fclose(fp);
2870     g_free(line);
2871     g_debug("/proc/net/dev: Interface '%s' not found", name);
2872 #else /* !CONFIG_LINUX */
2873     g_debug("Network stats reporting available only for Linux");
2874 #endif /* !CONFIG_LINUX */
2875     return -1;
2876 }
2877 
2878 #ifndef CONFIG_BSD
2879 /*
2880  * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
2881  * buffer with ETHER_ADDR_LEN length at least.
2882  *
2883  * Returns false in case of an error, otherwise true. "obtained" argument
2884  * is true if a MAC address was obtained successful, otherwise false.
2885  */
2886 bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
2887                        bool *obtained, Error **errp)
2888 {
2889     struct ifreq ifr;
2890     int sock;
2891 
2892     *obtained = false;
2893 
2894     /* we haven't obtained HW address yet */
2895     sock = socket(PF_INET, SOCK_STREAM, 0);
2896     if (sock == -1) {
2897         error_setg_errno(errp, errno, "failed to create socket");
2898         return false;
2899     }
2900 
2901     memset(&ifr, 0, sizeof(ifr));
2902     pstrcpy(ifr.ifr_name, IF_NAMESIZE, ifa->ifa_name);
2903     if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
2904         /*
2905          * We can't get the hw addr of this interface, but that's not a
2906          * fatal error.
2907          */
2908         if (errno == EADDRNOTAVAIL) {
2909             /* The interface doesn't have a hw addr (e.g. loopback). */
2910             g_debug("failed to get MAC address of %s: %s",
2911                     ifa->ifa_name, strerror(errno));
2912         } else{
2913             g_warning("failed to get MAC address of %s: %s",
2914                       ifa->ifa_name, strerror(errno));
2915         }
2916     } else {
2917 #ifdef CONFIG_SOLARIS
2918         memcpy(buf, &ifr.ifr_addr.sa_data, ETHER_ADDR_LEN);
2919 #else
2920         memcpy(buf, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
2921 #endif
2922         *obtained = true;
2923     }
2924     close(sock);
2925     return true;
2926 }
2927 #endif /* CONFIG_BSD */
2928 
2929 /*
2930  * Build information about guest interfaces
2931  */
2932 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
2933 {
2934     GuestNetworkInterfaceList *head = NULL, **tail = &head;
2935     struct ifaddrs *ifap, *ifa;
2936 
2937     if (getifaddrs(&ifap) < 0) {
2938         error_setg_errno(errp, errno, "getifaddrs failed");
2939         goto error;
2940     }
2941 
2942     for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
2943         GuestNetworkInterface *info;
2944         GuestIpAddressList **address_tail;
2945         GuestIpAddress *address_item = NULL;
2946         GuestNetworkInterfaceStat *interface_stat = NULL;
2947         char addr4[INET_ADDRSTRLEN];
2948         char addr6[INET6_ADDRSTRLEN];
2949         unsigned char mac_addr[ETHER_ADDR_LEN];
2950         bool obtained;
2951         void *p;
2952 
2953         g_debug("Processing %s interface", ifa->ifa_name);
2954 
2955         info = guest_find_interface(head, ifa->ifa_name);
2956 
2957         if (!info) {
2958             info = g_malloc0(sizeof(*info));
2959             info->name = g_strdup(ifa->ifa_name);
2960 
2961             QAPI_LIST_APPEND(tail, info);
2962         }
2963 
2964         if (!info->hardware_address) {
2965             if (!guest_get_hw_addr(ifa, mac_addr, &obtained, errp)) {
2966                 goto error;
2967             }
2968             if (obtained) {
2969                 info->hardware_address =
2970                     g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
2971                                     (int) mac_addr[0], (int) mac_addr[1],
2972                                     (int) mac_addr[2], (int) mac_addr[3],
2973                                     (int) mac_addr[4], (int) mac_addr[5]);
2974             }
2975         }
2976 
2977         if (ifa->ifa_addr &&
2978             ifa->ifa_addr->sa_family == AF_INET) {
2979             /* interface with IPv4 address */
2980             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
2981             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
2982                 error_setg_errno(errp, errno, "inet_ntop failed");
2983                 goto error;
2984             }
2985 
2986             address_item = g_malloc0(sizeof(*address_item));
2987             address_item->ip_address = g_strdup(addr4);
2988             address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
2989 
2990             if (ifa->ifa_netmask) {
2991                 /* Count the number of set bits in netmask.
2992                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
2993                 p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
2994                 address_item->prefix = ctpop32(((uint32_t *) p)[0]);
2995             }
2996         } else if (ifa->ifa_addr &&
2997                    ifa->ifa_addr->sa_family == AF_INET6) {
2998             /* interface with IPv6 address */
2999             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
3000             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
3001                 error_setg_errno(errp, errno, "inet_ntop failed");
3002                 goto error;
3003             }
3004 
3005             address_item = g_malloc0(sizeof(*address_item));
3006             address_item->ip_address = g_strdup(addr6);
3007             address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
3008 
3009             if (ifa->ifa_netmask) {
3010                 /* Count the number of set bits in netmask.
3011                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
3012                 p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
3013                 address_item->prefix =
3014                     ctpop32(((uint32_t *) p)[0]) +
3015                     ctpop32(((uint32_t *) p)[1]) +
3016                     ctpop32(((uint32_t *) p)[2]) +
3017                     ctpop32(((uint32_t *) p)[3]);
3018             }
3019         }
3020 
3021         if (!address_item) {
3022             continue;
3023         }
3024 
3025         address_tail = &info->ip_addresses;
3026         while (*address_tail) {
3027             address_tail = &(*address_tail)->next;
3028         }
3029         QAPI_LIST_APPEND(address_tail, address_item);
3030 
3031         info->has_ip_addresses = true;
3032 
3033         if (!info->statistics) {
3034             interface_stat = g_malloc0(sizeof(*interface_stat));
3035             if (guest_get_network_stats(info->name, interface_stat) == -1) {
3036                 g_free(interface_stat);
3037             } else {
3038                 info->statistics = interface_stat;
3039             }
3040         }
3041     }
3042 
3043     freeifaddrs(ifap);
3044     return head;
3045 
3046 error:
3047     freeifaddrs(ifap);
3048     qapi_free_GuestNetworkInterfaceList(head);
3049     return NULL;
3050 }
3051 
3052 #else
3053 
3054 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
3055 {
3056     error_setg(errp, QERR_UNSUPPORTED);
3057     return NULL;
3058 }
3059 
3060 #endif /* HAVE_GETIFADDRS */
3061 
3062 #if !defined(CONFIG_FSFREEZE)
3063 
3064 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
3065 {
3066     error_setg(errp, QERR_UNSUPPORTED);
3067     return NULL;
3068 }
3069 
3070 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
3071 {
3072     error_setg(errp, QERR_UNSUPPORTED);
3073 
3074     return 0;
3075 }
3076 
3077 int64_t qmp_guest_fsfreeze_freeze(Error **errp)
3078 {
3079     error_setg(errp, QERR_UNSUPPORTED);
3080 
3081     return 0;
3082 }
3083 
3084 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
3085                                        strList *mountpoints,
3086                                        Error **errp)
3087 {
3088     error_setg(errp, QERR_UNSUPPORTED);
3089 
3090     return 0;
3091 }
3092 
3093 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
3094 {
3095     error_setg(errp, QERR_UNSUPPORTED);
3096 
3097     return 0;
3098 }
3099 
3100 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
3101 {
3102     error_setg(errp, QERR_UNSUPPORTED);
3103     return NULL;
3104 }
3105 
3106 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
3107 {
3108     error_setg(errp, QERR_UNSUPPORTED);
3109     return NULL;
3110 }
3111 
3112 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
3113 {
3114     error_setg(errp, QERR_UNSUPPORTED);
3115     return NULL;
3116 }
3117 
3118 #endif /* CONFIG_FSFREEZE */
3119 
3120 #if !defined(CONFIG_FSTRIM)
3121 GuestFilesystemTrimResponse *
3122 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
3123 {
3124     error_setg(errp, QERR_UNSUPPORTED);
3125     return NULL;
3126 }
3127 #endif
3128 
3129 /* add unsupported commands to the list of blocked RPCs */
3130 GList *ga_command_init_blockedrpcs(GList *blockedrpcs)
3131 {
3132 #if !defined(__linux__)
3133     {
3134         const char *list[] = {
3135             "guest-suspend-disk", "guest-suspend-ram",
3136             "guest-suspend-hybrid", "guest-get-vcpus", "guest-set-vcpus",
3137             "guest-get-memory-blocks", "guest-set-memory-blocks",
3138             "guest-get-memory-block-size", "guest-get-memory-block-info",
3139             NULL};
3140         char **p = (char **)list;
3141 
3142         while (*p) {
3143             blockedrpcs = g_list_append(blockedrpcs, g_strdup(*p++));
3144         }
3145     }
3146 #endif
3147 
3148 #if !defined(HAVE_GETIFADDRS)
3149     blockedrpcs = g_list_append(blockedrpcs,
3150                               g_strdup("guest-network-get-interfaces"));
3151 #endif
3152 
3153 #if !defined(CONFIG_FSFREEZE)
3154     {
3155         const char *list[] = {
3156             "guest-get-fsinfo", "guest-fsfreeze-status",
3157             "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list",
3158             "guest-fsfreeze-thaw", "guest-get-fsinfo",
3159             "guest-get-disks", NULL};
3160         char **p = (char **)list;
3161 
3162         while (*p) {
3163             blockedrpcs = g_list_append(blockedrpcs, g_strdup(*p++));
3164         }
3165     }
3166 #endif
3167 
3168 #if !defined(CONFIG_FSTRIM)
3169     blockedrpcs = g_list_append(blockedrpcs, g_strdup("guest-fstrim"));
3170 #endif
3171 
3172     blockedrpcs = g_list_append(blockedrpcs, g_strdup("guest-get-devices"));
3173 
3174     return blockedrpcs;
3175 }
3176 
3177 /* register init/cleanup routines for stateful command groups */
3178 void ga_command_state_init(GAState *s, GACommandState *cs)
3179 {
3180 #if defined(CONFIG_FSFREEZE)
3181     ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
3182 #endif
3183 }
3184 
3185 #ifdef HAVE_UTMPX
3186 
/* Number of microseconds in one second. */
#define QGA_MICRO_SECOND_TO_SECOND 1000000

/*
 * Convert the timestamp stored in a utmpx record (ut_tv) into a single
 * value expressed in seconds, with the microsecond part as the fraction.
 */
static double ga_get_login_time(struct utmpx *user_info)
{
    double whole = (double)user_info->ut_tv.tv_sec;
    double fraction = (double)user_info->ut_tv.tv_usec;

    fraction = fraction / QGA_MICRO_SECOND_TO_SECOND;

    return whole + fraction;
}
3196 
3197 GuestUserList *qmp_guest_get_users(Error **errp)
3198 {
3199     GHashTable *cache = NULL;
3200     GuestUserList *head = NULL, **tail = &head;
3201     struct utmpx *user_info = NULL;
3202     gpointer value = NULL;
3203     GuestUser *user = NULL;
3204     double login_time = 0;
3205 
3206     cache = g_hash_table_new(g_str_hash, g_str_equal);
3207     setutxent();
3208 
3209     for (;;) {
3210         user_info = getutxent();
3211         if (user_info == NULL) {
3212             break;
3213         } else if (user_info->ut_type != USER_PROCESS) {
3214             continue;
3215         } else if (g_hash_table_contains(cache, user_info->ut_user)) {
3216             value = g_hash_table_lookup(cache, user_info->ut_user);
3217             user = (GuestUser *)value;
3218             login_time = ga_get_login_time(user_info);
3219             /* We're ensuring the earliest login time to be sent */
3220             if (login_time < user->login_time) {
3221                 user->login_time = login_time;
3222             }
3223             continue;
3224         }
3225 
3226         user = g_new0(GuestUser, 1);
3227         user->user = g_strdup(user_info->ut_user);
3228         user->login_time = ga_get_login_time(user_info);
3229 
3230         g_hash_table_insert(cache, user->user, user);
3231 
3232         QAPI_LIST_APPEND(tail, user);
3233     }
3234     endutxent();
3235     g_hash_table_destroy(cache);
3236     return head;
3237 }
3238 
3239 #else
3240 
3241 GuestUserList *qmp_guest_get_users(Error **errp)
3242 {
3243     error_setg(errp, QERR_UNSUPPORTED);
3244     return NULL;
3245 }
3246 
3247 #endif
3248 
/*
 * Normalize one raw os-release value in place.
 *
 * - A value that does not start with a single or double quote is cut at
 *   the first space or semicolon, whichever comes first.
 * - A quoted value has its opening quote removed and ends at the matching
 *   closing quote (anything after it is discarded). Inside the quotes, a
 *   backslash followed by one of $ ' " \ ` is replaced by that character;
 *   any other backslash is kept literally. If the closing quote is
 *   missing, everything after the opening quote is kept.
 *
 * The result is never longer than the input and is always NUL-terminated.
 */
static void ga_osrelease_replace_special(gchar *value)
{
    gchar *p, *p2, quote;

    /* Trim the string at first space or semicolon if it is not enclosed in
     * single or double quotes. */
    if (value[0] != '"' && value[0] != '\'') {
        p = strpbrk(value, " ;");
        if (p != NULL) {
            *p = 0;
        }
        return;
    }

    quote = value[0];
    p2 = value;         /* write position */
    p = value + 1;      /* read position, just past the opening quote */
    while (*p != 0) {
        if (*p == '\\') {
            switch (p[1]) {
            case '$':
            case '\'':
            case '"':
            case '\\':
            case '`':
                /* Drop the backslash, copy the escaped character below */
                p++;
                break;
            default:
                /* Keep literal backslash followed by whatever is there */
                break;
            }
        } else if (*p == quote) {
            break;
        }
        *(p2++) = *(p++);
    }
    /*
     * Terminate here for both exits: closing quote found, or end of input
     * reached without one (otherwise stale trailing bytes would remain).
     */
    *p2 = 0;
}
3295 
3296 static GKeyFile *ga_parse_osrelease(const char *fname)
3297 {
3298     gchar *content = NULL;
3299     gchar *content2 = NULL;
3300     GError *err = NULL;
3301     GKeyFile *keys = g_key_file_new();
3302     const char *group = "[os-release]\n";
3303 
3304     if (!g_file_get_contents(fname, &content, NULL, &err)) {
3305         slog("failed to read '%s', error: %s", fname, err->message);
3306         goto fail;
3307     }
3308 
3309     if (!g_utf8_validate(content, -1, NULL)) {
3310         slog("file is not utf-8 encoded: %s", fname);
3311         goto fail;
3312     }
3313     content2 = g_strdup_printf("%s%s", group, content);
3314 
3315     if (!g_key_file_load_from_data(keys, content2, -1, G_KEY_FILE_NONE,
3316                                    &err)) {
3317         slog("failed to parse file '%s', error: %s", fname, err->message);
3318         goto fail;
3319     }
3320 
3321     g_free(content);
3322     g_free(content2);
3323     return keys;
3324 
3325 fail:
3326     g_error_free(err);
3327     g_free(content);
3328     g_free(content2);
3329     g_key_file_free(keys);
3330     return NULL;
3331 }
3332 
3333 GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
3334 {
3335     GuestOSInfo *info = NULL;
3336     struct utsname kinfo;
3337     GKeyFile *osrelease = NULL;
3338     const char *qga_os_release = g_getenv("QGA_OS_RELEASE");
3339 
3340     info = g_new0(GuestOSInfo, 1);
3341 
3342     if (uname(&kinfo) != 0) {
3343         error_setg_errno(errp, errno, "uname failed");
3344     } else {
3345         info->kernel_version = g_strdup(kinfo.version);
3346         info->kernel_release = g_strdup(kinfo.release);
3347         info->machine = g_strdup(kinfo.machine);
3348     }
3349 
3350     if (qga_os_release != NULL) {
3351         osrelease = ga_parse_osrelease(qga_os_release);
3352     } else {
3353         osrelease = ga_parse_osrelease("/etc/os-release");
3354         if (osrelease == NULL) {
3355             osrelease = ga_parse_osrelease("/usr/lib/os-release");
3356         }
3357     }
3358 
3359     if (osrelease != NULL) {
3360         char *value;
3361 
3362 #define GET_FIELD(field, osfield) do { \
3363     value = g_key_file_get_value(osrelease, "os-release", osfield, NULL); \
3364     if (value != NULL) { \
3365         ga_osrelease_replace_special(value); \
3366         info->field = value; \
3367     } \
3368 } while (0)
3369         GET_FIELD(id, "ID");
3370         GET_FIELD(name, "NAME");
3371         GET_FIELD(pretty_name, "PRETTY_NAME");
3372         GET_FIELD(version, "VERSION");
3373         GET_FIELD(version_id, "VERSION_ID");
3374         GET_FIELD(variant, "VARIANT");
3375         GET_FIELD(variant_id, "VARIANT_ID");
3376 #undef GET_FIELD
3377 
3378         g_key_file_free(osrelease);
3379     }
3380 
3381     return info;
3382 }
3383 
3384 GuestDeviceInfoList *qmp_guest_get_devices(Error **errp)
3385 {
3386     error_setg(errp, QERR_UNSUPPORTED);
3387 
3388     return NULL;
3389 }
3390 
3391 #ifndef HOST_NAME_MAX
3392 # ifdef _POSIX_HOST_NAME_MAX
3393 #  define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
3394 # else
3395 #  define HOST_NAME_MAX 255
3396 # endif
3397 #endif
3398 
3399 char *qga_get_host_name(Error **errp)
3400 {
3401     long len = -1;
3402     g_autofree char *hostname = NULL;
3403 
3404 #ifdef _SC_HOST_NAME_MAX
3405     len = sysconf(_SC_HOST_NAME_MAX);
3406 #endif /* _SC_HOST_NAME_MAX */
3407 
3408     if (len < 0) {
3409         len = HOST_NAME_MAX;
3410     }
3411 
3412     /* Unfortunately, gethostname() below does not guarantee a
3413      * NULL terminated string. Therefore, allocate one byte more
3414      * to be sure. */
3415     hostname = g_new0(char, len + 1);
3416 
3417     if (gethostname(hostname, len) < 0) {
3418         error_setg_errno(errp, errno,
3419                          "cannot get hostname");
3420         return NULL;
3421     }
3422 
3423     return g_steal_pointer(&hostname);
3424 }
3425