xref: /openbmc/qemu/qga/commands-posix.c (revision 90bda082)
1 /*
2  * QEMU Guest Agent POSIX-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include <glib.h>
15 #include <sys/types.h>
16 #include <sys/ioctl.h>
17 #include <sys/wait.h>
18 #include <unistd.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <dirent.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <sys/stat.h>
25 #include <inttypes.h>
26 #include "qga/guest-agent-core.h"
27 #include "qga-qmp-commands.h"
28 #include "qapi/qmp/qerror.h"
29 #include "qemu/queue.h"
30 #include "qemu/host-utils.h"
31 
32 #ifndef CONFIG_HAS_ENVIRON
33 #ifdef __APPLE__
34 #include <crt_externs.h>
35 #define environ (*_NSGetEnviron())
36 #else
37 extern char **environ;
38 #endif
39 #endif
40 
41 #if defined(__linux__)
42 #include <mntent.h>
43 #include <linux/fs.h>
44 #include <ifaddrs.h>
45 #include <arpa/inet.h>
46 #include <sys/socket.h>
47 #include <net/if.h>
48 
49 #ifdef FIFREEZE
50 #define CONFIG_FSFREEZE
51 #endif
52 #ifdef FITRIM
53 #define CONFIG_FSTRIM
54 #endif
55 #endif
56 
57 static void ga_wait_child(pid_t pid, int *status, Error **errp)
58 {
59     pid_t rpid;
60 
61     *status = 0;
62 
63     do {
64         rpid = waitpid(pid, status, 0);
65     } while (rpid == -1 && errno == EINTR);
66 
67     if (rpid == -1) {
68         error_setg_errno(errp, errno, "failed to wait for child (pid: %d)",
69                          pid);
70         return;
71     }
72 
73     g_assert(rpid == pid);
74 }
75 
76 void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp)
77 {
78     const char *shutdown_flag;
79     Error *local_err = NULL;
80     pid_t pid;
81     int status;
82 
83     slog("guest-shutdown called, mode: %s", mode);
84     if (!has_mode || strcmp(mode, "powerdown") == 0) {
85         shutdown_flag = "-P";
86     } else if (strcmp(mode, "halt") == 0) {
87         shutdown_flag = "-H";
88     } else if (strcmp(mode, "reboot") == 0) {
89         shutdown_flag = "-r";
90     } else {
91         error_setg(errp,
92                    "mode is invalid (valid values are: halt|powerdown|reboot");
93         return;
94     }
95 
96     pid = fork();
97     if (pid == 0) {
98         /* child, start the shutdown */
99         setsid();
100         reopen_fd_to_null(0);
101         reopen_fd_to_null(1);
102         reopen_fd_to_null(2);
103 
104         execle("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
105                "hypervisor initiated shutdown", (char*)NULL, environ);
106         _exit(EXIT_FAILURE);
107     } else if (pid < 0) {
108         error_setg_errno(errp, errno, "failed to create child process");
109         return;
110     }
111 
112     ga_wait_child(pid, &status, &local_err);
113     if (local_err) {
114         error_propagate(errp, local_err);
115         return;
116     }
117 
118     if (!WIFEXITED(status)) {
119         error_setg(errp, "child process has terminated abnormally");
120         return;
121     }
122 
123     if (WEXITSTATUS(status)) {
124         error_setg(errp, "child process has failed to shutdown");
125         return;
126     }
127 
128     /* succeeded */
129 }
130 
131 int64_t qmp_guest_get_time(Error **errp)
132 {
133    int ret;
134    qemu_timeval tq;
135    int64_t time_ns;
136 
137    ret = qemu_gettimeofday(&tq);
138    if (ret < 0) {
139        error_setg_errno(errp, errno, "Failed to get time");
140        return -1;
141    }
142 
143    time_ns = tq.tv_sec * 1000000000LL + tq.tv_usec * 1000;
144    return time_ns;
145 }
146 
147 void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
148 {
149     int ret;
150     int status;
151     pid_t pid;
152     Error *local_err = NULL;
153     struct timeval tv;
154 
155     /* If user has passed a time, validate and set it. */
156     if (has_time) {
157         /* year-2038 will overflow in case time_t is 32bit */
158         if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
159             error_setg(errp, "Time %" PRId64 " is too large", time_ns);
160             return;
161         }
162 
163         tv.tv_sec = time_ns / 1000000000;
164         tv.tv_usec = (time_ns % 1000000000) / 1000;
165 
166         ret = settimeofday(&tv, NULL);
167         if (ret < 0) {
168             error_setg_errno(errp, errno, "Failed to set time to guest");
169             return;
170         }
171     }
172 
173     /* Now, if user has passed a time to set and the system time is set, we
174      * just need to synchronize the hardware clock. However, if no time was
175      * passed, user is requesting the opposite: set the system time from the
176      * hardware clock (RTC). */
177     pid = fork();
178     if (pid == 0) {
179         setsid();
180         reopen_fd_to_null(0);
181         reopen_fd_to_null(1);
182         reopen_fd_to_null(2);
183 
184         /* Use '/sbin/hwclock -w' to set RTC from the system time,
185          * or '/sbin/hwclock -s' to set the system time from RTC. */
186         execle("/sbin/hwclock", "hwclock", has_time ? "-w" : "-s",
187                NULL, environ);
188         _exit(EXIT_FAILURE);
189     } else if (pid < 0) {
190         error_setg_errno(errp, errno, "failed to create child process");
191         return;
192     }
193 
194     ga_wait_child(pid, &status, &local_err);
195     if (local_err) {
196         error_propagate(errp, local_err);
197         return;
198     }
199 
200     if (!WIFEXITED(status)) {
201         error_setg(errp, "child process has terminated abnormally");
202         return;
203     }
204 
205     if (WEXITSTATUS(status)) {
206         error_setg(errp, "hwclock failed to set hardware clock to system time");
207         return;
208     }
209 }
210 
/* One open guest file, tracked in guest_file_state.filehandles. */
typedef struct GuestFileHandle {
    uint64_t id;                         /* handle value looked up by
                                          * guest_file_handle_find() */
    FILE *fh;                            /* stdio stream for the file */
    QTAILQ_ENTRY(GuestFileHandle) next;  /* list linkage */
} GuestFileHandle;
216 
/* File-scope registry of every GuestFileHandle currently known. */
static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles;
} guest_file_state;
220 
221 static int64_t guest_file_handle_add(FILE *fh, Error **errp)
222 {
223     GuestFileHandle *gfh;
224     int64_t handle;
225 
226     handle = ga_get_fd_handle(ga_state, errp);
227     if (handle < 0) {
228         return -1;
229     }
230 
231     gfh = g_malloc0(sizeof(GuestFileHandle));
232     gfh->id = handle;
233     gfh->fh = fh;
234     QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
235 
236     return handle;
237 }
238 
239 static GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
240 {
241     GuestFileHandle *gfh;
242 
243     QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
244     {
245         if (gfh->id == id) {
246             return gfh;
247         }
248     }
249 
250     error_setg(errp, "handle '%" PRId64 "' has not been found", id);
251     return NULL;
252 }
253 
/* "const char * const": element type of the NULL-terminated spelling lists
 * in the table below. */
typedef const char * const ccpc;

/* Make O_BINARY a harmless no-op where the platform does not define it. */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/* http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html */
/*
 * Maps fopen()-style mode strings to open() flags.  Each row lists every
 * accepted spelling of one mode (NULL-terminated) together with the base
 * open() flags for it; find_open_flag() searches this table.
 */
static const struct {
    ccpc *forms;
    int oflag_base;
} guest_file_open_modes[] = {
    { (ccpc[]){ "r",          NULL }, O_RDONLY                                 },
    { (ccpc[]){ "rb",         NULL }, O_RDONLY                      | O_BINARY },
    { (ccpc[]){ "w",          NULL }, O_WRONLY | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a",          NULL }, O_WRONLY | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND | O_BINARY },
    { (ccpc[]){ "r+",         NULL }, O_RDWR                                   },
    { (ccpc[]){ "rb+", "r+b", NULL }, O_RDWR                        | O_BINARY },
    { (ccpc[]){ "w+",         NULL }, O_RDWR   | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a+",         NULL }, O_RDWR   | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND | O_BINARY }
};
278 
279 static int
280 find_open_flag(const char *mode_str, Error **errp)
281 {
282     unsigned mode;
283 
284     for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
285         ccpc *form;
286 
287         form = guest_file_open_modes[mode].forms;
288         while (*form != NULL && strcmp(*form, mode_str) != 0) {
289             ++form;
290         }
291         if (*form != NULL) {
292             break;
293         }
294     }
295 
296     if (mode == ARRAY_SIZE(guest_file_open_modes)) {
297         error_setg(errp, "invalid file open mode '%s'", mode_str);
298         return -1;
299     }
300     return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
301 }
302 
/* Permission bits applied via fchmod() to files newly created by
 * safe_open_or_create(): read+write for user, group and others. */
#define DEFAULT_NEW_FILE_MODE (S_IRUSR | S_IWUSR | \
                               S_IRGRP | S_IWGRP | \
                               S_IROTH | S_IWOTH)
306 
307 static FILE *
308 safe_open_or_create(const char *path, const char *mode, Error **errp)
309 {
310     Error *local_err = NULL;
311     int oflag;
312 
313     oflag = find_open_flag(mode, &local_err);
314     if (local_err == NULL) {
315         int fd;
316 
317         /* If the caller wants / allows creation of a new file, we implement it
318          * with a two step process: open() + (open() / fchmod()).
319          *
320          * First we insist on creating the file exclusively as a new file. If
321          * that succeeds, we're free to set any file-mode bits on it. (The
322          * motivation is that we want to set those file-mode bits independently
323          * of the current umask.)
324          *
325          * If the exclusive creation fails because the file already exists
326          * (EEXIST is not possible for any other reason), we just attempt to
327          * open the file, but in this case we won't be allowed to change the
328          * file-mode bits on the preexistent file.
329          *
330          * The pathname should never disappear between the two open()s in
331          * practice. If it happens, then someone very likely tried to race us.
332          * In this case just go ahead and report the ENOENT from the second
333          * open() to the caller.
334          *
335          * If the caller wants to open a preexistent file, then the first
336          * open() is decisive and its third argument is ignored, and the second
337          * open() and the fchmod() are never called.
338          */
339         fd = open(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
340         if (fd == -1 && errno == EEXIST) {
341             oflag &= ~(unsigned)O_CREAT;
342             fd = open(path, oflag);
343         }
344 
345         if (fd == -1) {
346             error_setg_errno(&local_err, errno, "failed to open file '%s' "
347                              "(mode: '%s')", path, mode);
348         } else {
349             qemu_set_cloexec(fd);
350 
351             if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
352                 error_setg_errno(&local_err, errno, "failed to set permission "
353                                  "0%03o on new file '%s' (mode: '%s')",
354                                  (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
355             } else {
356                 FILE *f;
357 
358                 f = fdopen(fd, mode);
359                 if (f == NULL) {
360                     error_setg_errno(&local_err, errno, "failed to associate "
361                                      "stdio stream with file descriptor %d, "
362                                      "file '%s' (mode: '%s')", fd, path, mode);
363                 } else {
364                     return f;
365                 }
366             }
367 
368             close(fd);
369             if (oflag & O_CREAT) {
370                 unlink(path);
371             }
372         }
373     }
374 
375     error_propagate(errp, local_err);
376     return NULL;
377 }
378 
379 int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode,
380                             Error **errp)
381 {
382     FILE *fh;
383     Error *local_err = NULL;
384     int fd;
385     int64_t ret = -1, handle;
386 
387     if (!has_mode) {
388         mode = "r";
389     }
390     slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
391     fh = safe_open_or_create(path, mode, &local_err);
392     if (local_err != NULL) {
393         error_propagate(errp, local_err);
394         return -1;
395     }
396 
397     /* set fd non-blocking to avoid common use cases (like reading from a
398      * named pipe) from hanging the agent
399      */
400     fd = fileno(fh);
401     ret = fcntl(fd, F_GETFL);
402     ret = fcntl(fd, F_SETFL, ret | O_NONBLOCK);
403     if (ret == -1) {
404         error_setg_errno(errp, errno, "failed to make file '%s' non-blocking",
405                          path);
406         fclose(fh);
407         return -1;
408     }
409 
410     handle = guest_file_handle_add(fh, errp);
411     if (handle < 0) {
412         fclose(fh);
413         return -1;
414     }
415 
416     slog("guest-file-open, handle: %" PRId64, handle);
417     return handle;
418 }
419 
420 void qmp_guest_file_close(int64_t handle, Error **errp)
421 {
422     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
423     int ret;
424 
425     slog("guest-file-close called, handle: %" PRId64, handle);
426     if (!gfh) {
427         return;
428     }
429 
430     ret = fclose(gfh->fh);
431     if (ret == EOF) {
432         error_setg_errno(errp, errno, "failed to close handle");
433         return;
434     }
435 
436     QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
437     g_free(gfh);
438 }
439 
440 struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
441                                           int64_t count, Error **errp)
442 {
443     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
444     GuestFileRead *read_data = NULL;
445     guchar *buf;
446     FILE *fh;
447     size_t read_count;
448 
449     if (!gfh) {
450         return NULL;
451     }
452 
453     if (!has_count) {
454         count = QGA_READ_COUNT_DEFAULT;
455     } else if (count < 0) {
456         error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
457                    count);
458         return NULL;
459     }
460 
461     fh = gfh->fh;
462     buf = g_malloc0(count+1);
463     read_count = fread(buf, 1, count, fh);
464     if (ferror(fh)) {
465         error_setg_errno(errp, errno, "failed to read file");
466         slog("guest-file-read failed, handle: %" PRId64, handle);
467     } else {
468         buf[read_count] = 0;
469         read_data = g_malloc0(sizeof(GuestFileRead));
470         read_data->count = read_count;
471         read_data->eof = feof(fh);
472         if (read_count) {
473             read_data->buf_b64 = g_base64_encode(buf, read_count);
474         }
475     }
476     g_free(buf);
477     clearerr(fh);
478 
479     return read_data;
480 }
481 
482 GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
483                                      bool has_count, int64_t count,
484                                      Error **errp)
485 {
486     GuestFileWrite *write_data = NULL;
487     guchar *buf;
488     gsize buf_len;
489     int write_count;
490     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
491     FILE *fh;
492 
493     if (!gfh) {
494         return NULL;
495     }
496 
497     fh = gfh->fh;
498     buf = g_base64_decode(buf_b64, &buf_len);
499 
500     if (!has_count) {
501         count = buf_len;
502     } else if (count < 0 || count > buf_len) {
503         error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
504                    count);
505         g_free(buf);
506         return NULL;
507     }
508 
509     write_count = fwrite(buf, 1, count, fh);
510     if (ferror(fh)) {
511         error_setg_errno(errp, errno, "failed to write to file");
512         slog("guest-file-write failed, handle: %" PRId64, handle);
513     } else {
514         write_data = g_malloc0(sizeof(GuestFileWrite));
515         write_data->count = write_count;
516         write_data->eof = feof(fh);
517     }
518     g_free(buf);
519     clearerr(fh);
520 
521     return write_data;
522 }
523 
524 struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
525                                           int64_t whence, Error **errp)
526 {
527     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
528     GuestFileSeek *seek_data = NULL;
529     FILE *fh;
530     int ret;
531 
532     if (!gfh) {
533         return NULL;
534     }
535 
536     fh = gfh->fh;
537     ret = fseek(fh, offset, whence);
538     if (ret == -1) {
539         error_setg_errno(errp, errno, "failed to seek file");
540     } else {
541         seek_data = g_new0(GuestFileSeek, 1);
542         seek_data->position = ftell(fh);
543         seek_data->eof = feof(fh);
544     }
545     clearerr(fh);
546 
547     return seek_data;
548 }
549 
550 void qmp_guest_file_flush(int64_t handle, Error **errp)
551 {
552     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
553     FILE *fh;
554     int ret;
555 
556     if (!gfh) {
557         return;
558     }
559 
560     fh = gfh->fh;
561     ret = fflush(fh);
562     if (ret == EOF) {
563         error_setg_errno(errp, errno, "failed to flush file");
564     }
565 }
566 
/* Initialize the (empty) list of open guest file handles. */
static void guest_file_init(void)
{
    QTAILQ_INIT(&guest_file_state.filehandles);
}
571 
572 /* linux-specific implementations. avoid this if at all possible. */
573 #if defined(__linux__)
574 
575 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/* One mounted filesystem as collected by build_fs_mount_list(). */
typedef struct FsMount {
    char *dirname;                      /* mount point (g_strdup'ed) */
    char *devtype;                      /* filesystem type (g_strdup'ed) */
    unsigned int devmajor, devminor;    /* device numbers of the mount */
    QTAILQ_ENTRY(FsMount) next;         /* list linkage */
} FsMount;

/* List head type for a queue of FsMount entries. */
typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
584 
585 static void free_fs_mount_list(FsMountList *mounts)
586 {
587      FsMount *mount, *temp;
588 
589      if (!mounts) {
590          return;
591      }
592 
593      QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
594          QTAILQ_REMOVE(mounts, mount, next);
595          g_free(mount->dirname);
596          g_free(mount->devtype);
597          g_free(mount);
598      }
599 }
600 
/*
 * Determine the device numbers of the block device node @devpath.
 *
 * *@devmajor and *@devminor are always reset to 0 first.
 *
 * Returns:
 *    0  @devpath is a block device; its major/minor numbers are stored
 *   -2  @devpath is a directory (treated as a bind mount by the caller)
 *   -1  stat() failed (logged) or @devpath is some other kind of file
 */
static int dev_major_minor(const char *devpath,
                           unsigned int *devmajor, unsigned int *devminor)
{
    struct stat info;

    *devminor = 0;
    *devmajor = 0;

    if (stat(devpath, &info) < 0) {
        slog("failed to stat device file '%s': %s", devpath, strerror(errno));
        return -1;
    }

    if (S_ISDIR(info.st_mode)) {
        /* It is bind mount */
        return -2;
    }

    if (!S_ISBLK(info.st_mode)) {
        return -1;
    }

    *devmajor = major(info.st_rdev);
    *devminor = minor(info.st_rdev);
    return 0;
}
624 
625 /*
626  * Walk the mount table and build a list of local file systems
627  */
628 static void build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
629 {
630     struct mntent *ment;
631     FsMount *mount;
632     char const *mtab = "/proc/self/mounts";
633     FILE *fp;
634     unsigned int devmajor, devminor;
635 
636     fp = setmntent(mtab, "r");
637     if (!fp) {
638         error_setg(errp, "failed to open mtab file: '%s'", mtab);
639         return;
640     }
641 
642     while ((ment = getmntent(fp))) {
643         /*
644          * An entry which device name doesn't start with a '/' is
645          * either a dummy file system or a network file system.
646          * Add special handling for smbfs and cifs as is done by
647          * coreutils as well.
648          */
649         if ((ment->mnt_fsname[0] != '/') ||
650             (strcmp(ment->mnt_type, "smbfs") == 0) ||
651             (strcmp(ment->mnt_type, "cifs") == 0)) {
652             continue;
653         }
654         if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
655             /* Skip bind mounts */
656             continue;
657         }
658 
659         mount = g_malloc0(sizeof(FsMount));
660         mount->dirname = g_strdup(ment->mnt_dir);
661         mount->devtype = g_strdup(ment->mnt_type);
662         mount->devmajor = devmajor;
663         mount->devminor = devminor;
664 
665         QTAILQ_INSERT_TAIL(mounts, mount, next);
666     }
667 
668     endmntent(fp);
669 }
670 
/*
 * Decode, in place, the backslash escapes in @name:
 *   "\\"    (two backslashes)                       -> one backslash
 *   "\ooo"  (three octal digits, first one 0..3)    -> that byte value
 * Any other backslash is copied through unchanged.
 *
 * @len is the index of the last byte to process; callers pass the string
 * length so that the terminating NUL is copied along with the text.
 */
static void decode_mntname(char *name, int len)
{
    int rd = 0;     /* next byte to read */
    int wr = 0;     /* next byte to write; never ahead of rd */

    while (rd <= len) {
        char out = name[rd];

        if (out == '\\') {
            const char *esc = &name[rd + 1];

            if (esc[0] == '\\') {
                /* escaped backslash: emit one, consume two */
                rd += 1;
            } else if (esc[0] >= '0' && esc[0] <= '3' &&
                       esc[1] >= '0' && esc[1] <= '7' &&
                       esc[2] >= '0' && esc[2] <= '7') {
                /* three-digit octal escape */
                out = (esc[0] - '0') * 64 +
                      (esc[1] - '0') * 8 +
                      (esc[2] - '0');
                rd += 3;
            }
            /* otherwise: a lone backslash, kept as-is */
        }

        name[wr++] = out;
        rd++;
    }
}
692 
/*
 * Build the list of mounted file systems in @mounts by parsing
 * /proc/self/mountinfo line by line.  If that file cannot be opened, fall
 * back to build_fs_mount_list_from_mtab().
 *
 * Each line is parsed in two parts, split at the " - " separator:
 *   - before it: two skipped numbers, "major:minor", one skipped token,
 *     then the token used as the mount directory;
 *   - after it: the filesystem type token followed by the device token.
 * Lines that do not match are silently skipped.  Entries with major
 * number 0 are only kept for btrfs, and only if the device numbers can be
 * obtained from the device node via dev_major_minor().
 */
static void build_fs_mount_list(FsMountList *mounts, Error **errp)
{
    FsMount *mount;
    char const *mountinfo = "/proc/self/mountinfo";
    FILE *fp;
    char *line = NULL, *dash;
    size_t n;
    char check;
    unsigned int devmajor, devminor;
    int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;

    fp = fopen(mountinfo, "r");
    if (!fp) {
        build_fs_mount_list_from_mtab(mounts, errp);
        return;
    }

    while (getline(&line, &n, fp) != -1) {
        /* %n records offsets and does not count towards the return value:
         * 3 conversions == major, minor and the trailing "check" char, the
         * latter proving that something follows the mount directory. */
        ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
                     &devmajor, &devminor, &dir_s, &dir_e, &check);
        if (ret < 3) {
            continue;
        }
        dash = strstr(line + dir_e, " - ");
        if (!dash) {
            continue;
        }
        /* here only "check" is counted; offsets are relative to dash */
        ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
                     &type_s, &type_e, &dev_s, &dev_e, &check);
        if (ret < 1) {
            continue;
        }
        /* cut the line into NUL-terminated tokens in place */
        line[dir_e] = 0;
        dash[type_e] = 0;
        dash[dev_e] = 0;
        decode_mntname(line + dir_s, dir_e - dir_s);
        decode_mntname(dash + dev_s, dev_e - dev_s);
        if (devmajor == 0) {
            /* btrfs reports major number = 0 */
            if (strcmp("btrfs", dash + type_s) != 0 ||
                dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
                continue;
            }
        }

        mount = g_malloc0(sizeof(FsMount));
        mount->dirname = g_strdup(line + dir_s);
        mount->devtype = g_strdup(dash + type_s);
        mount->devmajor = devmajor;
        mount->devminor = devminor;

        QTAILQ_INSERT_TAIL(mounts, mount, next);
    }
    /* buffer was allocated by getline(), hence free() and not g_free() */
    free(line);

    fclose(fp);
}
750 #endif
751 
752 #if defined(CONFIG_FSFREEZE)
753 
754 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
755 {
756     char *path;
757     char *dpath;
758     char *driver = NULL;
759     char buf[PATH_MAX];
760     ssize_t len;
761 
762     path = g_strndup(syspath, pathlen);
763     dpath = g_strdup_printf("%s/driver", path);
764     len = readlink(dpath, buf, sizeof(buf) - 1);
765     if (len != -1) {
766         buf[len] = 0;
767         driver = g_strdup(basename(buf));
768     }
769     g_free(dpath);
770     g_free(path);
771     return driver;
772 }
773 
/*
 * qsort() comparator for unsigned int elements, ascending order.
 * Returns -1, 0 or 1.
 */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int lhs = *(const unsigned int *)_a;
    const unsigned int rhs = *(const unsigned int *)_b;

    return (lhs > rhs) - (lhs < rhs);
}
781 
782 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
783 static int build_hosts(char const *syspath, char const *host, bool ata,
784                        unsigned int *hosts, int hosts_max, Error **errp)
785 {
786     char *path;
787     DIR *dir;
788     struct dirent *entry;
789     int i = 0;
790 
791     path = g_strndup(syspath, host - syspath);
792     dir = opendir(path);
793     if (!dir) {
794         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
795         g_free(path);
796         return -1;
797     }
798 
799     while (i < hosts_max) {
800         entry = readdir(dir);
801         if (!entry) {
802             break;
803         }
804         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
805             ++i;
806         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
807             ++i;
808         }
809     }
810 
811     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
812 
813     g_free(path);
814     closedir(dir);
815     return i;
816 }
817 
818 /* Store disk device info specified by @sysfs into @fs */
819 static void build_guest_fsinfo_for_real_device(char const *syspath,
820                                                GuestFilesystemInfo *fs,
821                                                Error **errp)
822 {
823     unsigned int pci[4], host, hosts[8], tgt[3];
824     int i, nhosts = 0, pcilen;
825     GuestDiskAddress *disk;
826     GuestPCIAddress *pciaddr;
827     GuestDiskAddressList *list = NULL;
828     bool has_ata = false, has_host = false, has_tgt = false;
829     char *p, *q, *driver = NULL;
830 
831     p = strstr(syspath, "/devices/pci");
832     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
833                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
834         g_debug("only pci device is supported: sysfs path \"%s\"", syspath);
835         return;
836     }
837 
838     driver = get_pci_driver(syspath, (p + 12 + pcilen) - syspath, errp);
839     if (!driver) {
840         goto cleanup;
841     }
842 
843     p = strstr(syspath, "/target");
844     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
845                     tgt, tgt + 1, tgt + 2) == 3) {
846         has_tgt = true;
847     }
848 
849     p = strstr(syspath, "/ata");
850     if (p) {
851         q = p + 4;
852         has_ata = true;
853     } else {
854         p = strstr(syspath, "/host");
855         q = p + 5;
856     }
857     if (p && sscanf(q, "%u", &host) == 1) {
858         has_host = true;
859         nhosts = build_hosts(syspath, p, has_ata, hosts,
860                              sizeof(hosts) / sizeof(hosts[0]), errp);
861         if (nhosts < 0) {
862             goto cleanup;
863         }
864     }
865 
866     pciaddr = g_malloc0(sizeof(*pciaddr));
867     pciaddr->domain = pci[0];
868     pciaddr->bus = pci[1];
869     pciaddr->slot = pci[2];
870     pciaddr->function = pci[3];
871 
872     disk = g_malloc0(sizeof(*disk));
873     disk->pci_controller = pciaddr;
874 
875     list = g_malloc0(sizeof(*list));
876     list->value = disk;
877 
878     if (strcmp(driver, "ata_piix") == 0) {
879         /* a host per ide bus, target*:0:<unit>:0 */
880         if (!has_host || !has_tgt) {
881             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
882             goto cleanup;
883         }
884         for (i = 0; i < nhosts; i++) {
885             if (host == hosts[i]) {
886                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
887                 disk->bus = i;
888                 disk->unit = tgt[1];
889                 break;
890             }
891         }
892         if (i >= nhosts) {
893             g_debug("no host for '%s' (driver '%s')", syspath, driver);
894             goto cleanup;
895         }
896     } else if (strcmp(driver, "sym53c8xx") == 0) {
897         /* scsi(LSI Logic): target*:0:<unit>:0 */
898         if (!has_tgt) {
899             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
900             goto cleanup;
901         }
902         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
903         disk->unit = tgt[1];
904     } else if (strcmp(driver, "virtio-pci") == 0) {
905         if (has_tgt) {
906             /* virtio-scsi: target*:0:0:<unit> */
907             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
908             disk->unit = tgt[2];
909         } else {
910             /* virtio-blk: 1 disk per 1 device */
911             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
912         }
913     } else if (strcmp(driver, "ahci") == 0) {
914         /* ahci: 1 host per 1 unit */
915         if (!has_host || !has_tgt) {
916             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
917             goto cleanup;
918         }
919         for (i = 0; i < nhosts; i++) {
920             if (host == hosts[i]) {
921                 disk->unit = i;
922                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
923                 break;
924             }
925         }
926         if (i >= nhosts) {
927             g_debug("no host for '%s' (driver '%s')", syspath, driver);
928             goto cleanup;
929         }
930     } else {
931         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
932         goto cleanup;
933     }
934 
935     list->next = fs->disk;
936     fs->disk = list;
937     g_free(driver);
938     return;
939 
940 cleanup:
941     if (list) {
942         qapi_free_GuestDiskAddressList(list);
943     }
944     g_free(driver);
945 }
946 
947 static void build_guest_fsinfo_for_device(char const *devpath,
948                                           GuestFilesystemInfo *fs,
949                                           Error **errp);
950 
951 /* Store a list of slave devices of virtual volume specified by @syspath into
952  * @fs */
953 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
954                                                   GuestFilesystemInfo *fs,
955                                                   Error **errp)
956 {
957     DIR *dir;
958     char *dirpath;
959     struct dirent entry, *result;
960 
961     dirpath = g_strdup_printf("%s/slaves", syspath);
962     dir = opendir(dirpath);
963     if (!dir) {
964         error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
965         g_free(dirpath);
966         return;
967     }
968     g_free(dirpath);
969 
970     for (;;) {
971         if (readdir_r(dir, &entry, &result) != 0) {
972             error_setg_errno(errp, errno, "readdir_r(\"%s\")", dirpath);
973             break;
974         }
975         if (!result) {
976             break;
977         }
978 
979         if (entry.d_type == DT_LNK) {
980             g_debug(" slave device '%s'", entry.d_name);
981             dirpath = g_strdup_printf("%s/slaves/%s", syspath, entry.d_name);
982             build_guest_fsinfo_for_device(dirpath, fs, errp);
983             g_free(dirpath);
984 
985             if (*errp) {
986                 break;
987             }
988         }
989     }
990 
991     closedir(dir);
992 }
993 
994 /* Dispatch to functions for virtual/real device */
995 static void build_guest_fsinfo_for_device(char const *devpath,
996                                           GuestFilesystemInfo *fs,
997                                           Error **errp)
998 {
999     char *syspath = realpath(devpath, NULL);
1000 
1001     if (!syspath) {
1002         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1003         return;
1004     }
1005 
1006     if (!fs->name) {
1007         fs->name = g_strdup(basename(syspath));
1008     }
1009 
1010     g_debug("  parse sysfs path '%s'", syspath);
1011 
1012     if (strstr(syspath, "/devices/virtual/block/")) {
1013         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
1014     } else {
1015         build_guest_fsinfo_for_real_device(syspath, fs, errp);
1016     }
1017 
1018     free(syspath);
1019 }
1020 
1021 /* Return a list of the disk device(s)' info which @mount lies on */
1022 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1023                                                Error **errp)
1024 {
1025     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1026     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1027                                     mount->devmajor, mount->devminor);
1028 
1029     fs->mountpoint = g_strdup(mount->dirname);
1030     fs->type = g_strdup(mount->devtype);
1031     build_guest_fsinfo_for_device(devpath, fs, errp);
1032 
1033     g_free(devpath);
1034     return fs;
1035 }
1036 
1037 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1038 {
1039     FsMountList mounts;
1040     struct FsMount *mount;
1041     GuestFilesystemInfoList *new, *ret = NULL;
1042     Error *local_err = NULL;
1043 
1044     QTAILQ_INIT(&mounts);
1045     build_fs_mount_list(&mounts, &local_err);
1046     if (local_err) {
1047         error_propagate(errp, local_err);
1048         return NULL;
1049     }
1050 
1051     QTAILQ_FOREACH(mount, &mounts, next) {
1052         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1053 
1054         new = g_malloc0(sizeof(*ret));
1055         new->value = build_guest_fsinfo(mount, &local_err);
1056         new->next = ret;
1057         ret = new;
1058         if (local_err) {
1059             error_propagate(errp, local_err);
1060             qapi_free_GuestFilesystemInfoList(ret);
1061             ret = NULL;
1062             break;
1063         }
1064     }
1065 
1066     free_fs_mount_list(&mounts);
1067     return ret;
1068 }
1069 
1070 
/* Which action the fsfreeze hook is asked to perform */
typedef enum {
    FSFREEZE_HOOK_THAW = 0,
    FSFREEZE_HOOK_FREEZE = 1,
} FsfreezeHookArg;

/* Argument string handed to the hook, indexed by FsfreezeHookArg */
static const char *fsfreeze_hook_arg_string[] = {
    [FSFREEZE_HOOK_THAW] = "thaw",
    [FSFREEZE_HOOK_FREEZE] = "freeze",
};
1080 
1081 static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
1082 {
1083     int status;
1084     pid_t pid;
1085     const char *hook;
1086     const char *arg_str = fsfreeze_hook_arg_string[arg];
1087     Error *local_err = NULL;
1088 
1089     hook = ga_fsfreeze_hook(ga_state);
1090     if (!hook) {
1091         return;
1092     }
1093     if (access(hook, X_OK) != 0) {
1094         error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
1095         return;
1096     }
1097 
1098     slog("executing fsfreeze hook with arg '%s'", arg_str);
1099     pid = fork();
1100     if (pid == 0) {
1101         setsid();
1102         reopen_fd_to_null(0);
1103         reopen_fd_to_null(1);
1104         reopen_fd_to_null(2);
1105 
1106         execle(hook, hook, arg_str, NULL, environ);
1107         _exit(EXIT_FAILURE);
1108     } else if (pid < 0) {
1109         error_setg_errno(errp, errno, "failed to create child process");
1110         return;
1111     }
1112 
1113     ga_wait_child(pid, &status, &local_err);
1114     if (local_err) {
1115         error_propagate(errp, local_err);
1116         return;
1117     }
1118 
1119     if (!WIFEXITED(status)) {
1120         error_setg(errp, "fsfreeze hook has terminated abnormally");
1121         return;
1122     }
1123 
1124     status = WEXITSTATUS(status);
1125     if (status) {
1126         error_setg(errp, "fsfreeze hook has failed with status %d", status);
1127         return;
1128     }
1129 }
1130 
1131 /*
1132  * Return status of freeze/thaw
1133  */
1134 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
1135 {
1136     if (ga_is_frozen(ga_state)) {
1137         return GUEST_FSFREEZE_STATUS_FROZEN;
1138     }
1139 
1140     return GUEST_FSFREEZE_STATUS_THAWED;
1141 }
1142 
1143 int64_t qmp_guest_fsfreeze_freeze(Error **errp)
1144 {
1145     return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
1146 }
1147 
1148 /*
1149  * Walk list of mounted file systems in the guest, and freeze the ones which
1150  * are real local file systems.
1151  */
1152 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
1153                                        strList *mountpoints,
1154                                        Error **errp)
1155 {
1156     int ret = 0, i = 0;
1157     strList *list;
1158     FsMountList mounts;
1159     struct FsMount *mount;
1160     Error *local_err = NULL;
1161     int fd;
1162 
1163     slog("guest-fsfreeze called");
1164 
1165     execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
1166     if (local_err) {
1167         error_propagate(errp, local_err);
1168         return -1;
1169     }
1170 
1171     QTAILQ_INIT(&mounts);
1172     build_fs_mount_list(&mounts, &local_err);
1173     if (local_err) {
1174         error_propagate(errp, local_err);
1175         return -1;
1176     }
1177 
1178     /* cannot risk guest agent blocking itself on a write in this state */
1179     ga_set_frozen(ga_state);
1180 
1181     QTAILQ_FOREACH_REVERSE(mount, &mounts, FsMountList, next) {
1182         /* To issue fsfreeze in the reverse order of mounts, check if the
1183          * mount is listed in the list here */
1184         if (has_mountpoints) {
1185             for (list = mountpoints; list; list = list->next) {
1186                 if (strcmp(list->value, mount->dirname) == 0) {
1187                     break;
1188                 }
1189             }
1190             if (!list) {
1191                 continue;
1192             }
1193         }
1194 
1195         fd = qemu_open(mount->dirname, O_RDONLY);
1196         if (fd == -1) {
1197             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
1198             goto error;
1199         }
1200 
1201         /* we try to cull filesytems we know won't work in advance, but other
1202          * filesytems may not implement fsfreeze for less obvious reasons.
1203          * these will report EOPNOTSUPP. we simply ignore these when tallying
1204          * the number of frozen filesystems.
1205          *
1206          * any other error means a failure to freeze a filesystem we
1207          * expect to be freezable, so return an error in those cases
1208          * and return system to thawed state.
1209          */
1210         ret = ioctl(fd, FIFREEZE);
1211         if (ret == -1) {
1212             if (errno != EOPNOTSUPP) {
1213                 error_setg_errno(errp, errno, "failed to freeze %s",
1214                                  mount->dirname);
1215                 close(fd);
1216                 goto error;
1217             }
1218         } else {
1219             i++;
1220         }
1221         close(fd);
1222     }
1223 
1224     free_fs_mount_list(&mounts);
1225     return i;
1226 
1227 error:
1228     free_fs_mount_list(&mounts);
1229     qmp_guest_fsfreeze_thaw(NULL);
1230     return 0;
1231 }
1232 
1233 /*
1234  * Walk list of frozen file systems in the guest, and thaw them.
1235  */
1236 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
1237 {
1238     int ret;
1239     FsMountList mounts;
1240     FsMount *mount;
1241     int fd, i = 0, logged;
1242     Error *local_err = NULL;
1243 
1244     QTAILQ_INIT(&mounts);
1245     build_fs_mount_list(&mounts, &local_err);
1246     if (local_err) {
1247         error_propagate(errp, local_err);
1248         return 0;
1249     }
1250 
1251     QTAILQ_FOREACH(mount, &mounts, next) {
1252         logged = false;
1253         fd = qemu_open(mount->dirname, O_RDONLY);
1254         if (fd == -1) {
1255             continue;
1256         }
1257         /* we have no way of knowing whether a filesystem was actually unfrozen
1258          * as a result of a successful call to FITHAW, only that if an error
1259          * was returned the filesystem was *not* unfrozen by that particular
1260          * call.
1261          *
1262          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
1263          * to unfreeze, continuing issuing FITHAW until an error is returned,
1264          * in which case either the filesystem is in an unfreezable state, or,
1265          * more likely, it was thawed previously (and remains so afterward).
1266          *
1267          * also, since the most recent successful call is the one that did
1268          * the actual unfreeze, we can use this to provide an accurate count
1269          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
1270          * may * be useful for determining whether a filesystem was unfrozen
1271          * during the freeze/thaw phase by a process other than qemu-ga.
1272          */
1273         do {
1274             ret = ioctl(fd, FITHAW);
1275             if (ret == 0 && !logged) {
1276                 i++;
1277                 logged = true;
1278             }
1279         } while (ret == 0);
1280         close(fd);
1281     }
1282 
1283     ga_unset_frozen(ga_state);
1284     free_fs_mount_list(&mounts);
1285 
1286     execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
1287 
1288     return i;
1289 }
1290 
1291 static void guest_fsfreeze_cleanup(void)
1292 {
1293     Error *err = NULL;
1294 
1295     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
1296         qmp_guest_fsfreeze_thaw(&err);
1297         if (err) {
1298             slog("failed to clean up frozen filesystems: %s",
1299                  error_get_pretty(err));
1300             error_free(err);
1301         }
1302     }
1303 }
1304 #endif /* CONFIG_FSFREEZE */
1305 
1306 #if defined(CONFIG_FSTRIM)
1307 /*
1308  * Walk list of mounted file systems in the guest, and trim them.
1309  */
1310 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1311 {
1312     int ret = 0;
1313     FsMountList mounts;
1314     struct FsMount *mount;
1315     int fd;
1316     Error *local_err = NULL;
1317     struct fstrim_range r = {
1318         .start = 0,
1319         .len = -1,
1320         .minlen = has_minimum ? minimum : 0,
1321     };
1322 
1323     slog("guest-fstrim called");
1324 
1325     QTAILQ_INIT(&mounts);
1326     build_fs_mount_list(&mounts, &local_err);
1327     if (local_err) {
1328         error_propagate(errp, local_err);
1329         return;
1330     }
1331 
1332     QTAILQ_FOREACH(mount, &mounts, next) {
1333         fd = qemu_open(mount->dirname, O_RDONLY);
1334         if (fd == -1) {
1335             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
1336             goto error;
1337         }
1338 
1339         /* We try to cull filesytems we know won't work in advance, but other
1340          * filesytems may not implement fstrim for less obvious reasons.  These
1341          * will report EOPNOTSUPP; we simply ignore these errors.  Any other
1342          * error means an unexpected error, so return it in those cases.  In
1343          * some other cases ENOTTY will be reported (e.g. CD-ROMs).
1344          */
1345         ret = ioctl(fd, FITRIM, &r);
1346         if (ret == -1) {
1347             if (errno != ENOTTY && errno != EOPNOTSUPP) {
1348                 error_setg_errno(errp, errno, "failed to trim %s",
1349                                  mount->dirname);
1350                 close(fd);
1351                 goto error;
1352             }
1353         }
1354         close(fd);
1355     }
1356 
1357 error:
1358     free_fs_mount_list(&mounts);
1359 }
1360 #endif /* CONFIG_FSTRIM */
1361 
1362 
1363 #define LINUX_SYS_STATE_FILE "/sys/power/state"
1364 #define SUSPEND_SUPPORTED 0
1365 #define SUSPEND_NOT_SUPPORTED 1
1366 
1367 static void bios_supports_mode(const char *pmutils_bin, const char *pmutils_arg,
1368                                const char *sysfile_str, Error **errp)
1369 {
1370     Error *local_err = NULL;
1371     char *pmutils_path;
1372     pid_t pid;
1373     int status;
1374 
1375     pmutils_path = g_find_program_in_path(pmutils_bin);
1376 
1377     pid = fork();
1378     if (!pid) {
1379         char buf[32]; /* hopefully big enough */
1380         ssize_t ret;
1381         int fd;
1382 
1383         setsid();
1384         reopen_fd_to_null(0);
1385         reopen_fd_to_null(1);
1386         reopen_fd_to_null(2);
1387 
1388         if (pmutils_path) {
1389             execle(pmutils_path, pmutils_bin, pmutils_arg, NULL, environ);
1390         }
1391 
1392         /*
1393          * If we get here either pm-utils is not installed or execle() has
1394          * failed. Let's try the manual method if the caller wants it.
1395          */
1396 
1397         if (!sysfile_str) {
1398             _exit(SUSPEND_NOT_SUPPORTED);
1399         }
1400 
1401         fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1402         if (fd < 0) {
1403             _exit(SUSPEND_NOT_SUPPORTED);
1404         }
1405 
1406         ret = read(fd, buf, sizeof(buf)-1);
1407         if (ret <= 0) {
1408             _exit(SUSPEND_NOT_SUPPORTED);
1409         }
1410         buf[ret] = '\0';
1411 
1412         if (strstr(buf, sysfile_str)) {
1413             _exit(SUSPEND_SUPPORTED);
1414         }
1415 
1416         _exit(SUSPEND_NOT_SUPPORTED);
1417     } else if (pid < 0) {
1418         error_setg_errno(errp, errno, "failed to create child process");
1419         goto out;
1420     }
1421 
1422     ga_wait_child(pid, &status, &local_err);
1423     if (local_err) {
1424         error_propagate(errp, local_err);
1425         goto out;
1426     }
1427 
1428     if (!WIFEXITED(status)) {
1429         error_setg(errp, "child process has terminated abnormally");
1430         goto out;
1431     }
1432 
1433     switch (WEXITSTATUS(status)) {
1434     case SUSPEND_SUPPORTED:
1435         goto out;
1436     case SUSPEND_NOT_SUPPORTED:
1437         error_setg(errp,
1438                    "the requested suspend mode is not supported by the guest");
1439         goto out;
1440     default:
1441         error_setg(errp,
1442                    "the helper program '%s' returned an unexpected exit status"
1443                    " code (%d)", pmutils_path, WEXITSTATUS(status));
1444         goto out;
1445     }
1446 
1447 out:
1448     g_free(pmutils_path);
1449 }
1450 
1451 static void guest_suspend(const char *pmutils_bin, const char *sysfile_str,
1452                           Error **errp)
1453 {
1454     Error *local_err = NULL;
1455     char *pmutils_path;
1456     pid_t pid;
1457     int status;
1458 
1459     pmutils_path = g_find_program_in_path(pmutils_bin);
1460 
1461     pid = fork();
1462     if (pid == 0) {
1463         /* child */
1464         int fd;
1465 
1466         setsid();
1467         reopen_fd_to_null(0);
1468         reopen_fd_to_null(1);
1469         reopen_fd_to_null(2);
1470 
1471         if (pmutils_path) {
1472             execle(pmutils_path, pmutils_bin, NULL, environ);
1473         }
1474 
1475         /*
1476          * If we get here either pm-utils is not installed or execle() has
1477          * failed. Let's try the manual method if the caller wants it.
1478          */
1479 
1480         if (!sysfile_str) {
1481             _exit(EXIT_FAILURE);
1482         }
1483 
1484         fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1485         if (fd < 0) {
1486             _exit(EXIT_FAILURE);
1487         }
1488 
1489         if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1490             _exit(EXIT_FAILURE);
1491         }
1492 
1493         _exit(EXIT_SUCCESS);
1494     } else if (pid < 0) {
1495         error_setg_errno(errp, errno, "failed to create child process");
1496         goto out;
1497     }
1498 
1499     ga_wait_child(pid, &status, &local_err);
1500     if (local_err) {
1501         error_propagate(errp, local_err);
1502         goto out;
1503     }
1504 
1505     if (!WIFEXITED(status)) {
1506         error_setg(errp, "child process has terminated abnormally");
1507         goto out;
1508     }
1509 
1510     if (WEXITSTATUS(status)) {
1511         error_setg(errp, "child process has failed to suspend");
1512         goto out;
1513     }
1514 
1515 out:
1516     g_free(pmutils_path);
1517 }
1518 
1519 void qmp_guest_suspend_disk(Error **errp)
1520 {
1521     Error *local_err = NULL;
1522 
1523     bios_supports_mode("pm-is-supported", "--hibernate", "disk", &local_err);
1524     if (local_err) {
1525         error_propagate(errp, local_err);
1526         return;
1527     }
1528 
1529     guest_suspend("pm-hibernate", "disk", errp);
1530 }
1531 
1532 void qmp_guest_suspend_ram(Error **errp)
1533 {
1534     Error *local_err = NULL;
1535 
1536     bios_supports_mode("pm-is-supported", "--suspend", "mem", &local_err);
1537     if (local_err) {
1538         error_propagate(errp, local_err);
1539         return;
1540     }
1541 
1542     guest_suspend("pm-suspend", "mem", errp);
1543 }
1544 
1545 void qmp_guest_suspend_hybrid(Error **errp)
1546 {
1547     Error *local_err = NULL;
1548 
1549     bios_supports_mode("pm-is-supported", "--suspend-hybrid", NULL,
1550                        &local_err);
1551     if (local_err) {
1552         error_propagate(errp, local_err);
1553         return;
1554     }
1555 
1556     guest_suspend("pm-suspend-hybrid", NULL, errp);
1557 }
1558 
1559 static GuestNetworkInterfaceList *
1560 guest_find_interface(GuestNetworkInterfaceList *head,
1561                      const char *name)
1562 {
1563     for (; head; head = head->next) {
1564         if (strcmp(head->value->name, name) == 0) {
1565             break;
1566         }
1567     }
1568 
1569     return head;
1570 }
1571 
1572 /*
1573  * Build information about guest interfaces
1574  */
1575 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
1576 {
1577     GuestNetworkInterfaceList *head = NULL, *cur_item = NULL;
1578     struct ifaddrs *ifap, *ifa;
1579 
1580     if (getifaddrs(&ifap) < 0) {
1581         error_setg_errno(errp, errno, "getifaddrs failed");
1582         goto error;
1583     }
1584 
1585     for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
1586         GuestNetworkInterfaceList *info;
1587         GuestIpAddressList **address_list = NULL, *address_item = NULL;
1588         char addr4[INET_ADDRSTRLEN];
1589         char addr6[INET6_ADDRSTRLEN];
1590         int sock;
1591         struct ifreq ifr;
1592         unsigned char *mac_addr;
1593         void *p;
1594 
1595         g_debug("Processing %s interface", ifa->ifa_name);
1596 
1597         info = guest_find_interface(head, ifa->ifa_name);
1598 
1599         if (!info) {
1600             info = g_malloc0(sizeof(*info));
1601             info->value = g_malloc0(sizeof(*info->value));
1602             info->value->name = g_strdup(ifa->ifa_name);
1603 
1604             if (!cur_item) {
1605                 head = cur_item = info;
1606             } else {
1607                 cur_item->next = info;
1608                 cur_item = info;
1609             }
1610         }
1611 
1612         if (!info->value->has_hardware_address &&
1613             ifa->ifa_flags & SIOCGIFHWADDR) {
1614             /* we haven't obtained HW address yet */
1615             sock = socket(PF_INET, SOCK_STREAM, 0);
1616             if (sock == -1) {
1617                 error_setg_errno(errp, errno, "failed to create socket");
1618                 goto error;
1619             }
1620 
1621             memset(&ifr, 0, sizeof(ifr));
1622             pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->value->name);
1623             if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
1624                 error_setg_errno(errp, errno,
1625                                  "failed to get MAC address of %s",
1626                                  ifa->ifa_name);
1627                 close(sock);
1628                 goto error;
1629             }
1630 
1631             close(sock);
1632             mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
1633 
1634             info->value->hardware_address =
1635                 g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
1636                                 (int) mac_addr[0], (int) mac_addr[1],
1637                                 (int) mac_addr[2], (int) mac_addr[3],
1638                                 (int) mac_addr[4], (int) mac_addr[5]);
1639 
1640             info->value->has_hardware_address = true;
1641         }
1642 
1643         if (ifa->ifa_addr &&
1644             ifa->ifa_addr->sa_family == AF_INET) {
1645             /* interface with IPv4 address */
1646             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1647             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
1648                 error_setg_errno(errp, errno, "inet_ntop failed");
1649                 goto error;
1650             }
1651 
1652             address_item = g_malloc0(sizeof(*address_item));
1653             address_item->value = g_malloc0(sizeof(*address_item->value));
1654             address_item->value->ip_address = g_strdup(addr4);
1655             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
1656 
1657             if (ifa->ifa_netmask) {
1658                 /* Count the number of set bits in netmask.
1659                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
1660                 p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
1661                 address_item->value->prefix = ctpop32(((uint32_t *) p)[0]);
1662             }
1663         } else if (ifa->ifa_addr &&
1664                    ifa->ifa_addr->sa_family == AF_INET6) {
1665             /* interface with IPv6 address */
1666             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
1667             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
1668                 error_setg_errno(errp, errno, "inet_ntop failed");
1669                 goto error;
1670             }
1671 
1672             address_item = g_malloc0(sizeof(*address_item));
1673             address_item->value = g_malloc0(sizeof(*address_item->value));
1674             address_item->value->ip_address = g_strdup(addr6);
1675             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
1676 
1677             if (ifa->ifa_netmask) {
1678                 /* Count the number of set bits in netmask.
1679                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
1680                 p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
1681                 address_item->value->prefix =
1682                     ctpop32(((uint32_t *) p)[0]) +
1683                     ctpop32(((uint32_t *) p)[1]) +
1684                     ctpop32(((uint32_t *) p)[2]) +
1685                     ctpop32(((uint32_t *) p)[3]);
1686             }
1687         }
1688 
1689         if (!address_item) {
1690             continue;
1691         }
1692 
1693         address_list = &info->value->ip_addresses;
1694 
1695         while (*address_list && (*address_list)->next) {
1696             address_list = &(*address_list)->next;
1697         }
1698 
1699         if (!*address_list) {
1700             *address_list = address_item;
1701         } else {
1702             (*address_list)->next = address_item;
1703         }
1704 
1705         info->value->has_ip_addresses = true;
1706 
1707 
1708     }
1709 
1710     freeifaddrs(ifap);
1711     return head;
1712 
1713 error:
1714     freeifaddrs(ifap);
1715     qapi_free_GuestNetworkInterfaceList(head);
1716     return NULL;
1717 }
1718 
1719 #define SYSCONF_EXACT(name, errp) sysconf_exact((name), #name, (errp))
1720 
1721 static long sysconf_exact(int name, const char *name_str, Error **errp)
1722 {
1723     long ret;
1724 
1725     errno = 0;
1726     ret = sysconf(name);
1727     if (ret == -1) {
1728         if (errno == 0) {
1729             error_setg(errp, "sysconf(%s): value indefinite", name_str);
1730         } else {
1731             error_setg_errno(errp, errno, "sysconf(%s)", name_str);
1732         }
1733     }
1734     return ret;
1735 }
1736 
1737 /* Transfer online/offline status between @vcpu and the guest system.
1738  *
1739  * On input either @errp or *@errp must be NULL.
1740  *
1741  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1742  * - R: vcpu->logical_id
1743  * - W: vcpu->online
1744  * - W: vcpu->can_offline
1745  *
1746  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1747  * - R: vcpu->logical_id
1748  * - R: vcpu->online
1749  *
1750  * Written members remain unmodified on error.
1751  */
1752 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1753                           Error **errp)
1754 {
1755     char *dirpath;
1756     int dirfd;
1757 
1758     dirpath = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1759                               vcpu->logical_id);
1760     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1761     if (dirfd == -1) {
1762         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1763     } else {
1764         static const char fn[] = "online";
1765         int fd;
1766         int res;
1767 
1768         fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1769         if (fd == -1) {
1770             if (errno != ENOENT) {
1771                 error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1772             } else if (sys2vcpu) {
1773                 vcpu->online = true;
1774                 vcpu->can_offline = false;
1775             } else if (!vcpu->online) {
1776                 error_setg(errp, "logical processor #%" PRId64 " can't be "
1777                            "offlined", vcpu->logical_id);
1778             } /* otherwise pretend successful re-onlining */
1779         } else {
1780             unsigned char status;
1781 
1782             res = pread(fd, &status, 1, 0);
1783             if (res == -1) {
1784                 error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1785             } else if (res == 0) {
1786                 error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1787                            fn);
1788             } else if (sys2vcpu) {
1789                 vcpu->online = (status != '0');
1790                 vcpu->can_offline = true;
1791             } else if (vcpu->online != (status != '0')) {
1792                 status = '0' + vcpu->online;
1793                 if (pwrite(fd, &status, 1, 0) == -1) {
1794                     error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1795                                      fn);
1796                 }
1797             } /* otherwise pretend successful re-(on|off)-lining */
1798 
1799             res = close(fd);
1800             g_assert(res == 0);
1801         }
1802 
1803         res = close(dirfd);
1804         g_assert(res == 0);
1805     }
1806 
1807     g_free(dirpath);
1808 }
1809 
1810 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1811 {
1812     int64_t current;
1813     GuestLogicalProcessorList *head, **link;
1814     long sc_max;
1815     Error *local_err = NULL;
1816 
1817     current = 0;
1818     head = NULL;
1819     link = &head;
1820     sc_max = SYSCONF_EXACT(_SC_NPROCESSORS_CONF, &local_err);
1821 
1822     while (local_err == NULL && current < sc_max) {
1823         GuestLogicalProcessor *vcpu;
1824         GuestLogicalProcessorList *entry;
1825 
1826         vcpu = g_malloc0(sizeof *vcpu);
1827         vcpu->logical_id = current++;
1828         vcpu->has_can_offline = true; /* lolspeak ftw */
1829         transfer_vcpu(vcpu, true, &local_err);
1830 
1831         entry = g_malloc0(sizeof *entry);
1832         entry->value = vcpu;
1833 
1834         *link = entry;
1835         link = &entry->next;
1836     }
1837 
1838     if (local_err == NULL) {
1839         /* there's no guest with zero VCPUs */
1840         g_assert(head != NULL);
1841         return head;
1842     }
1843 
1844     qapi_free_GuestLogicalProcessorList(head);
1845     error_propagate(errp, local_err);
1846     return NULL;
1847 }
1848 
1849 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1850 {
1851     int64_t processed;
1852     Error *local_err = NULL;
1853 
1854     processed = 0;
1855     while (vcpus != NULL) {
1856         transfer_vcpu(vcpus->value, false, &local_err);
1857         if (local_err != NULL) {
1858             break;
1859         }
1860         ++processed;
1861         vcpus = vcpus->next;
1862     }
1863 
1864     if (local_err != NULL) {
1865         if (processed == 0) {
1866             error_propagate(errp, local_err);
1867         } else {
1868             error_free(local_err);
1869         }
1870     }
1871 
1872     return processed;
1873 }
1874 
1875 #else /* defined(__linux__) */
1876 
1877 void qmp_guest_suspend_disk(Error **errp)
1878 {
1879     error_set(errp, QERR_UNSUPPORTED);
1880 }
1881 
1882 void qmp_guest_suspend_ram(Error **errp)
1883 {
1884     error_set(errp, QERR_UNSUPPORTED);
1885 }
1886 
1887 void qmp_guest_suspend_hybrid(Error **errp)
1888 {
1889     error_set(errp, QERR_UNSUPPORTED);
1890 }
1891 
1892 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
1893 {
1894     error_set(errp, QERR_UNSUPPORTED);
1895     return NULL;
1896 }
1897 
1898 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1899 {
1900     error_set(errp, QERR_UNSUPPORTED);
1901     return NULL;
1902 }
1903 
1904 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1905 {
1906     error_set(errp, QERR_UNSUPPORTED);
1907     return -1;
1908 }
1909 
1910 #endif
1911 
1912 #if !defined(CONFIG_FSFREEZE)
1913 
1914 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1915 {
1916     error_set(errp, QERR_UNSUPPORTED);
1917     return NULL;
1918 }
1919 
1920 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
1921 {
1922     error_set(errp, QERR_UNSUPPORTED);
1923 
1924     return 0;
1925 }
1926 
1927 int64_t qmp_guest_fsfreeze_freeze(Error **errp)
1928 {
1929     error_set(errp, QERR_UNSUPPORTED);
1930 
1931     return 0;
1932 }
1933 
1934 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
1935                                        strList *mountpoints,
1936                                        Error **errp)
1937 {
1938     error_set(errp, QERR_UNSUPPORTED);
1939 
1940     return 0;
1941 }
1942 
1943 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
1944 {
1945     error_set(errp, QERR_UNSUPPORTED);
1946 
1947     return 0;
1948 }
1949 #endif /* CONFIG_FSFREEZE */
1950 
1951 #if !defined(CONFIG_FSTRIM)
1952 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1953 {
1954     error_set(errp, QERR_UNSUPPORTED);
1955 }
1956 #endif
1957 
1958 /* add unsupported commands to the blacklist */
1959 GList *ga_command_blacklist_init(GList *blacklist)
1960 {
1961 #if !defined(__linux__)
1962     {
1963         const char *list[] = {
1964             "guest-suspend-disk", "guest-suspend-ram",
1965             "guest-suspend-hybrid", "guest-network-get-interfaces",
1966             "guest-get-vcpus", "guest-set-vcpus", NULL};
1967         char **p = (char **)list;
1968 
1969         while (*p) {
1970             blacklist = g_list_append(blacklist, *p++);
1971         }
1972     }
1973 #endif
1974 
1975 #if !defined(CONFIG_FSFREEZE)
1976     {
1977         const char *list[] = {
1978             "guest-get-fsinfo", "guest-fsfreeze-status",
1979             "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list",
1980             "guest-fsfreeze-thaw", "guest-get-fsinfo", NULL};
1981         char **p = (char **)list;
1982 
1983         while (*p) {
1984             blacklist = g_list_append(blacklist, *p++);
1985         }
1986     }
1987 #endif
1988 
1989 #if !defined(CONFIG_FSTRIM)
1990     blacklist = g_list_append(blacklist, (char *)"guest-fstrim");
1991 #endif
1992 
1993     return blacklist;
1994 }
1995 
1996 /* register init/cleanup routines for stateful command groups */
1997 void ga_command_state_init(GAState *s, GACommandState *cs)
1998 {
1999 #if defined(CONFIG_FSFREEZE)
2000     ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
2001 #endif
2002     ga_command_state_add(cs, guest_file_init, NULL);
2003 }
2004