xref: /openbmc/qemu/qga/commands-posix.c (revision 0a2b5e3a7899b40d05d7c6c1c41eb4e64dd2ed4b)
1 /*
2  * QEMU Guest Agent POSIX-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include <glib.h>
15 #include <sys/types.h>
16 #include <sys/ioctl.h>
17 #include <sys/wait.h>
18 #include <unistd.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <sys/stat.h>
24 #include <inttypes.h>
25 #include "qga/guest-agent-core.h"
26 #include "qga-qmp-commands.h"
27 #include "qapi/qmp/qerror.h"
28 #include "qemu/queue.h"
29 #include "qemu/host-utils.h"
30 
31 #ifndef CONFIG_HAS_ENVIRON
32 #ifdef __APPLE__
33 #include <crt_externs.h>
34 #define environ (*_NSGetEnviron())
35 #else
36 extern char **environ;
37 #endif
38 #endif
39 
40 #if defined(__linux__)
41 #include <mntent.h>
42 #include <linux/fs.h>
43 #include <ifaddrs.h>
44 #include <arpa/inet.h>
45 #include <sys/socket.h>
46 #include <net/if.h>
47 
48 #ifdef FIFREEZE
49 #define CONFIG_FSFREEZE
50 #endif
51 #ifdef FITRIM
52 #define CONFIG_FSTRIM
53 #endif
54 #endif
55 
56 static void ga_wait_child(pid_t pid, int *status, Error **err)
57 {
58     pid_t rpid;
59 
60     *status = 0;
61 
62     do {
63         rpid = waitpid(pid, status, 0);
64     } while (rpid == -1 && errno == EINTR);
65 
66     if (rpid == -1) {
67         error_setg_errno(err, errno, "failed to wait for child (pid: %d)", pid);
68         return;
69     }
70 
71     g_assert(rpid == pid);
72 }
73 
74 void qmp_guest_shutdown(bool has_mode, const char *mode, Error **err)
75 {
76     const char *shutdown_flag;
77     Error *local_err = NULL;
78     pid_t pid;
79     int status;
80 
81     slog("guest-shutdown called, mode: %s", mode);
82     if (!has_mode || strcmp(mode, "powerdown") == 0) {
83         shutdown_flag = "-P";
84     } else if (strcmp(mode, "halt") == 0) {
85         shutdown_flag = "-H";
86     } else if (strcmp(mode, "reboot") == 0) {
87         shutdown_flag = "-r";
88     } else {
89         error_setg(err,
90                    "mode is invalid (valid values are: halt|powerdown|reboot");
91         return;
92     }
93 
94     pid = fork();
95     if (pid == 0) {
96         /* child, start the shutdown */
97         setsid();
98         reopen_fd_to_null(0);
99         reopen_fd_to_null(1);
100         reopen_fd_to_null(2);
101 
102         execle("/sbin/shutdown", "shutdown", shutdown_flag, "+0",
103                "hypervisor initiated shutdown", (char*)NULL, environ);
104         _exit(EXIT_FAILURE);
105     } else if (pid < 0) {
106         error_setg_errno(err, errno, "failed to create child process");
107         return;
108     }
109 
110     ga_wait_child(pid, &status, &local_err);
111     if (error_is_set(&local_err)) {
112         error_propagate(err, local_err);
113         return;
114     }
115 
116     if (!WIFEXITED(status)) {
117         error_setg(err, "child process has terminated abnormally");
118         return;
119     }
120 
121     if (WEXITSTATUS(status)) {
122         error_setg(err, "child process has failed to shutdown");
123         return;
124     }
125 
126     /* succeeded */
127 }
128 
129 int64_t qmp_guest_get_time(Error **errp)
130 {
131    int ret;
132    qemu_timeval tq;
133    int64_t time_ns;
134 
135    ret = qemu_gettimeofday(&tq);
136    if (ret < 0) {
137        error_setg_errno(errp, errno, "Failed to get time");
138        return -1;
139    }
140 
141    time_ns = tq.tv_sec * 1000000000LL + tq.tv_usec * 1000;
142    return time_ns;
143 }
144 
145 void qmp_guest_set_time(int64_t time_ns, Error **errp)
146 {
147     int ret;
148     int status;
149     pid_t pid;
150     Error *local_err = NULL;
151     struct timeval tv;
152 
153     /* year-2038 will overflow in case time_t is 32bit */
154     if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
155         error_setg(errp, "Time %" PRId64 " is too large", time_ns);
156         return;
157     }
158 
159     tv.tv_sec = time_ns / 1000000000;
160     tv.tv_usec = (time_ns % 1000000000) / 1000;
161 
162     ret = settimeofday(&tv, NULL);
163     if (ret < 0) {
164         error_setg_errno(errp, errno, "Failed to set time to guest");
165         return;
166     }
167 
168     /* Set the Hardware Clock to the current System Time. */
169     pid = fork();
170     if (pid == 0) {
171         setsid();
172         reopen_fd_to_null(0);
173         reopen_fd_to_null(1);
174         reopen_fd_to_null(2);
175 
176         execle("/sbin/hwclock", "hwclock", "-w", NULL, environ);
177         _exit(EXIT_FAILURE);
178     } else if (pid < 0) {
179         error_setg_errno(errp, errno, "failed to create child process");
180         return;
181     }
182 
183     ga_wait_child(pid, &status, &local_err);
184     if (error_is_set(&local_err)) {
185         error_propagate(errp, local_err);
186         return;
187     }
188 
189     if (!WIFEXITED(status)) {
190         error_setg(errp, "child process has terminated abnormally");
191         return;
192     }
193 
194     if (WEXITSTATUS(status)) {
195         error_setg(errp, "hwclock failed to set hardware clock to system time");
196         return;
197     }
198 }
199 
/* One guest file opened through guest-file-open. */
typedef struct GuestFileHandle {
    uint64_t id;    /* handle value obtained from ga_get_fd_handle() */
    FILE *fh;       /* stdio stream the handle refers to */
    QTAILQ_ENTRY(GuestFileHandle) next; /* linkage in guest_file_state.filehandles */
} GuestFileHandle;
205 
/* File-scope registry of all currently open guest file handles. */
static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles; /* list of GuestFileHandle entries */
} guest_file_state;
209 
210 static int64_t guest_file_handle_add(FILE *fh, Error **errp)
211 {
212     GuestFileHandle *gfh;
213     int64_t handle;
214 
215     handle = ga_get_fd_handle(ga_state, errp);
216     if (error_is_set(errp)) {
217         return 0;
218     }
219 
220     gfh = g_malloc0(sizeof(GuestFileHandle));
221     gfh->id = handle;
222     gfh->fh = fh;
223     QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
224 
225     return handle;
226 }
227 
228 static GuestFileHandle *guest_file_handle_find(int64_t id, Error **err)
229 {
230     GuestFileHandle *gfh;
231 
232     QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
233     {
234         if (gfh->id == id) {
235             return gfh;
236         }
237     }
238 
239     error_setg(err, "handle '%" PRId64 "' has not been found", id);
240     return NULL;
241 }
242 
/* Constant pointer to a constant string: element type of the lists below. */
typedef const char * const ccpc;

/*
 * Table of accepted file open modes.  Each row lists the equivalent mode
 * spellings and the open() flags that find_open_flag() returns for them
 * (before it adds O_NOCTTY | O_NONBLOCK).
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html
 */
static const struct {
    ccpc *forms;        /* NULL-terminated list of equivalent mode strings */
    int oflag_base;     /* open() flags corresponding to those mode strings */
} guest_file_open_modes[] = {
    { (ccpc[]){ "r",  "rb",         NULL }, O_RDONLY                      },
    { (ccpc[]){ "w",  "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  },
    { (ccpc[]){ "a",  "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND },
    { (ccpc[]){ "r+", "rb+", "r+b", NULL }, O_RDWR                        },
    { (ccpc[]){ "w+", "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  },
    { (ccpc[]){ "a+", "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND }
};
257 
258 static int
259 find_open_flag(const char *mode_str, Error **err)
260 {
261     unsigned mode;
262 
263     for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
264         ccpc *form;
265 
266         form = guest_file_open_modes[mode].forms;
267         while (*form != NULL && strcmp(*form, mode_str) != 0) {
268             ++form;
269         }
270         if (*form != NULL) {
271             break;
272         }
273     }
274 
275     if (mode == ARRAY_SIZE(guest_file_open_modes)) {
276         error_setg(err, "invalid file open mode '%s'", mode_str);
277         return -1;
278     }
279     return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
280 }
281 
/*
 * Permission bits (read/write for user, group and others, i.e. 0666) that
 * safe_open_or_create() applies with fchmod() to files it newly creates.
 */
#define DEFAULT_NEW_FILE_MODE (S_IRUSR | S_IWUSR | \
                               S_IRGRP | S_IWGRP | \
                               S_IROTH | S_IWOTH)
285 
286 static FILE *
287 safe_open_or_create(const char *path, const char *mode, Error **err)
288 {
289     Error *local_err = NULL;
290     int oflag;
291 
292     oflag = find_open_flag(mode, &local_err);
293     if (local_err == NULL) {
294         int fd;
295 
296         /* If the caller wants / allows creation of a new file, we implement it
297          * with a two step process: open() + (open() / fchmod()).
298          *
299          * First we insist on creating the file exclusively as a new file. If
300          * that succeeds, we're free to set any file-mode bits on it. (The
301          * motivation is that we want to set those file-mode bits independently
302          * of the current umask.)
303          *
304          * If the exclusive creation fails because the file already exists
305          * (EEXIST is not possible for any other reason), we just attempt to
306          * open the file, but in this case we won't be allowed to change the
307          * file-mode bits on the preexistent file.
308          *
309          * The pathname should never disappear between the two open()s in
310          * practice. If it happens, then someone very likely tried to race us.
311          * In this case just go ahead and report the ENOENT from the second
312          * open() to the caller.
313          *
314          * If the caller wants to open a preexistent file, then the first
315          * open() is decisive and its third argument is ignored, and the second
316          * open() and the fchmod() are never called.
317          */
318         fd = open(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
319         if (fd == -1 && errno == EEXIST) {
320             oflag &= ~(unsigned)O_CREAT;
321             fd = open(path, oflag);
322         }
323 
324         if (fd == -1) {
325             error_setg_errno(&local_err, errno, "failed to open file '%s' "
326                              "(mode: '%s')", path, mode);
327         } else {
328             qemu_set_cloexec(fd);
329 
330             if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
331                 error_setg_errno(&local_err, errno, "failed to set permission "
332                                  "0%03o on new file '%s' (mode: '%s')",
333                                  (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
334             } else {
335                 FILE *f;
336 
337                 f = fdopen(fd, mode);
338                 if (f == NULL) {
339                     error_setg_errno(&local_err, errno, "failed to associate "
340                                      "stdio stream with file descriptor %d, "
341                                      "file '%s' (mode: '%s')", fd, path, mode);
342                 } else {
343                     return f;
344                 }
345             }
346 
347             close(fd);
348         }
349     }
350 
351     error_propagate(err, local_err);
352     return NULL;
353 }
354 
355 int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, Error **err)
356 {
357     FILE *fh;
358     Error *local_err = NULL;
359     int fd;
360     int64_t ret = -1, handle;
361 
362     if (!has_mode) {
363         mode = "r";
364     }
365     slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
366     fh = safe_open_or_create(path, mode, &local_err);
367     if (local_err != NULL) {
368         error_propagate(err, local_err);
369         return -1;
370     }
371 
372     /* set fd non-blocking to avoid common use cases (like reading from a
373      * named pipe) from hanging the agent
374      */
375     fd = fileno(fh);
376     ret = fcntl(fd, F_GETFL);
377     ret = fcntl(fd, F_SETFL, ret | O_NONBLOCK);
378     if (ret == -1) {
379         error_setg_errno(err, errno, "failed to make file '%s' non-blocking",
380                          path);
381         fclose(fh);
382         return -1;
383     }
384 
385     handle = guest_file_handle_add(fh, err);
386     if (error_is_set(err)) {
387         fclose(fh);
388         return -1;
389     }
390 
391     slog("guest-file-open, handle: %d", handle);
392     return handle;
393 }
394 
395 void qmp_guest_file_close(int64_t handle, Error **err)
396 {
397     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
398     int ret;
399 
400     slog("guest-file-close called, handle: %ld", handle);
401     if (!gfh) {
402         return;
403     }
404 
405     ret = fclose(gfh->fh);
406     if (ret == EOF) {
407         error_setg_errno(err, errno, "failed to close handle");
408         return;
409     }
410 
411     QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
412     g_free(gfh);
413 }
414 
415 struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
416                                           int64_t count, Error **err)
417 {
418     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
419     GuestFileRead *read_data = NULL;
420     guchar *buf;
421     FILE *fh;
422     size_t read_count;
423 
424     if (!gfh) {
425         return NULL;
426     }
427 
428     if (!has_count) {
429         count = QGA_READ_COUNT_DEFAULT;
430     } else if (count < 0) {
431         error_setg(err, "value '%" PRId64 "' is invalid for argument count",
432                    count);
433         return NULL;
434     }
435 
436     fh = gfh->fh;
437     buf = g_malloc0(count+1);
438     read_count = fread(buf, 1, count, fh);
439     if (ferror(fh)) {
440         error_setg_errno(err, errno, "failed to read file");
441         slog("guest-file-read failed, handle: %ld", handle);
442     } else {
443         buf[read_count] = 0;
444         read_data = g_malloc0(sizeof(GuestFileRead));
445         read_data->count = read_count;
446         read_data->eof = feof(fh);
447         if (read_count) {
448             read_data->buf_b64 = g_base64_encode(buf, read_count);
449         }
450     }
451     g_free(buf);
452     clearerr(fh);
453 
454     return read_data;
455 }
456 
457 GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
458                                      bool has_count, int64_t count, Error **err)
459 {
460     GuestFileWrite *write_data = NULL;
461     guchar *buf;
462     gsize buf_len;
463     int write_count;
464     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
465     FILE *fh;
466 
467     if (!gfh) {
468         return NULL;
469     }
470 
471     fh = gfh->fh;
472     buf = g_base64_decode(buf_b64, &buf_len);
473 
474     if (!has_count) {
475         count = buf_len;
476     } else if (count < 0 || count > buf_len) {
477         error_setg(err, "value '%" PRId64 "' is invalid for argument count",
478                    count);
479         g_free(buf);
480         return NULL;
481     }
482 
483     write_count = fwrite(buf, 1, count, fh);
484     if (ferror(fh)) {
485         error_setg_errno(err, errno, "failed to write to file");
486         slog("guest-file-write failed, handle: %ld", handle);
487     } else {
488         write_data = g_malloc0(sizeof(GuestFileWrite));
489         write_data->count = write_count;
490         write_data->eof = feof(fh);
491     }
492     g_free(buf);
493     clearerr(fh);
494 
495     return write_data;
496 }
497 
498 struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
499                                           int64_t whence, Error **err)
500 {
501     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
502     GuestFileSeek *seek_data = NULL;
503     FILE *fh;
504     int ret;
505 
506     if (!gfh) {
507         return NULL;
508     }
509 
510     fh = gfh->fh;
511     ret = fseek(fh, offset, whence);
512     if (ret == -1) {
513         error_setg_errno(err, errno, "failed to seek file");
514     } else {
515         seek_data = g_malloc0(sizeof(GuestFileRead));
516         seek_data->position = ftell(fh);
517         seek_data->eof = feof(fh);
518     }
519     clearerr(fh);
520 
521     return seek_data;
522 }
523 
524 void qmp_guest_file_flush(int64_t handle, Error **err)
525 {
526     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
527     FILE *fh;
528     int ret;
529 
530     if (!gfh) {
531         return;
532     }
533 
534     fh = gfh->fh;
535     ret = fflush(fh);
536     if (ret == EOF) {
537         error_setg_errno(err, errno, "failed to flush file");
538     }
539 }
540 
/* Initialize the (empty) list of open guest file handles. */
static void guest_file_init(void)
{
    QTAILQ_INIT(&guest_file_state.filehandles);
}
545 
546 /* linux-specific implementations. avoid this if at all possible. */
547 #if defined(__linux__)
548 
549 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/* One mounted file system, as collected by build_fs_mount_list(). */
typedef struct FsMount {
    char *dirname;  /* mount point (copy of mnt_dir), freed with g_free() */
    char *devtype;  /* file system type (copy of mnt_type), freed with g_free() */
    QTAILQ_ENTRY(FsMount) next; /* linkage in an FsMountList */
} FsMount;

/* List head for FsMount entries. */
typedef QTAILQ_HEAD(, FsMount) FsMountList;
557 
558 static void free_fs_mount_list(FsMountList *mounts)
559 {
560      FsMount *mount, *temp;
561 
562      if (!mounts) {
563          return;
564      }
565 
566      QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
567          QTAILQ_REMOVE(mounts, mount, next);
568          g_free(mount->dirname);
569          g_free(mount->devtype);
570          g_free(mount);
571      }
572 }
573 
574 /*
575  * Walk the mount table and build a list of local file systems
576  */
577 static void build_fs_mount_list(FsMountList *mounts, Error **err)
578 {
579     struct mntent *ment;
580     FsMount *mount;
581     char const *mtab = "/proc/self/mounts";
582     FILE *fp;
583 
584     fp = setmntent(mtab, "r");
585     if (!fp) {
586         error_setg(err, "failed to open mtab file: '%s'", mtab);
587         return;
588     }
589 
590     while ((ment = getmntent(fp))) {
591         /*
592          * An entry which device name doesn't start with a '/' is
593          * either a dummy file system or a network file system.
594          * Add special handling for smbfs and cifs as is done by
595          * coreutils as well.
596          */
597         if ((ment->mnt_fsname[0] != '/') ||
598             (strcmp(ment->mnt_type, "smbfs") == 0) ||
599             (strcmp(ment->mnt_type, "cifs") == 0)) {
600             continue;
601         }
602 
603         mount = g_malloc0(sizeof(FsMount));
604         mount->dirname = g_strdup(ment->mnt_dir);
605         mount->devtype = g_strdup(ment->mnt_type);
606 
607         QTAILQ_INSERT_TAIL(mounts, mount, next);
608     }
609 
610     endmntent(fp);
611 }
612 #endif
613 
614 #if defined(CONFIG_FSFREEZE)
615 
/*
 * Action passed to the fsfreeze hook.  The values double as indices into
 * fsfreeze_hook_arg_string[].
 */
typedef enum {
    FSFREEZE_HOOK_THAW = 0,
    FSFREEZE_HOOK_FREEZE,
} FsfreezeHookArg;
620 
/*
 * Command-line argument handed to the fsfreeze hook, indexed by
 * FsfreezeHookArg.
 * NOTE(review): this has external linkage; it looks like it could be
 * 'static' -- confirm that no other translation unit references it.
 */
const char *fsfreeze_hook_arg_string[] = {
    "thaw",
    "freeze",
};
625 
626 static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **err)
627 {
628     int status;
629     pid_t pid;
630     const char *hook;
631     const char *arg_str = fsfreeze_hook_arg_string[arg];
632     Error *local_err = NULL;
633 
634     hook = ga_fsfreeze_hook(ga_state);
635     if (!hook) {
636         return;
637     }
638     if (access(hook, X_OK) != 0) {
639         error_setg_errno(err, errno, "can't access fsfreeze hook '%s'", hook);
640         return;
641     }
642 
643     slog("executing fsfreeze hook with arg '%s'", arg_str);
644     pid = fork();
645     if (pid == 0) {
646         setsid();
647         reopen_fd_to_null(0);
648         reopen_fd_to_null(1);
649         reopen_fd_to_null(2);
650 
651         execle(hook, hook, arg_str, NULL, environ);
652         _exit(EXIT_FAILURE);
653     } else if (pid < 0) {
654         error_setg_errno(err, errno, "failed to create child process");
655         return;
656     }
657 
658     ga_wait_child(pid, &status, &local_err);
659     if (error_is_set(&local_err)) {
660         error_propagate(err, local_err);
661         return;
662     }
663 
664     if (!WIFEXITED(status)) {
665         error_setg(err, "fsfreeze hook has terminated abnormally");
666         return;
667     }
668 
669     status = WEXITSTATUS(status);
670     if (status) {
671         error_setg(err, "fsfreeze hook has failed with status %d", status);
672         return;
673     }
674 }
675 
676 /*
677  * Return status of freeze/thaw
678  */
679 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **err)
680 {
681     if (ga_is_frozen(ga_state)) {
682         return GUEST_FSFREEZE_STATUS_FROZEN;
683     }
684 
685     return GUEST_FSFREEZE_STATUS_THAWED;
686 }
687 
688 /*
689  * Walk list of mounted file systems in the guest, and freeze the ones which
690  * are real local file systems.
691  */
692 int64_t qmp_guest_fsfreeze_freeze(Error **err)
693 {
694     int ret = 0, i = 0;
695     FsMountList mounts;
696     struct FsMount *mount;
697     Error *local_err = NULL;
698     int fd;
699 
700     slog("guest-fsfreeze called");
701 
702     execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
703     if (error_is_set(&local_err)) {
704         error_propagate(err, local_err);
705         return -1;
706     }
707 
708     QTAILQ_INIT(&mounts);
709     build_fs_mount_list(&mounts, &local_err);
710     if (error_is_set(&local_err)) {
711         error_propagate(err, local_err);
712         return -1;
713     }
714 
715     /* cannot risk guest agent blocking itself on a write in this state */
716     ga_set_frozen(ga_state);
717 
718     QTAILQ_FOREACH(mount, &mounts, next) {
719         fd = qemu_open(mount->dirname, O_RDONLY);
720         if (fd == -1) {
721             error_setg_errno(err, errno, "failed to open %s", mount->dirname);
722             goto error;
723         }
724 
725         /* we try to cull filesytems we know won't work in advance, but other
726          * filesytems may not implement fsfreeze for less obvious reasons.
727          * these will report EOPNOTSUPP. we simply ignore these when tallying
728          * the number of frozen filesystems.
729          *
730          * any other error means a failure to freeze a filesystem we
731          * expect to be freezable, so return an error in those cases
732          * and return system to thawed state.
733          */
734         ret = ioctl(fd, FIFREEZE);
735         if (ret == -1) {
736             if (errno != EOPNOTSUPP) {
737                 error_setg_errno(err, errno, "failed to freeze %s",
738                                  mount->dirname);
739                 close(fd);
740                 goto error;
741             }
742         } else {
743             i++;
744         }
745         close(fd);
746     }
747 
748     free_fs_mount_list(&mounts);
749     return i;
750 
751 error:
752     free_fs_mount_list(&mounts);
753     qmp_guest_fsfreeze_thaw(NULL);
754     return 0;
755 }
756 
757 /*
758  * Walk list of frozen file systems in the guest, and thaw them.
759  */
760 int64_t qmp_guest_fsfreeze_thaw(Error **err)
761 {
762     int ret;
763     FsMountList mounts;
764     FsMount *mount;
765     int fd, i = 0, logged;
766     Error *local_err = NULL;
767 
768     QTAILQ_INIT(&mounts);
769     build_fs_mount_list(&mounts, &local_err);
770     if (error_is_set(&local_err)) {
771         error_propagate(err, local_err);
772         return 0;
773     }
774 
775     QTAILQ_FOREACH(mount, &mounts, next) {
776         logged = false;
777         fd = qemu_open(mount->dirname, O_RDONLY);
778         if (fd == -1) {
779             continue;
780         }
781         /* we have no way of knowing whether a filesystem was actually unfrozen
782          * as a result of a successful call to FITHAW, only that if an error
783          * was returned the filesystem was *not* unfrozen by that particular
784          * call.
785          *
786          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
787          * to unfreeze, continuing issuing FITHAW until an error is returned,
788          * in which case either the filesystem is in an unfreezable state, or,
789          * more likely, it was thawed previously (and remains so afterward).
790          *
791          * also, since the most recent successful call is the one that did
792          * the actual unfreeze, we can use this to provide an accurate count
793          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
794          * may * be useful for determining whether a filesystem was unfrozen
795          * during the freeze/thaw phase by a process other than qemu-ga.
796          */
797         do {
798             ret = ioctl(fd, FITHAW);
799             if (ret == 0 && !logged) {
800                 i++;
801                 logged = true;
802             }
803         } while (ret == 0);
804         close(fd);
805     }
806 
807     ga_unset_frozen(ga_state);
808     free_fs_mount_list(&mounts);
809 
810     execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, err);
811 
812     return i;
813 }
814 
815 static void guest_fsfreeze_cleanup(void)
816 {
817     Error *err = NULL;
818 
819     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
820         qmp_guest_fsfreeze_thaw(&err);
821         if (err) {
822             slog("failed to clean up frozen filesystems: %s",
823                  error_get_pretty(err));
824             error_free(err);
825         }
826     }
827 }
828 #endif /* CONFIG_FSFREEZE */
829 
830 #if defined(CONFIG_FSTRIM)
831 /*
832  * Walk list of mounted file systems in the guest, and trim them.
833  */
834 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
835 {
836     int ret = 0;
837     FsMountList mounts;
838     struct FsMount *mount;
839     int fd;
840     Error *local_err = NULL;
841     struct fstrim_range r = {
842         .start = 0,
843         .len = -1,
844         .minlen = has_minimum ? minimum : 0,
845     };
846 
847     slog("guest-fstrim called");
848 
849     QTAILQ_INIT(&mounts);
850     build_fs_mount_list(&mounts, &local_err);
851     if (error_is_set(&local_err)) {
852         error_propagate(err, local_err);
853         return;
854     }
855 
856     QTAILQ_FOREACH(mount, &mounts, next) {
857         fd = qemu_open(mount->dirname, O_RDONLY);
858         if (fd == -1) {
859             error_setg_errno(err, errno, "failed to open %s", mount->dirname);
860             goto error;
861         }
862 
863         /* We try to cull filesytems we know won't work in advance, but other
864          * filesytems may not implement fstrim for less obvious reasons.  These
865          * will report EOPNOTSUPP; we simply ignore these errors.  Any other
866          * error means an unexpected error, so return it in those cases.  In
867          * some other cases ENOTTY will be reported (e.g. CD-ROMs).
868          */
869         ret = ioctl(fd, FITRIM, &r);
870         if (ret == -1) {
871             if (errno != ENOTTY && errno != EOPNOTSUPP) {
872                 error_setg_errno(err, errno, "failed to trim %s",
873                                  mount->dirname);
874                 close(fd);
875                 goto error;
876             }
877         }
878         close(fd);
879     }
880 
881 error:
882     free_fs_mount_list(&mounts);
883 }
884 #endif /* CONFIG_FSTRIM */
885 
886 
/* sysfs file read by bios_supports_mode() and written by guest_suspend() */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* exit statuses of the child process forked by bios_supports_mode() */
#define SUSPEND_SUPPORTED 0
#define SUSPEND_NOT_SUPPORTED 1
890 
891 static void bios_supports_mode(const char *pmutils_bin, const char *pmutils_arg,
892                                const char *sysfile_str, Error **err)
893 {
894     Error *local_err = NULL;
895     char *pmutils_path;
896     pid_t pid;
897     int status;
898 
899     pmutils_path = g_find_program_in_path(pmutils_bin);
900 
901     pid = fork();
902     if (!pid) {
903         char buf[32]; /* hopefully big enough */
904         ssize_t ret;
905         int fd;
906 
907         setsid();
908         reopen_fd_to_null(0);
909         reopen_fd_to_null(1);
910         reopen_fd_to_null(2);
911 
912         if (pmutils_path) {
913             execle(pmutils_path, pmutils_bin, pmutils_arg, NULL, environ);
914         }
915 
916         /*
917          * If we get here either pm-utils is not installed or execle() has
918          * failed. Let's try the manual method if the caller wants it.
919          */
920 
921         if (!sysfile_str) {
922             _exit(SUSPEND_NOT_SUPPORTED);
923         }
924 
925         fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
926         if (fd < 0) {
927             _exit(SUSPEND_NOT_SUPPORTED);
928         }
929 
930         ret = read(fd, buf, sizeof(buf)-1);
931         if (ret <= 0) {
932             _exit(SUSPEND_NOT_SUPPORTED);
933         }
934         buf[ret] = '\0';
935 
936         if (strstr(buf, sysfile_str)) {
937             _exit(SUSPEND_SUPPORTED);
938         }
939 
940         _exit(SUSPEND_NOT_SUPPORTED);
941     } else if (pid < 0) {
942         error_setg_errno(err, errno, "failed to create child process");
943         goto out;
944     }
945 
946     ga_wait_child(pid, &status, &local_err);
947     if (error_is_set(&local_err)) {
948         error_propagate(err, local_err);
949         goto out;
950     }
951 
952     if (!WIFEXITED(status)) {
953         error_setg(err, "child process has terminated abnormally");
954         goto out;
955     }
956 
957     switch (WEXITSTATUS(status)) {
958     case SUSPEND_SUPPORTED:
959         goto out;
960     case SUSPEND_NOT_SUPPORTED:
961         error_setg(err,
962                    "the requested suspend mode is not supported by the guest");
963         goto out;
964     default:
965         error_setg(err,
966                    "the helper program '%s' returned an unexpected exit status"
967                    " code (%d)", pmutils_path, WEXITSTATUS(status));
968         goto out;
969     }
970 
971 out:
972     g_free(pmutils_path);
973 }
974 
975 static void guest_suspend(const char *pmutils_bin, const char *sysfile_str,
976                           Error **err)
977 {
978     Error *local_err = NULL;
979     char *pmutils_path;
980     pid_t pid;
981     int status;
982 
983     pmutils_path = g_find_program_in_path(pmutils_bin);
984 
985     pid = fork();
986     if (pid == 0) {
987         /* child */
988         int fd;
989 
990         setsid();
991         reopen_fd_to_null(0);
992         reopen_fd_to_null(1);
993         reopen_fd_to_null(2);
994 
995         if (pmutils_path) {
996             execle(pmutils_path, pmutils_bin, NULL, environ);
997         }
998 
999         /*
1000          * If we get here either pm-utils is not installed or execle() has
1001          * failed. Let's try the manual method if the caller wants it.
1002          */
1003 
1004         if (!sysfile_str) {
1005             _exit(EXIT_FAILURE);
1006         }
1007 
1008         fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1009         if (fd < 0) {
1010             _exit(EXIT_FAILURE);
1011         }
1012 
1013         if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1014             _exit(EXIT_FAILURE);
1015         }
1016 
1017         _exit(EXIT_SUCCESS);
1018     } else if (pid < 0) {
1019         error_setg_errno(err, errno, "failed to create child process");
1020         goto out;
1021     }
1022 
1023     ga_wait_child(pid, &status, &local_err);
1024     if (error_is_set(&local_err)) {
1025         error_propagate(err, local_err);
1026         goto out;
1027     }
1028 
1029     if (!WIFEXITED(status)) {
1030         error_setg(err, "child process has terminated abnormally");
1031         goto out;
1032     }
1033 
1034     if (WEXITSTATUS(status)) {
1035         error_setg(err, "child process has failed to suspend");
1036         goto out;
1037     }
1038 
1039 out:
1040     g_free(pmutils_path);
1041 }
1042 
1043 void qmp_guest_suspend_disk(Error **err)
1044 {
1045     bios_supports_mode("pm-is-supported", "--hibernate", "disk", err);
1046     if (error_is_set(err)) {
1047         return;
1048     }
1049 
1050     guest_suspend("pm-hibernate", "disk", err);
1051 }
1052 
1053 void qmp_guest_suspend_ram(Error **err)
1054 {
1055     bios_supports_mode("pm-is-supported", "--suspend", "mem", err);
1056     if (error_is_set(err)) {
1057         return;
1058     }
1059 
1060     guest_suspend("pm-suspend", "mem", err);
1061 }
1062 
1063 void qmp_guest_suspend_hybrid(Error **err)
1064 {
1065     bios_supports_mode("pm-is-supported", "--suspend-hybrid", NULL, err);
1066     if (error_is_set(err)) {
1067         return;
1068     }
1069 
1070     guest_suspend("pm-suspend-hybrid", NULL, err);
1071 }
1072 
1073 static GuestNetworkInterfaceList *
1074 guest_find_interface(GuestNetworkInterfaceList *head,
1075                      const char *name)
1076 {
1077     for (; head; head = head->next) {
1078         if (strcmp(head->value->name, name) == 0) {
1079             break;
1080         }
1081     }
1082 
1083     return head;
1084 }
1085 
1086 /*
1087  * Build information about guest interfaces
1088  */
1089 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
1090 {
1091     GuestNetworkInterfaceList *head = NULL, *cur_item = NULL;
1092     struct ifaddrs *ifap, *ifa;
1093 
1094     if (getifaddrs(&ifap) < 0) {
1095         error_setg_errno(errp, errno, "getifaddrs failed");
1096         goto error;
1097     }
1098 
1099     for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
1100         GuestNetworkInterfaceList *info;
1101         GuestIpAddressList **address_list = NULL, *address_item = NULL;
1102         char addr4[INET_ADDRSTRLEN];
1103         char addr6[INET6_ADDRSTRLEN];
1104         int sock;
1105         struct ifreq ifr;
1106         unsigned char *mac_addr;
1107         void *p;
1108 
1109         g_debug("Processing %s interface", ifa->ifa_name);
1110 
1111         info = guest_find_interface(head, ifa->ifa_name);
1112 
1113         if (!info) {
1114             info = g_malloc0(sizeof(*info));
1115             info->value = g_malloc0(sizeof(*info->value));
1116             info->value->name = g_strdup(ifa->ifa_name);
1117 
1118             if (!cur_item) {
1119                 head = cur_item = info;
1120             } else {
1121                 cur_item->next = info;
1122                 cur_item = info;
1123             }
1124         }
1125 
1126         if (!info->value->has_hardware_address &&
1127             ifa->ifa_flags & SIOCGIFHWADDR) {
1128             /* we haven't obtained HW address yet */
1129             sock = socket(PF_INET, SOCK_STREAM, 0);
1130             if (sock == -1) {
1131                 error_setg_errno(errp, errno, "failed to create socket");
1132                 goto error;
1133             }
1134 
1135             memset(&ifr, 0, sizeof(ifr));
1136             pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->value->name);
1137             if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
1138                 error_setg_errno(errp, errno,
1139                                  "failed to get MAC address of %s",
1140                                  ifa->ifa_name);
1141                 close(sock);
1142                 goto error;
1143             }
1144 
1145             close(sock);
1146             mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
1147 
1148             info->value->hardware_address =
1149                 g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
1150                                 (int) mac_addr[0], (int) mac_addr[1],
1151                                 (int) mac_addr[2], (int) mac_addr[3],
1152                                 (int) mac_addr[4], (int) mac_addr[5]);
1153 
1154             info->value->has_hardware_address = true;
1155         }
1156 
1157         if (ifa->ifa_addr &&
1158             ifa->ifa_addr->sa_family == AF_INET) {
1159             /* interface with IPv4 address */
1160             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1161             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
1162                 error_setg_errno(errp, errno, "inet_ntop failed");
1163                 goto error;
1164             }
1165 
1166             address_item = g_malloc0(sizeof(*address_item));
1167             address_item->value = g_malloc0(sizeof(*address_item->value));
1168             address_item->value->ip_address = g_strdup(addr4);
1169             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
1170 
1171             if (ifa->ifa_netmask) {
1172                 /* Count the number of set bits in netmask.
1173                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
1174                 p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
1175                 address_item->value->prefix = ctpop32(((uint32_t *) p)[0]);
1176             }
1177         } else if (ifa->ifa_addr &&
1178                    ifa->ifa_addr->sa_family == AF_INET6) {
1179             /* interface with IPv6 address */
1180             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
1181             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
1182                 error_setg_errno(errp, errno, "inet_ntop failed");
1183                 goto error;
1184             }
1185 
1186             address_item = g_malloc0(sizeof(*address_item));
1187             address_item->value = g_malloc0(sizeof(*address_item->value));
1188             address_item->value->ip_address = g_strdup(addr6);
1189             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
1190 
1191             if (ifa->ifa_netmask) {
1192                 /* Count the number of set bits in netmask.
1193                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
1194                 p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
1195                 address_item->value->prefix =
1196                     ctpop32(((uint32_t *) p)[0]) +
1197                     ctpop32(((uint32_t *) p)[1]) +
1198                     ctpop32(((uint32_t *) p)[2]) +
1199                     ctpop32(((uint32_t *) p)[3]);
1200             }
1201         }
1202 
1203         if (!address_item) {
1204             continue;
1205         }
1206 
1207         address_list = &info->value->ip_addresses;
1208 
1209         while (*address_list && (*address_list)->next) {
1210             address_list = &(*address_list)->next;
1211         }
1212 
1213         if (!*address_list) {
1214             *address_list = address_item;
1215         } else {
1216             (*address_list)->next = address_item;
1217         }
1218 
1219         info->value->has_ip_addresses = true;
1220 
1221 
1222     }
1223 
1224     freeifaddrs(ifap);
1225     return head;
1226 
1227 error:
1228     freeifaddrs(ifap);
1229     qapi_free_GuestNetworkInterfaceList(head);
1230     return NULL;
1231 }
1232 
1233 #define SYSCONF_EXACT(name, err) sysconf_exact((name), #name, (err))
1234 
1235 static long sysconf_exact(int name, const char *name_str, Error **err)
1236 {
1237     long ret;
1238 
1239     errno = 0;
1240     ret = sysconf(name);
1241     if (ret == -1) {
1242         if (errno == 0) {
1243             error_setg(err, "sysconf(%s): value indefinite", name_str);
1244         } else {
1245             error_setg_errno(err, errno, "sysconf(%s)", name_str);
1246         }
1247     }
1248     return ret;
1249 }
1250 
1251 /* Transfer online/offline status between @vcpu and the guest system.
1252  *
1253  * On input either @errp or *@errp must be NULL.
1254  *
1255  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1256  * - R: vcpu->logical_id
1257  * - W: vcpu->online
1258  * - W: vcpu->can_offline
1259  *
1260  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1261  * - R: vcpu->logical_id
1262  * - R: vcpu->online
1263  *
1264  * Written members remain unmodified on error.
1265  */
1266 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1267                           Error **errp)
1268 {
1269     char *dirpath;
1270     int dirfd;
1271 
1272     dirpath = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1273                               vcpu->logical_id);
1274     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1275     if (dirfd == -1) {
1276         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1277     } else {
1278         static const char fn[] = "online";
1279         int fd;
1280         int res;
1281 
1282         fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1283         if (fd == -1) {
1284             if (errno != ENOENT) {
1285                 error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1286             } else if (sys2vcpu) {
1287                 vcpu->online = true;
1288                 vcpu->can_offline = false;
1289             } else if (!vcpu->online) {
1290                 error_setg(errp, "logical processor #%" PRId64 " can't be "
1291                            "offlined", vcpu->logical_id);
1292             } /* otherwise pretend successful re-onlining */
1293         } else {
1294             unsigned char status;
1295 
1296             res = pread(fd, &status, 1, 0);
1297             if (res == -1) {
1298                 error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1299             } else if (res == 0) {
1300                 error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1301                            fn);
1302             } else if (sys2vcpu) {
1303                 vcpu->online = (status != '0');
1304                 vcpu->can_offline = true;
1305             } else if (vcpu->online != (status != '0')) {
1306                 status = '0' + vcpu->online;
1307                 if (pwrite(fd, &status, 1, 0) == -1) {
1308                     error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1309                                      fn);
1310                 }
1311             } /* otherwise pretend successful re-(on|off)-lining */
1312 
1313             res = close(fd);
1314             g_assert(res == 0);
1315         }
1316 
1317         res = close(dirfd);
1318         g_assert(res == 0);
1319     }
1320 
1321     g_free(dirpath);
1322 }
1323 
1324 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1325 {
1326     int64_t current;
1327     GuestLogicalProcessorList *head, **link;
1328     long sc_max;
1329     Error *local_err = NULL;
1330 
1331     current = 0;
1332     head = NULL;
1333     link = &head;
1334     sc_max = SYSCONF_EXACT(_SC_NPROCESSORS_CONF, &local_err);
1335 
1336     while (local_err == NULL && current < sc_max) {
1337         GuestLogicalProcessor *vcpu;
1338         GuestLogicalProcessorList *entry;
1339 
1340         vcpu = g_malloc0(sizeof *vcpu);
1341         vcpu->logical_id = current++;
1342         vcpu->has_can_offline = true; /* lolspeak ftw */
1343         transfer_vcpu(vcpu, true, &local_err);
1344 
1345         entry = g_malloc0(sizeof *entry);
1346         entry->value = vcpu;
1347 
1348         *link = entry;
1349         link = &entry->next;
1350     }
1351 
1352     if (local_err == NULL) {
1353         /* there's no guest with zero VCPUs */
1354         g_assert(head != NULL);
1355         return head;
1356     }
1357 
1358     qapi_free_GuestLogicalProcessorList(head);
1359     error_propagate(errp, local_err);
1360     return NULL;
1361 }
1362 
1363 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1364 {
1365     int64_t processed;
1366     Error *local_err = NULL;
1367 
1368     processed = 0;
1369     while (vcpus != NULL) {
1370         transfer_vcpu(vcpus->value, false, &local_err);
1371         if (local_err != NULL) {
1372             break;
1373         }
1374         ++processed;
1375         vcpus = vcpus->next;
1376     }
1377 
1378     if (local_err != NULL) {
1379         if (processed == 0) {
1380             error_propagate(errp, local_err);
1381         } else {
1382             error_free(local_err);
1383         }
1384     }
1385 
1386     return processed;
1387 }
1388 
1389 #else /* defined(__linux__) */
1390 
1391 void qmp_guest_suspend_disk(Error **err)
1392 {
1393     error_set(err, QERR_UNSUPPORTED);
1394 }
1395 
1396 void qmp_guest_suspend_ram(Error **err)
1397 {
1398     error_set(err, QERR_UNSUPPORTED);
1399 }
1400 
1401 void qmp_guest_suspend_hybrid(Error **err)
1402 {
1403     error_set(err, QERR_UNSUPPORTED);
1404 }
1405 
1406 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
1407 {
1408     error_set(errp, QERR_UNSUPPORTED);
1409     return NULL;
1410 }
1411 
1412 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1413 {
1414     error_set(errp, QERR_UNSUPPORTED);
1415     return NULL;
1416 }
1417 
1418 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1419 {
1420     error_set(errp, QERR_UNSUPPORTED);
1421     return -1;
1422 }
1423 
1424 #endif
1425 
1426 #if !defined(CONFIG_FSFREEZE)
1427 
1428 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **err)
1429 {
1430     error_set(err, QERR_UNSUPPORTED);
1431 
1432     return 0;
1433 }
1434 
1435 int64_t qmp_guest_fsfreeze_freeze(Error **err)
1436 {
1437     error_set(err, QERR_UNSUPPORTED);
1438 
1439     return 0;
1440 }
1441 
1442 int64_t qmp_guest_fsfreeze_thaw(Error **err)
1443 {
1444     error_set(err, QERR_UNSUPPORTED);
1445 
1446     return 0;
1447 }
1448 #endif /* CONFIG_FSFREEZE */
1449 
1450 #if !defined(CONFIG_FSTRIM)
1451 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
1452 {
1453     error_set(err, QERR_UNSUPPORTED);
1454 }
1455 #endif
1456 
1457 /* register init/cleanup routines for stateful command groups */
1458 void ga_command_state_init(GAState *s, GACommandState *cs)
1459 {
1460 #if defined(CONFIG_FSFREEZE)
1461     ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
1462 #endif
1463     ga_command_state_add(cs, guest_file_init, NULL);
1464 }
1465