xref: /openbmc/qemu/qga/commands-posix.c (revision d6032e06)
1 /*
2  * QEMU Guest Agent POSIX-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include <glib.h>
15 #include <sys/types.h>
16 #include <sys/ioctl.h>
17 #include <sys/wait.h>
18 #include <unistd.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <sys/stat.h>
24 #include <inttypes.h>
25 #include "qga/guest-agent-core.h"
26 #include "qga-qmp-commands.h"
27 #include "qapi/qmp/qerror.h"
28 #include "qemu/queue.h"
29 #include "qemu/host-utils.h"
30 
31 #ifndef CONFIG_HAS_ENVIRON
32 #ifdef __APPLE__
33 #include <crt_externs.h>
34 #define environ (*_NSGetEnviron())
35 #else
36 extern char **environ;
37 #endif
38 #endif
39 
40 #if defined(__linux__)
41 #include <mntent.h>
42 #include <linux/fs.h>
43 #include <ifaddrs.h>
44 #include <arpa/inet.h>
45 #include <sys/socket.h>
46 #include <net/if.h>
47 
48 #ifdef FIFREEZE
49 #define CONFIG_FSFREEZE
50 #endif
51 #ifdef FITRIM
52 #define CONFIG_FSTRIM
53 #endif
54 #endif
55 
56 static void ga_wait_child(pid_t pid, int *status, Error **err)
57 {
58     pid_t rpid;
59 
60     *status = 0;
61 
62     do {
63         rpid = waitpid(pid, status, 0);
64     } while (rpid == -1 && errno == EINTR);
65 
66     if (rpid == -1) {
67         error_setg_errno(err, errno, "failed to wait for child (pid: %d)", pid);
68         return;
69     }
70 
71     g_assert(rpid == pid);
72 }
73 
74 void qmp_guest_shutdown(bool has_mode, const char *mode, Error **err)
75 {
76     const char *shutdown_flag;
77     Error *local_err = NULL;
78     pid_t pid;
79     int status;
80 
81     slog("guest-shutdown called, mode: %s", mode);
82     if (!has_mode || strcmp(mode, "powerdown") == 0) {
83         shutdown_flag = "-P";
84     } else if (strcmp(mode, "halt") == 0) {
85         shutdown_flag = "-H";
86     } else if (strcmp(mode, "reboot") == 0) {
87         shutdown_flag = "-r";
88     } else {
89         error_setg(err,
90                    "mode is invalid (valid values are: halt|powerdown|reboot");
91         return;
92     }
93 
94     pid = fork();
95     if (pid == 0) {
96         /* child, start the shutdown */
97         setsid();
98         reopen_fd_to_null(0);
99         reopen_fd_to_null(1);
100         reopen_fd_to_null(2);
101 
102         execle("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
103                "hypervisor initiated shutdown", (char*)NULL, environ);
104         _exit(EXIT_FAILURE);
105     } else if (pid < 0) {
106         error_setg_errno(err, errno, "failed to create child process");
107         return;
108     }
109 
110     ga_wait_child(pid, &status, &local_err);
111     if (local_err) {
112         error_propagate(err, local_err);
113         return;
114     }
115 
116     if (!WIFEXITED(status)) {
117         error_setg(err, "child process has terminated abnormally");
118         return;
119     }
120 
121     if (WEXITSTATUS(status)) {
122         error_setg(err, "child process has failed to shutdown");
123         return;
124     }
125 
126     /* succeeded */
127 }
128 
129 int64_t qmp_guest_get_time(Error **errp)
130 {
131    int ret;
132    qemu_timeval tq;
133    int64_t time_ns;
134 
135    ret = qemu_gettimeofday(&tq);
136    if (ret < 0) {
137        error_setg_errno(errp, errno, "Failed to get time");
138        return -1;
139    }
140 
141    time_ns = tq.tv_sec * 1000000000LL + tq.tv_usec * 1000;
142    return time_ns;
143 }
144 
145 void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
146 {
147     int ret;
148     int status;
149     pid_t pid;
150     Error *local_err = NULL;
151     struct timeval tv;
152 
153     /* If user has passed a time, validate and set it. */
154     if (has_time) {
155         /* year-2038 will overflow in case time_t is 32bit */
156         if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
157             error_setg(errp, "Time %" PRId64 " is too large", time_ns);
158             return;
159         }
160 
161         tv.tv_sec = time_ns / 1000000000;
162         tv.tv_usec = (time_ns % 1000000000) / 1000;
163 
164         ret = settimeofday(&tv, NULL);
165         if (ret < 0) {
166             error_setg_errno(errp, errno, "Failed to set time to guest");
167             return;
168         }
169     }
170 
171     /* Now, if user has passed a time to set and the system time is set, we
172      * just need to synchronize the hardware clock. However, if no time was
173      * passed, user is requesting the opposite: set the system time from the
174      * hardware clock. */
175     pid = fork();
176     if (pid == 0) {
177         setsid();
178         reopen_fd_to_null(0);
179         reopen_fd_to_null(1);
180         reopen_fd_to_null(2);
181 
182         /* Use '/sbin/hwclock -w' to set RTC from the system time,
183          * or '/sbin/hwclock -s' to set the system time from RTC. */
184         execle("/sbin/hwclock", "hwclock", has_time ? "-w" : "-s",
185                NULL, environ);
186         _exit(EXIT_FAILURE);
187     } else if (pid < 0) {
188         error_setg_errno(errp, errno, "failed to create child process");
189         return;
190     }
191 
192     ga_wait_child(pid, &status, &local_err);
193     if (local_err) {
194         error_propagate(errp, local_err);
195         return;
196     }
197 
198     if (!WIFEXITED(status)) {
199         error_setg(errp, "child process has terminated abnormally");
200         return;
201     }
202 
203     if (WEXITSTATUS(status)) {
204         error_setg(errp, "hwclock failed to set hardware clock to system time");
205         return;
206     }
207 }
208 
/*
 * One tracked guest file: associates the numeric handle value used by the
 * guest-file-* commands with the stdio stream that backs it.
 */
typedef struct GuestFileHandle {
    /* handle value; looked up by guest_file_handle_find() */
    uint64_t id;
    /* stdio stream operated on by the read/write/seek/flush/close commands */
    FILE *fh;
    /* linkage in guest_file_state.filehandles */
    QTAILQ_ENTRY(GuestFileHandle) next;
} GuestFileHandle;

/* List of all GuestFileHandle entries currently registered in this file. */
static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles;
} guest_file_state;
218 
219 static int64_t guest_file_handle_add(FILE *fh, Error **errp)
220 {
221     GuestFileHandle *gfh;
222     int64_t handle;
223 
224     handle = ga_get_fd_handle(ga_state, errp);
225     if (error_is_set(errp)) {
226         return 0;
227     }
228 
229     gfh = g_malloc0(sizeof(GuestFileHandle));
230     gfh->id = handle;
231     gfh->fh = fh;
232     QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
233 
234     return handle;
235 }
236 
237 static GuestFileHandle *guest_file_handle_find(int64_t id, Error **err)
238 {
239     GuestFileHandle *gfh;
240 
241     QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
242     {
243         if (gfh->id == id) {
244             return gfh;
245         }
246     }
247 
248     error_setg(err, "handle '%" PRId64 "' has not been found", id);
249     return NULL;
250 }
251 
/* constant pointer to constant characters; element type of the "forms"
 * arrays below */
typedef const char * const ccpc;

/* O_BINARY is not defined on every platform; make it a no-op where absent */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/*
 * Mapping from fopen()-style mode strings to open() flags.
 *
 * Each entry lists every accepted spelling of one mode (NULL-terminated)
 * together with the open() flags that implement it. find_open_flag() adds
 * further flags on top of oflag_base.
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html
 */
static const struct {
    ccpc *forms;
    int oflag_base;
} guest_file_open_modes[] = {
    { (ccpc[]){ "r",          NULL }, O_RDONLY                                 },
    { (ccpc[]){ "rb",         NULL }, O_RDONLY                      | O_BINARY },
    { (ccpc[]){ "w",          NULL }, O_WRONLY | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a",          NULL }, O_WRONLY | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND | O_BINARY },
    { (ccpc[]){ "r+",         NULL }, O_RDWR                                   },
    { (ccpc[]){ "rb+", "r+b", NULL }, O_RDWR                        | O_BINARY },
    { (ccpc[]){ "w+",         NULL }, O_RDWR   | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a+",         NULL }, O_RDWR   | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND | O_BINARY }
};
276 
277 static int
278 find_open_flag(const char *mode_str, Error **err)
279 {
280     unsigned mode;
281 
282     for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
283         ccpc *form;
284 
285         form = guest_file_open_modes[mode].forms;
286         while (*form != NULL && strcmp(*form, mode_str) != 0) {
287             ++form;
288         }
289         if (*form != NULL) {
290             break;
291         }
292     }
293 
294     if (mode == ARRAY_SIZE(guest_file_open_modes)) {
295         error_setg(err, "invalid file open mode '%s'", mode_str);
296         return -1;
297     }
298     return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
299 }
300 
/* Permission bits applied via fchmod() to files newly created by
 * safe_open_or_create(): read and write for user, group and others. */
#define DEFAULT_NEW_FILE_MODE (S_IRUSR | S_IWUSR | \
                               S_IRGRP | S_IWGRP | \
                               S_IROTH | S_IWOTH)

/*
 * Open @path as a stdio stream according to the fopen()-style @mode,
 * creating the file if the mode asks for it.
 *
 * Returns the stream on success. On failure returns NULL with @err set; a
 * file that this call itself created is unlinked again.
 */
static FILE *
safe_open_or_create(const char *path, const char *mode, Error **err)
{
    Error *local_err = NULL;
    int oflag;

    oflag = find_open_flag(mode, &local_err);
    if (local_err == NULL) {
        int fd;

        /* If the caller wants / allows creation of a new file, we implement it
         * with a two step process: open() + (open() / fchmod()).
         *
         * First we insist on creating the file exclusively as a new file. If
         * that succeeds, we're free to set any file-mode bits on it. (The
         * motivation is that we want to set those file-mode bits independently
         * of the current umask.)
         *
         * If the exclusive creation fails because the file already exists
         * (EEXIST is not possible for any other reason), we just attempt to
         * open the file, but in this case we won't be allowed to change the
         * file-mode bits on the preexistent file.
         *
         * The pathname should never disappear between the two open()s in
         * practice. If it happens, then someone very likely tried to race us.
         * In this case just go ahead and report the ENOENT from the second
         * open() to the caller.
         *
         * If the caller wants to open a preexistent file, then the first
         * open() is decisive and its third argument is ignored, and the second
         * open() and the fchmod() are never called.
         */
        fd = open(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
        if (fd == -1 && errno == EEXIST) {
            /* from here on, O_CREAT being clear marks a preexistent file */
            oflag &= ~(unsigned)O_CREAT;
            fd = open(path, oflag);
        }

        if (fd == -1) {
            error_setg_errno(&local_err, errno, "failed to open file '%s' "
                             "(mode: '%s')", path, mode);
        } else {
            qemu_set_cloexec(fd);

            /* O_CREAT still set means the exclusive create above succeeded,
             * i.e. the file is new and its mode bits are ours to set */
            if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
                error_setg_errno(&local_err, errno, "failed to set permission "
                                 "0%03o on new file '%s' (mode: '%s')",
                                 (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
            } else {
                FILE *f;

                f = fdopen(fd, mode);
                if (f == NULL) {
                    error_setg_errno(&local_err, errno, "failed to associate "
                                     "stdio stream with file descriptor %d, "
                                     "file '%s' (mode: '%s')", fd, path, mode);
                } else {
                    return f;
                }
            }

            /* failure after a successful open(): release the descriptor and
             * remove the file if this call created it */
            close(fd);
            if (oflag & O_CREAT) {
                unlink(path);
            }
        }
    }

    error_propagate(err, local_err);
    return NULL;
}
376 
377 int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, Error **err)
378 {
379     FILE *fh;
380     Error *local_err = NULL;
381     int fd;
382     int64_t ret = -1, handle;
383 
384     if (!has_mode) {
385         mode = "r";
386     }
387     slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
388     fh = safe_open_or_create(path, mode, &local_err);
389     if (local_err != NULL) {
390         error_propagate(err, local_err);
391         return -1;
392     }
393 
394     /* set fd non-blocking to avoid common use cases (like reading from a
395      * named pipe) from hanging the agent
396      */
397     fd = fileno(fh);
398     ret = fcntl(fd, F_GETFL);
399     ret = fcntl(fd, F_SETFL, ret | O_NONBLOCK);
400     if (ret == -1) {
401         error_setg_errno(err, errno, "failed to make file '%s' non-blocking",
402                          path);
403         fclose(fh);
404         return -1;
405     }
406 
407     handle = guest_file_handle_add(fh, err);
408     if (error_is_set(err)) {
409         fclose(fh);
410         return -1;
411     }
412 
413     slog("guest-file-open, handle: %" PRId64, handle);
414     return handle;
415 }
416 
417 void qmp_guest_file_close(int64_t handle, Error **err)
418 {
419     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
420     int ret;
421 
422     slog("guest-file-close called, handle: %" PRId64, handle);
423     if (!gfh) {
424         return;
425     }
426 
427     ret = fclose(gfh->fh);
428     if (ret == EOF) {
429         error_setg_errno(err, errno, "failed to close handle");
430         return;
431     }
432 
433     QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
434     g_free(gfh);
435 }
436 
437 struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
438                                           int64_t count, Error **err)
439 {
440     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
441     GuestFileRead *read_data = NULL;
442     guchar *buf;
443     FILE *fh;
444     size_t read_count;
445 
446     if (!gfh) {
447         return NULL;
448     }
449 
450     if (!has_count) {
451         count = QGA_READ_COUNT_DEFAULT;
452     } else if (count < 0) {
453         error_setg(err, "value '%" PRId64 "' is invalid for argument count",
454                    count);
455         return NULL;
456     }
457 
458     fh = gfh->fh;
459     buf = g_malloc0(count+1);
460     read_count = fread(buf, 1, count, fh);
461     if (ferror(fh)) {
462         error_setg_errno(err, errno, "failed to read file");
463         slog("guest-file-read failed, handle: %" PRId64, handle);
464     } else {
465         buf[read_count] = 0;
466         read_data = g_malloc0(sizeof(GuestFileRead));
467         read_data->count = read_count;
468         read_data->eof = feof(fh);
469         if (read_count) {
470             read_data->buf_b64 = g_base64_encode(buf, read_count);
471         }
472     }
473     g_free(buf);
474     clearerr(fh);
475 
476     return read_data;
477 }
478 
479 GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
480                                      bool has_count, int64_t count, Error **err)
481 {
482     GuestFileWrite *write_data = NULL;
483     guchar *buf;
484     gsize buf_len;
485     int write_count;
486     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
487     FILE *fh;
488 
489     if (!gfh) {
490         return NULL;
491     }
492 
493     fh = gfh->fh;
494     buf = g_base64_decode(buf_b64, &buf_len);
495 
496     if (!has_count) {
497         count = buf_len;
498     } else if (count < 0 || count > buf_len) {
499         error_setg(err, "value '%" PRId64 "' is invalid for argument count",
500                    count);
501         g_free(buf);
502         return NULL;
503     }
504 
505     write_count = fwrite(buf, 1, count, fh);
506     if (ferror(fh)) {
507         error_setg_errno(err, errno, "failed to write to file");
508         slog("guest-file-write failed, handle: %" PRId64, handle);
509     } else {
510         write_data = g_malloc0(sizeof(GuestFileWrite));
511         write_data->count = write_count;
512         write_data->eof = feof(fh);
513     }
514     g_free(buf);
515     clearerr(fh);
516 
517     return write_data;
518 }
519 
520 struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
521                                           int64_t whence, Error **err)
522 {
523     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
524     GuestFileSeek *seek_data = NULL;
525     FILE *fh;
526     int ret;
527 
528     if (!gfh) {
529         return NULL;
530     }
531 
532     fh = gfh->fh;
533     ret = fseek(fh, offset, whence);
534     if (ret == -1) {
535         error_setg_errno(err, errno, "failed to seek file");
536     } else {
537         seek_data = g_new0(GuestFileSeek, 1);
538         seek_data->position = ftell(fh);
539         seek_data->eof = feof(fh);
540     }
541     clearerr(fh);
542 
543     return seek_data;
544 }
545 
546 void qmp_guest_file_flush(int64_t handle, Error **err)
547 {
548     GuestFileHandle *gfh = guest_file_handle_find(handle, err);
549     FILE *fh;
550     int ret;
551 
552     if (!gfh) {
553         return;
554     }
555 
556     fh = gfh->fh;
557     ret = fflush(fh);
558     if (ret == EOF) {
559         error_setg_errno(err, errno, "failed to flush file");
560     }
561 }
562 
563 static void guest_file_init(void)
564 {
565     QTAILQ_INIT(&guest_file_state.filehandles);
566 }
567 
568 /* linux-specific implementations. avoid this if at all possible. */
569 #if defined(__linux__)
570 
571 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/* One mount table entry collected by build_fs_mount_list(). */
typedef struct FsMount {
    /* mount point directory (copy of mnt_dir) */
    char *dirname;
    /* file system type (copy of mnt_type) */
    char *devtype;
    /* linkage in an FsMountList */
    QTAILQ_ENTRY(FsMount) next;
} FsMount;

/* Tail queue of FsMount entries. */
typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
579 
580 static void free_fs_mount_list(FsMountList *mounts)
581 {
582      FsMount *mount, *temp;
583 
584      if (!mounts) {
585          return;
586      }
587 
588      QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
589          QTAILQ_REMOVE(mounts, mount, next);
590          g_free(mount->dirname);
591          g_free(mount->devtype);
592          g_free(mount);
593      }
594 }
595 
596 /*
597  * Walk the mount table and build a list of local file systems
598  */
599 static void build_fs_mount_list(FsMountList *mounts, Error **err)
600 {
601     struct mntent *ment;
602     FsMount *mount;
603     char const *mtab = "/proc/self/mounts";
604     FILE *fp;
605 
606     fp = setmntent(mtab, "r");
607     if (!fp) {
608         error_setg(err, "failed to open mtab file: '%s'", mtab);
609         return;
610     }
611 
612     while ((ment = getmntent(fp))) {
613         /*
614          * An entry which device name doesn't start with a '/' is
615          * either a dummy file system or a network file system.
616          * Add special handling for smbfs and cifs as is done by
617          * coreutils as well.
618          */
619         if ((ment->mnt_fsname[0] != '/') ||
620             (strcmp(ment->mnt_type, "smbfs") == 0) ||
621             (strcmp(ment->mnt_type, "cifs") == 0)) {
622             continue;
623         }
624 
625         mount = g_malloc0(sizeof(FsMount));
626         mount->dirname = g_strdup(ment->mnt_dir);
627         mount->devtype = g_strdup(ment->mnt_type);
628 
629         QTAILQ_INSERT_TAIL(mounts, mount, next);
630     }
631 
632     endmntent(fp);
633 }
634 #endif
635 
636 #if defined(CONFIG_FSFREEZE)
637 
/* Which phase the fsfreeze hook is being run for; also used as the index
 * into fsfreeze_hook_arg_string[]. */
typedef enum {
    FSFREEZE_HOOK_THAW = 0,
    FSFREEZE_HOOK_FREEZE,
} FsfreezeHookArg;

/* Command-line argument passed to the hook, indexed by FsfreezeHookArg. */
const char *fsfreeze_hook_arg_string[] = {
    "thaw",
    "freeze",
};
647 
648 static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **err)
649 {
650     int status;
651     pid_t pid;
652     const char *hook;
653     const char *arg_str = fsfreeze_hook_arg_string[arg];
654     Error *local_err = NULL;
655 
656     hook = ga_fsfreeze_hook(ga_state);
657     if (!hook) {
658         return;
659     }
660     if (access(hook, X_OK) != 0) {
661         error_setg_errno(err, errno, "can't access fsfreeze hook '%s'", hook);
662         return;
663     }
664 
665     slog("executing fsfreeze hook with arg '%s'", arg_str);
666     pid = fork();
667     if (pid == 0) {
668         setsid();
669         reopen_fd_to_null(0);
670         reopen_fd_to_null(1);
671         reopen_fd_to_null(2);
672 
673         execle(hook, hook, arg_str, NULL, environ);
674         _exit(EXIT_FAILURE);
675     } else if (pid < 0) {
676         error_setg_errno(err, errno, "failed to create child process");
677         return;
678     }
679 
680     ga_wait_child(pid, &status, &local_err);
681     if (local_err) {
682         error_propagate(err, local_err);
683         return;
684     }
685 
686     if (!WIFEXITED(status)) {
687         error_setg(err, "fsfreeze hook has terminated abnormally");
688         return;
689     }
690 
691     status = WEXITSTATUS(status);
692     if (status) {
693         error_setg(err, "fsfreeze hook has failed with status %d", status);
694         return;
695     }
696 }
697 
698 /*
699  * Return status of freeze/thaw
700  */
701 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **err)
702 {
703     if (ga_is_frozen(ga_state)) {
704         return GUEST_FSFREEZE_STATUS_FROZEN;
705     }
706 
707     return GUEST_FSFREEZE_STATUS_THAWED;
708 }
709 
/*
 * Walk list of mounted file systems in the guest, and freeze the ones which
 * are real local file systems.
 *
 * The freeze hook is run first, then the mount list is walked in reverse
 * order and FIFREEZE is issued on each mount point.
 *
 * Returns the number of file systems frozen. Returns -1 with @err set when
 * the hook or the mount list fails before anything was frozen. When a mount
 * point cannot be opened or frozen, @err is set, everything is thawed again
 * and 0 is returned.
 */
int64_t qmp_guest_fsfreeze_freeze(Error **err)
{
    int ret = 0, i = 0;
    FsMountList mounts;
    struct FsMount *mount;
    Error *local_err = NULL;
    int fd;

    slog("guest-fsfreeze called");

    execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
    if (local_err) {
        error_propagate(err, local_err);
        return -1;
    }

    QTAILQ_INIT(&mounts);
    build_fs_mount_list(&mounts, &local_err);
    if (local_err) {
        error_propagate(err, local_err);
        return -1;
    }

    /* cannot risk guest agent blocking itself on a write in this state */
    ga_set_frozen(ga_state);

    QTAILQ_FOREACH_REVERSE(mount, &mounts, FsMountList, next) {
        fd = qemu_open(mount->dirname, O_RDONLY);
        if (fd == -1) {
            error_setg_errno(err, errno, "failed to open %s", mount->dirname);
            goto error;
        }

        /* we try to cull filesystems we know won't work in advance, but other
         * filesystems may not implement fsfreeze for less obvious reasons.
         * these will report EOPNOTSUPP. we simply ignore these when tallying
         * the number of frozen filesystems.
         *
         * any other error means a failure to freeze a filesystem we
         * expect to be freezable, so return an error in those cases
         * and return system to thawed state.
         */
        ret = ioctl(fd, FIFREEZE);
        if (ret == -1) {
            if (errno != EOPNOTSUPP) {
                error_setg_errno(err, errno, "failed to freeze %s",
                                 mount->dirname);
                close(fd);
                goto error;
            }
        } else {
            i++;
        }
        close(fd);
    }

    free_fs_mount_list(&mounts);
    return i;

error:
    free_fs_mount_list(&mounts);
    /* undo any freezes already done; errors from the thaw are discarded
     * because @err already carries the freeze failure */
    qmp_guest_fsfreeze_thaw(NULL);
    return 0;
}
778 
779 /*
780  * Walk list of frozen file systems in the guest, and thaw them.
781  */
782 int64_t qmp_guest_fsfreeze_thaw(Error **err)
783 {
784     int ret;
785     FsMountList mounts;
786     FsMount *mount;
787     int fd, i = 0, logged;
788     Error *local_err = NULL;
789 
790     QTAILQ_INIT(&mounts);
791     build_fs_mount_list(&mounts, &local_err);
792     if (local_err) {
793         error_propagate(err, local_err);
794         return 0;
795     }
796 
797     QTAILQ_FOREACH(mount, &mounts, next) {
798         logged = false;
799         fd = qemu_open(mount->dirname, O_RDONLY);
800         if (fd == -1) {
801             continue;
802         }
803         /* we have no way of knowing whether a filesystem was actually unfrozen
804          * as a result of a successful call to FITHAW, only that if an error
805          * was returned the filesystem was *not* unfrozen by that particular
806          * call.
807          *
808          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
809          * to unfreeze, continuing issuing FITHAW until an error is returned,
810          * in which case either the filesystem is in an unfreezable state, or,
811          * more likely, it was thawed previously (and remains so afterward).
812          *
813          * also, since the most recent successful call is the one that did
814          * the actual unfreeze, we can use this to provide an accurate count
815          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
816          * may * be useful for determining whether a filesystem was unfrozen
817          * during the freeze/thaw phase by a process other than qemu-ga.
818          */
819         do {
820             ret = ioctl(fd, FITHAW);
821             if (ret == 0 && !logged) {
822                 i++;
823                 logged = true;
824             }
825         } while (ret == 0);
826         close(fd);
827     }
828 
829     ga_unset_frozen(ga_state);
830     free_fs_mount_list(&mounts);
831 
832     execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, err);
833 
834     return i;
835 }
836 
837 static void guest_fsfreeze_cleanup(void)
838 {
839     Error *err = NULL;
840 
841     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
842         qmp_guest_fsfreeze_thaw(&err);
843         if (err) {
844             slog("failed to clean up frozen filesystems: %s",
845                  error_get_pretty(err));
846             error_free(err);
847         }
848     }
849 }
850 #endif /* CONFIG_FSFREEZE */
851 
852 #if defined(CONFIG_FSTRIM)
853 /*
854  * Walk list of mounted file systems in the guest, and trim them.
855  */
856 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
857 {
858     int ret = 0;
859     FsMountList mounts;
860     struct FsMount *mount;
861     int fd;
862     Error *local_err = NULL;
863     struct fstrim_range r = {
864         .start = 0,
865         .len = -1,
866         .minlen = has_minimum ? minimum : 0,
867     };
868 
869     slog("guest-fstrim called");
870 
871     QTAILQ_INIT(&mounts);
872     build_fs_mount_list(&mounts, &local_err);
873     if (local_err) {
874         error_propagate(err, local_err);
875         return;
876     }
877 
878     QTAILQ_FOREACH(mount, &mounts, next) {
879         fd = qemu_open(mount->dirname, O_RDONLY);
880         if (fd == -1) {
881             error_setg_errno(err, errno, "failed to open %s", mount->dirname);
882             goto error;
883         }
884 
885         /* We try to cull filesytems we know won't work in advance, but other
886          * filesytems may not implement fstrim for less obvious reasons.  These
887          * will report EOPNOTSUPP; we simply ignore these errors.  Any other
888          * error means an unexpected error, so return it in those cases.  In
889          * some other cases ENOTTY will be reported (e.g. CD-ROMs).
890          */
891         ret = ioctl(fd, FITRIM, &r);
892         if (ret == -1) {
893             if (errno != ENOTTY && errno != EOPNOTSUPP) {
894                 error_setg_errno(err, errno, "failed to trim %s",
895                                  mount->dirname);
896                 close(fd);
897                 goto error;
898             }
899         }
900         close(fd);
901     }
902 
903 error:
904     free_fs_mount_list(&mounts);
905 }
906 #endif /* CONFIG_FSTRIM */
907 
908 
/* sysfs file read to probe for, and written to enter, a suspend mode */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* exit statuses the bios_supports_mode() child uses to report its result */
#define SUSPEND_SUPPORTED 0
#define SUSPEND_NOT_SUPPORTED 1
912 
913 static void bios_supports_mode(const char *pmutils_bin, const char *pmutils_arg,
914                                const char *sysfile_str, Error **err)
915 {
916     Error *local_err = NULL;
917     char *pmutils_path;
918     pid_t pid;
919     int status;
920 
921     pmutils_path = g_find_program_in_path(pmutils_bin);
922 
923     pid = fork();
924     if (!pid) {
925         char buf[32]; /* hopefully big enough */
926         ssize_t ret;
927         int fd;
928 
929         setsid();
930         reopen_fd_to_null(0);
931         reopen_fd_to_null(1);
932         reopen_fd_to_null(2);
933 
934         if (pmutils_path) {
935             execle(pmutils_path, pmutils_bin, pmutils_arg, NULL, environ);
936         }
937 
938         /*
939          * If we get here either pm-utils is not installed or execle() has
940          * failed. Let's try the manual method if the caller wants it.
941          */
942 
943         if (!sysfile_str) {
944             _exit(SUSPEND_NOT_SUPPORTED);
945         }
946 
947         fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
948         if (fd < 0) {
949             _exit(SUSPEND_NOT_SUPPORTED);
950         }
951 
952         ret = read(fd, buf, sizeof(buf)-1);
953         if (ret <= 0) {
954             _exit(SUSPEND_NOT_SUPPORTED);
955         }
956         buf[ret] = '\0';
957 
958         if (strstr(buf, sysfile_str)) {
959             _exit(SUSPEND_SUPPORTED);
960         }
961 
962         _exit(SUSPEND_NOT_SUPPORTED);
963     } else if (pid < 0) {
964         error_setg_errno(err, errno, "failed to create child process");
965         goto out;
966     }
967 
968     ga_wait_child(pid, &status, &local_err);
969     if (local_err) {
970         error_propagate(err, local_err);
971         goto out;
972     }
973 
974     if (!WIFEXITED(status)) {
975         error_setg(err, "child process has terminated abnormally");
976         goto out;
977     }
978 
979     switch (WEXITSTATUS(status)) {
980     case SUSPEND_SUPPORTED:
981         goto out;
982     case SUSPEND_NOT_SUPPORTED:
983         error_setg(err,
984                    "the requested suspend mode is not supported by the guest");
985         goto out;
986     default:
987         error_setg(err,
988                    "the helper program '%s' returned an unexpected exit status"
989                    " code (%d)", pmutils_path, WEXITSTATUS(status));
990         goto out;
991     }
992 
993 out:
994     g_free(pmutils_path);
995 }
996 
997 static void guest_suspend(const char *pmutils_bin, const char *sysfile_str,
998                           Error **err)
999 {
1000     Error *local_err = NULL;
1001     char *pmutils_path;
1002     pid_t pid;
1003     int status;
1004 
1005     pmutils_path = g_find_program_in_path(pmutils_bin);
1006 
1007     pid = fork();
1008     if (pid == 0) {
1009         /* child */
1010         int fd;
1011 
1012         setsid();
1013         reopen_fd_to_null(0);
1014         reopen_fd_to_null(1);
1015         reopen_fd_to_null(2);
1016 
1017         if (pmutils_path) {
1018             execle(pmutils_path, pmutils_bin, NULL, environ);
1019         }
1020 
1021         /*
1022          * If we get here either pm-utils is not installed or execle() has
1023          * failed. Let's try the manual method if the caller wants it.
1024          */
1025 
1026         if (!sysfile_str) {
1027             _exit(EXIT_FAILURE);
1028         }
1029 
1030         fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1031         if (fd < 0) {
1032             _exit(EXIT_FAILURE);
1033         }
1034 
1035         if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1036             _exit(EXIT_FAILURE);
1037         }
1038 
1039         _exit(EXIT_SUCCESS);
1040     } else if (pid < 0) {
1041         error_setg_errno(err, errno, "failed to create child process");
1042         goto out;
1043     }
1044 
1045     ga_wait_child(pid, &status, &local_err);
1046     if (local_err) {
1047         error_propagate(err, local_err);
1048         goto out;
1049     }
1050 
1051     if (!WIFEXITED(status)) {
1052         error_setg(err, "child process has terminated abnormally");
1053         goto out;
1054     }
1055 
1056     if (WEXITSTATUS(status)) {
1057         error_setg(err, "child process has failed to suspend");
1058         goto out;
1059     }
1060 
1061 out:
1062     g_free(pmutils_path);
1063 }
1064 
1065 void qmp_guest_suspend_disk(Error **err)
1066 {
1067     bios_supports_mode("pm-is-supported", "--hibernate", "disk", err);
1068     if (error_is_set(err)) {
1069         return;
1070     }
1071 
1072     guest_suspend("pm-hibernate", "disk", err);
1073 }
1074 
1075 void qmp_guest_suspend_ram(Error **err)
1076 {
1077     bios_supports_mode("pm-is-supported", "--suspend", "mem", err);
1078     if (error_is_set(err)) {
1079         return;
1080     }
1081 
1082     guest_suspend("pm-suspend", "mem", err);
1083 }
1084 
1085 void qmp_guest_suspend_hybrid(Error **err)
1086 {
1087     bios_supports_mode("pm-is-supported", "--suspend-hybrid", NULL, err);
1088     if (error_is_set(err)) {
1089         return;
1090     }
1091 
1092     guest_suspend("pm-suspend-hybrid", NULL, err);
1093 }
1094 
1095 static GuestNetworkInterfaceList *
1096 guest_find_interface(GuestNetworkInterfaceList *head,
1097                      const char *name)
1098 {
1099     for (; head; head = head->next) {
1100         if (strcmp(head->value->name, name) == 0) {
1101             break;
1102         }
1103     }
1104 
1105     return head;
1106 }
1107 
1108 /*
1109  * Build information about guest interfaces
1110  */
1111 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
1112 {
1113     GuestNetworkInterfaceList *head = NULL, *cur_item = NULL;
1114     struct ifaddrs *ifap, *ifa;
1115 
1116     if (getifaddrs(&ifap) < 0) {
1117         error_setg_errno(errp, errno, "getifaddrs failed");
1118         goto error;
1119     }
1120 
1121     for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
1122         GuestNetworkInterfaceList *info;
1123         GuestIpAddressList **address_list = NULL, *address_item = NULL;
1124         char addr4[INET_ADDRSTRLEN];
1125         char addr6[INET6_ADDRSTRLEN];
1126         int sock;
1127         struct ifreq ifr;
1128         unsigned char *mac_addr;
1129         void *p;
1130 
1131         g_debug("Processing %s interface", ifa->ifa_name);
1132 
1133         info = guest_find_interface(head, ifa->ifa_name);
1134 
1135         if (!info) {
1136             info = g_malloc0(sizeof(*info));
1137             info->value = g_malloc0(sizeof(*info->value));
1138             info->value->name = g_strdup(ifa->ifa_name);
1139 
1140             if (!cur_item) {
1141                 head = cur_item = info;
1142             } else {
1143                 cur_item->next = info;
1144                 cur_item = info;
1145             }
1146         }
1147 
1148         if (!info->value->has_hardware_address &&
1149             ifa->ifa_flags & SIOCGIFHWADDR) {
1150             /* we haven't obtained HW address yet */
1151             sock = socket(PF_INET, SOCK_STREAM, 0);
1152             if (sock == -1) {
1153                 error_setg_errno(errp, errno, "failed to create socket");
1154                 goto error;
1155             }
1156 
1157             memset(&ifr, 0, sizeof(ifr));
1158             pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->value->name);
1159             if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
1160                 error_setg_errno(errp, errno,
1161                                  "failed to get MAC address of %s",
1162                                  ifa->ifa_name);
1163                 close(sock);
1164                 goto error;
1165             }
1166 
1167             close(sock);
1168             mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
1169 
1170             info->value->hardware_address =
1171                 g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
1172                                 (int) mac_addr[0], (int) mac_addr[1],
1173                                 (int) mac_addr[2], (int) mac_addr[3],
1174                                 (int) mac_addr[4], (int) mac_addr[5]);
1175 
1176             info->value->has_hardware_address = true;
1177         }
1178 
1179         if (ifa->ifa_addr &&
1180             ifa->ifa_addr->sa_family == AF_INET) {
1181             /* interface with IPv4 address */
1182             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1183             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
1184                 error_setg_errno(errp, errno, "inet_ntop failed");
1185                 goto error;
1186             }
1187 
1188             address_item = g_malloc0(sizeof(*address_item));
1189             address_item->value = g_malloc0(sizeof(*address_item->value));
1190             address_item->value->ip_address = g_strdup(addr4);
1191             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
1192 
1193             if (ifa->ifa_netmask) {
1194                 /* Count the number of set bits in netmask.
1195                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
1196                 p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
1197                 address_item->value->prefix = ctpop32(((uint32_t *) p)[0]);
1198             }
1199         } else if (ifa->ifa_addr &&
1200                    ifa->ifa_addr->sa_family == AF_INET6) {
1201             /* interface with IPv6 address */
1202             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
1203             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
1204                 error_setg_errno(errp, errno, "inet_ntop failed");
1205                 goto error;
1206             }
1207 
1208             address_item = g_malloc0(sizeof(*address_item));
1209             address_item->value = g_malloc0(sizeof(*address_item->value));
1210             address_item->value->ip_address = g_strdup(addr6);
1211             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
1212 
1213             if (ifa->ifa_netmask) {
1214                 /* Count the number of set bits in netmask.
1215                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
1216                 p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
1217                 address_item->value->prefix =
1218                     ctpop32(((uint32_t *) p)[0]) +
1219                     ctpop32(((uint32_t *) p)[1]) +
1220                     ctpop32(((uint32_t *) p)[2]) +
1221                     ctpop32(((uint32_t *) p)[3]);
1222             }
1223         }
1224 
1225         if (!address_item) {
1226             continue;
1227         }
1228 
1229         address_list = &info->value->ip_addresses;
1230 
1231         while (*address_list && (*address_list)->next) {
1232             address_list = &(*address_list)->next;
1233         }
1234 
1235         if (!*address_list) {
1236             *address_list = address_item;
1237         } else {
1238             (*address_list)->next = address_item;
1239         }
1240 
1241         info->value->has_ip_addresses = true;
1242 
1243 
1244     }
1245 
1246     freeifaddrs(ifap);
1247     return head;
1248 
1249 error:
1250     freeifaddrs(ifap);
1251     qapi_free_GuestNetworkInterfaceList(head);
1252     return NULL;
1253 }
1254 
1255 #define SYSCONF_EXACT(name, err) sysconf_exact((name), #name, (err))
1256 
1257 static long sysconf_exact(int name, const char *name_str, Error **err)
1258 {
1259     long ret;
1260 
1261     errno = 0;
1262     ret = sysconf(name);
1263     if (ret == -1) {
1264         if (errno == 0) {
1265             error_setg(err, "sysconf(%s): value indefinite", name_str);
1266         } else {
1267             error_setg_errno(err, errno, "sysconf(%s)", name_str);
1268         }
1269     }
1270     return ret;
1271 }
1272 
1273 /* Transfer online/offline status between @vcpu and the guest system.
1274  *
1275  * On input either @errp or *@errp must be NULL.
1276  *
1277  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1278  * - R: vcpu->logical_id
1279  * - W: vcpu->online
1280  * - W: vcpu->can_offline
1281  *
1282  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1283  * - R: vcpu->logical_id
1284  * - R: vcpu->online
1285  *
1286  * Written members remain unmodified on error.
1287  */
1288 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1289                           Error **errp)
1290 {
1291     char *dirpath;
1292     int dirfd;
1293 
1294     dirpath = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1295                               vcpu->logical_id);
1296     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1297     if (dirfd == -1) {
1298         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1299     } else {
1300         static const char fn[] = "online";
1301         int fd;
1302         int res;
1303 
1304         fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1305         if (fd == -1) {
1306             if (errno != ENOENT) {
1307                 error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1308             } else if (sys2vcpu) {
1309                 vcpu->online = true;
1310                 vcpu->can_offline = false;
1311             } else if (!vcpu->online) {
1312                 error_setg(errp, "logical processor #%" PRId64 " can't be "
1313                            "offlined", vcpu->logical_id);
1314             } /* otherwise pretend successful re-onlining */
1315         } else {
1316             unsigned char status;
1317 
1318             res = pread(fd, &status, 1, 0);
1319             if (res == -1) {
1320                 error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1321             } else if (res == 0) {
1322                 error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1323                            fn);
1324             } else if (sys2vcpu) {
1325                 vcpu->online = (status != '0');
1326                 vcpu->can_offline = true;
1327             } else if (vcpu->online != (status != '0')) {
1328                 status = '0' + vcpu->online;
1329                 if (pwrite(fd, &status, 1, 0) == -1) {
1330                     error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1331                                      fn);
1332                 }
1333             } /* otherwise pretend successful re-(on|off)-lining */
1334 
1335             res = close(fd);
1336             g_assert(res == 0);
1337         }
1338 
1339         res = close(dirfd);
1340         g_assert(res == 0);
1341     }
1342 
1343     g_free(dirpath);
1344 }
1345 
1346 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1347 {
1348     int64_t current;
1349     GuestLogicalProcessorList *head, **link;
1350     long sc_max;
1351     Error *local_err = NULL;
1352 
1353     current = 0;
1354     head = NULL;
1355     link = &head;
1356     sc_max = SYSCONF_EXACT(_SC_NPROCESSORS_CONF, &local_err);
1357 
1358     while (local_err == NULL && current < sc_max) {
1359         GuestLogicalProcessor *vcpu;
1360         GuestLogicalProcessorList *entry;
1361 
1362         vcpu = g_malloc0(sizeof *vcpu);
1363         vcpu->logical_id = current++;
1364         vcpu->has_can_offline = true; /* lolspeak ftw */
1365         transfer_vcpu(vcpu, true, &local_err);
1366 
1367         entry = g_malloc0(sizeof *entry);
1368         entry->value = vcpu;
1369 
1370         *link = entry;
1371         link = &entry->next;
1372     }
1373 
1374     if (local_err == NULL) {
1375         /* there's no guest with zero VCPUs */
1376         g_assert(head != NULL);
1377         return head;
1378     }
1379 
1380     qapi_free_GuestLogicalProcessorList(head);
1381     error_propagate(errp, local_err);
1382     return NULL;
1383 }
1384 
1385 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1386 {
1387     int64_t processed;
1388     Error *local_err = NULL;
1389 
1390     processed = 0;
1391     while (vcpus != NULL) {
1392         transfer_vcpu(vcpus->value, false, &local_err);
1393         if (local_err != NULL) {
1394             break;
1395         }
1396         ++processed;
1397         vcpus = vcpus->next;
1398     }
1399 
1400     if (local_err != NULL) {
1401         if (processed == 0) {
1402             error_propagate(errp, local_err);
1403         } else {
1404             error_free(local_err);
1405         }
1406     }
1407 
1408     return processed;
1409 }
1410 
1411 #else /* defined(__linux__) */
1412 
1413 void qmp_guest_suspend_disk(Error **err)
1414 {
1415     error_set(err, QERR_UNSUPPORTED);
1416 }
1417 
1418 void qmp_guest_suspend_ram(Error **err)
1419 {
1420     error_set(err, QERR_UNSUPPORTED);
1421 }
1422 
1423 void qmp_guest_suspend_hybrid(Error **err)
1424 {
1425     error_set(err, QERR_UNSUPPORTED);
1426 }
1427 
1428 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
1429 {
1430     error_set(errp, QERR_UNSUPPORTED);
1431     return NULL;
1432 }
1433 
1434 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1435 {
1436     error_set(errp, QERR_UNSUPPORTED);
1437     return NULL;
1438 }
1439 
1440 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1441 {
1442     error_set(errp, QERR_UNSUPPORTED);
1443     return -1;
1444 }
1445 
1446 #endif
1447 
1448 #if !defined(CONFIG_FSFREEZE)
1449 
1450 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **err)
1451 {
1452     error_set(err, QERR_UNSUPPORTED);
1453 
1454     return 0;
1455 }
1456 
1457 int64_t qmp_guest_fsfreeze_freeze(Error **err)
1458 {
1459     error_set(err, QERR_UNSUPPORTED);
1460 
1461     return 0;
1462 }
1463 
1464 int64_t qmp_guest_fsfreeze_thaw(Error **err)
1465 {
1466     error_set(err, QERR_UNSUPPORTED);
1467 
1468     return 0;
1469 }
1470 #endif /* CONFIG_FSFREEZE */
1471 
1472 #if !defined(CONFIG_FSTRIM)
1473 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
1474 {
1475     error_set(err, QERR_UNSUPPORTED);
1476 }
1477 #endif
1478 
1479 /* register init/cleanup routines for stateful command groups */
1480 void ga_command_state_init(GAState *s, GACommandState *cs)
1481 {
1482 #if defined(CONFIG_FSFREEZE)
1483     ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
1484 #endif
1485     ga_command_state_add(cs, guest_file_init, NULL);
1486 }
1487