1 /* 2 * QEMU Guest Agent Linux-specific command implementations 3 * 4 * Copyright IBM Corp. 2011 5 * 6 * Authors: 7 * Michael Roth <mdroth@linux.vnet.ibm.com> 8 * Michal Privoznik <mprivozn@redhat.com> 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2 or later. 11 * See the COPYING file in the top-level directory. 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qapi/error.h" 16 #include "qga-qapi-commands.h" 17 #include "commands-common.h" 18 #include "cutils.h" 19 #include <mntent.h> 20 #include <sys/ioctl.h> 21 22 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM) 23 static int dev_major_minor(const char *devpath, 24 unsigned int *devmajor, unsigned int *devminor) 25 { 26 struct stat st; 27 28 *devmajor = 0; 29 *devminor = 0; 30 31 if (stat(devpath, &st) < 0) { 32 slog("failed to stat device file '%s': %s", devpath, strerror(errno)); 33 return -1; 34 } 35 if (S_ISDIR(st.st_mode)) { 36 /* It is bind mount */ 37 return -2; 38 } 39 if (S_ISBLK(st.st_mode)) { 40 *devmajor = major(st.st_rdev); 41 *devminor = minor(st.st_rdev); 42 return 0; 43 } 44 return -1; 45 } 46 47 static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp) 48 { 49 struct mntent *ment; 50 FsMount *mount; 51 char const *mtab = "/proc/self/mounts"; 52 FILE *fp; 53 unsigned int devmajor, devminor; 54 55 fp = setmntent(mtab, "r"); 56 if (!fp) { 57 error_setg(errp, "failed to open mtab file: '%s'", mtab); 58 return false; 59 } 60 61 while ((ment = getmntent(fp))) { 62 /* 63 * An entry which device name doesn't start with a '/' is 64 * either a dummy file system or a network file system. 65 * Add special handling for smbfs and cifs as is done by 66 * coreutils as well. 67 */ 68 if ((ment->mnt_fsname[0] != '/') || 69 (strcmp(ment->mnt_type, "smbfs") == 0) || 70 (strcmp(ment->mnt_type, "cifs") == 0)) { 71 continue; 72 } 73 if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) { 74 /* Skip bind mounts */ 75 continue; 76 } 77 78 mount = g_new0(FsMount, 1); 79 mount->dirname = g_strdup(ment->mnt_dir); 80 mount->devtype = g_strdup(ment->mnt_type); 81 mount->devmajor = devmajor; 82 mount->devminor = devminor; 83 84 QTAILQ_INSERT_TAIL(mounts, mount, next); 85 } 86 87 endmntent(fp); 88 return true; 89 } 90 91 static void decode_mntname(char *name, int len) 92 { 93 int i, j = 0; 94 for (i = 0; i <= len; i++) { 95 if (name[i] != '\\') { 96 name[j++] = name[i]; 97 } else if (name[i + 1] == '\\') { 98 name[j++] = '\\'; 99 i++; 100 } else if (name[i + 1] >= '0' && name[i + 1] <= '3' && 101 name[i + 2] >= '0' && name[i + 2] <= '7' && 102 name[i + 3] >= '0' && name[i + 3] <= '7') { 103 name[j++] = (name[i + 1] - '0') * 64 + 104 (name[i + 2] - '0') * 8 + 105 (name[i + 3] - '0'); 106 i += 3; 107 } else { 108 name[j++] = name[i]; 109 } 110 } 111 } 112 113 /* 114 * Walk the mount table and build a list of local file systems 115 */ 116 bool build_fs_mount_list(FsMountList *mounts, Error **errp) 117 { 118 FsMount *mount; 119 char const *mountinfo = "/proc/self/mountinfo"; 120 FILE *fp; 121 char *line = NULL, *dash; 122 size_t n; 123 char check; 124 unsigned int devmajor, devminor; 125 int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e; 126 127 fp = fopen(mountinfo, "r"); 128 if (!fp) { 129 return build_fs_mount_list_from_mtab(mounts, errp); 130 } 131 132 while (getline(&line, &n, fp) != -1) { 133 ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c", 134 &devmajor, &devminor, &dir_s, &dir_e, &check); 135 if (ret < 3) { 136 continue; 137 } 138 dash = strstr(line + dir_e, " - "); 139 if (!dash) { 140 continue; 141 } 142 ret = sscanf(dash, " - %n%*s%n %n%*s%n%c", 143 &type_s, &type_e, &dev_s, &dev_e, &check); 144 if (ret < 1) { 145 continue; 146 } 147 line[dir_e] = 0; 148 dash[type_e] = 0; 149 dash[dev_e] = 0; 150 decode_mntname(line + dir_s, dir_e - dir_s); 151 decode_mntname(dash + dev_s, dev_e - dev_s); 152 if (devmajor == 0) { 153 /* btrfs reports major number = 0 */ 154 if (strcmp("btrfs", dash + type_s) != 0 || 155 dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) { 156 continue; 157 } 158 } 159 160 mount = g_new0(FsMount, 1); 161 mount->dirname = g_strdup(line + dir_s); 162 mount->devtype = g_strdup(dash + type_s); 163 mount->devmajor = devmajor; 164 mount->devminor = devminor; 165 166 QTAILQ_INSERT_TAIL(mounts, mount, next); 167 } 168 free(line); 169 170 fclose(fp); 171 return true; 172 } 173 #endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */ 174 175 #ifdef CONFIG_FSFREEZE 176 /* 177 * Walk list of mounted file systems in the guest, and freeze the ones which 178 * are real local file systems. 179 */ 180 int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints, 181 strList *mountpoints, 182 FsMountList mounts, 183 Error **errp) 184 { 185 struct FsMount *mount; 186 strList *list; 187 int fd, ret, i = 0; 188 189 QTAILQ_FOREACH_REVERSE(mount, &mounts, next) { 190 /* To issue fsfreeze in the reverse order of mounts, check if the 191 * mount is listed in the list here */ 192 if (has_mountpoints) { 193 for (list = mountpoints; list; list = list->next) { 194 if (strcmp(list->value, mount->dirname) == 0) { 195 break; 196 } 197 } 198 if (!list) { 199 continue; 200 } 201 } 202 203 fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0); 204 if (fd == -1) { 205 error_setg_errno(errp, errno, "failed to open %s", mount->dirname); 206 return -1; 207 } 208 209 /* we try to cull filesystems we know won't work in advance, but other 210 * filesystems may not implement fsfreeze for less obvious reasons. 211 * these will report EOPNOTSUPP. we simply ignore these when tallying 212 * the number of frozen filesystems. 213 * if a filesystem is mounted more than once (aka bind mount) a 214 * consecutive attempt to freeze an already frozen filesystem will 215 * return EBUSY. 216 * 217 * any other error means a failure to freeze a filesystem we 218 * expect to be freezable, so return an error in those cases 219 * and return system to thawed state. 220 */ 221 ret = ioctl(fd, FIFREEZE); 222 if (ret == -1) { 223 if (errno != EOPNOTSUPP && errno != EBUSY) { 224 error_setg_errno(errp, errno, "failed to freeze %s", 225 mount->dirname); 226 close(fd); 227 return -1; 228 } 229 } else { 230 i++; 231 } 232 close(fd); 233 } 234 return i; 235 } 236 237 int qmp_guest_fsfreeze_do_thaw(Error **errp) 238 { 239 int ret; 240 FsMountList mounts; 241 FsMount *mount; 242 int fd, i = 0, logged; 243 Error *local_err = NULL; 244 245 QTAILQ_INIT(&mounts); 246 if (!build_fs_mount_list(&mounts, &local_err)) { 247 error_propagate(errp, local_err); 248 return -1; 249 } 250 251 QTAILQ_FOREACH(mount, &mounts, next) { 252 logged = false; 253 fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0); 254 if (fd == -1) { 255 continue; 256 } 257 /* we have no way of knowing whether a filesystem was actually unfrozen 258 * as a result of a successful call to FITHAW, only that if an error 259 * was returned the filesystem was *not* unfrozen by that particular 260 * call. 261 * 262 * since multiple preceding FIFREEZEs require multiple calls to FITHAW 263 * to unfreeze, continuing issuing FITHAW until an error is returned, 264 * in which case either the filesystem is in an unfreezable state, or, 265 * more likely, it was thawed previously (and remains so afterward). 266 * 267 * also, since the most recent successful call is the one that did 268 * the actual unfreeze, we can use this to provide an accurate count 269 * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which 270 * may * be useful for determining whether a filesystem was unfrozen 271 * during the freeze/thaw phase by a process other than qemu-ga. 272 */ 273 do { 274 ret = ioctl(fd, FITHAW); 275 if (ret == 0 && !logged) { 276 i++; 277 logged = true; 278 } 279 } while (ret == 0); 280 close(fd); 281 } 282 283 free_fs_mount_list(&mounts); 284 285 return i; 286 } 287 #endif /* CONFIG_FSFREEZE */ 288 289 290 #define LINUX_SYS_STATE_FILE "/sys/power/state" 291 #define SUSPEND_SUPPORTED 0 292 #define SUSPEND_NOT_SUPPORTED 1 293 294 typedef enum { 295 SUSPEND_MODE_DISK = 0, 296 SUSPEND_MODE_RAM = 1, 297 SUSPEND_MODE_HYBRID = 2, 298 } SuspendMode; 299 300 /* 301 * Executes a command in a child process using g_spawn_sync, 302 * returning an int >= 0 representing the exit status of the 303 * process. 304 * 305 * If the program wasn't found in path, returns -1. 306 * 307 * If a problem happened when creating the child process, 308 * returns -1 and errp is set. 309 */ 310 static int run_process_child(const char *command[], Error **errp) 311 { 312 int exit_status, spawn_flag; 313 GError *g_err = NULL; 314 bool success; 315 316 spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL | 317 G_SPAWN_STDERR_TO_DEV_NULL; 318 319 success = g_spawn_sync(NULL, (char **)command, NULL, spawn_flag, 320 NULL, NULL, NULL, NULL, 321 &exit_status, &g_err); 322 323 if (success) { 324 return WEXITSTATUS(exit_status); 325 } 326 327 if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) { 328 error_setg(errp, "failed to create child process, error '%s'", 329 g_err->message); 330 } 331 332 g_error_free(g_err); 333 return -1; 334 } 335 336 static bool systemd_supports_mode(SuspendMode mode, Error **errp) 337 { 338 const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend", 339 "systemd-hybrid-sleep"}; 340 const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL}; 341 int status; 342 343 status = run_process_child(cmd, errp); 344 345 /* 346 * systemctl status uses LSB return codes so we can expect 347 * status > 0 and be ok. To assert if the guest has support 348 * for the selected suspend mode, status should be < 4. 4 is 349 * the code for unknown service status, the return value when 350 * the service does not exist. A common value is status = 3 351 * (program is not running). 352 */ 353 if (status > 0 && status < 4) { 354 return true; 355 } 356 357 return false; 358 } 359 360 static void systemd_suspend(SuspendMode mode, Error **errp) 361 { 362 Error *local_err = NULL; 363 const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"}; 364 const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL}; 365 int status; 366 367 status = run_process_child(cmd, &local_err); 368 369 if (status == 0) { 370 return; 371 } 372 373 if ((status == -1) && !local_err) { 374 error_setg(errp, "the helper program 'systemctl %s' was not found", 375 systemctl_args[mode]); 376 return; 377 } 378 379 if (local_err) { 380 error_propagate(errp, local_err); 381 } else { 382 error_setg(errp, "the helper program 'systemctl %s' returned an " 383 "unexpected exit status code (%d)", 384 systemctl_args[mode], status); 385 } 386 } 387 388 static bool pmutils_supports_mode(SuspendMode mode, Error **errp) 389 { 390 Error *local_err = NULL; 391 const char *pmutils_args[3] = {"--hibernate", "--suspend", 392 "--suspend-hybrid"}; 393 const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL}; 394 int status; 395 396 status = run_process_child(cmd, &local_err); 397 398 if (status == SUSPEND_SUPPORTED) { 399 return true; 400 } 401 402 if ((status == -1) && !local_err) { 403 return false; 404 } 405 406 if (local_err) { 407 error_propagate(errp, local_err); 408 } else { 409 error_setg(errp, 410 "the helper program '%s' returned an unexpected exit" 411 " status code (%d)", "pm-is-supported", status); 412 } 413 414 return false; 415 } 416 417 static void pmutils_suspend(SuspendMode mode, Error **errp) 418 { 419 Error *local_err = NULL; 420 const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend", 421 "pm-suspend-hybrid"}; 422 const char *cmd[2] = {pmutils_binaries[mode], NULL}; 423 int status; 424 425 status = run_process_child(cmd, &local_err); 426 427 if (status == 0) { 428 return; 429 } 430 431 if ((status == -1) && !local_err) { 432 error_setg(errp, "the helper program '%s' was not found", 433 pmutils_binaries[mode]); 434 return; 435 } 436 437 if (local_err) { 438 error_propagate(errp, local_err); 439 } else { 440 error_setg(errp, 441 "the helper program '%s' returned an unexpected exit" 442 " status code (%d)", pmutils_binaries[mode], status); 443 } 444 } 445 446 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp) 447 { 448 const char *sysfile_strs[3] = {"disk", "mem", NULL}; 449 const char *sysfile_str = sysfile_strs[mode]; 450 char buf[32]; /* hopefully big enough */ 451 int fd; 452 ssize_t ret; 453 454 if (!sysfile_str) { 455 error_setg(errp, "unknown guest suspend mode"); 456 return false; 457 } 458 459 fd = open(LINUX_SYS_STATE_FILE, O_RDONLY); 460 if (fd < 0) { 461 return false; 462 } 463 464 ret = read(fd, buf, sizeof(buf) - 1); 465 close(fd); 466 if (ret <= 0) { 467 return false; 468 } 469 buf[ret] = '\0'; 470 471 if (strstr(buf, sysfile_str)) { 472 return true; 473 } 474 return false; 475 } 476 477 static void linux_sys_state_suspend(SuspendMode mode, Error **errp) 478 { 479 g_autoptr(GError) local_gerr = NULL; 480 const char *sysfile_strs[3] = {"disk", "mem", NULL}; 481 const char *sysfile_str = sysfile_strs[mode]; 482 483 if (!sysfile_str) { 484 error_setg(errp, "unknown guest suspend mode"); 485 return; 486 } 487 488 if (!g_file_set_contents(LINUX_SYS_STATE_FILE, sysfile_str, 489 -1, &local_gerr)) { 490 error_setg(errp, "suspend: cannot write to '%s': %s", 491 LINUX_SYS_STATE_FILE, local_gerr->message); 492 return; 493 } 494 } 495 496 static void guest_suspend(SuspendMode mode, Error **errp) 497 { 498 Error *local_err = NULL; 499 bool mode_supported = false; 500 501 if (systemd_supports_mode(mode, &local_err)) { 502 mode_supported = true; 503 systemd_suspend(mode, &local_err); 504 505 if (!local_err) { 506 return; 507 } 508 } 509 510 error_free(local_err); 511 local_err = NULL; 512 513 if (pmutils_supports_mode(mode, &local_err)) { 514 mode_supported = true; 515 pmutils_suspend(mode, &local_err); 516 517 if (!local_err) { 518 return; 519 } 520 } 521 522 error_free(local_err); 523 local_err = NULL; 524 525 if (linux_sys_state_supports_mode(mode, &local_err)) { 526 mode_supported = true; 527 linux_sys_state_suspend(mode, &local_err); 528 } 529 530 if (!mode_supported) { 531 error_free(local_err); 532 error_setg(errp, 533 "the requested suspend mode is not supported by the guest"); 534 } else { 535 error_propagate(errp, local_err); 536 } 537 } 538 539 void qmp_guest_suspend_disk(Error **errp) 540 { 541 guest_suspend(SUSPEND_MODE_DISK, errp); 542 } 543 544 void qmp_guest_suspend_ram(Error **errp) 545 { 546 guest_suspend(SUSPEND_MODE_RAM, errp); 547 } 548 549 void qmp_guest_suspend_hybrid(Error **errp) 550 { 551 guest_suspend(SUSPEND_MODE_HYBRID, errp); 552 } 553 554 /* Transfer online/offline status between @vcpu and the guest system. 555 * 556 * On input either @errp or *@errp must be NULL. 557 * 558 * In system-to-@vcpu direction, the following @vcpu fields are accessed: 559 * - R: vcpu->logical_id 560 * - W: vcpu->online 561 * - W: vcpu->can_offline 562 * 563 * In @vcpu-to-system direction, the following @vcpu fields are accessed: 564 * - R: vcpu->logical_id 565 * - R: vcpu->online 566 * 567 * Written members remain unmodified on error. 568 */ 569 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu, 570 char *dirpath, Error **errp) 571 { 572 int fd; 573 int res; 574 int dirfd; 575 static const char fn[] = "online"; 576 577 dirfd = open(dirpath, O_RDONLY | O_DIRECTORY); 578 if (dirfd == -1) { 579 error_setg_errno(errp, errno, "open(\"%s\")", dirpath); 580 return; 581 } 582 583 fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR); 584 if (fd == -1) { 585 if (errno != ENOENT) { 586 error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn); 587 } else if (sys2vcpu) { 588 vcpu->online = true; 589 vcpu->can_offline = false; 590 } else if (!vcpu->online) { 591 error_setg(errp, "logical processor #%" PRId64 " can't be " 592 "offlined", vcpu->logical_id); 593 } /* otherwise pretend successful re-onlining */ 594 } else { 595 unsigned char status; 596 597 res = pread(fd, &status, 1, 0); 598 if (res == -1) { 599 error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn); 600 } else if (res == 0) { 601 error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath, 602 fn); 603 } else if (sys2vcpu) { 604 vcpu->online = (status != '0'); 605 vcpu->can_offline = true; 606 } else if (vcpu->online != (status != '0')) { 607 status = '0' + vcpu->online; 608 if (pwrite(fd, &status, 1, 0) == -1) { 609 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath, 610 fn); 611 } 612 } /* otherwise pretend successful re-(on|off)-lining */ 613 614 res = close(fd); 615 g_assert(res == 0); 616 } 617 618 res = close(dirfd); 619 g_assert(res == 0); 620 } 621 622 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp) 623 { 624 GuestLogicalProcessorList *head, **tail; 625 const char *cpu_dir = "/sys/devices/system/cpu"; 626 const gchar *line; 627 g_autoptr(GDir) cpu_gdir = NULL; 628 Error *local_err = NULL; 629 630 head = NULL; 631 tail = &head; 632 cpu_gdir = g_dir_open(cpu_dir, 0, NULL); 633 634 if (cpu_gdir == NULL) { 635 error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir); 636 return NULL; 637 } 638 639 while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) { 640 GuestLogicalProcessor *vcpu; 641 int64_t id; 642 if (sscanf(line, "cpu%" PRId64, &id)) { 643 g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/" 644 "cpu%" PRId64 "/", id); 645 vcpu = g_malloc0(sizeof *vcpu); 646 vcpu->logical_id = id; 647 vcpu->has_can_offline = true; /* lolspeak ftw */ 648 transfer_vcpu(vcpu, true, path, &local_err); 649 QAPI_LIST_APPEND(tail, vcpu); 650 } 651 } 652 653 if (local_err == NULL) { 654 /* there's no guest with zero VCPUs */ 655 g_assert(head != NULL); 656 return head; 657 } 658 659 qapi_free_GuestLogicalProcessorList(head); 660 error_propagate(errp, local_err); 661 return NULL; 662 } 663 664 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp) 665 { 666 int64_t processed; 667 Error *local_err = NULL; 668 669 processed = 0; 670 while (vcpus != NULL) { 671 char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/", 672 vcpus->value->logical_id); 673 674 transfer_vcpu(vcpus->value, false, path, &local_err); 675 g_free(path); 676 if (local_err != NULL) { 677 break; 678 } 679 ++processed; 680 vcpus = vcpus->next; 681 } 682 683 if (local_err != NULL) { 684 if (processed == 0) { 685 error_propagate(errp, local_err); 686 } else { 687 error_free(local_err); 688 } 689 } 690 691 return processed; 692 } 693