1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu/osdep.h" 25 #include <zlib.h> 26 #include "qemu-common.h" 27 #include "qemu/error-report.h" 28 #include "qemu/iov.h" 29 #include "qemu/sockets.h" 30 #include "qemu/coroutine.h" 31 #include "migration/migration.h" 32 #include "migration/qemu-file.h" 33 #include "trace.h" 34 35 #define IO_BUF_SIZE 32768 36 #define MAX_IOV_SIZE MIN(IOV_MAX, 64) 37 38 struct QEMUFile { 39 const QEMUFileOps *ops; 40 const QEMUFileHooks *hooks; 41 void *opaque; 42 43 int64_t bytes_xfer; 44 int64_t xfer_limit; 45 46 int64_t pos; /* start of buffer when writing, end of buffer 47 when reading */ 48 int buf_index; 49 int buf_size; /* 0 when writing */ 50 uint8_t buf[IO_BUF_SIZE]; 51 52 DECLARE_BITMAP(may_free, MAX_IOV_SIZE); 53 struct iovec iov[MAX_IOV_SIZE]; 54 unsigned int iovcnt; 55 56 int last_error; 57 }; 58 59 /* 60 * Stop a file from being read/written - not all backing files can do this 61 * typically only sockets can. 62 */ 63 int qemu_file_shutdown(QEMUFile *f) 64 { 65 if (!f->ops->shut_down) { 66 return -ENOSYS; 67 } 68 return f->ops->shut_down(f->opaque, true, true); 69 } 70 71 /* 72 * Result: QEMUFile* for a 'return path' for comms in the opposite direction 73 * NULL if not available 74 */ 75 QEMUFile *qemu_file_get_return_path(QEMUFile *f) 76 { 77 if (!f->ops->get_return_path) { 78 return NULL; 79 } 80 return f->ops->get_return_path(f->opaque); 81 } 82 83 bool qemu_file_mode_is_not_valid(const char *mode) 84 { 85 if (mode == NULL || 86 (mode[0] != 'r' && mode[0] != 'w') || 87 mode[1] != 'b' || mode[2] != 0) { 88 fprintf(stderr, "qemu_fopen: Argument validity check failed\n"); 89 return true; 90 } 91 92 return false; 93 } 94 95 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) 96 { 97 QEMUFile *f; 98 99 f = g_new0(QEMUFile, 1); 100 101 f->opaque = opaque; 102 f->ops = ops; 103 return f; 104 } 105 106 107 void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) 108 { 109 f->hooks = hooks; 110 } 111 112 /* 113 * Get last error for stream f 114 * 115 * Return negative error value if there has been an error on previous 116 * operations, return 0 if no error happened. 117 * 118 */ 119 int qemu_file_get_error(QEMUFile *f) 120 { 121 return f->last_error; 122 } 123 124 void qemu_file_set_error(QEMUFile *f, int ret) 125 { 126 if (f->last_error == 0) { 127 f->last_error = ret; 128 } 129 } 130 131 bool qemu_file_is_writable(QEMUFile *f) 132 { 133 return f->ops->writev_buffer; 134 } 135 136 static void qemu_iovec_release_ram(QEMUFile *f) 137 { 138 struct iovec iov; 139 unsigned long idx; 140 141 /* Find and release all the contiguous memory ranges marked as may_free. */ 142 idx = find_next_bit(f->may_free, f->iovcnt, 0); 143 if (idx >= f->iovcnt) { 144 return; 145 } 146 iov = f->iov[idx]; 147 148 /* The madvise() in the loop is called for iov within a continuous range and 149 * then reinitialize the iov. And in the end, madvise() is called for the 150 * last iov. 151 */ 152 while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) { 153 /* check for adjacent buffer and coalesce them */ 154 if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) { 155 iov.iov_len += f->iov[idx].iov_len; 156 continue; 157 } 158 if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { 159 error_report("migrate: madvise DONTNEED failed %p %zd: %s", 160 iov.iov_base, iov.iov_len, strerror(errno)); 161 } 162 iov = f->iov[idx]; 163 } 164 if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { 165 error_report("migrate: madvise DONTNEED failed %p %zd: %s", 166 iov.iov_base, iov.iov_len, strerror(errno)); 167 } 168 memset(f->may_free, 0, sizeof(f->may_free)); 169 } 170 171 /** 172 * Flushes QEMUFile buffer 173 * 174 * If there is writev_buffer QEMUFileOps it uses it otherwise uses 175 * put_buffer ops. This will flush all pending data. If data was 176 * only partially flushed, it will set an error state. 177 */ 178 void qemu_fflush(QEMUFile *f) 179 { 180 ssize_t ret = 0; 181 ssize_t expect = 0; 182 183 if (!qemu_file_is_writable(f)) { 184 return; 185 } 186 187 if (f->iovcnt > 0) { 188 expect = iov_size(f->iov, f->iovcnt); 189 ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); 190 191 qemu_iovec_release_ram(f); 192 } 193 194 if (ret >= 0) { 195 f->pos += ret; 196 } 197 /* We expect the QEMUFile write impl to send the full 198 * data set we requested, so sanity check that. 199 */ 200 if (ret != expect) { 201 qemu_file_set_error(f, ret < 0 ? ret : -EIO); 202 } 203 f->buf_index = 0; 204 f->iovcnt = 0; 205 } 206 207 void ram_control_before_iterate(QEMUFile *f, uint64_t flags) 208 { 209 int ret = 0; 210 211 if (f->hooks && f->hooks->before_ram_iterate) { 212 ret = f->hooks->before_ram_iterate(f, f->opaque, flags, NULL); 213 if (ret < 0) { 214 qemu_file_set_error(f, ret); 215 } 216 } 217 } 218 219 void ram_control_after_iterate(QEMUFile *f, uint64_t flags) 220 { 221 int ret = 0; 222 223 if (f->hooks && f->hooks->after_ram_iterate) { 224 ret = f->hooks->after_ram_iterate(f, f->opaque, flags, NULL); 225 if (ret < 0) { 226 qemu_file_set_error(f, ret); 227 } 228 } 229 } 230 231 void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data) 232 { 233 int ret = -EINVAL; 234 235 if (f->hooks && f->hooks->hook_ram_load) { 236 ret = f->hooks->hook_ram_load(f, f->opaque, flags, data); 237 if (ret < 0) { 238 qemu_file_set_error(f, ret); 239 } 240 } else { 241 /* 242 * Hook is a hook specifically requested by the source sending a flag 243 * that expects there to be a hook on the destination. 244 */ 245 if (flags == RAM_CONTROL_HOOK) { 246 qemu_file_set_error(f, ret); 247 } 248 } 249 } 250 251 size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, 252 ram_addr_t offset, size_t size, 253 uint64_t *bytes_sent) 254 { 255 if (f->hooks && f->hooks->save_page) { 256 int ret = f->hooks->save_page(f, f->opaque, block_offset, 257 offset, size, bytes_sent); 258 259 if (ret != RAM_SAVE_CONTROL_DELAYED) { 260 if (bytes_sent && *bytes_sent > 0) { 261 qemu_update_position(f, *bytes_sent); 262 } else if (ret < 0) { 263 qemu_file_set_error(f, ret); 264 } 265 } 266 267 return ret; 268 } 269 270 return RAM_SAVE_CONTROL_NOT_SUPP; 271 } 272 273 /* 274 * Attempt to fill the buffer from the underlying file 275 * Returns the number of bytes read, or negative value for an error. 276 * 277 * Note that it can return a partially full buffer even in a not error/not EOF 278 * case if the underlying file descriptor gives a short read, and that can 279 * happen even on a blocking fd. 280 */ 281 static ssize_t qemu_fill_buffer(QEMUFile *f) 282 { 283 int len; 284 int pending; 285 286 assert(!qemu_file_is_writable(f)); 287 288 pending = f->buf_size - f->buf_index; 289 if (pending > 0) { 290 memmove(f->buf, f->buf + f->buf_index, pending); 291 } 292 f->buf_index = 0; 293 f->buf_size = pending; 294 295 len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, 296 IO_BUF_SIZE - pending); 297 if (len > 0) { 298 f->buf_size += len; 299 f->pos += len; 300 } else if (len == 0) { 301 qemu_file_set_error(f, -EIO); 302 } else if (len != -EAGAIN) { 303 qemu_file_set_error(f, len); 304 } 305 306 return len; 307 } 308 309 void qemu_update_position(QEMUFile *f, size_t size) 310 { 311 f->pos += size; 312 } 313 314 /** Closes the file 315 * 316 * Returns negative error value if any error happened on previous operations or 317 * while closing the file. Returns 0 or positive number on success. 318 * 319 * The meaning of return value on success depends on the specific backend 320 * being used. 321 */ 322 int qemu_fclose(QEMUFile *f) 323 { 324 int ret; 325 qemu_fflush(f); 326 ret = qemu_file_get_error(f); 327 328 if (f->ops->close) { 329 int ret2 = f->ops->close(f->opaque); 330 if (ret >= 0) { 331 ret = ret2; 332 } 333 } 334 /* If any error was spotted before closing, we should report it 335 * instead of the close() return value. 336 */ 337 if (f->last_error) { 338 ret = f->last_error; 339 } 340 g_free(f); 341 trace_qemu_file_fclose(); 342 return ret; 343 } 344 345 static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size, 346 bool may_free) 347 { 348 /* check for adjacent buffer and coalesce them */ 349 if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + 350 f->iov[f->iovcnt - 1].iov_len && 351 may_free == test_bit(f->iovcnt - 1, f->may_free)) 352 { 353 f->iov[f->iovcnt - 1].iov_len += size; 354 } else { 355 if (may_free) { 356 set_bit(f->iovcnt, f->may_free); 357 } 358 f->iov[f->iovcnt].iov_base = (uint8_t *)buf; 359 f->iov[f->iovcnt++].iov_len = size; 360 } 361 362 if (f->iovcnt >= MAX_IOV_SIZE) { 363 qemu_fflush(f); 364 } 365 } 366 367 void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, 368 bool may_free) 369 { 370 if (f->last_error) { 371 return; 372 } 373 374 f->bytes_xfer += size; 375 add_to_iovec(f, buf, size, may_free); 376 } 377 378 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) 379 { 380 size_t l; 381 382 if (f->last_error) { 383 return; 384 } 385 386 while (size > 0) { 387 l = IO_BUF_SIZE - f->buf_index; 388 if (l > size) { 389 l = size; 390 } 391 memcpy(f->buf + f->buf_index, buf, l); 392 f->bytes_xfer += l; 393 add_to_iovec(f, f->buf + f->buf_index, l, false); 394 f->buf_index += l; 395 if (f->buf_index == IO_BUF_SIZE) { 396 qemu_fflush(f); 397 } 398 if (qemu_file_get_error(f)) { 399 break; 400 } 401 buf += l; 402 size -= l; 403 } 404 } 405 406 void qemu_put_byte(QEMUFile *f, int v) 407 { 408 if (f->last_error) { 409 return; 410 } 411 412 f->buf[f->buf_index] = v; 413 f->bytes_xfer++; 414 add_to_iovec(f, f->buf + f->buf_index, 1, false); 415 f->buf_index++; 416 if (f->buf_index == IO_BUF_SIZE) { 417 qemu_fflush(f); 418 } 419 } 420 421 void qemu_file_skip(QEMUFile *f, int size) 422 { 423 if (f->buf_index + size <= f->buf_size) { 424 f->buf_index += size; 425 } 426 } 427 428 /* 429 * Read 'size' bytes from file (at 'offset') without moving the 430 * pointer and set 'buf' to point to that data. 431 * 432 * It will return size bytes unless there was an error, in which case it will 433 * return as many as it managed to read (assuming blocking fd's which 434 * all current QEMUFile are) 435 */ 436 size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) 437 { 438 ssize_t pending; 439 size_t index; 440 441 assert(!qemu_file_is_writable(f)); 442 assert(offset < IO_BUF_SIZE); 443 assert(size <= IO_BUF_SIZE - offset); 444 445 /* The 1st byte to read from */ 446 index = f->buf_index + offset; 447 /* The number of available bytes starting at index */ 448 pending = f->buf_size - index; 449 450 /* 451 * qemu_fill_buffer might return just a few bytes, even when there isn't 452 * an error, so loop collecting them until we get enough. 453 */ 454 while (pending < size) { 455 int received = qemu_fill_buffer(f); 456 457 if (received <= 0) { 458 break; 459 } 460 461 index = f->buf_index + offset; 462 pending = f->buf_size - index; 463 } 464 465 if (pending <= 0) { 466 return 0; 467 } 468 if (size > pending) { 469 size = pending; 470 } 471 472 *buf = f->buf + index; 473 return size; 474 } 475 476 /* 477 * Read 'size' bytes of data from the file into buf. 478 * 'size' can be larger than the internal buffer. 479 * 480 * It will return size bytes unless there was an error, in which case it will 481 * return as many as it managed to read (assuming blocking fd's which 482 * all current QEMUFile are) 483 */ 484 size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) 485 { 486 size_t pending = size; 487 size_t done = 0; 488 489 while (pending > 0) { 490 size_t res; 491 uint8_t *src; 492 493 res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0); 494 if (res == 0) { 495 return done; 496 } 497 memcpy(buf, src, res); 498 qemu_file_skip(f, res); 499 buf += res; 500 pending -= res; 501 done += res; 502 } 503 return done; 504 } 505 506 /* 507 * Read 'size' bytes of data from the file. 508 * 'size' can be larger than the internal buffer. 509 * 510 * The data: 511 * may be held on an internal buffer (in which case *buf is updated 512 * to point to it) that is valid until the next qemu_file operation. 513 * OR 514 * will be copied to the *buf that was passed in. 515 * 516 * The code tries to avoid the copy if possible. 517 * 518 * It will return size bytes unless there was an error, in which case it will 519 * return as many as it managed to read (assuming blocking fd's which 520 * all current QEMUFile are) 521 * 522 * Note: Since **buf may get changed, the caller should take care to 523 * keep a pointer to the original buffer if it needs to deallocate it. 524 */ 525 size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) 526 { 527 if (size < IO_BUF_SIZE) { 528 size_t res; 529 uint8_t *src; 530 531 res = qemu_peek_buffer(f, &src, size, 0); 532 533 if (res == size) { 534 qemu_file_skip(f, res); 535 *buf = src; 536 return res; 537 } 538 } 539 540 return qemu_get_buffer(f, *buf, size); 541 } 542 543 /* 544 * Peeks a single byte from the buffer; this isn't guaranteed to work if 545 * offset leaves a gap after the previous read/peeked data. 546 */ 547 int qemu_peek_byte(QEMUFile *f, int offset) 548 { 549 int index = f->buf_index + offset; 550 551 assert(!qemu_file_is_writable(f)); 552 assert(offset < IO_BUF_SIZE); 553 554 if (index >= f->buf_size) { 555 qemu_fill_buffer(f); 556 index = f->buf_index + offset; 557 if (index >= f->buf_size) { 558 return 0; 559 } 560 } 561 return f->buf[index]; 562 } 563 564 int qemu_get_byte(QEMUFile *f) 565 { 566 int result; 567 568 result = qemu_peek_byte(f, 0); 569 qemu_file_skip(f, 1); 570 return result; 571 } 572 573 int64_t qemu_ftell_fast(QEMUFile *f) 574 { 575 int64_t ret = f->pos; 576 int i; 577 578 for (i = 0; i < f->iovcnt; i++) { 579 ret += f->iov[i].iov_len; 580 } 581 582 return ret; 583 } 584 585 int64_t qemu_ftell(QEMUFile *f) 586 { 587 qemu_fflush(f); 588 return f->pos; 589 } 590 591 int qemu_file_rate_limit(QEMUFile *f) 592 { 593 if (qemu_file_get_error(f)) { 594 return 1; 595 } 596 if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) { 597 return 1; 598 } 599 return 0; 600 } 601 602 int64_t qemu_file_get_rate_limit(QEMUFile *f) 603 { 604 return f->xfer_limit; 605 } 606 607 void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit) 608 { 609 f->xfer_limit = limit; 610 } 611 612 void qemu_file_reset_rate_limit(QEMUFile *f) 613 { 614 f->bytes_xfer = 0; 615 } 616 617 void qemu_put_be16(QEMUFile *f, unsigned int v) 618 { 619 qemu_put_byte(f, v >> 8); 620 qemu_put_byte(f, v); 621 } 622 623 void qemu_put_be32(QEMUFile *f, unsigned int v) 624 { 625 qemu_put_byte(f, v >> 24); 626 qemu_put_byte(f, v >> 16); 627 qemu_put_byte(f, v >> 8); 628 qemu_put_byte(f, v); 629 } 630 631 void qemu_put_be64(QEMUFile *f, uint64_t v) 632 { 633 qemu_put_be32(f, v >> 32); 634 qemu_put_be32(f, v); 635 } 636 637 unsigned int qemu_get_be16(QEMUFile *f) 638 { 639 unsigned int v; 640 v = qemu_get_byte(f) << 8; 641 v |= qemu_get_byte(f); 642 return v; 643 } 644 645 unsigned int qemu_get_be32(QEMUFile *f) 646 { 647 unsigned int v; 648 v = (unsigned int)qemu_get_byte(f) << 24; 649 v |= qemu_get_byte(f) << 16; 650 v |= qemu_get_byte(f) << 8; 651 v |= qemu_get_byte(f); 652 return v; 653 } 654 655 uint64_t qemu_get_be64(QEMUFile *f) 656 { 657 uint64_t v; 658 v = (uint64_t)qemu_get_be32(f) << 32; 659 v |= qemu_get_be32(f); 660 return v; 661 } 662 663 /* Compress size bytes of data start at p with specific compression 664 * level and store the compressed data to the buffer of f. 665 * 666 * When f is not writable, return -1 if f has no space to save the 667 * compressed data. 668 * When f is wirtable and it has no space to save the compressed data, 669 * do fflush first, if f still has no space to save the compressed 670 * data, return -1. 671 */ 672 673 ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, 674 int level) 675 { 676 ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); 677 678 if (blen < compressBound(size)) { 679 if (!qemu_file_is_writable(f)) { 680 return -1; 681 } 682 qemu_fflush(f); 683 blen = IO_BUF_SIZE - sizeof(int32_t); 684 if (blen < compressBound(size)) { 685 return -1; 686 } 687 } 688 if (compress2(f->buf + f->buf_index + sizeof(int32_t), (uLongf *)&blen, 689 (Bytef *)p, size, level) != Z_OK) { 690 error_report("Compress Failed!"); 691 return 0; 692 } 693 qemu_put_be32(f, blen); 694 if (f->ops->writev_buffer) { 695 add_to_iovec(f, f->buf + f->buf_index, blen, false); 696 } 697 f->buf_index += blen; 698 if (f->buf_index == IO_BUF_SIZE) { 699 qemu_fflush(f); 700 } 701 return blen + sizeof(int32_t); 702 } 703 704 /* Put the data in the buffer of f_src to the buffer of f_des, and 705 * then reset the buf_index of f_src to 0. 706 */ 707 708 int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) 709 { 710 int len = 0; 711 712 if (f_src->buf_index > 0) { 713 len = f_src->buf_index; 714 qemu_put_buffer(f_des, f_src->buf, f_src->buf_index); 715 f_src->buf_index = 0; 716 f_src->iovcnt = 0; 717 } 718 return len; 719 } 720 721 /* 722 * Get a string whose length is determined by a single preceding byte 723 * A preallocated 256 byte buffer must be passed in. 724 * Returns: len on success and a 0 terminated string in the buffer 725 * else 0 726 * (Note a 0 length string will return 0 either way) 727 */ 728 size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) 729 { 730 size_t len = qemu_get_byte(f); 731 size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); 732 733 buf[res] = 0; 734 735 return res == len ? res : 0; 736 } 737 738 /* 739 * Set the blocking state of the QEMUFile. 740 * Note: On some transports the OS only keeps a single blocking state for 741 * both directions, and thus changing the blocking on the main 742 * QEMUFile can also affect the return path. 743 */ 744 void qemu_file_set_blocking(QEMUFile *f, bool block) 745 { 746 if (f->ops->set_blocking) { 747 f->ops->set_blocking(f->opaque, block); 748 } 749 } 750