1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu/osdep.h" 25 #include <zlib.h> 26 #include "qemu/error-report.h" 27 #include "qemu/iov.h" 28 #include "migration.h" 29 #include "qemu-file.h" 30 #include "trace.h" 31 32 #define IO_BUF_SIZE 32768 33 #define MAX_IOV_SIZE MIN(IOV_MAX, 64) 34 35 struct QEMUFile { 36 const QEMUFileOps *ops; 37 const QEMUFileHooks *hooks; 38 void *opaque; 39 40 int64_t bytes_xfer; 41 int64_t xfer_limit; 42 43 int64_t pos; /* start of buffer when writing, end of buffer 44 when reading */ 45 int buf_index; 46 int buf_size; /* 0 when writing */ 47 uint8_t buf[IO_BUF_SIZE]; 48 49 DECLARE_BITMAP(may_free, MAX_IOV_SIZE); 50 struct iovec iov[MAX_IOV_SIZE]; 51 unsigned int iovcnt; 52 53 int last_error; 54 }; 55 56 /* 57 * Stop a file from being read/written - not all backing files can do this 58 * typically only sockets can. 59 */ 60 int qemu_file_shutdown(QEMUFile *f) 61 { 62 if (!f->ops->shut_down) { 63 return -ENOSYS; 64 } 65 return f->ops->shut_down(f->opaque, true, true); 66 } 67 68 /* 69 * Result: QEMUFile* for a 'return path' for comms in the opposite direction 70 * NULL if not available 71 */ 72 QEMUFile *qemu_file_get_return_path(QEMUFile *f) 73 { 74 if (!f->ops->get_return_path) { 75 return NULL; 76 } 77 return f->ops->get_return_path(f->opaque); 78 } 79 80 bool qemu_file_mode_is_not_valid(const char *mode) 81 { 82 if (mode == NULL || 83 (mode[0] != 'r' && mode[0] != 'w') || 84 mode[1] != 'b' || mode[2] != 0) { 85 fprintf(stderr, "qemu_fopen: Argument validity check failed\n"); 86 return true; 87 } 88 89 return false; 90 } 91 92 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) 93 { 94 QEMUFile *f; 95 96 f = g_new0(QEMUFile, 1); 97 98 f->opaque = opaque; 99 f->ops = ops; 100 return f; 101 } 102 103 104 void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) 105 { 106 f->hooks = hooks; 107 } 108 109 /* 110 * Get last error for stream f 111 * 112 * Return negative error value if there has been an error on previous 113 * operations, return 0 if no error happened. 114 * 115 */ 116 int qemu_file_get_error(QEMUFile *f) 117 { 118 return f->last_error; 119 } 120 121 void qemu_file_set_error(QEMUFile *f, int ret) 122 { 123 if (f->last_error == 0) { 124 f->last_error = ret; 125 } 126 } 127 128 bool qemu_file_is_writable(QEMUFile *f) 129 { 130 return f->ops->writev_buffer; 131 } 132 133 static void qemu_iovec_release_ram(QEMUFile *f) 134 { 135 struct iovec iov; 136 unsigned long idx; 137 138 /* Find and release all the contiguous memory ranges marked as may_free. */ 139 idx = find_next_bit(f->may_free, f->iovcnt, 0); 140 if (idx >= f->iovcnt) { 141 return; 142 } 143 iov = f->iov[idx]; 144 145 /* The madvise() in the loop is called for iov within a continuous range and 146 * then reinitialize the iov. And in the end, madvise() is called for the 147 * last iov. 148 */ 149 while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) { 150 /* check for adjacent buffer and coalesce them */ 151 if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) { 152 iov.iov_len += f->iov[idx].iov_len; 153 continue; 154 } 155 if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { 156 error_report("migrate: madvise DONTNEED failed %p %zd: %s", 157 iov.iov_base, iov.iov_len, strerror(errno)); 158 } 159 iov = f->iov[idx]; 160 } 161 if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { 162 error_report("migrate: madvise DONTNEED failed %p %zd: %s", 163 iov.iov_base, iov.iov_len, strerror(errno)); 164 } 165 memset(f->may_free, 0, sizeof(f->may_free)); 166 } 167 168 /** 169 * Flushes QEMUFile buffer 170 * 171 * If there is writev_buffer QEMUFileOps it uses it otherwise uses 172 * put_buffer ops. This will flush all pending data. If data was 173 * only partially flushed, it will set an error state. 174 */ 175 void qemu_fflush(QEMUFile *f) 176 { 177 ssize_t ret = 0; 178 ssize_t expect = 0; 179 180 if (!qemu_file_is_writable(f)) { 181 return; 182 } 183 184 if (f->iovcnt > 0) { 185 expect = iov_size(f->iov, f->iovcnt); 186 ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); 187 188 qemu_iovec_release_ram(f); 189 } 190 191 if (ret >= 0) { 192 f->pos += ret; 193 } 194 /* We expect the QEMUFile write impl to send the full 195 * data set we requested, so sanity check that. 196 */ 197 if (ret != expect) { 198 qemu_file_set_error(f, ret < 0 ? ret : -EIO); 199 } 200 f->buf_index = 0; 201 f->iovcnt = 0; 202 } 203 204 void ram_control_before_iterate(QEMUFile *f, uint64_t flags) 205 { 206 int ret = 0; 207 208 if (f->hooks && f->hooks->before_ram_iterate) { 209 ret = f->hooks->before_ram_iterate(f, f->opaque, flags, NULL); 210 if (ret < 0) { 211 qemu_file_set_error(f, ret); 212 } 213 } 214 } 215 216 void ram_control_after_iterate(QEMUFile *f, uint64_t flags) 217 { 218 int ret = 0; 219 220 if (f->hooks && f->hooks->after_ram_iterate) { 221 ret = f->hooks->after_ram_iterate(f, f->opaque, flags, NULL); 222 if (ret < 0) { 223 qemu_file_set_error(f, ret); 224 } 225 } 226 } 227 228 void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data) 229 { 230 int ret = -EINVAL; 231 232 if (f->hooks && f->hooks->hook_ram_load) { 233 ret = f->hooks->hook_ram_load(f, f->opaque, flags, data); 234 if (ret < 0) { 235 qemu_file_set_error(f, ret); 236 } 237 } else { 238 /* 239 * Hook is a hook specifically requested by the source sending a flag 240 * that expects there to be a hook on the destination. 241 */ 242 if (flags == RAM_CONTROL_HOOK) { 243 qemu_file_set_error(f, ret); 244 } 245 } 246 } 247 248 size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, 249 ram_addr_t offset, size_t size, 250 uint64_t *bytes_sent) 251 { 252 if (f->hooks && f->hooks->save_page) { 253 int ret = f->hooks->save_page(f, f->opaque, block_offset, 254 offset, size, bytes_sent); 255 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 256 f->bytes_xfer += size; 257 } 258 259 if (ret != RAM_SAVE_CONTROL_DELAYED && 260 ret != RAM_SAVE_CONTROL_NOT_SUPP) { 261 if (bytes_sent && *bytes_sent > 0) { 262 qemu_update_position(f, *bytes_sent); 263 } else if (ret < 0) { 264 qemu_file_set_error(f, ret); 265 } 266 } 267 268 return ret; 269 } 270 271 return RAM_SAVE_CONTROL_NOT_SUPP; 272 } 273 274 /* 275 * Attempt to fill the buffer from the underlying file 276 * Returns the number of bytes read, or negative value for an error. 277 * 278 * Note that it can return a partially full buffer even in a not error/not EOF 279 * case if the underlying file descriptor gives a short read, and that can 280 * happen even on a blocking fd. 281 */ 282 static ssize_t qemu_fill_buffer(QEMUFile *f) 283 { 284 int len; 285 int pending; 286 287 assert(!qemu_file_is_writable(f)); 288 289 pending = f->buf_size - f->buf_index; 290 if (pending > 0) { 291 memmove(f->buf, f->buf + f->buf_index, pending); 292 } 293 f->buf_index = 0; 294 f->buf_size = pending; 295 296 len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, 297 IO_BUF_SIZE - pending); 298 if (len > 0) { 299 f->buf_size += len; 300 f->pos += len; 301 } else if (len == 0) { 302 qemu_file_set_error(f, -EIO); 303 } else if (len != -EAGAIN) { 304 qemu_file_set_error(f, len); 305 } 306 307 return len; 308 } 309 310 void qemu_update_position(QEMUFile *f, size_t size) 311 { 312 f->pos += size; 313 } 314 315 /** Closes the file 316 * 317 * Returns negative error value if any error happened on previous operations or 318 * while closing the file. Returns 0 or positive number on success. 319 * 320 * The meaning of return value on success depends on the specific backend 321 * being used. 322 */ 323 int qemu_fclose(QEMUFile *f) 324 { 325 int ret; 326 qemu_fflush(f); 327 ret = qemu_file_get_error(f); 328 329 if (f->ops->close) { 330 int ret2 = f->ops->close(f->opaque); 331 if (ret >= 0) { 332 ret = ret2; 333 } 334 } 335 /* If any error was spotted before closing, we should report it 336 * instead of the close() return value. 337 */ 338 if (f->last_error) { 339 ret = f->last_error; 340 } 341 g_free(f); 342 trace_qemu_file_fclose(); 343 return ret; 344 } 345 346 static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size, 347 bool may_free) 348 { 349 /* check for adjacent buffer and coalesce them */ 350 if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + 351 f->iov[f->iovcnt - 1].iov_len && 352 may_free == test_bit(f->iovcnt - 1, f->may_free)) 353 { 354 f->iov[f->iovcnt - 1].iov_len += size; 355 } else { 356 if (may_free) { 357 set_bit(f->iovcnt, f->may_free); 358 } 359 f->iov[f->iovcnt].iov_base = (uint8_t *)buf; 360 f->iov[f->iovcnt++].iov_len = size; 361 } 362 363 if (f->iovcnt >= MAX_IOV_SIZE) { 364 qemu_fflush(f); 365 } 366 } 367 368 void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, 369 bool may_free) 370 { 371 if (f->last_error) { 372 return; 373 } 374 375 f->bytes_xfer += size; 376 add_to_iovec(f, buf, size, may_free); 377 } 378 379 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) 380 { 381 size_t l; 382 383 if (f->last_error) { 384 return; 385 } 386 387 while (size > 0) { 388 l = IO_BUF_SIZE - f->buf_index; 389 if (l > size) { 390 l = size; 391 } 392 memcpy(f->buf + f->buf_index, buf, l); 393 f->bytes_xfer += l; 394 add_to_iovec(f, f->buf + f->buf_index, l, false); 395 f->buf_index += l; 396 if (f->buf_index == IO_BUF_SIZE) { 397 qemu_fflush(f); 398 } 399 if (qemu_file_get_error(f)) { 400 break; 401 } 402 buf += l; 403 size -= l; 404 } 405 } 406 407 void qemu_put_byte(QEMUFile *f, int v) 408 { 409 if (f->last_error) { 410 return; 411 } 412 413 f->buf[f->buf_index] = v; 414 f->bytes_xfer++; 415 add_to_iovec(f, f->buf + f->buf_index, 1, false); 416 f->buf_index++; 417 if (f->buf_index == IO_BUF_SIZE) { 418 qemu_fflush(f); 419 } 420 } 421 422 void qemu_file_skip(QEMUFile *f, int size) 423 { 424 if (f->buf_index + size <= f->buf_size) { 425 f->buf_index += size; 426 } 427 } 428 429 /* 430 * Read 'size' bytes from file (at 'offset') without moving the 431 * pointer and set 'buf' to point to that data. 432 * 433 * It will return size bytes unless there was an error, in which case it will 434 * return as many as it managed to read (assuming blocking fd's which 435 * all current QEMUFile are) 436 */ 437 size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) 438 { 439 ssize_t pending; 440 size_t index; 441 442 assert(!qemu_file_is_writable(f)); 443 assert(offset < IO_BUF_SIZE); 444 assert(size <= IO_BUF_SIZE - offset); 445 446 /* The 1st byte to read from */ 447 index = f->buf_index + offset; 448 /* The number of available bytes starting at index */ 449 pending = f->buf_size - index; 450 451 /* 452 * qemu_fill_buffer might return just a few bytes, even when there isn't 453 * an error, so loop collecting them until we get enough. 454 */ 455 while (pending < size) { 456 int received = qemu_fill_buffer(f); 457 458 if (received <= 0) { 459 break; 460 } 461 462 index = f->buf_index + offset; 463 pending = f->buf_size - index; 464 } 465 466 if (pending <= 0) { 467 return 0; 468 } 469 if (size > pending) { 470 size = pending; 471 } 472 473 *buf = f->buf + index; 474 return size; 475 } 476 477 /* 478 * Read 'size' bytes of data from the file into buf. 479 * 'size' can be larger than the internal buffer. 480 * 481 * It will return size bytes unless there was an error, in which case it will 482 * return as many as it managed to read (assuming blocking fd's which 483 * all current QEMUFile are) 484 */ 485 size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) 486 { 487 size_t pending = size; 488 size_t done = 0; 489 490 while (pending > 0) { 491 size_t res; 492 uint8_t *src; 493 494 res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0); 495 if (res == 0) { 496 return done; 497 } 498 memcpy(buf, src, res); 499 qemu_file_skip(f, res); 500 buf += res; 501 pending -= res; 502 done += res; 503 } 504 return done; 505 } 506 507 /* 508 * Read 'size' bytes of data from the file. 509 * 'size' can be larger than the internal buffer. 510 * 511 * The data: 512 * may be held on an internal buffer (in which case *buf is updated 513 * to point to it) that is valid until the next qemu_file operation. 514 * OR 515 * will be copied to the *buf that was passed in. 516 * 517 * The code tries to avoid the copy if possible. 518 * 519 * It will return size bytes unless there was an error, in which case it will 520 * return as many as it managed to read (assuming blocking fd's which 521 * all current QEMUFile are) 522 * 523 * Note: Since **buf may get changed, the caller should take care to 524 * keep a pointer to the original buffer if it needs to deallocate it. 525 */ 526 size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) 527 { 528 if (size < IO_BUF_SIZE) { 529 size_t res; 530 uint8_t *src; 531 532 res = qemu_peek_buffer(f, &src, size, 0); 533 534 if (res == size) { 535 qemu_file_skip(f, res); 536 *buf = src; 537 return res; 538 } 539 } 540 541 return qemu_get_buffer(f, *buf, size); 542 } 543 544 /* 545 * Peeks a single byte from the buffer; this isn't guaranteed to work if 546 * offset leaves a gap after the previous read/peeked data. 547 */ 548 int qemu_peek_byte(QEMUFile *f, int offset) 549 { 550 int index = f->buf_index + offset; 551 552 assert(!qemu_file_is_writable(f)); 553 assert(offset < IO_BUF_SIZE); 554 555 if (index >= f->buf_size) { 556 qemu_fill_buffer(f); 557 index = f->buf_index + offset; 558 if (index >= f->buf_size) { 559 return 0; 560 } 561 } 562 return f->buf[index]; 563 } 564 565 int qemu_get_byte(QEMUFile *f) 566 { 567 int result; 568 569 result = qemu_peek_byte(f, 0); 570 qemu_file_skip(f, 1); 571 return result; 572 } 573 574 int64_t qemu_ftell_fast(QEMUFile *f) 575 { 576 int64_t ret = f->pos; 577 int i; 578 579 for (i = 0; i < f->iovcnt; i++) { 580 ret += f->iov[i].iov_len; 581 } 582 583 return ret; 584 } 585 586 int64_t qemu_ftell(QEMUFile *f) 587 { 588 qemu_fflush(f); 589 return f->pos; 590 } 591 592 int qemu_file_rate_limit(QEMUFile *f) 593 { 594 if (qemu_file_get_error(f)) { 595 return 1; 596 } 597 if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) { 598 return 1; 599 } 600 return 0; 601 } 602 603 int64_t qemu_file_get_rate_limit(QEMUFile *f) 604 { 605 return f->xfer_limit; 606 } 607 608 void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit) 609 { 610 f->xfer_limit = limit; 611 } 612 613 void qemu_file_reset_rate_limit(QEMUFile *f) 614 { 615 f->bytes_xfer = 0; 616 } 617 618 void qemu_put_be16(QEMUFile *f, unsigned int v) 619 { 620 qemu_put_byte(f, v >> 8); 621 qemu_put_byte(f, v); 622 } 623 624 void qemu_put_be32(QEMUFile *f, unsigned int v) 625 { 626 qemu_put_byte(f, v >> 24); 627 qemu_put_byte(f, v >> 16); 628 qemu_put_byte(f, v >> 8); 629 qemu_put_byte(f, v); 630 } 631 632 void qemu_put_be64(QEMUFile *f, uint64_t v) 633 { 634 qemu_put_be32(f, v >> 32); 635 qemu_put_be32(f, v); 636 } 637 638 unsigned int qemu_get_be16(QEMUFile *f) 639 { 640 unsigned int v; 641 v = qemu_get_byte(f) << 8; 642 v |= qemu_get_byte(f); 643 return v; 644 } 645 646 unsigned int qemu_get_be32(QEMUFile *f) 647 { 648 unsigned int v; 649 v = (unsigned int)qemu_get_byte(f) << 24; 650 v |= qemu_get_byte(f) << 16; 651 v |= qemu_get_byte(f) << 8; 652 v |= qemu_get_byte(f); 653 return v; 654 } 655 656 uint64_t qemu_get_be64(QEMUFile *f) 657 { 658 uint64_t v; 659 v = (uint64_t)qemu_get_be32(f) << 32; 660 v |= qemu_get_be32(f); 661 return v; 662 } 663 664 /* return the size after compression, or negative value on error */ 665 static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len, 666 const uint8_t *source, size_t source_len) 667 { 668 int err; 669 670 err = deflateReset(stream); 671 if (err != Z_OK) { 672 return -1; 673 } 674 675 stream->avail_in = source_len; 676 stream->next_in = (uint8_t *)source; 677 stream->avail_out = dest_len; 678 stream->next_out = dest; 679 680 err = deflate(stream, Z_FINISH); 681 if (err != Z_STREAM_END) { 682 return -1; 683 } 684 685 return stream->next_out - dest; 686 } 687 688 /* Compress size bytes of data start at p and store the compressed 689 * data to the buffer of f. 690 * 691 * When f is not writable, return -1 if f has no space to save the 692 * compressed data. 693 * When f is wirtable and it has no space to save the compressed data, 694 * do fflush first, if f still has no space to save the compressed 695 * data, return -1. 696 */ 697 ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, 698 const uint8_t *p, size_t size) 699 { 700 ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); 701 702 if (blen < compressBound(size)) { 703 if (!qemu_file_is_writable(f)) { 704 return -1; 705 } 706 qemu_fflush(f); 707 blen = IO_BUF_SIZE - sizeof(int32_t); 708 if (blen < compressBound(size)) { 709 return -1; 710 } 711 } 712 713 blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t), 714 blen, p, size); 715 if (blen < 0) { 716 return -1; 717 } 718 719 qemu_put_be32(f, blen); 720 if (f->ops->writev_buffer) { 721 add_to_iovec(f, f->buf + f->buf_index, blen, false); 722 } 723 f->buf_index += blen; 724 if (f->buf_index == IO_BUF_SIZE) { 725 qemu_fflush(f); 726 } 727 return blen + sizeof(int32_t); 728 } 729 730 /* Put the data in the buffer of f_src to the buffer of f_des, and 731 * then reset the buf_index of f_src to 0. 732 */ 733 734 int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) 735 { 736 int len = 0; 737 738 if (f_src->buf_index > 0) { 739 len = f_src->buf_index; 740 qemu_put_buffer(f_des, f_src->buf, f_src->buf_index); 741 f_src->buf_index = 0; 742 f_src->iovcnt = 0; 743 } 744 return len; 745 } 746 747 /* 748 * Get a string whose length is determined by a single preceding byte 749 * A preallocated 256 byte buffer must be passed in. 750 * Returns: len on success and a 0 terminated string in the buffer 751 * else 0 752 * (Note a 0 length string will return 0 either way) 753 */ 754 size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) 755 { 756 size_t len = qemu_get_byte(f); 757 size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); 758 759 buf[res] = 0; 760 761 return res == len ? res : 0; 762 } 763 764 /* 765 * Put a string with one preceding byte containing its length. The length of 766 * the string should be less than 256. 767 */ 768 void qemu_put_counted_string(QEMUFile *f, const char *str) 769 { 770 size_t len = strlen(str); 771 772 assert(len < 256); 773 qemu_put_byte(f, len); 774 qemu_put_buffer(f, (const uint8_t *)str, len); 775 } 776 777 /* 778 * Set the blocking state of the QEMUFile. 779 * Note: On some transports the OS only keeps a single blocking state for 780 * both directions, and thus changing the blocking on the main 781 * QEMUFile can also affect the return path. 782 */ 783 void qemu_file_set_blocking(QEMUFile *f, bool block) 784 { 785 if (f->ops->set_blocking) { 786 f->ops->set_blocking(f->opaque, block); 787 } 788 } 789