xref: /openbmc/qemu/migration/qemu-file.c (revision 0806b30c8dff64e944456aa15bdc6957384e29a8)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include <zlib.h>
26 #include "qemu-common.h"
27 #include "qemu/error-report.h"
28 #include "qemu/iov.h"
29 #include "qemu/sockets.h"
30 #include "qemu/coroutine.h"
31 #include "migration/migration.h"
32 #include "migration/qemu-file.h"
33 #include "trace.h"
34 
35 #define IO_BUF_SIZE 32768
36 #define MAX_IOV_SIZE MIN(IOV_MAX, 64)
37 
38 struct QEMUFile {
39     const QEMUFileOps *ops;
40     const QEMUFileHooks *hooks;
41     void *opaque;
42 
43     int64_t bytes_xfer;
44     int64_t xfer_limit;
45 
46     int64_t pos; /* start of buffer when writing, end of buffer
47                     when reading */
48     int buf_index;
49     int buf_size; /* 0 when writing */
50     uint8_t buf[IO_BUF_SIZE];
51 
52     DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
53     struct iovec iov[MAX_IOV_SIZE];
54     unsigned int iovcnt;
55 
56     int last_error;
57 };
58 
59 /*
60  * Stop a file from being read/written - not all backing files can do this
61  * typically only sockets can.
62  */
63 int qemu_file_shutdown(QEMUFile *f)
64 {
65     if (!f->ops->shut_down) {
66         return -ENOSYS;
67     }
68     return f->ops->shut_down(f->opaque, true, true);
69 }
70 
71 /*
72  * Result: QEMUFile* for a 'return path' for comms in the opposite direction
73  *         NULL if not available
74  */
75 QEMUFile *qemu_file_get_return_path(QEMUFile *f)
76 {
77     if (!f->ops->get_return_path) {
78         return NULL;
79     }
80     return f->ops->get_return_path(f->opaque);
81 }
82 
/*
 * Check an fopen-style mode string.
 *
 * Only the exact strings "rb" and "wb" are accepted.  Returns false for
 * those; for anything else (including NULL) a diagnostic is printed to
 * stderr and true is returned.
 */
bool qemu_file_mode_is_not_valid(const char *mode)
{
    bool valid = mode != NULL &&
                 (mode[0] == 'r' || mode[0] == 'w') &&
                 mode[1] == 'b' &&
                 mode[2] == '\0';

    if (valid) {
        return false;
    }

    fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
    return true;
}
94 
95 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
96 {
97     QEMUFile *f;
98 
99     f = g_new0(QEMUFile, 1);
100 
101     f->opaque = opaque;
102     f->ops = ops;
103     return f;
104 }
105 
106 
107 void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
108 {
109     f->hooks = hooks;
110 }
111 
112 /*
113  * Get last error for stream f
114  *
115  * Return negative error value if there has been an error on previous
116  * operations, return 0 if no error happened.
117  *
118  */
119 int qemu_file_get_error(QEMUFile *f)
120 {
121     return f->last_error;
122 }
123 
124 void qemu_file_set_error(QEMUFile *f, int ret)
125 {
126     if (f->last_error == 0) {
127         f->last_error = ret;
128     }
129 }
130 
131 bool qemu_file_is_writable(QEMUFile *f)
132 {
133     return f->ops->writev_buffer;
134 }
135 
136 static void qemu_iovec_release_ram(QEMUFile *f)
137 {
138     struct iovec iov;
139     unsigned long idx;
140 
141     /* Find and release all the contiguous memory ranges marked as may_free. */
142     idx = find_next_bit(f->may_free, f->iovcnt, 0);
143     if (idx >= f->iovcnt) {
144         return;
145     }
146     iov = f->iov[idx];
147 
148     /* The madvise() in the loop is called for iov within a continuous range and
149      * then reinitialize the iov. And in the end, madvise() is called for the
150      * last iov.
151      */
152     while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
153         /* check for adjacent buffer and coalesce them */
154         if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
155             iov.iov_len += f->iov[idx].iov_len;
156             continue;
157         }
158         if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
159             error_report("migrate: madvise DONTNEED failed %p %zd: %s",
160                          iov.iov_base, iov.iov_len, strerror(errno));
161         }
162         iov = f->iov[idx];
163     }
164     if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
165             error_report("migrate: madvise DONTNEED failed %p %zd: %s",
166                          iov.iov_base, iov.iov_len, strerror(errno));
167     }
168     memset(f->may_free, 0, sizeof(f->may_free));
169 }
170 
171 /**
172  * Flushes QEMUFile buffer
173  *
174  * If there is writev_buffer QEMUFileOps it uses it otherwise uses
175  * put_buffer ops. This will flush all pending data. If data was
176  * only partially flushed, it will set an error state.
177  */
178 void qemu_fflush(QEMUFile *f)
179 {
180     ssize_t ret = 0;
181     ssize_t expect = 0;
182 
183     if (!qemu_file_is_writable(f)) {
184         return;
185     }
186 
187     if (f->iovcnt > 0) {
188         expect = iov_size(f->iov, f->iovcnt);
189         ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
190 
191         qemu_iovec_release_ram(f);
192     }
193 
194     if (ret >= 0) {
195         f->pos += ret;
196     }
197     /* We expect the QEMUFile write impl to send the full
198      * data set we requested, so sanity check that.
199      */
200     if (ret != expect) {
201         qemu_file_set_error(f, ret < 0 ? ret : -EIO);
202     }
203     f->buf_index = 0;
204     f->iovcnt = 0;
205 }
206 
207 void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
208 {
209     int ret = 0;
210 
211     if (f->hooks && f->hooks->before_ram_iterate) {
212         ret = f->hooks->before_ram_iterate(f, f->opaque, flags, NULL);
213         if (ret < 0) {
214             qemu_file_set_error(f, ret);
215         }
216     }
217 }
218 
219 void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
220 {
221     int ret = 0;
222 
223     if (f->hooks && f->hooks->after_ram_iterate) {
224         ret = f->hooks->after_ram_iterate(f, f->opaque, flags, NULL);
225         if (ret < 0) {
226             qemu_file_set_error(f, ret);
227         }
228     }
229 }
230 
231 void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
232 {
233     int ret = -EINVAL;
234 
235     if (f->hooks && f->hooks->hook_ram_load) {
236         ret = f->hooks->hook_ram_load(f, f->opaque, flags, data);
237         if (ret < 0) {
238             qemu_file_set_error(f, ret);
239         }
240     } else {
241         /*
242          * Hook is a hook specifically requested by the source sending a flag
243          * that expects there to be a hook on the destination.
244          */
245         if (flags == RAM_CONTROL_HOOK) {
246             qemu_file_set_error(f, ret);
247         }
248     }
249 }
250 
251 size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
252                              ram_addr_t offset, size_t size,
253                              uint64_t *bytes_sent)
254 {
255     if (f->hooks && f->hooks->save_page) {
256         int ret = f->hooks->save_page(f, f->opaque, block_offset,
257                                       offset, size, bytes_sent);
258 
259         if (ret != RAM_SAVE_CONTROL_DELAYED) {
260             if (bytes_sent && *bytes_sent > 0) {
261                 qemu_update_position(f, *bytes_sent);
262             } else if (ret < 0) {
263                 qemu_file_set_error(f, ret);
264             }
265         }
266 
267         return ret;
268     }
269 
270     return RAM_SAVE_CONTROL_NOT_SUPP;
271 }
272 
273 /*
274  * Attempt to fill the buffer from the underlying file
275  * Returns the number of bytes read, or negative value for an error.
276  *
277  * Note that it can return a partially full buffer even in a not error/not EOF
278  * case if the underlying file descriptor gives a short read, and that can
279  * happen even on a blocking fd.
280  */
281 static ssize_t qemu_fill_buffer(QEMUFile *f)
282 {
283     int len;
284     int pending;
285 
286     assert(!qemu_file_is_writable(f));
287 
288     pending = f->buf_size - f->buf_index;
289     if (pending > 0) {
290         memmove(f->buf, f->buf + f->buf_index, pending);
291     }
292     f->buf_index = 0;
293     f->buf_size = pending;
294 
295     len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
296                         IO_BUF_SIZE - pending);
297     if (len > 0) {
298         f->buf_size += len;
299         f->pos += len;
300     } else if (len == 0) {
301         qemu_file_set_error(f, -EIO);
302     } else if (len != -EAGAIN) {
303         qemu_file_set_error(f, len);
304     }
305 
306     return len;
307 }
308 
309 void qemu_update_position(QEMUFile *f, size_t size)
310 {
311     f->pos += size;
312 }
313 
314 /** Closes the file
315  *
316  * Returns negative error value if any error happened on previous operations or
317  * while closing the file. Returns 0 or positive number on success.
318  *
319  * The meaning of return value on success depends on the specific backend
320  * being used.
321  */
322 int qemu_fclose(QEMUFile *f)
323 {
324     int ret;
325     qemu_fflush(f);
326     ret = qemu_file_get_error(f);
327 
328     if (f->ops->close) {
329         int ret2 = f->ops->close(f->opaque);
330         if (ret >= 0) {
331             ret = ret2;
332         }
333     }
334     /* If any error was spotted before closing, we should report it
335      * instead of the close() return value.
336      */
337     if (f->last_error) {
338         ret = f->last_error;
339     }
340     g_free(f);
341     trace_qemu_file_fclose();
342     return ret;
343 }
344 
345 static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
346                          bool may_free)
347 {
348     /* check for adjacent buffer and coalesce them */
349     if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
350         f->iov[f->iovcnt - 1].iov_len &&
351         may_free == test_bit(f->iovcnt - 1, f->may_free))
352     {
353         f->iov[f->iovcnt - 1].iov_len += size;
354     } else {
355         if (may_free) {
356             set_bit(f->iovcnt, f->may_free);
357         }
358         f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
359         f->iov[f->iovcnt++].iov_len = size;
360     }
361 
362     if (f->iovcnt >= MAX_IOV_SIZE) {
363         qemu_fflush(f);
364     }
365 }
366 
367 void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
368                            bool may_free)
369 {
370     if (f->last_error) {
371         return;
372     }
373 
374     f->bytes_xfer += size;
375     add_to_iovec(f, buf, size, may_free);
376 }
377 
378 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
379 {
380     size_t l;
381 
382     if (f->last_error) {
383         return;
384     }
385 
386     while (size > 0) {
387         l = IO_BUF_SIZE - f->buf_index;
388         if (l > size) {
389             l = size;
390         }
391         memcpy(f->buf + f->buf_index, buf, l);
392         f->bytes_xfer += l;
393         add_to_iovec(f, f->buf + f->buf_index, l, false);
394         f->buf_index += l;
395         if (f->buf_index == IO_BUF_SIZE) {
396             qemu_fflush(f);
397         }
398         if (qemu_file_get_error(f)) {
399             break;
400         }
401         buf += l;
402         size -= l;
403     }
404 }
405 
406 void qemu_put_byte(QEMUFile *f, int v)
407 {
408     if (f->last_error) {
409         return;
410     }
411 
412     f->buf[f->buf_index] = v;
413     f->bytes_xfer++;
414     add_to_iovec(f, f->buf + f->buf_index, 1, false);
415     f->buf_index++;
416     if (f->buf_index == IO_BUF_SIZE) {
417         qemu_fflush(f);
418     }
419 }
420 
421 void qemu_file_skip(QEMUFile *f, int size)
422 {
423     if (f->buf_index + size <= f->buf_size) {
424         f->buf_index += size;
425     }
426 }
427 
428 /*
429  * Read 'size' bytes from file (at 'offset') without moving the
430  * pointer and set 'buf' to point to that data.
431  *
432  * It will return size bytes unless there was an error, in which case it will
433  * return as many as it managed to read (assuming blocking fd's which
434  * all current QEMUFile are)
435  */
436 size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
437 {
438     ssize_t pending;
439     size_t index;
440 
441     assert(!qemu_file_is_writable(f));
442     assert(offset < IO_BUF_SIZE);
443     assert(size <= IO_BUF_SIZE - offset);
444 
445     /* The 1st byte to read from */
446     index = f->buf_index + offset;
447     /* The number of available bytes starting at index */
448     pending = f->buf_size - index;
449 
450     /*
451      * qemu_fill_buffer might return just a few bytes, even when there isn't
452      * an error, so loop collecting them until we get enough.
453      */
454     while (pending < size) {
455         int received = qemu_fill_buffer(f);
456 
457         if (received <= 0) {
458             break;
459         }
460 
461         index = f->buf_index + offset;
462         pending = f->buf_size - index;
463     }
464 
465     if (pending <= 0) {
466         return 0;
467     }
468     if (size > pending) {
469         size = pending;
470     }
471 
472     *buf = f->buf + index;
473     return size;
474 }
475 
476 /*
477  * Read 'size' bytes of data from the file into buf.
478  * 'size' can be larger than the internal buffer.
479  *
480  * It will return size bytes unless there was an error, in which case it will
481  * return as many as it managed to read (assuming blocking fd's which
482  * all current QEMUFile are)
483  */
484 size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
485 {
486     size_t pending = size;
487     size_t done = 0;
488 
489     while (pending > 0) {
490         size_t res;
491         uint8_t *src;
492 
493         res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
494         if (res == 0) {
495             return done;
496         }
497         memcpy(buf, src, res);
498         qemu_file_skip(f, res);
499         buf += res;
500         pending -= res;
501         done += res;
502     }
503     return done;
504 }
505 
506 /*
507  * Read 'size' bytes of data from the file.
508  * 'size' can be larger than the internal buffer.
509  *
510  * The data:
511  *   may be held on an internal buffer (in which case *buf is updated
512  *     to point to it) that is valid until the next qemu_file operation.
513  * OR
514  *   will be copied to the *buf that was passed in.
515  *
516  * The code tries to avoid the copy if possible.
517  *
518  * It will return size bytes unless there was an error, in which case it will
519  * return as many as it managed to read (assuming blocking fd's which
520  * all current QEMUFile are)
521  *
522  * Note: Since **buf may get changed, the caller should take care to
523  *       keep a pointer to the original buffer if it needs to deallocate it.
524  */
525 size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
526 {
527     if (size < IO_BUF_SIZE) {
528         size_t res;
529         uint8_t *src;
530 
531         res = qemu_peek_buffer(f, &src, size, 0);
532 
533         if (res == size) {
534             qemu_file_skip(f, res);
535             *buf = src;
536             return res;
537         }
538     }
539 
540     return qemu_get_buffer(f, *buf, size);
541 }
542 
543 /*
544  * Peeks a single byte from the buffer; this isn't guaranteed to work if
545  * offset leaves a gap after the previous read/peeked data.
546  */
547 int qemu_peek_byte(QEMUFile *f, int offset)
548 {
549     int index = f->buf_index + offset;
550 
551     assert(!qemu_file_is_writable(f));
552     assert(offset < IO_BUF_SIZE);
553 
554     if (index >= f->buf_size) {
555         qemu_fill_buffer(f);
556         index = f->buf_index + offset;
557         if (index >= f->buf_size) {
558             return 0;
559         }
560     }
561     return f->buf[index];
562 }
563 
564 int qemu_get_byte(QEMUFile *f)
565 {
566     int result;
567 
568     result = qemu_peek_byte(f, 0);
569     qemu_file_skip(f, 1);
570     return result;
571 }
572 
573 int64_t qemu_ftell_fast(QEMUFile *f)
574 {
575     int64_t ret = f->pos;
576     int i;
577 
578     for (i = 0; i < f->iovcnt; i++) {
579         ret += f->iov[i].iov_len;
580     }
581 
582     return ret;
583 }
584 
585 int64_t qemu_ftell(QEMUFile *f)
586 {
587     qemu_fflush(f);
588     return f->pos;
589 }
590 
591 int qemu_file_rate_limit(QEMUFile *f)
592 {
593     if (qemu_file_get_error(f)) {
594         return 1;
595     }
596     if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) {
597         return 1;
598     }
599     return 0;
600 }
601 
602 int64_t qemu_file_get_rate_limit(QEMUFile *f)
603 {
604     return f->xfer_limit;
605 }
606 
607 void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit)
608 {
609     f->xfer_limit = limit;
610 }
611 
612 void qemu_file_reset_rate_limit(QEMUFile *f)
613 {
614     f->bytes_xfer = 0;
615 }
616 
617 void qemu_put_be16(QEMUFile *f, unsigned int v)
618 {
619     qemu_put_byte(f, v >> 8);
620     qemu_put_byte(f, v);
621 }
622 
623 void qemu_put_be32(QEMUFile *f, unsigned int v)
624 {
625     qemu_put_byte(f, v >> 24);
626     qemu_put_byte(f, v >> 16);
627     qemu_put_byte(f, v >> 8);
628     qemu_put_byte(f, v);
629 }
630 
631 void qemu_put_be64(QEMUFile *f, uint64_t v)
632 {
633     qemu_put_be32(f, v >> 32);
634     qemu_put_be32(f, v);
635 }
636 
637 unsigned int qemu_get_be16(QEMUFile *f)
638 {
639     unsigned int v;
640     v = qemu_get_byte(f) << 8;
641     v |= qemu_get_byte(f);
642     return v;
643 }
644 
645 unsigned int qemu_get_be32(QEMUFile *f)
646 {
647     unsigned int v;
648     v = (unsigned int)qemu_get_byte(f) << 24;
649     v |= qemu_get_byte(f) << 16;
650     v |= qemu_get_byte(f) << 8;
651     v |= qemu_get_byte(f);
652     return v;
653 }
654 
655 uint64_t qemu_get_be64(QEMUFile *f)
656 {
657     uint64_t v;
658     v = (uint64_t)qemu_get_be32(f) << 32;
659     v |= qemu_get_be32(f);
660     return v;
661 }
662 
663 /* Compress size bytes of data start at p with specific compression
664  * level and store the compressed data to the buffer of f.
665  *
666  * When f is not writable, return -1 if f has no space to save the
667  * compressed data.
668  * When f is wirtable and it has no space to save the compressed data,
669  * do fflush first, if f still has no space to save the compressed
670  * data, return -1.
671  */
672 
673 ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
674                                   int level)
675 {
676     ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
677 
678     if (blen < compressBound(size)) {
679         if (!qemu_file_is_writable(f)) {
680             return -1;
681         }
682         qemu_fflush(f);
683         blen = IO_BUF_SIZE - sizeof(int32_t);
684         if (blen < compressBound(size)) {
685             return -1;
686         }
687     }
688     if (compress2(f->buf + f->buf_index + sizeof(int32_t), (uLongf *)&blen,
689                   (Bytef *)p, size, level) != Z_OK) {
690         error_report("Compress Failed!");
691         return 0;
692     }
693     qemu_put_be32(f, blen);
694     if (f->ops->writev_buffer) {
695         add_to_iovec(f, f->buf + f->buf_index, blen, false);
696     }
697     f->buf_index += blen;
698     if (f->buf_index == IO_BUF_SIZE) {
699         qemu_fflush(f);
700     }
701     return blen + sizeof(int32_t);
702 }
703 
704 /* Put the data in the buffer of f_src to the buffer of f_des, and
705  * then reset the buf_index of f_src to 0.
706  */
707 
708 int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src)
709 {
710     int len = 0;
711 
712     if (f_src->buf_index > 0) {
713         len = f_src->buf_index;
714         qemu_put_buffer(f_des, f_src->buf, f_src->buf_index);
715         f_src->buf_index = 0;
716         f_src->iovcnt = 0;
717     }
718     return len;
719 }
720 
721 /*
722  * Get a string whose length is determined by a single preceding byte
723  * A preallocated 256 byte buffer must be passed in.
724  * Returns: len on success and a 0 terminated string in the buffer
725  *          else 0
726  *          (Note a 0 length string will return 0 either way)
727  */
728 size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
729 {
730     size_t len = qemu_get_byte(f);
731     size_t res = qemu_get_buffer(f, (uint8_t *)buf, len);
732 
733     buf[res] = 0;
734 
735     return res == len ? res : 0;
736 }
737 
738 /*
739  * Set the blocking state of the QEMUFile.
740  * Note: On some transports the OS only keeps a single blocking state for
741  *       both directions, and thus changing the blocking on the main
742  *       QEMUFile can also affect the return path.
743  */
744 void qemu_file_set_blocking(QEMUFile *f, bool block)
745 {
746     if (f->ops->set_blocking) {
747         f->ops->set_blocking(f->opaque, block);
748     }
749 }
750