xref: /openbmc/qemu/block/parallels.c (revision bd5629db935a6c17c86ffbb6a39aa85eed807346)
1 /*
2  * Block driver for Parallels disk image format
3  *
4  * Copyright (c) 2007 Alex Beregszaszi
5  * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
6  *
7  * This code was originally based on comparing different disk images created
8  * by Parallels. Currently it is based on opened OpenVZ sources
9  * available at
10  *     http://git.openvz.org/?p=ploop;a=summary
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this software and associated documentation files (the "Software"), to deal
14  * in the Software without restriction, including without limitation the rights
15  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16  * copies of the Software, and to permit persons to whom the Software is
17  * furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28  * THE SOFTWARE.
29  */
30 
31 #include "qemu/osdep.h"
32 #include "qemu/error-report.h"
33 #include "qapi/error.h"
34 #include "block/block_int.h"
35 #include "block/qdict.h"
36 #include "sysemu/block-backend.h"
37 #include "qemu/module.h"
38 #include "qemu/option.h"
39 #include "qapi/qmp/qdict.h"
40 #include "qapi/qobject-input-visitor.h"
41 #include "qapi/qapi-visit-block-core.h"
42 #include "qemu/bswap.h"
43 #include "qemu/bitmap.h"
44 #include "qemu/memalign.h"
45 #include "migration/blocker.h"
46 #include "parallels.h"
47 
48 /**************************************************************/
49 
/*
 * On-disk header constants.
 *
 * Two magic words exist; the driver compares 16 bytes of the header magic
 * against HEADER_MAGIC to detect the "old magic" flavour of the format.
 */
#define HEADER_MAGIC                "WithoutFreeSpace"
#define HEADER_MAGIC2               "WithouFreSpacExt"
#define HEADER_VERSION              2
/* NOTE(review): presumably the "image is open" marker - confirm at use site */
#define HEADER_INUSE_MAGIC          (0x746F6E59)
#define MAX_PARALLELS_IMAGE_FACTOR  (1ull << 32)
55 
56 static QEnumLookup prealloc_mode_lookup = {
57     .array = (const char *const[]) {
58         "falloc",
59         "truncate",
60     },
61     .size = PRL_PREALLOC_MODE__MAX
62 };
63 
64 #define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
65 #define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
66 
67 static QemuOptsList parallels_runtime_opts = {
68     .name = "parallels",
69     .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
70     .desc = {
71         {
72             .name = PARALLELS_OPT_PREALLOC_SIZE,
73             .type = QEMU_OPT_SIZE,
74             .help = "Preallocation size on image expansion",
75             .def_value_str = "128M",
76         },
77         {
78             .name = PARALLELS_OPT_PREALLOC_MODE,
79             .type = QEMU_OPT_STRING,
80             .help = "Preallocation mode on image expansion "
81                     "(allowed values: falloc, truncate)",
82             .def_value_str = "falloc",
83         },
84         { /* end of list */ },
85     },
86 };
87 
88 static QemuOptsList parallels_create_opts = {
89     .name = "parallels-create-opts",
90     .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
91     .desc = {
92         {
93             .name = BLOCK_OPT_SIZE,
94             .type = QEMU_OPT_SIZE,
95             .help = "Virtual disk size",
96         },
97         {
98             .name = BLOCK_OPT_CLUSTER_SIZE,
99             .type = QEMU_OPT_SIZE,
100             .help = "Parallels image cluster size",
101             .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
102         },
103         { /* end of list */ }
104     }
105 };
106 
107 
/*
 * Translate BAT entry @idx into a host sector number: the little-endian
 * 32-bit entry is widened to 64 bits and scaled by s->off_multiplier.
 */
static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
{
    uint64_t entry = le32_to_cpu(s->bat_bitmap[idx]);

    return entry * s->off_multiplier;
}
112 
bat_entry_off(uint32_t idx)113 static uint32_t bat_entry_off(uint32_t idx)
114 {
115     return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
116 }
117 
/*
 * Map guest sector @sector_num to a host sector.
 *
 * Returns the host sector, or -1 when the containing cluster lies beyond
 * the BAT or its BAT entry is zero (i.e. the cluster is not allocated).
 */
static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
{
    uint32_t cluster = sector_num / s->tracks;
    uint32_t in_cluster = sector_num % s->tracks;

    if (cluster < s->bat_size && s->bat_bitmap[cluster] != 0) {
        return bat2sect(s, cluster) + in_cluster;
    }

    /* not allocated */
    return -1;
}
131 
/*
 * Number of sectors from @sector_num up to the end of its cluster,
 * capped by @nb_sectors.
 */
static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
                             int nb_sectors)
{
    int left_in_cluster = s->tracks - sector_num % s->tracks;

    return MIN(nb_sectors, left_in_cluster);
}
138 
/*
 * Convert host byte offset @off into a cluster index counted from the
 * start of the data area (s->data_start, expressed in sectors).
 */
static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
{
    int64_t payload_off = off - (s->data_start << BDRV_SECTOR_BITS);

    return payload_off / s->cluster_size;
}
144 
/*
 * Determine how many sectors starting at @sector_num share the same
 * allocation state and, when allocated, are contiguous in the image file.
 *
 * @pnum receives the length of that run in sectors (at most @nb_sectors).
 * Returns the host sector of the first guest sector, or the negative value
 * reported by seek_to_sector() when that sector is not allocated.
 */
static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    /* -2 means "not determined yet"; seek_to_sector() reports holes as -1 */
    int64_t first_off = -2;
    int64_t expected_off = -2;

    *pnum = 0;
    for (;;) {
        int64_t cur_off;
        int chunk;

        /* The first cluster is always inspected, even for an empty request */
        if (nb_sectors <= 0 && first_off != -2) {
            break;
        }

        cur_off = seek_to_sector(s, sector_num);
        if (first_off == -2) {
            first_off = cur_off;
            expected_off = cur_off;
        } else if (cur_off != expected_off) {
            /* Allocation state changed or the host extent is not contiguous */
            break;
        }

        chunk = cluster_remainder(s, sector_num, nb_sectors);
        *pnum += chunk;
        sector_num += chunk;
        nb_sectors -= chunk;

        /* Only allocated runs advance the expected host position */
        if (cur_off > 0) {
            expected_off += chunk;
        }
    }

    return first_off;
}
173 
/*
 * Store @offset (converted to little-endian) into BAT entry @index and flag
 * the header block containing that entry as dirty so that it is written
 * out by the next flush.
 */
static void parallels_set_bat_entry(BDRVParallelsState *s,
                                    uint32_t index, uint32_t offset)
{
    long dirty_block = bat_entry_off(index) / s->bat_dirty_block;

    s->bat_bitmap[index] = cpu_to_le32(offset);
    bitmap_set(s->bat_dirty_bmap, dirty_block, 1);
}
180 
/*
 * Mark @count clusters starting at host byte offset @off as used in @bitmap
 * (which holds @bitmap_size bits).
 *
 * Returns 0 on success, -E2BIG when the range does not fit into the bitmap
 * and -EBUSY when any cluster of the range is already marked; the bitmap is
 * left untouched in both error cases.
 */
static int mark_used(BlockDriverState *bs, unsigned long *bitmap,
                     uint32_t bitmap_size, int64_t off, uint32_t count)
{
    BDRVParallelsState *s = bs->opaque;
    uint32_t first = host_cluster_index(s, off);
    uint64_t end = (uint64_t)first + count;

    if (end > bitmap_size) {
        return -E2BIG;
    }
    if (find_next_bit(bitmap, bitmap_size, first) < end) {
        return -EBUSY;
    }

    bitmap_set(bitmap, first, count);
    return 0;
}
197 
198 /*
199  * Collect used bitmap. The image can contain errors, we should fill the
200  * bitmap anyway, as much as we can. This information will be used for
201  * error resolution.
202  */
parallels_fill_used_bitmap(BlockDriverState * bs)203 static int GRAPH_RDLOCK parallels_fill_used_bitmap(BlockDriverState *bs)
204 {
205     BDRVParallelsState *s = bs->opaque;
206     int64_t payload_bytes;
207     uint32_t i;
208     int err = 0;
209 
210     payload_bytes = bdrv_getlength(bs->file->bs);
211     if (payload_bytes < 0) {
212         return payload_bytes;
213     }
214     payload_bytes -= s->data_start * BDRV_SECTOR_SIZE;
215     if (payload_bytes < 0) {
216         return -EINVAL;
217     }
218 
219     s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
220     if (s->used_bmap_size == 0) {
221         return 0;
222     }
223     s->used_bmap = bitmap_try_new(s->used_bmap_size);
224     if (s->used_bmap == NULL) {
225         return -ENOMEM;
226     }
227 
228     for (i = 0; i < s->bat_size; i++) {
229         int err2;
230         int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
231         if (host_off == 0) {
232             continue;
233         }
234 
235         err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1);
236         if (err2 < 0 && err == 0) {
237             err = err2;
238         }
239     }
240     return err;
241 }
242 
/*
 * Release the used-clusters bitmap and reset the related state.
 *
 * The pointer is cleared after freeing: parallels_fill_used_bitmap() may
 * return early without assigning a new bitmap, and a stale pointer would
 * then be freed a second time or dereferenced by the allocation code.
 */
static void parallels_free_used_bitmap(BlockDriverState *bs)
{
    BDRVParallelsState *s = bs->opaque;

    s->used_bmap_size = 0;
    g_free(s->used_bmap);
    s->used_bmap = NULL;
}
249 
250 static int64_t coroutine_fn GRAPH_RDLOCK
allocate_clusters(BlockDriverState * bs,int64_t sector_num,int nb_sectors,int * pnum)251 allocate_clusters(BlockDriverState *bs, int64_t sector_num,
252                   int nb_sectors, int *pnum)
253 {
254     int ret = 0;
255     BDRVParallelsState *s = bs->opaque;
256     int64_t i, pos, idx, to_allocate, first_free, host_off;
257 
258     pos = block_status(s, sector_num, nb_sectors, pnum);
259     if (pos > 0) {
260         return pos;
261     }
262 
263     idx = sector_num / s->tracks;
264     to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
265 
266     /*
267      * This function is called only by parallels_co_writev(), which will never
268      * pass a sector_num at or beyond the end of the image (because the block
269      * layer never passes such a sector_num to that function). Therefore, idx
270      * is always below s->bat_size.
271      * block_status() will limit *pnum so that sector_num + *pnum will not
272      * exceed the image end. Therefore, idx + to_allocate cannot exceed
273      * s->bat_size.
274      * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
275      * will always fit into a uint32_t.
276      */
277     assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
278 
279     first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
280     if (first_free == s->used_bmap_size) {
281         uint32_t new_usedsize;
282         int64_t bytes = to_allocate * s->cluster_size;
283         bytes += s->prealloc_size * BDRV_SECTOR_SIZE;
284 
285         host_off = s->data_end * BDRV_SECTOR_SIZE;
286 
287         /*
288          * We require the expanded size to read back as zero. If the
289          * user permitted truncation, we try that; but if it fails, we
290          * force the safer-but-slower fallocate.
291          */
292         if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
293             ret = bdrv_co_truncate(bs->file, host_off + bytes,
294                                    false, PREALLOC_MODE_OFF,
295                                    BDRV_REQ_ZERO_WRITE, NULL);
296             if (ret == -ENOTSUP) {
297                 s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
298             }
299         }
300         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
301             ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0);
302         }
303         if (ret < 0) {
304             return ret;
305         }
306 
307         new_usedsize = s->used_bmap_size + bytes / s->cluster_size;
308         s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
309                                           new_usedsize);
310         s->used_bmap_size = new_usedsize;
311     } else {
312         int64_t next_used;
313         next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
314 
315         /* Not enough continuous clusters in the middle, adjust the size */
316         if (next_used - first_free < to_allocate) {
317             to_allocate = next_used - first_free;
318             *pnum = (idx + to_allocate) * s->tracks - sector_num;
319         }
320 
321         host_off = s->data_start * BDRV_SECTOR_SIZE;
322         host_off += first_free * s->cluster_size;
323 
324         /*
325          * No need to preallocate if we are using tail area from the above
326          * branch. In the other case we are likely re-using hole. Preallocate
327          * the space if required by the prealloc_mode.
328          */
329         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
330                 host_off < s->data_end * BDRV_SECTOR_SIZE) {
331             ret = bdrv_co_pwrite_zeroes(bs->file, host_off,
332                                         s->cluster_size * to_allocate, 0);
333             if (ret < 0) {
334                 return ret;
335             }
336         }
337     }
338 
339     /*
340      * Try to read from backing to fill empty clusters
341      * FIXME: 1. previous write_zeroes may be redundant
342      *        2. most of data we read from backing will be rewritten by
343      *           parallels_co_writev. On aligned-to-cluster write we do not need
344      *           this read at all.
345      *        3. it would be good to combine write of data from backing and new
346      *           data into one write call.
347      */
348     if (bs->backing) {
349         int64_t nb_cow_sectors = to_allocate * s->tracks;
350         int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
351         void *buf = qemu_blockalign(bs, nb_cow_bytes);
352 
353         ret = bdrv_co_pread(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE,
354                             nb_cow_bytes, buf, 0);
355         if (ret < 0) {
356             qemu_vfree(buf);
357             return ret;
358         }
359 
360         ret = bdrv_co_pwrite(bs->file, s->data_end * BDRV_SECTOR_SIZE,
361                              nb_cow_bytes, buf, 0);
362         qemu_vfree(buf);
363         if (ret < 0) {
364             return ret;
365         }
366     }
367 
368     ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, to_allocate);
369     if (ret < 0) {
370         /* Image consistency is broken. Alarm! */
371         return ret;
372     }
373     for (i = 0; i < to_allocate; i++) {
374         parallels_set_bat_entry(s, idx + i,
375                 host_off / BDRV_SECTOR_SIZE / s->off_multiplier);
376         host_off += s->cluster_size;
377     }
378     if (host_off > s->data_end * BDRV_SECTOR_SIZE) {
379         s->data_end = host_off / BDRV_SECTOR_SIZE;
380     }
381 
382     return bat2sect(s, idx) + sector_num % s->tracks;
383 }
384 
385 
386 static int coroutine_fn GRAPH_RDLOCK
parallels_co_flush_to_os(BlockDriverState * bs)387 parallels_co_flush_to_os(BlockDriverState *bs)
388 {
389     BDRVParallelsState *s = bs->opaque;
390     unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
391     unsigned long bit;
392 
393     qemu_co_mutex_lock(&s->lock);
394 
395     bit = find_first_bit(s->bat_dirty_bmap, size);
396     while (bit < size) {
397         uint32_t off = bit * s->bat_dirty_block;
398         uint32_t to_write = s->bat_dirty_block;
399         int ret;
400 
401         if (off + to_write > s->header_size) {
402             to_write = s->header_size - off;
403         }
404         ret = bdrv_co_pwrite(bs->file, off, to_write,
405                              (uint8_t *)s->header + off, 0);
406         if (ret < 0) {
407             qemu_co_mutex_unlock(&s->lock);
408             return ret;
409         }
410         bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
411     }
412     bitmap_zero(s->bat_dirty_bmap, size);
413 
414     qemu_co_mutex_unlock(&s->lock);
415     return 0;
416 }
417 
418 static int coroutine_fn GRAPH_RDLOCK
parallels_co_block_status(BlockDriverState * bs,bool want_zero,int64_t offset,int64_t bytes,int64_t * pnum,int64_t * map,BlockDriverState ** file)419 parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
420                           int64_t bytes, int64_t *pnum, int64_t *map,
421                           BlockDriverState **file)
422 {
423     BDRVParallelsState *s = bs->opaque;
424     int count;
425 
426     assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
427     qemu_co_mutex_lock(&s->lock);
428     offset = block_status(s, offset >> BDRV_SECTOR_BITS,
429                           bytes >> BDRV_SECTOR_BITS, &count);
430     qemu_co_mutex_unlock(&s->lock);
431 
432     *pnum = count * BDRV_SECTOR_SIZE;
433     if (offset < 0) {
434         return 0;
435     }
436 
437     *map = offset * BDRV_SECTOR_SIZE;
438     *file = bs->file->bs;
439     return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
440 }
441 
442 static int coroutine_fn GRAPH_RDLOCK
parallels_co_writev(BlockDriverState * bs,int64_t sector_num,int nb_sectors,QEMUIOVector * qiov,int flags)443 parallels_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
444                     QEMUIOVector *qiov, int flags)
445 {
446     BDRVParallelsState *s = bs->opaque;
447     uint64_t bytes_done = 0;
448     QEMUIOVector hd_qiov;
449     int ret = 0;
450 
451     qemu_iovec_init(&hd_qiov, qiov->niov);
452 
453     while (nb_sectors > 0) {
454         int64_t position;
455         int n, nbytes;
456 
457         qemu_co_mutex_lock(&s->lock);
458         position = allocate_clusters(bs, sector_num, nb_sectors, &n);
459         qemu_co_mutex_unlock(&s->lock);
460         if (position < 0) {
461             ret = (int)position;
462             break;
463         }
464 
465         nbytes = n << BDRV_SECTOR_BITS;
466 
467         qemu_iovec_reset(&hd_qiov);
468         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
469 
470         ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
471                               &hd_qiov, 0);
472         if (ret < 0) {
473             break;
474         }
475 
476         nb_sectors -= n;
477         sector_num += n;
478         bytes_done += nbytes;
479     }
480 
481     qemu_iovec_destroy(&hd_qiov);
482     return ret;
483 }
484 
485 static int coroutine_fn GRAPH_RDLOCK
parallels_co_readv(BlockDriverState * bs,int64_t sector_num,int nb_sectors,QEMUIOVector * qiov)486 parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
487                    QEMUIOVector *qiov)
488 {
489     BDRVParallelsState *s = bs->opaque;
490     uint64_t bytes_done = 0;
491     QEMUIOVector hd_qiov;
492     int ret = 0;
493 
494     qemu_iovec_init(&hd_qiov, qiov->niov);
495 
496     while (nb_sectors > 0) {
497         int64_t position;
498         int n, nbytes;
499 
500         qemu_co_mutex_lock(&s->lock);
501         position = block_status(s, sector_num, nb_sectors, &n);
502         qemu_co_mutex_unlock(&s->lock);
503 
504         nbytes = n << BDRV_SECTOR_BITS;
505 
506         qemu_iovec_reset(&hd_qiov);
507         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
508 
509         if (position < 0) {
510             if (bs->backing) {
511                 ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE,
512                                      nbytes, &hd_qiov, 0);
513                 if (ret < 0) {
514                     break;
515                 }
516             } else {
517                 qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
518             }
519         } else {
520             ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
521                                  &hd_qiov, 0);
522             if (ret < 0) {
523                 break;
524             }
525         }
526 
527         nb_sectors -= n;
528         sector_num += n;
529         bytes_done += nbytes;
530     }
531 
532     qemu_iovec_destroy(&hd_qiov);
533     return ret;
534 }
535 
536 
537 static int coroutine_fn GRAPH_RDLOCK
parallels_co_pdiscard(BlockDriverState * bs,int64_t offset,int64_t bytes)538 parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
539 {
540     int ret = 0;
541     uint32_t cluster, count;
542     BDRVParallelsState *s = bs->opaque;
543 
544     /*
545      * The image does not support ZERO mark inside the BAT, which means that
546      * stale data could be exposed from the backing file.
547      */
548     if (bs->backing) {
549         return -ENOTSUP;
550     }
551 
552     if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) {
553         return -ENOTSUP;
554     } else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) {
555         return -ENOTSUP;
556     }
557 
558     cluster = offset / s->cluster_size;
559     count = bytes / s->cluster_size;
560 
561     qemu_co_mutex_lock(&s->lock);
562     for (; count > 0; cluster++, count--) {
563         int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS;
564         if (host_off == 0) {
565             continue;
566         }
567 
568         ret = bdrv_co_pdiscard(bs->file, host_off, s->cluster_size);
569         if (ret < 0) {
570             goto done;
571         }
572 
573         parallels_set_bat_entry(s, cluster, 0);
574         bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1);
575     }
576 done:
577     qemu_co_mutex_unlock(&s->lock);
578     return ret;
579 }
580 
581 static int coroutine_fn GRAPH_RDLOCK
parallels_co_pwrite_zeroes(BlockDriverState * bs,int64_t offset,int64_t bytes,BdrvRequestFlags flags)582 parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
583                            BdrvRequestFlags flags)
584 {
585     /*
586      * The zero flag is missed in the Parallels format specification. We can
587      * resort to discard if we have no backing file (this condition is checked
588      * inside parallels_co_pdiscard().
589      */
590     return parallels_co_pdiscard(bs, offset, bytes);
591 }
592 
593 
/*
 * Check step: report an image that was not closed correctly.
 *
 * Counts one corruption when s->header_unclean is set. With BDRV_FIX_ERRORS
 * the flag is cleared and the corruption is counted as fixed.
 */
static void parallels_check_unclean(BlockDriverState *bs,
                                    BdrvCheckResult *res,
                                    BdrvCheckMode fix)
{
    BDRVParallelsState *s = bs->opaque;
    const bool repair = fix & BDRV_FIX_ERRORS;

    if (!s->header_unclean) {
        return;
    }

    fprintf(stderr, "%s image was not closed correctly\n",
            repair ? "Repairing" : "ERROR");
    res->corruptions++;

    if (!repair) {
        return;
    }

    /* parallels_close will do the job right */
    res->corruptions_fixed++;
    s->header_unclean = false;
}
613 
614 /*
615  * Returns true if data_off is correct, otherwise false. In both cases
616  * correct_offset is set to the proper value.
617  */
parallels_test_data_off(BDRVParallelsState * s,int64_t file_nb_sectors,uint32_t * correct_offset)618 static bool parallels_test_data_off(BDRVParallelsState *s,
619                                     int64_t file_nb_sectors,
620                                     uint32_t *correct_offset)
621 {
622     uint32_t data_off, min_off;
623     bool old_magic;
624 
625     /*
626      * There are two slightly different image formats: with "WithoutFreeSpace"
627      * or "WithouFreSpacExt" magic words. Call the first one as "old magic".
628      * In such images data_off field can be zero. In this case the offset is
629      * calculated as the end of BAT table plus some padding to ensure sector
630      * size alignment.
631      */
632     old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16);
633 
634     min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
635     if (!old_magic) {
636         min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE);
637     }
638 
639     if (correct_offset) {
640         *correct_offset = min_off;
641     }
642 
643     data_off = le32_to_cpu(s->header->data_off);
644     if (data_off == 0 && old_magic) {
645         return true;
646     }
647 
648     if (data_off < min_off || data_off > file_nb_sectors) {
649         return false;
650     }
651 
652     if (correct_offset) {
653         *correct_offset = data_off;
654     }
655 
656     return true;
657 }
658 
659 static int coroutine_fn GRAPH_RDLOCK
parallels_check_data_off(BlockDriverState * bs,BdrvCheckResult * res,BdrvCheckMode fix)660 parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res,
661                          BdrvCheckMode fix)
662 {
663     BDRVParallelsState *s = bs->opaque;
664     int64_t file_size;
665     uint32_t data_off;
666 
667     file_size = bdrv_co_nb_sectors(bs->file->bs);
668     if (file_size < 0) {
669         res->check_errors++;
670         return file_size;
671     }
672 
673     if (parallels_test_data_off(s, file_size, &data_off)) {
674         return 0;
675     }
676 
677     res->corruptions++;
678     if (fix & BDRV_FIX_ERRORS) {
679         int err;
680         s->header->data_off = cpu_to_le32(data_off);
681         s->data_start = data_off;
682 
683         parallels_free_used_bitmap(bs);
684         err = parallels_fill_used_bitmap(bs);
685         if (err == -ENOMEM) {
686             res->check_errors++;
687             return err;
688         }
689 
690         res->corruptions_fixed++;
691     }
692 
693     fprintf(stderr, "%s data_off field has incorrect value\n",
694             fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
695 
696     return 0;
697 }
698 
699 static int coroutine_fn GRAPH_RDLOCK
parallels_check_outside_image(BlockDriverState * bs,BdrvCheckResult * res,BdrvCheckMode fix)700 parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
701                               BdrvCheckMode fix)
702 {
703     BDRVParallelsState *s = bs->opaque;
704     uint32_t i;
705     int64_t off, high_off, size;
706 
707     size = bdrv_co_getlength(bs->file->bs);
708     if (size < 0) {
709         res->check_errors++;
710         return size;
711     }
712 
713     high_off = 0;
714     for (i = 0; i < s->bat_size; i++) {
715         off = bat2sect(s, i) << BDRV_SECTOR_BITS;
716         if (off + s->cluster_size > size) {
717             fprintf(stderr, "%s cluster %u is outside image\n",
718                     fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
719             res->corruptions++;
720             if (fix & BDRV_FIX_ERRORS) {
721                 parallels_set_bat_entry(s, i, 0);
722                 res->corruptions_fixed++;
723             }
724             continue;
725         }
726         if (high_off < off) {
727             high_off = off;
728         }
729     }
730 
731     if (high_off == 0) {
732         res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
733     } else {
734         res->image_end_offset = high_off + s->cluster_size;
735         s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
736     }
737 
738     return 0;
739 }
740 
741 static int coroutine_fn GRAPH_RDLOCK
parallels_check_leak(BlockDriverState * bs,BdrvCheckResult * res,BdrvCheckMode fix,bool explicit)742 parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
743                      BdrvCheckMode fix, bool explicit)
744 {
745     BDRVParallelsState *s = bs->opaque;
746     int64_t size;
747     int ret;
748 
749     size = bdrv_co_getlength(bs->file->bs);
750     if (size < 0) {
751         res->check_errors++;
752         return size;
753     }
754 
755     if (size > res->image_end_offset) {
756         int64_t count;
757         count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
758         if (explicit) {
759             fprintf(stderr,
760                     "%s space leaked at the end of the image %" PRId64 "\n",
761                     fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
762                     size - res->image_end_offset);
763             res->leaks += count;
764         }
765         if (fix & BDRV_FIX_LEAKS) {
766             Error *local_err = NULL;
767 
768             /*
769              * In order to really repair the image, we must shrink it.
770              * That means we have to pass exact=true.
771              */
772             ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
773                                    PREALLOC_MODE_OFF, 0, &local_err);
774             if (ret < 0) {
775                 error_report_err(local_err);
776                 res->check_errors++;
777                 return ret;
778             }
779             if (explicit) {
780                 res->leaks_fixed += count;
781             }
782         }
783     }
784 
785     return 0;
786 }
787 
788 static int coroutine_fn GRAPH_RDLOCK
parallels_check_duplicate(BlockDriverState * bs,BdrvCheckResult * res,BdrvCheckMode fix)789 parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res,
790                           BdrvCheckMode fix)
791 {
792     BDRVParallelsState *s = bs->opaque;
793     int64_t host_off, host_sector, guest_sector;
794     unsigned long *bitmap;
795     uint32_t i, bitmap_size, bat_entry;
796     int n, ret = 0;
797     uint64_t *buf = NULL;
798     bool fixed = false;
799 
800     /*
801      * Create a bitmap of used clusters.
802      * If a bit is set, there is a BAT entry pointing to this cluster.
803      * Loop through the BAT entries, check bits relevant to an entry offset.
804      * If bit is set, this entry is duplicated. Otherwise set the bit.
805      *
806      * We shouldn't worry about newly allocated clusters outside the image
807      * because they are created higher then any existing cluster pointed by
808      * a BAT entry.
809      */
810     bitmap_size = host_cluster_index(s, res->image_end_offset);
811     if (bitmap_size == 0) {
812         return 0;
813     }
814     if (res->image_end_offset % s->cluster_size) {
815         /* A not aligned image end leads to a bitmap shorter by 1 */
816         bitmap_size++;
817     }
818 
819     bitmap = bitmap_new(bitmap_size);
820 
821     buf = qemu_blockalign(bs, s->cluster_size);
822 
823     for (i = 0; i < s->bat_size; i++) {
824         host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
825         if (host_off == 0) {
826             continue;
827         }
828 
829         ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
830         assert(ret != -E2BIG);
831         if (ret == 0) {
832             continue;
833         }
834 
835         /* this cluster duplicates another one */
836         fprintf(stderr, "%s duplicate offset in BAT entry %u\n",
837                 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
838 
839         res->corruptions++;
840 
841         if (!(fix & BDRV_FIX_ERRORS)) {
842             continue;
843         }
844 
845         /*
846          * Reset the entry and allocate a new cluster
847          * for the relevant guest offset. In this way we let
848          * the lower layer to place the new cluster properly.
849          * Copy the original cluster to the allocated one.
850          * But before save the old offset value for repairing
851          * if we have an error.
852          */
853         bat_entry = s->bat_bitmap[i];
854         parallels_set_bat_entry(s, i, 0);
855 
856         ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0);
857         if (ret < 0) {
858             res->check_errors++;
859             goto out_repair_bat;
860         }
861 
862         guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS;
863         host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n);
864         if (host_sector < 0) {
865             res->check_errors++;
866             goto out_repair_bat;
867         }
868         host_off = host_sector << BDRV_SECTOR_BITS;
869 
870         ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0);
871         if (ret < 0) {
872             res->check_errors++;
873             goto out_repair_bat;
874         }
875 
876         if (host_off + s->cluster_size > res->image_end_offset) {
877             res->image_end_offset = host_off + s->cluster_size;
878         }
879 
880         /*
881          * In the future allocate_cluster() will reuse holed offsets
882          * inside the image. Keep the used clusters bitmap content
883          * consistent for the new allocated clusters too.
884          *
885          * Note, clusters allocated outside the current image are not
886          * considered, and the bitmap size doesn't change. This specifically
887          * means that -E2BIG is OK.
888          */
889         ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
890         if (ret == -EBUSY) {
891             res->check_errors++;
892             goto out_repair_bat;
893         }
894 
895         fixed = true;
896         res->corruptions_fixed++;
897 
898     }
899 
900     if (fixed) {
901         /*
902          * When new clusters are allocated, the file size increases by
903          * 128 Mb. We need to truncate the file to the right size. Let
904          * the leak fix code make its job without res changing.
905          */
906         ret = parallels_check_leak(bs, res, fix, false);
907     }
908 
909 out_free:
910     g_free(buf);
911     g_free(bitmap);
912     return ret;
913 /*
914  * We can get here only from places where index and old_offset have
915  * meaningful values.
916  */
917 out_repair_bat:
918     s->bat_bitmap[i] = bat_entry;
919     goto out_free;
920 }
921 
/*
 * Fill the fragmentation info (res->bfi) from the BAT.
 *
 * Every BAT slot counts towards total_clusters. A slot is counted as
 * allocated when it is non-zero and its cluster ends at or before
 * res->image_end_offset. An allocated slot is additionally counted as
 * fragmented when the previous slot was allocated too and this cluster
 * does not start right after the previous one.
 *
 * @fix is accepted for symmetry with the other check helpers and is not
 * used here.
 */
static void parallels_collect_statistics(BlockDriverState *bs,
                                         BdrvCheckResult *res,
                                         BdrvCheckMode fix)
{
    BDRVParallelsState *s = bs->opaque;
    int64_t last_off = 0;
    uint32_t idx;

    res->bfi.total_clusters = s->bat_size;
    res->bfi.compressed_clusters = 0; /* compression is not supported */

    for (idx = 0; idx < s->bat_size; idx++) {
        int64_t cur_off = bat2sect(s, idx) << BDRV_SECTOR_BITS;

        /*
         * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
         * fixed. Only unallocated-free, in-image entries are accounted;
         * anything else also breaks the "previous cluster" chain.
         */
        if (cur_off != 0 &&
            cur_off + s->cluster_size <= res->image_end_offset) {
            if (last_off != 0 && cur_off != last_off + s->cluster_size) {
                res->bfi.fragmented_clusters++;
            }
            res->bfi.allocated_clusters++;
            last_off = cur_off;
        } else {
            last_off = 0;
        }
    }
}
952 
953 static int coroutine_fn GRAPH_RDLOCK
parallels_co_check(BlockDriverState * bs,BdrvCheckResult * res,BdrvCheckMode fix)954 parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
955                    BdrvCheckMode fix)
956 {
957     BDRVParallelsState *s = bs->opaque;
958     int ret;
959 
960     WITH_QEMU_LOCK_GUARD(&s->lock) {
961         parallels_check_unclean(bs, res, fix);
962 
963         ret = parallels_check_data_off(bs, res, fix);
964         if (ret < 0) {
965             return ret;
966         }
967 
968         ret = parallels_check_outside_image(bs, res, fix);
969         if (ret < 0) {
970             return ret;
971         }
972 
973         ret = parallels_check_leak(bs, res, fix, true);
974         if (ret < 0) {
975             return ret;
976         }
977 
978         ret = parallels_check_duplicate(bs, res, fix);
979         if (ret < 0) {
980             return ret;
981         }
982 
983         parallels_collect_statistics(bs, res, fix);
984     }
985 
986     ret = bdrv_co_flush(bs);
987     if (ret < 0) {
988         res->check_errors++;
989     }
990 
991     return ret;
992 }
993 
994 
995 static int coroutine_fn GRAPH_UNLOCKED
parallels_co_create(BlockdevCreateOptions * opts,Error ** errp)996 parallels_co_create(BlockdevCreateOptions* opts, Error **errp)
997 {
998     BlockdevCreateOptionsParallels *parallels_opts;
999     BlockDriverState *bs;
1000     BlockBackend *blk;
1001     int64_t total_size, cl_size;
1002     uint32_t bat_entries, bat_sectors;
1003     ParallelsHeader header;
1004     uint8_t tmp[BDRV_SECTOR_SIZE];
1005     int ret;
1006 
1007     assert(opts->driver == BLOCKDEV_DRIVER_PARALLELS);
1008     parallels_opts = &opts->u.parallels;
1009 
1010     /* Sanity checks */
1011     total_size = parallels_opts->size;
1012 
1013     if (parallels_opts->has_cluster_size) {
1014         cl_size = parallels_opts->cluster_size;
1015     } else {
1016         cl_size = DEFAULT_CLUSTER_SIZE;
1017     }
1018 
1019     /* XXX What is the real limit here? This is an insanely large maximum. */
1020     if (cl_size >= INT64_MAX / MAX_PARALLELS_IMAGE_FACTOR) {
1021         error_setg(errp, "Cluster size is too large");
1022         return -EINVAL;
1023     }
1024     if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
1025         error_setg(errp, "Image size is too large for this cluster size");
1026         return -E2BIG;
1027     }
1028 
1029     if (!QEMU_IS_ALIGNED(total_size, BDRV_SECTOR_SIZE)) {
1030         error_setg(errp, "Image size must be a multiple of 512 bytes");
1031         return -EINVAL;
1032     }
1033 
1034     if (!QEMU_IS_ALIGNED(cl_size, BDRV_SECTOR_SIZE)) {
1035         error_setg(errp, "Cluster size must be a multiple of 512 bytes");
1036         return -EINVAL;
1037     }
1038 
1039     /* Create BlockBackend to write to the image */
1040     bs = bdrv_co_open_blockdev_ref(parallels_opts->file, errp);
1041     if (bs == NULL) {
1042         return -EIO;
1043     }
1044 
1045     blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
1046                              errp);
1047     if (!blk) {
1048         ret = -EPERM;
1049         goto out;
1050     }
1051     blk_set_allow_write_beyond_eof(blk, true);
1052 
1053     /* Create image format */
1054     bat_entries = DIV_ROUND_UP(total_size, cl_size);
1055     bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
1056     bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
1057 
1058     memset(&header, 0, sizeof(header));
1059     memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
1060     header.version = cpu_to_le32(HEADER_VERSION);
1061     /* don't care much about geometry, it is not used on image level */
1062     header.heads = cpu_to_le32(HEADS_NUMBER);
1063     header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE
1064                                    / HEADS_NUMBER / SEC_IN_CYL);
1065     header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
1066     header.bat_entries = cpu_to_le32(bat_entries);
1067     header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
1068     header.data_off = cpu_to_le32(bat_sectors);
1069 
1070     /* write all the data */
1071     memset(tmp, 0, sizeof(tmp));
1072     memcpy(tmp, &header, sizeof(header));
1073 
1074     ret = blk_co_pwrite(blk, 0, BDRV_SECTOR_SIZE, tmp, 0);
1075     if (ret < 0) {
1076         goto exit;
1077     }
1078     ret = blk_co_pwrite_zeroes(blk, BDRV_SECTOR_SIZE,
1079                                (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
1080     if (ret < 0) {
1081         goto exit;
1082     }
1083 
1084     ret = 0;
1085 out:
1086     blk_co_unref(blk);
1087     bdrv_co_unref(bs);
1088     return ret;
1089 
1090 exit:
1091     error_setg_errno(errp, -ret, "Failed to create Parallels image");
1092     goto out;
1093 }
1094 
1095 static int coroutine_fn GRAPH_UNLOCKED
parallels_co_create_opts(BlockDriver * drv,const char * filename,QemuOpts * opts,Error ** errp)1096 parallels_co_create_opts(BlockDriver *drv, const char *filename,
1097                          QemuOpts *opts, Error **errp)
1098 {
1099     BlockdevCreateOptions *create_options = NULL;
1100     BlockDriverState *bs = NULL;
1101     QDict *qdict;
1102     Visitor *v;
1103     int ret;
1104 
1105     static const QDictRenames opt_renames[] = {
1106         { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
1107         { NULL, NULL },
1108     };
1109 
1110     /* Parse options and convert legacy syntax */
1111     qdict = qemu_opts_to_qdict_filtered(opts, NULL, &parallels_create_opts,
1112                                         true);
1113 
1114     if (!qdict_rename_keys(qdict, opt_renames, errp)) {
1115         ret = -EINVAL;
1116         goto done;
1117     }
1118 
1119     /* Create and open the file (protocol layer) */
1120     ret = bdrv_co_create_file(filename, opts, errp);
1121     if (ret < 0) {
1122         goto done;
1123     }
1124 
1125     bs = bdrv_co_open(filename, NULL, NULL,
1126                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
1127     if (bs == NULL) {
1128         ret = -EIO;
1129         goto done;
1130     }
1131 
1132     /* Now get the QAPI type BlockdevCreateOptions */
1133     qdict_put_str(qdict, "driver", "parallels");
1134     qdict_put_str(qdict, "file", bs->node_name);
1135 
1136     v = qobject_input_visitor_new_flat_confused(qdict, errp);
1137     if (!v) {
1138         ret = -EINVAL;
1139         goto done;
1140     }
1141 
1142     visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
1143     visit_free(v);
1144     if (!create_options) {
1145         ret = -EINVAL;
1146         goto done;
1147     }
1148 
1149     /* Silently round up sizes */
1150     create_options->u.parallels.size =
1151         ROUND_UP(create_options->u.parallels.size, BDRV_SECTOR_SIZE);
1152     create_options->u.parallels.cluster_size =
1153         ROUND_UP(create_options->u.parallels.cluster_size, BDRV_SECTOR_SIZE);
1154 
1155     /* Create the Parallels image (format layer) */
1156     ret = parallels_co_create(create_options, errp);
1157     if (ret < 0) {
1158         goto done;
1159     }
1160     ret = 0;
1161 
1162 done:
1163     qobject_unref(qdict);
1164     bdrv_co_unref(bs);
1165     qapi_free_BlockdevCreateOptions(create_options);
1166     return ret;
1167 }
1168 
1169 
parallels_probe(const uint8_t * buf,int buf_size,const char * filename)1170 static int parallels_probe(const uint8_t *buf, int buf_size,
1171                            const char *filename)
1172 {
1173     const ParallelsHeader *ph = (const void *)buf;
1174 
1175     if (buf_size < sizeof(ParallelsHeader)) {
1176         return 0;
1177     }
1178 
1179     if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
1180            !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
1181            (le32_to_cpu(ph->version) == HEADER_VERSION)) {
1182         return 100;
1183     }
1184 
1185     return 0;
1186 }
1187 
parallels_update_header(BlockDriverState * bs)1188 static int GRAPH_RDLOCK parallels_update_header(BlockDriverState *bs)
1189 {
1190     BDRVParallelsState *s = bs->opaque;
1191     unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
1192                         sizeof(ParallelsHeader));
1193 
1194     if (size > s->header_size) {
1195         size = s->header_size;
1196     }
1197     return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0);
1198 }
1199 
1200 
/*
 * Absorb the runtime preallocation options from @options into the driver
 * state.
 *
 * PARALLELS_OPT_PREALLOC_SIZE is read as a size in bytes (0 when absent)
 * and stored in s->prealloc_size in sectors. PARALLELS_OPT_PREALLOC_MODE
 * is parsed against prealloc_mode_lookup, with
 * PRL_PREALLOC_MODE_FALLOCATE as the fallback value.
 *
 * Returns 0 on success, -ENOMEM when the QemuOpts object cannot be created
 * and -EINVAL when the options cannot be absorbed or the mode fails to
 * parse.
 */
static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options,
                                   Error **errp)
{
    BDRVParallelsState *s = bs->opaque;
    Error *parse_err = NULL;
    int64_t prealloc_bytes;
    char *mode_str;
    QemuOpts *opts;
    int rc = -EINVAL;

    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
    if (!opts) {
        return -ENOMEM;
    }

    if (qemu_opts_absorb_qdict(opts, options, errp)) {
        prealloc_bytes = qemu_opt_get_size_del(opts,
                                               PARALLELS_OPT_PREALLOC_SIZE, 0);
        s->prealloc_size = prealloc_bytes >> BDRV_SECTOR_BITS;

        mode_str = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
        /* prealloc_mode can be downgraded later during allocate_clusters */
        s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, mode_str,
                                           PRL_PREALLOC_MODE_FALLOCATE,
                                           &parse_err);
        g_free(mode_str);

        if (parse_err != NULL) {
            error_propagate(errp, parse_err);
        } else {
            rc = 0;
        }
    }

    qemu_opts_del(opts);
    return rc;
}
1237 
/*
 * Open a Parallels image (.bdrv_open).
 *
 * The function
 *  - absorbs the preallocation runtime options and opens the "file" child;
 *  - reads and validates the on-disk header (version, magic, cluster size,
 *    BAT size) and loads the header together with the BAT into s->header;
 *  - marks the image as "in use" on disk when it is opened read-write and
 *    active;
 *  - registers a migration blocker;
 *  - computes s->data_end from the BAT and builds the used clusters bitmap;
 *  - runs a repairing check when a problem was detected and the image is
 *    opened read-write, active and not for checking.
 *
 * Returns 0 on success, a negative errno value on failure.
 */
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
{
    BDRVParallelsState *s = bs->opaque;
    ParallelsHeader ph;
    int ret, size, i;
    int64_t file_nb_sectors, sector;
    uint32_t data_start;
    bool need_check = false;

    ret = parallels_opts_prealloc(bs, options, errp);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
    if (ret < 0) {
        return ret;
    }

    GRAPH_RDLOCK_GUARD_MAINLOOP();

    file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
    if (file_nb_sectors < 0) {
        return -EINVAL;
    }

    ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
    if (ret < 0) {
        return ret;
    }

    bs->total_sectors = le64_to_cpu(ph.nb_sectors);

    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
        goto fail_format;
    }
    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
        s->off_multiplier = 1;
        /* only the low 32 bits of nb_sectors are used with this magic */
        bs->total_sectors = 0xffffffff & bs->total_sectors;
    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
        s->off_multiplier = le32_to_cpu(ph.tracks);
    } else {
        goto fail_format;
    }

    s->tracks = le32_to_cpu(ph.tracks);
    if (s->tracks == 0) {
        error_setg(errp, "Invalid image: Zero sectors per track");
        return -EINVAL;
    }
    if (s->tracks > INT32_MAX/513) {
        error_setg(errp, "Invalid image: Too big cluster");
        return -EFBIG;
    }
    s->prealloc_size = MAX(s->tracks, s->prealloc_size);
    s->cluster_size = s->tracks << BDRV_SECTOR_BITS;

    s->bat_size = le32_to_cpu(ph.bat_entries);
    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
        error_setg(errp, "Catalog too large");
        return -EFBIG;
    }

    /* Load the header and the BAT following it into one aligned buffer */
    size = bat_entry_off(s->bat_size);
    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
    if (s->header == NULL) {
        return -ENOMEM;
    }

    ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
    if (ret < 0) {
        goto fail;
    }
    s->bat_bitmap = (uint32_t *)(s->header + 1);

    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
        need_check = s->header_unclean = true;
    }

    {
        bool ok = parallels_test_data_off(s, file_nb_sectors, &data_start);
        need_check = need_check || !ok;
    }

    s->data_start = data_start;
    s->data_end = s->data_start;
    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
        /*
         * There is not enough unused space to fit to block align between BAT
         * and actual data. We can't avoid read-modify-write...
         */
        s->header_size = size;
    }

    if (ph.ext_off) {
        if (flags & BDRV_O_RDWR) {
            /*
             * It's unsafe to open image RW if there is an extension (as we
             * don't support it). But parallels driver in QEMU historically
             * ignores the extension, so print warning and don't care.
             */
            warn_report("Format Extension ignored in RW mode");
        } else {
            ret = parallels_read_format_extension(
                    bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp);
            if (ret < 0) {
                goto fail;
            }
        }
    }

    if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
        ret = parallels_update_header(bs);
        if (ret < 0) {
            goto fail;
        }
    }

    s->bat_dirty_block = 4 * qemu_real_host_page_size();
    s->bat_dirty_bmap =
        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));

    /* Disable migration until bdrv_activate method is added */
    error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));

    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
    if (ret < 0) {
        goto fail;
    }
    qemu_co_mutex_init(&s->lock);

    /* data_end is the end of the farthest cluster referenced by the BAT */
    for (i = 0; i < s->bat_size; i++) {
        sector = bat2sect(s, i);
        if (sector + s->tracks > s->data_end) {
            s->data_end = sector + s->tracks;
        }
    }
    need_check = need_check || s->data_end > file_nb_sectors;

    if (!need_check) {
        ret = parallels_fill_used_bitmap(bs);
        if (ret == -ENOMEM) {
            /*
             * The migration blocker is registered at this point; remove it
             * before failing the open, like the repair failure path below
             * does, so it does not outlive this node.
             */
            migrate_del_blocker(&s->migration_blocker);
            goto fail;
        }
        need_check = need_check || ret < 0; /* These are correctable errors */
    }

    /*
     * We don't repair the image here if it's opened for checks. Also we don't
     * want to change inactive images and can't change readonly images.
     */
    if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) {
        return 0;
    }

    /* Repair the image if corruption was detected. */
    if (need_check) {
        BdrvCheckResult res;
        ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not repair corrupted image");
            migrate_del_blocker(&s->migration_blocker);
            goto fail;
        }
    }
    return 0;

fail_format:
    error_setg(errp, "Image not in Parallels format");
    return -EINVAL;

fail:
    /*
     * "s" object was allocated by g_malloc0 so we can safely
     * try to free its fields even they were not allocated.
     */
    parallels_free_used_bitmap(bs);

    g_free(s->bat_dirty_bmap);
    qemu_vfree(s->header);
    return ret;
}
1425 
1426 
/*
 * Close the image (.bdrv_close).
 *
 * For an image opened read-write and active, the "in use" mark is cleared
 * in the header, the header is written back and the file is truncated to
 * s->data_end sectors; failures of these steps are ignored. The in-memory
 * state is then released and the migration blocker is removed.
 */
static void parallels_close(BlockDriverState *bs)
{
    BDRVParallelsState *s = bs->opaque;
    bool writable = bs->open_flags & BDRV_O_RDWR;
    bool inactive = bs->open_flags & BDRV_O_INACTIVE;

    GRAPH_RDLOCK_GUARD_MAINLOOP();

    if (writable && !inactive) {
        s->header->inuse = 0;
        parallels_update_header(bs);

        /* errors are ignored, so we might as well pass exact=true */
        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
                      PREALLOC_MODE_OFF, 0, NULL);
    }

    parallels_free_used_bitmap(bs);

    g_free(s->bat_dirty_bmap);
    qemu_vfree(s->header);

    migrate_del_blocker(&s->migration_blocker);
}
1449 
/*
 * .bdrv_supports_persistent_dirty_bitmap callback: unconditionally reports
 * support, whatever @bs is.
 */
static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs)
{
    return true;
}
1454 
1455 static BlockDriver bdrv_parallels = {
1456     .format_name                = "parallels",
1457     .instance_size              = sizeof(BDRVParallelsState),
1458     .create_opts                = &parallels_create_opts,
1459     .is_format                  = true,
1460     .supports_backing           = true,
1461 
1462     .bdrv_has_zero_init         = bdrv_has_zero_init_1,
1463     .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps,
1464 
1465     .bdrv_probe                 = parallels_probe,
1466     .bdrv_open                  = parallels_open,
1467     .bdrv_close                 = parallels_close,
1468     .bdrv_child_perm            = bdrv_default_perms,
1469     .bdrv_co_block_status       = parallels_co_block_status,
1470     .bdrv_co_flush_to_os        = parallels_co_flush_to_os,
1471     .bdrv_co_readv              = parallels_co_readv,
1472     .bdrv_co_writev             = parallels_co_writev,
1473     .bdrv_co_create             = parallels_co_create,
1474     .bdrv_co_create_opts        = parallels_co_create_opts,
1475     .bdrv_co_check              = parallels_co_check,
1476     .bdrv_co_pdiscard           = parallels_co_pdiscard,
1477     .bdrv_co_pwrite_zeroes      = parallels_co_pwrite_zeroes,
1478 };
1479 
/* Register the Parallels driver description with the block layer. */
static void bdrv_parallels_init(void)
{
    bdrv_register(&bdrv_parallels);
}

/* Hook the registration function into the block module initialisation. */
block_init(bdrv_parallels_init);
1486