xref: /openbmc/qemu/block/parallels.c (revision d1fd31f8)
1 /*
2  * Block driver for Parallels disk image format
3  *
4  * Copyright (c) 2007 Alex Beregszaszi
5  * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
6  *
7  * This code was originally based on comparing different disk images created
8  * by Parallels. Currently it is based on opened OpenVZ sources
9  * available at
10  *     http://git.openvz.org/?p=ploop;a=summary
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this software and associated documentation files (the "Software"), to deal
14  * in the Software without restriction, including without limitation the rights
15  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16  * copies of the Software, and to permit persons to whom the Software is
17  * furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28  * THE SOFTWARE.
29  */
30 
31 #include "qemu/osdep.h"
32 #include "qapi/error.h"
33 #include "block/block_int.h"
34 #include "sysemu/block-backend.h"
35 #include "qemu/module.h"
36 #include "qemu/option.h"
37 #include "qapi/qmp/qdict.h"
38 #include "qapi/qobject-input-visitor.h"
39 #include "qapi/qapi-visit-block-core.h"
40 #include "qemu/bswap.h"
41 #include "qemu/bitmap.h"
42 #include "migration/blocker.h"
43 #include "parallels.h"
44 
45 /**************************************************************/
46 
47 #define HEADER_MAGIC "WithoutFreeSpace"
48 #define HEADER_MAGIC2 "WithouFreSpacExt"
49 #define HEADER_VERSION 2
50 #define HEADER_INUSE_MAGIC  (0x746F6E59)
51 #define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
52 
53 static QEnumLookup prealloc_mode_lookup = {
54     .array = (const char *const[]) {
55         "falloc",
56         "truncate",
57     },
58     .size = PRL_PREALLOC_MODE__MAX
59 };
60 
61 #define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
62 #define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
63 
64 static QemuOptsList parallels_runtime_opts = {
65     .name = "parallels",
66     .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
67     .desc = {
68         {
69             .name = PARALLELS_OPT_PREALLOC_SIZE,
70             .type = QEMU_OPT_SIZE,
71             .help = "Preallocation size on image expansion",
72             .def_value_str = "128M",
73         },
74         {
75             .name = PARALLELS_OPT_PREALLOC_MODE,
76             .type = QEMU_OPT_STRING,
77             .help = "Preallocation mode on image expansion "
78                     "(allowed values: falloc, truncate)",
79             .def_value_str = "falloc",
80         },
81         { /* end of list */ },
82     },
83 };
84 
85 static QemuOptsList parallels_create_opts = {
86     .name = "parallels-create-opts",
87     .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
88     .desc = {
89         {
90             .name = BLOCK_OPT_SIZE,
91             .type = QEMU_OPT_SIZE,
92             .help = "Virtual disk size",
93         },
94         {
95             .name = BLOCK_OPT_CLUSTER_SIZE,
96             .type = QEMU_OPT_SIZE,
97             .help = "Parallels image cluster size",
98             .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
99         },
100         { /* end of list */ }
101     }
102 };
103 
104 
105 static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
106 {
107     return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
108 }
109 
110 static uint32_t bat_entry_off(uint32_t idx)
111 {
112     return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
113 }
114 
115 static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
116 {
117     uint32_t index, offset;
118 
119     index = sector_num / s->tracks;
120     offset = sector_num % s->tracks;
121 
122     /* not allocated */
123     if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
124         return -1;
125     }
126     return bat2sect(s, index) + offset;
127 }
128 
129 static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
130         int nb_sectors)
131 {
132     int ret = s->tracks - sector_num % s->tracks;
133     return MIN(nb_sectors, ret);
134 }
135 
136 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
137                             int nb_sectors, int *pnum)
138 {
139     int64_t start_off = -2, prev_end_off = -2;
140 
141     *pnum = 0;
142     while (nb_sectors > 0 || start_off == -2) {
143         int64_t offset = seek_to_sector(s, sector_num);
144         int to_end;
145 
146         if (start_off == -2) {
147             start_off = offset;
148             prev_end_off = offset;
149         } else if (offset != prev_end_off) {
150             break;
151         }
152 
153         to_end = cluster_remainder(s, sector_num, nb_sectors);
154         nb_sectors -= to_end;
155         sector_num += to_end;
156         *pnum += to_end;
157 
158         if (offset > 0) {
159             prev_end_off += to_end;
160         }
161     }
162     return start_off;
163 }
164 
165 static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
166                                  int nb_sectors, int *pnum)
167 {
168     int ret;
169     BDRVParallelsState *s = bs->opaque;
170     int64_t pos, space, idx, to_allocate, i, len;
171 
172     pos = block_status(s, sector_num, nb_sectors, pnum);
173     if (pos > 0) {
174         return pos;
175     }
176 
177     idx = sector_num / s->tracks;
178     to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
179 
180     /* This function is called only by parallels_co_writev(), which will never
181      * pass a sector_num at or beyond the end of the image (because the block
182      * layer never passes such a sector_num to that function). Therefore, idx
183      * is always below s->bat_size.
184      * block_status() will limit *pnum so that sector_num + *pnum will not
185      * exceed the image end. Therefore, idx + to_allocate cannot exceed
186      * s->bat_size.
187      * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
188      * will always fit into a uint32_t. */
189     assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
190 
191     space = to_allocate * s->tracks;
192     len = bdrv_getlength(bs->file->bs);
193     if (len < 0) {
194         return len;
195     }
196     if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
197         space += s->prealloc_size;
198         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
199             ret = bdrv_pwrite_zeroes(bs->file,
200                                      s->data_end << BDRV_SECTOR_BITS,
201                                      space << BDRV_SECTOR_BITS, 0);
202         } else {
203             ret = bdrv_truncate(bs->file,
204                                 (s->data_end + space) << BDRV_SECTOR_BITS,
205                                 PREALLOC_MODE_OFF, NULL);
206         }
207         if (ret < 0) {
208             return ret;
209         }
210     }
211 
212     /* Try to read from backing to fill empty clusters
213      * FIXME: 1. previous write_zeroes may be redundant
214      *        2. most of data we read from backing will be rewritten by
215      *           parallels_co_writev. On aligned-to-cluster write we do not need
216      *           this read at all.
217      *        3. it would be good to combine write of data from backing and new
218      *           data into one write call */
219     if (bs->backing) {
220         int64_t nb_cow_sectors = to_allocate * s->tracks;
221         int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
222         QEMUIOVector qiov;
223         struct iovec iov = {
224             .iov_len = nb_cow_bytes,
225             .iov_base = qemu_blockalign(bs, nb_cow_bytes)
226         };
227         qemu_iovec_init_external(&qiov, &iov, 1);
228 
229         ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors,
230                             &qiov);
231         if (ret < 0) {
232             qemu_vfree(iov.iov_base);
233             return ret;
234         }
235 
236         ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov);
237         qemu_vfree(iov.iov_base);
238         if (ret < 0) {
239             return ret;
240         }
241     }
242 
243     for (i = 0; i < to_allocate; i++) {
244         s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
245         s->data_end += s->tracks;
246         bitmap_set(s->bat_dirty_bmap,
247                    bat_entry_off(idx + i) / s->bat_dirty_block, 1);
248     }
249 
250     return bat2sect(s, idx) + sector_num % s->tracks;
251 }
252 
253 
254 static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
255 {
256     BDRVParallelsState *s = bs->opaque;
257     unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
258     unsigned long bit;
259 
260     qemu_co_mutex_lock(&s->lock);
261 
262     bit = find_first_bit(s->bat_dirty_bmap, size);
263     while (bit < size) {
264         uint32_t off = bit * s->bat_dirty_block;
265         uint32_t to_write = s->bat_dirty_block;
266         int ret;
267 
268         if (off + to_write > s->header_size) {
269             to_write = s->header_size - off;
270         }
271         ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
272                           to_write);
273         if (ret < 0) {
274             qemu_co_mutex_unlock(&s->lock);
275             return ret;
276         }
277         bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
278     }
279     bitmap_zero(s->bat_dirty_bmap, size);
280 
281     qemu_co_mutex_unlock(&s->lock);
282     return 0;
283 }
284 
285 
286 static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
287                                                   bool want_zero,
288                                                   int64_t offset,
289                                                   int64_t bytes,
290                                                   int64_t *pnum,
291                                                   int64_t *map,
292                                                   BlockDriverState **file)
293 {
294     BDRVParallelsState *s = bs->opaque;
295     int count;
296 
297     assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
298     qemu_co_mutex_lock(&s->lock);
299     offset = block_status(s, offset >> BDRV_SECTOR_BITS,
300                           bytes >> BDRV_SECTOR_BITS, &count);
301     qemu_co_mutex_unlock(&s->lock);
302 
303     *pnum = count * BDRV_SECTOR_SIZE;
304     if (offset < 0) {
305         return 0;
306     }
307 
308     *map = offset * BDRV_SECTOR_SIZE;
309     *file = bs->file->bs;
310     return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
311 }
312 
313 static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
314         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
315 {
316     BDRVParallelsState *s = bs->opaque;
317     uint64_t bytes_done = 0;
318     QEMUIOVector hd_qiov;
319     int ret = 0;
320 
321     qemu_iovec_init(&hd_qiov, qiov->niov);
322 
323     while (nb_sectors > 0) {
324         int64_t position;
325         int n, nbytes;
326 
327         qemu_co_mutex_lock(&s->lock);
328         position = allocate_clusters(bs, sector_num, nb_sectors, &n);
329         qemu_co_mutex_unlock(&s->lock);
330         if (position < 0) {
331             ret = (int)position;
332             break;
333         }
334 
335         nbytes = n << BDRV_SECTOR_BITS;
336 
337         qemu_iovec_reset(&hd_qiov);
338         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
339 
340         ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
341         if (ret < 0) {
342             break;
343         }
344 
345         nb_sectors -= n;
346         sector_num += n;
347         bytes_done += nbytes;
348     }
349 
350     qemu_iovec_destroy(&hd_qiov);
351     return ret;
352 }
353 
354 static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
355         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
356 {
357     BDRVParallelsState *s = bs->opaque;
358     uint64_t bytes_done = 0;
359     QEMUIOVector hd_qiov;
360     int ret = 0;
361 
362     qemu_iovec_init(&hd_qiov, qiov->niov);
363 
364     while (nb_sectors > 0) {
365         int64_t position;
366         int n, nbytes;
367 
368         qemu_co_mutex_lock(&s->lock);
369         position = block_status(s, sector_num, nb_sectors, &n);
370         qemu_co_mutex_unlock(&s->lock);
371 
372         nbytes = n << BDRV_SECTOR_BITS;
373 
374         qemu_iovec_reset(&hd_qiov);
375         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
376 
377         if (position < 0) {
378             if (bs->backing) {
379                 ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
380                 if (ret < 0) {
381                     break;
382                 }
383             } else {
384                 qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
385             }
386         } else {
387             ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
388             if (ret < 0) {
389                 break;
390             }
391         }
392 
393         nb_sectors -= n;
394         sector_num += n;
395         bytes_done += nbytes;
396     }
397 
398     qemu_iovec_destroy(&hd_qiov);
399     return ret;
400 }
401 
402 
403 static int coroutine_fn parallels_co_check(BlockDriverState *bs,
404                                            BdrvCheckResult *res,
405                                            BdrvCheckMode fix)
406 {
407     BDRVParallelsState *s = bs->opaque;
408     int64_t size, prev_off, high_off;
409     int ret;
410     uint32_t i;
411     bool flush_bat = false;
412     int cluster_size = s->tracks << BDRV_SECTOR_BITS;
413 
414     size = bdrv_getlength(bs->file->bs);
415     if (size < 0) {
416         res->check_errors++;
417         return size;
418     }
419 
420     qemu_co_mutex_lock(&s->lock);
421     if (s->header_unclean) {
422         fprintf(stderr, "%s image was not closed correctly\n",
423                 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
424         res->corruptions++;
425         if (fix & BDRV_FIX_ERRORS) {
426             /* parallels_close will do the job right */
427             res->corruptions_fixed++;
428             s->header_unclean = false;
429         }
430     }
431 
432     res->bfi.total_clusters = s->bat_size;
433     res->bfi.compressed_clusters = 0; /* compression is not supported */
434 
435     high_off = 0;
436     prev_off = 0;
437     for (i = 0; i < s->bat_size; i++) {
438         int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
439         if (off == 0) {
440             prev_off = 0;
441             continue;
442         }
443 
444         /* cluster outside the image */
445         if (off > size) {
446             fprintf(stderr, "%s cluster %u is outside image\n",
447                     fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
448             res->corruptions++;
449             if (fix & BDRV_FIX_ERRORS) {
450                 prev_off = 0;
451                 s->bat_bitmap[i] = 0;
452                 res->corruptions_fixed++;
453                 flush_bat = true;
454                 continue;
455             }
456         }
457 
458         res->bfi.allocated_clusters++;
459         if (off > high_off) {
460             high_off = off;
461         }
462 
463         if (prev_off != 0 && (prev_off + cluster_size) != off) {
464             res->bfi.fragmented_clusters++;
465         }
466         prev_off = off;
467     }
468 
469     ret = 0;
470     if (flush_bat) {
471         ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
472         if (ret < 0) {
473             res->check_errors++;
474             goto out;
475         }
476     }
477 
478     res->image_end_offset = high_off + cluster_size;
479     if (size > res->image_end_offset) {
480         int64_t count;
481         count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
482         fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
483                 fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
484                 size - res->image_end_offset);
485         res->leaks += count;
486         if (fix & BDRV_FIX_LEAKS) {
487             Error *local_err = NULL;
488             ret = bdrv_truncate(bs->file, res->image_end_offset,
489                                 PREALLOC_MODE_OFF, &local_err);
490             if (ret < 0) {
491                 error_report_err(local_err);
492                 res->check_errors++;
493                 goto out;
494             }
495             res->leaks_fixed += count;
496         }
497     }
498 
499 out:
500     qemu_co_mutex_unlock(&s->lock);
501     return ret;
502 }
503 
504 
505 static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
506                                             Error **errp)
507 {
508     BlockdevCreateOptionsParallels *parallels_opts;
509     BlockDriverState *bs;
510     BlockBackend *blk;
511     int64_t total_size, cl_size;
512     uint32_t bat_entries, bat_sectors;
513     ParallelsHeader header;
514     uint8_t tmp[BDRV_SECTOR_SIZE];
515     int ret;
516 
517     assert(opts->driver == BLOCKDEV_DRIVER_PARALLELS);
518     parallels_opts = &opts->u.parallels;
519 
520     /* Sanity checks */
521     total_size = parallels_opts->size;
522 
523     if (parallels_opts->has_cluster_size) {
524         cl_size = parallels_opts->cluster_size;
525     } else {
526         cl_size = DEFAULT_CLUSTER_SIZE;
527     }
528 
529     if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
530         error_setg(errp, "Image size is too large for this cluster size");
531         return -E2BIG;
532     }
533 
534     if (!QEMU_IS_ALIGNED(total_size, BDRV_SECTOR_SIZE)) {
535         error_setg(errp, "Image size must be a multiple of 512 bytes");
536         return -EINVAL;
537     }
538 
539     if (!QEMU_IS_ALIGNED(cl_size, BDRV_SECTOR_SIZE)) {
540         error_setg(errp, "Cluster size must be a multiple of 512 bytes");
541         return -EINVAL;
542     }
543 
544     /* Create BlockBackend to write to the image */
545     bs = bdrv_open_blockdev_ref(parallels_opts->file, errp);
546     if (bs == NULL) {
547         return -EIO;
548     }
549 
550     blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
551     ret = blk_insert_bs(blk, bs, errp);
552     if (ret < 0) {
553         goto out;
554     }
555     blk_set_allow_write_beyond_eof(blk, true);
556 
557     /* Create image format */
558     ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp);
559     if (ret < 0) {
560         goto out;
561     }
562 
563     bat_entries = DIV_ROUND_UP(total_size, cl_size);
564     bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
565     bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
566 
567     memset(&header, 0, sizeof(header));
568     memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
569     header.version = cpu_to_le32(HEADER_VERSION);
570     /* don't care much about geometry, it is not used on image level */
571     header.heads = cpu_to_le32(HEADS_NUMBER);
572     header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE
573                                    / HEADS_NUMBER / SEC_IN_CYL);
574     header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
575     header.bat_entries = cpu_to_le32(bat_entries);
576     header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
577     header.data_off = cpu_to_le32(bat_sectors);
578 
579     /* write all the data */
580     memset(tmp, 0, sizeof(tmp));
581     memcpy(tmp, &header, sizeof(header));
582 
583     ret = blk_pwrite(blk, 0, tmp, BDRV_SECTOR_SIZE, 0);
584     if (ret < 0) {
585         goto exit;
586     }
587     ret = blk_pwrite_zeroes(blk, BDRV_SECTOR_SIZE,
588                             (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
589     if (ret < 0) {
590         goto exit;
591     }
592 
593     ret = 0;
594 out:
595     blk_unref(blk);
596     bdrv_unref(bs);
597     return ret;
598 
599 exit:
600     error_setg_errno(errp, -ret, "Failed to create Parallels image");
601     goto out;
602 }
603 
604 static int coroutine_fn parallels_co_create_opts(const char *filename,
605                                                  QemuOpts *opts,
606                                                  Error **errp)
607 {
608     BlockdevCreateOptions *create_options = NULL;
609     Error *local_err = NULL;
610     BlockDriverState *bs = NULL;
611     QDict *qdict = NULL;
612     QObject *qobj;
613     Visitor *v;
614     int ret;
615 
616     static const QDictRenames opt_renames[] = {
617         { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
618         { NULL, NULL },
619     };
620 
621     /* Parse options and convert legacy syntax */
622     qdict = qemu_opts_to_qdict_filtered(opts, NULL, &parallels_create_opts,
623                                         true);
624 
625     if (!qdict_rename_keys(qdict, opt_renames, errp)) {
626         ret = -EINVAL;
627         goto done;
628     }
629 
630     /* Create and open the file (protocol layer) */
631     ret = bdrv_create_file(filename, opts, &local_err);
632     if (ret < 0) {
633         error_propagate(errp, local_err);
634         goto done;
635     }
636 
637     bs = bdrv_open(filename, NULL, NULL,
638                    BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
639     if (bs == NULL) {
640         ret = -EIO;
641         goto done;
642     }
643 
644     /* Now get the QAPI type BlockdevCreateOptions */
645     qdict_put_str(qdict, "driver", "parallels");
646     qdict_put_str(qdict, "file", bs->node_name);
647 
648     qobj = qdict_crumple(qdict, errp);
649     QDECREF(qdict);
650     qdict = qobject_to(QDict, qobj);
651     if (qdict == NULL) {
652         ret = -EINVAL;
653         goto done;
654     }
655 
656     v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
657     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
658     visit_free(v);
659 
660     if (local_err) {
661         error_propagate(errp, local_err);
662         ret = -EINVAL;
663         goto done;
664     }
665 
666     /* Silently round up sizes */
667     create_options->u.parallels.size =
668         ROUND_UP(create_options->u.parallels.size, BDRV_SECTOR_SIZE);
669     create_options->u.parallels.cluster_size =
670         ROUND_UP(create_options->u.parallels.cluster_size, BDRV_SECTOR_SIZE);
671 
672     /* Create the Parallels image (format layer) */
673     ret = parallels_co_create(create_options, errp);
674     if (ret < 0) {
675         goto done;
676     }
677     ret = 0;
678 
679 done:
680     QDECREF(qdict);
681     bdrv_unref(bs);
682     qapi_free_BlockdevCreateOptions(create_options);
683     return ret;
684 }
685 
686 
687 static int parallels_probe(const uint8_t *buf, int buf_size,
688                            const char *filename)
689 {
690     const ParallelsHeader *ph = (const void *)buf;
691 
692     if (buf_size < sizeof(ParallelsHeader)) {
693         return 0;
694     }
695 
696     if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
697            !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
698            (le32_to_cpu(ph->version) == HEADER_VERSION)) {
699         return 100;
700     }
701 
702     return 0;
703 }
704 
705 static int parallels_update_header(BlockDriverState *bs)
706 {
707     BDRVParallelsState *s = bs->opaque;
708     unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
709                         sizeof(ParallelsHeader));
710 
711     if (size > s->header_size) {
712         size = s->header_size;
713     }
714     return bdrv_pwrite_sync(bs->file, 0, s->header, size);
715 }
716 
717 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
718                           Error **errp)
719 {
720     BDRVParallelsState *s = bs->opaque;
721     ParallelsHeader ph;
722     int ret, size, i;
723     QemuOpts *opts = NULL;
724     Error *local_err = NULL;
725     char *buf;
726 
727     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
728                                false, errp);
729     if (!bs->file) {
730         return -EINVAL;
731     }
732 
733     ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
734     if (ret < 0) {
735         goto fail;
736     }
737 
738     bs->total_sectors = le64_to_cpu(ph.nb_sectors);
739 
740     if (le32_to_cpu(ph.version) != HEADER_VERSION) {
741         goto fail_format;
742     }
743     if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
744         s->off_multiplier = 1;
745         bs->total_sectors = 0xffffffff & bs->total_sectors;
746     } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
747         s->off_multiplier = le32_to_cpu(ph.tracks);
748     } else {
749         goto fail_format;
750     }
751 
752     s->tracks = le32_to_cpu(ph.tracks);
753     if (s->tracks == 0) {
754         error_setg(errp, "Invalid image: Zero sectors per track");
755         ret = -EINVAL;
756         goto fail;
757     }
758     if (s->tracks > INT32_MAX/513) {
759         error_setg(errp, "Invalid image: Too big cluster");
760         ret = -EFBIG;
761         goto fail;
762     }
763 
764     s->bat_size = le32_to_cpu(ph.bat_entries);
765     if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
766         error_setg(errp, "Catalog too large");
767         ret = -EFBIG;
768         goto fail;
769     }
770 
771     size = bat_entry_off(s->bat_size);
772     s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
773     s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
774     if (s->header == NULL) {
775         ret = -ENOMEM;
776         goto fail;
777     }
778     s->data_end = le32_to_cpu(ph.data_off);
779     if (s->data_end == 0) {
780         s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
781     }
782     if (s->data_end < s->header_size) {
783         /* there is not enough unused space to fit to block align between BAT
784            and actual data. We can't avoid read-modify-write... */
785         s->header_size = size;
786     }
787 
788     ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
789     if (ret < 0) {
790         goto fail;
791     }
792     s->bat_bitmap = (uint32_t *)(s->header + 1);
793 
794     for (i = 0; i < s->bat_size; i++) {
795         int64_t off = bat2sect(s, i);
796         if (off >= s->data_end) {
797             s->data_end = off + s->tracks;
798         }
799     }
800 
801     if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
802         /* Image was not closed correctly. The check is mandatory */
803         s->header_unclean = true;
804         if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
805             error_setg(errp, "parallels: Image was not closed correctly; "
806                        "cannot be opened read/write");
807             ret = -EACCES;
808             goto fail;
809         }
810     }
811 
812     opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, &local_err);
813     if (local_err != NULL) {
814         goto fail_options;
815     }
816 
817     qemu_opts_absorb_qdict(opts, options, &local_err);
818     if (local_err != NULL) {
819         goto fail_options;
820     }
821 
822     s->prealloc_size =
823         qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
824     s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
825     buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
826     s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
827                                        PRL_PREALLOC_MODE_FALLOCATE,
828                                        &local_err);
829     g_free(buf);
830     if (local_err != NULL) {
831         goto fail_options;
832     }
833 
834     if (!bdrv_has_zero_init(bs->file->bs)) {
835         s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
836     }
837 
838     if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
839         s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
840         ret = parallels_update_header(bs);
841         if (ret < 0) {
842             goto fail;
843         }
844     }
845 
846     s->bat_dirty_block = 4 * getpagesize();
847     s->bat_dirty_bmap =
848         bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
849 
850     /* Disable migration until bdrv_invalidate_cache method is added */
851     error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
852                "does not support live migration",
853                bdrv_get_device_or_node_name(bs));
854     ret = migrate_add_blocker(s->migration_blocker, &local_err);
855     if (local_err) {
856         error_propagate(errp, local_err);
857         error_free(s->migration_blocker);
858         goto fail;
859     }
860     qemu_co_mutex_init(&s->lock);
861     return 0;
862 
863 fail_format:
864     error_setg(errp, "Image not in Parallels format");
865     ret = -EINVAL;
866 fail:
867     qemu_vfree(s->header);
868     return ret;
869 
870 fail_options:
871     error_propagate(errp, local_err);
872     ret = -EINVAL;
873     goto fail;
874 }
875 
876 
877 static void parallels_close(BlockDriverState *bs)
878 {
879     BDRVParallelsState *s = bs->opaque;
880 
881     if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) {
882         s->header->inuse = 0;
883         parallels_update_header(bs);
884         bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
885                       PREALLOC_MODE_OFF, NULL);
886     }
887 
888     g_free(s->bat_dirty_bmap);
889     qemu_vfree(s->header);
890 
891     migrate_del_blocker(s->migration_blocker);
892     error_free(s->migration_blocker);
893 }
894 
895 static BlockDriver bdrv_parallels = {
896     .format_name	= "parallels",
897     .instance_size	= sizeof(BDRVParallelsState),
898     .bdrv_probe		= parallels_probe,
899     .bdrv_open		= parallels_open,
900     .bdrv_close		= parallels_close,
901     .bdrv_child_perm          = bdrv_format_default_perms,
902     .bdrv_co_block_status     = parallels_co_block_status,
903     .bdrv_has_zero_init       = bdrv_has_zero_init_1,
904     .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
905     .bdrv_co_readv  = parallels_co_readv,
906     .bdrv_co_writev = parallels_co_writev,
907     .supports_backing = true,
908     .bdrv_co_create      = parallels_co_create,
909     .bdrv_co_create_opts = parallels_co_create_opts,
910     .bdrv_co_check  = parallels_co_check,
911     .create_opts    = &parallels_create_opts,
912 };
913 
914 static void bdrv_parallels_init(void)
915 {
916     bdrv_register(&bdrv_parallels);
917 }
918 
919 block_init(bdrv_parallels_init);
920