xref: /openbmc/qemu/block/raw-format.c (revision 6193344f)
1 /* BlockDriver implementation for "raw" format driver
2  *
3  * Copyright (C) 2010-2016 Red Hat, Inc.
4  * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
5  * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
6  *
7  * Author:
8  *   Laszlo Ersek <lersek@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to
12  * deal in the Software without restriction, including without limitation the
13  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14  * sell copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "block/block_int.h"
31 #include "qapi/error.h"
32 #include "qemu/module.h"
33 #include "qemu/option.h"
34 
/*
 * Per-node state of the raw driver (allocated via .instance_size and
 * reached through bs->opaque).
 */
typedef struct BDRVRawState {
    /* Byte offset added to every request before it goes to bs->file. */
    uint64_t offset;
    /* Exposed length in bytes; refreshed by raw_getlength(). */
    uint64_t size;
    /* True when the "size" option was given explicitly. */
    bool has_size;
} BDRVRawState;
40 
/*
 * NULL-terminated option name list installed as bdrv_raw.mutable_opts.
 * NOTE(review): presumably the options that may change on reopen - confirm
 * against the generic block layer.
 */
static const char *const mutable_opts[] = { "offset", "size", NULL };
42 
/*
 * Runtime options understood by the raw driver.  raw_read_options() creates
 * a QemuOpts from this list to absorb "offset" and "size" from the options
 * QDict.
 */
static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
    .desc = {
        {
            .name = "offset",
            .type = QEMU_OPT_SIZE,
            .help = "offset in the disk where the image starts",
        },
        {
            .name = "size",
            .type = QEMU_OPT_SIZE,
            .help = "virtual disk size",
        },
        { /* end of list */ }
    },
};
60 
/*
 * Creation-time options, installed as bdrv_raw.create_opts.  Only the
 * virtual disk size (BLOCK_OPT_SIZE) is described here.
 */
static QemuOptsList raw_create_opts = {
    .name = "raw-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
        { /* end of list */ }
    }
};
73 
74 static int raw_read_options(QDict *options, uint64_t *offset, bool *has_size,
75                             uint64_t *size, Error **errp)
76 {
77     QemuOpts *opts = NULL;
78     int ret;
79 
80     opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
81     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
82         ret = -EINVAL;
83         goto end;
84     }
85 
86     *offset = qemu_opt_get_size(opts, "offset", 0);
87     *has_size = qemu_opt_find(opts, "size");
88     *size = qemu_opt_get_size(opts, "size", 0);
89 
90     ret = 0;
91 end:
92     qemu_opts_del(opts);
93     return ret;
94 }
95 
96 static int raw_apply_options(BlockDriverState *bs, BDRVRawState *s,
97                              uint64_t offset, bool has_size, uint64_t size,
98                              Error **errp)
99 {
100     int64_t real_size = 0;
101 
102     real_size = bdrv_getlength(bs->file->bs);
103     if (real_size < 0) {
104         error_setg_errno(errp, -real_size, "Could not get image size");
105         return real_size;
106     }
107 
108     /* Check size and offset */
109     if (offset > real_size) {
110         error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
111                    "size of the containing file (%" PRId64 ")",
112                    s->offset, real_size);
113         return -EINVAL;
114     }
115 
116     if (has_size && (real_size - offset) < size) {
117         error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
118                    "(%" PRIu64 ") has to be smaller or equal to the "
119                    " actual size of the containing file (%" PRId64 ")",
120                    s->offset, s->size, real_size);
121         return -EINVAL;
122     }
123 
124     /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
125      * up and leaking out of the specified area. */
126     if (has_size && !QEMU_IS_ALIGNED(size, BDRV_SECTOR_SIZE)) {
127         error_setg(errp, "Specified size is not multiple of %llu",
128                    BDRV_SECTOR_SIZE);
129         return -EINVAL;
130     }
131 
132     s->offset = offset;
133     s->has_size = has_size;
134     s->size = has_size ? size : real_size - offset;
135 
136     return 0;
137 }
138 
139 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
140                               BlockReopenQueue *queue, Error **errp)
141 {
142     bool has_size;
143     uint64_t offset, size;
144     int ret;
145 
146     assert(reopen_state != NULL);
147     assert(reopen_state->bs != NULL);
148 
149     reopen_state->opaque = g_new0(BDRVRawState, 1);
150 
151     ret = raw_read_options(reopen_state->options, &offset, &has_size, &size,
152                            errp);
153     if (ret < 0) {
154         return ret;
155     }
156 
157     ret = raw_apply_options(reopen_state->bs, reopen_state->opaque,
158                             offset, has_size, size, errp);
159     if (ret < 0) {
160         return ret;
161     }
162 
163     return 0;
164 }
165 
166 static void raw_reopen_commit(BDRVReopenState *state)
167 {
168     BDRVRawState *new_s = state->opaque;
169     BDRVRawState *s = state->bs->opaque;
170 
171     memcpy(s, new_s, sizeof(BDRVRawState));
172 
173     g_free(state->opaque);
174     state->opaque = NULL;
175 }
176 
177 static void raw_reopen_abort(BDRVReopenState *state)
178 {
179     g_free(state->opaque);
180     state->opaque = NULL;
181 }
182 
183 /* Check and adjust the offset, against 'offset' and 'size' options. */
184 static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
185                                     uint64_t bytes, bool is_write)
186 {
187     BDRVRawState *s = bs->opaque;
188 
189     if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
190         /* There's not enough space for the write, or the read request is
191          * out-of-range. Don't read/write anything to prevent leaking out of
192          * the size specified in options. */
193         return is_write ? -ENOSPC : -EINVAL;
194     }
195 
196     if (*offset > INT64_MAX - s->offset) {
197         return -EINVAL;
198     }
199     *offset += s->offset;
200 
201     return 0;
202 }
203 
204 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
205                                       uint64_t bytes, QEMUIOVector *qiov,
206                                       int flags)
207 {
208     int ret;
209 
210     ret = raw_adjust_offset(bs, &offset, bytes, false);
211     if (ret) {
212         return ret;
213     }
214 
215     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
216     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
217 }
218 
/*
 * Write @bytes from @qiov at @offset.
 *
 * If the image format was probed (bs->probed) and the write touches the
 * first BLOCK_PROBE_BUF_SIZE bytes, the first 512 bytes of the payload are
 * copied into a private buffer and run through bdrv_probe_all().  The write
 * is refused with -EPERM when the probe result is a driver other than
 * bs->drv.  The checked copy, not the caller's buffer, is what gets
 * written.
 *
 * Afterwards the offset is range-checked and translated by
 * raw_adjust_offset() and the request is forwarded to bs->file.
 *
 * Returns the child's result, or -ENOMEM / -EINVAL / -EPERM from the probe
 * guard, or the error from raw_adjust_offset().
 */
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                       uint64_t bytes, QEMUIOVector *qiov,
                                       int flags)
{
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;

    if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
        /* Handling partial writes would be a pain - so we just
         * require that guests have 512-byte request alignment if
         * probing occurred */
        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
        assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);

        /* Private bounce buffer for the first sector of the payload. */
        buf = qemu_try_blockalign(bs->file->bs, 512);
        if (!buf) {
            ret = -ENOMEM;
            goto fail;
        }

        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
        if (ret != 512) {
            ret = -EINVAL;
            goto fail;
        }

        /* Refuse the write if the new first sector probes as another driver. */
        drv = bdrv_probe_all(buf, 512, NULL);
        if (drv != bs->drv) {
            ret = -EPERM;
            goto fail;
        }

        /* Use the checked buffer, a malicious guest might be overwriting its
         * original buffer in the background. */
        qemu_iovec_init(&local_qiov, qiov->niov + 1);
        qemu_iovec_add(&local_qiov, buf, 512);
        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
        qiov = &local_qiov;
    }

    ret = raw_adjust_offset(bs, &offset, bytes, true);
    if (ret) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
    ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);

fail:
    /* local_qiov is only initialised once qiov has been redirected to it. */
    if (qiov == &local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    /* buf is still NULL when the probe guard was skipped; presumably
     * qemu_vfree() accepts NULL - confirm against its definition. */
    qemu_vfree(buf);
    return ret;
}
277 
278 static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
279                                             bool want_zero, int64_t offset,
280                                             int64_t bytes, int64_t *pnum,
281                                             int64_t *map,
282                                             BlockDriverState **file)
283 {
284     BDRVRawState *s = bs->opaque;
285     *pnum = bytes;
286     *file = bs->file->bs;
287     *map = offset + s->offset;
288     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
289 }
290 
291 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
292                                              int64_t offset, int bytes,
293                                              BdrvRequestFlags flags)
294 {
295     int ret;
296 
297     ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
298     if (ret) {
299         return ret;
300     }
301     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
302 }
303 
304 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
305                                         int64_t offset, int bytes)
306 {
307     int ret;
308 
309     ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
310     if (ret) {
311         return ret;
312     }
313     return bdrv_co_pdiscard(bs->file, offset, bytes);
314 }
315 
316 static int64_t raw_getlength(BlockDriverState *bs)
317 {
318     int64_t len;
319     BDRVRawState *s = bs->opaque;
320 
321     /* Update size. It should not change unless the file was externally
322      * modified. */
323     len = bdrv_getlength(bs->file->bs);
324     if (len < 0) {
325         return len;
326     }
327 
328     if (len < s->offset) {
329         s->size = 0;
330     } else {
331         if (s->has_size) {
332             /* Try to honour the size */
333             s->size = MIN(s->size, len - s->offset);
334         } else {
335             s->size = len - s->offset;
336         }
337     }
338 
339     return s->size;
340 }
341 
342 static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
343                                      Error **errp)
344 {
345     BlockMeasureInfo *info;
346     int64_t required;
347 
348     if (in_bs) {
349         required = bdrv_getlength(in_bs);
350         if (required < 0) {
351             error_setg_errno(errp, -required, "Unable to get image size");
352             return NULL;
353         }
354     } else {
355         required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
356                             BDRV_SECTOR_SIZE);
357     }
358 
359     info = g_new0(BlockMeasureInfo, 1);
360     info->required = required;
361 
362     /* Unallocated sectors count towards the file size in raw images */
363     info->fully_allocated = info->required;
364     return info;
365 }
366 
367 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
368 {
369     return bdrv_get_info(bs->file->bs, bdi);
370 }
371 
372 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
373 {
374     if (bs->probed) {
375         /* To make it easier to protect the first sector, any probed
376          * image is restricted to read-modify-write on sub-sector
377          * operations. */
378         bs->bl.request_alignment = BDRV_SECTOR_SIZE;
379     }
380 }
381 
382 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
383                                         bool exact, PreallocMode prealloc,
384                                         BdrvRequestFlags flags, Error **errp)
385 {
386     BDRVRawState *s = bs->opaque;
387 
388     if (s->has_size) {
389         error_setg(errp, "Cannot resize fixed-size raw disks");
390         return -ENOTSUP;
391     }
392 
393     if (INT64_MAX - offset < s->offset) {
394         error_setg(errp, "Disk size too large for the chosen offset");
395         return -EINVAL;
396     }
397 
398     s->size = offset;
399     offset += s->offset;
400     return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
401 }
402 
403 static void raw_eject(BlockDriverState *bs, bool eject_flag)
404 {
405     bdrv_eject(bs->file->bs, eject_flag);
406 }
407 
408 static void raw_lock_medium(BlockDriverState *bs, bool locked)
409 {
410     bdrv_lock_medium(bs->file->bs, locked);
411 }
412 
413 static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
414 {
415     BDRVRawState *s = bs->opaque;
416     if (s->offset || s->has_size) {
417         return -ENOTSUP;
418     }
419     return bdrv_co_ioctl(bs->file->bs, req, buf);
420 }
421 
422 static int raw_has_zero_init(BlockDriverState *bs)
423 {
424     return bdrv_has_zero_init(bs->file->bs);
425 }
426 
427 static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
428                                            const char *filename,
429                                            QemuOpts *opts,
430                                            Error **errp)
431 {
432     return bdrv_create_file(filename, opts, errp);
433 }
434 
435 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
436                     Error **errp)
437 {
438     BDRVRawState *s = bs->opaque;
439     bool has_size;
440     uint64_t offset, size;
441     BdrvChildRole file_role;
442     int ret;
443 
444     ret = raw_read_options(options, &offset, &has_size, &size, errp);
445     if (ret < 0) {
446         return ret;
447     }
448 
449     /*
450      * Without offset and a size limit, this driver behaves very much
451      * like a filter.  With any such limit, it does not.
452      */
453     if (offset || has_size) {
454         file_role = BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY;
455     } else {
456         file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
457     }
458 
459     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
460                                file_role, false, errp);
461     if (!bs->file) {
462         return -EINVAL;
463     }
464 
465     bs->sg = bs->file->bs->sg;
466     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
467         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
468     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
469         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
470             bs->file->bs->supported_zero_flags);
471     bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
472                                    BDRV_REQ_ZERO_WRITE;
473 
474     if (bs->probed && !bdrv_is_read_only(bs)) {
475         bdrv_refresh_filename(bs->file->bs);
476         fprintf(stderr,
477                 "WARNING: Image format was not specified for '%s' and probing "
478                 "guessed raw.\n"
479                 "         Automatically detecting the format is dangerous for "
480                 "raw images, write operations on block 0 will be restricted.\n"
481                 "         Specify the 'raw' format explicitly to remove the "
482                 "restrictions.\n",
483                 bs->file->bs->filename);
484     }
485 
486     ret = raw_apply_options(bs, s, offset, has_size, size, errp);
487     if (ret < 0) {
488         return ret;
489     }
490 
491     if (bs->sg && (s->offset || s->has_size)) {
492         error_setg(errp, "Cannot use offset/size with SCSI generic devices");
493         return -EINVAL;
494     }
495 
496     return 0;
497 }
498 
/*
 * Format probe for raw.  The arguments are ignored; the score is always
 * the same.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    /* smallest possible positive score so that raw is used if and only if no
     * other block driver works
     */
    const int lowest_positive_score = 1;

    return lowest_positive_score;
}
506 
507 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
508 {
509     BDRVRawState *s = bs->opaque;
510     int ret;
511 
512     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
513     if (ret < 0) {
514         return ret;
515     }
516 
517     if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
518         return -ENOTSUP;
519     }
520 
521     return 0;
522 }
523 
524 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
525 {
526     BDRVRawState *s = bs->opaque;
527     if (s->offset || s->has_size) {
528         return -ENOTSUP;
529     }
530     return bdrv_probe_geometry(bs->file->bs, geo);
531 }
532 
533 static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
534                                                BdrvChild *src,
535                                                uint64_t src_offset,
536                                                BdrvChild *dst,
537                                                uint64_t dst_offset,
538                                                uint64_t bytes,
539                                                BdrvRequestFlags read_flags,
540                                                BdrvRequestFlags write_flags)
541 {
542     int ret;
543 
544     ret = raw_adjust_offset(bs, &src_offset, bytes, false);
545     if (ret) {
546         return ret;
547     }
548     return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
549                                    bytes, read_flags, write_flags);
550 }
551 
552 static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
553                                              BdrvChild *src,
554                                              uint64_t src_offset,
555                                              BdrvChild *dst,
556                                              uint64_t dst_offset,
557                                              uint64_t bytes,
558                                              BdrvRequestFlags read_flags,
559                                              BdrvRequestFlags write_flags)
560 {
561     int ret;
562 
563     ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
564     if (ret) {
565         return ret;
566     }
567     return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
568                                  read_flags, write_flags);
569 }
570 
/*
 * NULL-terminated option name list installed as
 * bdrv_raw.strong_runtime_opts.
 * NOTE(review): presumably the options that change which data the node
 * exposes - confirm against the generic block layer.
 */
static const char *const raw_strong_runtime_opts[] = {
    "offset",
    "size",

    NULL
};
577 
578 static void raw_cancel_in_flight(BlockDriverState *bs)
579 {
580     bdrv_cancel_in_flight(bs->file->bs);
581 }
582 
583 static void raw_child_perm(BlockDriverState *bs, BdrvChild *c,
584                            BdrvChildRole role,
585                            BlockReopenQueue *reopen_queue,
586                            uint64_t parent_perm, uint64_t parent_shared,
587                            uint64_t *nperm, uint64_t *nshared)
588 {
589     bdrv_default_perms(bs, c, role, reopen_queue, parent_perm,
590                        parent_shared, nperm, nshared);
591 
592     /*
593      * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
594      * bdrv_default_perms_for_storage() for an explanation) but we only need
595      * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
596      * to avoid permission conflicts.
597      */
598     *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
599     *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
600 }
601 
/*
 * Driver table for the "raw" format.  Wires the callbacks defined in this
 * file into the BlockDriver interface; registered by bdrv_raw_init().
 */
BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .instance_size        = sizeof(BDRVRawState),
    .bdrv_probe           = &raw_probe,
    .bdrv_reopen_prepare  = &raw_reopen_prepare,
    .bdrv_reopen_commit   = &raw_reopen_commit,
    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_child_perm      = raw_child_perm,
    .bdrv_co_create_opts  = &raw_co_create_opts,
    .bdrv_co_preadv       = &raw_co_preadv,
    .bdrv_co_pwritev      = &raw_co_pwritev,
    .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
    .bdrv_co_pdiscard     = &raw_co_pdiscard,
    .bdrv_co_block_status = &raw_co_block_status,
    .bdrv_co_copy_range_from = &raw_co_copy_range_from,
    .bdrv_co_copy_range_to  = &raw_co_copy_range_to,
    .bdrv_co_truncate     = &raw_co_truncate,
    .bdrv_getlength       = &raw_getlength,
    .is_format            = true,
    .has_variable_length  = true,
    .bdrv_measure         = &raw_measure,
    .bdrv_get_info        = &raw_get_info,
    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
    .bdrv_probe_geometry  = &raw_probe_geometry,
    .bdrv_eject           = &raw_eject,
    .bdrv_lock_medium     = &raw_lock_medium,
    .bdrv_co_ioctl        = &raw_co_ioctl,
    .create_opts          = &raw_create_opts,
    .bdrv_has_zero_init   = &raw_has_zero_init,
    .strong_runtime_opts  = raw_strong_runtime_opts,
    .mutable_opts         = mutable_opts,
    .bdrv_cancel_in_flight = raw_cancel_in_flight,
};
637 
/* Register the raw driver with the block layer. */
static void bdrv_raw_init(void)
{
    bdrv_register(&bdrv_raw);
}

/* Hook bdrv_raw_init() into the block-module initialisation list. */
block_init(bdrv_raw_init);
644