xref: /openbmc/qemu/block/raw-format.c (revision 754cb9c0)
1 /* BlockDriver implementation for "raw" format driver
2  *
3  * Copyright (C) 2010-2016 Red Hat, Inc.
4  * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
5  * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
6  *
7  * Author:
8  *   Laszlo Ersek <lersek@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to
12  * deal in the Software without restriction, including without limitation the
13  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14  * sell copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "block/block_int.h"
31 #include "qapi/error.h"
32 #include "qemu/option.h"
33 
/* Per-node state of the "raw" driver, filled in by raw_read_options(). */
typedef struct BDRVRawState {
    /* Value of the "offset" option; added to every request offset. */
    uint64_t offset;
    /* Value of the "size" option, or (file length - offset) if not given. */
    uint64_t size;
    /* True when the "size" option was supplied explicitly. */
    bool has_size;
} BDRVRawState;
39 
40 static QemuOptsList raw_runtime_opts = {
41     .name = "raw",
42     .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
43     .desc = {
44         {
45             .name = "offset",
46             .type = QEMU_OPT_SIZE,
47             .help = "offset in the disk where the image starts",
48         },
49         {
50             .name = "size",
51             .type = QEMU_OPT_SIZE,
52             .help = "virtual disk size",
53         },
54         { /* end of list */ }
55     },
56 };
57 
58 static QemuOptsList raw_create_opts = {
59     .name = "raw-create-opts",
60     .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
61     .desc = {
62         {
63             .name = BLOCK_OPT_SIZE,
64             .type = QEMU_OPT_SIZE,
65             .help = "Virtual disk size"
66         },
67         { /* end of list */ }
68     }
69 };
70 
71 static int raw_read_options(QDict *options, BlockDriverState *bs,
72     BDRVRawState *s, Error **errp)
73 {
74     Error *local_err = NULL;
75     QemuOpts *opts = NULL;
76     int64_t real_size = 0;
77     int ret;
78 
79     real_size = bdrv_getlength(bs->file->bs);
80     if (real_size < 0) {
81         error_setg_errno(errp, -real_size, "Could not get image size");
82         return real_size;
83     }
84 
85     opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
86     qemu_opts_absorb_qdict(opts, options, &local_err);
87     if (local_err) {
88         error_propagate(errp, local_err);
89         ret = -EINVAL;
90         goto end;
91     }
92 
93     s->offset = qemu_opt_get_size(opts, "offset", 0);
94     if (s->offset > real_size) {
95         error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
96             "size of the containing file (%" PRId64 ")",
97             s->offset, real_size);
98         ret = -EINVAL;
99         goto end;
100     }
101 
102     if (qemu_opt_find(opts, "size") != NULL) {
103         s->size = qemu_opt_get_size(opts, "size", 0);
104         s->has_size = true;
105     } else {
106         s->has_size = false;
107         s->size = real_size - s->offset;
108     }
109 
110     /* Check size and offset */
111     if ((real_size - s->offset) < s->size) {
112         error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
113             "(%" PRIu64 ") has to be smaller or equal to the "
114             " actual size of the containing file (%" PRId64 ")",
115             s->offset, s->size, real_size);
116         ret = -EINVAL;
117         goto end;
118     }
119 
120     /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
121      * up and leaking out of the specified area. */
122     if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) {
123         error_setg(errp, "Specified size is not multiple of %llu",
124             BDRV_SECTOR_SIZE);
125         ret = -EINVAL;
126         goto end;
127     }
128 
129     ret = 0;
130 
131 end:
132 
133     qemu_opts_del(opts);
134 
135     return ret;
136 }
137 
138 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
139                               BlockReopenQueue *queue, Error **errp)
140 {
141     assert(reopen_state != NULL);
142     assert(reopen_state->bs != NULL);
143 
144     reopen_state->opaque = g_new0(BDRVRawState, 1);
145 
146     return raw_read_options(
147         reopen_state->options,
148         reopen_state->bs,
149         reopen_state->opaque,
150         errp);
151 }
152 
153 static void raw_reopen_commit(BDRVReopenState *state)
154 {
155     BDRVRawState *new_s = state->opaque;
156     BDRVRawState *s = state->bs->opaque;
157 
158     memcpy(s, new_s, sizeof(BDRVRawState));
159 
160     g_free(state->opaque);
161     state->opaque = NULL;
162 }
163 
164 static void raw_reopen_abort(BDRVReopenState *state)
165 {
166     g_free(state->opaque);
167     state->opaque = NULL;
168 }
169 
170 /* Check and adjust the offset, against 'offset' and 'size' options. */
171 static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
172                                     uint64_t bytes, bool is_write)
173 {
174     BDRVRawState *s = bs->opaque;
175 
176     if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
177         /* There's not enough space for the write, or the read request is
178          * out-of-range. Don't read/write anything to prevent leaking out of
179          * the size specified in options. */
180         return is_write ? -ENOSPC : -EINVAL;
181     }
182 
183     if (*offset > INT64_MAX - s->offset) {
184         return -EINVAL;
185     }
186     *offset += s->offset;
187 
188     return 0;
189 }
190 
191 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
192                                       uint64_t bytes, QEMUIOVector *qiov,
193                                       int flags)
194 {
195     int ret;
196 
197     ret = raw_adjust_offset(bs, &offset, bytes, false);
198     if (ret) {
199         return ret;
200     }
201 
202     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
203     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
204 }
205 
206 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
207                                        uint64_t bytes, QEMUIOVector *qiov,
208                                        int flags)
209 {
210     void *buf = NULL;
211     BlockDriver *drv;
212     QEMUIOVector local_qiov;
213     int ret;
214 
215     if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
216         /* Handling partial writes would be a pain - so we just
217          * require that guests have 512-byte request alignment if
218          * probing occurred */
219         QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
220         QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
221         assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
222 
223         buf = qemu_try_blockalign(bs->file->bs, 512);
224         if (!buf) {
225             ret = -ENOMEM;
226             goto fail;
227         }
228 
229         ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
230         if (ret != 512) {
231             ret = -EINVAL;
232             goto fail;
233         }
234 
235         drv = bdrv_probe_all(buf, 512, NULL);
236         if (drv != bs->drv) {
237             ret = -EPERM;
238             goto fail;
239         }
240 
241         /* Use the checked buffer, a malicious guest might be overwriting its
242          * original buffer in the background. */
243         qemu_iovec_init(&local_qiov, qiov->niov + 1);
244         qemu_iovec_add(&local_qiov, buf, 512);
245         qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
246         qiov = &local_qiov;
247     }
248 
249     ret = raw_adjust_offset(bs, &offset, bytes, true);
250     if (ret) {
251         goto fail;
252     }
253 
254     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
255     ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
256 
257 fail:
258     if (qiov == &local_qiov) {
259         qemu_iovec_destroy(&local_qiov);
260     }
261     qemu_vfree(buf);
262     return ret;
263 }
264 
265 static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
266                                             bool want_zero, int64_t offset,
267                                             int64_t bytes, int64_t *pnum,
268                                             int64_t *map,
269                                             BlockDriverState **file)
270 {
271     BDRVRawState *s = bs->opaque;
272     *pnum = bytes;
273     *file = bs->file->bs;
274     *map = offset + s->offset;
275     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
276 }
277 
278 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
279                                              int64_t offset, int bytes,
280                                              BdrvRequestFlags flags)
281 {
282     int ret;
283 
284     ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
285     if (ret) {
286         return ret;
287     }
288     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
289 }
290 
291 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
292                                         int64_t offset, int bytes)
293 {
294     int ret;
295 
296     ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
297     if (ret) {
298         return ret;
299     }
300     return bdrv_co_pdiscard(bs->file, offset, bytes);
301 }
302 
303 static int64_t raw_getlength(BlockDriverState *bs)
304 {
305     int64_t len;
306     BDRVRawState *s = bs->opaque;
307 
308     /* Update size. It should not change unless the file was externally
309      * modified. */
310     len = bdrv_getlength(bs->file->bs);
311     if (len < 0) {
312         return len;
313     }
314 
315     if (len < s->offset) {
316         s->size = 0;
317     } else {
318         if (s->has_size) {
319             /* Try to honour the size */
320             s->size = MIN(s->size, len - s->offset);
321         } else {
322             s->size = len - s->offset;
323         }
324     }
325 
326     return s->size;
327 }
328 
329 static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
330                                      Error **errp)
331 {
332     BlockMeasureInfo *info;
333     int64_t required;
334 
335     if (in_bs) {
336         required = bdrv_getlength(in_bs);
337         if (required < 0) {
338             error_setg_errno(errp, -required, "Unable to get image size");
339             return NULL;
340         }
341     } else {
342         required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
343                             BDRV_SECTOR_SIZE);
344     }
345 
346     info = g_new(BlockMeasureInfo, 1);
347     info->required = required;
348 
349     /* Unallocated sectors count towards the file size in raw images */
350     info->fully_allocated = info->required;
351     return info;
352 }
353 
354 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
355 {
356     return bdrv_get_info(bs->file->bs, bdi);
357 }
358 
359 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
360 {
361     if (bs->probed) {
362         /* To make it easier to protect the first sector, any probed
363          * image is restricted to read-modify-write on sub-sector
364          * operations. */
365         bs->bl.request_alignment = BDRV_SECTOR_SIZE;
366     }
367 }
368 
369 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
370                                         PreallocMode prealloc, Error **errp)
371 {
372     BDRVRawState *s = bs->opaque;
373 
374     if (s->has_size) {
375         error_setg(errp, "Cannot resize fixed-size raw disks");
376         return -ENOTSUP;
377     }
378 
379     if (INT64_MAX - offset < s->offset) {
380         error_setg(errp, "Disk size too large for the chosen offset");
381         return -EINVAL;
382     }
383 
384     s->size = offset;
385     offset += s->offset;
386     return bdrv_co_truncate(bs->file, offset, prealloc, errp);
387 }
388 
389 static void raw_eject(BlockDriverState *bs, bool eject_flag)
390 {
391     bdrv_eject(bs->file->bs, eject_flag);
392 }
393 
394 static void raw_lock_medium(BlockDriverState *bs, bool locked)
395 {
396     bdrv_lock_medium(bs->file->bs, locked);
397 }
398 
399 static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
400 {
401     BDRVRawState *s = bs->opaque;
402     if (s->offset || s->has_size) {
403         return -ENOTSUP;
404     }
405     return bdrv_co_ioctl(bs->file->bs, req, buf);
406 }
407 
408 static int raw_has_zero_init(BlockDriverState *bs)
409 {
410     return bdrv_has_zero_init(bs->file->bs);
411 }
412 
413 static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
414                                            Error **errp)
415 {
416     return bdrv_create_file(filename, opts, errp);
417 }
418 
419 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
420                     Error **errp)
421 {
422     BDRVRawState *s = bs->opaque;
423     int ret;
424 
425     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
426                                false, errp);
427     if (!bs->file) {
428         return -EINVAL;
429     }
430 
431     bs->sg = bs->file->bs->sg;
432     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
433         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
434     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
435         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
436             bs->file->bs->supported_zero_flags);
437 
438     if (bs->probed && !bdrv_is_read_only(bs)) {
439         bdrv_refresh_filename(bs->file->bs);
440         fprintf(stderr,
441                 "WARNING: Image format was not specified for '%s' and probing "
442                 "guessed raw.\n"
443                 "         Automatically detecting the format is dangerous for "
444                 "raw images, write operations on block 0 will be restricted.\n"
445                 "         Specify the 'raw' format explicitly to remove the "
446                 "restrictions.\n",
447                 bs->file->bs->filename);
448     }
449 
450     ret = raw_read_options(options, bs, s, errp);
451     if (ret < 0) {
452         return ret;
453     }
454 
455     if (bs->sg && (s->offset || s->has_size)) {
456         error_setg(errp, "Cannot use offset/size with SCSI generic devices");
457         return -EINVAL;
458     }
459 
460     return 0;
461 }
462 
/*
 * Probe callback.  The arguments are ignored: raw always answers with the
 * smallest possible positive score, so that it is used if and only if no
 * other block driver works.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    const int lowest_positive_score = 1;

    return lowest_positive_score;
}
470 
471 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
472 {
473     BDRVRawState *s = bs->opaque;
474     int ret;
475 
476     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
477     if (ret < 0) {
478         return ret;
479     }
480 
481     if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
482         return -ENOTSUP;
483     }
484 
485     return 0;
486 }
487 
488 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
489 {
490     BDRVRawState *s = bs->opaque;
491     if (s->offset || s->has_size) {
492         return -ENOTSUP;
493     }
494     return bdrv_probe_geometry(bs->file->bs, geo);
495 }
496 
497 static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
498                                                BdrvChild *src,
499                                                uint64_t src_offset,
500                                                BdrvChild *dst,
501                                                uint64_t dst_offset,
502                                                uint64_t bytes,
503                                                BdrvRequestFlags read_flags,
504                                                BdrvRequestFlags write_flags)
505 {
506     int ret;
507 
508     ret = raw_adjust_offset(bs, &src_offset, bytes, false);
509     if (ret) {
510         return ret;
511     }
512     return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
513                                    bytes, read_flags, write_flags);
514 }
515 
516 static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
517                                              BdrvChild *src,
518                                              uint64_t src_offset,
519                                              BdrvChild *dst,
520                                              uint64_t dst_offset,
521                                              uint64_t bytes,
522                                              BdrvRequestFlags read_flags,
523                                              BdrvRequestFlags write_flags)
524 {
525     int ret;
526 
527     ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
528     if (ret) {
529         return ret;
530     }
531     return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
532                                  read_flags, write_flags);
533 }
534 
/* NULL-terminated list published through bdrv_raw.strong_runtime_opts. */
static const char *const raw_strong_runtime_opts[] = {
    "offset",
    "size",
    NULL    /* terminator */
};
541 
542 BlockDriver bdrv_raw = {
543     .format_name          = "raw",
544     .instance_size        = sizeof(BDRVRawState),
545     .bdrv_probe           = &raw_probe,
546     .bdrv_reopen_prepare  = &raw_reopen_prepare,
547     .bdrv_reopen_commit   = &raw_reopen_commit,
548     .bdrv_reopen_abort    = &raw_reopen_abort,
549     .bdrv_open            = &raw_open,
550     .bdrv_child_perm      = bdrv_filter_default_perms,
551     .bdrv_co_create_opts  = &raw_co_create_opts,
552     .bdrv_co_preadv       = &raw_co_preadv,
553     .bdrv_co_pwritev      = &raw_co_pwritev,
554     .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
555     .bdrv_co_pdiscard     = &raw_co_pdiscard,
556     .bdrv_co_block_status = &raw_co_block_status,
557     .bdrv_co_copy_range_from = &raw_co_copy_range_from,
558     .bdrv_co_copy_range_to  = &raw_co_copy_range_to,
559     .bdrv_co_truncate     = &raw_co_truncate,
560     .bdrv_getlength       = &raw_getlength,
561     .has_variable_length  = true,
562     .bdrv_measure         = &raw_measure,
563     .bdrv_get_info        = &raw_get_info,
564     .bdrv_refresh_limits  = &raw_refresh_limits,
565     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
566     .bdrv_probe_geometry  = &raw_probe_geometry,
567     .bdrv_eject           = &raw_eject,
568     .bdrv_lock_medium     = &raw_lock_medium,
569     .bdrv_co_ioctl        = &raw_co_ioctl,
570     .create_opts          = &raw_create_opts,
571     .bdrv_has_zero_init   = &raw_has_zero_init,
572     .strong_runtime_opts  = raw_strong_runtime_opts,
573 };
574 
575 static void bdrv_raw_init(void)
576 {
577     bdrv_register(&bdrv_raw);
578 }
579 
580 block_init(bdrv_raw_init);
581