xref: /openbmc/qemu/block/raw-format.c (revision 7f709ce7)
1 /* BlockDriver implementation for "raw" format driver
2  *
3  * Copyright (C) 2010-2016 Red Hat, Inc.
4  * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
5  * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
6  *
7  * Author:
8  *   Laszlo Ersek <lersek@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to
12  * deal in the Software without restriction, including without limitation the
13  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14  * sell copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "block/block_int.h"
31 #include "qapi/error.h"
32 #include "qemu/option.h"
33 
/* Per-image state of the "raw" format driver. */
typedef struct BDRVRawState {
    /* Byte position inside the underlying file where the image starts. */
    uint64_t offset;
    /* Virtual disk size in bytes. */
    uint64_t size;
    /* True when the "size" runtime option was given explicitly. */
    bool has_size;
} BDRVRawState;
39 
40 static QemuOptsList raw_runtime_opts = {
41     .name = "raw",
42     .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
43     .desc = {
44         {
45             .name = "offset",
46             .type = QEMU_OPT_SIZE,
47             .help = "offset in the disk where the image starts",
48         },
49         {
50             .name = "size",
51             .type = QEMU_OPT_SIZE,
52             .help = "virtual disk size",
53         },
54         { /* end of list */ }
55     },
56 };
57 
58 static QemuOptsList raw_create_opts = {
59     .name = "raw-create-opts",
60     .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
61     .desc = {
62         {
63             .name = BLOCK_OPT_SIZE,
64             .type = QEMU_OPT_SIZE,
65             .help = "Virtual disk size"
66         },
67         { /* end of list */ }
68     }
69 };
70 
71 static int raw_read_options(QDict *options, BlockDriverState *bs,
72     BDRVRawState *s, Error **errp)
73 {
74     Error *local_err = NULL;
75     QemuOpts *opts = NULL;
76     int64_t real_size = 0;
77     int ret;
78 
79     real_size = bdrv_getlength(bs->file->bs);
80     if (real_size < 0) {
81         error_setg_errno(errp, -real_size, "Could not get image size");
82         return real_size;
83     }
84 
85     opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
86     qemu_opts_absorb_qdict(opts, options, &local_err);
87     if (local_err) {
88         error_propagate(errp, local_err);
89         ret = -EINVAL;
90         goto end;
91     }
92 
93     s->offset = qemu_opt_get_size(opts, "offset", 0);
94     if (s->offset > real_size) {
95         error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
96             "size of the containing file (%" PRId64 ")",
97             s->offset, real_size);
98         ret = -EINVAL;
99         goto end;
100     }
101 
102     if (qemu_opt_find(opts, "size") != NULL) {
103         s->size = qemu_opt_get_size(opts, "size", 0);
104         s->has_size = true;
105     } else {
106         s->has_size = false;
107         s->size = real_size - s->offset;
108     }
109 
110     /* Check size and offset */
111     if ((real_size - s->offset) < s->size) {
112         error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
113             "(%" PRIu64 ") has to be smaller or equal to the "
114             " actual size of the containing file (%" PRId64 ")",
115             s->offset, s->size, real_size);
116         ret = -EINVAL;
117         goto end;
118     }
119 
120     /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
121      * up and leaking out of the specified area. */
122     if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) {
123         error_setg(errp, "Specified size is not multiple of %llu",
124             BDRV_SECTOR_SIZE);
125         ret = -EINVAL;
126         goto end;
127     }
128 
129     ret = 0;
130 
131 end:
132 
133     qemu_opts_del(opts);
134 
135     return ret;
136 }
137 
138 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
139                               BlockReopenQueue *queue, Error **errp)
140 {
141     assert(reopen_state != NULL);
142     assert(reopen_state->bs != NULL);
143 
144     reopen_state->opaque = g_new0(BDRVRawState, 1);
145 
146     return raw_read_options(
147         reopen_state->options,
148         reopen_state->bs,
149         reopen_state->opaque,
150         errp);
151 }
152 
153 static void raw_reopen_commit(BDRVReopenState *state)
154 {
155     BDRVRawState *new_s = state->opaque;
156     BDRVRawState *s = state->bs->opaque;
157 
158     memcpy(s, new_s, sizeof(BDRVRawState));
159 
160     g_free(state->opaque);
161     state->opaque = NULL;
162 }
163 
164 static void raw_reopen_abort(BDRVReopenState *state)
165 {
166     g_free(state->opaque);
167     state->opaque = NULL;
168 }
169 
170 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
171                                       uint64_t bytes, QEMUIOVector *qiov,
172                                       int flags)
173 {
174     BDRVRawState *s = bs->opaque;
175 
176     if (offset > UINT64_MAX - s->offset) {
177         return -EINVAL;
178     }
179     offset += s->offset;
180 
181     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
182     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
183 }
184 
185 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
186                                        uint64_t bytes, QEMUIOVector *qiov,
187                                        int flags)
188 {
189     BDRVRawState *s = bs->opaque;
190     void *buf = NULL;
191     BlockDriver *drv;
192     QEMUIOVector local_qiov;
193     int ret;
194 
195     if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
196         /* There's not enough space for the data. Don't write anything and just
197          * fail to prevent leaking out of the size specified in options. */
198         return -ENOSPC;
199     }
200 
201     if (offset > UINT64_MAX - s->offset) {
202         ret = -EINVAL;
203         goto fail;
204     }
205 
206     if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
207         /* Handling partial writes would be a pain - so we just
208          * require that guests have 512-byte request alignment if
209          * probing occurred */
210         QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
211         QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
212         assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
213 
214         buf = qemu_try_blockalign(bs->file->bs, 512);
215         if (!buf) {
216             ret = -ENOMEM;
217             goto fail;
218         }
219 
220         ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
221         if (ret != 512) {
222             ret = -EINVAL;
223             goto fail;
224         }
225 
226         drv = bdrv_probe_all(buf, 512, NULL);
227         if (drv != bs->drv) {
228             ret = -EPERM;
229             goto fail;
230         }
231 
232         /* Use the checked buffer, a malicious guest might be overwriting its
233          * original buffer in the background. */
234         qemu_iovec_init(&local_qiov, qiov->niov + 1);
235         qemu_iovec_add(&local_qiov, buf, 512);
236         qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
237         qiov = &local_qiov;
238     }
239 
240     offset += s->offset;
241 
242     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
243     ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
244 
245 fail:
246     if (qiov == &local_qiov) {
247         qemu_iovec_destroy(&local_qiov);
248     }
249     qemu_vfree(buf);
250     return ret;
251 }
252 
253 static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
254                                             int64_t sector_num,
255                                             int nb_sectors, int *pnum,
256                                             BlockDriverState **file)
257 {
258     BDRVRawState *s = bs->opaque;
259     *pnum = nb_sectors;
260     *file = bs->file->bs;
261     sector_num += s->offset / BDRV_SECTOR_SIZE;
262     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
263            (sector_num << BDRV_SECTOR_BITS);
264 }
265 
266 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
267                                              int64_t offset, int bytes,
268                                              BdrvRequestFlags flags)
269 {
270     BDRVRawState *s = bs->opaque;
271     if (offset > UINT64_MAX - s->offset) {
272         return -EINVAL;
273     }
274     offset += s->offset;
275     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
276 }
277 
278 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
279                                         int64_t offset, int bytes)
280 {
281     BDRVRawState *s = bs->opaque;
282     if (offset > UINT64_MAX - s->offset) {
283         return -EINVAL;
284     }
285     offset += s->offset;
286     return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
287 }
288 
289 static int64_t raw_getlength(BlockDriverState *bs)
290 {
291     int64_t len;
292     BDRVRawState *s = bs->opaque;
293 
294     /* Update size. It should not change unless the file was externally
295      * modified. */
296     len = bdrv_getlength(bs->file->bs);
297     if (len < 0) {
298         return len;
299     }
300 
301     if (len < s->offset) {
302         s->size = 0;
303     } else {
304         if (s->has_size) {
305             /* Try to honour the size */
306             s->size = MIN(s->size, len - s->offset);
307         } else {
308             s->size = len - s->offset;
309         }
310     }
311 
312     return s->size;
313 }
314 
315 static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
316                                      Error **errp)
317 {
318     BlockMeasureInfo *info;
319     int64_t required;
320 
321     if (in_bs) {
322         required = bdrv_getlength(in_bs);
323         if (required < 0) {
324             error_setg_errno(errp, -required, "Unable to get image size");
325             return NULL;
326         }
327     } else {
328         required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
329                             BDRV_SECTOR_SIZE);
330     }
331 
332     info = g_new(BlockMeasureInfo, 1);
333     info->required = required;
334 
335     /* Unallocated sectors count towards the file size in raw images */
336     info->fully_allocated = info->required;
337     return info;
338 }
339 
340 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
341 {
342     return bdrv_get_info(bs->file->bs, bdi);
343 }
344 
345 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
346 {
347     if (bs->probed) {
348         /* To make it easier to protect the first sector, any probed
349          * image is restricted to read-modify-write on sub-sector
350          * operations. */
351         bs->bl.request_alignment = BDRV_SECTOR_SIZE;
352     }
353 }
354 
355 static int raw_truncate(BlockDriverState *bs, int64_t offset,
356                         PreallocMode prealloc, Error **errp)
357 {
358     BDRVRawState *s = bs->opaque;
359 
360     if (s->has_size) {
361         error_setg(errp, "Cannot resize fixed-size raw disks");
362         return -ENOTSUP;
363     }
364 
365     if (INT64_MAX - offset < s->offset) {
366         error_setg(errp, "Disk size too large for the chosen offset");
367         return -EINVAL;
368     }
369 
370     s->size = offset;
371     offset += s->offset;
372     return bdrv_truncate(bs->file, offset, prealloc, errp);
373 }
374 
375 static void raw_eject(BlockDriverState *bs, bool eject_flag)
376 {
377     bdrv_eject(bs->file->bs, eject_flag);
378 }
379 
380 static void raw_lock_medium(BlockDriverState *bs, bool locked)
381 {
382     bdrv_lock_medium(bs->file->bs, locked);
383 }
384 
385 static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
386 {
387     BDRVRawState *s = bs->opaque;
388     if (s->offset || s->has_size) {
389         return -ENOTSUP;
390     }
391     return bdrv_co_ioctl(bs->file->bs, req, buf);
392 }
393 
394 static int raw_has_zero_init(BlockDriverState *bs)
395 {
396     return bdrv_has_zero_init(bs->file->bs);
397 }
398 
399 static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
400 {
401     return bdrv_create_file(filename, opts, errp);
402 }
403 
404 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
405                     Error **errp)
406 {
407     BDRVRawState *s = bs->opaque;
408     int ret;
409 
410     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
411                                false, errp);
412     if (!bs->file) {
413         return -EINVAL;
414     }
415 
416     bs->sg = bs->file->bs->sg;
417     bs->supported_write_flags = BDRV_REQ_FUA &
418         bs->file->bs->supported_write_flags;
419     bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
420         bs->file->bs->supported_zero_flags;
421 
422     if (bs->probed && !bdrv_is_read_only(bs)) {
423         fprintf(stderr,
424                 "WARNING: Image format was not specified for '%s' and probing "
425                 "guessed raw.\n"
426                 "         Automatically detecting the format is dangerous for "
427                 "raw images, write operations on block 0 will be restricted.\n"
428                 "         Specify the 'raw' format explicitly to remove the "
429                 "restrictions.\n",
430                 bs->file->bs->filename);
431     }
432 
433     ret = raw_read_options(options, bs, s, errp);
434     if (ret < 0) {
435         return ret;
436     }
437 
438     if (bs->sg && (s->offset || s->has_size)) {
439         error_setg(errp, "Cannot use offset/size with SCSI generic devices");
440         return -EINVAL;
441     }
442 
443     return 0;
444 }
445 
446 static void raw_close(BlockDriverState *bs)
447 {
448 }
449 
/*
 * Probe callback: the buffer contents and file name are not inspected; a
 * constant score of 1 is returned for every input.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    /* smallest possible positive score so that raw is used if and only if no
     * other block driver works
     */
    const int lowest_score = 1;

    return lowest_score;
}
457 
458 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
459 {
460     BDRVRawState *s = bs->opaque;
461     int ret;
462 
463     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
464     if (ret < 0) {
465         return ret;
466     }
467 
468     if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
469         return -ENOTSUP;
470     }
471 
472     return 0;
473 }
474 
475 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
476 {
477     BDRVRawState *s = bs->opaque;
478     if (s->offset || s->has_size) {
479         return -ENOTSUP;
480     }
481     return bdrv_probe_geometry(bs->file->bs, geo);
482 }
483 
484 BlockDriver bdrv_raw = {
485     .format_name          = "raw",
486     .instance_size        = sizeof(BDRVRawState),
487     .bdrv_probe           = &raw_probe,
488     .bdrv_reopen_prepare  = &raw_reopen_prepare,
489     .bdrv_reopen_commit   = &raw_reopen_commit,
490     .bdrv_reopen_abort    = &raw_reopen_abort,
491     .bdrv_open            = &raw_open,
492     .bdrv_close           = &raw_close,
493     .bdrv_child_perm      = bdrv_filter_default_perms,
494     .bdrv_create          = &raw_create,
495     .bdrv_co_preadv       = &raw_co_preadv,
496     .bdrv_co_pwritev      = &raw_co_pwritev,
497     .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
498     .bdrv_co_pdiscard     = &raw_co_pdiscard,
499     .bdrv_co_get_block_status = &raw_co_get_block_status,
500     .bdrv_truncate        = &raw_truncate,
501     .bdrv_getlength       = &raw_getlength,
502     .has_variable_length  = true,
503     .bdrv_measure         = &raw_measure,
504     .bdrv_get_info        = &raw_get_info,
505     .bdrv_refresh_limits  = &raw_refresh_limits,
506     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
507     .bdrv_probe_geometry  = &raw_probe_geometry,
508     .bdrv_eject           = &raw_eject,
509     .bdrv_lock_medium     = &raw_lock_medium,
510     .bdrv_co_ioctl        = &raw_co_ioctl,
511     .create_opts          = &raw_create_opts,
512     .bdrv_has_zero_init   = &raw_has_zero_init
513 };
514 
515 static void bdrv_raw_init(void)
516 {
517     bdrv_register(&bdrv_raw);
518 }
519 
520 block_init(bdrv_raw_init);
521