xref: /openbmc/qemu/block/raw-format.c (revision 8779fccb)
1 /* BlockDriver implementation for "raw" format driver
2  *
3  * Copyright (C) 2010-2016 Red Hat, Inc.
4  * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
5  * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
6  *
7  * Author:
8  *   Laszlo Ersek <lersek@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to
12  * deal in the Software without restriction, including without limitation the
13  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14  * sell copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "block/block_int.h"
31 #include "qapi/error.h"
32 #include "qemu/option.h"
33 
/* Per-node state of the raw driver: describes which window of the
 * underlying file is exposed as the virtual disk. */
typedef struct BDRVRawState {
    /* Byte position in the underlying file where the image starts. */
    uint64_t offset;
    /* Virtual disk size in bytes. */
    uint64_t size;
    /* True when a "size" option was given explicitly rather than derived
     * from the length of the underlying file. */
    bool has_size;
} BDRVRawState;
39 
40 static QemuOptsList raw_runtime_opts = {
41     .name = "raw",
42     .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
43     .desc = {
44         {
45             .name = "offset",
46             .type = QEMU_OPT_SIZE,
47             .help = "offset in the disk where the image starts",
48         },
49         {
50             .name = "size",
51             .type = QEMU_OPT_SIZE,
52             .help = "virtual disk size",
53         },
54         { /* end of list */ }
55     },
56 };
57 
58 static QemuOptsList raw_create_opts = {
59     .name = "raw-create-opts",
60     .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
61     .desc = {
62         {
63             .name = BLOCK_OPT_SIZE,
64             .type = QEMU_OPT_SIZE,
65             .help = "Virtual disk size"
66         },
67         { /* end of list */ }
68     }
69 };
70 
71 static int raw_read_options(QDict *options, BlockDriverState *bs,
72     BDRVRawState *s, Error **errp)
73 {
74     Error *local_err = NULL;
75     QemuOpts *opts = NULL;
76     int64_t real_size = 0;
77     int ret;
78 
79     real_size = bdrv_getlength(bs->file->bs);
80     if (real_size < 0) {
81         error_setg_errno(errp, -real_size, "Could not get image size");
82         return real_size;
83     }
84 
85     opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
86     qemu_opts_absorb_qdict(opts, options, &local_err);
87     if (local_err) {
88         error_propagate(errp, local_err);
89         ret = -EINVAL;
90         goto end;
91     }
92 
93     s->offset = qemu_opt_get_size(opts, "offset", 0);
94     if (s->offset > real_size) {
95         error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
96             "size of the containing file (%" PRId64 ")",
97             s->offset, real_size);
98         ret = -EINVAL;
99         goto end;
100     }
101 
102     if (qemu_opt_find(opts, "size") != NULL) {
103         s->size = qemu_opt_get_size(opts, "size", 0);
104         s->has_size = true;
105     } else {
106         s->has_size = false;
107         s->size = real_size - s->offset;
108     }
109 
110     /* Check size and offset */
111     if ((real_size - s->offset) < s->size) {
112         error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
113             "(%" PRIu64 ") has to be smaller or equal to the "
114             " actual size of the containing file (%" PRId64 ")",
115             s->offset, s->size, real_size);
116         ret = -EINVAL;
117         goto end;
118     }
119 
120     /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
121      * up and leaking out of the specified area. */
122     if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) {
123         error_setg(errp, "Specified size is not multiple of %llu",
124             BDRV_SECTOR_SIZE);
125         ret = -EINVAL;
126         goto end;
127     }
128 
129     ret = 0;
130 
131 end:
132 
133     qemu_opts_del(opts);
134 
135     return ret;
136 }
137 
138 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
139                               BlockReopenQueue *queue, Error **errp)
140 {
141     assert(reopen_state != NULL);
142     assert(reopen_state->bs != NULL);
143 
144     reopen_state->opaque = g_new0(BDRVRawState, 1);
145 
146     return raw_read_options(
147         reopen_state->options,
148         reopen_state->bs,
149         reopen_state->opaque,
150         errp);
151 }
152 
153 static void raw_reopen_commit(BDRVReopenState *state)
154 {
155     BDRVRawState *new_s = state->opaque;
156     BDRVRawState *s = state->bs->opaque;
157 
158     memcpy(s, new_s, sizeof(BDRVRawState));
159 
160     g_free(state->opaque);
161     state->opaque = NULL;
162 }
163 
164 static void raw_reopen_abort(BDRVReopenState *state)
165 {
166     g_free(state->opaque);
167     state->opaque = NULL;
168 }
169 
170 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
171                                       uint64_t bytes, QEMUIOVector *qiov,
172                                       int flags)
173 {
174     BDRVRawState *s = bs->opaque;
175 
176     if (offset > UINT64_MAX - s->offset) {
177         return -EINVAL;
178     }
179     offset += s->offset;
180 
181     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
182     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
183 }
184 
185 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
186                                        uint64_t bytes, QEMUIOVector *qiov,
187                                        int flags)
188 {
189     BDRVRawState *s = bs->opaque;
190     void *buf = NULL;
191     BlockDriver *drv;
192     QEMUIOVector local_qiov;
193     int ret;
194 
195     if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
196         /* There's not enough space for the data. Don't write anything and just
197          * fail to prevent leaking out of the size specified in options. */
198         return -ENOSPC;
199     }
200 
201     if (offset > UINT64_MAX - s->offset) {
202         ret = -EINVAL;
203         goto fail;
204     }
205 
206     if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
207         /* Handling partial writes would be a pain - so we just
208          * require that guests have 512-byte request alignment if
209          * probing occurred */
210         QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
211         QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
212         assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
213 
214         buf = qemu_try_blockalign(bs->file->bs, 512);
215         if (!buf) {
216             ret = -ENOMEM;
217             goto fail;
218         }
219 
220         ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
221         if (ret != 512) {
222             ret = -EINVAL;
223             goto fail;
224         }
225 
226         drv = bdrv_probe_all(buf, 512, NULL);
227         if (drv != bs->drv) {
228             ret = -EPERM;
229             goto fail;
230         }
231 
232         /* Use the checked buffer, a malicious guest might be overwriting its
233          * original buffer in the background. */
234         qemu_iovec_init(&local_qiov, qiov->niov + 1);
235         qemu_iovec_add(&local_qiov, buf, 512);
236         qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
237         qiov = &local_qiov;
238     }
239 
240     offset += s->offset;
241 
242     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
243     ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
244 
245 fail:
246     if (qiov == &local_qiov) {
247         qemu_iovec_destroy(&local_qiov);
248     }
249     qemu_vfree(buf);
250     return ret;
251 }
252 
253 static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
254                                             int64_t sector_num,
255                                             int nb_sectors, int *pnum,
256                                             BlockDriverState **file)
257 {
258     BDRVRawState *s = bs->opaque;
259     *pnum = nb_sectors;
260     *file = bs->file->bs;
261     sector_num += s->offset / BDRV_SECTOR_SIZE;
262     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
263            (sector_num << BDRV_SECTOR_BITS);
264 }
265 
266 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
267                                              int64_t offset, int count,
268                                              BdrvRequestFlags flags)
269 {
270     BDRVRawState *s = bs->opaque;
271     if (offset > UINT64_MAX - s->offset) {
272         return -EINVAL;
273     }
274     offset += s->offset;
275     return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
276 }
277 
278 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
279                                         int64_t offset, int count)
280 {
281     BDRVRawState *s = bs->opaque;
282     if (offset > UINT64_MAX - s->offset) {
283         return -EINVAL;
284     }
285     offset += s->offset;
286     return bdrv_co_pdiscard(bs->file->bs, offset, count);
287 }
288 
289 static int64_t raw_getlength(BlockDriverState *bs)
290 {
291     int64_t len;
292     BDRVRawState *s = bs->opaque;
293 
294     /* Update size. It should not change unless the file was externally
295      * modified. */
296     len = bdrv_getlength(bs->file->bs);
297     if (len < 0) {
298         return len;
299     }
300 
301     if (len < s->offset) {
302         s->size = 0;
303     } else {
304         if (s->has_size) {
305             /* Try to honour the size */
306             s->size = MIN(s->size, len - s->offset);
307         } else {
308             s->size = len - s->offset;
309         }
310     }
311 
312     return s->size;
313 }
314 
315 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
316 {
317     return bdrv_get_info(bs->file->bs, bdi);
318 }
319 
320 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
321 {
322     if (bs->probed) {
323         /* To make it easier to protect the first sector, any probed
324          * image is restricted to read-modify-write on sub-sector
325          * operations. */
326         bs->bl.request_alignment = BDRV_SECTOR_SIZE;
327     }
328 }
329 
330 static int raw_truncate(BlockDriverState *bs, int64_t offset)
331 {
332     BDRVRawState *s = bs->opaque;
333 
334     if (s->has_size) {
335         return -ENOTSUP;
336     }
337 
338     if (INT64_MAX - offset < s->offset) {
339         return -EINVAL;
340     }
341 
342     s->size = offset;
343     offset += s->offset;
344     return bdrv_truncate(bs->file, offset);
345 }
346 
347 static int raw_media_changed(BlockDriverState *bs)
348 {
349     return bdrv_media_changed(bs->file->bs);
350 }
351 
352 static void raw_eject(BlockDriverState *bs, bool eject_flag)
353 {
354     bdrv_eject(bs->file->bs, eject_flag);
355 }
356 
357 static void raw_lock_medium(BlockDriverState *bs, bool locked)
358 {
359     bdrv_lock_medium(bs->file->bs, locked);
360 }
361 
362 static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
363 {
364     BDRVRawState *s = bs->opaque;
365     if (s->offset || s->has_size) {
366         return -ENOTSUP;
367     }
368     return bdrv_co_ioctl(bs->file->bs, req, buf);
369 }
370 
371 static int raw_has_zero_init(BlockDriverState *bs)
372 {
373     return bdrv_has_zero_init(bs->file->bs);
374 }
375 
376 static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
377 {
378     return bdrv_create_file(filename, opts, errp);
379 }
380 
381 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
382                     Error **errp)
383 {
384     BDRVRawState *s = bs->opaque;
385     int ret;
386 
387     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
388                                false, errp);
389     if (!bs->file) {
390         return -EINVAL;
391     }
392 
393     bs->sg = bs->file->bs->sg;
394     bs->supported_write_flags = BDRV_REQ_FUA &
395         bs->file->bs->supported_write_flags;
396     bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
397         bs->file->bs->supported_zero_flags;
398 
399     if (bs->probed && !bdrv_is_read_only(bs)) {
400         fprintf(stderr,
401                 "WARNING: Image format was not specified for '%s' and probing "
402                 "guessed raw.\n"
403                 "         Automatically detecting the format is dangerous for "
404                 "raw images, write operations on block 0 will be restricted.\n"
405                 "         Specify the 'raw' format explicitly to remove the "
406                 "restrictions.\n",
407                 bs->file->bs->filename);
408     }
409 
410     ret = raw_read_options(options, bs, s, errp);
411     if (ret < 0) {
412         return ret;
413     }
414 
415     if (bs->sg && (s->offset || s->has_size)) {
416         error_setg(errp, "Cannot use offset/size with SCSI generic devices");
417         return -EINVAL;
418     }
419 
420     return 0;
421 }
422 
423 static void raw_close(BlockDriverState *bs)
424 {
425 }
426 
/*
 * Format probe for raw. None of the arguments are inspected: every input
 * gets the same score.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    /* smallest possible positive score so that raw is used if and only if no
     * other block driver works
     */
    enum { RAW_PROBE_SCORE = 1 };

    return RAW_PROBE_SCORE;
}
434 
435 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
436 {
437     BDRVRawState *s = bs->opaque;
438     int ret;
439 
440     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
441     if (ret < 0) {
442         return ret;
443     }
444 
445     if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
446         return -ENOTSUP;
447     }
448 
449     return 0;
450 }
451 
452 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
453 {
454     BDRVRawState *s = bs->opaque;
455     if (s->offset || s->has_size) {
456         return -ENOTSUP;
457     }
458     return bdrv_probe_geometry(bs->file->bs, geo);
459 }
460 
461 BlockDriver bdrv_raw = {
462     .format_name          = "raw",
463     .instance_size        = sizeof(BDRVRawState),
464     .bdrv_probe           = &raw_probe,
465     .bdrv_reopen_prepare  = &raw_reopen_prepare,
466     .bdrv_reopen_commit   = &raw_reopen_commit,
467     .bdrv_reopen_abort    = &raw_reopen_abort,
468     .bdrv_open            = &raw_open,
469     .bdrv_close           = &raw_close,
470     .bdrv_create          = &raw_create,
471     .bdrv_co_preadv       = &raw_co_preadv,
472     .bdrv_co_pwritev      = &raw_co_pwritev,
473     .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
474     .bdrv_co_pdiscard     = &raw_co_pdiscard,
475     .bdrv_co_get_block_status = &raw_co_get_block_status,
476     .bdrv_truncate        = &raw_truncate,
477     .bdrv_getlength       = &raw_getlength,
478     .has_variable_length  = true,
479     .bdrv_get_info        = &raw_get_info,
480     .bdrv_refresh_limits  = &raw_refresh_limits,
481     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
482     .bdrv_probe_geometry  = &raw_probe_geometry,
483     .bdrv_media_changed   = &raw_media_changed,
484     .bdrv_eject           = &raw_eject,
485     .bdrv_lock_medium     = &raw_lock_medium,
486     .bdrv_co_ioctl        = &raw_co_ioctl,
487     .create_opts          = &raw_create_opts,
488     .bdrv_has_zero_init   = &raw_has_zero_init
489 };
490 
491 static void bdrv_raw_init(void)
492 {
493     bdrv_register(&bdrv_raw);
494 }
495 
496 block_init(bdrv_raw_init);
497