xref: /openbmc/qemu/block.c (revision 33e3963e1b9298e01cadd738124f0e618b5b79f5)
1fc01f7e7Sbellard /*
2fc01f7e7Sbellard  * QEMU System Emulator block driver
3fc01f7e7Sbellard  *
4fc01f7e7Sbellard  * Copyright (c) 2003 Fabrice Bellard
5fc01f7e7Sbellard  *
6fc01f7e7Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7fc01f7e7Sbellard  * of this software and associated documentation files (the "Software"), to deal
8fc01f7e7Sbellard  * in the Software without restriction, including without limitation the rights
9fc01f7e7Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10fc01f7e7Sbellard  * copies of the Software, and to permit persons to whom the Software is
11fc01f7e7Sbellard  * furnished to do so, subject to the following conditions:
12fc01f7e7Sbellard  *
13fc01f7e7Sbellard  * The above copyright notice and this permission notice shall be included in
14fc01f7e7Sbellard  * all copies or substantial portions of the Software.
15fc01f7e7Sbellard  *
16fc01f7e7Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fc01f7e7Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fc01f7e7Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fc01f7e7Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20fc01f7e7Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21fc01f7e7Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22fc01f7e7Sbellard  * THE SOFTWARE.
23fc01f7e7Sbellard  */
24fc01f7e7Sbellard #include <stdlib.h>
25fc01f7e7Sbellard #include <stdio.h>
26fc01f7e7Sbellard #include <stdarg.h>
27fc01f7e7Sbellard #include <string.h>
28fc01f7e7Sbellard #include <getopt.h>
29fc01f7e7Sbellard #include <inttypes.h>
30fc01f7e7Sbellard #include <unistd.h>
31fc01f7e7Sbellard #include <sys/mman.h>
32fc01f7e7Sbellard #include <fcntl.h>
33fc01f7e7Sbellard #include <signal.h>
34fc01f7e7Sbellard #include <time.h>
35fc01f7e7Sbellard #include <sys/time.h>
36fc01f7e7Sbellard #include <malloc.h>
37fc01f7e7Sbellard #include <termios.h>
38fc01f7e7Sbellard #include <sys/poll.h>
39fc01f7e7Sbellard #include <errno.h>
40fc01f7e7Sbellard #include <sys/wait.h>
41*33e3963eSbellard #include <netinet/in.h>
42fc01f7e7Sbellard 
43fc01f7e7Sbellard #include "vl.h"
44fc01f7e7Sbellard 
45*33e3963eSbellard #define NO_THUNK_TYPE_SIZE
46*33e3963eSbellard #include "thunk.h"
47*33e3963eSbellard 
/*
 * State of one opened disk image.  Sectors are 512 bytes long.  When
 * cow_bitmap is non NULL, the bitmap tells for each sector whether the
 * current data lives in the COW file (cow_fd) or in the image (fd).
 */
struct BlockDriverState {
    /* descriptor of the raw image; -1 means only COW mappings exist */
    int fd;
    /* image size, in 512 byte sectors */
    int64_t total_sectors;
    /* non zero: writes and commits are refused */
    int read_only;

    /* one bit per sector; if non NULL, COW mappings are used first */
    uint8_t *cow_bitmap;
    /* address returned by mmap() for the region holding cow_bitmap */
    uint8_t *cow_bitmap_addr;
    /* length in bytes of the mapping at cow_bitmap_addr */
    int cow_bitmap_size;
    /* descriptor of the COW file, -1 if none */
    int cow_fd;
    /* byte offset of sector 0 inside the COW file */
    int64_t cow_sectors_offset;
    /* name given to bdrv_open(), used in diagnostics */
    char filename[1024];
};
60fc01f7e7Sbellard 
61*33e3963eSbellard BlockDriverState *bdrv_open(const char *filename, int snapshot)
62fc01f7e7Sbellard {
63fc01f7e7Sbellard     BlockDriverState *bs;
64*33e3963eSbellard     int fd, cow_fd;
65fc01f7e7Sbellard     int64_t size;
66*33e3963eSbellard     char template[] = "/tmp/vl.XXXXXX";
67*33e3963eSbellard     struct cow_header_v2 cow_header;
68*33e3963eSbellard     struct stat st;
69fc01f7e7Sbellard 
70fc01f7e7Sbellard     bs = malloc(sizeof(BlockDriverState));
71fc01f7e7Sbellard     if(!bs)
72fc01f7e7Sbellard         return NULL;
730849bf08Sbellard     bs->read_only = 0;
74*33e3963eSbellard     bs->fd = -1;
75*33e3963eSbellard     bs->cow_fd = -1;
76*33e3963eSbellard     bs->cow_bitmap = NULL;
77*33e3963eSbellard     strcpy(bs->filename, filename);
78*33e3963eSbellard 
79*33e3963eSbellard     /* open standard HD image */
80*33e3963eSbellard     fd = open(filename, O_RDWR | O_LARGEFILE);
81fc01f7e7Sbellard     if (fd < 0) {
82*33e3963eSbellard         /* read only image on disk */
83*33e3963eSbellard         fd = open(filename, O_RDONLY | O_LARGEFILE);
840849bf08Sbellard         if (fd < 0) {
85*33e3963eSbellard             perror(filename);
86*33e3963eSbellard             goto fail;
87fc01f7e7Sbellard         }
88*33e3963eSbellard         if (!snapshot)
890849bf08Sbellard             bs->read_only = 1;
900849bf08Sbellard     }
91*33e3963eSbellard     bs->fd = fd;
92*33e3963eSbellard 
93*33e3963eSbellard     /* see if it is a cow image */
94*33e3963eSbellard     if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
95*33e3963eSbellard         fprintf(stderr, "%s: could not read header\n", filename);
96*33e3963eSbellard         goto fail;
97*33e3963eSbellard     }
98*33e3963eSbellard     if (cow_header.magic == htonl(COW_MAGIC) &&
99*33e3963eSbellard         cow_header.version == htonl(COW_VERSION)) {
100*33e3963eSbellard         /* cow image found */
101*33e3963eSbellard         size = cow_header.size;
102*33e3963eSbellard #ifndef WORDS_BIGENDIAN
103*33e3963eSbellard         size = bswap64(size);
104*33e3963eSbellard #endif
105*33e3963eSbellard         bs->total_sectors = size / 512;
106*33e3963eSbellard 
107*33e3963eSbellard         bs->cow_fd = fd;
108*33e3963eSbellard         bs->fd = -1;
109*33e3963eSbellard         if (cow_header.backing_file[0] != '\0') {
110*33e3963eSbellard             if (stat(cow_header.backing_file, &st) != 0) {
111*33e3963eSbellard                 fprintf(stderr, "%s: could not find original disk image '%s'\n", filename, cow_header.backing_file);
112*33e3963eSbellard                 goto fail;
113*33e3963eSbellard             }
114*33e3963eSbellard             if (st.st_mtime != htonl(cow_header.mtime)) {
115*33e3963eSbellard                 fprintf(stderr, "%s: original raw disk image '%s' does not match saved timestamp\n", filename, cow_header.backing_file);
116*33e3963eSbellard                 goto fail;
117*33e3963eSbellard             }
118*33e3963eSbellard             fd = open(cow_header.backing_file, O_RDONLY | O_LARGEFILE);
119*33e3963eSbellard             if (fd < 0)
120*33e3963eSbellard                 goto fail;
121*33e3963eSbellard             bs->fd = fd;
122*33e3963eSbellard         }
123*33e3963eSbellard         /* mmap the bitmap */
124*33e3963eSbellard         bs->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
125*33e3963eSbellard         bs->cow_bitmap_addr = mmap(get_mmap_addr(bs->cow_bitmap_size),
126*33e3963eSbellard                                    bs->cow_bitmap_size,
127*33e3963eSbellard                                    PROT_READ | PROT_WRITE,
128*33e3963eSbellard                                    MAP_SHARED, bs->cow_fd, 0);
129*33e3963eSbellard         if (bs->cow_bitmap_addr == MAP_FAILED)
130*33e3963eSbellard             goto fail;
131*33e3963eSbellard         bs->cow_bitmap = bs->cow_bitmap_addr + sizeof(cow_header);
132*33e3963eSbellard         bs->cow_sectors_offset = (bs->cow_bitmap_size + 511) & ~511;
133*33e3963eSbellard         snapshot = 0;
134*33e3963eSbellard     } else {
135*33e3963eSbellard         /* standard raw image */
136fc01f7e7Sbellard         size = lseek64(fd, 0, SEEK_END);
137fc01f7e7Sbellard         bs->total_sectors = size / 512;
138fc01f7e7Sbellard         bs->fd = fd;
139*33e3963eSbellard     }
140*33e3963eSbellard 
141*33e3963eSbellard     if (snapshot) {
142*33e3963eSbellard         /* create a temporary COW file */
143*33e3963eSbellard         cow_fd = mkstemp(template);
144*33e3963eSbellard         if (cow_fd < 0)
145*33e3963eSbellard             goto fail;
146*33e3963eSbellard         bs->cow_fd = cow_fd;
147*33e3963eSbellard 	unlink(template);
148*33e3963eSbellard 
149*33e3963eSbellard         /* just need to allocate bitmap */
150*33e3963eSbellard         bs->cow_bitmap_size = (bs->total_sectors + 7) >> 3;
151*33e3963eSbellard         bs->cow_bitmap_addr = mmap(get_mmap_addr(bs->cow_bitmap_size),
152*33e3963eSbellard                                    bs->cow_bitmap_size,
153*33e3963eSbellard                                    PROT_READ | PROT_WRITE,
154*33e3963eSbellard                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
155*33e3963eSbellard         if (bs->cow_bitmap_addr == MAP_FAILED)
156*33e3963eSbellard             goto fail;
157*33e3963eSbellard         bs->cow_bitmap = bs->cow_bitmap_addr;
158*33e3963eSbellard         bs->cow_sectors_offset = 0;
159*33e3963eSbellard     }
160*33e3963eSbellard 
161fc01f7e7Sbellard     return bs;
162*33e3963eSbellard  fail:
163*33e3963eSbellard     bdrv_close(bs);
164*33e3963eSbellard     return NULL;
165fc01f7e7Sbellard }
166fc01f7e7Sbellard 
167fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs)
168fc01f7e7Sbellard {
169*33e3963eSbellard     /* we unmap the mapping so that it is written to the COW file */
170*33e3963eSbellard     if (bs->cow_bitmap_addr)
171*33e3963eSbellard         munmap(bs->cow_bitmap_addr, bs->cow_bitmap_size);
172*33e3963eSbellard     if (bs->cow_fd >= 0)
173*33e3963eSbellard         close(bs->cow_fd);
174*33e3963eSbellard     if (bs->fd >= 0)
175fc01f7e7Sbellard         close(bs->fd);
176fc01f7e7Sbellard     free(bs);
177fc01f7e7Sbellard }
178fc01f7e7Sbellard 
/* Turn on bit 'bitnum' of 'bitmap'; bit 0 is the least significant bit
   of the first byte. */
static inline void set_bit(uint8_t *bitmap, int64_t bitnum)
{
    uint8_t *byte = &bitmap[bitnum / 8];
    int mask = 1 << (bitnum % 8);

    *byte |= mask;
}
183*33e3963eSbellard 
/* Return 1 if bit 'bitnum' of 'bitmap' is on and 0 otherwise; bit 0 is
   the least significant bit of the first byte. */
static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
{
    const int mask = 1 << (bitnum % 8);

    if (bitmap[bitnum / 8] & mask)
        return 1;
    return 0;
}
188*33e3963eSbellard 
189*33e3963eSbellard 
/* Tell whether sector 'sector_num' has its bit set in 'bitmap' (ie. its
 * current version is in the COW file) and store in '*num_same' how many
 * consecutive sectors, starting there and limited to 'nb_sectors',
 * share that state.  Without a bitmap, or for an empty request, nothing
 * is changed and '*num_same' is 'nb_sectors'. */
static int is_changed(uint8_t *bitmap,
                      int64_t sector_num, int nb_sectors,
                      int *num_same)
{
    int first_state, run;

    if (bitmap == NULL || nb_sectors == 0) {
        *num_same = nb_sectors;
        return 0;
    }

    first_state = is_bit_set(bitmap, sector_num);
    /* the first sector always belongs to its own run */
    run = 1;
    while (run < nb_sectors &&
           is_bit_set(bitmap, sector_num + run) == first_state)
        run++;
    *num_same = run;

    return first_state;
}
212*33e3963eSbellard 
213*33e3963eSbellard /* commit COW file into the raw image */
214*33e3963eSbellard int bdrv_commit(BlockDriverState *bs)
215*33e3963eSbellard {
216*33e3963eSbellard     int64_t i;
217*33e3963eSbellard     uint8_t *cow_bitmap;
218*33e3963eSbellard 
219*33e3963eSbellard     if (!bs->cow_bitmap) {
220*33e3963eSbellard 	fprintf(stderr, "Already committed to %s\n", bs->filename);
221*33e3963eSbellard 	return 0;
222*33e3963eSbellard     }
223*33e3963eSbellard 
224*33e3963eSbellard     if (bs->read_only) {
225*33e3963eSbellard 	fprintf(stderr, "Can't commit to %s: read-only\n", bs->filename);
226*33e3963eSbellard 	return -1;
227*33e3963eSbellard     }
228*33e3963eSbellard 
229*33e3963eSbellard     cow_bitmap = bs->cow_bitmap;
230*33e3963eSbellard     for (i = 0; i < bs->total_sectors; i++) {
231*33e3963eSbellard 	if (is_bit_set(cow_bitmap, i)) {
232*33e3963eSbellard 	    unsigned char sector[512];
233*33e3963eSbellard 	    if (bdrv_read(bs, i, sector, 1) != 0) {
234*33e3963eSbellard 		fprintf(stderr, "Error reading sector %lli: aborting commit\n",
235*33e3963eSbellard 			(long long)i);
236*33e3963eSbellard 		return -1;
237*33e3963eSbellard 	    }
238*33e3963eSbellard 
239*33e3963eSbellard 	    /* Make bdrv_write write to real file for a moment. */
240*33e3963eSbellard 	    bs->cow_bitmap = NULL;
241*33e3963eSbellard 	    if (bdrv_write(bs, i, sector, 1) != 0) {
242*33e3963eSbellard 		fprintf(stderr, "Error writing sector %lli: aborting commit\n",
243*33e3963eSbellard 			(long long)i);
244*33e3963eSbellard 		bs->cow_bitmap = cow_bitmap;
245*33e3963eSbellard 		return -1;
246*33e3963eSbellard 	    }
247*33e3963eSbellard 	    bs->cow_bitmap = cow_bitmap;
248*33e3963eSbellard 	}
249*33e3963eSbellard     }
250*33e3963eSbellard     fprintf(stderr, "Committed snapshot to %s\n", bs->filename);
251*33e3963eSbellard     return 0;
252*33e3963eSbellard }
253*33e3963eSbellard 
254fc01f7e7Sbellard /* return -1 if error */
255fc01f7e7Sbellard int bdrv_read(BlockDriverState *bs, int64_t sector_num,
256fc01f7e7Sbellard               uint8_t *buf, int nb_sectors)
257fc01f7e7Sbellard {
258*33e3963eSbellard     int ret, n, fd;
259*33e3963eSbellard     int64_t offset;
260fc01f7e7Sbellard 
261*33e3963eSbellard     while (nb_sectors > 0) {
262*33e3963eSbellard         if (is_changed(bs->cow_bitmap, sector_num, nb_sectors, &n)) {
263*33e3963eSbellard             fd = bs->cow_fd;
264*33e3963eSbellard             offset = bs->cow_sectors_offset;
265*33e3963eSbellard         } else {
266*33e3963eSbellard             fd = bs->fd;
267*33e3963eSbellard             offset = 0;
268*33e3963eSbellard         }
269*33e3963eSbellard 
270*33e3963eSbellard         if (fd < 0) {
271*33e3963eSbellard             /* no file, just return empty sectors */
272*33e3963eSbellard             memset(buf, 0, n * 512);
273*33e3963eSbellard         } else {
274*33e3963eSbellard             offset += sector_num * 512;
275*33e3963eSbellard             lseek64(fd, offset, SEEK_SET);
276*33e3963eSbellard             ret = read(fd, buf, n * 512);
277*33e3963eSbellard             if (ret != n * 512) {
278fc01f7e7Sbellard                 return -1;
279*33e3963eSbellard             }
280*33e3963eSbellard         }
281*33e3963eSbellard         nb_sectors -= n;
282*33e3963eSbellard         sector_num += n;
283*33e3963eSbellard         buf += n * 512;
284*33e3963eSbellard     }
285fc01f7e7Sbellard     return 0;
286fc01f7e7Sbellard }
287fc01f7e7Sbellard 
288fc01f7e7Sbellard /* return -1 if error */
289fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num,
290fc01f7e7Sbellard                const uint8_t *buf, int nb_sectors)
291fc01f7e7Sbellard {
292*33e3963eSbellard     int ret, fd, i;
293*33e3963eSbellard     int64_t offset, retl;
294fc01f7e7Sbellard 
2950849bf08Sbellard     if (bs->read_only)
2960849bf08Sbellard         return -1;
2970849bf08Sbellard 
298*33e3963eSbellard     if (bs->cow_bitmap) {
299*33e3963eSbellard         fd = bs->cow_fd;
300*33e3963eSbellard         offset = bs->cow_sectors_offset;
301*33e3963eSbellard     } else {
302*33e3963eSbellard         fd = bs->fd;
303*33e3963eSbellard         offset = 0;
304*33e3963eSbellard     }
305*33e3963eSbellard 
306*33e3963eSbellard     offset += sector_num * 512;
307*33e3963eSbellard     retl = lseek64(fd, offset, SEEK_SET);
308*33e3963eSbellard     if (retl == -1) {
309fc01f7e7Sbellard         return -1;
310*33e3963eSbellard     }
311*33e3963eSbellard     ret = write(fd, buf, nb_sectors * 512);
312*33e3963eSbellard     if (ret != nb_sectors * 512) {
313*33e3963eSbellard         return -1;
314*33e3963eSbellard     }
315*33e3963eSbellard 
316*33e3963eSbellard     if (bs->cow_bitmap) {
317*33e3963eSbellard 	for (i = 0; i < nb_sectors; i++)
318*33e3963eSbellard 	    set_bit(bs->cow_bitmap, sector_num + i);
319*33e3963eSbellard     }
320fc01f7e7Sbellard     return 0;
321fc01f7e7Sbellard }
322fc01f7e7Sbellard 
323fc01f7e7Sbellard void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)
324fc01f7e7Sbellard {
325fc01f7e7Sbellard     *nb_sectors_ptr = bs->total_sectors;
326fc01f7e7Sbellard }
327