xref: /openbmc/qemu/block/qcow2-snapshot.c (revision 94c3db85)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu-common.h"
26 #include "block_int.h"
27 #include "block/qcow2.h"
28 
29 typedef struct QEMU_PACKED QCowSnapshotHeader {
30     /* header is 8 byte aligned */
31     uint64_t l1_table_offset;
32 
33     uint32_t l1_size;
34     uint16_t id_str_size;
35     uint16_t name_size;
36 
37     uint32_t date_sec;
38     uint32_t date_nsec;
39 
40     uint64_t vm_clock_nsec;
41 
42     uint32_t vm_state_size;
43     uint32_t extra_data_size; /* for extension */
44     /* extra data follows */
45     /* id_str follows */
46     /* name follows  */
47 } QCowSnapshotHeader;
48 
49 typedef struct QEMU_PACKED QCowSnapshotExtraData {
50     uint64_t vm_state_size_large;
51     uint64_t disk_size;
52 } QCowSnapshotExtraData;
53 
54 void qcow2_free_snapshots(BlockDriverState *bs)
55 {
56     BDRVQcowState *s = bs->opaque;
57     int i;
58 
59     for(i = 0; i < s->nb_snapshots; i++) {
60         g_free(s->snapshots[i].name);
61         g_free(s->snapshots[i].id_str);
62     }
63     g_free(s->snapshots);
64     s->snapshots = NULL;
65     s->nb_snapshots = 0;
66 }
67 
68 int qcow2_read_snapshots(BlockDriverState *bs)
69 {
70     BDRVQcowState *s = bs->opaque;
71     QCowSnapshotHeader h;
72     QCowSnapshotExtraData extra;
73     QCowSnapshot *sn;
74     int i, id_str_size, name_size;
75     int64_t offset;
76     uint32_t extra_data_size;
77     int ret;
78 
79     if (!s->nb_snapshots) {
80         s->snapshots = NULL;
81         s->snapshots_size = 0;
82         return 0;
83     }
84 
85     offset = s->snapshots_offset;
86     s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
87 
88     for(i = 0; i < s->nb_snapshots; i++) {
89         /* Read statically sized part of the snapshot header */
90         offset = align_offset(offset, 8);
91         ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
92         if (ret < 0) {
93             goto fail;
94         }
95 
96         offset += sizeof(h);
97         sn = s->snapshots + i;
98         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
99         sn->l1_size = be32_to_cpu(h.l1_size);
100         sn->vm_state_size = be32_to_cpu(h.vm_state_size);
101         sn->date_sec = be32_to_cpu(h.date_sec);
102         sn->date_nsec = be32_to_cpu(h.date_nsec);
103         sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
104         extra_data_size = be32_to_cpu(h.extra_data_size);
105 
106         id_str_size = be16_to_cpu(h.id_str_size);
107         name_size = be16_to_cpu(h.name_size);
108 
109         /* Read extra data */
110         ret = bdrv_pread(bs->file, offset, &extra,
111                          MIN(sizeof(extra), extra_data_size));
112         if (ret < 0) {
113             goto fail;
114         }
115         offset += extra_data_size;
116 
117         if (extra_data_size >= 8) {
118             sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
119         }
120 
121         if (extra_data_size >= 16) {
122             sn->disk_size = be64_to_cpu(extra.disk_size);
123         } else {
124             sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
125         }
126 
127         /* Read snapshot ID */
128         sn->id_str = g_malloc(id_str_size + 1);
129         ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
130         if (ret < 0) {
131             goto fail;
132         }
133         offset += id_str_size;
134         sn->id_str[id_str_size] = '\0';
135 
136         /* Read snapshot name */
137         sn->name = g_malloc(name_size + 1);
138         ret = bdrv_pread(bs->file, offset, sn->name, name_size);
139         if (ret < 0) {
140             goto fail;
141         }
142         offset += name_size;
143         sn->name[name_size] = '\0';
144     }
145 
146     s->snapshots_size = offset - s->snapshots_offset;
147     return 0;
148 
149 fail:
150     qcow2_free_snapshots(bs);
151     return ret;
152 }
153 
154 /* add at the end of the file a new list of snapshots */
155 static int qcow2_write_snapshots(BlockDriverState *bs)
156 {
157     BDRVQcowState *s = bs->opaque;
158     QCowSnapshot *sn;
159     QCowSnapshotHeader h;
160     QCowSnapshotExtraData extra;
161     int i, name_size, id_str_size, snapshots_size;
162     struct {
163         uint32_t nb_snapshots;
164         uint64_t snapshots_offset;
165     } QEMU_PACKED header_data;
166     int64_t offset, snapshots_offset;
167     int ret;
168 
169     /* compute the size of the snapshots */
170     offset = 0;
171     for(i = 0; i < s->nb_snapshots; i++) {
172         sn = s->snapshots + i;
173         offset = align_offset(offset, 8);
174         offset += sizeof(h);
175         offset += sizeof(extra);
176         offset += strlen(sn->id_str);
177         offset += strlen(sn->name);
178     }
179     snapshots_size = offset;
180 
181     /* Allocate space for the new snapshot list */
182     snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
183     bdrv_flush(bs->file);
184     offset = snapshots_offset;
185     if (offset < 0) {
186         return offset;
187     }
188 
189     /* Write all snapshots to the new list */
190     for(i = 0; i < s->nb_snapshots; i++) {
191         sn = s->snapshots + i;
192         memset(&h, 0, sizeof(h));
193         h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
194         h.l1_size = cpu_to_be32(sn->l1_size);
195         /* If it doesn't fit in 32 bit, older implementations should treat it
196          * as a disk-only snapshot rather than truncate the VM state */
197         if (sn->vm_state_size <= 0xffffffff) {
198             h.vm_state_size = cpu_to_be32(sn->vm_state_size);
199         }
200         h.date_sec = cpu_to_be32(sn->date_sec);
201         h.date_nsec = cpu_to_be32(sn->date_nsec);
202         h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
203         h.extra_data_size = cpu_to_be32(sizeof(extra));
204 
205         memset(&extra, 0, sizeof(extra));
206         extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
207         extra.disk_size = cpu_to_be64(sn->disk_size);
208 
209         id_str_size = strlen(sn->id_str);
210         name_size = strlen(sn->name);
211         h.id_str_size = cpu_to_be16(id_str_size);
212         h.name_size = cpu_to_be16(name_size);
213         offset = align_offset(offset, 8);
214 
215         ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
216         if (ret < 0) {
217             goto fail;
218         }
219         offset += sizeof(h);
220 
221         ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
222         if (ret < 0) {
223             goto fail;
224         }
225         offset += sizeof(extra);
226 
227         ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
228         if (ret < 0) {
229             goto fail;
230         }
231         offset += id_str_size;
232 
233         ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
234         if (ret < 0) {
235             goto fail;
236         }
237         offset += name_size;
238     }
239 
240     /*
241      * Update the header to point to the new snapshot table. This requires the
242      * new table and its refcounts to be stable on disk.
243      */
244     ret = bdrv_flush(bs);
245     if (ret < 0) {
246         goto fail;
247     }
248 
249     QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
250         offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
251 
252     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
253     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
254 
255     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
256                            &header_data, sizeof(header_data));
257     if (ret < 0) {
258         goto fail;
259     }
260 
261     /* free the old snapshot table */
262     qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
263     s->snapshots_offset = snapshots_offset;
264     s->snapshots_size = snapshots_size;
265     return 0;
266 
267 fail:
268     return ret;
269 }
270 
271 static void find_new_snapshot_id(BlockDriverState *bs,
272                                  char *id_str, int id_str_size)
273 {
274     BDRVQcowState *s = bs->opaque;
275     QCowSnapshot *sn;
276     int i, id, id_max = 0;
277 
278     for(i = 0; i < s->nb_snapshots; i++) {
279         sn = s->snapshots + i;
280         id = strtoul(sn->id_str, NULL, 10);
281         if (id > id_max)
282             id_max = id;
283     }
284     snprintf(id_str, id_str_size, "%d", id_max + 1);
285 }
286 
287 static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
288 {
289     BDRVQcowState *s = bs->opaque;
290     int i;
291 
292     for(i = 0; i < s->nb_snapshots; i++) {
293         if (!strcmp(s->snapshots[i].id_str, id_str))
294             return i;
295     }
296     return -1;
297 }
298 
299 static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
300 {
301     BDRVQcowState *s = bs->opaque;
302     int i, ret;
303 
304     ret = find_snapshot_by_id(bs, name);
305     if (ret >= 0)
306         return ret;
307     for(i = 0; i < s->nb_snapshots; i++) {
308         if (!strcmp(s->snapshots[i].name, name))
309             return i;
310     }
311     return -1;
312 }
313 
314 /* if no id is provided, a new one is constructed */
315 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
316 {
317     BDRVQcowState *s = bs->opaque;
318     QCowSnapshot *new_snapshot_list = NULL;
319     QCowSnapshot *old_snapshot_list = NULL;
320     QCowSnapshot sn1, *sn = &sn1;
321     int i, ret;
322     uint64_t *l1_table = NULL;
323     int64_t l1_table_offset;
324 
325     memset(sn, 0, sizeof(*sn));
326 
327     /* Generate an ID if it wasn't passed */
328     if (sn_info->id_str[0] == '\0') {
329         find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
330     }
331 
332     /* Check that the ID is unique */
333     if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
334         return -EEXIST;
335     }
336 
337     /* Populate sn with passed data */
338     sn->id_str = g_strdup(sn_info->id_str);
339     sn->name = g_strdup(sn_info->name);
340 
341     sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
342     sn->vm_state_size = sn_info->vm_state_size;
343     sn->date_sec = sn_info->date_sec;
344     sn->date_nsec = sn_info->date_nsec;
345     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
346 
347     /* Allocate the L1 table of the snapshot and copy the current one there. */
348     l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
349     if (l1_table_offset < 0) {
350         ret = l1_table_offset;
351         goto fail;
352     }
353 
354     sn->l1_table_offset = l1_table_offset;
355     sn->l1_size = s->l1_size;
356 
357     l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
358     for(i = 0; i < s->l1_size; i++) {
359         l1_table[i] = cpu_to_be64(s->l1_table[i]);
360     }
361 
362     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
363                       s->l1_size * sizeof(uint64_t));
364     if (ret < 0) {
365         goto fail;
366     }
367 
368     g_free(l1_table);
369     l1_table = NULL;
370 
371     /*
372      * Increase the refcounts of all clusters and make sure everything is
373      * stable on disk before updating the snapshot table to contain a pointer
374      * to the new L1 table.
375      */
376     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
377     if (ret < 0) {
378         goto fail;
379     }
380 
381     ret = bdrv_flush(bs);
382     if (ret < 0) {
383         goto fail;
384     }
385 
386     /* Append the new snapshot to the snapshot list */
387     new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
388     if (s->snapshots) {
389         memcpy(new_snapshot_list, s->snapshots,
390                s->nb_snapshots * sizeof(QCowSnapshot));
391         old_snapshot_list = s->snapshots;
392     }
393     s->snapshots = new_snapshot_list;
394     s->snapshots[s->nb_snapshots++] = *sn;
395 
396     ret = qcow2_write_snapshots(bs);
397     if (ret < 0) {
398         g_free(s->snapshots);
399         s->snapshots = old_snapshot_list;
400         goto fail;
401     }
402 
403     g_free(old_snapshot_list);
404 
405 #ifdef DEBUG_ALLOC
406     {
407       BdrvCheckResult result = {0};
408       qcow2_check_refcounts(bs, &result, 0);
409     }
410 #endif
411     return 0;
412 
413 fail:
414     g_free(sn->id_str);
415     g_free(sn->name);
416     g_free(l1_table);
417 
418     return ret;
419 }
420 
421 /* copy the snapshot 'snapshot_name' into the current disk image */
422 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
423 {
424     BDRVQcowState *s = bs->opaque;
425     QCowSnapshot *sn;
426     int i, snapshot_index;
427     int cur_l1_bytes, sn_l1_bytes;
428     int ret;
429     uint64_t *sn_l1_table = NULL;
430 
431     /* Search the snapshot */
432     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
433     if (snapshot_index < 0) {
434         return -ENOENT;
435     }
436     sn = &s->snapshots[snapshot_index];
437 
438     if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
439         error_report("qcow2: Loading snapshots with different disk "
440             "size is not implemented");
441         ret = -ENOTSUP;
442         goto fail;
443     }
444 
445     /*
446      * Make sure that the current L1 table is big enough to contain the whole
447      * L1 table of the snapshot. If the snapshot L1 table is smaller, the
448      * current one must be padded with zeros.
449      */
450     ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
451     if (ret < 0) {
452         goto fail;
453     }
454 
455     cur_l1_bytes = s->l1_size * sizeof(uint64_t);
456     sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
457 
458     /*
459      * Copy the snapshot L1 table to the current L1 table.
460      *
461      * Before overwriting the old current L1 table on disk, make sure to
462      * increase all refcounts for the clusters referenced by the new one.
463      * Decrease the refcount referenced by the old one only when the L1
464      * table is overwritten.
465      */
466     sn_l1_table = g_malloc0(cur_l1_bytes);
467 
468     ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
469     if (ret < 0) {
470         goto fail;
471     }
472 
473     ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
474                                          sn->l1_size, 1);
475     if (ret < 0) {
476         goto fail;
477     }
478 
479     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
480                            cur_l1_bytes);
481     if (ret < 0) {
482         goto fail;
483     }
484 
485     /*
486      * Decrease refcount of clusters of current L1 table.
487      *
488      * At this point, the in-memory s->l1_table points to the old L1 table,
489      * whereas on disk we already have the new one.
490      *
491      * qcow2_update_snapshot_refcount special cases the current L1 table to use
492      * the in-memory data instead of really using the offset to load a new one,
493      * which is why this works.
494      */
495     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
496                                          s->l1_size, -1);
497 
498     /*
499      * Now update the in-memory L1 table to be in sync with the on-disk one. We
500      * need to do this even if updating refcounts failed.
501      */
502     for(i = 0;i < s->l1_size; i++) {
503         s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
504     }
505 
506     if (ret < 0) {
507         goto fail;
508     }
509 
510     g_free(sn_l1_table);
511     sn_l1_table = NULL;
512 
513     /*
514      * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
515      * when we decreased the refcount of the old snapshot.
516      */
517     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
518     if (ret < 0) {
519         goto fail;
520     }
521 
522 #ifdef DEBUG_ALLOC
523     {
524         BdrvCheckResult result = {0};
525         qcow2_check_refcounts(bs, &result, 0);
526     }
527 #endif
528     return 0;
529 
530 fail:
531     g_free(sn_l1_table);
532     return ret;
533 }
534 
535 int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
536 {
537     BDRVQcowState *s = bs->opaque;
538     QCowSnapshot sn;
539     int snapshot_index, ret;
540 
541     /* Search the snapshot */
542     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
543     if (snapshot_index < 0) {
544         return -ENOENT;
545     }
546     sn = s->snapshots[snapshot_index];
547 
548     /* Remove it from the snapshot list */
549     memmove(s->snapshots + snapshot_index,
550             s->snapshots + snapshot_index + 1,
551             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
552     s->nb_snapshots--;
553     ret = qcow2_write_snapshots(bs);
554     if (ret < 0) {
555         return ret;
556     }
557 
558     /*
559      * The snapshot is now unused, clean up. If we fail after this point, we
560      * won't recover but just leak clusters.
561      */
562     g_free(sn.id_str);
563     g_free(sn.name);
564 
565     /*
566      * Now decrease the refcounts of clusters referenced by the snapshot and
567      * free the L1 table.
568      */
569     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
570                                          sn.l1_size, -1);
571     if (ret < 0) {
572         return ret;
573     }
574     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
575 
576     /* must update the copied flag on the current cluster offsets */
577     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
578     if (ret < 0) {
579         return ret;
580     }
581 
582 #ifdef DEBUG_ALLOC
583     {
584         BdrvCheckResult result = {0};
585         qcow2_check_refcounts(bs, &result, 0);
586     }
587 #endif
588     return 0;
589 }
590 
591 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
592 {
593     BDRVQcowState *s = bs->opaque;
594     QEMUSnapshotInfo *sn_tab, *sn_info;
595     QCowSnapshot *sn;
596     int i;
597 
598     if (!s->nb_snapshots) {
599         *psn_tab = NULL;
600         return s->nb_snapshots;
601     }
602 
603     sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
604     for(i = 0; i < s->nb_snapshots; i++) {
605         sn_info = sn_tab + i;
606         sn = s->snapshots + i;
607         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
608                 sn->id_str);
609         pstrcpy(sn_info->name, sizeof(sn_info->name),
610                 sn->name);
611         sn_info->vm_state_size = sn->vm_state_size;
612         sn_info->date_sec = sn->date_sec;
613         sn_info->date_nsec = sn->date_nsec;
614         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
615     }
616     *psn_tab = sn_tab;
617     return s->nb_snapshots;
618 }
619 
620 int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
621 {
622     int i, snapshot_index;
623     BDRVQcowState *s = bs->opaque;
624     QCowSnapshot *sn;
625     uint64_t *new_l1_table;
626     int new_l1_bytes;
627     int ret;
628 
629     assert(bs->read_only);
630 
631     /* Search the snapshot */
632     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
633     if (snapshot_index < 0) {
634         return -ENOENT;
635     }
636     sn = &s->snapshots[snapshot_index];
637 
638     /* Allocate and read in the snapshot's L1 table */
639     new_l1_bytes = s->l1_size * sizeof(uint64_t);
640     new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
641 
642     ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
643     if (ret < 0) {
644         g_free(new_l1_table);
645         return ret;
646     }
647 
648     /* Switch the L1 table */
649     g_free(s->l1_table);
650 
651     s->l1_size = sn->l1_size;
652     s->l1_table_offset = sn->l1_table_offset;
653     s->l1_table = new_l1_table;
654 
655     for(i = 0;i < s->l1_size; i++) {
656         be64_to_cpus(&s->l1_table[i]);
657     }
658 
659     return 0;
660 }
661