xref: /openbmc/qemu/block/qcow2-snapshot.c (revision 41bd3603)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu-common.h"
26 #include "block_int.h"
27 #include "block/qcow2.h"
28 
/*
 * Fixed-size part of one entry in the on-disk snapshot table.
 *
 * The readers and writers in this file convert every multi-byte field with
 * be*_to_cpu()/cpu_to_be*(), i.e. the on-disk representation is big-endian.
 * The struct is packed because it is read and written as raw bytes.
 */
typedef struct QEMU_PACKED QCowSnapshotHeader {
    /* header is 8 byte aligned */
    uint64_t l1_table_offset;   /* file offset of the snapshot's L1 table */

    uint32_t l1_size;           /* number of 8-byte entries in that L1 table */
    uint16_t id_str_size;       /* byte length of the ID string (no NUL) */
    uint16_t name_size;         /* byte length of the name (no NUL) */

    uint32_t date_sec;
    uint32_t date_nsec;

    uint64_t vm_clock_nsec;

    /* 32-bit VM state size; the writer leaves it 0 if the size needs 64 bit */
    uint32_t vm_state_size;
    uint32_t extra_data_size; /* for extension */
    /* extra data follows */
    /* id_str follows */
    /* name follows  */
} QCowSnapshotHeader;
48 
/*
 * Extra data that directly follows QCowSnapshotHeader on disk. The reader in
 * this file only interprets it when the header's extra_data_size is at least
 * 8 bytes.
 */
typedef struct QEMU_PACKED QCowSnapshotExtraData {
    /* 64-bit VM state size (big-endian); overrides the 32-bit header field */
    uint64_t vm_state_size_large;
} QCowSnapshotExtraData;
52 
53 void qcow2_free_snapshots(BlockDriverState *bs)
54 {
55     BDRVQcowState *s = bs->opaque;
56     int i;
57 
58     for(i = 0; i < s->nb_snapshots; i++) {
59         g_free(s->snapshots[i].name);
60         g_free(s->snapshots[i].id_str);
61     }
62     g_free(s->snapshots);
63     s->snapshots = NULL;
64     s->nb_snapshots = 0;
65 }
66 
67 int qcow2_read_snapshots(BlockDriverState *bs)
68 {
69     BDRVQcowState *s = bs->opaque;
70     QCowSnapshotHeader h;
71     QCowSnapshotExtraData extra;
72     QCowSnapshot *sn;
73     int i, id_str_size, name_size;
74     int64_t offset;
75     uint32_t extra_data_size;
76     int ret;
77 
78     if (!s->nb_snapshots) {
79         s->snapshots = NULL;
80         s->snapshots_size = 0;
81         return 0;
82     }
83 
84     offset = s->snapshots_offset;
85     s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
86 
87     for(i = 0; i < s->nb_snapshots; i++) {
88         /* Read statically sized part of the snapshot header */
89         offset = align_offset(offset, 8);
90         ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
91         if (ret < 0) {
92             goto fail;
93         }
94 
95         offset += sizeof(h);
96         sn = s->snapshots + i;
97         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
98         sn->l1_size = be32_to_cpu(h.l1_size);
99         sn->vm_state_size = be32_to_cpu(h.vm_state_size);
100         sn->date_sec = be32_to_cpu(h.date_sec);
101         sn->date_nsec = be32_to_cpu(h.date_nsec);
102         sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
103         extra_data_size = be32_to_cpu(h.extra_data_size);
104 
105         id_str_size = be16_to_cpu(h.id_str_size);
106         name_size = be16_to_cpu(h.name_size);
107 
108         /* Read extra data */
109         ret = bdrv_pread(bs->file, offset, &extra,
110                          MIN(sizeof(extra), extra_data_size));
111         if (ret < 0) {
112             goto fail;
113         }
114         offset += extra_data_size;
115 
116         if (extra_data_size >= 8) {
117             sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
118         }
119 
120         /* Read snapshot ID */
121         sn->id_str = g_malloc(id_str_size + 1);
122         ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
123         if (ret < 0) {
124             goto fail;
125         }
126         offset += id_str_size;
127         sn->id_str[id_str_size] = '\0';
128 
129         /* Read snapshot name */
130         sn->name = g_malloc(name_size + 1);
131         ret = bdrv_pread(bs->file, offset, sn->name, name_size);
132         if (ret < 0) {
133             goto fail;
134         }
135         offset += name_size;
136         sn->name[name_size] = '\0';
137     }
138 
139     s->snapshots_size = offset - s->snapshots_offset;
140     return 0;
141 
142 fail:
143     qcow2_free_snapshots(bs);
144     return ret;
145 }
146 
147 /* add at the end of the file a new list of snapshots */
148 static int qcow2_write_snapshots(BlockDriverState *bs)
149 {
150     BDRVQcowState *s = bs->opaque;
151     QCowSnapshot *sn;
152     QCowSnapshotHeader h;
153     QCowSnapshotExtraData extra;
154     int i, name_size, id_str_size, snapshots_size;
155     struct {
156         uint32_t nb_snapshots;
157         uint64_t snapshots_offset;
158     } QEMU_PACKED header_data;
159     int64_t offset, snapshots_offset;
160     int ret;
161 
162     /* compute the size of the snapshots */
163     offset = 0;
164     for(i = 0; i < s->nb_snapshots; i++) {
165         sn = s->snapshots + i;
166         offset = align_offset(offset, 8);
167         offset += sizeof(h);
168         offset += sizeof(extra);
169         offset += strlen(sn->id_str);
170         offset += strlen(sn->name);
171     }
172     snapshots_size = offset;
173 
174     /* Allocate space for the new snapshot list */
175     snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
176     bdrv_flush(bs->file);
177     offset = snapshots_offset;
178     if (offset < 0) {
179         return offset;
180     }
181 
182     /* Write all snapshots to the new list */
183     for(i = 0; i < s->nb_snapshots; i++) {
184         sn = s->snapshots + i;
185         memset(&h, 0, sizeof(h));
186         h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
187         h.l1_size = cpu_to_be32(sn->l1_size);
188         /* If it doesn't fit in 32 bit, older implementations should treat it
189          * as a disk-only snapshot rather than truncate the VM state */
190         if (sn->vm_state_size <= 0xffffffff) {
191             h.vm_state_size = cpu_to_be32(sn->vm_state_size);
192         }
193         h.date_sec = cpu_to_be32(sn->date_sec);
194         h.date_nsec = cpu_to_be32(sn->date_nsec);
195         h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
196         h.extra_data_size = cpu_to_be32(sizeof(extra));
197 
198         memset(&extra, 0, sizeof(extra));
199         extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
200 
201         id_str_size = strlen(sn->id_str);
202         name_size = strlen(sn->name);
203         h.id_str_size = cpu_to_be16(id_str_size);
204         h.name_size = cpu_to_be16(name_size);
205         offset = align_offset(offset, 8);
206 
207         ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
208         if (ret < 0) {
209             goto fail;
210         }
211         offset += sizeof(h);
212 
213         ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
214         if (ret < 0) {
215             goto fail;
216         }
217         offset += sizeof(extra);
218 
219         ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
220         if (ret < 0) {
221             goto fail;
222         }
223         offset += id_str_size;
224 
225         ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
226         if (ret < 0) {
227             goto fail;
228         }
229         offset += name_size;
230     }
231 
232     /*
233      * Update the header to point to the new snapshot table. This requires the
234      * new table and its refcounts to be stable on disk.
235      */
236     ret = bdrv_flush(bs);
237     if (ret < 0) {
238         goto fail;
239     }
240 
241     QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
242         offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
243 
244     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
245     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
246 
247     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
248                            &header_data, sizeof(header_data));
249     if (ret < 0) {
250         goto fail;
251     }
252 
253     /* free the old snapshot table */
254     qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
255     s->snapshots_offset = snapshots_offset;
256     s->snapshots_size = snapshots_size;
257     return 0;
258 
259 fail:
260     return ret;
261 }
262 
263 static void find_new_snapshot_id(BlockDriverState *bs,
264                                  char *id_str, int id_str_size)
265 {
266     BDRVQcowState *s = bs->opaque;
267     QCowSnapshot *sn;
268     int i, id, id_max = 0;
269 
270     for(i = 0; i < s->nb_snapshots; i++) {
271         sn = s->snapshots + i;
272         id = strtoul(sn->id_str, NULL, 10);
273         if (id > id_max)
274             id_max = id;
275     }
276     snprintf(id_str, id_str_size, "%d", id_max + 1);
277 }
278 
279 static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
280 {
281     BDRVQcowState *s = bs->opaque;
282     int i;
283 
284     for(i = 0; i < s->nb_snapshots; i++) {
285         if (!strcmp(s->snapshots[i].id_str, id_str))
286             return i;
287     }
288     return -1;
289 }
290 
291 static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
292 {
293     BDRVQcowState *s = bs->opaque;
294     int i, ret;
295 
296     ret = find_snapshot_by_id(bs, name);
297     if (ret >= 0)
298         return ret;
299     for(i = 0; i < s->nb_snapshots; i++) {
300         if (!strcmp(s->snapshots[i].name, name))
301             return i;
302     }
303     return -1;
304 }
305 
306 /* if no id is provided, a new one is constructed */
307 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
308 {
309     BDRVQcowState *s = bs->opaque;
310     QCowSnapshot *new_snapshot_list = NULL;
311     QCowSnapshot *old_snapshot_list = NULL;
312     QCowSnapshot sn1, *sn = &sn1;
313     int i, ret;
314     uint64_t *l1_table = NULL;
315     int64_t l1_table_offset;
316 
317     memset(sn, 0, sizeof(*sn));
318 
319     /* Generate an ID if it wasn't passed */
320     if (sn_info->id_str[0] == '\0') {
321         find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
322     }
323 
324     /* Check that the ID is unique */
325     if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
326         return -ENOENT;
327     }
328 
329     /* Populate sn with passed data */
330     sn->id_str = g_strdup(sn_info->id_str);
331     sn->name = g_strdup(sn_info->name);
332 
333     sn->vm_state_size = sn_info->vm_state_size;
334     sn->date_sec = sn_info->date_sec;
335     sn->date_nsec = sn_info->date_nsec;
336     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
337 
338     /* Allocate the L1 table of the snapshot and copy the current one there. */
339     l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
340     if (l1_table_offset < 0) {
341         ret = l1_table_offset;
342         goto fail;
343     }
344 
345     sn->l1_table_offset = l1_table_offset;
346     sn->l1_size = s->l1_size;
347 
348     l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
349     for(i = 0; i < s->l1_size; i++) {
350         l1_table[i] = cpu_to_be64(s->l1_table[i]);
351     }
352 
353     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
354                       s->l1_size * sizeof(uint64_t));
355     if (ret < 0) {
356         goto fail;
357     }
358 
359     g_free(l1_table);
360     l1_table = NULL;
361 
362     /*
363      * Increase the refcounts of all clusters and make sure everything is
364      * stable on disk before updating the snapshot table to contain a pointer
365      * to the new L1 table.
366      */
367     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
368     if (ret < 0) {
369         goto fail;
370     }
371 
372     ret = bdrv_flush(bs);
373     if (ret < 0) {
374         goto fail;
375     }
376 
377     /* Append the new snapshot to the snapshot list */
378     new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
379     if (s->snapshots) {
380         memcpy(new_snapshot_list, s->snapshots,
381                s->nb_snapshots * sizeof(QCowSnapshot));
382         old_snapshot_list = s->snapshots;
383     }
384     s->snapshots = new_snapshot_list;
385     s->snapshots[s->nb_snapshots++] = *sn;
386 
387     ret = qcow2_write_snapshots(bs);
388     if (ret < 0) {
389         g_free(s->snapshots);
390         s->snapshots = old_snapshot_list;
391         goto fail;
392     }
393 
394     g_free(old_snapshot_list);
395 
396 #ifdef DEBUG_ALLOC
397     {
398       BdrvCheckResult result = {0};
399       qcow2_check_refcounts(bs, &result);
400     }
401 #endif
402     return 0;
403 
404 fail:
405     g_free(sn->id_str);
406     g_free(sn->name);
407     g_free(l1_table);
408 
409     return ret;
410 }
411 
412 /* copy the snapshot 'snapshot_name' into the current disk image */
413 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
414 {
415     BDRVQcowState *s = bs->opaque;
416     QCowSnapshot *sn;
417     int i, snapshot_index;
418     int cur_l1_bytes, sn_l1_bytes;
419     int ret;
420     uint64_t *sn_l1_table = NULL;
421 
422     /* Search the snapshot */
423     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
424     if (snapshot_index < 0) {
425         return -ENOENT;
426     }
427     sn = &s->snapshots[snapshot_index];
428 
429     /*
430      * Make sure that the current L1 table is big enough to contain the whole
431      * L1 table of the snapshot. If the snapshot L1 table is smaller, the
432      * current one must be padded with zeros.
433      */
434     ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
435     if (ret < 0) {
436         goto fail;
437     }
438 
439     cur_l1_bytes = s->l1_size * sizeof(uint64_t);
440     sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
441 
442     /*
443      * Copy the snapshot L1 table to the current L1 table.
444      *
445      * Before overwriting the old current L1 table on disk, make sure to
446      * increase all refcounts for the clusters referenced by the new one.
447      * Decrease the refcount referenced by the old one only when the L1
448      * table is overwritten.
449      */
450     sn_l1_table = g_malloc0(cur_l1_bytes);
451 
452     ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
453     if (ret < 0) {
454         goto fail;
455     }
456 
457     ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
458                                          sn->l1_size, 1);
459     if (ret < 0) {
460         goto fail;
461     }
462 
463     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
464                            cur_l1_bytes);
465     if (ret < 0) {
466         goto fail;
467     }
468 
469     /*
470      * Decrease refcount of clusters of current L1 table.
471      *
472      * At this point, the in-memory s->l1_table points to the old L1 table,
473      * whereas on disk we already have the new one.
474      *
475      * qcow2_update_snapshot_refcount special cases the current L1 table to use
476      * the in-memory data instead of really using the offset to load a new one,
477      * which is why this works.
478      */
479     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
480                                          s->l1_size, -1);
481 
482     /*
483      * Now update the in-memory L1 table to be in sync with the on-disk one. We
484      * need to do this even if updating refcounts failed.
485      */
486     for(i = 0;i < s->l1_size; i++) {
487         s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
488     }
489 
490     if (ret < 0) {
491         goto fail;
492     }
493 
494     g_free(sn_l1_table);
495     sn_l1_table = NULL;
496 
497     /*
498      * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
499      * when we decreased the refcount of the old snapshot.
500      */
501     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
502     if (ret < 0) {
503         goto fail;
504     }
505 
506 #ifdef DEBUG_ALLOC
507     {
508         BdrvCheckResult result = {0};
509         qcow2_check_refcounts(bs, &result);
510     }
511 #endif
512     return 0;
513 
514 fail:
515     g_free(sn_l1_table);
516     return ret;
517 }
518 
519 int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
520 {
521     BDRVQcowState *s = bs->opaque;
522     QCowSnapshot sn;
523     int snapshot_index, ret;
524 
525     /* Search the snapshot */
526     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
527     if (snapshot_index < 0) {
528         return -ENOENT;
529     }
530     sn = s->snapshots[snapshot_index];
531 
532     /* Remove it from the snapshot list */
533     memmove(s->snapshots + snapshot_index,
534             s->snapshots + snapshot_index + 1,
535             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
536     s->nb_snapshots--;
537     ret = qcow2_write_snapshots(bs);
538     if (ret < 0) {
539         return ret;
540     }
541 
542     /*
543      * The snapshot is now unused, clean up. If we fail after this point, we
544      * won't recover but just leak clusters.
545      */
546     g_free(sn.id_str);
547     g_free(sn.name);
548 
549     /*
550      * Now decrease the refcounts of clusters referenced by the snapshot and
551      * free the L1 table.
552      */
553     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
554                                          sn.l1_size, -1);
555     if (ret < 0) {
556         return ret;
557     }
558     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
559 
560     /* must update the copied flag on the current cluster offsets */
561     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
562     if (ret < 0) {
563         return ret;
564     }
565 
566 #ifdef DEBUG_ALLOC
567     {
568         BdrvCheckResult result = {0};
569         qcow2_check_refcounts(bs, &result);
570     }
571 #endif
572     return 0;
573 }
574 
575 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
576 {
577     BDRVQcowState *s = bs->opaque;
578     QEMUSnapshotInfo *sn_tab, *sn_info;
579     QCowSnapshot *sn;
580     int i;
581 
582     if (!s->nb_snapshots) {
583         *psn_tab = NULL;
584         return s->nb_snapshots;
585     }
586 
587     sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
588     for(i = 0; i < s->nb_snapshots; i++) {
589         sn_info = sn_tab + i;
590         sn = s->snapshots + i;
591         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
592                 sn->id_str);
593         pstrcpy(sn_info->name, sizeof(sn_info->name),
594                 sn->name);
595         sn_info->vm_state_size = sn->vm_state_size;
596         sn_info->date_sec = sn->date_sec;
597         sn_info->date_nsec = sn->date_nsec;
598         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
599     }
600     *psn_tab = sn_tab;
601     return s->nb_snapshots;
602 }
603 
604 int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
605 {
606     int i, snapshot_index;
607     BDRVQcowState *s = bs->opaque;
608     QCowSnapshot *sn;
609     uint64_t *new_l1_table;
610     int new_l1_bytes;
611     int ret;
612 
613     assert(bs->read_only);
614 
615     /* Search the snapshot */
616     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
617     if (snapshot_index < 0) {
618         return -ENOENT;
619     }
620     sn = &s->snapshots[snapshot_index];
621 
622     /* Allocate and read in the snapshot's L1 table */
623     new_l1_bytes = s->l1_size * sizeof(uint64_t);
624     new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
625 
626     ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
627     if (ret < 0) {
628         g_free(new_l1_table);
629         return ret;
630     }
631 
632     /* Switch the L1 table */
633     g_free(s->l1_table);
634 
635     s->l1_size = sn->l1_size;
636     s->l1_table_offset = sn->l1_table_offset;
637     s->l1_table = new_l1_table;
638 
639     for(i = 0;i < s->l1_size; i++) {
640         be64_to_cpus(&s->l1_table[i]);
641     }
642 
643     return 0;
644 }
645