xref: /openbmc/qemu/block/qcow2-snapshot.c (revision 8ea75438)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "sysemu/block-backend.h"
27 #include "qapi/error.h"
28 #include "qcow2.h"
29 #include "qemu/bswap.h"
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 
33 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
34 {
35     BDRVQcow2State *s = bs->opaque;
36 
37     assert(i >= 0 && i < s->nb_snapshots);
38     g_free(s->snapshots[i].name);
39     g_free(s->snapshots[i].id_str);
40     g_free(s->snapshots[i].unknown_extra_data);
41     memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
42 }
43 
44 void qcow2_free_snapshots(BlockDriverState *bs)
45 {
46     BDRVQcow2State *s = bs->opaque;
47     int i;
48 
49     for(i = 0; i < s->nb_snapshots; i++) {
50         qcow2_free_single_snapshot(bs, i);
51     }
52     g_free(s->snapshots);
53     s->snapshots = NULL;
54     s->nb_snapshots = 0;
55 }
56 
57 /*
58  * If @repair is true, try to repair a broken snapshot table instead
59  * of just returning an error:
60  *
61  * - If the snapshot table was too long, set *nb_clusters_reduced to
62  *   the number of snapshots removed off the end.
63  *   The caller will update the on-disk nb_snapshots accordingly;
64  *   this leaks clusters, but is safe.
65  *   (The on-disk information must be updated before
66  *   qcow2_check_refcounts(), because that function relies on
67  *   s->nb_snapshots to reflect the on-disk value.)
68  *
69  * - If there were snapshots with too much extra metadata, increment
70  *   *extra_data_dropped for each.
71  *   This requires the caller to eventually rewrite the whole snapshot
72  *   table, which requires cluster allocation.  Therefore, this should
73  *   be done only after qcow2_check_refcounts() made sure the refcount
74  *   structures are valid.
75  *   (In the meantime, the image is still valid because
76  *   qcow2_check_refcounts() does not do anything with snapshots'
77  *   extra data.)
78  */
79 static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
80                                    int *nb_clusters_reduced,
81                                    int *extra_data_dropped,
82                                    Error **errp)
83 {
84     BDRVQcow2State *s = bs->opaque;
85     QCowSnapshotHeader h;
86     QCowSnapshotExtraData extra;
87     QCowSnapshot *sn;
88     int i, id_str_size, name_size;
89     int64_t offset, pre_sn_offset;
90     uint64_t table_length = 0;
91     int ret;
92 
93     if (!s->nb_snapshots) {
94         s->snapshots = NULL;
95         s->snapshots_size = 0;
96         return 0;
97     }
98 
99     offset = s->snapshots_offset;
100     s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
101 
102     for(i = 0; i < s->nb_snapshots; i++) {
103         bool truncate_unknown_extra_data = false;
104 
105         pre_sn_offset = offset;
106         table_length = ROUND_UP(table_length, 8);
107 
108         /* Read statically sized part of the snapshot header */
109         offset = ROUND_UP(offset, 8);
110         ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
111         if (ret < 0) {
112             error_setg_errno(errp, -ret, "Failed to read snapshot table");
113             goto fail;
114         }
115 
116         offset += sizeof(h);
117         sn = s->snapshots + i;
118         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
119         sn->l1_size = be32_to_cpu(h.l1_size);
120         sn->vm_state_size = be32_to_cpu(h.vm_state_size);
121         sn->date_sec = be32_to_cpu(h.date_sec);
122         sn->date_nsec = be32_to_cpu(h.date_nsec);
123         sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
124         sn->extra_data_size = be32_to_cpu(h.extra_data_size);
125 
126         id_str_size = be16_to_cpu(h.id_str_size);
127         name_size = be16_to_cpu(h.name_size);
128 
129         if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
130             if (!repair) {
131                 ret = -EFBIG;
132                 error_setg(errp, "Too much extra metadata in snapshot table "
133                            "entry %i", i);
134                 error_append_hint(errp, "You can force-remove this extra "
135                                   "metadata with qemu-img check -r all\n");
136                 goto fail;
137             }
138 
139             fprintf(stderr, "Discarding too much extra metadata in snapshot "
140                     "table entry %i (%" PRIu32 " > %u)\n",
141                     i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);
142 
143             (*extra_data_dropped)++;
144             truncate_unknown_extra_data = true;
145         }
146 
147         /* Read known extra data */
148         ret = bdrv_pread(bs->file, offset, &extra,
149                          MIN(sizeof(extra), sn->extra_data_size));
150         if (ret < 0) {
151             error_setg_errno(errp, -ret, "Failed to read snapshot table");
152             goto fail;
153         }
154         offset += MIN(sizeof(extra), sn->extra_data_size);
155 
156         if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
157                                          vm_state_size_large)) {
158             sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
159         }
160 
161         if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
162             sn->disk_size = be64_to_cpu(extra.disk_size);
163         } else {
164             sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
165         }
166 
167         if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) {
168             sn->icount = be64_to_cpu(extra.icount);
169         } else {
170             sn->icount = -1ULL;
171         }
172 
173         if (sn->extra_data_size > sizeof(extra)) {
174             uint64_t extra_data_end;
175             size_t unknown_extra_data_size;
176 
177             extra_data_end = offset + sn->extra_data_size - sizeof(extra);
178 
179             if (truncate_unknown_extra_data) {
180                 sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
181             }
182 
183             /* Store unknown extra data */
184             unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
185             sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
186             ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
187                              unknown_extra_data_size);
188             if (ret < 0) {
189                 error_setg_errno(errp, -ret,
190                                  "Failed to read snapshot table");
191                 goto fail;
192             }
193             offset = extra_data_end;
194         }
195 
196         /* Read snapshot ID */
197         sn->id_str = g_malloc(id_str_size + 1);
198         ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
199         if (ret < 0) {
200             error_setg_errno(errp, -ret, "Failed to read snapshot table");
201             goto fail;
202         }
203         offset += id_str_size;
204         sn->id_str[id_str_size] = '\0';
205 
206         /* Read snapshot name */
207         sn->name = g_malloc(name_size + 1);
208         ret = bdrv_pread(bs->file, offset, sn->name, name_size);
209         if (ret < 0) {
210             error_setg_errno(errp, -ret, "Failed to read snapshot table");
211             goto fail;
212         }
213         offset += name_size;
214         sn->name[name_size] = '\0';
215 
216         /* Note that the extra data may have been truncated */
217         table_length += sizeof(h) + sn->extra_data_size + id_str_size +
218                         name_size;
219         if (!repair) {
220             assert(table_length == offset - s->snapshots_offset);
221         }
222 
223         if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
224             offset - s->snapshots_offset > INT_MAX)
225         {
226             if (!repair) {
227                 ret = -EFBIG;
228                 error_setg(errp, "Snapshot table is too big");
229                 error_append_hint(errp, "You can force-remove all %u "
230                                   "overhanging snapshots with qemu-img check "
231                                   "-r all\n", s->nb_snapshots - i);
232                 goto fail;
233             }
234 
235             fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
236                     "table is too big)\n", s->nb_snapshots - i);
237 
238             *nb_clusters_reduced += (s->nb_snapshots - i);
239 
240             /* Discard current snapshot also */
241             qcow2_free_single_snapshot(bs, i);
242 
243             /*
244              * This leaks all the rest of the snapshot table and the
245              * snapshots' clusters, but we run in check -r all mode,
246              * so qcow2_check_refcounts() will take care of it.
247              */
248             s->nb_snapshots = i;
249             offset = pre_sn_offset;
250             break;
251         }
252     }
253 
254     assert(offset - s->snapshots_offset <= INT_MAX);
255     s->snapshots_size = offset - s->snapshots_offset;
256     return 0;
257 
258 fail:
259     qcow2_free_snapshots(bs);
260     return ret;
261 }
262 
263 int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
264 {
265     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
266 }
267 
268 /* add at the end of the file a new list of snapshots */
269 int qcow2_write_snapshots(BlockDriverState *bs)
270 {
271     BDRVQcow2State *s = bs->opaque;
272     QCowSnapshot *sn;
273     QCowSnapshotHeader h;
274     QCowSnapshotExtraData extra;
275     int i, name_size, id_str_size, snapshots_size;
276     struct {
277         uint32_t nb_snapshots;
278         uint64_t snapshots_offset;
279     } QEMU_PACKED header_data;
280     int64_t offset, snapshots_offset = 0;
281     int ret;
282 
283     /* compute the size of the snapshots */
284     offset = 0;
285     for(i = 0; i < s->nb_snapshots; i++) {
286         sn = s->snapshots + i;
287         offset = ROUND_UP(offset, 8);
288         offset += sizeof(h);
289         offset += MAX(sizeof(extra), sn->extra_data_size);
290         offset += strlen(sn->id_str);
291         offset += strlen(sn->name);
292 
293         if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
294             ret = -EFBIG;
295             goto fail;
296         }
297     }
298 
299     assert(offset <= INT_MAX);
300     snapshots_size = offset;
301 
302     /* Allocate space for the new snapshot list */
303     snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
304     offset = snapshots_offset;
305     if (offset < 0) {
306         ret = offset;
307         goto fail;
308     }
309     ret = bdrv_flush(bs);
310     if (ret < 0) {
311         goto fail;
312     }
313 
314     /* The snapshot list position has not yet been updated, so these clusters
315      * must indeed be completely free */
316     ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
317     if (ret < 0) {
318         goto fail;
319     }
320 
321 
322     /* Write all snapshots to the new list */
323     for(i = 0; i < s->nb_snapshots; i++) {
324         sn = s->snapshots + i;
325         memset(&h, 0, sizeof(h));
326         h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
327         h.l1_size = cpu_to_be32(sn->l1_size);
328         /* If it doesn't fit in 32 bit, older implementations should treat it
329          * as a disk-only snapshot rather than truncate the VM state */
330         if (sn->vm_state_size <= 0xffffffff) {
331             h.vm_state_size = cpu_to_be32(sn->vm_state_size);
332         }
333         h.date_sec = cpu_to_be32(sn->date_sec);
334         h.date_nsec = cpu_to_be32(sn->date_nsec);
335         h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
336         h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
337                                             sn->extra_data_size));
338 
339         memset(&extra, 0, sizeof(extra));
340         extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
341         extra.disk_size = cpu_to_be64(sn->disk_size);
342         extra.icount = cpu_to_be64(sn->icount);
343 
344         id_str_size = strlen(sn->id_str);
345         name_size = strlen(sn->name);
346         assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
347         h.id_str_size = cpu_to_be16(id_str_size);
348         h.name_size = cpu_to_be16(name_size);
349         offset = ROUND_UP(offset, 8);
350 
351         ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
352         if (ret < 0) {
353             goto fail;
354         }
355         offset += sizeof(h);
356 
357         ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
358         if (ret < 0) {
359             goto fail;
360         }
361         offset += sizeof(extra);
362 
363         if (sn->extra_data_size > sizeof(extra)) {
364             size_t unknown_extra_data_size =
365                 sn->extra_data_size - sizeof(extra);
366 
367             /* qcow2_read_snapshots() ensures no unbounded allocation */
368             assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
369             assert(sn->unknown_extra_data);
370 
371             ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data,
372                               unknown_extra_data_size);
373             if (ret < 0) {
374                 goto fail;
375             }
376             offset += unknown_extra_data_size;
377         }
378 
379         ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
380         if (ret < 0) {
381             goto fail;
382         }
383         offset += id_str_size;
384 
385         ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
386         if (ret < 0) {
387             goto fail;
388         }
389         offset += name_size;
390     }
391 
392     /*
393      * Update the header to point to the new snapshot table. This requires the
394      * new table and its refcounts to be stable on disk.
395      */
396     ret = bdrv_flush(bs);
397     if (ret < 0) {
398         goto fail;
399     }
400 
401     QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
402                       endof(QCowHeader, nb_snapshots));
403 
404     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
405     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
406 
407     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
408                            &header_data, sizeof(header_data));
409     if (ret < 0) {
410         goto fail;
411     }
412 
413     /* free the old snapshot table */
414     qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
415                         QCOW2_DISCARD_SNAPSHOT);
416     s->snapshots_offset = snapshots_offset;
417     s->snapshots_size = snapshots_size;
418     return 0;
419 
420 fail:
421     if (snapshots_offset > 0) {
422         qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
423                             QCOW2_DISCARD_ALWAYS);
424     }
425     return ret;
426 }
427 
428 int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
429                                                  BdrvCheckResult *result,
430                                                  BdrvCheckMode fix)
431 {
432     BDRVQcow2State *s = bs->opaque;
433     Error *local_err = NULL;
434     int nb_clusters_reduced = 0;
435     int extra_data_dropped = 0;
436     int ret;
437     struct {
438         uint32_t nb_snapshots;
439         uint64_t snapshots_offset;
440     } QEMU_PACKED snapshot_table_pointer;
441 
442     /* qcow2_do_open() discards this information in check mode */
443     ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
444                      &snapshot_table_pointer, sizeof(snapshot_table_pointer));
445     if (ret < 0) {
446         result->check_errors++;
447         fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
448                 "the image header: %s\n", strerror(-ret));
449         return ret;
450     }
451 
452     s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
453     s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);
454 
455     if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
456         fprintf(stderr, "Discarding %u overhanging snapshots\n",
457                 s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
458 
459         nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
460         s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
461     }
462 
463     ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
464                                sizeof(QCowSnapshotHeader),
465                                sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
466                                "snapshot table", &local_err);
467     if (ret < 0) {
468         result->check_errors++;
469         error_reportf_err(local_err, "ERROR ");
470 
471         if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
472             fprintf(stderr, "You can force-remove all %u overhanging snapshots "
473                     "with qemu-img check -r all\n",
474                     s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
475         }
476 
477         /* We did not read the snapshot table, so invalidate this information */
478         s->snapshots_offset = 0;
479         s->nb_snapshots = 0;
480 
481         return ret;
482     }
483 
484     qemu_co_mutex_unlock(&s->lock);
485     ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
486                                   &nb_clusters_reduced, &extra_data_dropped,
487                                   &local_err);
488     qemu_co_mutex_lock(&s->lock);
489     if (ret < 0) {
490         result->check_errors++;
491         error_reportf_err(local_err,
492                           "ERROR failed to read the snapshot table: ");
493 
494         /* We did not read the snapshot table, so invalidate this information */
495         s->snapshots_offset = 0;
496         s->nb_snapshots = 0;
497 
498         return ret;
499     }
500     result->corruptions += nb_clusters_reduced + extra_data_dropped;
501 
502     if (nb_clusters_reduced) {
503         /*
504          * Update image header now, because:
505          * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
506          *     the same as what the image header says,
507          * (2) this leaks clusters, but qcow2_check_refcounts() will
508          *     fix that.
509          */
510         assert(fix & BDRV_FIX_ERRORS);
511 
512         snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
513         ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
514                                &snapshot_table_pointer.nb_snapshots,
515                                sizeof(snapshot_table_pointer.nb_snapshots));
516         if (ret < 0) {
517             result->check_errors++;
518             fprintf(stderr, "ERROR failed to update the snapshot count in the "
519                     "image header: %s\n", strerror(-ret));
520             return ret;
521         }
522 
523         result->corruptions_fixed += nb_clusters_reduced;
524         result->corruptions -= nb_clusters_reduced;
525     }
526 
527     /*
528      * All of v3 images' snapshot table entries need to have at least
529      * 16 bytes of extra data.
530      */
531     if (s->qcow_version >= 3) {
532         int i;
533         for (i = 0; i < s->nb_snapshots; i++) {
534             if (s->snapshots[i].extra_data_size <
535                 sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
536                 sizeof_field(QCowSnapshotExtraData, disk_size))
537             {
538                 result->corruptions++;
539                 fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
540                         fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
541             }
542         }
543     }
544 
545     return 0;
546 }
547 
548 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
549                                                 BdrvCheckResult *result,
550                                                 BdrvCheckMode fix)
551 {
552     BDRVQcow2State *s = bs->opaque;
553     int ret;
554 
555     if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
556         qemu_co_mutex_unlock(&s->lock);
557         ret = qcow2_write_snapshots(bs);
558         qemu_co_mutex_lock(&s->lock);
559         if (ret < 0) {
560             result->check_errors++;
561             fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
562                     strerror(-ret));
563             return ret;
564         }
565 
566         result->corruptions_fixed += result->corruptions;
567         result->corruptions = 0;
568     }
569 
570     return 0;
571 }
572 
573 static void find_new_snapshot_id(BlockDriverState *bs,
574                                  char *id_str, int id_str_size)
575 {
576     BDRVQcow2State *s = bs->opaque;
577     QCowSnapshot *sn;
578     int i;
579     unsigned long id, id_max = 0;
580 
581     for(i = 0; i < s->nb_snapshots; i++) {
582         sn = s->snapshots + i;
583         id = strtoul(sn->id_str, NULL, 10);
584         if (id > id_max)
585             id_max = id;
586     }
587     snprintf(id_str, id_str_size, "%lu", id_max + 1);
588 }
589 
590 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
591                                         const char *id,
592                                         const char *name)
593 {
594     BDRVQcow2State *s = bs->opaque;
595     int i;
596 
597     if (id && name) {
598         for (i = 0; i < s->nb_snapshots; i++) {
599             if (!strcmp(s->snapshots[i].id_str, id) &&
600                 !strcmp(s->snapshots[i].name, name)) {
601                 return i;
602             }
603         }
604     } else if (id) {
605         for (i = 0; i < s->nb_snapshots; i++) {
606             if (!strcmp(s->snapshots[i].id_str, id)) {
607                 return i;
608             }
609         }
610     } else if (name) {
611         for (i = 0; i < s->nb_snapshots; i++) {
612             if (!strcmp(s->snapshots[i].name, name)) {
613                 return i;
614             }
615         }
616     }
617 
618     return -1;
619 }
620 
621 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
622                                        const char *id_or_name)
623 {
624     int ret;
625 
626     ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
627     if (ret >= 0) {
628         return ret;
629     }
630     return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
631 }
632 
633 /* if no id is provided, a new one is constructed */
634 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
635 {
636     BDRVQcow2State *s = bs->opaque;
637     QCowSnapshot *new_snapshot_list = NULL;
638     QCowSnapshot *old_snapshot_list = NULL;
639     QCowSnapshot sn1, *sn = &sn1;
640     int i, ret;
641     uint64_t *l1_table = NULL;
642     int64_t l1_table_offset;
643 
644     if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
645         return -EFBIG;
646     }
647 
648     if (has_data_file(bs)) {
649         return -ENOTSUP;
650     }
651 
652     memset(sn, 0, sizeof(*sn));
653 
654     /* Generate an ID */
655     find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
656 
657     /* Populate sn with passed data */
658     sn->id_str = g_strdup(sn_info->id_str);
659     sn->name = g_strdup(sn_info->name);
660 
661     sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
662     sn->vm_state_size = sn_info->vm_state_size;
663     sn->date_sec = sn_info->date_sec;
664     sn->date_nsec = sn_info->date_nsec;
665     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
666     sn->icount = sn_info->icount;
667     sn->extra_data_size = sizeof(QCowSnapshotExtraData);
668 
669     /* Allocate the L1 table of the snapshot and copy the current one there. */
670     l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE);
671     if (l1_table_offset < 0) {
672         ret = l1_table_offset;
673         goto fail;
674     }
675 
676     sn->l1_table_offset = l1_table_offset;
677     sn->l1_size = s->l1_size;
678 
679     l1_table = g_try_new(uint64_t, s->l1_size);
680     if (s->l1_size && l1_table == NULL) {
681         ret = -ENOMEM;
682         goto fail;
683     }
684 
685     for(i = 0; i < s->l1_size; i++) {
686         l1_table[i] = cpu_to_be64(s->l1_table[i]);
687     }
688 
689     ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
690                                         s->l1_size * L1E_SIZE, false);
691     if (ret < 0) {
692         goto fail;
693     }
694 
695     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
696                       s->l1_size * L1E_SIZE);
697     if (ret < 0) {
698         goto fail;
699     }
700 
701     g_free(l1_table);
702     l1_table = NULL;
703 
704     /*
705      * Increase the refcounts of all clusters and make sure everything is
706      * stable on disk before updating the snapshot table to contain a pointer
707      * to the new L1 table.
708      */
709     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
710     if (ret < 0) {
711         goto fail;
712     }
713 
714     /* Append the new snapshot to the snapshot list */
715     new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
716     if (s->snapshots) {
717         memcpy(new_snapshot_list, s->snapshots,
718                s->nb_snapshots * sizeof(QCowSnapshot));
719         old_snapshot_list = s->snapshots;
720     }
721     s->snapshots = new_snapshot_list;
722     s->snapshots[s->nb_snapshots++] = *sn;
723 
724     ret = qcow2_write_snapshots(bs);
725     if (ret < 0) {
726         g_free(s->snapshots);
727         s->snapshots = old_snapshot_list;
728         s->nb_snapshots--;
729         goto fail;
730     }
731 
732     g_free(old_snapshot_list);
733 
734     /* The VM state isn't needed any more in the active L1 table; in fact, it
735      * hurts by causing expensive COW for the next snapshot. */
736     qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
737                           ROUND_UP(sn->vm_state_size, s->cluster_size),
738                           QCOW2_DISCARD_NEVER, false);
739 
740 #ifdef DEBUG_ALLOC
741     {
742       BdrvCheckResult result = {0};
743       qcow2_check_refcounts(bs, &result, 0);
744     }
745 #endif
746     return 0;
747 
748 fail:
749     g_free(sn->id_str);
750     g_free(sn->name);
751     g_free(l1_table);
752 
753     return ret;
754 }
755 
756 /* copy the snapshot 'snapshot_name' into the current disk image */
757 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
758 {
759     BDRVQcow2State *s = bs->opaque;
760     QCowSnapshot *sn;
761     Error *local_err = NULL;
762     int i, snapshot_index;
763     int cur_l1_bytes, sn_l1_bytes;
764     int ret;
765     uint64_t *sn_l1_table = NULL;
766 
767     if (has_data_file(bs)) {
768         return -ENOTSUP;
769     }
770 
771     /* Search the snapshot */
772     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
773     if (snapshot_index < 0) {
774         return -ENOENT;
775     }
776     sn = &s->snapshots[snapshot_index];
777 
778     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
779                                L1E_SIZE, QCOW_MAX_L1_SIZE,
780                                "Snapshot L1 table", &local_err);
781     if (ret < 0) {
782         error_report_err(local_err);
783         goto fail;
784     }
785 
786     if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
787         BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
788                                             &local_err);
789         if (!blk) {
790             error_report_err(local_err);
791             ret = -ENOTSUP;
792             goto fail;
793         }
794 
795         ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
796                            &local_err);
797         blk_unref(blk);
798         if (ret < 0) {
799             error_report_err(local_err);
800             goto fail;
801         }
802     }
803 
804     /*
805      * Make sure that the current L1 table is big enough to contain the whole
806      * L1 table of the snapshot. If the snapshot L1 table is smaller, the
807      * current one must be padded with zeros.
808      */
809     ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
810     if (ret < 0) {
811         goto fail;
812     }
813 
814     cur_l1_bytes = s->l1_size * L1E_SIZE;
815     sn_l1_bytes = sn->l1_size * L1E_SIZE;
816 
817     /*
818      * Copy the snapshot L1 table to the current L1 table.
819      *
820      * Before overwriting the old current L1 table on disk, make sure to
821      * increase all refcounts for the clusters referenced by the new one.
822      * Decrease the refcount referenced by the old one only when the L1
823      * table is overwritten.
824      */
825     sn_l1_table = g_try_malloc0(cur_l1_bytes);
826     if (cur_l1_bytes && sn_l1_table == NULL) {
827         ret = -ENOMEM;
828         goto fail;
829     }
830 
831     ret = bdrv_pread(bs->file, sn->l1_table_offset,
832                      sn_l1_table, sn_l1_bytes);
833     if (ret < 0) {
834         goto fail;
835     }
836 
837     ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
838                                          sn->l1_size, 1);
839     if (ret < 0) {
840         goto fail;
841     }
842 
843     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
844                                         s->l1_table_offset, cur_l1_bytes,
845                                         false);
846     if (ret < 0) {
847         goto fail;
848     }
849 
850     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
851                            cur_l1_bytes);
852     if (ret < 0) {
853         goto fail;
854     }
855 
856     /*
857      * Decrease refcount of clusters of current L1 table.
858      *
859      * At this point, the in-memory s->l1_table points to the old L1 table,
860      * whereas on disk we already have the new one.
861      *
862      * qcow2_update_snapshot_refcount special cases the current L1 table to use
863      * the in-memory data instead of really using the offset to load a new one,
864      * which is why this works.
865      */
866     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
867                                          s->l1_size, -1);
868 
869     /*
870      * Now update the in-memory L1 table to be in sync with the on-disk one. We
871      * need to do this even if updating refcounts failed.
872      */
873     for(i = 0;i < s->l1_size; i++) {
874         s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
875     }
876 
877     if (ret < 0) {
878         goto fail;
879     }
880 
881     g_free(sn_l1_table);
882     sn_l1_table = NULL;
883 
884     /*
885      * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
886      * when we decreased the refcount of the old snapshot.
887      */
888     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
889     if (ret < 0) {
890         goto fail;
891     }
892 
893 #ifdef DEBUG_ALLOC
894     {
895         BdrvCheckResult result = {0};
896         qcow2_check_refcounts(bs, &result, 0);
897     }
898 #endif
899     return 0;
900 
901 fail:
902     g_free(sn_l1_table);
903     return ret;
904 }
905 
906 int qcow2_snapshot_delete(BlockDriverState *bs,
907                           const char *snapshot_id,
908                           const char *name,
909                           Error **errp)
910 {
911     BDRVQcow2State *s = bs->opaque;
912     QCowSnapshot sn;
913     int snapshot_index, ret;
914 
915     if (has_data_file(bs)) {
916         return -ENOTSUP;
917     }
918 
919     /* Search the snapshot */
920     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
921     if (snapshot_index < 0) {
922         error_setg(errp, "Can't find the snapshot");
923         return -ENOENT;
924     }
925     sn = s->snapshots[snapshot_index];
926 
927     ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
928                                L1E_SIZE, QCOW_MAX_L1_SIZE,
929                                "Snapshot L1 table", errp);
930     if (ret < 0) {
931         return ret;
932     }
933 
934     /* Remove it from the snapshot list */
935     memmove(s->snapshots + snapshot_index,
936             s->snapshots + snapshot_index + 1,
937             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
938     s->nb_snapshots--;
939     ret = qcow2_write_snapshots(bs);
940     if (ret < 0) {
941         error_setg_errno(errp, -ret,
942                          "Failed to remove snapshot from snapshot list");
943         return ret;
944     }
945 
946     /*
947      * The snapshot is now unused, clean up. If we fail after this point, we
948      * won't recover but just leak clusters.
949      */
950     g_free(sn.unknown_extra_data);
951     g_free(sn.id_str);
952     g_free(sn.name);
953 
954     /*
955      * Now decrease the refcounts of clusters referenced by the snapshot and
956      * free the L1 table.
957      */
958     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
959                                          sn.l1_size, -1);
960     if (ret < 0) {
961         error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
962         return ret;
963     }
964     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE,
965                         QCOW2_DISCARD_SNAPSHOT);
966 
967     /* must update the copied flag on the current cluster offsets */
968     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
969     if (ret < 0) {
970         error_setg_errno(errp, -ret,
971                          "Failed to update snapshot status in disk");
972         return ret;
973     }
974 
975 #ifdef DEBUG_ALLOC
976     {
977         BdrvCheckResult result = {0};
978         qcow2_check_refcounts(bs, &result, 0);
979     }
980 #endif
981     return 0;
982 }
983 
984 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
985 {
986     BDRVQcow2State *s = bs->opaque;
987     QEMUSnapshotInfo *sn_tab, *sn_info;
988     QCowSnapshot *sn;
989     int i;
990 
991     if (has_data_file(bs)) {
992         return -ENOTSUP;
993     }
994     if (!s->nb_snapshots) {
995         *psn_tab = NULL;
996         return s->nb_snapshots;
997     }
998 
999     sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
1000     for(i = 0; i < s->nb_snapshots; i++) {
1001         sn_info = sn_tab + i;
1002         sn = s->snapshots + i;
1003         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
1004                 sn->id_str);
1005         pstrcpy(sn_info->name, sizeof(sn_info->name),
1006                 sn->name);
1007         sn_info->vm_state_size = sn->vm_state_size;
1008         sn_info->date_sec = sn->date_sec;
1009         sn_info->date_nsec = sn->date_nsec;
1010         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
1011         sn_info->icount = sn->icount;
1012     }
1013     *psn_tab = sn_tab;
1014     return s->nb_snapshots;
1015 }
1016 
1017 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
1018                             const char *snapshot_id,
1019                             const char *name,
1020                             Error **errp)
1021 {
1022     int i, snapshot_index;
1023     BDRVQcow2State *s = bs->opaque;
1024     QCowSnapshot *sn;
1025     uint64_t *new_l1_table;
1026     int new_l1_bytes;
1027     int ret;
1028 
1029     assert(bdrv_is_read_only(bs));
1030 
1031     /* Search the snapshot */
1032     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1033     if (snapshot_index < 0) {
1034         error_setg(errp,
1035                    "Can't find snapshot");
1036         return -ENOENT;
1037     }
1038     sn = &s->snapshots[snapshot_index];
1039 
1040     /* Allocate and read in the snapshot's L1 table */
1041     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1042                                L1E_SIZE, QCOW_MAX_L1_SIZE,
1043                                "Snapshot L1 table", errp);
1044     if (ret < 0) {
1045         return ret;
1046     }
1047     new_l1_bytes = sn->l1_size * L1E_SIZE;
1048     new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1049     if (new_l1_table == NULL) {
1050         return -ENOMEM;
1051     }
1052 
1053     ret = bdrv_pread(bs->file, sn->l1_table_offset,
1054                      new_l1_table, new_l1_bytes);
1055     if (ret < 0) {
1056         error_setg(errp, "Failed to read l1 table for snapshot");
1057         qemu_vfree(new_l1_table);
1058         return ret;
1059     }
1060 
1061     /* Switch the L1 table */
1062     qemu_vfree(s->l1_table);
1063 
1064     s->l1_size = sn->l1_size;
1065     s->l1_table_offset = sn->l1_table_offset;
1066     s->l1_table = new_l1_table;
1067 
1068     for(i = 0;i < s->l1_size; i++) {
1069         be64_to_cpus(&s->l1_table[i]);
1070     }
1071 
1072     return 0;
1073 }
1074