xref: /openbmc/qemu/block/qcow2-snapshot.c (revision 0c4e9931)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "sysemu/block-backend.h"
27 #include "qapi/error.h"
28 #include "qcow2.h"
29 #include "qemu/bswap.h"
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 
33 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
34 {
35     BDRVQcow2State *s = bs->opaque;
36 
37     assert(i >= 0 && i < s->nb_snapshots);
38     g_free(s->snapshots[i].name);
39     g_free(s->snapshots[i].id_str);
40     g_free(s->snapshots[i].unknown_extra_data);
41     memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
42 }
43 
44 void qcow2_free_snapshots(BlockDriverState *bs)
45 {
46     BDRVQcow2State *s = bs->opaque;
47     int i;
48 
49     for(i = 0; i < s->nb_snapshots; i++) {
50         qcow2_free_single_snapshot(bs, i);
51     }
52     g_free(s->snapshots);
53     s->snapshots = NULL;
54     s->nb_snapshots = 0;
55 }
56 
57 /*
58  * If @repair is true, try to repair a broken snapshot table instead
59  * of just returning an error:
60  *
61  * - If the snapshot table was too long, set *nb_clusters_reduced to
62  *   the number of snapshots removed off the end.
63  *   The caller will update the on-disk nb_snapshots accordingly;
64  *   this leaks clusters, but is safe.
65  *   (The on-disk information must be updated before
66  *   qcow2_check_refcounts(), because that function relies on
67  *   s->nb_snapshots to reflect the on-disk value.)
68  *
69  * - If there were snapshots with too much extra metadata, increment
70  *   *extra_data_dropped for each.
71  *   This requires the caller to eventually rewrite the whole snapshot
72  *   table, which requires cluster allocation.  Therefore, this should
73  *   be done only after qcow2_check_refcounts() made sure the refcount
74  *   structures are valid.
75  *   (In the meantime, the image is still valid because
76  *   qcow2_check_refcounts() does not do anything with snapshots'
77  *   extra data.)
78  */
79 static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
80                                    int *nb_clusters_reduced,
81                                    int *extra_data_dropped,
82                                    Error **errp)
83 {
84     BDRVQcow2State *s = bs->opaque;
85     QCowSnapshotHeader h;
86     QCowSnapshotExtraData extra;
87     QCowSnapshot *sn;
88     int i, id_str_size, name_size;
89     int64_t offset, pre_sn_offset;
90     uint64_t table_length = 0;
91     int ret;
92 
93     if (!s->nb_snapshots) {
94         s->snapshots = NULL;
95         s->snapshots_size = 0;
96         return 0;
97     }
98 
99     offset = s->snapshots_offset;
100     s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
101 
102     for(i = 0; i < s->nb_snapshots; i++) {
103         bool truncate_unknown_extra_data = false;
104 
105         pre_sn_offset = offset;
106         table_length = ROUND_UP(table_length, 8);
107 
108         /* Read statically sized part of the snapshot header */
109         offset = ROUND_UP(offset, 8);
110         ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
111         if (ret < 0) {
112             error_setg_errno(errp, -ret, "Failed to read snapshot table");
113             goto fail;
114         }
115 
116         offset += sizeof(h);
117         sn = s->snapshots + i;
118         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
119         sn->l1_size = be32_to_cpu(h.l1_size);
120         sn->vm_state_size = be32_to_cpu(h.vm_state_size);
121         sn->date_sec = be32_to_cpu(h.date_sec);
122         sn->date_nsec = be32_to_cpu(h.date_nsec);
123         sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
124         sn->extra_data_size = be32_to_cpu(h.extra_data_size);
125 
126         id_str_size = be16_to_cpu(h.id_str_size);
127         name_size = be16_to_cpu(h.name_size);
128 
129         if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
130             if (!repair) {
131                 ret = -EFBIG;
132                 error_setg(errp, "Too much extra metadata in snapshot table "
133                            "entry %i", i);
134                 error_append_hint(errp, "You can force-remove this extra "
135                                   "metadata with qemu-img check -r all\n");
136                 goto fail;
137             }
138 
139             fprintf(stderr, "Discarding too much extra metadata in snapshot "
140                     "table entry %i (%" PRIu32 " > %u)\n",
141                     i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);
142 
143             (*extra_data_dropped)++;
144             truncate_unknown_extra_data = true;
145         }
146 
147         /* Read known extra data */
148         ret = bdrv_pread(bs->file, offset, &extra,
149                          MIN(sizeof(extra), sn->extra_data_size));
150         if (ret < 0) {
151             error_setg_errno(errp, -ret, "Failed to read snapshot table");
152             goto fail;
153         }
154         offset += MIN(sizeof(extra), sn->extra_data_size);
155 
156         if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
157                                          vm_state_size_large)) {
158             sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
159         }
160 
161         if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
162             sn->disk_size = be64_to_cpu(extra.disk_size);
163         } else {
164             sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
165         }
166 
167         if (sn->extra_data_size > sizeof(extra)) {
168             uint64_t extra_data_end;
169             size_t unknown_extra_data_size;
170 
171             extra_data_end = offset + sn->extra_data_size - sizeof(extra);
172 
173             if (truncate_unknown_extra_data) {
174                 sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
175             }
176 
177             /* Store unknown extra data */
178             unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
179             sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
180             ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
181                              unknown_extra_data_size);
182             if (ret < 0) {
183                 error_setg_errno(errp, -ret,
184                                  "Failed to read snapshot table");
185                 goto fail;
186             }
187             offset = extra_data_end;
188         }
189 
190         /* Read snapshot ID */
191         sn->id_str = g_malloc(id_str_size + 1);
192         ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
193         if (ret < 0) {
194             error_setg_errno(errp, -ret, "Failed to read snapshot table");
195             goto fail;
196         }
197         offset += id_str_size;
198         sn->id_str[id_str_size] = '\0';
199 
200         /* Read snapshot name */
201         sn->name = g_malloc(name_size + 1);
202         ret = bdrv_pread(bs->file, offset, sn->name, name_size);
203         if (ret < 0) {
204             error_setg_errno(errp, -ret, "Failed to read snapshot table");
205             goto fail;
206         }
207         offset += name_size;
208         sn->name[name_size] = '\0';
209 
210         /* Note that the extra data may have been truncated */
211         table_length += sizeof(h) + sn->extra_data_size + id_str_size +
212                         name_size;
213         if (!repair) {
214             assert(table_length == offset - s->snapshots_offset);
215         }
216 
217         if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
218             offset - s->snapshots_offset > INT_MAX)
219         {
220             if (!repair) {
221                 ret = -EFBIG;
222                 error_setg(errp, "Snapshot table is too big");
223                 error_append_hint(errp, "You can force-remove all %u "
224                                   "overhanging snapshots with qemu-img check "
225                                   "-r all\n", s->nb_snapshots - i);
226                 goto fail;
227             }
228 
229             fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
230                     "table is too big)\n", s->nb_snapshots - i);
231 
232             *nb_clusters_reduced += (s->nb_snapshots - i);
233 
234             /* Discard current snapshot also */
235             qcow2_free_single_snapshot(bs, i);
236 
237             /*
238              * This leaks all the rest of the snapshot table and the
239              * snapshots' clusters, but we run in check -r all mode,
240              * so qcow2_check_refcounts() will take care of it.
241              */
242             s->nb_snapshots = i;
243             offset = pre_sn_offset;
244             break;
245         }
246     }
247 
248     assert(offset - s->snapshots_offset <= INT_MAX);
249     s->snapshots_size = offset - s->snapshots_offset;
250     return 0;
251 
252 fail:
253     qcow2_free_snapshots(bs);
254     return ret;
255 }
256 
257 int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
258 {
259     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
260 }
261 
262 /* add at the end of the file a new list of snapshots */
263 int qcow2_write_snapshots(BlockDriverState *bs)
264 {
265     BDRVQcow2State *s = bs->opaque;
266     QCowSnapshot *sn;
267     QCowSnapshotHeader h;
268     QCowSnapshotExtraData extra;
269     int i, name_size, id_str_size, snapshots_size;
270     struct {
271         uint32_t nb_snapshots;
272         uint64_t snapshots_offset;
273     } QEMU_PACKED header_data;
274     int64_t offset, snapshots_offset = 0;
275     int ret;
276 
277     /* compute the size of the snapshots */
278     offset = 0;
279     for(i = 0; i < s->nb_snapshots; i++) {
280         sn = s->snapshots + i;
281         offset = ROUND_UP(offset, 8);
282         offset += sizeof(h);
283         offset += MAX(sizeof(extra), sn->extra_data_size);
284         offset += strlen(sn->id_str);
285         offset += strlen(sn->name);
286 
287         if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
288             ret = -EFBIG;
289             goto fail;
290         }
291     }
292 
293     assert(offset <= INT_MAX);
294     snapshots_size = offset;
295 
296     /* Allocate space for the new snapshot list */
297     snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
298     offset = snapshots_offset;
299     if (offset < 0) {
300         ret = offset;
301         goto fail;
302     }
303     ret = bdrv_flush(bs);
304     if (ret < 0) {
305         goto fail;
306     }
307 
308     /* The snapshot list position has not yet been updated, so these clusters
309      * must indeed be completely free */
310     ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
311     if (ret < 0) {
312         goto fail;
313     }
314 
315 
316     /* Write all snapshots to the new list */
317     for(i = 0; i < s->nb_snapshots; i++) {
318         sn = s->snapshots + i;
319         memset(&h, 0, sizeof(h));
320         h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
321         h.l1_size = cpu_to_be32(sn->l1_size);
322         /* If it doesn't fit in 32 bit, older implementations should treat it
323          * as a disk-only snapshot rather than truncate the VM state */
324         if (sn->vm_state_size <= 0xffffffff) {
325             h.vm_state_size = cpu_to_be32(sn->vm_state_size);
326         }
327         h.date_sec = cpu_to_be32(sn->date_sec);
328         h.date_nsec = cpu_to_be32(sn->date_nsec);
329         h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
330         h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
331                                             sn->extra_data_size));
332 
333         memset(&extra, 0, sizeof(extra));
334         extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
335         extra.disk_size = cpu_to_be64(sn->disk_size);
336 
337         id_str_size = strlen(sn->id_str);
338         name_size = strlen(sn->name);
339         assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
340         h.id_str_size = cpu_to_be16(id_str_size);
341         h.name_size = cpu_to_be16(name_size);
342         offset = ROUND_UP(offset, 8);
343 
344         ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
345         if (ret < 0) {
346             goto fail;
347         }
348         offset += sizeof(h);
349 
350         ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
351         if (ret < 0) {
352             goto fail;
353         }
354         offset += sizeof(extra);
355 
356         if (sn->extra_data_size > sizeof(extra)) {
357             size_t unknown_extra_data_size =
358                 sn->extra_data_size - sizeof(extra);
359 
360             /* qcow2_read_snapshots() ensures no unbounded allocation */
361             assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
362             assert(sn->unknown_extra_data);
363 
364             ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data,
365                               unknown_extra_data_size);
366             if (ret < 0) {
367                 goto fail;
368             }
369             offset += unknown_extra_data_size;
370         }
371 
372         ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
373         if (ret < 0) {
374             goto fail;
375         }
376         offset += id_str_size;
377 
378         ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
379         if (ret < 0) {
380             goto fail;
381         }
382         offset += name_size;
383     }
384 
385     /*
386      * Update the header to point to the new snapshot table. This requires the
387      * new table and its refcounts to be stable on disk.
388      */
389     ret = bdrv_flush(bs);
390     if (ret < 0) {
391         goto fail;
392     }
393 
394     QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
395                       endof(QCowHeader, nb_snapshots));
396 
397     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
398     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
399 
400     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
401                            &header_data, sizeof(header_data));
402     if (ret < 0) {
403         goto fail;
404     }
405 
406     /* free the old snapshot table */
407     qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
408                         QCOW2_DISCARD_SNAPSHOT);
409     s->snapshots_offset = snapshots_offset;
410     s->snapshots_size = snapshots_size;
411     return 0;
412 
413 fail:
414     if (snapshots_offset > 0) {
415         qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
416                             QCOW2_DISCARD_ALWAYS);
417     }
418     return ret;
419 }
420 
421 int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
422                                                  BdrvCheckResult *result,
423                                                  BdrvCheckMode fix)
424 {
425     BDRVQcow2State *s = bs->opaque;
426     Error *local_err = NULL;
427     int nb_clusters_reduced = 0;
428     int extra_data_dropped = 0;
429     int ret;
430     struct {
431         uint32_t nb_snapshots;
432         uint64_t snapshots_offset;
433     } QEMU_PACKED snapshot_table_pointer;
434 
435     /* qcow2_do_open() discards this information in check mode */
436     ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
437                      &snapshot_table_pointer, sizeof(snapshot_table_pointer));
438     if (ret < 0) {
439         result->check_errors++;
440         fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
441                 "the image header: %s\n", strerror(-ret));
442         return ret;
443     }
444 
445     s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
446     s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);
447 
448     if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
449         fprintf(stderr, "Discarding %u overhanging snapshots\n",
450                 s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
451 
452         nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
453         s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
454     }
455 
456     ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
457                                sizeof(QCowSnapshotHeader),
458                                sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
459                                "snapshot table", &local_err);
460     if (ret < 0) {
461         result->check_errors++;
462         error_reportf_err(local_err, "ERROR ");
463 
464         if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
465             fprintf(stderr, "You can force-remove all %u overhanging snapshots "
466                     "with qemu-img check -r all\n",
467                     s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
468         }
469 
470         /* We did not read the snapshot table, so invalidate this information */
471         s->snapshots_offset = 0;
472         s->nb_snapshots = 0;
473 
474         return ret;
475     }
476 
477     qemu_co_mutex_unlock(&s->lock);
478     ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
479                                   &nb_clusters_reduced, &extra_data_dropped,
480                                   &local_err);
481     qemu_co_mutex_lock(&s->lock);
482     if (ret < 0) {
483         result->check_errors++;
484         error_reportf_err(local_err,
485                           "ERROR failed to read the snapshot table: ");
486 
487         /* We did not read the snapshot table, so invalidate this information */
488         s->snapshots_offset = 0;
489         s->nb_snapshots = 0;
490 
491         return ret;
492     }
493     result->corruptions += nb_clusters_reduced + extra_data_dropped;
494 
495     if (nb_clusters_reduced) {
496         /*
497          * Update image header now, because:
498          * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
499          *     the same as what the image header says,
500          * (2) this leaks clusters, but qcow2_check_refcounts() will
501          *     fix that.
502          */
503         assert(fix & BDRV_FIX_ERRORS);
504 
505         snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
506         ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
507                                &snapshot_table_pointer.nb_snapshots,
508                                sizeof(snapshot_table_pointer.nb_snapshots));
509         if (ret < 0) {
510             result->check_errors++;
511             fprintf(stderr, "ERROR failed to update the snapshot count in the "
512                     "image header: %s\n", strerror(-ret));
513             return ret;
514         }
515 
516         result->corruptions_fixed += nb_clusters_reduced;
517         result->corruptions -= nb_clusters_reduced;
518     }
519 
520     /*
521      * All of v3 images' snapshot table entries need to have at least
522      * 16 bytes of extra data.
523      */
524     if (s->qcow_version >= 3) {
525         int i;
526         for (i = 0; i < s->nb_snapshots; i++) {
527             if (s->snapshots[i].extra_data_size <
528                 sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
529                 sizeof_field(QCowSnapshotExtraData, disk_size))
530             {
531                 result->corruptions++;
532                 fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
533                         fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
534             }
535         }
536     }
537 
538     return 0;
539 }
540 
541 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
542                                                 BdrvCheckResult *result,
543                                                 BdrvCheckMode fix)
544 {
545     BDRVQcow2State *s = bs->opaque;
546     int ret;
547 
548     if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
549         qemu_co_mutex_unlock(&s->lock);
550         ret = qcow2_write_snapshots(bs);
551         qemu_co_mutex_lock(&s->lock);
552         if (ret < 0) {
553             result->check_errors++;
554             fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
555                     strerror(-ret));
556             return ret;
557         }
558 
559         result->corruptions_fixed += result->corruptions;
560         result->corruptions = 0;
561     }
562 
563     return 0;
564 }
565 
566 static void find_new_snapshot_id(BlockDriverState *bs,
567                                  char *id_str, int id_str_size)
568 {
569     BDRVQcow2State *s = bs->opaque;
570     QCowSnapshot *sn;
571     int i;
572     unsigned long id, id_max = 0;
573 
574     for(i = 0; i < s->nb_snapshots; i++) {
575         sn = s->snapshots + i;
576         id = strtoul(sn->id_str, NULL, 10);
577         if (id > id_max)
578             id_max = id;
579     }
580     snprintf(id_str, id_str_size, "%lu", id_max + 1);
581 }
582 
583 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
584                                         const char *id,
585                                         const char *name)
586 {
587     BDRVQcow2State *s = bs->opaque;
588     int i;
589 
590     if (id && name) {
591         for (i = 0; i < s->nb_snapshots; i++) {
592             if (!strcmp(s->snapshots[i].id_str, id) &&
593                 !strcmp(s->snapshots[i].name, name)) {
594                 return i;
595             }
596         }
597     } else if (id) {
598         for (i = 0; i < s->nb_snapshots; i++) {
599             if (!strcmp(s->snapshots[i].id_str, id)) {
600                 return i;
601             }
602         }
603     } else if (name) {
604         for (i = 0; i < s->nb_snapshots; i++) {
605             if (!strcmp(s->snapshots[i].name, name)) {
606                 return i;
607             }
608         }
609     }
610 
611     return -1;
612 }
613 
614 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
615                                        const char *id_or_name)
616 {
617     int ret;
618 
619     ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
620     if (ret >= 0) {
621         return ret;
622     }
623     return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
624 }
625 
626 /* if no id is provided, a new one is constructed */
627 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
628 {
629     BDRVQcow2State *s = bs->opaque;
630     QCowSnapshot *new_snapshot_list = NULL;
631     QCowSnapshot *old_snapshot_list = NULL;
632     QCowSnapshot sn1, *sn = &sn1;
633     int i, ret;
634     uint64_t *l1_table = NULL;
635     int64_t l1_table_offset;
636 
637     if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
638         return -EFBIG;
639     }
640 
641     if (has_data_file(bs)) {
642         return -ENOTSUP;
643     }
644 
645     memset(sn, 0, sizeof(*sn));
646 
647     /* Generate an ID */
648     find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
649 
650     /* Populate sn with passed data */
651     sn->id_str = g_strdup(sn_info->id_str);
652     sn->name = g_strdup(sn_info->name);
653 
654     sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
655     sn->vm_state_size = sn_info->vm_state_size;
656     sn->date_sec = sn_info->date_sec;
657     sn->date_nsec = sn_info->date_nsec;
658     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
659     sn->extra_data_size = sizeof(QCowSnapshotExtraData);
660 
661     /* Allocate the L1 table of the snapshot and copy the current one there. */
662     l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
663     if (l1_table_offset < 0) {
664         ret = l1_table_offset;
665         goto fail;
666     }
667 
668     sn->l1_table_offset = l1_table_offset;
669     sn->l1_size = s->l1_size;
670 
671     l1_table = g_try_new(uint64_t, s->l1_size);
672     if (s->l1_size && l1_table == NULL) {
673         ret = -ENOMEM;
674         goto fail;
675     }
676 
677     for(i = 0; i < s->l1_size; i++) {
678         l1_table[i] = cpu_to_be64(s->l1_table[i]);
679     }
680 
681     ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
682                                         s->l1_size * sizeof(uint64_t), false);
683     if (ret < 0) {
684         goto fail;
685     }
686 
687     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
688                       s->l1_size * sizeof(uint64_t));
689     if (ret < 0) {
690         goto fail;
691     }
692 
693     g_free(l1_table);
694     l1_table = NULL;
695 
696     /*
697      * Increase the refcounts of all clusters and make sure everything is
698      * stable on disk before updating the snapshot table to contain a pointer
699      * to the new L1 table.
700      */
701     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
702     if (ret < 0) {
703         goto fail;
704     }
705 
706     /* Append the new snapshot to the snapshot list */
707     new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
708     if (s->snapshots) {
709         memcpy(new_snapshot_list, s->snapshots,
710                s->nb_snapshots * sizeof(QCowSnapshot));
711         old_snapshot_list = s->snapshots;
712     }
713     s->snapshots = new_snapshot_list;
714     s->snapshots[s->nb_snapshots++] = *sn;
715 
716     ret = qcow2_write_snapshots(bs);
717     if (ret < 0) {
718         g_free(s->snapshots);
719         s->snapshots = old_snapshot_list;
720         s->nb_snapshots--;
721         goto fail;
722     }
723 
724     g_free(old_snapshot_list);
725 
726     /* The VM state isn't needed any more in the active L1 table; in fact, it
727      * hurts by causing expensive COW for the next snapshot. */
728     qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
729                           ROUND_UP(sn->vm_state_size, s->cluster_size),
730                           QCOW2_DISCARD_NEVER, false);
731 
732 #ifdef DEBUG_ALLOC
733     {
734       BdrvCheckResult result = {0};
735       qcow2_check_refcounts(bs, &result, 0);
736     }
737 #endif
738     return 0;
739 
740 fail:
741     g_free(sn->id_str);
742     g_free(sn->name);
743     g_free(l1_table);
744 
745     return ret;
746 }
747 
748 /* copy the snapshot 'snapshot_name' into the current disk image */
749 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
750 {
751     BDRVQcow2State *s = bs->opaque;
752     QCowSnapshot *sn;
753     Error *local_err = NULL;
754     int i, snapshot_index;
755     int cur_l1_bytes, sn_l1_bytes;
756     int ret;
757     uint64_t *sn_l1_table = NULL;
758 
759     if (has_data_file(bs)) {
760         return -ENOTSUP;
761     }
762 
763     /* Search the snapshot */
764     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
765     if (snapshot_index < 0) {
766         return -ENOENT;
767     }
768     sn = &s->snapshots[snapshot_index];
769 
770     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
771                                sizeof(uint64_t), QCOW_MAX_L1_SIZE,
772                                "Snapshot L1 table", &local_err);
773     if (ret < 0) {
774         error_report_err(local_err);
775         goto fail;
776     }
777 
778     if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
779         BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
780                                             &local_err);
781         if (!blk) {
782             error_report_err(local_err);
783             ret = -ENOTSUP;
784             goto fail;
785         }
786 
787         ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
788                            &local_err);
789         blk_unref(blk);
790         if (ret < 0) {
791             error_report_err(local_err);
792             goto fail;
793         }
794     }
795 
796     /*
797      * Make sure that the current L1 table is big enough to contain the whole
798      * L1 table of the snapshot. If the snapshot L1 table is smaller, the
799      * current one must be padded with zeros.
800      */
801     ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
802     if (ret < 0) {
803         goto fail;
804     }
805 
806     cur_l1_bytes = s->l1_size * sizeof(uint64_t);
807     sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
808 
809     /*
810      * Copy the snapshot L1 table to the current L1 table.
811      *
812      * Before overwriting the old current L1 table on disk, make sure to
813      * increase all refcounts for the clusters referenced by the new one.
814      * Decrease the refcount referenced by the old one only when the L1
815      * table is overwritten.
816      */
817     sn_l1_table = g_try_malloc0(cur_l1_bytes);
818     if (cur_l1_bytes && sn_l1_table == NULL) {
819         ret = -ENOMEM;
820         goto fail;
821     }
822 
823     ret = bdrv_pread(bs->file, sn->l1_table_offset,
824                      sn_l1_table, sn_l1_bytes);
825     if (ret < 0) {
826         goto fail;
827     }
828 
829     ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
830                                          sn->l1_size, 1);
831     if (ret < 0) {
832         goto fail;
833     }
834 
835     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
836                                         s->l1_table_offset, cur_l1_bytes,
837                                         false);
838     if (ret < 0) {
839         goto fail;
840     }
841 
842     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
843                            cur_l1_bytes);
844     if (ret < 0) {
845         goto fail;
846     }
847 
848     /*
849      * Decrease refcount of clusters of current L1 table.
850      *
851      * At this point, the in-memory s->l1_table points to the old L1 table,
852      * whereas on disk we already have the new one.
853      *
854      * qcow2_update_snapshot_refcount special cases the current L1 table to use
855      * the in-memory data instead of really using the offset to load a new one,
856      * which is why this works.
857      */
858     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
859                                          s->l1_size, -1);
860 
861     /*
862      * Now update the in-memory L1 table to be in sync with the on-disk one. We
863      * need to do this even if updating refcounts failed.
864      */
865     for(i = 0;i < s->l1_size; i++) {
866         s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
867     }
868 
869     if (ret < 0) {
870         goto fail;
871     }
872 
873     g_free(sn_l1_table);
874     sn_l1_table = NULL;
875 
876     /*
877      * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
878      * when we decreased the refcount of the old snapshot.
879      */
880     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
881     if (ret < 0) {
882         goto fail;
883     }
884 
885 #ifdef DEBUG_ALLOC
886     {
887         BdrvCheckResult result = {0};
888         qcow2_check_refcounts(bs, &result, 0);
889     }
890 #endif
891     return 0;
892 
893 fail:
894     g_free(sn_l1_table);
895     return ret;
896 }
897 
898 int qcow2_snapshot_delete(BlockDriverState *bs,
899                           const char *snapshot_id,
900                           const char *name,
901                           Error **errp)
902 {
903     BDRVQcow2State *s = bs->opaque;
904     QCowSnapshot sn;
905     int snapshot_index, ret;
906 
907     if (has_data_file(bs)) {
908         return -ENOTSUP;
909     }
910 
911     /* Search the snapshot */
912     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
913     if (snapshot_index < 0) {
914         error_setg(errp, "Can't find the snapshot");
915         return -ENOENT;
916     }
917     sn = s->snapshots[snapshot_index];
918 
919     ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
920                                sizeof(uint64_t), QCOW_MAX_L1_SIZE,
921                                "Snapshot L1 table", errp);
922     if (ret < 0) {
923         return ret;
924     }
925 
926     /* Remove it from the snapshot list */
927     memmove(s->snapshots + snapshot_index,
928             s->snapshots + snapshot_index + 1,
929             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
930     s->nb_snapshots--;
931     ret = qcow2_write_snapshots(bs);
932     if (ret < 0) {
933         error_setg_errno(errp, -ret,
934                          "Failed to remove snapshot from snapshot list");
935         return ret;
936     }
937 
938     /*
939      * The snapshot is now unused, clean up. If we fail after this point, we
940      * won't recover but just leak clusters.
941      */
942     g_free(sn.unknown_extra_data);
943     g_free(sn.id_str);
944     g_free(sn.name);
945 
946     /*
947      * Now decrease the refcounts of clusters referenced by the snapshot and
948      * free the L1 table.
949      */
950     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
951                                          sn.l1_size, -1);
952     if (ret < 0) {
953         error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
954         return ret;
955     }
956     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
957                         QCOW2_DISCARD_SNAPSHOT);
958 
959     /* must update the copied flag on the current cluster offsets */
960     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
961     if (ret < 0) {
962         error_setg_errno(errp, -ret,
963                          "Failed to update snapshot status in disk");
964         return ret;
965     }
966 
967 #ifdef DEBUG_ALLOC
968     {
969         BdrvCheckResult result = {0};
970         qcow2_check_refcounts(bs, &result, 0);
971     }
972 #endif
973     return 0;
974 }
975 
976 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
977 {
978     BDRVQcow2State *s = bs->opaque;
979     QEMUSnapshotInfo *sn_tab, *sn_info;
980     QCowSnapshot *sn;
981     int i;
982 
983     if (has_data_file(bs)) {
984         return -ENOTSUP;
985     }
986     if (!s->nb_snapshots) {
987         *psn_tab = NULL;
988         return s->nb_snapshots;
989     }
990 
991     sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
992     for(i = 0; i < s->nb_snapshots; i++) {
993         sn_info = sn_tab + i;
994         sn = s->snapshots + i;
995         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
996                 sn->id_str);
997         pstrcpy(sn_info->name, sizeof(sn_info->name),
998                 sn->name);
999         sn_info->vm_state_size = sn->vm_state_size;
1000         sn_info->date_sec = sn->date_sec;
1001         sn_info->date_nsec = sn->date_nsec;
1002         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
1003     }
1004     *psn_tab = sn_tab;
1005     return s->nb_snapshots;
1006 }
1007 
1008 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
1009                             const char *snapshot_id,
1010                             const char *name,
1011                             Error **errp)
1012 {
1013     int i, snapshot_index;
1014     BDRVQcow2State *s = bs->opaque;
1015     QCowSnapshot *sn;
1016     uint64_t *new_l1_table;
1017     int new_l1_bytes;
1018     int ret;
1019 
1020     assert(bs->read_only);
1021 
1022     /* Search the snapshot */
1023     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1024     if (snapshot_index < 0) {
1025         error_setg(errp,
1026                    "Can't find snapshot");
1027         return -ENOENT;
1028     }
1029     sn = &s->snapshots[snapshot_index];
1030 
1031     /* Allocate and read in the snapshot's L1 table */
1032     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1033                                sizeof(uint64_t), QCOW_MAX_L1_SIZE,
1034                                "Snapshot L1 table", errp);
1035     if (ret < 0) {
1036         return ret;
1037     }
1038     new_l1_bytes = sn->l1_size * sizeof(uint64_t);
1039     new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1040     if (new_l1_table == NULL) {
1041         return -ENOMEM;
1042     }
1043 
1044     ret = bdrv_pread(bs->file, sn->l1_table_offset,
1045                      new_l1_table, new_l1_bytes);
1046     if (ret < 0) {
1047         error_setg(errp, "Failed to read l1 table for snapshot");
1048         qemu_vfree(new_l1_table);
1049         return ret;
1050     }
1051 
1052     /* Switch the L1 table */
1053     qemu_vfree(s->l1_table);
1054 
1055     s->l1_size = sn->l1_size;
1056     s->l1_table_offset = sn->l1_table_offset;
1057     s->l1_table = new_l1_table;
1058 
1059     for(i = 0;i < s->l1_size; i++) {
1060         be64_to_cpus(&s->l1_table[i]);
1061     }
1062 
1063     return 0;
1064 }
1065