xref: /openbmc/qemu/block/qcow2-snapshot.c (revision 979a8902)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
27 #include "qcow2.h"
28 #include "qemu/bswap.h"
29 #include "qemu/error-report.h"
30 #include "qemu/cutils.h"
31 
32 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
33 {
34     BDRVQcow2State *s = bs->opaque;
35 
36     assert(i >= 0 && i < s->nb_snapshots);
37     g_free(s->snapshots[i].name);
38     g_free(s->snapshots[i].id_str);
39     g_free(s->snapshots[i].unknown_extra_data);
40     memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
41 }
42 
43 void qcow2_free_snapshots(BlockDriverState *bs)
44 {
45     BDRVQcow2State *s = bs->opaque;
46     int i;
47 
48     for(i = 0; i < s->nb_snapshots; i++) {
49         qcow2_free_single_snapshot(bs, i);
50     }
51     g_free(s->snapshots);
52     s->snapshots = NULL;
53     s->nb_snapshots = 0;
54 }
55 
56 /*
57  * If @repair is true, try to repair a broken snapshot table instead
58  * of just returning an error:
59  *
60  * - If the snapshot table was too long, set *nb_clusters_reduced to
61  *   the number of snapshots removed off the end.
62  *   The caller will update the on-disk nb_snapshots accordingly;
63  *   this leaks clusters, but is safe.
64  *   (The on-disk information must be updated before
65  *   qcow2_check_refcounts(), because that function relies on
66  *   s->nb_snapshots to reflect the on-disk value.)
67  *
68  * - If there were snapshots with too much extra metadata, increment
69  *   *extra_data_dropped for each.
70  *   This requires the caller to eventually rewrite the whole snapshot
71  *   table, which requires cluster allocation.  Therefore, this should
72  *   be done only after qcow2_check_refcounts() made sure the refcount
73  *   structures are valid.
74  *   (In the meantime, the image is still valid because
75  *   qcow2_check_refcounts() does not do anything with snapshots'
76  *   extra data.)
77  */
78 static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
79                                    int *nb_clusters_reduced,
80                                    int *extra_data_dropped,
81                                    Error **errp)
82 {
83     BDRVQcow2State *s = bs->opaque;
84     QCowSnapshotHeader h;
85     QCowSnapshotExtraData extra;
86     QCowSnapshot *sn;
87     int i, id_str_size, name_size;
88     int64_t offset, pre_sn_offset;
89     uint64_t table_length = 0;
90     int ret;
91 
92     if (!s->nb_snapshots) {
93         s->snapshots = NULL;
94         s->snapshots_size = 0;
95         return 0;
96     }
97 
98     offset = s->snapshots_offset;
99     s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
100 
101     for(i = 0; i < s->nb_snapshots; i++) {
102         bool truncate_unknown_extra_data = false;
103 
104         pre_sn_offset = offset;
105         table_length = ROUND_UP(table_length, 8);
106 
107         /* Read statically sized part of the snapshot header */
108         offset = ROUND_UP(offset, 8);
109         ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
110         if (ret < 0) {
111             error_setg_errno(errp, -ret, "Failed to read snapshot table");
112             goto fail;
113         }
114 
115         offset += sizeof(h);
116         sn = s->snapshots + i;
117         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
118         sn->l1_size = be32_to_cpu(h.l1_size);
119         sn->vm_state_size = be32_to_cpu(h.vm_state_size);
120         sn->date_sec = be32_to_cpu(h.date_sec);
121         sn->date_nsec = be32_to_cpu(h.date_nsec);
122         sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
123         sn->extra_data_size = be32_to_cpu(h.extra_data_size);
124 
125         id_str_size = be16_to_cpu(h.id_str_size);
126         name_size = be16_to_cpu(h.name_size);
127 
128         if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
129             if (!repair) {
130                 ret = -EFBIG;
131                 error_setg(errp, "Too much extra metadata in snapshot table "
132                            "entry %i", i);
133                 error_append_hint(errp, "You can force-remove this extra "
134                                   "metadata with qemu-img check -r all\n");
135                 goto fail;
136             }
137 
138             fprintf(stderr, "Discarding too much extra metadata in snapshot "
139                     "table entry %i (%" PRIu32 " > %u)\n",
140                     i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);
141 
142             (*extra_data_dropped)++;
143             truncate_unknown_extra_data = true;
144         }
145 
146         /* Read known extra data */
147         ret = bdrv_pread(bs->file, offset, &extra,
148                          MIN(sizeof(extra), sn->extra_data_size));
149         if (ret < 0) {
150             error_setg_errno(errp, -ret, "Failed to read snapshot table");
151             goto fail;
152         }
153         offset += MIN(sizeof(extra), sn->extra_data_size);
154 
155         if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
156                                          vm_state_size_large)) {
157             sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
158         }
159 
160         if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
161             sn->disk_size = be64_to_cpu(extra.disk_size);
162         } else {
163             sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
164         }
165 
166         if (sn->extra_data_size > sizeof(extra)) {
167             uint64_t extra_data_end;
168             size_t unknown_extra_data_size;
169 
170             extra_data_end = offset + sn->extra_data_size - sizeof(extra);
171 
172             if (truncate_unknown_extra_data) {
173                 sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
174             }
175 
176             /* Store unknown extra data */
177             unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
178             sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
179             ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
180                              unknown_extra_data_size);
181             if (ret < 0) {
182                 error_setg_errno(errp, -ret,
183                                  "Failed to read snapshot table");
184                 goto fail;
185             }
186             offset = extra_data_end;
187         }
188 
189         /* Read snapshot ID */
190         sn->id_str = g_malloc(id_str_size + 1);
191         ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
192         if (ret < 0) {
193             error_setg_errno(errp, -ret, "Failed to read snapshot table");
194             goto fail;
195         }
196         offset += id_str_size;
197         sn->id_str[id_str_size] = '\0';
198 
199         /* Read snapshot name */
200         sn->name = g_malloc(name_size + 1);
201         ret = bdrv_pread(bs->file, offset, sn->name, name_size);
202         if (ret < 0) {
203             error_setg_errno(errp, -ret, "Failed to read snapshot table");
204             goto fail;
205         }
206         offset += name_size;
207         sn->name[name_size] = '\0';
208 
209         /* Note that the extra data may have been truncated */
210         table_length += sizeof(h) + sn->extra_data_size + id_str_size +
211                         name_size;
212         if (!repair) {
213             assert(table_length == offset - s->snapshots_offset);
214         }
215 
216         if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
217             offset - s->snapshots_offset > INT_MAX)
218         {
219             if (!repair) {
220                 ret = -EFBIG;
221                 error_setg(errp, "Snapshot table is too big");
222                 error_append_hint(errp, "You can force-remove all %u "
223                                   "overhanging snapshots with qemu-img check "
224                                   "-r all\n", s->nb_snapshots - i);
225                 goto fail;
226             }
227 
228             fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
229                     "table is too big)\n", s->nb_snapshots - i);
230 
231             *nb_clusters_reduced += (s->nb_snapshots - i);
232 
233             /* Discard current snapshot also */
234             qcow2_free_single_snapshot(bs, i);
235 
236             /*
237              * This leaks all the rest of the snapshot table and the
238              * snapshots' clusters, but we run in check -r all mode,
239              * so qcow2_check_refcounts() will take care of it.
240              */
241             s->nb_snapshots = i;
242             offset = pre_sn_offset;
243             break;
244         }
245     }
246 
247     assert(offset - s->snapshots_offset <= INT_MAX);
248     s->snapshots_size = offset - s->snapshots_offset;
249     return 0;
250 
251 fail:
252     qcow2_free_snapshots(bs);
253     return ret;
254 }
255 
256 int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
257 {
258     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
259 }
260 
261 /* add at the end of the file a new list of snapshots */
262 int qcow2_write_snapshots(BlockDriverState *bs)
263 {
264     BDRVQcow2State *s = bs->opaque;
265     QCowSnapshot *sn;
266     QCowSnapshotHeader h;
267     QCowSnapshotExtraData extra;
268     int i, name_size, id_str_size, snapshots_size;
269     struct {
270         uint32_t nb_snapshots;
271         uint64_t snapshots_offset;
272     } QEMU_PACKED header_data;
273     int64_t offset, snapshots_offset = 0;
274     int ret;
275 
276     /* compute the size of the snapshots */
277     offset = 0;
278     for(i = 0; i < s->nb_snapshots; i++) {
279         sn = s->snapshots + i;
280         offset = ROUND_UP(offset, 8);
281         offset += sizeof(h);
282         offset += MAX(sizeof(extra), sn->extra_data_size);
283         offset += strlen(sn->id_str);
284         offset += strlen(sn->name);
285 
286         if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
287             ret = -EFBIG;
288             goto fail;
289         }
290     }
291 
292     assert(offset <= INT_MAX);
293     snapshots_size = offset;
294 
295     /* Allocate space for the new snapshot list */
296     snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
297     offset = snapshots_offset;
298     if (offset < 0) {
299         ret = offset;
300         goto fail;
301     }
302     ret = bdrv_flush(bs);
303     if (ret < 0) {
304         goto fail;
305     }
306 
307     /* The snapshot list position has not yet been updated, so these clusters
308      * must indeed be completely free */
309     ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
310     if (ret < 0) {
311         goto fail;
312     }
313 
314 
315     /* Write all snapshots to the new list */
316     for(i = 0; i < s->nb_snapshots; i++) {
317         sn = s->snapshots + i;
318         memset(&h, 0, sizeof(h));
319         h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
320         h.l1_size = cpu_to_be32(sn->l1_size);
321         /* If it doesn't fit in 32 bit, older implementations should treat it
322          * as a disk-only snapshot rather than truncate the VM state */
323         if (sn->vm_state_size <= 0xffffffff) {
324             h.vm_state_size = cpu_to_be32(sn->vm_state_size);
325         }
326         h.date_sec = cpu_to_be32(sn->date_sec);
327         h.date_nsec = cpu_to_be32(sn->date_nsec);
328         h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
329         h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
330                                             sn->extra_data_size));
331 
332         memset(&extra, 0, sizeof(extra));
333         extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
334         extra.disk_size = cpu_to_be64(sn->disk_size);
335 
336         id_str_size = strlen(sn->id_str);
337         name_size = strlen(sn->name);
338         assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
339         h.id_str_size = cpu_to_be16(id_str_size);
340         h.name_size = cpu_to_be16(name_size);
341         offset = ROUND_UP(offset, 8);
342 
343         ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
344         if (ret < 0) {
345             goto fail;
346         }
347         offset += sizeof(h);
348 
349         ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
350         if (ret < 0) {
351             goto fail;
352         }
353         offset += sizeof(extra);
354 
355         if (sn->extra_data_size > sizeof(extra)) {
356             size_t unknown_extra_data_size =
357                 sn->extra_data_size - sizeof(extra);
358 
359             /* qcow2_read_snapshots() ensures no unbounded allocation */
360             assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
361             assert(sn->unknown_extra_data);
362 
363             ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data,
364                               unknown_extra_data_size);
365             if (ret < 0) {
366                 goto fail;
367             }
368             offset += unknown_extra_data_size;
369         }
370 
371         ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
372         if (ret < 0) {
373             goto fail;
374         }
375         offset += id_str_size;
376 
377         ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
378         if (ret < 0) {
379             goto fail;
380         }
381         offset += name_size;
382     }
383 
384     /*
385      * Update the header to point to the new snapshot table. This requires the
386      * new table and its refcounts to be stable on disk.
387      */
388     ret = bdrv_flush(bs);
389     if (ret < 0) {
390         goto fail;
391     }
392 
393     QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
394                       endof(QCowHeader, nb_snapshots));
395 
396     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
397     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
398 
399     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
400                            &header_data, sizeof(header_data));
401     if (ret < 0) {
402         goto fail;
403     }
404 
405     /* free the old snapshot table */
406     qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
407                         QCOW2_DISCARD_SNAPSHOT);
408     s->snapshots_offset = snapshots_offset;
409     s->snapshots_size = snapshots_size;
410     return 0;
411 
412 fail:
413     if (snapshots_offset > 0) {
414         qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
415                             QCOW2_DISCARD_ALWAYS);
416     }
417     return ret;
418 }
419 
420 int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
421                                                  BdrvCheckResult *result,
422                                                  BdrvCheckMode fix)
423 {
424     BDRVQcow2State *s = bs->opaque;
425     Error *local_err = NULL;
426     int nb_clusters_reduced = 0;
427     int extra_data_dropped = 0;
428     int ret;
429     struct {
430         uint32_t nb_snapshots;
431         uint64_t snapshots_offset;
432     } QEMU_PACKED snapshot_table_pointer;
433 
434     /* qcow2_do_open() discards this information in check mode */
435     ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
436                      &snapshot_table_pointer, sizeof(snapshot_table_pointer));
437     if (ret < 0) {
438         result->check_errors++;
439         fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
440                 "the image header: %s\n", strerror(-ret));
441         return ret;
442     }
443 
444     s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
445     s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);
446 
447     if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
448         fprintf(stderr, "Discarding %u overhanging snapshots\n",
449                 s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
450 
451         nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
452         s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
453     }
454 
455     ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
456                                sizeof(QCowSnapshotHeader),
457                                sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
458                                "snapshot table", &local_err);
459     if (ret < 0) {
460         result->check_errors++;
461         error_reportf_err(local_err, "ERROR ");
462 
463         if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
464             fprintf(stderr, "You can force-remove all %u overhanging snapshots "
465                     "with qemu-img check -r all\n",
466                     s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
467         }
468 
469         /* We did not read the snapshot table, so invalidate this information */
470         s->snapshots_offset = 0;
471         s->nb_snapshots = 0;
472 
473         return ret;
474     }
475 
476     qemu_co_mutex_unlock(&s->lock);
477     ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
478                                   &nb_clusters_reduced, &extra_data_dropped,
479                                   &local_err);
480     qemu_co_mutex_lock(&s->lock);
481     if (ret < 0) {
482         result->check_errors++;
483         error_reportf_err(local_err,
484                           "ERROR failed to read the snapshot table: ");
485 
486         /* We did not read the snapshot table, so invalidate this information */
487         s->snapshots_offset = 0;
488         s->nb_snapshots = 0;
489 
490         return ret;
491     }
492     result->corruptions += nb_clusters_reduced + extra_data_dropped;
493 
494     if (nb_clusters_reduced) {
495         /*
496          * Update image header now, because:
497          * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
498          *     the same as what the image header says,
499          * (2) this leaks clusters, but qcow2_check_refcounts() will
500          *     fix that.
501          */
502         assert(fix & BDRV_FIX_ERRORS);
503 
504         snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
505         ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
506                                &snapshot_table_pointer.nb_snapshots,
507                                sizeof(snapshot_table_pointer.nb_snapshots));
508         if (ret < 0) {
509             result->check_errors++;
510             fprintf(stderr, "ERROR failed to update the snapshot count in the "
511                     "image header: %s\n", strerror(-ret));
512             return ret;
513         }
514 
515         result->corruptions_fixed += nb_clusters_reduced;
516         result->corruptions -= nb_clusters_reduced;
517     }
518 
519     /*
520      * All of v3 images' snapshot table entries need to have at least
521      * 16 bytes of extra data.
522      */
523     if (s->qcow_version >= 3) {
524         int i;
525         for (i = 0; i < s->nb_snapshots; i++) {
526             if (s->snapshots[i].extra_data_size <
527                 sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
528                 sizeof_field(QCowSnapshotExtraData, disk_size))
529             {
530                 result->corruptions++;
531                 fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
532                         fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
533             }
534         }
535     }
536 
537     return 0;
538 }
539 
540 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
541                                                 BdrvCheckResult *result,
542                                                 BdrvCheckMode fix)
543 {
544     BDRVQcow2State *s = bs->opaque;
545     int ret;
546 
547     if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
548         qemu_co_mutex_unlock(&s->lock);
549         ret = qcow2_write_snapshots(bs);
550         qemu_co_mutex_lock(&s->lock);
551         if (ret < 0) {
552             result->check_errors++;
553             fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
554                     strerror(-ret));
555             return ret;
556         }
557 
558         result->corruptions_fixed += result->corruptions;
559         result->corruptions = 0;
560     }
561 
562     return 0;
563 }
564 
565 static void find_new_snapshot_id(BlockDriverState *bs,
566                                  char *id_str, int id_str_size)
567 {
568     BDRVQcow2State *s = bs->opaque;
569     QCowSnapshot *sn;
570     int i;
571     unsigned long id, id_max = 0;
572 
573     for(i = 0; i < s->nb_snapshots; i++) {
574         sn = s->snapshots + i;
575         id = strtoul(sn->id_str, NULL, 10);
576         if (id > id_max)
577             id_max = id;
578     }
579     snprintf(id_str, id_str_size, "%lu", id_max + 1);
580 }
581 
582 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
583                                         const char *id,
584                                         const char *name)
585 {
586     BDRVQcow2State *s = bs->opaque;
587     int i;
588 
589     if (id && name) {
590         for (i = 0; i < s->nb_snapshots; i++) {
591             if (!strcmp(s->snapshots[i].id_str, id) &&
592                 !strcmp(s->snapshots[i].name, name)) {
593                 return i;
594             }
595         }
596     } else if (id) {
597         for (i = 0; i < s->nb_snapshots; i++) {
598             if (!strcmp(s->snapshots[i].id_str, id)) {
599                 return i;
600             }
601         }
602     } else if (name) {
603         for (i = 0; i < s->nb_snapshots; i++) {
604             if (!strcmp(s->snapshots[i].name, name)) {
605                 return i;
606             }
607         }
608     }
609 
610     return -1;
611 }
612 
613 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
614                                        const char *id_or_name)
615 {
616     int ret;
617 
618     ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
619     if (ret >= 0) {
620         return ret;
621     }
622     return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
623 }
624 
625 /* if no id is provided, a new one is constructed */
626 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
627 {
628     BDRVQcow2State *s = bs->opaque;
629     QCowSnapshot *new_snapshot_list = NULL;
630     QCowSnapshot *old_snapshot_list = NULL;
631     QCowSnapshot sn1, *sn = &sn1;
632     int i, ret;
633     uint64_t *l1_table = NULL;
634     int64_t l1_table_offset;
635 
636     if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
637         return -EFBIG;
638     }
639 
640     if (has_data_file(bs)) {
641         return -ENOTSUP;
642     }
643 
644     memset(sn, 0, sizeof(*sn));
645 
646     /* Generate an ID */
647     find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
648 
649     /* Populate sn with passed data */
650     sn->id_str = g_strdup(sn_info->id_str);
651     sn->name = g_strdup(sn_info->name);
652 
653     sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
654     sn->vm_state_size = sn_info->vm_state_size;
655     sn->date_sec = sn_info->date_sec;
656     sn->date_nsec = sn_info->date_nsec;
657     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
658     sn->extra_data_size = sizeof(QCowSnapshotExtraData);
659 
660     /* Allocate the L1 table of the snapshot and copy the current one there. */
661     l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
662     if (l1_table_offset < 0) {
663         ret = l1_table_offset;
664         goto fail;
665     }
666 
667     sn->l1_table_offset = l1_table_offset;
668     sn->l1_size = s->l1_size;
669 
670     l1_table = g_try_new(uint64_t, s->l1_size);
671     if (s->l1_size && l1_table == NULL) {
672         ret = -ENOMEM;
673         goto fail;
674     }
675 
676     for(i = 0; i < s->l1_size; i++) {
677         l1_table[i] = cpu_to_be64(s->l1_table[i]);
678     }
679 
680     ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
681                                         s->l1_size * sizeof(uint64_t), false);
682     if (ret < 0) {
683         goto fail;
684     }
685 
686     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
687                       s->l1_size * sizeof(uint64_t));
688     if (ret < 0) {
689         goto fail;
690     }
691 
692     g_free(l1_table);
693     l1_table = NULL;
694 
695     /*
696      * Increase the refcounts of all clusters and make sure everything is
697      * stable on disk before updating the snapshot table to contain a pointer
698      * to the new L1 table.
699      */
700     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
701     if (ret < 0) {
702         goto fail;
703     }
704 
705     /* Append the new snapshot to the snapshot list */
706     new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
707     if (s->snapshots) {
708         memcpy(new_snapshot_list, s->snapshots,
709                s->nb_snapshots * sizeof(QCowSnapshot));
710         old_snapshot_list = s->snapshots;
711     }
712     s->snapshots = new_snapshot_list;
713     s->snapshots[s->nb_snapshots++] = *sn;
714 
715     ret = qcow2_write_snapshots(bs);
716     if (ret < 0) {
717         g_free(s->snapshots);
718         s->snapshots = old_snapshot_list;
719         s->nb_snapshots--;
720         goto fail;
721     }
722 
723     g_free(old_snapshot_list);
724 
725     /* The VM state isn't needed any more in the active L1 table; in fact, it
726      * hurts by causing expensive COW for the next snapshot. */
727     qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
728                           ROUND_UP(sn->vm_state_size, s->cluster_size),
729                           QCOW2_DISCARD_NEVER, false);
730 
731 #ifdef DEBUG_ALLOC
732     {
733       BdrvCheckResult result = {0};
734       qcow2_check_refcounts(bs, &result, 0);
735     }
736 #endif
737     return 0;
738 
739 fail:
740     g_free(sn->id_str);
741     g_free(sn->name);
742     g_free(l1_table);
743 
744     return ret;
745 }
746 
747 /* copy the snapshot 'snapshot_name' into the current disk image */
748 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
749 {
750     BDRVQcow2State *s = bs->opaque;
751     QCowSnapshot *sn;
752     Error *local_err = NULL;
753     int i, snapshot_index;
754     int cur_l1_bytes, sn_l1_bytes;
755     int ret;
756     uint64_t *sn_l1_table = NULL;
757 
758     if (has_data_file(bs)) {
759         return -ENOTSUP;
760     }
761 
762     /* Search the snapshot */
763     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
764     if (snapshot_index < 0) {
765         return -ENOENT;
766     }
767     sn = &s->snapshots[snapshot_index];
768 
769     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
770                                sizeof(uint64_t), QCOW_MAX_L1_SIZE,
771                                "Snapshot L1 table", &local_err);
772     if (ret < 0) {
773         error_report_err(local_err);
774         goto fail;
775     }
776 
777     if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
778         error_report("qcow2: Loading snapshots with different disk "
779             "size is not implemented");
780         ret = -ENOTSUP;
781         goto fail;
782     }
783 
784     /*
785      * Make sure that the current L1 table is big enough to contain the whole
786      * L1 table of the snapshot. If the snapshot L1 table is smaller, the
787      * current one must be padded with zeros.
788      */
789     ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
790     if (ret < 0) {
791         goto fail;
792     }
793 
794     cur_l1_bytes = s->l1_size * sizeof(uint64_t);
795     sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
796 
797     /*
798      * Copy the snapshot L1 table to the current L1 table.
799      *
800      * Before overwriting the old current L1 table on disk, make sure to
801      * increase all refcounts for the clusters referenced by the new one.
802      * Decrease the refcount referenced by the old one only when the L1
803      * table is overwritten.
804      */
805     sn_l1_table = g_try_malloc0(cur_l1_bytes);
806     if (cur_l1_bytes && sn_l1_table == NULL) {
807         ret = -ENOMEM;
808         goto fail;
809     }
810 
811     ret = bdrv_pread(bs->file, sn->l1_table_offset,
812                      sn_l1_table, sn_l1_bytes);
813     if (ret < 0) {
814         goto fail;
815     }
816 
817     ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
818                                          sn->l1_size, 1);
819     if (ret < 0) {
820         goto fail;
821     }
822 
823     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
824                                         s->l1_table_offset, cur_l1_bytes,
825                                         false);
826     if (ret < 0) {
827         goto fail;
828     }
829 
830     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
831                            cur_l1_bytes);
832     if (ret < 0) {
833         goto fail;
834     }
835 
836     /*
837      * Decrease refcount of clusters of current L1 table.
838      *
839      * At this point, the in-memory s->l1_table points to the old L1 table,
840      * whereas on disk we already have the new one.
841      *
842      * qcow2_update_snapshot_refcount special cases the current L1 table to use
843      * the in-memory data instead of really using the offset to load a new one,
844      * which is why this works.
845      */
846     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
847                                          s->l1_size, -1);
848 
849     /*
850      * Now update the in-memory L1 table to be in sync with the on-disk one. We
851      * need to do this even if updating refcounts failed.
852      */
853     for(i = 0;i < s->l1_size; i++) {
854         s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
855     }
856 
857     if (ret < 0) {
858         goto fail;
859     }
860 
861     g_free(sn_l1_table);
862     sn_l1_table = NULL;
863 
864     /*
865      * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
866      * when we decreased the refcount of the old snapshot.
867      */
868     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
869     if (ret < 0) {
870         goto fail;
871     }
872 
873 #ifdef DEBUG_ALLOC
874     {
875         BdrvCheckResult result = {0};
876         qcow2_check_refcounts(bs, &result, 0);
877     }
878 #endif
879     return 0;
880 
881 fail:
882     g_free(sn_l1_table);
883     return ret;
884 }
885 
886 int qcow2_snapshot_delete(BlockDriverState *bs,
887                           const char *snapshot_id,
888                           const char *name,
889                           Error **errp)
890 {
891     BDRVQcow2State *s = bs->opaque;
892     QCowSnapshot sn;
893     int snapshot_index, ret;
894 
895     if (has_data_file(bs)) {
896         return -ENOTSUP;
897     }
898 
899     /* Search the snapshot */
900     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
901     if (snapshot_index < 0) {
902         error_setg(errp, "Can't find the snapshot");
903         return -ENOENT;
904     }
905     sn = s->snapshots[snapshot_index];
906 
907     ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
908                                sizeof(uint64_t), QCOW_MAX_L1_SIZE,
909                                "Snapshot L1 table", errp);
910     if (ret < 0) {
911         return ret;
912     }
913 
914     /* Remove it from the snapshot list */
915     memmove(s->snapshots + snapshot_index,
916             s->snapshots + snapshot_index + 1,
917             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
918     s->nb_snapshots--;
919     ret = qcow2_write_snapshots(bs);
920     if (ret < 0) {
921         error_setg_errno(errp, -ret,
922                          "Failed to remove snapshot from snapshot list");
923         return ret;
924     }
925 
926     /*
927      * The snapshot is now unused, clean up. If we fail after this point, we
928      * won't recover but just leak clusters.
929      */
930     g_free(sn.unknown_extra_data);
931     g_free(sn.id_str);
932     g_free(sn.name);
933 
934     /*
935      * Now decrease the refcounts of clusters referenced by the snapshot and
936      * free the L1 table.
937      */
938     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
939                                          sn.l1_size, -1);
940     if (ret < 0) {
941         error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
942         return ret;
943     }
944     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
945                         QCOW2_DISCARD_SNAPSHOT);
946 
947     /* must update the copied flag on the current cluster offsets */
948     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
949     if (ret < 0) {
950         error_setg_errno(errp, -ret,
951                          "Failed to update snapshot status in disk");
952         return ret;
953     }
954 
955 #ifdef DEBUG_ALLOC
956     {
957         BdrvCheckResult result = {0};
958         qcow2_check_refcounts(bs, &result, 0);
959     }
960 #endif
961     return 0;
962 }
963 
964 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
965 {
966     BDRVQcow2State *s = bs->opaque;
967     QEMUSnapshotInfo *sn_tab, *sn_info;
968     QCowSnapshot *sn;
969     int i;
970 
971     if (has_data_file(bs)) {
972         return -ENOTSUP;
973     }
974     if (!s->nb_snapshots) {
975         *psn_tab = NULL;
976         return s->nb_snapshots;
977     }
978 
979     sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
980     for(i = 0; i < s->nb_snapshots; i++) {
981         sn_info = sn_tab + i;
982         sn = s->snapshots + i;
983         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
984                 sn->id_str);
985         pstrcpy(sn_info->name, sizeof(sn_info->name),
986                 sn->name);
987         sn_info->vm_state_size = sn->vm_state_size;
988         sn_info->date_sec = sn->date_sec;
989         sn_info->date_nsec = sn->date_nsec;
990         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
991     }
992     *psn_tab = sn_tab;
993     return s->nb_snapshots;
994 }
995 
996 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
997                             const char *snapshot_id,
998                             const char *name,
999                             Error **errp)
1000 {
1001     int i, snapshot_index;
1002     BDRVQcow2State *s = bs->opaque;
1003     QCowSnapshot *sn;
1004     uint64_t *new_l1_table;
1005     int new_l1_bytes;
1006     int ret;
1007 
1008     assert(bs->read_only);
1009 
1010     /* Search the snapshot */
1011     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1012     if (snapshot_index < 0) {
1013         error_setg(errp,
1014                    "Can't find snapshot");
1015         return -ENOENT;
1016     }
1017     sn = &s->snapshots[snapshot_index];
1018 
1019     /* Allocate and read in the snapshot's L1 table */
1020     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1021                                sizeof(uint64_t), QCOW_MAX_L1_SIZE,
1022                                "Snapshot L1 table", errp);
1023     if (ret < 0) {
1024         return ret;
1025     }
1026     new_l1_bytes = sn->l1_size * sizeof(uint64_t);
1027     new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1028     if (new_l1_table == NULL) {
1029         return -ENOMEM;
1030     }
1031 
1032     ret = bdrv_pread(bs->file, sn->l1_table_offset,
1033                      new_l1_table, new_l1_bytes);
1034     if (ret < 0) {
1035         error_setg(errp, "Failed to read l1 table for snapshot");
1036         qemu_vfree(new_l1_table);
1037         return ret;
1038     }
1039 
1040     /* Switch the L1 table */
1041     qemu_vfree(s->l1_table);
1042 
1043     s->l1_size = sn->l1_size;
1044     s->l1_table_offset = sn->l1_table_offset;
1045     s->l1_table = new_l1_table;
1046 
1047     for(i = 0;i < s->l1_size; i++) {
1048         be64_to_cpus(&s->l1_table[i]);
1049     }
1050 
1051     return 0;
1052 }
1053