xref: /openbmc/qemu/block/qcow2-snapshot.c (revision d2dfe0b5)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "sysemu/block-backend.h"
27 #include "qapi/error.h"
28 #include "qcow2.h"
29 #include "qemu/bswap.h"
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/memalign.h"
33 
34 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
35 {
36     BDRVQcow2State *s = bs->opaque;
37 
38     assert(i >= 0 && i < s->nb_snapshots);
39     g_free(s->snapshots[i].name);
40     g_free(s->snapshots[i].id_str);
41     g_free(s->snapshots[i].unknown_extra_data);
42     memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
43 }
44 
45 void qcow2_free_snapshots(BlockDriverState *bs)
46 {
47     BDRVQcow2State *s = bs->opaque;
48     int i;
49 
50     for(i = 0; i < s->nb_snapshots; i++) {
51         qcow2_free_single_snapshot(bs, i);
52     }
53     g_free(s->snapshots);
54     s->snapshots = NULL;
55     s->nb_snapshots = 0;
56 }
57 
/*
 * Read the snapshot table from bs->file into a newly allocated
 * s->snapshots array.  s->nb_snapshots entries are read, starting at
 * s->snapshots_offset; on success s->snapshots_size is set to the
 * number of bytes of the on-disk table covered by the entries kept.
 *
 * If @repair is true, try to repair a broken snapshot table instead
 * of just returning an error:
 *
 * - If the snapshot table was too long, set *nb_clusters_reduced to
 *   the number of snapshots removed off the end.
 *   The caller will update the on-disk nb_snapshots accordingly;
 *   this leaks clusters, but is safe.
 *   (The on-disk information must be updated before
 *   qcow2_check_refcounts(), because that function relies on
 *   s->nb_snapshots to reflect the on-disk value.)
 *
 * - If there were snapshots with too much extra metadata, increment
 *   *extra_data_dropped for each.
 *   This requires the caller to eventually rewrite the whole snapshot
 *   table, which requires cluster allocation.  Therefore, this should
 *   be done only after qcow2_check_refcounts() made sure the refcount
 *   structures are valid.
 *   (In the meantime, the image is still valid because
 *   qcow2_check_refcounts() does not do anything with snapshots'
 *   extra data.)
 *
 * @nb_clusters_reduced and @extra_data_dropped are only dereferenced
 * when @repair is true, so they may be NULL otherwise.
 *
 * Returns 0 on success.  On failure, returns a negative errno, sets
 * @errp and frees the in-memory snapshot table again (which also
 * resets s->nb_snapshots to 0, see qcow2_free_snapshots()).
 */
static coroutine_fn GRAPH_RDLOCK
int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
                            int *nb_clusters_reduced,
                            int *extra_data_dropped,
                            Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    QCowSnapshot *sn;
    int i, id_str_size, name_size;
    int64_t offset, pre_sn_offset;
    uint64_t table_length = 0;
    int ret;

    /* Nothing to read: leave the in-memory table empty */
    if (!s->nb_snapshots) {
        s->snapshots = NULL;
        s->snapshots_size = 0;
        return 0;
    }

    offset = s->snapshots_offset;
    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);

    for(i = 0; i < s->nb_snapshots; i++) {
        bool truncate_unknown_extra_data = false;

        /*
         * Remember where this entry starts so that offset can be rewound
         * if the entry ends up being discarded below.  table_length is
         * kept aligned the same way offset is.
         */
        pre_sn_offset = offset;
        table_length = ROUND_UP(table_length, 8);

        /* Read statically sized part of the snapshot header */
        offset = ROUND_UP(offset, 8);
        ret = bdrv_co_pread(bs->file, offset, sizeof(h), &h, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }

        /* Convert the big-endian on-disk fields to host byte order */
        offset += sizeof(h);
        sn = s->snapshots + i;
        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
        sn->l1_size = be32_to_cpu(h.l1_size);
        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
        sn->date_sec = be32_to_cpu(h.date_sec);
        sn->date_nsec = be32_to_cpu(h.date_nsec);
        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
        sn->extra_data_size = be32_to_cpu(h.extra_data_size);

        id_str_size = be16_to_cpu(h.id_str_size);
        name_size = be16_to_cpu(h.name_size);

        if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
            if (!repair) {
                ret = -EFBIG;
                error_setg(errp, "Too much extra metadata in snapshot table "
                           "entry %i", i);
                error_append_hint(errp, "You can force-remove this extra "
                                  "metadata with qemu-img check -r all\n");
                goto fail;
            }

            fprintf(stderr, "Discarding too much extra metadata in snapshot "
                    "table entry %i (%" PRIu32 " > %u)\n",
                    i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);

            /* Repair mode: count the entry and cap its extra data below */
            (*extra_data_dropped)++;
            truncate_unknown_extra_data = true;
        }

        /*
         * Read known extra data: at most sizeof(extra) bytes, and no more
         * than the entry actually provides.
         */
        ret = bdrv_co_pread(bs->file, offset,
                            MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += MIN(sizeof(extra), sn->extra_data_size);

        /*
         * Each extra field is only taken from @extra if the entry's extra
         * data is long enough to contain it; otherwise the value from the
         * static header or a default is used.
         */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
                                         vm_state_size_large)) {
            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
        }

        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
            sn->disk_size = be64_to_cpu(extra.disk_size);
        } else {
            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
        }

        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) {
            sn->icount = be64_to_cpu(extra.icount);
        } else {
            sn->icount = -1ULL;
        }

        if (sn->extra_data_size > sizeof(extra)) {
            uint64_t extra_data_end;
            size_t unknown_extra_data_size;

            /*
             * Computed from the untruncated size, so that offset ends up
             * past all of the on-disk extra data, including any part that
             * is discarded.
             */
            extra_data_end = offset + sn->extra_data_size - sizeof(extra);

            if (truncate_unknown_extra_data) {
                sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
            }

            /* Store unknown extra data */
            unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
            sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
            ret = bdrv_co_pread(bs->file, offset, unknown_extra_data_size,
                                sn->unknown_extra_data, 0);
            if (ret < 0) {
                error_setg_errno(errp, -ret,
                                 "Failed to read snapshot table");
                goto fail;
            }
            offset = extra_data_end;
        }

        /*
         * Read snapshot ID.  The on-disk string has no terminator, so one
         * extra byte is allocated and set to NUL.
         */
        sn->id_str = g_malloc(id_str_size + 1);
        ret = bdrv_co_pread(bs->file, offset, id_str_size, sn->id_str, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += id_str_size;
        sn->id_str[id_str_size] = '\0';

        /* Read snapshot name (NUL-terminated the same way as the ID) */
        sn->name = g_malloc(name_size + 1);
        ret = bdrv_co_pread(bs->file, offset, name_size, sn->name, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += name_size;
        sn->name[name_size] = '\0';

        /* Note that the extra data may have been truncated */
        table_length += sizeof(h) + sn->extra_data_size + id_str_size +
                        name_size;
        if (!repair) {
            /*
             * Without repair nothing is truncated, so the accumulated
             * length must equal the distance covered on disk.
             */
            assert(table_length == offset - s->snapshots_offset);
        }

        if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
            offset - s->snapshots_offset > INT_MAX)
        {
            if (!repair) {
                ret = -EFBIG;
                error_setg(errp, "Snapshot table is too big");
                error_append_hint(errp, "You can force-remove all %u "
                                  "overhanging snapshots with qemu-img check "
                                  "-r all\n", s->nb_snapshots - i);
                goto fail;
            }

            fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
                    "table is too big)\n", s->nb_snapshots - i);

            *nb_clusters_reduced += (s->nb_snapshots - i);

            /* Discard current snapshot also */
            qcow2_free_single_snapshot(bs, i);

            /*
             * This leaks all the rest of the snapshot table and the
             * snapshots' clusters, but we run in check -r all mode,
             * so qcow2_check_refcounts() will take care of it.
             */
            s->nb_snapshots = i;
            /* Rewind so that snapshots_size only covers the kept entries */
            offset = pre_sn_offset;
            break;
        }
    }

    assert(offset - s->snapshots_offset <= INT_MAX);
    s->snapshots_size = offset - s->snapshots_offset;
    return 0;

fail:
    /* Frees all entries read so far, including a partially filled one */
    qcow2_free_snapshots(bs);
    return ret;
}
264 
265 int coroutine_fn qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
266 {
267     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
268 }
269 
/*
 * Write the in-memory snapshot table (s->snapshots) to newly allocated
 * clusters and make the image header point to the new table.
 *
 * The new table is written and flushed before the header fields
 * nb_snapshots and snapshots_offset are updated; the clusters of the old
 * table are freed only after the header update succeeded.
 *
 * Returns 0 on success and a negative errno on failure (-EFBIG if the
 * table would exceed QCOW_MAX_SNAPSHOTS_SIZE).  On failure, clusters that
 * were allocated for the new table are freed again and s->snapshots_offset
 * and s->snapshots_size are left unchanged.
 */
int qcow2_write_snapshots(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    int i, name_size, id_str_size, snapshots_size;
    /* Mirrors the two adjacent header fields that are updated together */
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED header_data;
    int64_t offset, snapshots_offset = 0;
    int ret;

    /* compute the size of the snapshots */
    offset = 0;
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        offset = ROUND_UP(offset, 8);
        offset += sizeof(h);
        /* At least the known extra data fields are always written */
        offset += MAX(sizeof(extra), sn->extra_data_size);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);

        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
            ret = -EFBIG;
            goto fail;
        }
    }

    assert(offset <= INT_MAX);
    snapshots_size = offset;

    /* Allocate space for the new snapshot list */
    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
    offset = snapshots_offset;
    if (offset < 0) {
        ret = offset;
        goto fail;
    }
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /* The snapshot list position has not yet been updated, so these clusters
     * must indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
    if (ret < 0) {
        goto fail;
    }


    /* Write all snapshots to the new list */
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        memset(&h, 0, sizeof(h));
        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
        h.l1_size = cpu_to_be32(sn->l1_size);
        /* If it doesn't fit in 32 bit, older implementations should treat it
         * as a disk-only snapshot rather than truncate the VM state */
        if (sn->vm_state_size <= 0xffffffff) {
            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
        }
        h.date_sec = cpu_to_be32(sn->date_sec);
        h.date_nsec = cpu_to_be32(sn->date_nsec);
        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
        /* Must match what is actually written below (see size loop above) */
        h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
                                            sn->extra_data_size));

        memset(&extra, 0, sizeof(extra));
        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
        extra.disk_size = cpu_to_be64(sn->disk_size);
        extra.icount = cpu_to_be64(sn->icount);

        /* The on-disk length fields for ID and name are 16 bits wide */
        id_str_size = strlen(sn->id_str);
        name_size = strlen(sn->name);
        assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
        h.id_str_size = cpu_to_be16(id_str_size);
        h.name_size = cpu_to_be16(name_size);
        offset = ROUND_UP(offset, 8);

        ret = bdrv_pwrite(bs->file, offset, sizeof(h), &h, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

        ret = bdrv_pwrite(bs->file, offset, sizeof(extra), &extra, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

        /* Extra data beyond the known fields is written back unmodified */
        if (sn->extra_data_size > sizeof(extra)) {
            size_t unknown_extra_data_size =
                sn->extra_data_size - sizeof(extra);

            /* qcow2_read_snapshots() ensures no unbounded allocation */
            assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
            assert(sn->unknown_extra_data);

            ret = bdrv_pwrite(bs->file, offset, unknown_extra_data_size,
                              sn->unknown_extra_data, 0);
            if (ret < 0) {
                goto fail;
            }
            offset += unknown_extra_data_size;
        }

        /* ID and name are stored without their terminating NUL */
        ret = bdrv_pwrite(bs->file, offset, id_str_size, sn->id_str, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

        ret = bdrv_pwrite(bs->file, offset, name_size, sn->name, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += name_size;
    }

    /*
     * Update the header to point to the new snapshot table. This requires the
     * new table and its refcounts to be stable on disk.
     */
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /*
     * nb_snapshots and snapshots_offset must be adjacent in the header for
     * the single combined write below to be valid.
     */
    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
                      endof(QCowHeader, nb_snapshots));

    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           sizeof(header_data), &header_data, 0);
    if (ret < 0) {
        goto fail;
    }

    /* free the old snapshot table */
    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
                        QCOW2_DISCARD_SNAPSHOT);
    s->snapshots_offset = snapshots_offset;
    s->snapshots_size = snapshots_size;
    return 0;

fail:
    /*
     * snapshots_offset is only positive once the allocation succeeded (it
     * starts out as 0 and is negative if the allocation failed), and
     * snapshots_size has been set by then.
     */
    if (snapshots_offset > 0) {
        qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}
429 
/*
 * Image check helper: re-read the snapshot table pointer from the image
 * header, validate it and load the snapshot table, repairing it while
 * loading if @fix contains BDRV_FIX_ERRORS.
 *
 * Problems found are accounted for in @result:
 * - I/O and validation failures increment result->check_errors and make
 *   the function return a negative errno (with s->snapshots_offset and
 *   s->nb_snapshots reset to 0 if the table could not be read).
 * - Dropped snapshots, dropped extra data and incomplete v3 entries are
 *   counted in result->corruptions.  Dropped snapshots are fixed here by
 *   updating nb_snapshots in the image header and are then moved to
 *   result->corruptions_fixed; the other corruptions stay in
 *   result->corruptions when this function returns.
 *
 * s->lock is released around the call that reads the table and re-taken
 * afterwards.
 *
 * Returns 0 on success, a negative errno otherwise.
 */
int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
                                                 BdrvCheckResult *result,
                                                 BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int nb_clusters_reduced = 0;
    int extra_data_dropped = 0;
    int ret;
    /* Mirrors the adjacent nb_snapshots/snapshots_offset header fields */
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED snapshot_table_pointer;

    /* qcow2_do_open() discards this information in check mode */
    ret = bdrv_co_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
                        sizeof(snapshot_table_pointer), &snapshot_table_pointer,
                        0);
    if (ret < 0) {
        result->check_errors++;
        fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
                "the image header: %s\n", strerror(-ret));
        return ret;
    }

    s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
    s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);

    /* In repair mode, clamp the snapshot count to the supported maximum */
    if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
        fprintf(stderr, "Discarding %u overhanging snapshots\n",
                s->nb_snapshots - QCOW_MAX_SNAPSHOTS);

        nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
        s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
    }

    ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
                               sizeof(QCowSnapshotHeader),
                               sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
                               "snapshot table", &local_err);
    if (ret < 0) {
        result->check_errors++;
        error_reportf_err(local_err, "ERROR ");

        /* Only reachable without BDRV_FIX_ERRORS (count is clamped above) */
        if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
            fprintf(stderr, "You can force-remove all %u overhanging snapshots "
                    "with qemu-img check -r all\n",
                    s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
        }

        /* We did not read the snapshot table, so invalidate this information */
        s->snapshots_offset = 0;
        s->nb_snapshots = 0;

        return ret;
    }

    /* The table is read with s->lock released */
    qemu_co_mutex_unlock(&s->lock);
    ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
                                  &nb_clusters_reduced, &extra_data_dropped,
                                  &local_err);
    qemu_co_mutex_lock(&s->lock);
    if (ret < 0) {
        result->check_errors++;
        error_reportf_err(local_err,
                          "ERROR failed to read the snapshot table: ");

        /* We did not read the snapshot table, so invalidate this information */
        s->snapshots_offset = 0;
        s->nb_snapshots = 0;

        return ret;
    }
    result->corruptions += nb_clusters_reduced + extra_data_dropped;

    if (nb_clusters_reduced) {
        /*
         * Update image header now, because:
         * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
         *     the same as what the image header says,
         * (2) this leaks clusters, but qcow2_check_refcounts() will
         *     fix that.
         */
        assert(fix & BDRV_FIX_ERRORS);

        /* Only the nb_snapshots field is rewritten, not the table offset */
        snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
        ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                                  sizeof(snapshot_table_pointer.nb_snapshots),
                                  &snapshot_table_pointer.nb_snapshots, 0);
        if (ret < 0) {
            result->check_errors++;
            fprintf(stderr, "ERROR failed to update the snapshot count in the "
                    "image header: %s\n", strerror(-ret));
            return ret;
        }

        /* These corruptions are now fixed; move them to the fixed counter */
        result->corruptions_fixed += nb_clusters_reduced;
        result->corruptions -= nb_clusters_reduced;
    }

    /*
     * All of v3 images' snapshot table entries need to have at least
     * 16 bytes of extra data.
     *
     * Incomplete entries are only counted and reported here; nothing is
     * written in this loop even when the message says "Repairing".
     * NOTE(review): the actual rewrite presumably happens in
     * qcow2_check_fix_snapshot_table() - confirm against the caller.
     */
    if (s->qcow_version >= 3) {
        int i;
        for (i = 0; i < s->nb_snapshots; i++) {
            if (s->snapshots[i].extra_data_size <
                sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
                sizeof_field(QCowSnapshotExtraData, disk_size))
            {
                result->corruptions++;
                fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
                        fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
            }
        }
    }

    return 0;
}
550 
551 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
552                                                 BdrvCheckResult *result,
553                                                 BdrvCheckMode fix)
554 {
555     BDRVQcow2State *s = bs->opaque;
556     int ret;
557 
558     if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
559         qemu_co_mutex_unlock(&s->lock);
560         ret = qcow2_write_snapshots(bs);
561         qemu_co_mutex_lock(&s->lock);
562         if (ret < 0) {
563             result->check_errors++;
564             fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
565                     strerror(-ret));
566             return ret;
567         }
568 
569         result->corruptions_fixed += result->corruptions;
570         result->corruptions = 0;
571     }
572 
573     return 0;
574 }
575 
576 static void find_new_snapshot_id(BlockDriverState *bs,
577                                  char *id_str, int id_str_size)
578 {
579     BDRVQcow2State *s = bs->opaque;
580     QCowSnapshot *sn;
581     int i;
582     unsigned long id, id_max = 0;
583 
584     for(i = 0; i < s->nb_snapshots; i++) {
585         sn = s->snapshots + i;
586         id = strtoul(sn->id_str, NULL, 10);
587         if (id > id_max)
588             id_max = id;
589     }
590     snprintf(id_str, id_str_size, "%lu", id_max + 1);
591 }
592 
593 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
594                                         const char *id,
595                                         const char *name)
596 {
597     BDRVQcow2State *s = bs->opaque;
598     int i;
599 
600     if (id && name) {
601         for (i = 0; i < s->nb_snapshots; i++) {
602             if (!strcmp(s->snapshots[i].id_str, id) &&
603                 !strcmp(s->snapshots[i].name, name)) {
604                 return i;
605             }
606         }
607     } else if (id) {
608         for (i = 0; i < s->nb_snapshots; i++) {
609             if (!strcmp(s->snapshots[i].id_str, id)) {
610                 return i;
611             }
612         }
613     } else if (name) {
614         for (i = 0; i < s->nb_snapshots; i++) {
615             if (!strcmp(s->snapshots[i].name, name)) {
616                 return i;
617             }
618         }
619     }
620 
621     return -1;
622 }
623 
624 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
625                                        const char *id_or_name)
626 {
627     int ret;
628 
629     ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
630     if (ret >= 0) {
631         return ret;
632     }
633     return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
634 }
635 
/*
 * Create a new internal snapshot described by @sn_info.
 *
 * A new snapshot ID is always generated and stored in sn_info->id_str,
 * overwriting whatever the caller passed in.  The active L1 table is
 * copied to newly allocated clusters, the refcounts of everything it
 * references are increased, and the snapshot is appended to the snapshot
 * table on disk.
 *
 * Returns 0 on success, -EFBIG if the image already holds
 * QCOW_MAX_SNAPSHOTS snapshots, -ENOTSUP if the image uses an external
 * data file, or another negative errno on failure.
 */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *new_snapshot_list = NULL;
    QCowSnapshot *old_snapshot_list = NULL;
    QCowSnapshot sn1, *sn = &sn1;
    int i, ret;
    uint64_t *l1_table = NULL;
    int64_t l1_table_offset;

    if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
        return -EFBIG;
    }

    if (has_data_file(bs)) {
        return -ENOTSUP;
    }

    memset(sn, 0, sizeof(*sn));

    /* Generate an ID */
    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));

    /* Populate sn with passed data */
    sn->id_str = g_strdup(sn_info->id_str);
    sn->name = g_strdup(sn_info->name);

    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    sn->vm_state_size = sn_info->vm_state_size;
    sn->date_sec = sn_info->date_sec;
    sn->date_nsec = sn_info->date_nsec;
    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
    sn->icount = sn_info->icount;
    sn->extra_data_size = sizeof(QCowSnapshotExtraData);

    /* Allocate the L1 table of the snapshot and copy the current one there. */
    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE);
    if (l1_table_offset < 0) {
        ret = l1_table_offset;
        goto fail;
    }

    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

    /* Temporary big-endian copy of the active L1 table for writing out */
    l1_table = g_try_new(uint64_t, s->l1_size);
    if (s->l1_size && l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
                                        s->l1_size * L1E_SIZE, false);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, s->l1_size * L1E_SIZE,
                      l1_table, 0);
    if (ret < 0) {
        goto fail;
    }

    g_free(l1_table);
    l1_table = NULL;

    /*
     * Increase the refcounts of all clusters and make sure everything is
     * stable on disk before updating the snapshot table to contain a pointer
     * to the new L1 table.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    /* Append the new snapshot to the snapshot list */
    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
    if (s->snapshots) {
        memcpy(new_snapshot_list, s->snapshots,
               s->nb_snapshots * sizeof(QCowSnapshot));
        old_snapshot_list = s->snapshots;
    }
    s->snapshots = new_snapshot_list;
    /* Struct copy: the list entry now shares sn's id_str/name pointers */
    s->snapshots[s->nb_snapshots++] = *sn;

    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
        /*
         * Roll back to the previous in-memory list.  Only the new array is
         * freed here; the new entry's strings are freed through sn at the
         * fail label.
         */
        g_free(s->snapshots);
        s->snapshots = old_snapshot_list;
        s->nb_snapshots--;
        goto fail;
    }

    /* The old array is obsolete; its entries live on in the new list */
    g_free(old_snapshot_list);

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
                          ROUND_UP(sn->vm_state_size, s->cluster_size),
                          QCOW2_DISCARD_NEVER, false);

#ifdef DEBUG_ALLOC
    {
      BdrvCheckResult result = {0};
      qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    /*
     * Only heap memory is released here.
     * NOTE(review): clusters allocated for the snapshot's L1 table and any
     * refcount increases already performed are not reverted on this path.
     */
    g_free(sn->id_str);
    g_free(sn->name);
    g_free(l1_table);

    return ret;
}
758 
/*
 * Copy the snapshot identified by @snapshot_id into the current disk image.
 *
 * @snapshot_id is looked up first as a snapshot ID and then as a snapshot
 * name.  The snapshot's L1 table replaces the active L1 table, both on
 * disk and in memory, with the refcounts adjusted accordingly.  If the
 * snapshot's disk size differs from the current one, the image is resized
 * to the snapshot's size first.
 *
 * Returns 0 on success, -ENOTSUP if the image uses an external data file,
 * -ENOENT if no such snapshot exists, or another negative errno on failure.
 */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    Error *local_err = NULL;
    int i, snapshot_index;
    int cur_l1_bytes, sn_l1_bytes;
    int ret;
    uint64_t *sn_l1_table = NULL;

    if (has_data_file(bs)) {
        return -ENOTSUP;
    }

    /* Search the snapshot */
    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
    if (snapshot_index < 0) {
        return -ENOENT;
    }
    sn = &s->snapshots[snapshot_index];

    /* Sanity-check the snapshot's L1 table location and size before use */
    ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
                               L1E_SIZE, QCOW_MAX_L1_SIZE,
                               "Snapshot L1 table", &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    /* The snapshot has a different disk size: resize the image to match */
    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
        BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
                                            &local_err);
        if (!blk) {
            error_report_err(local_err);
            ret = -ENOTSUP;
            goto fail;
        }

        ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
                           &local_err);
        blk_unref(blk);
        if (ret < 0) {
            error_report_err(local_err);
            goto fail;
        }
    }

    /*
     * Make sure that the current L1 table is big enough to contain the whole
     * L1 table of the snapshot. If the snapshot L1 table is smaller, the
     * current one must be padded with zeros.
     */
    ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
    if (ret < 0) {
        goto fail;
    }

    cur_l1_bytes = s->l1_size * L1E_SIZE;
    sn_l1_bytes = sn->l1_size * L1E_SIZE;

    /*
     * Copy the snapshot L1 table to the current L1 table.
     *
     * Before overwriting the old current L1 table on disk, make sure to
     * increase all refcounts for the clusters referenced by the new one.
     * Decrease the refcount referenced by the old one only when the L1
     * table is overwritten.
     */
    /*
     * The buffer has the size of the current L1 table and is zero-filled,
     * so anything beyond the snapshot's (possibly smaller) table stays zero.
     */
    sn_l1_table = g_try_malloc0(cur_l1_bytes);
    if (cur_l1_bytes && sn_l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_bytes, sn_l1_table,
                     0);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
                                         sn->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
                                        s->l1_table_offset, cur_l1_bytes,
                                        false);
    if (ret < 0) {
        goto fail;
    }

    /* Overwrite the active L1 table on disk with the snapshot's table */
    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, cur_l1_bytes,
                           sn_l1_table, 0);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Decrease refcount of clusters of current L1 table.
     *
     * At this point, the in-memory s->l1_table points to the old L1 table,
     * whereas on disk we already have the new one.
     *
     * qcow2_update_snapshot_refcount special cases the current L1 table to use
     * the in-memory data instead of really using the offset to load a new one,
     * which is why this works.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
                                         s->l1_size, -1);

    /*
     * Now update the in-memory L1 table to be in sync with the on-disk one. We
     * need to do this even if updating refcounts failed.
     */
    for(i = 0;i < s->l1_size; i++) {
        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
    }

    /* Deferred check of the refcount decrease above */
    if (ret < 0) {
        goto fail;
    }

    g_free(sn_l1_table);
    sn_l1_table = NULL;

    /*
     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
     * when we decreased the refcount of the old snapshot).
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
        goto fail;
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    g_free(sn_l1_table);
    return ret;
}
908 
909 int qcow2_snapshot_delete(BlockDriverState *bs,
910                           const char *snapshot_id,
911                           const char *name,
912                           Error **errp)
913 {
914     BDRVQcow2State *s = bs->opaque;
915     QCowSnapshot sn;
916     int snapshot_index, ret;
917 
918     if (has_data_file(bs)) {
919         return -ENOTSUP;
920     }
921 
922     /* Search the snapshot */
923     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
924     if (snapshot_index < 0) {
925         error_setg(errp, "Can't find the snapshot");
926         return -ENOENT;
927     }
928     sn = s->snapshots[snapshot_index];
929 
930     ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
931                                L1E_SIZE, QCOW_MAX_L1_SIZE,
932                                "Snapshot L1 table", errp);
933     if (ret < 0) {
934         return ret;
935     }
936 
937     /* Remove it from the snapshot list */
938     memmove(s->snapshots + snapshot_index,
939             s->snapshots + snapshot_index + 1,
940             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
941     s->nb_snapshots--;
942     ret = qcow2_write_snapshots(bs);
943     if (ret < 0) {
944         error_setg_errno(errp, -ret,
945                          "Failed to remove snapshot from snapshot list");
946         return ret;
947     }
948 
949     /*
950      * The snapshot is now unused, clean up. If we fail after this point, we
951      * won't recover but just leak clusters.
952      */
953     g_free(sn.unknown_extra_data);
954     g_free(sn.id_str);
955     g_free(sn.name);
956 
957     /*
958      * Now decrease the refcounts of clusters referenced by the snapshot and
959      * free the L1 table.
960      */
961     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
962                                          sn.l1_size, -1);
963     if (ret < 0) {
964         error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
965         return ret;
966     }
967     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE,
968                         QCOW2_DISCARD_SNAPSHOT);
969 
970     /* must update the copied flag on the current cluster offsets */
971     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
972     if (ret < 0) {
973         error_setg_errno(errp, -ret,
974                          "Failed to update snapshot status in disk");
975         return ret;
976     }
977 
978 #ifdef DEBUG_ALLOC
979     {
980         BdrvCheckResult result = {0};
981         qcow2_check_refcounts(bs, &result, 0);
982     }
983 #endif
984     return 0;
985 }
986 
987 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
988 {
989     BDRVQcow2State *s = bs->opaque;
990     QEMUSnapshotInfo *sn_tab, *sn_info;
991     QCowSnapshot *sn;
992     int i;
993 
994     if (has_data_file(bs)) {
995         return -ENOTSUP;
996     }
997     if (!s->nb_snapshots) {
998         *psn_tab = NULL;
999         return s->nb_snapshots;
1000     }
1001 
1002     sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
1003     for(i = 0; i < s->nb_snapshots; i++) {
1004         sn_info = sn_tab + i;
1005         sn = s->snapshots + i;
1006         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
1007                 sn->id_str);
1008         pstrcpy(sn_info->name, sizeof(sn_info->name),
1009                 sn->name);
1010         sn_info->vm_state_size = sn->vm_state_size;
1011         sn_info->date_sec = sn->date_sec;
1012         sn_info->date_nsec = sn->date_nsec;
1013         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
1014         sn_info->icount = sn->icount;
1015     }
1016     *psn_tab = sn_tab;
1017     return s->nb_snapshots;
1018 }
1019 
1020 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
1021                             const char *snapshot_id,
1022                             const char *name,
1023                             Error **errp)
1024 {
1025     int i, snapshot_index;
1026     BDRVQcow2State *s = bs->opaque;
1027     QCowSnapshot *sn;
1028     uint64_t *new_l1_table;
1029     int new_l1_bytes;
1030     int ret;
1031 
1032     assert(bdrv_is_read_only(bs));
1033 
1034     /* Search the snapshot */
1035     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1036     if (snapshot_index < 0) {
1037         error_setg(errp,
1038                    "Can't find snapshot");
1039         return -ENOENT;
1040     }
1041     sn = &s->snapshots[snapshot_index];
1042 
1043     /* Allocate and read in the snapshot's L1 table */
1044     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1045                                L1E_SIZE, QCOW_MAX_L1_SIZE,
1046                                "Snapshot L1 table", errp);
1047     if (ret < 0) {
1048         return ret;
1049     }
1050     new_l1_bytes = sn->l1_size * L1E_SIZE;
1051     new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1052     if (new_l1_table == NULL) {
1053         return -ENOMEM;
1054     }
1055 
1056     ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_bytes,
1057                      new_l1_table, 0);
1058     if (ret < 0) {
1059         error_setg(errp, "Failed to read l1 table for snapshot");
1060         qemu_vfree(new_l1_table);
1061         return ret;
1062     }
1063 
1064     /* Switch the L1 table */
1065     qemu_vfree(s->l1_table);
1066 
1067     s->l1_size = sn->l1_size;
1068     s->l1_table_offset = sn->l1_table_offset;
1069     s->l1_table = new_l1_table;
1070 
1071     for(i = 0;i < s->l1_size; i++) {
1072         be64_to_cpus(&s->l1_table[i]);
1073     }
1074 
1075     return 0;
1076 }
1077