xref: /openbmc/qemu/block/qcow2-snapshot.c (revision f227c07bbb9569ed12e1559083fe27a797e40c66)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "sysemu/block-backend.h"
27 #include "qapi/error.h"
28 #include "qcow2.h"
29 #include "qemu/bswap.h"
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/memalign.h"
33 
34 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
35 {
36     BDRVQcow2State *s = bs->opaque;
37 
38     assert(i >= 0 && i < s->nb_snapshots);
39     g_free(s->snapshots[i].name);
40     g_free(s->snapshots[i].id_str);
41     g_free(s->snapshots[i].unknown_extra_data);
42     memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
43 }
44 
45 void qcow2_free_snapshots(BlockDriverState *bs)
46 {
47     BDRVQcow2State *s = bs->opaque;
48     int i;
49 
50     for(i = 0; i < s->nb_snapshots; i++) {
51         qcow2_free_single_snapshot(bs, i);
52     }
53     g_free(s->snapshots);
54     s->snapshots = NULL;
55     s->nb_snapshots = 0;
56 }
57 
/*
 * Read the on-disk snapshot table starting at s->snapshots_offset into
 * a newly allocated s->snapshots array of s->nb_snapshots entries, and
 * set s->snapshots_size to the number of bytes the table covers.
 *
 * Returns 0 on success.  On failure, returns a negative errno value,
 * sets @errp and frees everything read so far via
 * qcow2_free_snapshots().
 *
 * @nb_clusters_reduced and @extra_data_dropped are only dereferenced
 * when @repair is true.
 *
 * If @repair is true, try to repair a broken snapshot table instead
 * of just returning an error:
 *
 * - If the snapshot table was too long, set *nb_clusters_reduced to
 *   the number of snapshots removed off the end.
 *   The caller will update the on-disk nb_snapshots accordingly;
 *   this leaks clusters, but is safe.
 *   (The on-disk information must be updated before
 *   qcow2_check_refcounts(), because that function relies on
 *   s->nb_snapshots to reflect the on-disk value.)
 *
 * - If there were snapshots with too much extra metadata, increment
 *   *extra_data_dropped for each.
 *   This requires the caller to eventually rewrite the whole snapshot
 *   table, which requires cluster allocation.  Therefore, this should
 *   be done only after qcow2_check_refcounts() made sure the refcount
 *   structures are valid.
 *   (In the meantime, the image is still valid because
 *   qcow2_check_refcounts() does not do anything with snapshots'
 *   extra data.)
 */
static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
                                   int *nb_clusters_reduced,
                                   int *extra_data_dropped,
                                   Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    QCowSnapshot *sn;
    int i, id_str_size, name_size;
    int64_t offset, pre_sn_offset;
    /*
     * Size the table will have once rewritten from memory; unlike
     * @offset it does not count extra data truncated in repair mode.
     */
    uint64_t table_length = 0;
    int ret;

    /* Nothing to read: leave an empty in-memory table */
    if (!s->nb_snapshots) {
        s->snapshots = NULL;
        s->snapshots_size = 0;
        return 0;
    }

    offset = s->snapshots_offset;
    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);

    for(i = 0; i < s->nb_snapshots; i++) {
        bool truncate_unknown_extra_data = false;

        /*
         * Remember where the previous entry ended, so that discarding
         * this entry in repair mode can roll @offset back to it.
         */
        pre_sn_offset = offset;
        table_length = ROUND_UP(table_length, 8);

        /* Read statically sized part of the snapshot header */
        offset = ROUND_UP(offset, 8);
        ret = bdrv_pread(bs->file, offset, sizeof(h), &h, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }

        /* Convert the big-endian header fields to host byte order */
        offset += sizeof(h);
        sn = s->snapshots + i;
        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
        sn->l1_size = be32_to_cpu(h.l1_size);
        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
        sn->date_sec = be32_to_cpu(h.date_sec);
        sn->date_nsec = be32_to_cpu(h.date_nsec);
        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
        sn->extra_data_size = be32_to_cpu(h.extra_data_size);

        id_str_size = be16_to_cpu(h.id_str_size);
        name_size = be16_to_cpu(h.name_size);

        if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
            if (!repair) {
                ret = -EFBIG;
                error_setg(errp, "Too much extra metadata in snapshot table "
                           "entry %i", i);
                error_append_hint(errp, "You can force-remove this extra "
                                  "metadata with qemu-img check -r all\n");
                goto fail;
            }

            fprintf(stderr, "Discarding too much extra metadata in snapshot "
                    "table entry %i (%" PRIu32 " > %u)\n",
                    i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);

            (*extra_data_dropped)++;
            truncate_unknown_extra_data = true;
        }

        /*
         * Read known extra data.  Only the part that is actually
         * present on disk is read; fields of @extra beyond
         * sn->extra_data_size are not consulted below.
         */
        ret = bdrv_pread(bs->file, offset,
                         MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += MIN(sizeof(extra), sn->extra_data_size);

        /* The 64-bit VM state size, when present, overrides the header's */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
                                         vm_state_size_large)) {
            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
        }

        /* Without a stored disk size, fall back to the current image size */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
            sn->disk_size = be64_to_cpu(extra.disk_size);
        } else {
            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
        }

        /* -1ULL is stored when the entry carries no icount field */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) {
            sn->icount = be64_to_cpu(extra.icount);
        } else {
            sn->icount = -1ULL;
        }

        if (sn->extra_data_size > sizeof(extra)) {
            uint64_t extra_data_end;
            size_t unknown_extra_data_size;

            /*
             * Computed from the on-disk size, before any truncation, so
             * that @offset skips all of the extra data on disk even if
             * only part of it is kept in memory.
             */
            extra_data_end = offset + sn->extra_data_size - sizeof(extra);

            if (truncate_unknown_extra_data) {
                sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
            }

            /* Store unknown extra data */
            unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
            sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
            ret = bdrv_pread(bs->file, offset, unknown_extra_data_size,
                             sn->unknown_extra_data, 0);
            if (ret < 0) {
                error_setg_errno(errp, -ret,
                                 "Failed to read snapshot table");
                goto fail;
            }
            offset = extra_data_end;
        }

        /* Read snapshot ID (stored without terminator; add one in memory) */
        sn->id_str = g_malloc(id_str_size + 1);
        ret = bdrv_pread(bs->file, offset, id_str_size, sn->id_str, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += id_str_size;
        sn->id_str[id_str_size] = '\0';

        /* Read snapshot name (stored without terminator; add one in memory) */
        sn->name = g_malloc(name_size + 1);
        ret = bdrv_pread(bs->file, offset, name_size, sn->name, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += name_size;
        sn->name[name_size] = '\0';

        /* Note that the extra data may have been truncated */
        table_length += sizeof(h) + sn->extra_data_size + id_str_size +
                        name_size;
        if (!repair) {
            /* Nothing is truncated outside repair mode, so both must agree */
            assert(table_length == offset - s->snapshots_offset);
        }

        if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
            offset - s->snapshots_offset > INT_MAX)
        {
            if (!repair) {
                ret = -EFBIG;
                error_setg(errp, "Snapshot table is too big");
                error_append_hint(errp, "You can force-remove all %u "
                                  "overhanging snapshots with qemu-img check "
                                  "-r all\n", s->nb_snapshots - i);
                goto fail;
            }

            fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
                    "table is too big)\n", s->nb_snapshots - i);

            *nb_clusters_reduced += (s->nb_snapshots - i);

            /* Discard current snapshot also */
            qcow2_free_single_snapshot(bs, i);

            /*
             * This leaks all the rest of the snapshot table and the
             * snapshots' clusters, but we run in check -r all mode,
             * so qcow2_check_refcounts() will take care of it.
             */
            s->nb_snapshots = i;
            offset = pre_sn_offset;
            break;
        }
    }

    assert(offset - s->snapshots_offset <= INT_MAX);
    s->snapshots_size = offset - s->snapshots_offset;
    return 0;

fail:
    /* Release every entry read so far; s->nb_snapshots is reset to 0 */
    qcow2_free_snapshots(bs);
    return ret;
}
263 
264 int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
265 {
266     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
267 }
268 
/*
 * Write the in-memory snapshot list (s->snapshots) as a new on-disk
 * snapshot table into freshly allocated clusters, then switch the image
 * header over to it and free the clusters of the old table.
 *
 * Returns 0 on success (s->snapshots_offset and s->snapshots_size are
 * updated) or a negative errno value on failure, in which case any
 * clusters allocated for the new table are freed again.
 */
int qcow2_write_snapshots(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    int i, name_size, id_str_size, snapshots_size;
    /* Mirrors the two adjacent header fields that are rewritten together */
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED header_data;
    int64_t offset, snapshots_offset = 0;
    int ret;

    /* compute the size of the snapshots */
    offset = 0;
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        offset = ROUND_UP(offset, 8);
        offset += sizeof(h);
        /* At least the full known extra data structure is always written */
        offset += MAX(sizeof(extra), sn->extra_data_size);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);

        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
            ret = -EFBIG;
            goto fail;
        }
    }

    assert(offset <= INT_MAX);
    snapshots_size = offset;

    /* Allocate space for the new snapshot list */
    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
    offset = snapshots_offset;
    if (offset < 0) {
        ret = offset;
        goto fail;
    }
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /* The snapshot list position has not yet been updated, so these clusters
     * must indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
    if (ret < 0) {
        goto fail;
    }


    /* Write all snapshots to the new list */
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        memset(&h, 0, sizeof(h));
        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
        h.l1_size = cpu_to_be32(sn->l1_size);
        /* If it doesn't fit in 32 bit, older implementations should treat it
         * as a disk-only snapshot rather than truncate the VM state */
        if (sn->vm_state_size <= 0xffffffff) {
            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
        }
        h.date_sec = cpu_to_be32(sn->date_sec);
        h.date_nsec = cpu_to_be32(sn->date_nsec);
        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
        h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
                                            sn->extra_data_size));

        /* The known extra data always carries the full 64-bit values */
        memset(&extra, 0, sizeof(extra));
        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
        extra.disk_size = cpu_to_be64(sn->disk_size);
        extra.icount = cpu_to_be64(sn->icount);

        id_str_size = strlen(sn->id_str);
        name_size = strlen(sn->name);
        /* Both lengths are stored in 16-bit header fields */
        assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
        h.id_str_size = cpu_to_be16(id_str_size);
        h.name_size = cpu_to_be16(name_size);
        /* Same 8-byte alignment as used in the size computation above */
        offset = ROUND_UP(offset, 8);

        ret = bdrv_pwrite(bs->file, offset, sizeof(h), &h, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

        ret = bdrv_pwrite(bs->file, offset, sizeof(extra), &extra, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

        /* Preserve any extra data beyond the fields known to this code */
        if (sn->extra_data_size > sizeof(extra)) {
            size_t unknown_extra_data_size =
                sn->extra_data_size - sizeof(extra);

            /* qcow2_read_snapshots() ensures no unbounded allocation */
            assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
            assert(sn->unknown_extra_data);

            ret = bdrv_pwrite(bs->file, offset, unknown_extra_data_size,
                              sn->unknown_extra_data, 0);
            if (ret < 0) {
                goto fail;
            }
            offset += unknown_extra_data_size;
        }

        /* ID and name are written without their NUL terminators */
        ret = bdrv_pwrite(bs->file, offset, id_str_size, sn->id_str, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

        ret = bdrv_pwrite(bs->file, offset, name_size, sn->name, 0);
        if (ret < 0) {
            goto fail;
        }
        offset += name_size;
    }

    /*
     * Update the header to point to the new snapshot table. This requires the
     * new table and its refcounts to be stable on disk.
     */
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /* header_data relies on the two header fields being adjacent */
    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
                      endof(QCowHeader, nb_snapshots));

    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           sizeof(header_data), &header_data, 0);
    if (ret < 0) {
        goto fail;
    }

    /* free the old snapshot table */
    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
                        QCOW2_DISCARD_SNAPSHOT);
    s->snapshots_offset = snapshots_offset;
    s->snapshots_size = snapshots_size;
    return 0;

fail:
    /*
     * snapshots_offset is still 0 if no clusters were allocated and
     * negative if the allocation itself failed; only a positive value
     * refers to clusters that need to be released.
     */
    if (snapshots_offset > 0) {
        qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}
428 
/*
 * Image check helper: re-read the snapshot table pointer from the image
 * header, validate it, and load the snapshot table into memory.
 *
 * With BDRV_FIX_ERRORS set in @fix, an over-long snapshot table is cut
 * down and the snapshot count in the image header is updated right
 * away; entries with too much extra data are truncated in memory only
 * and stay counted in result->corruptions.
 *
 * Problems found are accounted in @result.  Returns 0 on success
 * (even if corruptions were recorded) or a negative errno value if the
 * table could not be read, validated or updated; when validating or
 * reading the table fails, s->snapshots_offset and s->nb_snapshots are
 * reset to 0.
 */
int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
                                                 BdrvCheckResult *result,
                                                 BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int nb_clusters_reduced = 0;
    int extra_data_dropped = 0;
    int ret;
    /* Mirrors the two adjacent snapshot fields of the image header */
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED snapshot_table_pointer;

    /* qcow2_do_open() discards this information in check mode */
    ret = bdrv_co_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
                        sizeof(snapshot_table_pointer), &snapshot_table_pointer,
                        0);
    if (ret < 0) {
        result->check_errors++;
        fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
                "the image header: %s\n", strerror(-ret));
        return ret;
    }

    s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
    s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);

    /* In repair mode, clamp the count first so that validation can pass */
    if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
        fprintf(stderr, "Discarding %u overhanging snapshots\n",
                s->nb_snapshots - QCOW_MAX_SNAPSHOTS);

        nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
        s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
    }

    ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
                               sizeof(QCowSnapshotHeader),
                               sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
                               "snapshot table", &local_err);
    if (ret < 0) {
        result->check_errors++;
        error_reportf_err(local_err, "ERROR ");

        if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
            fprintf(stderr, "You can force-remove all %u overhanging snapshots "
                    "with qemu-img check -r all\n",
                    s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
        }

        /* We did not read the snapshot table, so invalidate this information */
        s->snapshots_offset = 0;
        s->nb_snapshots = 0;

        return ret;
    }

    /*
     * s->lock is released for the duration of the read.
     * NOTE(review): presumably the I/O done by the callee must not run
     * with s->lock held -- confirm against the locking rules.
     */
    qemu_co_mutex_unlock(&s->lock);
    ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
                                  &nb_clusters_reduced, &extra_data_dropped,
                                  &local_err);
    qemu_co_mutex_lock(&s->lock);
    if (ret < 0) {
        result->check_errors++;
        error_reportf_err(local_err,
                          "ERROR failed to read the snapshot table: ");

        /* We did not read the snapshot table, so invalidate this information */
        s->snapshots_offset = 0;
        s->nb_snapshots = 0;

        return ret;
    }
    result->corruptions += nb_clusters_reduced + extra_data_dropped;

    if (nb_clusters_reduced) {
        /*
         * Update image header now, because:
         * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
         *     the same as what the image header says,
         * (2) this leaks clusters, but qcow2_check_refcounts() will
         *     fix that.
         */
        assert(fix & BDRV_FIX_ERRORS);

        /* Only the count is rewritten; the table offset stays as it is */
        snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
        ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                                  sizeof(snapshot_table_pointer.nb_snapshots),
                                  &snapshot_table_pointer.nb_snapshots, 0);
        if (ret < 0) {
            result->check_errors++;
            fprintf(stderr, "ERROR failed to update the snapshot count in the "
                    "image header: %s\n", strerror(-ret));
            return ret;
        }

        /* The dropped snapshots are now fixed on disk */
        result->corruptions_fixed += nb_clusters_reduced;
        result->corruptions -= nb_clusters_reduced;
    }

    /*
     * All of v3 images' snapshot table entries need to have at least
     * 16 bytes of extra data.
     * Entries that fall short are only counted and reported here;
     * nothing is rewritten in this function.
     */
    if (s->qcow_version >= 3) {
        int i;
        for (i = 0; i < s->nb_snapshots; i++) {
            if (s->snapshots[i].extra_data_size <
                sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
                sizeof_field(QCowSnapshotExtraData, disk_size))
            {
                result->corruptions++;
                fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
                        fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
            }
        }
    }

    return 0;
}
549 
550 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
551                                                 BdrvCheckResult *result,
552                                                 BdrvCheckMode fix)
553 {
554     BDRVQcow2State *s = bs->opaque;
555     int ret;
556 
557     if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
558         qemu_co_mutex_unlock(&s->lock);
559         ret = qcow2_write_snapshots(bs);
560         qemu_co_mutex_lock(&s->lock);
561         if (ret < 0) {
562             result->check_errors++;
563             fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
564                     strerror(-ret));
565             return ret;
566         }
567 
568         result->corruptions_fixed += result->corruptions;
569         result->corruptions = 0;
570     }
571 
572     return 0;
573 }
574 
575 static void find_new_snapshot_id(BlockDriverState *bs,
576                                  char *id_str, int id_str_size)
577 {
578     BDRVQcow2State *s = bs->opaque;
579     QCowSnapshot *sn;
580     int i;
581     unsigned long id, id_max = 0;
582 
583     for(i = 0; i < s->nb_snapshots; i++) {
584         sn = s->snapshots + i;
585         id = strtoul(sn->id_str, NULL, 10);
586         if (id > id_max)
587             id_max = id;
588     }
589     snprintf(id_str, id_str_size, "%lu", id_max + 1);
590 }
591 
592 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
593                                         const char *id,
594                                         const char *name)
595 {
596     BDRVQcow2State *s = bs->opaque;
597     int i;
598 
599     if (id && name) {
600         for (i = 0; i < s->nb_snapshots; i++) {
601             if (!strcmp(s->snapshots[i].id_str, id) &&
602                 !strcmp(s->snapshots[i].name, name)) {
603                 return i;
604             }
605         }
606     } else if (id) {
607         for (i = 0; i < s->nb_snapshots; i++) {
608             if (!strcmp(s->snapshots[i].id_str, id)) {
609                 return i;
610             }
611         }
612     } else if (name) {
613         for (i = 0; i < s->nb_snapshots; i++) {
614             if (!strcmp(s->snapshots[i].name, name)) {
615                 return i;
616             }
617         }
618     }
619 
620     return -1;
621 }
622 
623 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
624                                        const char *id_or_name)
625 {
626     int ret;
627 
628     ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
629     if (ret >= 0) {
630         return ret;
631     }
632     return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
633 }
634 
/*
 * Create a new internal snapshot described by @sn_info.
 *
 * A new snapshot ID is always generated and written into
 * sn_info->id_str, replacing whatever the caller put there.  The
 * active L1 table is copied into newly allocated clusters, the
 * refcounts of everything it references are increased, and the
 * snapshot is appended to the snapshot table.
 *
 * Returns 0 on success, -EFBIG if the image already holds
 * QCOW_MAX_SNAPSHOTS snapshots, -ENOTSUP if the image uses an external
 * data file, or another negative errno value on failure.
 */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *new_snapshot_list = NULL;
    QCowSnapshot *old_snapshot_list = NULL;
    QCowSnapshot sn1, *sn = &sn1;
    int i, ret;
    uint64_t *l1_table = NULL;
    int64_t l1_table_offset;

    if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
        return -EFBIG;
    }

    if (has_data_file(bs)) {
        return -ENOTSUP;
    }

    memset(sn, 0, sizeof(*sn));

    /* Generate an ID */
    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));

    /* Populate sn with passed data */
    sn->id_str = g_strdup(sn_info->id_str);
    sn->name = g_strdup(sn_info->name);

    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    sn->vm_state_size = sn_info->vm_state_size;
    sn->date_sec = sn_info->date_sec;
    sn->date_nsec = sn_info->date_nsec;
    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
    sn->icount = sn_info->icount;
    sn->extra_data_size = sizeof(QCowSnapshotExtraData);

    /* Allocate the L1 table of the snapshot and copy the current one there. */
    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE);
    if (l1_table_offset < 0) {
        ret = l1_table_offset;
        goto fail;
    }

    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

    l1_table = g_try_new(uint64_t, s->l1_size);
    if (s->l1_size && l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    /* Build a big-endian copy of the in-memory active L1 table */
    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
                                        s->l1_size * L1E_SIZE, false);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, s->l1_size * L1E_SIZE,
                      l1_table, 0);
    if (ret < 0) {
        goto fail;
    }

    g_free(l1_table);
    l1_table = NULL;

    /*
     * Increase the refcounts of all clusters and make sure everything is
     * stable on disk before updating the snapshot table to contain a pointer
     * to the new L1 table.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    /* Append the new snapshot to the snapshot list */
    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
    if (s->snapshots) {
        memcpy(new_snapshot_list, s->snapshots,
               s->nb_snapshots * sizeof(QCowSnapshot));
        old_snapshot_list = s->snapshots;
    }
    s->snapshots = new_snapshot_list;
    s->snapshots[s->nb_snapshots++] = *sn;

    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
        /*
         * Roll back to the old list.  Only the new array is freed here;
         * the new entry's strings are released at the fail label.
         */
        g_free(s->snapshots);
        s->snapshots = old_snapshot_list;
        s->nb_snapshots--;
        goto fail;
    }

    /* The entries were copied by value into the new array */
    g_free(old_snapshot_list);

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
                          ROUND_UP(sn->vm_state_size, s->cluster_size),
                          QCOW2_DISCARD_NEVER, false);

#ifdef DEBUG_ALLOC
    {
      BdrvCheckResult result = {0};
      qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    /*
     * NOTE(review): clusters allocated above for the snapshot's L1 table
     * and refcount increases that already happened are not undone here;
     * presumably this is an accepted leak on the error path -- confirm.
     */
    g_free(sn->id_str);
    g_free(sn->name);
    g_free(l1_table);

    return ret;
}
757 
/*
 * Revert the active image to the snapshot identified by @snapshot_id,
 * which is matched against snapshot IDs first and snapshot names
 * second.
 *
 * The snapshot's L1 table is copied over the active L1 table (on disk
 * and in memory), with refcounts adjusted accordingly.  If the
 * snapshot's disk size differs from the current one, the image is
 * resized first.
 *
 * Returns 0 on success, -ENOTSUP if the image uses an external data
 * file, -ENOENT if no such snapshot exists, or another negative errno
 * value on failure.
 */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    Error *local_err = NULL;
    int i, snapshot_index;
    int cur_l1_bytes, sn_l1_bytes;
    int ret;
    uint64_t *sn_l1_table = NULL;

    if (has_data_file(bs)) {
        return -ENOTSUP;
    }

    /* Search the snapshot */
    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
    if (snapshot_index < 0) {
        return -ENOENT;
    }
    sn = &s->snapshots[snapshot_index];

    /* Check the snapshot's L1 table location and size before using them */
    ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
                               L1E_SIZE, QCOW_MAX_L1_SIZE,
                               "Snapshot L1 table", &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    /* Resize the image to the snapshot's disk size if they differ */
    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
        BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
                                            &local_err);
        if (!blk) {
            error_report_err(local_err);
            ret = -ENOTSUP;
            goto fail;
        }

        ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
                           &local_err);
        blk_unref(blk);
        if (ret < 0) {
            error_report_err(local_err);
            goto fail;
        }
    }

    /*
     * Make sure that the current L1 table is big enough to contain the whole
     * L1 table of the snapshot. If the snapshot L1 table is smaller, the
     * current one must be padded with zeros.
     */
    ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
    if (ret < 0) {
        goto fail;
    }

    cur_l1_bytes = s->l1_size * L1E_SIZE;
    sn_l1_bytes = sn->l1_size * L1E_SIZE;

    /*
     * Copy the snapshot L1 table to the current L1 table.
     *
     * Before overwriting the old current L1 table on disk, make sure to
     * increase all refcounts for the clusters referenced by the new one.
     * Decrease the refcount referenced by the old one only when the L1
     * table is overwritten.
     */
    /*
     * The buffer has the size of the current L1 table and is
     * zero-filled, so anything beyond the snapshot's L1 entries read
     * below stays zero.
     */
    sn_l1_table = g_try_malloc0(cur_l1_bytes);
    if (cur_l1_bytes && sn_l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_bytes, sn_l1_table,
                     0);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
                                         sn->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
                                        s->l1_table_offset, cur_l1_bytes,
                                        false);
    if (ret < 0) {
        goto fail;
    }

    /* Overwrite the active L1 table on disk (still big-endian as read) */
    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, cur_l1_bytes,
                           sn_l1_table, 0);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Decrease refcount of clusters of current L1 table.
     *
     * At this point, the in-memory s->l1_table points to the old L1 table,
     * whereas on disk we already have the new one.
     *
     * qcow2_update_snapshot_refcount special cases the current L1 table to use
     * the in-memory data instead of really using the offset to load a new one,
     * which is why this works.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
                                         s->l1_size, -1);

    /*
     * Now update the in-memory L1 table to be in sync with the on-disk one. We
     * need to do this even if updating refcounts failed.
     */
    for(i = 0;i < s->l1_size; i++) {
        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
    }

    /* Only now act on the result of the refcount decrease above */
    if (ret < 0) {
        goto fail;
    }

    g_free(sn_l1_table);
    sn_l1_table = NULL;

    /*
     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
     * when we decreased the refcount of the old snapshot).
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
        goto fail;
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    /* g_free() accepts NULL, so this is safe on every path */
    g_free(sn_l1_table);
    return ret;
}
907 
908 int qcow2_snapshot_delete(BlockDriverState *bs,
909                           const char *snapshot_id,
910                           const char *name,
911                           Error **errp)
912 {
913     BDRVQcow2State *s = bs->opaque;
914     QCowSnapshot sn;
915     int snapshot_index, ret;
916 
917     if (has_data_file(bs)) {
918         return -ENOTSUP;
919     }
920 
921     /* Search the snapshot */
922     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
923     if (snapshot_index < 0) {
924         error_setg(errp, "Can't find the snapshot");
925         return -ENOENT;
926     }
927     sn = s->snapshots[snapshot_index];
928 
929     ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
930                                L1E_SIZE, QCOW_MAX_L1_SIZE,
931                                "Snapshot L1 table", errp);
932     if (ret < 0) {
933         return ret;
934     }
935 
936     /* Remove it from the snapshot list */
937     memmove(s->snapshots + snapshot_index,
938             s->snapshots + snapshot_index + 1,
939             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
940     s->nb_snapshots--;
941     ret = qcow2_write_snapshots(bs);
942     if (ret < 0) {
943         error_setg_errno(errp, -ret,
944                          "Failed to remove snapshot from snapshot list");
945         return ret;
946     }
947 
948     /*
949      * The snapshot is now unused, clean up. If we fail after this point, we
950      * won't recover but just leak clusters.
951      */
952     g_free(sn.unknown_extra_data);
953     g_free(sn.id_str);
954     g_free(sn.name);
955 
956     /*
957      * Now decrease the refcounts of clusters referenced by the snapshot and
958      * free the L1 table.
959      */
960     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
961                                          sn.l1_size, -1);
962     if (ret < 0) {
963         error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
964         return ret;
965     }
966     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE,
967                         QCOW2_DISCARD_SNAPSHOT);
968 
969     /* must update the copied flag on the current cluster offsets */
970     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
971     if (ret < 0) {
972         error_setg_errno(errp, -ret,
973                          "Failed to update snapshot status in disk");
974         return ret;
975     }
976 
977 #ifdef DEBUG_ALLOC
978     {
979         BdrvCheckResult result = {0};
980         qcow2_check_refcounts(bs, &result, 0);
981     }
982 #endif
983     return 0;
984 }
985 
986 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
987 {
988     BDRVQcow2State *s = bs->opaque;
989     QEMUSnapshotInfo *sn_tab, *sn_info;
990     QCowSnapshot *sn;
991     int i;
992 
993     if (has_data_file(bs)) {
994         return -ENOTSUP;
995     }
996     if (!s->nb_snapshots) {
997         *psn_tab = NULL;
998         return s->nb_snapshots;
999     }
1000 
1001     sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
1002     for(i = 0; i < s->nb_snapshots; i++) {
1003         sn_info = sn_tab + i;
1004         sn = s->snapshots + i;
1005         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
1006                 sn->id_str);
1007         pstrcpy(sn_info->name, sizeof(sn_info->name),
1008                 sn->name);
1009         sn_info->vm_state_size = sn->vm_state_size;
1010         sn_info->date_sec = sn->date_sec;
1011         sn_info->date_nsec = sn->date_nsec;
1012         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
1013         sn_info->icount = sn->icount;
1014     }
1015     *psn_tab = sn_tab;
1016     return s->nb_snapshots;
1017 }
1018 
1019 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
1020                             const char *snapshot_id,
1021                             const char *name,
1022                             Error **errp)
1023 {
1024     int i, snapshot_index;
1025     BDRVQcow2State *s = bs->opaque;
1026     QCowSnapshot *sn;
1027     uint64_t *new_l1_table;
1028     int new_l1_bytes;
1029     int ret;
1030 
1031     assert(bdrv_is_read_only(bs));
1032 
1033     /* Search the snapshot */
1034     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1035     if (snapshot_index < 0) {
1036         error_setg(errp,
1037                    "Can't find snapshot");
1038         return -ENOENT;
1039     }
1040     sn = &s->snapshots[snapshot_index];
1041 
1042     /* Allocate and read in the snapshot's L1 table */
1043     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1044                                L1E_SIZE, QCOW_MAX_L1_SIZE,
1045                                "Snapshot L1 table", errp);
1046     if (ret < 0) {
1047         return ret;
1048     }
1049     new_l1_bytes = sn->l1_size * L1E_SIZE;
1050     new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1051     if (new_l1_table == NULL) {
1052         return -ENOMEM;
1053     }
1054 
1055     ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_bytes,
1056                      new_l1_table, 0);
1057     if (ret < 0) {
1058         error_setg(errp, "Failed to read l1 table for snapshot");
1059         qemu_vfree(new_l1_table);
1060         return ret;
1061     }
1062 
1063     /* Switch the L1 table */
1064     qemu_vfree(s->l1_table);
1065 
1066     s->l1_size = sn->l1_size;
1067     s->l1_table_offset = sn->l1_table_offset;
1068     s->l1_table = new_l1_table;
1069 
1070     for(i = 0;i < s->l1_size; i++) {
1071         be64_to_cpus(&s->l1_table[i]);
1072     }
1073 
1074     return 0;
1075 }
1076