/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include <zlib.h>

#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "trace.h"

int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size)
{
    BDRVQcow2State *s = bs->opaque;
    int new_l1_size2, ret, i;
    uint64_t *new_l1_table;
    int64_t old_l1_table_offset, old_l1_size;
    int64_t new_l1_table_offset, new_l1_size;
    uint8_t data[12];

    if (min_size <= s->l1_size)
        return 0;

    /* Do a sanity check on min_size before trying to calculate new_l1_size
     * (this prevents overflows during the while loop for the calculation of
     * new_l1_size) */
    if (min_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

    if (exact_size) {
        new_l1_size = min_size;
    } else {
        /* Bump size up to reduce the number of times we have to grow */
        new_l1_size = s->l1_size;
        if (new_l1_size == 0) {
            new_l1_size = 1;
        }
        while (min_size > new_l1_size) {
            new_l1_size = (new_l1_size * 3 + 1) / 2;
        }
    }
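
    /* A worked example of the bump-up rule above: starting from an L1 size
     * of 1, (new_l1_size * 3 + 1) / 2 yields 2, 3, 5, 8, 12, 18, 27, ...,
     * i.e. roughly 1.5x growth per step, so a table of n entries is reached
     * in O(log n) grow operations. */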

    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
            s->l1_size, new_l1_size);
#endif

    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                       align_offset(new_l1_size2, 512));
    if (new_l1_table == NULL) {
        return -ENOMEM;
    }
    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));

    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));

    /* write new table (align to cluster) */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
    if (new_l1_table_offset < 0) {
        qemu_vfree(new_l1_table);
        return new_l1_table_offset;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* the L1 position has not yet been updated, so these clusters must
     * indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
                                        new_l1_size2);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
    for (i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
    ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
                           new_l1_table, new_l1_size2);
    if (ret < 0)
        goto fail;
    for (i = 0; i < s->l1_size; i++)
        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);

    /* set new table */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
    cpu_to_be32w((uint32_t*)data, new_l1_size);
    stq_be_p(data + 4, new_l1_table_offset);
    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
                           data, sizeof(data));
    if (ret < 0) {
        goto fail;
    }
    qemu_vfree(s->l1_table);
    old_l1_table_offset = s->l1_table_offset;
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
    old_l1_size = s->l1_size;
    s->l1_size = new_l1_size;
    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);
    return 0;
fail:
    qemu_vfree(new_l1_table);
    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
                        QCOW2_DISCARD_OTHER);
    return ret;
}

/*
 * l2_load
 *
 * Loads an L2 table into memory. If the table is in the cache, the cache
 * is used; otherwise the L2 table is loaded from the image file.
 *
 * Returns 0 on success, -errno on failure. On success, *l2_table points
 * to the loaded L2 table.
 */
static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
                   uint64_t **l2_table)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);

    return ret;
}

/*
 * Writes one sector of the L1 table to the disk (can't update single entries
 * and we really don't want bdrv_pwrite to perform a read-modify-write)
 */
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
    int l1_start_index;
    int i, ret;

    l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
    for (i = 0; i < L1_ENTRIES_PER_SECTOR && l1_start_index + i < s->l1_size;
         i++)
    {
        buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    ret = bdrv_pwrite_sync(bs->file->bs,
                           s->l1_table_offset + 8 * l1_start_index,
                           buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    return 0;
}
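
/* For example, with L1_ENTRIES_PER_SECTOR == 64, updating l1_index 75
 * rounds down to l1_start_index 64, so the sector holding entries 64..127
 * is rewritten in a single aligned 512-byte write. */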

/*
 * l2_allocate
 *
 * Allocate a new l2 entry in the file. If l1_index points to an already
 * used entry in the L2 table (i.e. we are doing a copy on write for the L2
 * table) copy the contents of the old L2 table into the newly allocated one.
 * Otherwise the new table is initialized with zeros.
 */
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_l2_offset;
    uint64_t *l2_table = NULL;
    int64_t l2_offset;
    int ret;

    old_l2_offset = s->l1_table[l1_index];

    trace_qcow2_l2_allocate(bs, l1_index);

    /* allocate a new l2 entry */

    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
    if (l2_offset < 0) {
        ret = l2_offset;
        goto fail;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* allocate a new entry in the l2 cache */

    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
    if (ret < 0) {
        goto fail;
    }

    l2_table = *table;

    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
        /* if there was no old l2 table, clear the new table */
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
    } else {
        uint64_t* old_table;

        /* if there was an old l2 table, read it from the disk */
        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
        ret = qcow2_cache_get(bs, s->l2_table_cache,
                              old_l2_offset & L1E_OFFSET_MASK,
                              (void**) &old_table);
        if (ret < 0) {
            goto fail;
        }

        memcpy(l2_table, old_table, s->cluster_size);

        qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
    }

    /* write the l2 table to the file */
    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    /* update the L1 entry */
    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
    ret = qcow2_write_l1_entry(bs, l1_index);
    if (ret < 0) {
        goto fail;
    }

    *table = l2_table;
    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
    return 0;

fail:
    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
    if (l2_table != NULL) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
    }
    s->l1_table[l1_index] = old_l2_offset;
    if (l2_offset > 0) {
        qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}

/*
 * Checks how many clusters in a given L2 table are contiguous in the image
 * file. As soon as one of the flags in the bitmask stop_flags changes compared
 * to the first cluster, the search is stopped and the cluster is not counted
 * as contiguous. (This allows it, for example, to stop at the first compressed
 * cluster, which may require different handling)
 */
static int count_contiguous_clusters(int nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
{
    int i;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;

    if (!offset)
        return 0;

    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
        if (offset + (uint64_t) i * cluster_size != l2_entry) {
            break;
        }
    }

    return i;
}

static int count_contiguous_clusters_by_type(int nb_clusters,
                                             uint64_t *l2_table,
                                             int wanted_type)
{
    int i;

    for (i = 0; i < nb_clusters; i++) {
        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));

        if (type != wanted_type) {
            break;
        }
    }

    return i;
}
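
/* For example (64 KiB clusters): L2 entries pointing at host offsets
 * 0x50000, 0x60000, 0x70000, 0x90000 make count_contiguous_clusters()
 * return 3; the fourth entry breaks the run because it does not equal
 * first_offset + 3 * cluster_size. */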

/* The crypt function is compatible with the linux cryptoloop
   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   supported */
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                          uint8_t *out_buf, const uint8_t *in_buf,
                          int nb_sectors, bool enc,
                          Error **errp)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int i;
    int ret;

    for (i = 0; i < nb_sectors; i++) {
        ivec.ll[0] = cpu_to_le64(sector_num);
        ivec.ll[1] = 0;
        if (qcrypto_cipher_setiv(s->cipher,
                                 ivec.b, G_N_ELEMENTS(ivec.b),
                                 errp) < 0) {
            return -1;
        }
        if (enc) {
            ret = qcrypto_cipher_encrypt(s->cipher,
                                         in_buf,
                                         out_buf,
                                         512,
                                         errp);
        } else {
            ret = qcrypto_cipher_decrypt(s->cipher,
                                         in_buf,
                                         out_buf,
                                         512,
                                         errp);
        }
        if (ret < 0) {
            return -1;
        }
        sector_num++;
        in_buf += 512;
        out_buf += 512;
    }
    return 0;
}
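
/* For example, sector_num 0x1122 produces the 16-byte IV
 * {0x22, 0x11, 0, 0, ...}: the 512-byte sector number in little-endian
 * order in the first 8 bytes, zero padding in the rest, which is what the
 * cryptoloop compatibility note above refers to. */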

static int coroutine_fn copy_sectors(BlockDriverState *bs,
                                     uint64_t start_sect,
                                     uint64_t cluster_offset,
                                     int n_start, int n_end)
{
    BDRVQcow2State *s = bs->opaque;
    QEMUIOVector qiov;
    struct iovec iov;
    int n, ret;

    n = n_end - n_start;
    if (n <= 0) {
        return 0;
    }

    iov.iov_len = n * BDRV_SECTOR_SIZE;
    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
    if (iov.iov_base == NULL) {
        return -ENOMEM;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

    if (!bs->drv) {
        ret = -ENOMEDIUM;
        goto out;
    }

    /* Call .bdrv_co_readv() directly instead of using the public block-layer
     * interface. This avoids double I/O throttling and request tracking,
     * which can lead to deadlock when block layer copy-on-read is enabled.
     */
    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
    if (ret < 0) {
        goto out;
    }

    if (bs->encrypted) {
        Error *err = NULL;
        assert(s->cipher);
        if (qcow2_encrypt_sectors(s, start_sect + n_start,
                                  iov.iov_base, iov.iov_base, n,
                                  true, &err) < 0) {
            ret = -EIO;
            error_free(err);
            goto out;
        }
    }

    ret = qcow2_pre_write_overlap_check(bs, 0,
            cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
    if (ret < 0) {
        goto out;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
    ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
                         &qiov);
    if (ret < 0) {
        goto out;
    }

    ret = 0;
out:
    qemu_vfree(iov.iov_base);
    return ret;
}
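
/* copy_sectors() copies guest sectors [n_start, n_end) of the cluster:
 * e.g. for a 64 KiB cluster (128 sectors), copying the head in front of a
 * write that starts 4 KiB into the cluster means n_start = 0, n_end = 8. */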

/*
 * get_cluster_offset
 *
 * For a given offset of the disk image, find the cluster offset in the
 * qcow2 file. The offset is stored in *cluster_offset.
 *
 * On entry, *num is the number of contiguous sectors we'd like to
 * access following offset.
 *
 * On exit, *num is the number of contiguous sectors we can read.
 *
 * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
 * cases.
 */
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                             int *num, uint64_t *cluster_offset)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset, *l2_table;
    int l1_bits, c;
    unsigned int index_in_cluster, nb_clusters;
    uint64_t nb_available, nb_needed;
    int ret;

    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
    nb_needed = *num + index_in_cluster;

    l1_bits = s->l2_bits + s->cluster_bits;

    /* compute how many bytes there are between the offset and
     * the end of the l1 entry */

    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));

    /* compute the number of available sectors */

    nb_available = (nb_available >> 9) + index_in_cluster;

    if (nb_needed > nb_available) {
        nb_needed = nb_available;
    }
    assert(nb_needed <= INT_MAX);

    *cluster_offset = 0;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* load the l2 table in memory */

    ret = l2_load(bs, l2_offset, &l2_table);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
    nb_clusters = size_to_clusters(s, nb_needed << 9);

    ret = qcow2_get_cluster_type(*cluster_offset);
    switch (ret) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
    case QCOW2_CLUSTER_ZERO:
        if (s->qcow_version < 3) {
            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
                                    " in pre-v3 image (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
                                              QCOW2_CLUSTER_ZERO);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_UNALLOCATED:
        /* how many empty clusters ? */
        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
                                              QCOW2_CLUSTER_UNALLOCATED);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        if (offset_into_cluster(s, *cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", *cluster_offset,
                                    l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        break;
    default:
        abort();
    }

    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);

    nb_available = (c * s->cluster_sectors);

out:
    if (nb_available > nb_needed)
        nb_available = nb_needed;

    *num = nb_available - index_in_cluster;

    return ret;

fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
    return ret;
}
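
/* Worked example (assuming 64 KiB clusters, i.e. 128 sectors per cluster):
 * a request with index_in_cluster == 9 and *num == 300 gives
 * nb_needed == 309; if only two contiguous clusters are mapped, c == 2
 * caps nb_available at 256 sectors, so *num becomes 256 - 9 == 247. */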

/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the l2 table.
 *
 * the l2 table offset in the qcow2 file and the cluster index
 * in the l2 table are given to the caller.
 *
 * Returns 0 on success, -errno on failure
 */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                             uint64_t **new_l2_table,
                             int *new_l2_index)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset;
    uint64_t *l2_table = NULL;
    int ret;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    if (l1_index >= s->l1_size) {
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }

    assert(l1_index < s->l1_size);
    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* seek the l2 table of the given l2 offset */

    if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
        /* load the l2 table in memory */
        ret = l2_load(bs, l2_offset, &l2_table);
        if (ret < 0) {
            return ret;
        }
    } else {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index, &l2_table);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                                QCOW2_DISCARD_OTHER);
        }
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);

    *new_l2_table = l2_table;
    *new_l2_index = l2_index;

    return 0;
}
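
/* Index arithmetic example (assuming the default 64 KiB clusters,
 * cluster_bits == 16): an L2 table holds cluster_size / 8 == 8192 entries
 * (l2_bits == 13), so one L2 table maps 8192 * 64 KiB == 512 MiB, with
 * l1_index == offset >> 29 and l2_index == (offset >> 16) & 8191. */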

/*
 * alloc_compressed_cluster_offset
 *
 * For a given offset of the disk image, return the cluster offset in the
 * qcow2 file.
 *
 * If the offset is not found, allocate a new compressed cluster.
 *
 * Return the cluster offset if successful; return 0 otherwise.
 */
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                               uint64_t offset,
                                               int compressed_size)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index, ret;
    uint64_t *l2_table;
    int64_t cluster_offset;
    int nb_csectors;

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
        return 0;
    }

    /* Compression can't overwrite anything. Fail if the cluster was already
     * allocated. */
    cluster_offset = be64_to_cpu(l2_table[l2_index]);
    if (cluster_offset & L2E_OFFSET_MASK) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
        return 0;
    }

    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
    if (cluster_offset < 0) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
        return 0;
    }

    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
                  (cluster_offset >> 9);

    cluster_offset |= QCOW_OFLAG_COMPRESSED |
                      ((uint64_t)nb_csectors << s->csize_shift);

    /* update L2 table */

    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
    l2_table[l2_index] = cpu_to_be64(cluster_offset);
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    return cluster_offset;
}
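
/* Worked example of the arithmetic above (values chosen for illustration):
 * a 3000-byte compressed stream placed at host offset 0x51e00 gives
 * nb_csectors == ((0x51e00 + 3000 - 1) >> 9) - (0x51e00 >> 9) == 5, i.e.
 * the stream extends five 512-byte sectors beyond the one containing its
 * first byte; this count is stored in the entry above csize_shift. */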

static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    if (r->nb_sectors == 0) {
        return 0;
    }

    qemu_co_mutex_unlock(&s->lock);
    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
                       r->offset / BDRV_SECTOR_SIZE,
                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
    qemu_co_mutex_lock(&s->lock);

    if (ret < 0) {
        return ret;
    }

    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    qcow2_cache_depends_on_flush(s->l2_table_cache);

    return 0;
}
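
/* The two COW regions of a QCowL2Meta, illustrated for a guest write that
 * covers neither the start nor the end of the allocated area:
 *
 *   |<- cow_start ->|<----- guest data ----->|<- cow_end ->|
 *   alloc_offset    write start              write end     last cluster end
 *
 * Both region offsets are relative to the start of the allocated area. */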

int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_table;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    old_cluster = g_try_new(uint64_t, m->nb_clusters);
    if (old_cluster == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m, &m->cow_start);
    if (ret < 0) {
        goto err;
    }

    ret = perform_cow(bs, m, &m->cow_end);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);

    assert(l2_index + m->nb_clusters <= s->l2_size);
    for (i = 0; i < m->nb_clusters; i++) {
        /* If two concurrent writes happen to the same unallocated cluster,
         * each write allocates a separate cluster and writes its data
         * concurrently. The first one to complete updates the L2 table with
         * a pointer to its cluster; the second one has to do RMW (which is
         * done above by copy_sectors()), update the L2 table with its own
         * cluster pointer and free the old cluster. This is what this loop
         * does. */
        if (l2_table[l2_index + i] != 0)
            old_cluster[j++] = l2_table[l2_index + i];

        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
    }

    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                    QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
}

/*
 * Returns the number of contiguous clusters that can be used for an allocating
 * write, but require COW to be performed (this includes yet unallocated space,
 * which must be copied from the backing file)
 */
static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
                              uint64_t *l2_table, int l2_index)
{
    int i;

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
        int cluster_type = qcow2_get_cluster_type(l2_entry);

        switch (cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
            if (l2_entry & QCOW_OFLAG_COPIED) {
                goto out;
            }
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
        case QCOW2_CLUSTER_ZERO:
            break;
        default:
            abort();
        }
    }

out:
    assert(i <= nb_clusters);
    return i;
}
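
/* For instance, an L2 window of [UNALLOCATED, COMPRESSED, NORMAL|COPIED]
 * makes count_cow_clusters() return 2: the first two entries need COW for
 * an allocating write, while the copied normal cluster can be written in
 * place and therefore terminates the run. */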

/*
 * Check if there already is an AIO write request in flight which allocates
 * the same cluster. In this case we need to wait until the previous
 * request has completed and updated the L2 table accordingly.
 *
 * Returns:
 *   0       if there was no dependency. *cur_bytes indicates the number of
 *           bytes from guest_offset that can be read before the next
 *           dependency must be processed (or the request is complete)
 *
 *   -EAGAIN if we had to wait for another request, previously gathered
 *           information on cluster allocation may be invalid now. The caller
 *           must start over anyway, so consider *cur_bytes undefined.
 */
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *cur_bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    QCowL2Meta *old_alloc;
    uint64_t bytes = *cur_bytes;

    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        uint64_t start = guest_offset;
        uint64_t end = start + bytes;
        uint64_t old_start = l2meta_cow_start(old_alloc);
        uint64_t old_end = l2meta_cow_end(old_alloc);

        if (end <= old_start || start >= old_end) {
            /* No intersection */
        } else {
            if (start < old_start) {
                /* Stop at the start of a running allocation */
                bytes = old_start - start;
            } else {
                bytes = 0;
            }

            /* Stop if an l2meta already exists. After yielding, it wouldn't
             * be valid any more, so we'd have to clean up the old L2Metas
             * and deal with requests depending on them before starting to
             * gather new ones. Not worth the trouble. */
            if (bytes == 0 && *m) {
                *cur_bytes = 0;
                return 0;
            }

            if (bytes == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
                qemu_co_mutex_unlock(&s->lock);
                qemu_co_queue_wait(&old_alloc->dependent_requests);
                qemu_co_mutex_lock(&s->lock);
                return -EAGAIN;
            }
        }
    }

    /* Make sure that existing clusters and new allocations are only used up to
     * the next dependency if we shortened the request above */
    *cur_bytes = bytes;

    return 0;
}
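
/* Example: with an in-flight allocation covering guest offsets
 * [old_start, old_end) == [0x30000, 0x50000), a new request for
 * [0x20000, 0x60000) is shortened to bytes == 0x10000 (it may proceed up
 * to 0x30000), while a request starting at 0x40000 has to wait and
 * returns -EAGAIN (assuming it has not gathered an L2Meta yet). */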

/*
 * Checks how many clusters that are already allocated and don't require a
 * copy on write are present at the given guest_offset (up to *bytes). If
 * *host_offset is not zero, only physically contiguous clusters beginning at
 * this host offset are counted.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no allocated clusters are available at the given offset.
 *          *bytes is normally unchanged. It is set to 0 if the cluster
 *          is allocated and doesn't need COW, but doesn't have the right
 *          physical offset.
 *
 *   1:     if allocated clusters that don't require a COW are available at
 *          the requested offset. *bytes may have decreased and describes
 *          the length of the area that can be written to.
 *
 *   -errno: in error cases
 */
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t cluster_offset;
    uint64_t *l2_table;
    uint64_t nb_clusters;
    unsigned int keep_clusters;
    int ret;

    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);

    assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
                                == offset_into_cluster(s, *host_offset));

    /*
     * Calculate the number of clusters to look for. We stop at L2 table
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
    assert(nb_clusters <= INT_MAX);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
    if (ret < 0) {
        return ret;
    }

    cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* Check how many clusters are already allocated and don't need COW */
    if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
        && (cluster_offset & QCOW_OFLAG_COPIED))
    {
        /* If a specific host_offset is required, check it */
        bool offset_matches =
            (cluster_offset & L2E_OFFSET_MASK) == *host_offset;

        if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
                                    "%#llx unaligned (guest offset: %#" PRIx64
                                    ")", cluster_offset & L2E_OFFSET_MASK,
                                    guest_offset);
            ret = -EIO;
            goto out;
        }

        if (*host_offset != 0 && !offset_matches) {
            *bytes = 0;
            ret = 0;
            goto out;
        }

        /* We keep all QCOW_OFLAG_COPIED clusters */
        keep_clusters =
            count_contiguous_clusters(nb_clusters, s->cluster_size,
                                      &l2_table[l2_index],
                                      QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
        assert(keep_clusters <= nb_clusters);

        *bytes = MIN(*bytes,
                 keep_clusters * s->cluster_size
                 - offset_into_cluster(s, guest_offset));

        ret = 1;
    } else {
        ret = 0;
    }

    /* Cleanup */
out:
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    /* Only return a host offset if we actually made progress. Otherwise we
     * would make requirements for handle_alloc() that it can't fulfill */
    if (ret > 0) {
        *host_offset = (cluster_offset & L2E_OFFSET_MASK)
                     + offset_into_cluster(s, guest_offset);
    }

    return ret;
}

/*
 * Allocates new clusters for the given guest_offset.
 *
 * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
 * contain the number of clusters that have been allocated and are contiguous
 * in the image file.
 *
 * If *host_offset is non-zero, it specifies the offset in the image file at
 * which the new clusters must start. *nb_clusters can be 0 on return in this
 * case if the cluster at host_offset is already in use. If *host_offset is
 * zero, the clusters can be allocated anywhere in the image file.
 *
 * *host_offset is updated to contain the offset into the image file at which
 * the first allocated cluster starts.
 *
 * Return 0 on success and -errno in error cases. -EAGAIN means that the
 * function has been waiting for another request and the allocation must be
 * restarted, but the whole request should not be failed.
 */
static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
                                   uint64_t *host_offset, uint64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;

    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
                                         *host_offset, *nb_clusters);

    /* Allocate new clusters */
    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
    if (*host_offset == 0) {
        int64_t cluster_offset =
            qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
        if (cluster_offset < 0) {
            return cluster_offset;
        }
        *host_offset = cluster_offset;
        return 0;
    } else {
        int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
        if (ret < 0) {
            return ret;
        }
        *nb_clusters = ret;
        return 0;
    }
}

/*
 * Allocates new clusters for an area that either is yet unallocated or needs a
 * copy on write. If *host_offset is non-zero, clusters are only allocated if
 * the new allocation can match the specified host offset.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no clusters could be allocated. *bytes is set to 0,
 *          *host_offset is left unchanged.
 *
 *   1:     if new clusters were allocated. *bytes may be decreased if the
 *          new allocation doesn't cover all of the requested area.
 *          *host_offset is updated to contain the host offset of the first
 *          newly allocated cluster.
 *
 *   -errno: in error cases
 */
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t *l2_table;
    uint64_t entry;
    uint64_t nb_clusters;
    int ret;

    uint64_t alloc_cluster_offset;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
    assert(*bytes > 0);

    /*
     * Calculate the number of clusters to look for. We stop at L2 table
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
    assert(nb_clusters <= INT_MAX);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
    if (ret < 0) {
        return ret;
    }

    entry = be64_to_cpu(l2_table[l2_index]);

    /* For the moment, overwrite compressed clusters one by one */
    if (entry & QCOW_OFLAG_COMPRESSED) {
        nb_clusters = 1;
    } else {
        nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
    }

    /* This function is only called when there were no non-COW clusters, so if
     * we can't find any unallocated or COW clusters either, something is
     * wrong with our code. */
    assert(nb_clusters > 0);

    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    /* Allocate, if necessary at a given offset in the image file */
    alloc_cluster_offset = start_of_cluster(s, *host_offset);
    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
                                  &nb_clusters);
    if (ret < 0) {
        goto fail;
    }

    /* Can't extend contiguous allocation */
    if (nb_clusters == 0) {
        *bytes = 0;
        return 0;
    }

    /* !*host_offset would overwrite the image header and is reserved for "no
     * host offset preferred". If 0 was a valid host offset, it'd trigger the
     * following overlap check; do that now to avoid having an invalid value in
     * *host_offset. */
    if (!alloc_cluster_offset) {
        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
                                            nb_clusters * s->cluster_size);
        assert(ret < 0);
        goto fail;
    }
120510f0ed8bSKevin Wolf * 120610f0ed8bSKevin Wolf * nb_sectors: The number of sectors from the start of the first 120783baa9a4SKevin Wolf * newly allocated cluster to the end of the area that the write 120810f0ed8bSKevin Wolf * request actually writes to (excluding COW at the end) 120910f0ed8bSKevin Wolf */ 1210c37f4cd7SKevin Wolf int requested_sectors = 1211c37f4cd7SKevin Wolf (*bytes + offset_into_cluster(s, guest_offset)) 1212c37f4cd7SKevin Wolf >> BDRV_SECTOR_BITS; 121310f0ed8bSKevin Wolf int avail_sectors = nb_clusters 121410f0ed8bSKevin Wolf << (s->cluster_bits - BDRV_SECTOR_BITS); 1215c37f4cd7SKevin Wolf int alloc_n_start = offset_into_cluster(s, guest_offset) 1216c37f4cd7SKevin Wolf >> BDRV_SECTOR_BITS; 121710f0ed8bSKevin Wolf int nb_sectors = MIN(requested_sectors, avail_sectors); 121888c6588cSKevin Wolf QCowL2Meta *old_m = *m; 121910f0ed8bSKevin Wolf 122010f0ed8bSKevin Wolf *m = g_malloc0(sizeof(**m)); 122110f0ed8bSKevin Wolf 122210f0ed8bSKevin Wolf **m = (QCowL2Meta) { 122388c6588cSKevin Wolf .next = old_m, 122488c6588cSKevin Wolf 1225411d62b0SKevin Wolf .alloc_offset = alloc_cluster_offset, 122683baa9a4SKevin Wolf .offset = start_of_cluster(s, guest_offset), 122710f0ed8bSKevin Wolf .nb_clusters = nb_clusters, 122810f0ed8bSKevin Wolf .nb_available = nb_sectors, 122910f0ed8bSKevin Wolf 123010f0ed8bSKevin Wolf .cow_start = { 123110f0ed8bSKevin Wolf .offset = 0, 123210f0ed8bSKevin Wolf .nb_sectors = alloc_n_start, 123310f0ed8bSKevin Wolf }, 123410f0ed8bSKevin Wolf .cow_end = { 123510f0ed8bSKevin Wolf .offset = nb_sectors * BDRV_SECTOR_SIZE, 123610f0ed8bSKevin Wolf .nb_sectors = avail_sectors - nb_sectors, 123710f0ed8bSKevin Wolf }, 123810f0ed8bSKevin Wolf }; 123910f0ed8bSKevin Wolf qemu_co_queue_init(&(*m)->dependent_requests); 124010f0ed8bSKevin Wolf QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); 124110f0ed8bSKevin Wolf 1242411d62b0SKevin Wolf *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); 1243c37f4cd7SKevin Wolf *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) 1244c37f4cd7SKevin Wolf - offset_into_cluster(s, guest_offset)); 1245c37f4cd7SKevin Wolf assert(*bytes != 0); 124610f0ed8bSKevin Wolf 124710f0ed8bSKevin Wolf return 1; 124810f0ed8bSKevin Wolf 124910f0ed8bSKevin Wolf fail: 125010f0ed8bSKevin Wolf if (*m && (*m)->nb_clusters > 0) { 125110f0ed8bSKevin Wolf QLIST_REMOVE(*m, next_in_flight); 125210f0ed8bSKevin Wolf } 125310f0ed8bSKevin Wolf return ret; 125410f0ed8bSKevin Wolf } 125510f0ed8bSKevin Wolf 125610f0ed8bSKevin Wolf /* 125745aba42fSKevin Wolf * alloc_cluster_offset 125845aba42fSKevin Wolf * 1259250196f1SKevin Wolf * For a given offset on the virtual disk, find the cluster offset in the qcow2 1260250196f1SKevin Wolf * file. If the offset is not found, allocate a new cluster. 126145aba42fSKevin Wolf * 1262250196f1SKevin Wolf * If the cluster was already allocated, m->nb_clusters is set to 0 and 1263a7912369SFrediano Ziglio * other fields in m are meaningless. 126445aba42fSKevin Wolf * 1265148da7eaSKevin Wolf * If the cluster is newly allocated, m->nb_clusters is set to the number of 126668d100e9SKevin Wolf * contiguous clusters that have been allocated. In this case, the other 126768d100e9SKevin Wolf * fields of m are valid and contain information about the first allocated 126868d100e9SKevin Wolf * cluster. 1269148da7eaSKevin Wolf * 127068d100e9SKevin Wolf * If the request conflicts with another write request in flight, the coroutine 127168d100e9SKevin Wolf * is queued and will be reentered when the dependency has completed.
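 * (Illustrative note, not part of the original comment: the QCowL2Meta chain returned through *m is expected to be committed by the caller, e.g. via qcow2_alloc_cluster_link_l2(), once the data for the newly allocated clusters has been written.)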
1272148da7eaSKevin Wolf * 1273148da7eaSKevin Wolf * Return 0 on success and -errno in error cases 127445aba42fSKevin Wolf */ 1275f4f0d391SKevin Wolf int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, 127616f0587eSHu Tao int *num, uint64_t *host_offset, QCowL2Meta **m) 127745aba42fSKevin Wolf { 1278ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1279710c2496SKevin Wolf uint64_t start, remaining; 1280250196f1SKevin Wolf uint64_t cluster_offset; 128165eb2e35SKevin Wolf uint64_t cur_bytes; 1282710c2496SKevin Wolf int ret; 128345aba42fSKevin Wolf 128416f0587eSHu Tao trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num); 12853cce16f4SKevin Wolf 128616f0587eSHu Tao assert((offset & ~BDRV_SECTOR_MASK) == 0); 1287710c2496SKevin Wolf 128872424114SKevin Wolf again: 128916f0587eSHu Tao start = offset; 129011c89769SMax Reitz remaining = (uint64_t)*num << BDRV_SECTOR_BITS; 12910af729ecSKevin Wolf cluster_offset = 0; 12920af729ecSKevin Wolf *host_offset = 0; 1293ecdd5333SKevin Wolf cur_bytes = 0; 1294ecdd5333SKevin Wolf *m = NULL; 12950af729ecSKevin Wolf 12962c3b32d2SKevin Wolf while (true) { 1297ecdd5333SKevin Wolf 1298ecdd5333SKevin Wolf if (!*host_offset) { 1299ecdd5333SKevin Wolf *host_offset = start_of_cluster(s, cluster_offset); 1300ecdd5333SKevin Wolf } 1301ecdd5333SKevin Wolf 1302ecdd5333SKevin Wolf assert(remaining >= cur_bytes); 1303ecdd5333SKevin Wolf 1304ecdd5333SKevin Wolf start += cur_bytes; 1305ecdd5333SKevin Wolf remaining -= cur_bytes; 1306ecdd5333SKevin Wolf cluster_offset += cur_bytes; 1307ecdd5333SKevin Wolf 1308ecdd5333SKevin Wolf if (remaining == 0) { 1309ecdd5333SKevin Wolf break; 1310ecdd5333SKevin Wolf } 1311ecdd5333SKevin Wolf 1312ecdd5333SKevin Wolf cur_bytes = remaining; 1313ecdd5333SKevin Wolf 1314250196f1SKevin Wolf /* 131517a71e58SKevin Wolf * Now start gathering as many contiguous clusters as possible: 131617a71e58SKevin Wolf * 131717a71e58SKevin Wolf * 1. Check for overlaps with in-flight allocations 131817a71e58SKevin Wolf * 13192c3b32d2SKevin Wolf * a) Overlap not in the first cluster -> shorten this request and 13202c3b32d2SKevin Wolf * let the caller handle the rest in its next loop iteration. 132117a71e58SKevin Wolf * 13222c3b32d2SKevin Wolf * b) Real overlaps of two requests. Yield and restart the search 13232c3b32d2SKevin Wolf * for contiguous clusters (the situation could have changed 13242c3b32d2SKevin Wolf * while we were sleeping) 132517a71e58SKevin Wolf * 132617a71e58SKevin Wolf * c) TODO: Request starts in the same cluster as the in-flight 13272c3b32d2SKevin Wolf * allocation ends. Shorten the COW of the in-flight allocation, 13282c3b32d2SKevin Wolf * set cluster_offset to write to the same cluster and set up 13292c3b32d2SKevin Wolf * the right synchronisation between the in-flight request and 13302c3b32d2SKevin Wolf * the new one. 133117a71e58SKevin Wolf */ 1332ecdd5333SKevin Wolf ret = handle_dependencies(bs, start, &cur_bytes, m); 133317a71e58SKevin Wolf if (ret == -EAGAIN) { 1334ecdd5333SKevin Wolf /* Currently handle_dependencies() doesn't yield if we already had 1335ecdd5333SKevin Wolf * an allocation. If it did, we would have to clean up the L2Meta 1336ecdd5333SKevin Wolf * structs before starting over.
*/ 1337ecdd5333SKevin Wolf assert(*m == NULL); 133817a71e58SKevin Wolf goto again; 133917a71e58SKevin Wolf } else if (ret < 0) { 134017a71e58SKevin Wolf return ret; 1341ecdd5333SKevin Wolf } else if (cur_bytes == 0) { 1342ecdd5333SKevin Wolf break; 134317a71e58SKevin Wolf } else { 134417a71e58SKevin Wolf /* handle_dependencies() may have decreased cur_bytes (shortened 134517a71e58SKevin Wolf * the allocations below) so that the next dependency is processed 134617a71e58SKevin Wolf * correctly during the next loop iteration. */ 134717a71e58SKevin Wolf } 134817a71e58SKevin Wolf 134972424114SKevin Wolf /* 13500af729ecSKevin Wolf * 2. Count contiguous COPIED clusters. 135172424114SKevin Wolf */ 1352710c2496SKevin Wolf ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); 135372424114SKevin Wolf if (ret < 0) { 135472424114SKevin Wolf return ret; 13550af729ecSKevin Wolf } else if (ret) { 1356ecdd5333SKevin Wolf continue; 1357e62daaf6SKevin Wolf } else if (cur_bytes == 0) { 13582c3b32d2SKevin Wolf break; 135972424114SKevin Wolf } 136072424114SKevin Wolf 13610af729ecSKevin Wolf /* 13620af729ecSKevin Wolf * 3. If the request still hasn't completed, allocate new clusters, 13630af729ecSKevin Wolf * considering any cluster_offset of steps 1c or 2. 13640af729ecSKevin Wolf */ 1365710c2496SKevin Wolf ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); 1366037689d8SKevin Wolf if (ret < 0) { 1367037689d8SKevin Wolf return ret; 1368710c2496SKevin Wolf } else if (ret) { 1369ecdd5333SKevin Wolf continue; 13702c3b32d2SKevin Wolf } else { 13712c3b32d2SKevin Wolf assert(cur_bytes == 0); 13722c3b32d2SKevin Wolf break; 13732c3b32d2SKevin Wolf } 1374710c2496SKevin Wolf } 1375250196f1SKevin Wolf 137616f0587eSHu Tao *num -= remaining >> BDRV_SECTOR_BITS; 1377710c2496SKevin Wolf assert(*num > 0); 1378710c2496SKevin Wolf assert(*host_offset != 0); 137945aba42fSKevin Wolf 1380148da7eaSKevin Wolf return 0; 138145aba42fSKevin Wolf } 138245aba42fSKevin Wolf 138345aba42fSKevin Wolf static int decompress_buffer(uint8_t *out_buf, int out_buf_size, 138445aba42fSKevin Wolf const uint8_t *buf, int buf_size) 138545aba42fSKevin Wolf { 138645aba42fSKevin Wolf z_stream strm1, *strm = &strm1; 138745aba42fSKevin Wolf int ret, out_len; 138845aba42fSKevin Wolf 138945aba42fSKevin Wolf memset(strm, 0, sizeof(*strm)); 139045aba42fSKevin Wolf 139145aba42fSKevin Wolf strm->next_in = (uint8_t *)buf; 139245aba42fSKevin Wolf strm->avail_in = buf_size; 139345aba42fSKevin Wolf strm->next_out = out_buf; 139445aba42fSKevin Wolf strm->avail_out = out_buf_size; 139545aba42fSKevin Wolf 139645aba42fSKevin Wolf ret = inflateInit2(strm, -12); 139745aba42fSKevin Wolf if (ret != Z_OK) 139845aba42fSKevin Wolf return -1; 139945aba42fSKevin Wolf ret = inflate(strm, Z_FINISH); 140045aba42fSKevin Wolf out_len = strm->next_out - out_buf; 140145aba42fSKevin Wolf if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 140245aba42fSKevin Wolf out_len != out_buf_size) { 140345aba42fSKevin Wolf inflateEnd(strm); 140445aba42fSKevin Wolf return -1; 140545aba42fSKevin Wolf } 140645aba42fSKevin Wolf inflateEnd(strm); 140745aba42fSKevin Wolf return 0; 140845aba42fSKevin Wolf } 140945aba42fSKevin Wolf 141066f82ceeSKevin Wolf int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) 141145aba42fSKevin Wolf { 1412ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 141345aba42fSKevin Wolf int ret, csize, nb_csectors, sector_offset; 141445aba42fSKevin Wolf uint64_t coffset; 141545aba42fSKevin Wolf 141645aba42fSKevin Wolf coffset = 
cluster_offset & s->cluster_offset_mask; 141745aba42fSKevin Wolf if (s->cluster_cache_offset != coffset) { 141845aba42fSKevin Wolf nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; 141945aba42fSKevin Wolf sector_offset = coffset & 511; 142045aba42fSKevin Wolf csize = nb_csectors * 512 - sector_offset; 142166f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); 14229a4f4c31SKevin Wolf ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data, 14239a4f4c31SKevin Wolf nb_csectors); 142445aba42fSKevin Wolf if (ret < 0) { 14258af36488SKevin Wolf return ret; 142645aba42fSKevin Wolf } 142745aba42fSKevin Wolf if (decompress_buffer(s->cluster_cache, s->cluster_size, 142845aba42fSKevin Wolf s->cluster_data + sector_offset, csize) < 0) { 14298af36488SKevin Wolf return -EIO; 143045aba42fSKevin Wolf } 143145aba42fSKevin Wolf s->cluster_cache_offset = coffset; 143245aba42fSKevin Wolf } 143345aba42fSKevin Wolf return 0; 143445aba42fSKevin Wolf } 14355ea929e3SKevin Wolf 14365ea929e3SKevin Wolf /* 14375ea929e3SKevin Wolf * This discards as many clusters of nb_clusters as possible at once (i.e. 14385ea929e3SKevin Wolf * all clusters in the same L2 table) and returns the number of discarded 14395ea929e3SKevin Wolf * clusters. 14405ea929e3SKevin Wolf */ 14415ea929e3SKevin Wolf static int discard_single_l2(BlockDriverState *bs, uint64_t offset, 1442b6d36defSMax Reitz uint64_t nb_clusters, enum qcow2_discard_type type, 1443b6d36defSMax Reitz bool full_discard) 14445ea929e3SKevin Wolf { 1445ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 14463948d1d4SKevin Wolf uint64_t *l2_table; 14475ea929e3SKevin Wolf int l2_index; 14485ea929e3SKevin Wolf int ret; 14495ea929e3SKevin Wolf int i; 14505ea929e3SKevin Wolf 14513948d1d4SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 14525ea929e3SKevin Wolf if (ret < 0) { 14535ea929e3SKevin Wolf return ret; 14545ea929e3SKevin Wolf } 14555ea929e3SKevin Wolf 14565ea929e3SKevin Wolf /* Limit nb_clusters to one L2 table */ 14575ea929e3SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1458b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 14595ea929e3SKevin Wolf 14605ea929e3SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1461c883db0dSMax Reitz uint64_t old_l2_entry; 14625ea929e3SKevin Wolf 1463c883db0dSMax Reitz old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); 1464a71835a0SKevin Wolf 1465a71835a0SKevin Wolf /* 1466808c4b6fSMax Reitz * If full_discard is false, make sure that a discarded area reads back 1467808c4b6fSMax Reitz * as zeroes for v3 images (we cannot do it for v2 without actually 1468808c4b6fSMax Reitz * writing a zero-filled buffer). We can skip the operation if the 1469808c4b6fSMax Reitz * cluster is already marked as zero, or if it's unallocated and we 1470808c4b6fSMax Reitz * don't have a backing file. 1471a71835a0SKevin Wolf * 1472a71835a0SKevin Wolf * TODO We might want to use bdrv_get_block_status(bs) here, but we're 1473a71835a0SKevin Wolf * holding s->lock, so that doesn't work today. 1474808c4b6fSMax Reitz * 1475808c4b6fSMax Reitz * If full_discard is true, the sector should not read back as zeroes, 1476808c4b6fSMax Reitz * but rather fall through to the backing file. 
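 * For example, with a backing file attached, a fully discarded cluster must read back the backing file's contents again rather than zeroes.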
1477a71835a0SKevin Wolf */ 1478c883db0dSMax Reitz switch (qcow2_get_cluster_type(old_l2_entry)) { 1479c883db0dSMax Reitz case QCOW2_CLUSTER_UNALLOCATED: 1480760e0063SKevin Wolf if (full_discard || !bs->backing) { 1481a71835a0SKevin Wolf continue; 1482a71835a0SKevin Wolf } 1483c883db0dSMax Reitz break; 1484a71835a0SKevin Wolf 1485c883db0dSMax Reitz case QCOW2_CLUSTER_ZERO: 1486808c4b6fSMax Reitz if (!full_discard) { 14875ea929e3SKevin Wolf continue; 1488808c4b6fSMax Reitz } 1489808c4b6fSMax Reitz break; 1490c883db0dSMax Reitz 1491c883db0dSMax Reitz case QCOW2_CLUSTER_NORMAL: 1492c883db0dSMax Reitz case QCOW2_CLUSTER_COMPRESSED: 1493c883db0dSMax Reitz break; 1494c883db0dSMax Reitz 1495c883db0dSMax Reitz default: 1496c883db0dSMax Reitz abort(); 14975ea929e3SKevin Wolf } 14985ea929e3SKevin Wolf 14995ea929e3SKevin Wolf /* First remove L2 entries */ 150072e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 1501808c4b6fSMax Reitz if (!full_discard && s->qcow_version >= 3) { 1502a71835a0SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); 1503a71835a0SKevin Wolf } else { 15045ea929e3SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(0); 1505a71835a0SKevin Wolf } 15065ea929e3SKevin Wolf 15075ea929e3SKevin Wolf /* Then decrease the refcount */ 1508c883db0dSMax Reitz qcow2_free_any_clusters(bs, old_l2_entry, 1, type); 15095ea929e3SKevin Wolf } 15105ea929e3SKevin Wolf 1511a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 15125ea929e3SKevin Wolf 15135ea929e3SKevin Wolf return nb_clusters; 15145ea929e3SKevin Wolf } 15155ea929e3SKevin Wolf 15165ea929e3SKevin Wolf int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, 1517808c4b6fSMax Reitz int nb_sectors, enum qcow2_discard_type type, bool full_discard) 15185ea929e3SKevin Wolf { 1519ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 15205ea929e3SKevin Wolf uint64_t end_offset; 1521b6d36defSMax Reitz uint64_t nb_clusters; 15225ea929e3SKevin Wolf int ret; 15235ea929e3SKevin Wolf 15245ea929e3SKevin Wolf end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); 15255ea929e3SKevin Wolf 15265ea929e3SKevin Wolf /* Round start up and end down */ 15275ea929e3SKevin Wolf offset = align_offset(offset, s->cluster_size); 1528ac95acdbSHu Tao end_offset = start_of_cluster(s, end_offset); 15295ea929e3SKevin Wolf 15305ea929e3SKevin Wolf if (offset > end_offset) { 15315ea929e3SKevin Wolf return 0; 15325ea929e3SKevin Wolf } 15335ea929e3SKevin Wolf 15345ea929e3SKevin Wolf nb_clusters = size_to_clusters(s, end_offset - offset); 15355ea929e3SKevin Wolf 15360b919faeSKevin Wolf s->cache_discards = true; 15370b919faeSKevin Wolf 15385ea929e3SKevin Wolf /* Each L2 table is handled by its own loop iteration */ 15395ea929e3SKevin Wolf while (nb_clusters > 0) { 1540808c4b6fSMax Reitz ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard); 15415ea929e3SKevin Wolf if (ret < 0) { 15420b919faeSKevin Wolf goto fail; 15435ea929e3SKevin Wolf } 15445ea929e3SKevin Wolf 15455ea929e3SKevin Wolf nb_clusters -= ret; 15465ea929e3SKevin Wolf offset += (ret * s->cluster_size); 15475ea929e3SKevin Wolf } 15485ea929e3SKevin Wolf 15490b919faeSKevin Wolf ret = 0; 15500b919faeSKevin Wolf fail: 15510b919faeSKevin Wolf s->cache_discards = false; 15520b919faeSKevin Wolf qcow2_process_discards(bs, ret); 15530b919faeSKevin Wolf 15540b919faeSKevin Wolf return ret; 15555ea929e3SKevin Wolf } 1556621f0589SKevin Wolf 1557621f0589SKevin Wolf /* 1558621f0589SKevin Wolf * This zeroes as many clusters of nb_clusters 
as possible at once (i.e. 1559621f0589SKevin Wolf * all clusters in the same L2 table) and returns the number of zeroed 1560621f0589SKevin Wolf * clusters. 1561621f0589SKevin Wolf */ 1562621f0589SKevin Wolf static int zero_single_l2(BlockDriverState *bs, uint64_t offset, 1563b6d36defSMax Reitz uint64_t nb_clusters) 1564621f0589SKevin Wolf { 1565ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1566621f0589SKevin Wolf uint64_t *l2_table; 1567621f0589SKevin Wolf int l2_index; 1568621f0589SKevin Wolf int ret; 1569621f0589SKevin Wolf int i; 1570621f0589SKevin Wolf 1571621f0589SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 1572621f0589SKevin Wolf if (ret < 0) { 1573621f0589SKevin Wolf return ret; 1574621f0589SKevin Wolf } 1575621f0589SKevin Wolf 1576621f0589SKevin Wolf /* Limit nb_clusters to one L2 table */ 1577621f0589SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1578b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 1579621f0589SKevin Wolf 1580621f0589SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1581621f0589SKevin Wolf uint64_t old_offset; 1582621f0589SKevin Wolf 1583621f0589SKevin Wolf old_offset = be64_to_cpu(l2_table[l2_index + i]); 1584621f0589SKevin Wolf 1585621f0589SKevin Wolf /* Update L2 entries */ 158672e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 1587621f0589SKevin Wolf if (old_offset & QCOW_OFLAG_COMPRESSED) { 1588621f0589SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); 15896cfcb9b8SKevin Wolf qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); 1590621f0589SKevin Wolf } else { 1591621f0589SKevin Wolf l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); 1592621f0589SKevin Wolf } 1593621f0589SKevin Wolf } 1594621f0589SKevin Wolf 1595a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 1596621f0589SKevin Wolf 1597621f0589SKevin Wolf return nb_clusters; 1598621f0589SKevin Wolf } 1599621f0589SKevin Wolf 1600621f0589SKevin Wolf int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) 1601621f0589SKevin Wolf { 1602ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1603b6d36defSMax Reitz uint64_t nb_clusters; 1604621f0589SKevin Wolf int ret; 1605621f0589SKevin Wolf 1606621f0589SKevin Wolf /* The zero flag is only supported by version 3 and newer */ 1607621f0589SKevin Wolf if (s->qcow_version < 3) { 1608621f0589SKevin Wolf return -ENOTSUP; 1609621f0589SKevin Wolf } 1610621f0589SKevin Wolf 1611621f0589SKevin Wolf /* Each L2 table is handled by its own loop iteration */ 1612621f0589SKevin Wolf nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); 1613621f0589SKevin Wolf 16140b919faeSKevin Wolf s->cache_discards = true; 16150b919faeSKevin Wolf 1616621f0589SKevin Wolf while (nb_clusters > 0) { 1617621f0589SKevin Wolf ret = zero_single_l2(bs, offset, nb_clusters); 1618621f0589SKevin Wolf if (ret < 0) { 16190b919faeSKevin Wolf goto fail; 1620621f0589SKevin Wolf } 1621621f0589SKevin Wolf 1622621f0589SKevin Wolf nb_clusters -= ret; 1623621f0589SKevin Wolf offset += (ret * s->cluster_size); 1624621f0589SKevin Wolf } 1625621f0589SKevin Wolf 16260b919faeSKevin Wolf ret = 0; 16270b919faeSKevin Wolf fail: 16280b919faeSKevin Wolf s->cache_discards = false; 16290b919faeSKevin Wolf qcow2_process_discards(bs, ret); 16300b919faeSKevin Wolf 16310b919faeSKevin Wolf return ret; 1632621f0589SKevin Wolf } 163332b6444dSMax Reitz 163432b6444dSMax Reitz /* 163532b6444dSMax Reitz * Expands all zero clusters in a specific L1 
table (or deallocates them, for 163632b6444dSMax Reitz * non-backed non-pre-allocated zero clusters). 163732b6444dSMax Reitz * 16384057a2b2SMax Reitz * l1_entries and *visited_l1_entries are used to keep track of progress for 16394057a2b2SMax Reitz * status_cb(). l1_entries contains the total number of L1 entries and 16404057a2b2SMax Reitz * *visited_l1_entries counts all visited L1 entries. 164132b6444dSMax Reitz */ 164232b6444dSMax Reitz static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, 1643ecf58777SMax Reitz int l1_size, int64_t *visited_l1_entries, 16444057a2b2SMax Reitz int64_t l1_entries, 16458b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 16468b13976dSMax Reitz void *cb_opaque) 164732b6444dSMax Reitz { 1648ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 164932b6444dSMax Reitz bool is_active_l1 = (l1_table == s->l1_table); 165032b6444dSMax Reitz uint64_t *l2_table = NULL; 165132b6444dSMax Reitz int ret; 165232b6444dSMax Reitz int i, j; 165332b6444dSMax Reitz 165432b6444dSMax Reitz if (!is_active_l1) { 165532b6444dSMax Reitz /* inactive L2 tables require a buffer to be stored in when loading 165632b6444dSMax Reitz * them from disk */ 16579a4f4c31SKevin Wolf l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size); 1658de82815dSKevin Wolf if (l2_table == NULL) { 1659de82815dSKevin Wolf return -ENOMEM; 1660de82815dSKevin Wolf } 166132b6444dSMax Reitz } 166232b6444dSMax Reitz 166332b6444dSMax Reitz for (i = 0; i < l1_size; i++) { 166432b6444dSMax Reitz uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; 166532b6444dSMax Reitz bool l2_dirty = false; 16660e06528eSMax Reitz uint64_t l2_refcount; 166732b6444dSMax Reitz 166832b6444dSMax Reitz if (!l2_offset) { 166932b6444dSMax Reitz /* unallocated */ 16704057a2b2SMax Reitz (*visited_l1_entries)++; 16714057a2b2SMax Reitz if (status_cb) { 16728b13976dSMax Reitz status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 16734057a2b2SMax Reitz } 167432b6444dSMax Reitz continue; 167532b6444dSMax Reitz } 167632b6444dSMax Reitz 16778dd93d93SMax Reitz if (offset_into_cluster(s, l2_offset)) { 16788dd93d93SMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" 16798dd93d93SMax Reitz PRIx64 " unaligned (L1 index: %#x)", 16808dd93d93SMax Reitz l2_offset, i); 16818dd93d93SMax Reitz ret = -EIO; 16828dd93d93SMax Reitz goto fail; 16838dd93d93SMax Reitz } 16848dd93d93SMax Reitz 168532b6444dSMax Reitz if (is_active_l1) { 168632b6444dSMax Reitz /* get active L2 tables from cache */ 168732b6444dSMax Reitz ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, 168832b6444dSMax Reitz (void **)&l2_table); 168932b6444dSMax Reitz } else { 169032b6444dSMax Reitz /* load inactive L2 tables from disk */ 16919a4f4c31SKevin Wolf ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE, 169232b6444dSMax Reitz (void *)l2_table, s->cluster_sectors); 169332b6444dSMax Reitz } 169432b6444dSMax Reitz if (ret < 0) { 169532b6444dSMax Reitz goto fail; 169632b6444dSMax Reitz } 169732b6444dSMax Reitz 16987324c10fSMax Reitz ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, 16997324c10fSMax Reitz &l2_refcount); 17007324c10fSMax Reitz if (ret < 0) { 1701ecf58777SMax Reitz goto fail; 1702ecf58777SMax Reitz } 1703ecf58777SMax Reitz 170432b6444dSMax Reitz for (j = 0; j < s->l2_size; j++) { 170532b6444dSMax Reitz uint64_t l2_entry = be64_to_cpu(l2_table[j]); 1706ecf58777SMax Reitz int64_t offset = l2_entry & L2E_OFFSET_MASK; 170732b6444dSMax Reitz int cluster_type = qcow2_get_cluster_type(l2_entry); 
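            /* Illustrative comment, not in the original: a zero cluster whose
             * L2 entry still carries a host offset is "preallocated" - its
             * data cluster is kept and merely has to be overwritten with
             * zeroes, while an entry without an offset first needs a fresh
             * cluster, allocated below. */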
1708320c7066SMax Reitz bool preallocated = offset != 0; 170932b6444dSMax Reitz 1710ecf58777SMax Reitz if (cluster_type != QCOW2_CLUSTER_ZERO) { 171132b6444dSMax Reitz continue; 171232b6444dSMax Reitz } 171332b6444dSMax Reitz 1714320c7066SMax Reitz if (!preallocated) { 1715760e0063SKevin Wolf if (!bs->backing) { 171632b6444dSMax Reitz /* not backed; therefore we can simply deallocate the 171732b6444dSMax Reitz * cluster */ 171832b6444dSMax Reitz l2_table[j] = 0; 171932b6444dSMax Reitz l2_dirty = true; 172032b6444dSMax Reitz continue; 172132b6444dSMax Reitz } 172232b6444dSMax Reitz 172332b6444dSMax Reitz offset = qcow2_alloc_clusters(bs, s->cluster_size); 172432b6444dSMax Reitz if (offset < 0) { 172532b6444dSMax Reitz ret = offset; 172632b6444dSMax Reitz goto fail; 172732b6444dSMax Reitz } 1728ecf58777SMax Reitz 1729ecf58777SMax Reitz if (l2_refcount > 1) { 1730ecf58777SMax Reitz /* For shared L2 tables, set the refcount accordingly (it is 1731ecf58777SMax Reitz * already 1 and needs to be l2_refcount) */ 1732ecf58777SMax Reitz ret = qcow2_update_cluster_refcount(bs, 17332aabe7c7SMax Reitz offset >> s->cluster_bits, 17342aabe7c7SMax Reitz refcount_diff(1, l2_refcount), false, 1735ecf58777SMax Reitz QCOW2_DISCARD_OTHER); 1736ecf58777SMax Reitz if (ret < 0) { 1737ecf58777SMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 1738ecf58777SMax Reitz QCOW2_DISCARD_OTHER); 1739ecf58777SMax Reitz goto fail; 1740ecf58777SMax Reitz } 1741ecf58777SMax Reitz } 174232b6444dSMax Reitz } 174332b6444dSMax Reitz 17448dd93d93SMax Reitz if (offset_into_cluster(s, offset)) { 17458dd93d93SMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset " 17468dd93d93SMax Reitz "%#" PRIx64 " unaligned (L2 offset: %#" 17478dd93d93SMax Reitz PRIx64 ", L2 index: %#x)", offset, 17488dd93d93SMax Reitz l2_offset, j); 17498dd93d93SMax Reitz if (!preallocated) { 17508dd93d93SMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 17518dd93d93SMax Reitz QCOW2_DISCARD_ALWAYS); 17528dd93d93SMax Reitz } 17538dd93d93SMax Reitz ret = -EIO; 17548dd93d93SMax Reitz goto fail; 17558dd93d93SMax Reitz } 17568dd93d93SMax Reitz 1757231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); 175832b6444dSMax Reitz if (ret < 0) { 1759320c7066SMax Reitz if (!preallocated) { 176032b6444dSMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 176132b6444dSMax Reitz QCOW2_DISCARD_ALWAYS); 1762320c7066SMax Reitz } 176332b6444dSMax Reitz goto fail; 176432b6444dSMax Reitz } 176532b6444dSMax Reitz 17669a4f4c31SKevin Wolf ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE, 1767aa7bfbffSPeter Lieven s->cluster_sectors, 0); 176832b6444dSMax Reitz if (ret < 0) { 1769320c7066SMax Reitz if (!preallocated) { 177032b6444dSMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 177132b6444dSMax Reitz QCOW2_DISCARD_ALWAYS); 1772320c7066SMax Reitz } 177332b6444dSMax Reitz goto fail; 177432b6444dSMax Reitz } 177532b6444dSMax Reitz 1776ecf58777SMax Reitz if (l2_refcount == 1) { 177732b6444dSMax Reitz l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); 1778ecf58777SMax Reitz } else { 1779ecf58777SMax Reitz l2_table[j] = cpu_to_be64(offset); 1780e390cf5aSMax Reitz } 1781ecf58777SMax Reitz l2_dirty = true; 178232b6444dSMax Reitz } 178332b6444dSMax Reitz 178432b6444dSMax Reitz if (is_active_l1) { 178532b6444dSMax Reitz if (l2_dirty) { 178672e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 178732b6444dSMax Reitz qcow2_cache_depends_on_flush(s->l2_table_cache); 
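                /* Illustrative comment, not in the original: the flush
                 * dependency presumably guarantees that the zero data written
                 * above reaches the image file before the rewritten L2 entry
                 * pointing at it can be written back from the cache. */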
178832b6444dSMax Reitz } 1789a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 179032b6444dSMax Reitz } else { 179132b6444dSMax Reitz if (l2_dirty) { 1792231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 1793231bb267SMax Reitz QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset, 179432b6444dSMax Reitz s->cluster_size); 179532b6444dSMax Reitz if (ret < 0) { 179632b6444dSMax Reitz goto fail; 179732b6444dSMax Reitz } 179832b6444dSMax Reitz 17999a4f4c31SKevin Wolf ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE, 180032b6444dSMax Reitz (void *)l2_table, s->cluster_sectors); 180132b6444dSMax Reitz if (ret < 0) { 180232b6444dSMax Reitz goto fail; 180332b6444dSMax Reitz } 180432b6444dSMax Reitz } 180532b6444dSMax Reitz } 18064057a2b2SMax Reitz 18074057a2b2SMax Reitz (*visited_l1_entries)++; 18084057a2b2SMax Reitz if (status_cb) { 18098b13976dSMax Reitz status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 18104057a2b2SMax Reitz } 181132b6444dSMax Reitz } 181232b6444dSMax Reitz 181332b6444dSMax Reitz ret = 0; 181432b6444dSMax Reitz 181532b6444dSMax Reitz fail: 181632b6444dSMax Reitz if (l2_table) { 181732b6444dSMax Reitz if (!is_active_l1) { 181832b6444dSMax Reitz qemu_vfree(l2_table); 181932b6444dSMax Reitz } else { 182032b6444dSMax Reitz qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 182132b6444dSMax Reitz } 182232b6444dSMax Reitz } 182332b6444dSMax Reitz return ret; 182432b6444dSMax Reitz } 182532b6444dSMax Reitz 182632b6444dSMax Reitz /* 182732b6444dSMax Reitz * For backed images, expands all zero clusters on the image. For non-backed 182832b6444dSMax Reitz * images, deallocates all non-pre-allocated zero clusters (and claims the 182932b6444dSMax Reitz * allocation for pre-allocated ones). This is important for downgrading to a 183032b6444dSMax Reitz * qcow2 version which doesn't yet support metadata zero clusters. 
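 * A typical caller is the version downgrade path (e.g. 'qemu-img amend
 * -o compat=0.10'), which has to expand all zero clusters before it can
 * write the old-format header.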
183132b6444dSMax Reitz */ 18324057a2b2SMax Reitz int qcow2_expand_zero_clusters(BlockDriverState *bs, 18338b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 18348b13976dSMax Reitz void *cb_opaque) 183532b6444dSMax Reitz { 1836ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 183732b6444dSMax Reitz uint64_t *l1_table = NULL; 18384057a2b2SMax Reitz int64_t l1_entries = 0, visited_l1_entries = 0; 183932b6444dSMax Reitz int ret; 184032b6444dSMax Reitz int i, j; 184132b6444dSMax Reitz 18424057a2b2SMax Reitz if (status_cb) { 18434057a2b2SMax Reitz l1_entries = s->l1_size; 18444057a2b2SMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 18454057a2b2SMax Reitz l1_entries += s->snapshots[i].l1_size; 18464057a2b2SMax Reitz } 18474057a2b2SMax Reitz } 18484057a2b2SMax Reitz 184932b6444dSMax Reitz ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, 18504057a2b2SMax Reitz &visited_l1_entries, l1_entries, 18518b13976dSMax Reitz status_cb, cb_opaque); 185232b6444dSMax Reitz if (ret < 0) { 185332b6444dSMax Reitz goto fail; 185432b6444dSMax Reitz } 185532b6444dSMax Reitz 185632b6444dSMax Reitz /* Inactive L1 tables may point to active L2 tables - therefore it is 185732b6444dSMax Reitz * necessary to flush the L2 table cache before trying to access the L2 185832b6444dSMax Reitz * tables pointed to by inactive L1 entries (else we might try to expand 185932b6444dSMax Reitz * zero clusters that have already been expanded); furthermore, it is also 186032b6444dSMax Reitz * necessary to empty the L2 table cache, since it may contain tables which 186132b6444dSMax Reitz * are now going to be modified directly on disk, bypassing the cache. 186232b6444dSMax Reitz * qcow2_cache_empty() does both for us. */ 186332b6444dSMax Reitz ret = qcow2_cache_empty(bs, s->l2_table_cache); 186432b6444dSMax Reitz if (ret < 0) { 186532b6444dSMax Reitz goto fail; 186632b6444dSMax Reitz } 186732b6444dSMax Reitz 186832b6444dSMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 186932b6444dSMax Reitz int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) + 187032b6444dSMax Reitz BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE; 187132b6444dSMax Reitz 187232b6444dSMax Reitz l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); 187332b6444dSMax Reitz 18749a4f4c31SKevin Wolf ret = bdrv_read(bs->file->bs, 18759a4f4c31SKevin Wolf s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE, 18769a4f4c31SKevin Wolf (void *)l1_table, l1_sectors); 187732b6444dSMax Reitz if (ret < 0) { 187832b6444dSMax Reitz goto fail; 187932b6444dSMax Reitz } 188032b6444dSMax Reitz 188132b6444dSMax Reitz for (j = 0; j < s->snapshots[i].l1_size; j++) { 188232b6444dSMax Reitz be64_to_cpus(&l1_table[j]); 188332b6444dSMax Reitz } 188432b6444dSMax Reitz 188532b6444dSMax Reitz ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, 18864057a2b2SMax Reitz &visited_l1_entries, l1_entries, 18878b13976dSMax Reitz status_cb, cb_opaque); 188832b6444dSMax Reitz if (ret < 0) { 188932b6444dSMax Reitz goto fail; 189032b6444dSMax Reitz } 189132b6444dSMax Reitz } 189232b6444dSMax Reitz 189332b6444dSMax Reitz ret = 0; 189432b6444dSMax Reitz 189532b6444dSMax Reitz fail: 189632b6444dSMax Reitz g_free(l1_table); 189732b6444dSMax Reitz return ret; 189832b6444dSMax Reitz } 1899
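/*
 * Illustrative sketch, not part of the original driver: the cluster
 * arithmetic this file leans on everywhere. The sketch_* helpers below
 * mirror the semantics of offset_into_cluster(), start_of_cluster() and
 * size_to_clusters() as they are used above (the exact qcow2.h definitions
 * are assumed); with a cluster size of 1 << cluster_bits they all reduce
 * to simple bit masks and shifts.
 */
#include <assert.h>
#include <stdint.h>

struct sketch_state {
    int cluster_bits;       /* e.g. 16 for 64 KiB clusters */
    uint64_t cluster_size;  /* 1 << cluster_bits */
};

/* Byte offset of 'offset' within its cluster */
static uint64_t sketch_offset_into_cluster(const struct sketch_state *s,
                                           uint64_t offset)
{
    return offset & (s->cluster_size - 1);
}

/* First byte of the cluster containing 'offset' */
static uint64_t sketch_start_of_cluster(const struct sketch_state *s,
                                        uint64_t offset)
{
    return offset & ~(s->cluster_size - 1);
}

/* Number of clusters needed to cover 'size' bytes, rounding up */
static uint64_t sketch_size_to_clusters(const struct sketch_state *s,
                                        uint64_t size)
{
    return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}

int main(void)
{
    struct sketch_state s = { .cluster_bits = 16, .cluster_size = 1 << 16 };

    /* A 128 KiB request starting 512 bytes into cluster 5 touches three
     * clusters - exactly the computation handle_alloc() performs when it
     * derives nb_clusters from offset_into_cluster() + *bytes. */
    uint64_t guest_offset = ((uint64_t)5 << 16) + 512;
    uint64_t bytes = 128 * 1024;

    assert(sketch_offset_into_cluster(&s, guest_offset) == 512);
    assert(sketch_start_of_cluster(&s, guest_offset) == (uint64_t)5 << 16);
    assert(sketch_size_to_clusters(&s,
               sketch_offset_into_cluster(&s, guest_offset) + bytes) == 3);
    return 0;
}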