1 /* 2 * L2/refcount table cache for the QCOW2 format 3 * 4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "block/block_int.h" 26 #include "qemu-common.h" 27 #include "qcow2.h" 28 #include "trace.h" 29 30 typedef struct Qcow2CachedTable { 31 void* table; 32 int64_t offset; 33 bool dirty; 34 int cache_hits; 35 int ref; 36 } Qcow2CachedTable; 37 38 struct Qcow2Cache { 39 Qcow2CachedTable* entries; 40 struct Qcow2Cache* depends; 41 int size; 42 bool depends_on_flush; 43 }; 44 45 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) 46 { 47 BDRVQcowState *s = bs->opaque; 48 Qcow2Cache *c; 49 int i; 50 51 c = g_new0(Qcow2Cache, 1); 52 c->size = num_tables; 53 c->entries = g_try_new0(Qcow2CachedTable, num_tables); 54 if (!c->entries) { 55 goto fail; 56 } 57 58 for (i = 0; i < c->size; i++) { 59 c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size); 60 if (c->entries[i].table == NULL) { 61 goto fail; 62 } 63 } 64 65 return c; 66 67 fail: 68 if (c->entries) { 69 for (i = 0; i < c->size; i++) { 70 qemu_vfree(c->entries[i].table); 71 } 72 } 73 g_free(c->entries); 74 g_free(c); 75 return NULL; 76 } 77 78 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 79 { 80 int i; 81 82 for (i = 0; i < c->size; i++) { 83 assert(c->entries[i].ref == 0); 84 qemu_vfree(c->entries[i].table); 85 } 86 87 g_free(c->entries); 88 g_free(c); 89 90 return 0; 91 } 92 93 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 94 { 95 int ret; 96 97 ret = qcow2_cache_flush(bs, c->depends); 98 if (ret < 0) { 99 return ret; 100 } 101 102 c->depends = NULL; 103 c->depends_on_flush = false; 104 105 return 0; 106 } 107 108 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 109 { 110 BDRVQcowState *s = bs->opaque; 111 int ret = 0; 112 113 if (!c->entries[i].dirty || !c->entries[i].offset) { 114 return 0; 115 } 116 117 trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 118 c == s->l2_table_cache, i); 119 120 if (c->depends) { 121 ret = qcow2_cache_flush_dependency(bs, c); 122 } else if (c->depends_on_flush) { 123 ret = bdrv_flush(bs->file); 124 if (ret >= 0) { 125 c->depends_on_flush = false; 126 } 127 } 128 129 if (ret < 0) { 130 return ret; 131 } 132 133 if (c == s->refcount_block_cache) { 134 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, 135 c->entries[i].offset, s->cluster_size); 136 } else if (c == s->l2_table_cache) { 137 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, 138 c->entries[i].offset, s->cluster_size); 139 } else { 140 ret = qcow2_pre_write_overlap_check(bs, 0, 141 c->entries[i].offset, s->cluster_size); 142 } 143 144 if (ret < 0) { 145 return ret; 146 } 147 148 if (c == s->refcount_block_cache) { 149 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 150 } else if (c == s->l2_table_cache) { 151 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 152 } 153 154 ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, 155 s->cluster_size); 156 if (ret < 0) { 157 return ret; 158 } 159 160 c->entries[i].dirty = false; 161 162 return 0; 163 } 164 165 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 166 { 167 BDRVQcowState *s = bs->opaque; 168 int result = 0; 169 int ret; 170 int i; 171 172 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 173 174 for (i = 0; i < c->size; i++) { 175 ret = qcow2_cache_entry_flush(bs, c, i); 176 if (ret < 0 && result != -ENOSPC) { 177 result = ret; 178 } 179 } 180 181 if (result == 0) { 182 ret = bdrv_flush(bs->file); 183 if (ret < 0) { 184 result = ret; 185 } 186 } 187 188 return result; 189 } 190 191 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 192 Qcow2Cache *dependency) 193 { 194 int ret; 195 196 if (dependency->depends) { 197 ret = qcow2_cache_flush_dependency(bs, dependency); 198 if (ret < 0) { 199 return ret; 200 } 201 } 202 203 if (c->depends && (c->depends != dependency)) { 204 ret = qcow2_cache_flush_dependency(bs, c); 205 if (ret < 0) { 206 return ret; 207 } 208 } 209 210 c->depends = dependency; 211 return 0; 212 } 213 214 void qcow2_cache_depends_on_flush(Qcow2Cache *c) 215 { 216 c->depends_on_flush = true; 217 } 218 219 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) 220 { 221 int ret, i; 222 223 ret = qcow2_cache_flush(bs, c); 224 if (ret < 0) { 225 return ret; 226 } 227 228 for (i = 0; i < c->size; i++) { 229 assert(c->entries[i].ref == 0); 230 c->entries[i].offset = 0; 231 c->entries[i].cache_hits = 0; 232 } 233 234 return 0; 235 } 236 237 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 238 { 239 int i; 240 int min_count = INT_MAX; 241 int min_index = -1; 242 243 244 for (i = 0; i < c->size; i++) { 245 if (c->entries[i].ref) { 246 continue; 247 } 248 249 if (c->entries[i].cache_hits < min_count) { 250 min_index = i; 251 min_count = c->entries[i].cache_hits; 252 } 253 254 /* Give newer hits priority */ 255 /* TODO Check how to optimize the replacement strategy */ 256 if (c->entries[i].cache_hits > 1) { 257 c->entries[i].cache_hits /= 2; 258 } 259 } 260 261 if (min_index == -1) { 262 /* This can't happen in current synchronous code, but leave the check 263 * here as a reminder for whoever starts using AIO with the cache */ 264 abort(); 265 } 266 return min_index; 267 } 268 269 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 270 uint64_t offset, void **table, bool read_from_disk) 271 { 272 BDRVQcowState *s = bs->opaque; 273 int i; 274 int ret; 275 276 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 277 offset, read_from_disk); 278 279 /* Check if the table is already cached */ 280 for (i = 0; i < c->size; i++) { 281 if (c->entries[i].offset == offset) { 282 goto found; 283 } 284 } 285 286 /* If not, write a table back and replace it */ 287 i = qcow2_cache_find_entry_to_replace(c); 288 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 289 c == s->l2_table_cache, i); 290 if (i < 0) { 291 return i; 292 } 293 294 ret = qcow2_cache_entry_flush(bs, c, i); 295 if (ret < 0) { 296 return ret; 297 } 298 299 trace_qcow2_cache_get_read(qemu_coroutine_self(), 300 c == s->l2_table_cache, i); 301 c->entries[i].offset = 0; 302 if (read_from_disk) { 303 if (c == s->l2_table_cache) { 304 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 305 } 306 307 ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); 308 if (ret < 0) { 309 return ret; 310 } 311 } 312 313 /* Give the table some hits for the start so that it won't be replaced 314 * immediately. The number 32 is completely arbitrary. */ 315 c->entries[i].cache_hits = 32; 316 c->entries[i].offset = offset; 317 318 /* And return the right table */ 319 found: 320 c->entries[i].cache_hits++; 321 c->entries[i].ref++; 322 *table = c->entries[i].table; 323 324 trace_qcow2_cache_get_done(qemu_coroutine_self(), 325 c == s->l2_table_cache, i); 326 327 return 0; 328 } 329 330 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 331 void **table) 332 { 333 return qcow2_cache_do_get(bs, c, offset, table, true); 334 } 335 336 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 337 void **table) 338 { 339 return qcow2_cache_do_get(bs, c, offset, table, false); 340 } 341 342 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 343 { 344 int i; 345 346 for (i = 0; i < c->size; i++) { 347 if (c->entries[i].table == *table) { 348 goto found; 349 } 350 } 351 return -ENOENT; 352 353 found: 354 c->entries[i].ref--; 355 *table = NULL; 356 357 assert(c->entries[i].ref >= 0); 358 return 0; 359 } 360 361 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) 362 { 363 int i; 364 365 for (i = 0; i < c->size; i++) { 366 if (c->entries[i].table == table) { 367 goto found; 368 } 369 } 370 abort(); 371 372 found: 373 c->entries[i].dirty = true; 374 } 375