1 /* 2 * L2/refcount table cache for the QCOW2 format 3 * 4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "block/block_int.h" 26 #include "qemu-common.h" 27 #include "qcow2.h" 28 #include "trace.h" 29 30 typedef struct Qcow2CachedTable { 31 void* table; 32 int64_t offset; 33 bool dirty; 34 int cache_hits; 35 int ref; 36 } Qcow2CachedTable; 37 38 struct Qcow2Cache { 39 Qcow2CachedTable* entries; 40 struct Qcow2Cache* depends; 41 int size; 42 bool depends_on_flush; 43 }; 44 45 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) 46 { 47 BDRVQcowState *s = bs->opaque; 48 Qcow2Cache *c; 49 int i; 50 51 c = g_malloc0(sizeof(*c)); 52 c->size = num_tables; 53 c->entries = g_malloc0(sizeof(*c->entries) * num_tables); 54 55 for (i = 0; i < c->size; i++) { 56 c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size); 57 if (c->entries[i].table == NULL) { 58 goto fail; 59 } 60 } 61 62 return c; 63 64 fail: 65 for (i = 0; i < c->size; i++) { 66 qemu_vfree(c->entries[i].table); 67 } 68 g_free(c->entries); 69 g_free(c); 70 return NULL; 71 } 72 73 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 74 { 75 int i; 76 77 for (i = 0; i < c->size; i++) { 78 assert(c->entries[i].ref == 0); 79 qemu_vfree(c->entries[i].table); 80 } 81 82 g_free(c->entries); 83 g_free(c); 84 85 return 0; 86 } 87 88 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 89 { 90 int ret; 91 92 ret = qcow2_cache_flush(bs, c->depends); 93 if (ret < 0) { 94 return ret; 95 } 96 97 c->depends = NULL; 98 c->depends_on_flush = false; 99 100 return 0; 101 } 102 103 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 104 { 105 BDRVQcowState *s = bs->opaque; 106 int ret = 0; 107 108 if (!c->entries[i].dirty || !c->entries[i].offset) { 109 return 0; 110 } 111 112 trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 113 c == s->l2_table_cache, i); 114 115 if (c->depends) { 116 ret = qcow2_cache_flush_dependency(bs, c); 117 } else if (c->depends_on_flush) { 118 ret = bdrv_flush(bs->file); 119 if (ret >= 0) { 120 c->depends_on_flush = false; 121 } 122 } 123 124 if (ret < 0) { 125 return ret; 126 } 127 128 if (c == s->refcount_block_cache) { 129 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, 130 c->entries[i].offset, s->cluster_size); 131 } else if (c == s->l2_table_cache) { 132 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, 133 c->entries[i].offset, s->cluster_size); 134 } else { 135 ret = qcow2_pre_write_overlap_check(bs, 0, 136 c->entries[i].offset, s->cluster_size); 137 } 138 139 if (ret < 0) { 140 return ret; 141 } 142 143 if (c == s->refcount_block_cache) { 144 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 145 } else if (c == s->l2_table_cache) { 146 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 147 } 148 149 ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, 150 s->cluster_size); 151 if (ret < 0) { 152 return ret; 153 } 154 155 c->entries[i].dirty = false; 156 157 return 0; 158 } 159 160 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 161 { 162 BDRVQcowState *s = bs->opaque; 163 int result = 0; 164 int ret; 165 int i; 166 167 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 168 169 for (i = 0; i < c->size; i++) { 170 ret = qcow2_cache_entry_flush(bs, c, i); 171 if (ret < 0 && result != -ENOSPC) { 172 result = ret; 173 } 174 } 175 176 if (result == 0) { 177 ret = bdrv_flush(bs->file); 178 if (ret < 0) { 179 result = ret; 180 } 181 } 182 183 return result; 184 } 185 186 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 187 Qcow2Cache *dependency) 188 { 189 int ret; 190 191 if (dependency->depends) { 192 ret = qcow2_cache_flush_dependency(bs, dependency); 193 if (ret < 0) { 194 return ret; 195 } 196 } 197 198 if (c->depends && (c->depends != dependency)) { 199 ret = qcow2_cache_flush_dependency(bs, c); 200 if (ret < 0) { 201 return ret; 202 } 203 } 204 205 c->depends = dependency; 206 return 0; 207 } 208 209 void qcow2_cache_depends_on_flush(Qcow2Cache *c) 210 { 211 c->depends_on_flush = true; 212 } 213 214 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) 215 { 216 int ret, i; 217 218 ret = qcow2_cache_flush(bs, c); 219 if (ret < 0) { 220 return ret; 221 } 222 223 for (i = 0; i < c->size; i++) { 224 assert(c->entries[i].ref == 0); 225 c->entries[i].offset = 0; 226 c->entries[i].cache_hits = 0; 227 } 228 229 return 0; 230 } 231 232 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 233 { 234 int i; 235 int min_count = INT_MAX; 236 int min_index = -1; 237 238 239 for (i = 0; i < c->size; i++) { 240 if (c->entries[i].ref) { 241 continue; 242 } 243 244 if (c->entries[i].cache_hits < min_count) { 245 min_index = i; 246 min_count = c->entries[i].cache_hits; 247 } 248 249 /* Give newer hits priority */ 250 /* TODO Check how to optimize the replacement strategy */ 251 c->entries[i].cache_hits /= 2; 252 } 253 254 if (min_index == -1) { 255 /* This can't happen in current synchronous code, but leave the check 256 * here as a reminder for whoever starts using AIO with the cache */ 257 abort(); 258 } 259 return min_index; 260 } 261 262 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 263 uint64_t offset, void **table, bool read_from_disk) 264 { 265 BDRVQcowState *s = bs->opaque; 266 int i; 267 int ret; 268 269 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 270 offset, read_from_disk); 271 272 /* Check if the table is already cached */ 273 for (i = 0; i < c->size; i++) { 274 if (c->entries[i].offset == offset) { 275 goto found; 276 } 277 } 278 279 /* If not, write a table back and replace it */ 280 i = qcow2_cache_find_entry_to_replace(c); 281 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 282 c == s->l2_table_cache, i); 283 if (i < 0) { 284 return i; 285 } 286 287 ret = qcow2_cache_entry_flush(bs, c, i); 288 if (ret < 0) { 289 return ret; 290 } 291 292 trace_qcow2_cache_get_read(qemu_coroutine_self(), 293 c == s->l2_table_cache, i); 294 c->entries[i].offset = 0; 295 if (read_from_disk) { 296 if (c == s->l2_table_cache) { 297 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 298 } 299 300 ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); 301 if (ret < 0) { 302 return ret; 303 } 304 } 305 306 /* Give the table some hits for the start so that it won't be replaced 307 * immediately. The number 32 is completely arbitrary. */ 308 c->entries[i].cache_hits = 32; 309 c->entries[i].offset = offset; 310 311 /* And return the right table */ 312 found: 313 c->entries[i].cache_hits++; 314 c->entries[i].ref++; 315 *table = c->entries[i].table; 316 317 trace_qcow2_cache_get_done(qemu_coroutine_self(), 318 c == s->l2_table_cache, i); 319 320 return 0; 321 } 322 323 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 324 void **table) 325 { 326 return qcow2_cache_do_get(bs, c, offset, table, true); 327 } 328 329 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 330 void **table) 331 { 332 return qcow2_cache_do_get(bs, c, offset, table, false); 333 } 334 335 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 336 { 337 int i; 338 339 for (i = 0; i < c->size; i++) { 340 if (c->entries[i].table == *table) { 341 goto found; 342 } 343 } 344 return -ENOENT; 345 346 found: 347 c->entries[i].ref--; 348 *table = NULL; 349 350 assert(c->entries[i].ref >= 0); 351 return 0; 352 } 353 354 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) 355 { 356 int i; 357 358 for (i = 0; i < c->size; i++) { 359 if (c->entries[i].table == table) { 360 goto found; 361 } 362 } 363 abort(); 364 365 found: 366 c->entries[i].dirty = true; 367 } 368