1 /* 2 * L2/refcount table cache for the QCOW2 format 3 * 4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "block_int.h" 26 #include "qemu-common.h" 27 #include "qcow2.h" 28 #include "trace.h" 29 30 typedef struct Qcow2CachedTable { 31 void* table; 32 int64_t offset; 33 bool dirty; 34 int cache_hits; 35 int ref; 36 } Qcow2CachedTable; 37 38 struct Qcow2Cache { 39 Qcow2CachedTable* entries; 40 struct Qcow2Cache* depends; 41 int size; 42 bool depends_on_flush; 43 bool writethrough; 44 }; 45 46 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, 47 bool writethrough) 48 { 49 BDRVQcowState *s = bs->opaque; 50 Qcow2Cache *c; 51 int i; 52 53 c = g_malloc0(sizeof(*c)); 54 c->size = num_tables; 55 c->entries = g_malloc0(sizeof(*c->entries) * num_tables); 56 c->writethrough = writethrough; 57 58 for (i = 0; i < c->size; i++) { 59 c->entries[i].table = qemu_blockalign(bs, s->cluster_size); 60 } 61 62 return c; 63 } 64 65 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 66 { 67 int i; 68 69 for (i = 0; i < c->size; i++) { 70 assert(c->entries[i].ref == 0); 71 qemu_vfree(c->entries[i].table); 72 } 73 74 g_free(c->entries); 75 g_free(c); 76 77 return 0; 78 } 79 80 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 81 { 82 int ret; 83 84 ret = qcow2_cache_flush(bs, c->depends); 85 if (ret < 0) { 86 return ret; 87 } 88 89 c->depends = NULL; 90 c->depends_on_flush = false; 91 92 return 0; 93 } 94 95 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 96 { 97 BDRVQcowState *s = bs->opaque; 98 int ret = 0; 99 100 if (!c->entries[i].dirty || !c->entries[i].offset) { 101 return 0; 102 } 103 104 trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 105 c == s->l2_table_cache, i); 106 107 if (c->depends) { 108 ret = qcow2_cache_flush_dependency(bs, c); 109 } else if (c->depends_on_flush) { 110 ret = bdrv_flush(bs->file); 111 if (ret >= 0) { 112 c->depends_on_flush = false; 113 } 114 } 115 116 if (ret < 0) { 117 return ret; 118 } 119 120 if (c == s->refcount_block_cache) { 121 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 122 } else if (c == s->l2_table_cache) { 123 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 124 } 125 126 ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, 127 s->cluster_size); 128 if (ret < 0) { 129 return ret; 130 } 131 132 c->entries[i].dirty = false; 133 134 return 0; 135 } 136 137 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 138 { 139 BDRVQcowState *s = bs->opaque; 140 int result = 0; 141 int ret; 142 int i; 143 144 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 145 146 for (i = 0; i < c->size; i++) { 147 ret = qcow2_cache_entry_flush(bs, c, i); 148 if (ret < 0 && result != -ENOSPC) { 149 result = ret; 150 } 151 } 152 153 if (result == 0) { 154 ret = bdrv_flush(bs->file); 155 if (ret < 0) { 156 result = ret; 157 } 158 } 159 160 return result; 161 } 162 163 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 164 Qcow2Cache *dependency) 165 { 166 int ret; 167 168 if (dependency->depends) { 169 ret = qcow2_cache_flush_dependency(bs, dependency); 170 if (ret < 0) { 171 return ret; 172 } 173 } 174 175 if (c->depends && (c->depends != dependency)) { 176 ret = qcow2_cache_flush_dependency(bs, c); 177 if (ret < 0) { 178 return ret; 179 } 180 } 181 182 c->depends = dependency; 183 return 0; 184 } 185 186 void qcow2_cache_depends_on_flush(Qcow2Cache *c) 187 { 188 c->depends_on_flush = true; 189 } 190 191 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 192 { 193 int i; 194 int min_count = INT_MAX; 195 int min_index = -1; 196 197 198 for (i = 0; i < c->size; i++) { 199 if (c->entries[i].ref) { 200 continue; 201 } 202 203 if (c->entries[i].cache_hits < min_count) { 204 min_index = i; 205 min_count = c->entries[i].cache_hits; 206 } 207 208 /* Give newer hits priority */ 209 /* TODO Check how to optimize the replacement strategy */ 210 c->entries[i].cache_hits /= 2; 211 } 212 213 if (min_index == -1) { 214 /* This can't happen in current synchronous code, but leave the check 215 * here as a reminder for whoever starts using AIO with the cache */ 216 abort(); 217 } 218 return min_index; 219 } 220 221 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 222 uint64_t offset, void **table, bool read_from_disk) 223 { 224 BDRVQcowState *s = bs->opaque; 225 int i; 226 int ret; 227 228 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 229 offset, read_from_disk); 230 231 /* Check if the table is already cached */ 232 for (i = 0; i < c->size; i++) { 233 if (c->entries[i].offset == offset) { 234 goto found; 235 } 236 } 237 238 /* If not, write a table back and replace it */ 239 i = qcow2_cache_find_entry_to_replace(c); 240 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 241 c == s->l2_table_cache, i); 242 if (i < 0) { 243 return i; 244 } 245 246 ret = qcow2_cache_entry_flush(bs, c, i); 247 if (ret < 0) { 248 return ret; 249 } 250 251 trace_qcow2_cache_get_read(qemu_coroutine_self(), 252 c == s->l2_table_cache, i); 253 c->entries[i].offset = 0; 254 if (read_from_disk) { 255 if (c == s->l2_table_cache) { 256 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 257 } 258 259 ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); 260 if (ret < 0) { 261 return ret; 262 } 263 } 264 265 /* Give the table some hits for the start so that it won't be replaced 266 * immediately. The number 32 is completely arbitrary. */ 267 c->entries[i].cache_hits = 32; 268 c->entries[i].offset = offset; 269 270 /* And return the right table */ 271 found: 272 c->entries[i].cache_hits++; 273 c->entries[i].ref++; 274 *table = c->entries[i].table; 275 276 trace_qcow2_cache_get_done(qemu_coroutine_self(), 277 c == s->l2_table_cache, i); 278 279 return 0; 280 } 281 282 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 283 void **table) 284 { 285 return qcow2_cache_do_get(bs, c, offset, table, true); 286 } 287 288 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 289 void **table) 290 { 291 return qcow2_cache_do_get(bs, c, offset, table, false); 292 } 293 294 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 295 { 296 int i; 297 298 for (i = 0; i < c->size; i++) { 299 if (c->entries[i].table == *table) { 300 goto found; 301 } 302 } 303 return -ENOENT; 304 305 found: 306 c->entries[i].ref--; 307 *table = NULL; 308 309 assert(c->entries[i].ref >= 0); 310 311 if (c->writethrough) { 312 return qcow2_cache_entry_flush(bs, c, i); 313 } else { 314 return 0; 315 } 316 } 317 318 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) 319 { 320 int i; 321 322 for (i = 0; i < c->size; i++) { 323 if (c->entries[i].table == table) { 324 goto found; 325 } 326 } 327 abort(); 328 329 found: 330 c->entries[i].dirty = true; 331 } 332 333 bool qcow2_cache_set_writethrough(BlockDriverState *bs, Qcow2Cache *c, 334 bool enable) 335 { 336 bool old = c->writethrough; 337 338 if (!old && enable) { 339 qcow2_cache_flush(bs, c); 340 } 341 342 c->writethrough = enable; 343 return old; 344 } 345