1 /* 2 * L2/refcount table cache for the QCOW2 format 3 * 4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "block_int.h" 26 #include "qemu-common.h" 27 #include "qcow2.h" 28 29 typedef struct Qcow2CachedTable { 30 void* table; 31 int64_t offset; 32 bool dirty; 33 int cache_hits; 34 int ref; 35 } Qcow2CachedTable; 36 37 struct Qcow2Cache { 38 Qcow2CachedTable* entries; 39 struct Qcow2Cache* depends; 40 int size; 41 bool depends_on_flush; 42 bool writethrough; 43 }; 44 45 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, 46 bool writethrough) 47 { 48 BDRVQcowState *s = bs->opaque; 49 Qcow2Cache *c; 50 int i; 51 52 c = g_malloc0(sizeof(*c)); 53 c->size = num_tables; 54 c->entries = g_malloc0(sizeof(*c->entries) * num_tables); 55 c->writethrough = writethrough; 56 57 for (i = 0; i < c->size; i++) { 58 c->entries[i].table = qemu_blockalign(bs, s->cluster_size); 59 } 60 61 return c; 62 } 63 64 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 65 { 66 int i; 67 68 for (i = 0; i < c->size; i++) { 69 assert(c->entries[i].ref == 0); 70 qemu_vfree(c->entries[i].table); 71 } 72 73 g_free(c->entries); 74 g_free(c); 75 76 return 0; 77 } 78 79 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 80 { 81 int ret; 82 83 ret = qcow2_cache_flush(bs, c->depends); 84 if (ret < 0) { 85 return ret; 86 } 87 88 c->depends = NULL; 89 c->depends_on_flush = false; 90 91 return 0; 92 } 93 94 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 95 { 96 BDRVQcowState *s = bs->opaque; 97 int ret = 0; 98 99 if (!c->entries[i].dirty || !c->entries[i].offset) { 100 return 0; 101 } 102 103 if (c->depends) { 104 ret = qcow2_cache_flush_dependency(bs, c); 105 } else if (c->depends_on_flush) { 106 ret = bdrv_flush(bs->file); 107 if (ret >= 0) { 108 c->depends_on_flush = false; 109 } 110 } 111 112 if (ret < 0) { 113 return ret; 114 } 115 116 if (c == s->refcount_block_cache) { 117 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 118 } else if (c == s->l2_table_cache) { 119 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 120 } 121 122 ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, 123 s->cluster_size); 124 if (ret < 0) { 125 return ret; 126 } 127 128 c->entries[i].dirty = false; 129 130 return 0; 131 } 132 133 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 134 { 135 int result = 0; 136 int ret; 137 int i; 138 139 for (i = 0; i < c->size; i++) { 140 ret = qcow2_cache_entry_flush(bs, c, i); 141 if (ret < 0 && result != -ENOSPC) { 142 result = ret; 143 } 144 } 145 146 if (result == 0) { 147 ret = bdrv_flush(bs->file); 148 if (ret < 0) { 149 result = ret; 150 } 151 } 152 153 return result; 154 } 155 156 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 157 Qcow2Cache *dependency) 158 { 159 int ret; 160 161 if (dependency->depends) { 162 ret = qcow2_cache_flush_dependency(bs, dependency); 163 if (ret < 0) { 164 return ret; 165 } 166 } 167 168 if (c->depends && (c->depends != dependency)) { 169 ret = qcow2_cache_flush_dependency(bs, c); 170 if (ret < 0) { 171 return ret; 172 } 173 } 174 175 c->depends = dependency; 176 return 0; 177 } 178 179 void qcow2_cache_depends_on_flush(Qcow2Cache *c) 180 { 181 c->depends_on_flush = true; 182 } 183 184 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 185 { 186 int i; 187 int min_count = INT_MAX; 188 int min_index = -1; 189 190 191 for (i = 0; i < c->size; i++) { 192 if (c->entries[i].ref) { 193 continue; 194 } 195 196 if (c->entries[i].cache_hits < min_count) { 197 min_index = i; 198 min_count = c->entries[i].cache_hits; 199 } 200 201 /* Give newer hits priority */ 202 /* TODO Check how to optimize the replacement strategy */ 203 c->entries[i].cache_hits /= 2; 204 } 205 206 if (min_index == -1) { 207 /* This can't happen in current synchronous code, but leave the check 208 * here as a reminder for whoever starts using AIO with the cache */ 209 abort(); 210 } 211 return min_index; 212 } 213 214 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 215 uint64_t offset, void **table, bool read_from_disk) 216 { 217 BDRVQcowState *s = bs->opaque; 218 int i; 219 int ret; 220 221 /* Check if the table is already cached */ 222 for (i = 0; i < c->size; i++) { 223 if (c->entries[i].offset == offset) { 224 goto found; 225 } 226 } 227 228 /* If not, write a table back and replace it */ 229 i = qcow2_cache_find_entry_to_replace(c); 230 if (i < 0) { 231 return i; 232 } 233 234 ret = qcow2_cache_entry_flush(bs, c, i); 235 if (ret < 0) { 236 return ret; 237 } 238 239 c->entries[i].offset = 0; 240 if (read_from_disk) { 241 if (c == s->l2_table_cache) { 242 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 243 } 244 245 ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); 246 if (ret < 0) { 247 return ret; 248 } 249 } 250 251 /* Give the table some hits for the start so that it won't be replaced 252 * immediately. The number 32 is completely arbitrary. */ 253 c->entries[i].cache_hits = 32; 254 c->entries[i].offset = offset; 255 256 /* And return the right table */ 257 found: 258 c->entries[i].cache_hits++; 259 c->entries[i].ref++; 260 *table = c->entries[i].table; 261 return 0; 262 } 263 264 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 265 void **table) 266 { 267 return qcow2_cache_do_get(bs, c, offset, table, true); 268 } 269 270 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 271 void **table) 272 { 273 return qcow2_cache_do_get(bs, c, offset, table, false); 274 } 275 276 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 277 { 278 int i; 279 280 for (i = 0; i < c->size; i++) { 281 if (c->entries[i].table == *table) { 282 goto found; 283 } 284 } 285 return -ENOENT; 286 287 found: 288 c->entries[i].ref--; 289 *table = NULL; 290 291 assert(c->entries[i].ref >= 0); 292 293 if (c->writethrough) { 294 return qcow2_cache_entry_flush(bs, c, i); 295 } else { 296 return 0; 297 } 298 } 299 300 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) 301 { 302 int i; 303 304 for (i = 0; i < c->size; i++) { 305 if (c->entries[i].table == table) { 306 goto found; 307 } 308 } 309 abort(); 310 311 found: 312 c->entries[i].dirty = true; 313 } 314 315 bool qcow2_cache_set_writethrough(BlockDriverState *bs, Qcow2Cache *c, 316 bool enable) 317 { 318 bool old = c->writethrough; 319 320 if (!old && enable) { 321 qcow2_cache_flush(bs, c); 322 } 323 324 c->writethrough = enable; 325 return old; 326 } 327