xref: /openbmc/qemu/block/qcow2-cache.c (revision 105bb7cd)
1 /*
2  * L2/refcount table cache for the QCOW2 format
3  *
4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/memalign.h"
27 #include "qcow2.h"
28 #include "trace.h"
29 
/*
 * Bookkeeping for a single cache slot. The table data itself is kept in
 * Qcow2Cache.table_array at the same index as this record.
 */
typedef struct Qcow2CachedTable {
    /* Offset in bs->file of the cached table; 0 marks an unused slot */
    int64_t offset;
    /* Value of the cache's lru_counter when the last reference was dropped */
    uint64_t lru_counter;
    /* Number of references handed out by get and not yet returned by put */
    int ref;
    /* True while the table has been marked dirty and not yet written back */
    bool dirty;
} Qcow2CachedTable;
36 
37 struct Qcow2Cache {
38     Qcow2CachedTable       *entries;
39     struct Qcow2Cache      *depends;
40     int                     size;
41     int                     table_size;
42     bool                    depends_on_flush;
43     void                   *table_array;
44     uint64_t                lru_counter;
45     uint64_t                cache_clean_lru_counter;
46 };
47 
48 static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
49 {
50     return (uint8_t *) c->table_array + (size_t) table * c->table_size;
51 }
52 
53 static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
54 {
55     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
56     int idx = table_offset / c->table_size;
57     assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
58     return idx;
59 }
60 
61 static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
62 {
63     if (c == s->refcount_block_cache) {
64         return "refcount block";
65     } else if (c == s->l2_table_cache) {
66         return "L2 table";
67     } else {
68         /* Do not abort, because this is not critical */
69         return "unknown";
70     }
71 }
72 
73 static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
74 {
75 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
76 #ifdef CONFIG_LINUX
77     void *t = qcow2_cache_get_table_addr(c, i);
78     int align = qemu_real_host_page_size();
79     size_t mem_size = (size_t) c->table_size * num_tables;
80     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
81     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
82     if (mem_size > offset && length > 0) {
83         madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
84     }
85 #endif
86 }
87 
88 static inline bool can_clean_entry(Qcow2Cache *c, int i)
89 {
90     Qcow2CachedTable *t = &c->entries[i];
91     return t->ref == 0 && !t->dirty && t->offset != 0 &&
92         t->lru_counter <= c->cache_clean_lru_counter;
93 }
94 
95 void qcow2_cache_clean_unused(Qcow2Cache *c)
96 {
97     int i = 0;
98     while (i < c->size) {
99         int to_clean = 0;
100 
101         /* Skip the entries that we don't need to clean */
102         while (i < c->size && !can_clean_entry(c, i)) {
103             i++;
104         }
105 
106         /* And count how many we can clean in a row */
107         while (i < c->size && can_clean_entry(c, i)) {
108             c->entries[i].offset = 0;
109             c->entries[i].lru_counter = 0;
110             i++;
111             to_clean++;
112         }
113 
114         if (to_clean > 0) {
115             qcow2_cache_table_release(c, i - to_clean, to_clean);
116         }
117     }
118 
119     c->cache_clean_lru_counter = c->lru_counter;
120 }
121 
122 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
123                                unsigned table_size)
124 {
125     BDRVQcow2State *s = bs->opaque;
126     Qcow2Cache *c;
127 
128     assert(num_tables > 0);
129     assert(is_power_of_2(table_size));
130     assert(table_size >= (1 << MIN_CLUSTER_BITS));
131     assert(table_size <= s->cluster_size);
132 
133     c = g_new0(Qcow2Cache, 1);
134     c->size = num_tables;
135     c->table_size = table_size;
136     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
137     c->table_array = qemu_try_blockalign(bs->file->bs,
138                                          (size_t) num_tables * c->table_size);
139 
140     if (!c->entries || !c->table_array) {
141         qemu_vfree(c->table_array);
142         g_free(c->entries);
143         g_free(c);
144         c = NULL;
145     }
146 
147     return c;
148 }
149 
150 int qcow2_cache_destroy(Qcow2Cache *c)
151 {
152     int i;
153 
154     for (i = 0; i < c->size; i++) {
155         assert(c->entries[i].ref == 0);
156     }
157 
158     qemu_vfree(c->table_array);
159     g_free(c->entries);
160     g_free(c);
161 
162     return 0;
163 }
164 
165 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
166 {
167     int ret;
168 
169     ret = qcow2_cache_flush(bs, c->depends);
170     if (ret < 0) {
171         return ret;
172     }
173 
174     c->depends = NULL;
175     c->depends_on_flush = false;
176 
177     return 0;
178 }
179 
180 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
181 {
182     BDRVQcow2State *s = bs->opaque;
183     int ret = 0;
184 
185     if (!c->entries[i].dirty || !c->entries[i].offset) {
186         return 0;
187     }
188 
189     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
190                                   c == s->l2_table_cache, i);
191 
192     if (c->depends) {
193         ret = qcow2_cache_flush_dependency(bs, c);
194     } else if (c->depends_on_flush) {
195         ret = bdrv_flush(bs->file->bs);
196         if (ret >= 0) {
197             c->depends_on_flush = false;
198         }
199     }
200 
201     if (ret < 0) {
202         return ret;
203     }
204 
205     if (c == s->refcount_block_cache) {
206         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
207                 c->entries[i].offset, c->table_size, false);
208     } else if (c == s->l2_table_cache) {
209         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
210                 c->entries[i].offset, c->table_size, false);
211     } else {
212         ret = qcow2_pre_write_overlap_check(bs, 0,
213                 c->entries[i].offset, c->table_size, false);
214     }
215 
216     if (ret < 0) {
217         return ret;
218     }
219 
220     if (c == s->refcount_block_cache) {
221         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
222     } else if (c == s->l2_table_cache) {
223         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
224     }
225 
226     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->table_size,
227                       qcow2_cache_get_table_addr(c, i), 0);
228     if (ret < 0) {
229         return ret;
230     }
231 
232     c->entries[i].dirty = false;
233 
234     return 0;
235 }
236 
237 int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
238 {
239     BDRVQcow2State *s = bs->opaque;
240     int result = 0;
241     int ret;
242     int i;
243 
244     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
245 
246     for (i = 0; i < c->size; i++) {
247         ret = qcow2_cache_entry_flush(bs, c, i);
248         if (ret < 0 && result != -ENOSPC) {
249             result = ret;
250         }
251     }
252 
253     return result;
254 }
255 
256 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
257 {
258     int result = qcow2_cache_write(bs, c);
259 
260     if (result == 0) {
261         int ret = bdrv_flush(bs->file->bs);
262         if (ret < 0) {
263             result = ret;
264         }
265     }
266 
267     return result;
268 }
269 
270 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
271     Qcow2Cache *dependency)
272 {
273     int ret;
274 
275     if (dependency->depends) {
276         ret = qcow2_cache_flush_dependency(bs, dependency);
277         if (ret < 0) {
278             return ret;
279         }
280     }
281 
282     if (c->depends && (c->depends != dependency)) {
283         ret = qcow2_cache_flush_dependency(bs, c);
284         if (ret < 0) {
285             return ret;
286         }
287     }
288 
289     c->depends = dependency;
290     return 0;
291 }
292 
293 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
294 {
295     c->depends_on_flush = true;
296 }
297 
298 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
299 {
300     int ret, i;
301 
302     ret = qcow2_cache_flush(bs, c);
303     if (ret < 0) {
304         return ret;
305     }
306 
307     for (i = 0; i < c->size; i++) {
308         assert(c->entries[i].ref == 0);
309         c->entries[i].offset = 0;
310         c->entries[i].lru_counter = 0;
311     }
312 
313     qcow2_cache_table_release(c, 0, c->size);
314 
315     c->lru_counter = 0;
316 
317     return 0;
318 }
319 
320 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
321     uint64_t offset, void **table, bool read_from_disk)
322 {
323     BDRVQcow2State *s = bs->opaque;
324     int i;
325     int ret;
326     int lookup_index;
327     uint64_t min_lru_counter = UINT64_MAX;
328     int min_lru_index = -1;
329 
330     assert(offset != 0);
331 
332     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
333                           offset, read_from_disk);
334 
335     if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
336         qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
337                                 "cache: Offset %#" PRIx64 " is unaligned",
338                                 qcow2_cache_get_name(s, c), offset);
339         return -EIO;
340     }
341 
342     /* Check if the table is already cached */
343     i = lookup_index = (offset / c->table_size * 4) % c->size;
344     do {
345         const Qcow2CachedTable *t = &c->entries[i];
346         if (t->offset == offset) {
347             goto found;
348         }
349         if (t->ref == 0 && t->lru_counter < min_lru_counter) {
350             min_lru_counter = t->lru_counter;
351             min_lru_index = i;
352         }
353         if (++i == c->size) {
354             i = 0;
355         }
356     } while (i != lookup_index);
357 
358     if (min_lru_index == -1) {
359         /* This can't happen in current synchronous code, but leave the check
360          * here as a reminder for whoever starts using AIO with the cache */
361         abort();
362     }
363 
364     /* Cache miss: write a table back and replace it */
365     i = min_lru_index;
366     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
367                                         c == s->l2_table_cache, i);
368 
369     ret = qcow2_cache_entry_flush(bs, c, i);
370     if (ret < 0) {
371         return ret;
372     }
373 
374     trace_qcow2_cache_get_read(qemu_coroutine_self(),
375                                c == s->l2_table_cache, i);
376     c->entries[i].offset = 0;
377     if (read_from_disk) {
378         if (c == s->l2_table_cache) {
379             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
380         }
381 
382         ret = bdrv_pread(bs->file, offset, c->table_size,
383                          qcow2_cache_get_table_addr(c, i), 0);
384         if (ret < 0) {
385             return ret;
386         }
387     }
388 
389     c->entries[i].offset = offset;
390 
391     /* And return the right table */
392 found:
393     c->entries[i].ref++;
394     *table = qcow2_cache_get_table_addr(c, i);
395 
396     trace_qcow2_cache_get_done(qemu_coroutine_self(),
397                                c == s->l2_table_cache, i);
398 
399     return 0;
400 }
401 
402 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
403     void **table)
404 {
405     return qcow2_cache_do_get(bs, c, offset, table, true);
406 }
407 
408 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
409     void **table)
410 {
411     return qcow2_cache_do_get(bs, c, offset, table, false);
412 }
413 
414 void qcow2_cache_put(Qcow2Cache *c, void **table)
415 {
416     int i = qcow2_cache_get_table_idx(c, *table);
417 
418     c->entries[i].ref--;
419     *table = NULL;
420 
421     if (c->entries[i].ref == 0) {
422         c->entries[i].lru_counter = ++c->lru_counter;
423     }
424 
425     assert(c->entries[i].ref >= 0);
426 }
427 
428 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
429 {
430     int i = qcow2_cache_get_table_idx(c, table);
431     assert(c->entries[i].offset != 0);
432     c->entries[i].dirty = true;
433 }
434 
435 void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
436 {
437     int i;
438 
439     for (i = 0; i < c->size; i++) {
440         if (c->entries[i].offset == offset) {
441             return qcow2_cache_get_table_addr(c, i);
442         }
443     }
444     return NULL;
445 }
446 
447 void qcow2_cache_discard(Qcow2Cache *c, void *table)
448 {
449     int i = qcow2_cache_get_table_idx(c, table);
450 
451     assert(c->entries[i].ref == 0);
452 
453     c->entries[i].offset = 0;
454     c->entries[i].lru_counter = 0;
455     c->entries[i].dirty = false;
456 
457     qcow2_cache_table_release(c, i, 1);
458 }
459