xref: /openbmc/qemu/block/qcow2-cache.c (revision 3ab8bf83)
1 /*
2  * L2/refcount table cache for the QCOW2 format
3  *
4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "block/block-io.h"
27 #include "qemu/memalign.h"
28 #include "qcow2.h"
29 #include "trace.h"
30 
31 typedef struct Qcow2CachedTable {
32     int64_t  offset;
33     uint64_t lru_counter;
34     int      ref;
35     bool     dirty;
36 } Qcow2CachedTable;
37 
38 struct Qcow2Cache {
39     Qcow2CachedTable       *entries;
40     struct Qcow2Cache      *depends;
41     int                     size;
42     int                     table_size;
43     bool                    depends_on_flush;
44     void                   *table_array;
45     uint64_t                lru_counter;
46     uint64_t                cache_clean_lru_counter;
47 };
48 
49 static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
50 {
51     return (uint8_t *) c->table_array + (size_t) table * c->table_size;
52 }
53 
54 static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
55 {
56     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
57     int idx = table_offset / c->table_size;
58     assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
59     return idx;
60 }
61 
62 static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
63 {
64     if (c == s->refcount_block_cache) {
65         return "refcount block";
66     } else if (c == s->l2_table_cache) {
67         return "L2 table";
68     } else {
69         /* Do not abort, because this is not critical */
70         return "unknown";
71     }
72 }
73 
74 static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
75 {
76 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
77 #ifdef CONFIG_LINUX
78     void *t = qcow2_cache_get_table_addr(c, i);
79     int align = qemu_real_host_page_size();
80     size_t mem_size = (size_t) c->table_size * num_tables;
81     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
82     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
83     if (mem_size > offset && length > 0) {
84         madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
85     }
86 #endif
87 }
88 
89 static inline bool can_clean_entry(Qcow2Cache *c, int i)
90 {
91     Qcow2CachedTable *t = &c->entries[i];
92     return t->ref == 0 && !t->dirty && t->offset != 0 &&
93         t->lru_counter <= c->cache_clean_lru_counter;
94 }
95 
96 void qcow2_cache_clean_unused(Qcow2Cache *c)
97 {
98     int i = 0;
99     while (i < c->size) {
100         int to_clean = 0;
101 
102         /* Skip the entries that we don't need to clean */
103         while (i < c->size && !can_clean_entry(c, i)) {
104             i++;
105         }
106 
107         /* And count how many we can clean in a row */
108         while (i < c->size && can_clean_entry(c, i)) {
109             c->entries[i].offset = 0;
110             c->entries[i].lru_counter = 0;
111             i++;
112             to_clean++;
113         }
114 
115         if (to_clean > 0) {
116             qcow2_cache_table_release(c, i - to_clean, to_clean);
117         }
118     }
119 
120     c->cache_clean_lru_counter = c->lru_counter;
121 }
122 
123 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
124                                unsigned table_size)
125 {
126     BDRVQcow2State *s = bs->opaque;
127     Qcow2Cache *c;
128 
129     assert(num_tables > 0);
130     assert(is_power_of_2(table_size));
131     assert(table_size >= (1 << MIN_CLUSTER_BITS));
132     assert(table_size <= s->cluster_size);
133 
134     c = g_new0(Qcow2Cache, 1);
135     c->size = num_tables;
136     c->table_size = table_size;
137     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
138     c->table_array = qemu_try_blockalign(bs->file->bs,
139                                          (size_t) num_tables * c->table_size);
140 
141     if (!c->entries || !c->table_array) {
142         qemu_vfree(c->table_array);
143         g_free(c->entries);
144         g_free(c);
145         c = NULL;
146     }
147 
148     return c;
149 }
150 
151 int qcow2_cache_destroy(Qcow2Cache *c)
152 {
153     int i;
154 
155     for (i = 0; i < c->size; i++) {
156         assert(c->entries[i].ref == 0);
157     }
158 
159     qemu_vfree(c->table_array);
160     g_free(c->entries);
161     g_free(c);
162 
163     return 0;
164 }
165 
166 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
167 {
168     int ret;
169 
170     ret = qcow2_cache_flush(bs, c->depends);
171     if (ret < 0) {
172         return ret;
173     }
174 
175     c->depends = NULL;
176     c->depends_on_flush = false;
177 
178     return 0;
179 }
180 
181 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
182 {
183     BDRVQcow2State *s = bs->opaque;
184     int ret = 0;
185 
186     if (!c->entries[i].dirty || !c->entries[i].offset) {
187         return 0;
188     }
189 
190     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
191                                   c == s->l2_table_cache, i);
192 
193     if (c->depends) {
194         ret = qcow2_cache_flush_dependency(bs, c);
195     } else if (c->depends_on_flush) {
196         ret = bdrv_flush(bs->file->bs);
197         if (ret >= 0) {
198             c->depends_on_flush = false;
199         }
200     }
201 
202     if (ret < 0) {
203         return ret;
204     }
205 
206     if (c == s->refcount_block_cache) {
207         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
208                 c->entries[i].offset, c->table_size, false);
209     } else if (c == s->l2_table_cache) {
210         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
211                 c->entries[i].offset, c->table_size, false);
212     } else {
213         ret = qcow2_pre_write_overlap_check(bs, 0,
214                 c->entries[i].offset, c->table_size, false);
215     }
216 
217     if (ret < 0) {
218         return ret;
219     }
220 
221     if (c == s->refcount_block_cache) {
222         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
223     } else if (c == s->l2_table_cache) {
224         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
225     }
226 
227     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->table_size,
228                       qcow2_cache_get_table_addr(c, i), 0);
229     if (ret < 0) {
230         return ret;
231     }
232 
233     c->entries[i].dirty = false;
234 
235     return 0;
236 }
237 
238 int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
239 {
240     BDRVQcow2State *s = bs->opaque;
241     int result = 0;
242     int ret;
243     int i;
244 
245     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
246 
247     for (i = 0; i < c->size; i++) {
248         ret = qcow2_cache_entry_flush(bs, c, i);
249         if (ret < 0 && result != -ENOSPC) {
250             result = ret;
251         }
252     }
253 
254     return result;
255 }
256 
257 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
258 {
259     int result = qcow2_cache_write(bs, c);
260 
261     if (result == 0) {
262         int ret = bdrv_flush(bs->file->bs);
263         if (ret < 0) {
264             result = ret;
265         }
266     }
267 
268     return result;
269 }
270 
271 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
272     Qcow2Cache *dependency)
273 {
274     int ret;
275 
276     if (dependency->depends) {
277         ret = qcow2_cache_flush_dependency(bs, dependency);
278         if (ret < 0) {
279             return ret;
280         }
281     }
282 
283     if (c->depends && (c->depends != dependency)) {
284         ret = qcow2_cache_flush_dependency(bs, c);
285         if (ret < 0) {
286             return ret;
287         }
288     }
289 
290     c->depends = dependency;
291     return 0;
292 }
293 
294 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
295 {
296     c->depends_on_flush = true;
297 }
298 
299 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
300 {
301     int ret, i;
302 
303     ret = qcow2_cache_flush(bs, c);
304     if (ret < 0) {
305         return ret;
306     }
307 
308     for (i = 0; i < c->size; i++) {
309         assert(c->entries[i].ref == 0);
310         c->entries[i].offset = 0;
311         c->entries[i].lru_counter = 0;
312     }
313 
314     qcow2_cache_table_release(c, 0, c->size);
315 
316     c->lru_counter = 0;
317 
318     return 0;
319 }
320 
321 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
322     uint64_t offset, void **table, bool read_from_disk)
323 {
324     BDRVQcow2State *s = bs->opaque;
325     int i;
326     int ret;
327     int lookup_index;
328     uint64_t min_lru_counter = UINT64_MAX;
329     int min_lru_index = -1;
330 
331     assert(offset != 0);
332 
333     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
334                           offset, read_from_disk);
335 
336     if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
337         qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
338                                 "cache: Offset %#" PRIx64 " is unaligned",
339                                 qcow2_cache_get_name(s, c), offset);
340         return -EIO;
341     }
342 
343     /* Check if the table is already cached */
344     i = lookup_index = (offset / c->table_size * 4) % c->size;
345     do {
346         const Qcow2CachedTable *t = &c->entries[i];
347         if (t->offset == offset) {
348             goto found;
349         }
350         if (t->ref == 0 && t->lru_counter < min_lru_counter) {
351             min_lru_counter = t->lru_counter;
352             min_lru_index = i;
353         }
354         if (++i == c->size) {
355             i = 0;
356         }
357     } while (i != lookup_index);
358 
359     if (min_lru_index == -1) {
360         /* This can't happen in current synchronous code, but leave the check
361          * here as a reminder for whoever starts using AIO with the cache */
362         abort();
363     }
364 
365     /* Cache miss: write a table back and replace it */
366     i = min_lru_index;
367     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
368                                         c == s->l2_table_cache, i);
369 
370     ret = qcow2_cache_entry_flush(bs, c, i);
371     if (ret < 0) {
372         return ret;
373     }
374 
375     trace_qcow2_cache_get_read(qemu_coroutine_self(),
376                                c == s->l2_table_cache, i);
377     c->entries[i].offset = 0;
378     if (read_from_disk) {
379         if (c == s->l2_table_cache) {
380             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
381         }
382 
383         ret = bdrv_pread(bs->file, offset, c->table_size,
384                          qcow2_cache_get_table_addr(c, i), 0);
385         if (ret < 0) {
386             return ret;
387         }
388     }
389 
390     c->entries[i].offset = offset;
391 
392     /* And return the right table */
393 found:
394     c->entries[i].ref++;
395     *table = qcow2_cache_get_table_addr(c, i);
396 
397     trace_qcow2_cache_get_done(qemu_coroutine_self(),
398                                c == s->l2_table_cache, i);
399 
400     return 0;
401 }
402 
403 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
404     void **table)
405 {
406     return qcow2_cache_do_get(bs, c, offset, table, true);
407 }
408 
409 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
410     void **table)
411 {
412     return qcow2_cache_do_get(bs, c, offset, table, false);
413 }
414 
415 void qcow2_cache_put(Qcow2Cache *c, void **table)
416 {
417     int i = qcow2_cache_get_table_idx(c, *table);
418 
419     c->entries[i].ref--;
420     *table = NULL;
421 
422     if (c->entries[i].ref == 0) {
423         c->entries[i].lru_counter = ++c->lru_counter;
424     }
425 
426     assert(c->entries[i].ref >= 0);
427 }
428 
429 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
430 {
431     int i = qcow2_cache_get_table_idx(c, table);
432     assert(c->entries[i].offset != 0);
433     c->entries[i].dirty = true;
434 }
435 
436 void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
437 {
438     int i;
439 
440     for (i = 0; i < c->size; i++) {
441         if (c->entries[i].offset == offset) {
442             return qcow2_cache_get_table_addr(c, i);
443         }
444     }
445     return NULL;
446 }
447 
448 void qcow2_cache_discard(Qcow2Cache *c, void *table)
449 {
450     int i = qcow2_cache_get_table_idx(c, table);
451 
452     assert(c->entries[i].ref == 0);
453 
454     c->entries[i].offset = 0;
455     c->entries[i].lru_counter = 0;
456     c->entries[i].dirty = false;
457 
458     qcow2_cache_table_release(c, i, 1);
459 }
460