xref: /openbmc/qemu/block/qcow2-cache.c (revision c11b0583)
1 /*
2  * L2/refcount table cache for the QCOW2 format
3  *
4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* Needed for CONFIG_MADVISE */
26 #include "config-host.h"
27 
28 #if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
29 #include <sys/mman.h>
30 #endif
31 
32 #include "block/block_int.h"
33 #include "qemu-common.h"
34 #include "qemu/osdep.h"
35 #include "qcow2.h"
36 #include "trace.h"
37 
38 typedef struct Qcow2CachedTable {
39     int64_t  offset;
40     uint64_t lru_counter;
41     int      ref;
42     bool     dirty;
43 } Qcow2CachedTable;
44 
45 struct Qcow2Cache {
46     Qcow2CachedTable       *entries;
47     struct Qcow2Cache      *depends;
48     int                     size;
49     bool                    depends_on_flush;
50     void                   *table_array;
51     uint64_t                lru_counter;
52     uint64_t                cache_clean_lru_counter;
53 };
54 
55 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
56                     Qcow2Cache *c, int table)
57 {
58     BDRVQcowState *s = bs->opaque;
59     return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
60 }
61 
62 static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
63                   Qcow2Cache *c, void *table)
64 {
65     BDRVQcowState *s = bs->opaque;
66     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
67     int idx = table_offset / s->cluster_size;
68     assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
69     return idx;
70 }
71 
72 static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
73                                       int i, int num_tables)
74 {
75 #if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
76     BDRVQcowState *s = bs->opaque;
77     void *t = qcow2_cache_get_table_addr(bs, c, i);
78     int align = getpagesize();
79     size_t mem_size = (size_t) s->cluster_size * num_tables;
80     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
81     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
82     if (length > 0) {
83         qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
84     }
85 #endif
86 }
87 
88 static inline bool can_clean_entry(Qcow2Cache *c, int i)
89 {
90     Qcow2CachedTable *t = &c->entries[i];
91     return t->ref == 0 && !t->dirty && t->offset != 0 &&
92         t->lru_counter <= c->cache_clean_lru_counter;
93 }
94 
95 void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
96 {
97     int i = 0;
98     while (i < c->size) {
99         int to_clean = 0;
100 
101         /* Skip the entries that we don't need to clean */
102         while (i < c->size && !can_clean_entry(c, i)) {
103             i++;
104         }
105 
106         /* And count how many we can clean in a row */
107         while (i < c->size && can_clean_entry(c, i)) {
108             c->entries[i].offset = 0;
109             c->entries[i].lru_counter = 0;
110             i++;
111             to_clean++;
112         }
113 
114         if (to_clean > 0) {
115             qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
116         }
117     }
118 
119     c->cache_clean_lru_counter = c->lru_counter;
120 }
121 
122 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
123 {
124     BDRVQcowState *s = bs->opaque;
125     Qcow2Cache *c;
126 
127     c = g_new0(Qcow2Cache, 1);
128     c->size = num_tables;
129     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
130     c->table_array = qemu_try_blockalign(bs->file,
131                                          (size_t) num_tables * s->cluster_size);
132 
133     if (!c->entries || !c->table_array) {
134         qemu_vfree(c->table_array);
135         g_free(c->entries);
136         g_free(c);
137         c = NULL;
138     }
139 
140     return c;
141 }
142 
143 int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
144 {
145     int i;
146 
147     for (i = 0; i < c->size; i++) {
148         assert(c->entries[i].ref == 0);
149     }
150 
151     qemu_vfree(c->table_array);
152     g_free(c->entries);
153     g_free(c);
154 
155     return 0;
156 }
157 
158 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
159 {
160     int ret;
161 
162     ret = qcow2_cache_flush(bs, c->depends);
163     if (ret < 0) {
164         return ret;
165     }
166 
167     c->depends = NULL;
168     c->depends_on_flush = false;
169 
170     return 0;
171 }
172 
173 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
174 {
175     BDRVQcowState *s = bs->opaque;
176     int ret = 0;
177 
178     if (!c->entries[i].dirty || !c->entries[i].offset) {
179         return 0;
180     }
181 
182     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
183                                   c == s->l2_table_cache, i);
184 
185     if (c->depends) {
186         ret = qcow2_cache_flush_dependency(bs, c);
187     } else if (c->depends_on_flush) {
188         ret = bdrv_flush(bs->file);
189         if (ret >= 0) {
190             c->depends_on_flush = false;
191         }
192     }
193 
194     if (ret < 0) {
195         return ret;
196     }
197 
198     if (c == s->refcount_block_cache) {
199         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
200                 c->entries[i].offset, s->cluster_size);
201     } else if (c == s->l2_table_cache) {
202         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
203                 c->entries[i].offset, s->cluster_size);
204     } else {
205         ret = qcow2_pre_write_overlap_check(bs, 0,
206                 c->entries[i].offset, s->cluster_size);
207     }
208 
209     if (ret < 0) {
210         return ret;
211     }
212 
213     if (c == s->refcount_block_cache) {
214         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
215     } else if (c == s->l2_table_cache) {
216         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
217     }
218 
219     ret = bdrv_pwrite(bs->file, c->entries[i].offset,
220                       qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
221     if (ret < 0) {
222         return ret;
223     }
224 
225     c->entries[i].dirty = false;
226 
227     return 0;
228 }
229 
230 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
231 {
232     BDRVQcowState *s = bs->opaque;
233     int result = 0;
234     int ret;
235     int i;
236 
237     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
238 
239     for (i = 0; i < c->size; i++) {
240         ret = qcow2_cache_entry_flush(bs, c, i);
241         if (ret < 0 && result != -ENOSPC) {
242             result = ret;
243         }
244     }
245 
246     if (result == 0) {
247         ret = bdrv_flush(bs->file);
248         if (ret < 0) {
249             result = ret;
250         }
251     }
252 
253     return result;
254 }
255 
256 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
257     Qcow2Cache *dependency)
258 {
259     int ret;
260 
261     if (dependency->depends) {
262         ret = qcow2_cache_flush_dependency(bs, dependency);
263         if (ret < 0) {
264             return ret;
265         }
266     }
267 
268     if (c->depends && (c->depends != dependency)) {
269         ret = qcow2_cache_flush_dependency(bs, c);
270         if (ret < 0) {
271             return ret;
272         }
273     }
274 
275     c->depends = dependency;
276     return 0;
277 }
278 
279 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
280 {
281     c->depends_on_flush = true;
282 }
283 
284 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
285 {
286     int ret, i;
287 
288     ret = qcow2_cache_flush(bs, c);
289     if (ret < 0) {
290         return ret;
291     }
292 
293     for (i = 0; i < c->size; i++) {
294         assert(c->entries[i].ref == 0);
295         c->entries[i].offset = 0;
296         c->entries[i].lru_counter = 0;
297     }
298 
299     qcow2_cache_table_release(bs, c, 0, c->size);
300 
301     c->lru_counter = 0;
302 
303     return 0;
304 }
305 
306 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
307     uint64_t offset, void **table, bool read_from_disk)
308 {
309     BDRVQcowState *s = bs->opaque;
310     int i;
311     int ret;
312     int lookup_index;
313     uint64_t min_lru_counter = UINT64_MAX;
314     int min_lru_index = -1;
315 
316     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
317                           offset, read_from_disk);
318 
319     /* Check if the table is already cached */
320     i = lookup_index = (offset / s->cluster_size * 4) % c->size;
321     do {
322         const Qcow2CachedTable *t = &c->entries[i];
323         if (t->offset == offset) {
324             goto found;
325         }
326         if (t->ref == 0 && t->lru_counter < min_lru_counter) {
327             min_lru_counter = t->lru_counter;
328             min_lru_index = i;
329         }
330         if (++i == c->size) {
331             i = 0;
332         }
333     } while (i != lookup_index);
334 
335     if (min_lru_index == -1) {
336         /* This can't happen in current synchronous code, but leave the check
337          * here as a reminder for whoever starts using AIO with the cache */
338         abort();
339     }
340 
341     /* Cache miss: write a table back and replace it */
342     i = min_lru_index;
343     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
344                                         c == s->l2_table_cache, i);
345 
346     ret = qcow2_cache_entry_flush(bs, c, i);
347     if (ret < 0) {
348         return ret;
349     }
350 
351     trace_qcow2_cache_get_read(qemu_coroutine_self(),
352                                c == s->l2_table_cache, i);
353     c->entries[i].offset = 0;
354     if (read_from_disk) {
355         if (c == s->l2_table_cache) {
356             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
357         }
358 
359         ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
360                          s->cluster_size);
361         if (ret < 0) {
362             return ret;
363         }
364     }
365 
366     c->entries[i].offset = offset;
367 
368     /* And return the right table */
369 found:
370     c->entries[i].ref++;
371     *table = qcow2_cache_get_table_addr(bs, c, i);
372 
373     trace_qcow2_cache_get_done(qemu_coroutine_self(),
374                                c == s->l2_table_cache, i);
375 
376     return 0;
377 }
378 
379 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
380     void **table)
381 {
382     return qcow2_cache_do_get(bs, c, offset, table, true);
383 }
384 
385 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
386     void **table)
387 {
388     return qcow2_cache_do_get(bs, c, offset, table, false);
389 }
390 
391 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
392 {
393     int i = qcow2_cache_get_table_idx(bs, c, *table);
394 
395     c->entries[i].ref--;
396     *table = NULL;
397 
398     if (c->entries[i].ref == 0) {
399         c->entries[i].lru_counter = ++c->lru_counter;
400     }
401 
402     assert(c->entries[i].ref >= 0);
403 }
404 
405 void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
406      void *table)
407 {
408     int i = qcow2_cache_get_table_idx(bs, c, table);
409     assert(c->entries[i].offset != 0);
410     c->entries[i].dirty = true;
411 }
412