xref: /openbmc/qemu/block/qcow2-cache.c (revision c79aa350)
1  /*
2   * L2/refcount table cache for the QCOW2 format
3   *
4   * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  #include "qemu/memalign.h"
27  #include "qcow2.h"
28  #include "trace.h"
29  
30  typedef struct Qcow2CachedTable {
31      int64_t  offset;
32      uint64_t lru_counter;
33      int      ref;
34      bool     dirty;
35  } Qcow2CachedTable;
36  
37  struct Qcow2Cache {
38      Qcow2CachedTable       *entries;
39      struct Qcow2Cache      *depends;
40      int                     size;
41      int                     table_size;
42      bool                    depends_on_flush;
43      void                   *table_array;
44      uint64_t                lru_counter;
45      uint64_t                cache_clean_lru_counter;
46  };
47  
48  static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
49  {
50      return (uint8_t *) c->table_array + (size_t) table * c->table_size;
51  }
52  
53  static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
54  {
55      ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
56      int idx = table_offset / c->table_size;
57      assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
58      return idx;
59  }
60  
61  static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
62  {
63      if (c == s->refcount_block_cache) {
64          return "refcount block";
65      } else if (c == s->l2_table_cache) {
66          return "L2 table";
67      } else {
68          /* Do not abort, because this is not critical */
69          return "unknown";
70      }
71  }
72  
73  static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
74  {
75  /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
76  #ifdef CONFIG_LINUX
77      void *t = qcow2_cache_get_table_addr(c, i);
78      int align = qemu_real_host_page_size();
79      size_t mem_size = (size_t) c->table_size * num_tables;
80      size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
81      size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
82      if (mem_size > offset && length > 0) {
83          madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
84      }
85  #endif
86  }
87  
88  static inline bool can_clean_entry(Qcow2Cache *c, int i)
89  {
90      Qcow2CachedTable *t = &c->entries[i];
91      return t->ref == 0 && !t->dirty && t->offset != 0 &&
92          t->lru_counter <= c->cache_clean_lru_counter;
93  }
94  
95  void qcow2_cache_clean_unused(Qcow2Cache *c)
96  {
97      int i = 0;
98      while (i < c->size) {
99          int to_clean = 0;
100  
101          /* Skip the entries that we don't need to clean */
102          while (i < c->size && !can_clean_entry(c, i)) {
103              i++;
104          }
105  
106          /* And count how many we can clean in a row */
107          while (i < c->size && can_clean_entry(c, i)) {
108              c->entries[i].offset = 0;
109              c->entries[i].lru_counter = 0;
110              i++;
111              to_clean++;
112          }
113  
114          if (to_clean > 0) {
115              qcow2_cache_table_release(c, i - to_clean, to_clean);
116          }
117      }
118  
119      c->cache_clean_lru_counter = c->lru_counter;
120  }
121  
122  Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
123                                 unsigned table_size)
124  {
125      BDRVQcow2State *s = bs->opaque;
126      Qcow2Cache *c;
127  
128      assert(num_tables > 0);
129      assert(is_power_of_2(table_size));
130      assert(table_size >= (1 << MIN_CLUSTER_BITS));
131      assert(table_size <= s->cluster_size);
132  
133      c = g_new0(Qcow2Cache, 1);
134      c->size = num_tables;
135      c->table_size = table_size;
136      c->entries = g_try_new0(Qcow2CachedTable, num_tables);
137      c->table_array = qemu_try_blockalign(bs->file->bs,
138                                           (size_t) num_tables * c->table_size);
139  
140      if (!c->entries || !c->table_array) {
141          qemu_vfree(c->table_array);
142          g_free(c->entries);
143          g_free(c);
144          c = NULL;
145      }
146  
147      return c;
148  }
149  
150  int qcow2_cache_destroy(Qcow2Cache *c)
151  {
152      int i;
153  
154      for (i = 0; i < c->size; i++) {
155          assert(c->entries[i].ref == 0);
156      }
157  
158      qemu_vfree(c->table_array);
159      g_free(c->entries);
160      g_free(c);
161  
162      return 0;
163  }
164  
165  static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
166  {
167      int ret;
168  
169      ret = qcow2_cache_flush(bs, c->depends);
170      if (ret < 0) {
171          return ret;
172      }
173  
174      c->depends = NULL;
175      c->depends_on_flush = false;
176  
177      return 0;
178  }
179  
180  static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
181  {
182      BDRVQcow2State *s = bs->opaque;
183      int ret = 0;
184  
185      if (!c->entries[i].dirty || !c->entries[i].offset) {
186          return 0;
187      }
188  
189      trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
190                                    c == s->l2_table_cache, i);
191  
192      if (c->depends) {
193          ret = qcow2_cache_flush_dependency(bs, c);
194      } else if (c->depends_on_flush) {
195          ret = bdrv_flush(bs->file->bs);
196          if (ret >= 0) {
197              c->depends_on_flush = false;
198          }
199      }
200  
201      if (ret < 0) {
202          return ret;
203      }
204  
205      if (c == s->refcount_block_cache) {
206          ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
207                  c->entries[i].offset, c->table_size, false);
208      } else if (c == s->l2_table_cache) {
209          ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
210                  c->entries[i].offset, c->table_size, false);
211      } else {
212          ret = qcow2_pre_write_overlap_check(bs, 0,
213                  c->entries[i].offset, c->table_size, false);
214      }
215  
216      if (ret < 0) {
217          return ret;
218      }
219  
220      if (c == s->refcount_block_cache) {
221          BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
222      } else if (c == s->l2_table_cache) {
223          BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
224      }
225  
226      ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->table_size,
227                        qcow2_cache_get_table_addr(c, i), 0);
228      if (ret < 0) {
229          return ret;
230      }
231  
232      c->entries[i].dirty = false;
233  
234      return 0;
235  }
236  
237  int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
238  {
239      BDRVQcow2State *s = bs->opaque;
240      int result = 0;
241      int ret;
242      int i;
243  
244      trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
245  
246      for (i = 0; i < c->size; i++) {
247          ret = qcow2_cache_entry_flush(bs, c, i);
248          if (ret < 0 && result != -ENOSPC) {
249              result = ret;
250          }
251      }
252  
253      return result;
254  }
255  
256  int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
257  {
258      int result = qcow2_cache_write(bs, c);
259  
260      if (result == 0) {
261          int ret = bdrv_flush(bs->file->bs);
262          if (ret < 0) {
263              result = ret;
264          }
265      }
266  
267      return result;
268  }
269  
270  int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
271      Qcow2Cache *dependency)
272  {
273      int ret;
274  
275      if (dependency->depends) {
276          ret = qcow2_cache_flush_dependency(bs, dependency);
277          if (ret < 0) {
278              return ret;
279          }
280      }
281  
282      if (c->depends && (c->depends != dependency)) {
283          ret = qcow2_cache_flush_dependency(bs, c);
284          if (ret < 0) {
285              return ret;
286          }
287      }
288  
289      c->depends = dependency;
290      return 0;
291  }
292  
293  void qcow2_cache_depends_on_flush(Qcow2Cache *c)
294  {
295      c->depends_on_flush = true;
296  }
297  
298  int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
299  {
300      int ret, i;
301  
302      ret = qcow2_cache_flush(bs, c);
303      if (ret < 0) {
304          return ret;
305      }
306  
307      for (i = 0; i < c->size; i++) {
308          assert(c->entries[i].ref == 0);
309          c->entries[i].offset = 0;
310          c->entries[i].lru_counter = 0;
311      }
312  
313      qcow2_cache_table_release(c, 0, c->size);
314  
315      c->lru_counter = 0;
316  
317      return 0;
318  }
319  
320  static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
321      uint64_t offset, void **table, bool read_from_disk)
322  {
323      BDRVQcow2State *s = bs->opaque;
324      int i;
325      int ret;
326      int lookup_index;
327      uint64_t min_lru_counter = UINT64_MAX;
328      int min_lru_index = -1;
329  
330      assert(offset != 0);
331  
332      trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
333                            offset, read_from_disk);
334  
335      if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
336          qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
337                                  "cache: Offset %#" PRIx64 " is unaligned",
338                                  qcow2_cache_get_name(s, c), offset);
339          return -EIO;
340      }
341  
342      /* Check if the table is already cached */
343      i = lookup_index = (offset / c->table_size * 4) % c->size;
344      do {
345          const Qcow2CachedTable *t = &c->entries[i];
346          if (t->offset == offset) {
347              goto found;
348          }
349          if (t->ref == 0 && t->lru_counter < min_lru_counter) {
350              min_lru_counter = t->lru_counter;
351              min_lru_index = i;
352          }
353          if (++i == c->size) {
354              i = 0;
355          }
356      } while (i != lookup_index);
357  
358      if (min_lru_index == -1) {
359          /* This can't happen in current synchronous code, but leave the check
360           * here as a reminder for whoever starts using AIO with the cache */
361          abort();
362      }
363  
364      /* Cache miss: write a table back and replace it */
365      i = min_lru_index;
366      trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
367                                          c == s->l2_table_cache, i);
368  
369      ret = qcow2_cache_entry_flush(bs, c, i);
370      if (ret < 0) {
371          return ret;
372      }
373  
374      trace_qcow2_cache_get_read(qemu_coroutine_self(),
375                                 c == s->l2_table_cache, i);
376      c->entries[i].offset = 0;
377      if (read_from_disk) {
378          if (c == s->l2_table_cache) {
379              BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
380          }
381  
382          ret = bdrv_pread(bs->file, offset, c->table_size,
383                           qcow2_cache_get_table_addr(c, i), 0);
384          if (ret < 0) {
385              return ret;
386          }
387      }
388  
389      c->entries[i].offset = offset;
390  
391      /* And return the right table */
392  found:
393      c->entries[i].ref++;
394      *table = qcow2_cache_get_table_addr(c, i);
395  
396      trace_qcow2_cache_get_done(qemu_coroutine_self(),
397                                 c == s->l2_table_cache, i);
398  
399      return 0;
400  }
401  
402  int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
403      void **table)
404  {
405      return qcow2_cache_do_get(bs, c, offset, table, true);
406  }
407  
408  int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
409      void **table)
410  {
411      return qcow2_cache_do_get(bs, c, offset, table, false);
412  }
413  
414  void qcow2_cache_put(Qcow2Cache *c, void **table)
415  {
416      int i = qcow2_cache_get_table_idx(c, *table);
417  
418      c->entries[i].ref--;
419      *table = NULL;
420  
421      if (c->entries[i].ref == 0) {
422          c->entries[i].lru_counter = ++c->lru_counter;
423      }
424  
425      assert(c->entries[i].ref >= 0);
426  }
427  
428  void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
429  {
430      int i = qcow2_cache_get_table_idx(c, table);
431      assert(c->entries[i].offset != 0);
432      c->entries[i].dirty = true;
433  }
434  
435  void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
436  {
437      int i;
438  
439      for (i = 0; i < c->size; i++) {
440          if (c->entries[i].offset == offset) {
441              return qcow2_cache_get_table_addr(c, i);
442          }
443      }
444      return NULL;
445  }
446  
447  void qcow2_cache_discard(Qcow2Cache *c, void *table)
448  {
449      int i = qcow2_cache_get_table_idx(c, table);
450  
451      assert(c->entries[i].ref == 0);
452  
453      c->entries[i].offset = 0;
454      c->entries[i].lru_counter = 0;
455      c->entries[i].dirty = false;
456  
457      qcow2_cache_table_release(c, i, 1);
458  }
459