xref: /openbmc/qemu/block/commit.c (revision 58ea30f5)
1  /*
2   * Live block commit
3   *
4   * Copyright Red Hat, Inc. 2012
5   *
6   * Authors:
7   *  Jeff Cody   <jcody@redhat.com>
8   *  Based on stream.c by Stefan Hajnoczi
9   *
10   * This work is licensed under the terms of the GNU LGPL, version 2 or later.
11   * See the COPYING.LIB file in the top-level directory.
12   *
13   */
14  
15  #include "qemu/osdep.h"
16  #include "qemu/cutils.h"
17  #include "trace.h"
18  #include "block/block_int.h"
19  #include "block/blockjob_int.h"
20  #include "qapi/error.h"
21  #include "qapi/qmp/qerror.h"
22  #include "qemu/ratelimit.h"
23  #include "sysemu/block-backend.h"
24  
enum {
    /*
     * Number of bytes copied per iteration of the commit job.  Keep this
     * big enough to span several clusters at once so that committing
     * contiguous regions of the image stays efficient.
     */
    COMMIT_BUFFER_SIZE = 512 << 10, /* 512 KiB */
};
33  
/*
 * State of one live commit job.
 *
 * commit_start() treats the pointer returned by block_job_create() as a
 * CommitBlockJob, so @common has to remain the first member.
 */
typedef struct CommitBlockJob {
    /* Generic block job state; callbacks recover the job via common.job */
    BlockJob common;
    /* The "commit_top" filter node that commit_start() inserts above top */
    BlockDriverState *commit_top_bs;
    /* BlockBackend attached to the top node; data is read through it */
    BlockBackend *top;
    /* BlockBackend attached to the base node; data is written through it.
     * Reset to NULL once commit_prepare() has dropped the reference. */
    BlockBackend *base;
    /* The base node itself */
    BlockDriverState *base_bs;
    /* Error policy requested by the caller of commit_start() */
    BlockdevOnError on_error;
    /* True if base was read-only when the job was set up; commit_clean()
     * then switches it back to read-only */
    bool base_read_only;
    /* True while the backing chain between commit_top_bs and base is frozen */
    bool chain_frozen;
    /* Private copy of the backing file string handed to
     * bdrv_drop_intermediate(); freed in commit_clean() */
    char *backing_file_str;
} CommitBlockJob;
45  
46  static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
47                                          int64_t offset, uint64_t bytes,
48                                          void *buf)
49  {
50      int ret = 0;
51  
52      assert(bytes < SIZE_MAX);
53  
54      ret = blk_co_pread(bs, offset, bytes, buf, 0);
55      if (ret < 0) {
56          return ret;
57      }
58  
59      ret = blk_co_pwrite(base, offset, bytes, buf, 0);
60      if (ret < 0) {
61          return ret;
62      }
63  
64      return 0;
65  }
66  
67  static int commit_prepare(Job *job)
68  {
69      CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
70  
71      bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
72      s->chain_frozen = false;
73  
74      /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
75       * the normal backing chain can be restored. */
76      blk_unref(s->base);
77      s->base = NULL;
78  
79      /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
80       * identically. Further work is needed to disambiguate these cases. */
81      return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
82                                    s->backing_file_str);
83  }
84  
/*
 * .abort callback of the commit job (see commit_job_driver).
 *
 * Undoes the graph changes made for the job: unfreezes the backing chain if
 * it is still frozen, drops the job's BlockBackend on base and its
 * references to the chain nodes, and finally replaces the commit_top filter
 * by its backing node.  The order of the steps matters; see the comments
 * below.
 */
static void commit_abort(Job *job)
{
    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
    BlockDriverState *top_bs = blk_bs(s->top);

    /* commit_prepare() clears chain_frozen after unfreezing, so this only
     * runs if the chain has not been unfrozen yet */
    if (s->chain_frozen) {
        bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
    }

    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
    bdrv_ref(top_bs);
    bdrv_ref(s->commit_top_bs);

    /* s->base is already NULL if commit_prepare() has released it */
    if (s->base) {
        blk_unref(s->base);
    }

    /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
     * can succeed */
    block_job_remove_all_bdrv(&s->common);

    /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
     * commit filter driver from the backing chain now. Do this as the final
     * step so that the 'consistent read' permission can be granted.
     *
     * XXX Can (or should) we somehow keep 'consistent read' blocked even
     * after the failed/cancelled commit job is gone? If we already wrote
     * something to base, the intermediate images aren't valid any more. */
    bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
                            &error_abort);
    bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
                      &error_abort);

    /* Release the temporary references taken above */
    bdrv_unref(s->commit_top_bs);
    bdrv_unref(top_bs);
}
121  
122  static void commit_clean(Job *job)
123  {
124      CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
125  
126      /* restore base open flags here if appropriate (e.g., change the base back
127       * to r/o). These reopens do not need to be atomic, since we won't abort
128       * even on failure here */
129      if (s->base_read_only) {
130          bdrv_reopen_set_read_only(s->base_bs, true, NULL);
131      }
132  
133      g_free(s->backing_file_str);
134      blk_unref(s->top);
135  }
136  
137  static int coroutine_fn commit_run(Job *job, Error **errp)
138  {
139      CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
140      int64_t offset;
141      uint64_t delay_ns = 0;
142      int ret = 0;
143      int64_t n = 0; /* bytes */
144      void *buf = NULL;
145      int bytes_written = 0;
146      int64_t len, base_len;
147  
148      ret = len = blk_getlength(s->top);
149      if (len < 0) {
150          goto out;
151      }
152      job_progress_set_remaining(&s->common.job, len);
153  
154      ret = base_len = blk_getlength(s->base);
155      if (base_len < 0) {
156          goto out;
157      }
158  
159      if (base_len < len) {
160          ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL);
161          if (ret) {
162              goto out;
163          }
164      }
165  
166      buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
167  
168      for (offset = 0; offset < len; offset += n) {
169          bool copy;
170  
171          /* Note that even when no rate limit is applied we need to yield
172           * with no pending I/O here so that bdrv_drain_all() returns.
173           */
174          job_sleep_ns(&s->common.job, delay_ns);
175          if (job_is_cancelled(&s->common.job)) {
176              break;
177          }
178          /* Copy if allocated above the base */
179          ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
180                                        offset, COMMIT_BUFFER_SIZE, &n);
181          copy = (ret == 1);
182          trace_commit_one_iteration(s, offset, n, ret);
183          if (copy) {
184              ret = commit_populate(s->top, s->base, offset, n, buf);
185              bytes_written += n;
186          }
187          if (ret < 0) {
188              BlockErrorAction action =
189                  block_job_error_action(&s->common, false, s->on_error, -ret);
190              if (action == BLOCK_ERROR_ACTION_REPORT) {
191                  goto out;
192              } else {
193                  n = 0;
194                  continue;
195              }
196          }
197          /* Publish progress */
198          job_progress_update(&s->common.job, n);
199  
200          if (copy) {
201              delay_ns = block_job_ratelimit_get_delay(&s->common, n);
202          } else {
203              delay_ns = 0;
204          }
205      }
206  
207      ret = 0;
208  
209  out:
210      qemu_vfree(buf);
211  
212      return ret;
213  }
214  
215  static const BlockJobDriver commit_job_driver = {
216      .job_driver = {
217          .instance_size = sizeof(CommitBlockJob),
218          .job_type      = JOB_TYPE_COMMIT,
219          .free          = block_job_free,
220          .user_resume   = block_job_user_resume,
221          .drain         = block_job_drain,
222          .run           = commit_run,
223          .prepare       = commit_prepare,
224          .abort         = commit_abort,
225          .clean         = commit_clean
226      },
227  };
228  
229  static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
230      uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
231  {
232      return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
233  }
234  
235  static void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
236  {
237      pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
238              bs->backing->bs->filename);
239  }
240  
241  static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
242                                         const BdrvChildRole *role,
243                                         BlockReopenQueue *reopen_queue,
244                                         uint64_t perm, uint64_t shared,
245                                         uint64_t *nperm, uint64_t *nshared)
246  {
247      *nperm = 0;
248      *nshared = BLK_PERM_ALL;
249  }
250  
251  /* Dummy node that provides consistent read to its users without requiring it
252   * from its backing file and that allows writes on the backing file chain. */
253  static BlockDriver bdrv_commit_top = {
254      .format_name                = "commit_top",
255      .bdrv_co_preadv             = bdrv_commit_top_preadv,
256      .bdrv_co_block_status       = bdrv_co_block_status_from_backing,
257      .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
258      .bdrv_child_perm            = bdrv_commit_top_child_perm,
259  };
260  
/*
 * Create and start a live commit job that copies the data of the nodes from
 * @top down to (but not including) @base into @base.
 *
 * @job_id:           forwarded to block_job_create()
 * @bs:               node the block job is created on; must differ from @top
 * @base:             node that receives the data; reopened read-write first
 *                    if it is currently read-only
 * @top:              topmost node to commit; must have @base in its backing
 *                    chain and must differ from @base
 * @creation_flags:   forwarded to block_job_create()
 * @speed:            forwarded to block_job_create()
 * @on_error:         error policy stored in the job
 * @backing_file_str: copied into the job and later passed to
 *                    bdrv_drop_intermediate(); may be NULL
 * @filter_node_name: node name for the commit_top filter that is inserted
 *                    above @top; if NULL the filter is marked implicit
 * @errp:             set on failure
 *
 * On failure everything that has been set up so far is torn down again on
 * the fail path and the job is never started.
 */
void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
                  const char *filter_node_name, Error **errp)
{
    CommitBlockJob *s;
    BlockDriverState *iter;
    BlockDriverState *commit_top_bs = NULL;
    Error *local_err = NULL;
    int ret;

    assert(top != bs);
    if (top == base) {
        error_setg(errp, "Invalid files for merge: top and base are the same");
        return;
    }

    s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
                         speed, creation_flags, NULL, NULL, errp);
    if (!s) {
        return;
    }

    /* convert base to r/w, if necessary */
    s->base_read_only = bdrv_is_read_only(base);
    if (s->base_read_only) {
        if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
            goto fail;
        }
    }

    /* Insert commit_top block node above top, so we can block consistent read
     * on the backing chain below it */
    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
                                         errp);
    if (commit_top_bs == NULL) {
        goto fail;
    }
    if (!filter_node_name) {
        commit_top_bs->implicit = true;
    }
    commit_top_bs->total_sectors = top->total_sectors;
    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));

    bdrv_append(commit_top_bs, top, &local_err);
    if (local_err) {
        /* The filter never made it into the graph, so the fail path must
         * not try to replace it */
        commit_top_bs = NULL;
        error_propagate(errp, local_err);
        goto fail;
    }

    s->commit_top_bs = commit_top_bs;

    /* Block all nodes between top and base, because they will
     * disappear from the chain after this operation. */
    assert(bdrv_chain_contains(top, base));
    for (iter = top; iter != base; iter = backing_bs(iter)) {
        /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
         * at s->base (if writes are blocked for a node, they are also blocked
         * for its backing file). The other options would be a second filter
         * driver above s->base. */
        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
                                 errp);
        if (ret < 0) {
            goto fail;
        }
    }

    if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
        goto fail;
    }
    /* Remember the freeze so that it is undone on failure or abort */
    s->chain_frozen = true;

    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
    if (ret < 0) {
        goto fail;
    }

    /* BlockBackend used for writing to (and resizing) base */
    s->base = blk_new(BLK_PERM_CONSISTENT_READ
                      | BLK_PERM_WRITE
                      | BLK_PERM_RESIZE,
                      BLK_PERM_CONSISTENT_READ
                      | BLK_PERM_GRAPH_MOD
                      | BLK_PERM_WRITE_UNCHANGED);
    ret = blk_insert_bs(s->base, base, errp);
    if (ret < 0) {
        goto fail;
    }
    s->base_bs = base;

    /* Required permissions are already taken with block_job_add_bdrv() */
    s->top = blk_new(0, BLK_PERM_ALL);
    ret = blk_insert_bs(s->top, top, errp);
    if (ret < 0) {
        goto fail;
    }

    s->backing_file_str = g_strdup(backing_file_str);
    s->on_error = on_error;

    trace_commit_start(bs, base, top, s);
    job_start(&s->common.job);
    return;

fail:
    /* Undo, in reverse, whatever part of the setup has been completed */
    if (s->chain_frozen) {
        bdrv_unfreeze_backing_chain(commit_top_bs, base);
    }
    if (s->base) {
        blk_unref(s->base);
    }
    if (s->top) {
        blk_unref(s->top);
    }
    if (s->base_read_only) {
        bdrv_reopen_set_read_only(base, true, NULL);
    }
    job_early_fail(&s->common.job);
    /* commit_top_bs has to be replaced after deleting the block job,
     * otherwise this would fail because of lack of permissions. */
    if (commit_top_bs) {
        bdrv_replace_node(commit_top_bs, top, &error_abort);
    }
}
387  
388  
389  #define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
390  
391  /* commit COW file into the raw image */
392  int bdrv_commit(BlockDriverState *bs)
393  {
394      BlockBackend *src, *backing;
395      BlockDriverState *backing_file_bs = NULL;
396      BlockDriverState *commit_top_bs = NULL;
397      BlockDriver *drv = bs->drv;
398      int64_t offset, length, backing_length;
399      int ro;
400      int64_t n;
401      int ret = 0;
402      uint8_t *buf = NULL;
403      Error *local_err = NULL;
404  
405      if (!drv)
406          return -ENOMEDIUM;
407  
408      if (!bs->backing) {
409          return -ENOTSUP;
410      }
411  
412      if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
413          bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
414          return -EBUSY;
415      }
416  
417      ro = bs->backing->bs->read_only;
418  
419      if (ro) {
420          if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) {
421              return -EACCES;
422          }
423      }
424  
425      src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
426      backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
427  
428      ret = blk_insert_bs(src, bs, &local_err);
429      if (ret < 0) {
430          error_report_err(local_err);
431          goto ro_cleanup;
432      }
433  
434      /* Insert commit_top block node above backing, so we can write to it */
435      backing_file_bs = backing_bs(bs);
436  
437      commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
438                                           &local_err);
439      if (commit_top_bs == NULL) {
440          error_report_err(local_err);
441          goto ro_cleanup;
442      }
443      bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
444  
445      bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
446      bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
447  
448      ret = blk_insert_bs(backing, backing_file_bs, &local_err);
449      if (ret < 0) {
450          error_report_err(local_err);
451          goto ro_cleanup;
452      }
453  
454      length = blk_getlength(src);
455      if (length < 0) {
456          ret = length;
457          goto ro_cleanup;
458      }
459  
460      backing_length = blk_getlength(backing);
461      if (backing_length < 0) {
462          ret = backing_length;
463          goto ro_cleanup;
464      }
465  
466      /* If our top snapshot is larger than the backing file image,
467       * grow the backing file image if possible.  If not possible,
468       * we must return an error */
469      if (length > backing_length) {
470          ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
471          if (ret < 0) {
472              error_report_err(local_err);
473              goto ro_cleanup;
474          }
475      }
476  
477      /* blk_try_blockalign() for src will choose an alignment that works for
478       * backing as well, so no need to compare the alignment manually. */
479      buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
480      if (buf == NULL) {
481          ret = -ENOMEM;
482          goto ro_cleanup;
483      }
484  
485      for (offset = 0; offset < length; offset += n) {
486          ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
487          if (ret < 0) {
488              goto ro_cleanup;
489          }
490          if (ret) {
491              ret = blk_pread(src, offset, buf, n);
492              if (ret < 0) {
493                  goto ro_cleanup;
494              }
495  
496              ret = blk_pwrite(backing, offset, buf, n, 0);
497              if (ret < 0) {
498                  goto ro_cleanup;
499              }
500          }
501      }
502  
503      if (drv->bdrv_make_empty) {
504          ret = drv->bdrv_make_empty(bs);
505          if (ret < 0) {
506              goto ro_cleanup;
507          }
508          blk_flush(src);
509      }
510  
511      /*
512       * Make sure all data we wrote to the backing device is actually
513       * stable on disk.
514       */
515      blk_flush(backing);
516  
517      ret = 0;
518  ro_cleanup:
519      qemu_vfree(buf);
520  
521      blk_unref(backing);
522      if (backing_file_bs) {
523          bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
524      }
525      bdrv_unref(commit_top_bs);
526      blk_unref(src);
527  
528      if (ro) {
529          /* ignoring error return here */
530          bdrv_reopen_set_read_only(bs->backing->bs, true, NULL);
531      }
532  
533      return ret;
534  }
535