135a6ed4fSzhanghailiang /*
235a6ed4fSzhanghailiang * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
335a6ed4fSzhanghailiang * (a.k.a. Fault Tolerance or Continuous Replication)
435a6ed4fSzhanghailiang *
535a6ed4fSzhanghailiang * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
635a6ed4fSzhanghailiang * Copyright (c) 2016 FUJITSU LIMITED
735a6ed4fSzhanghailiang * Copyright (c) 2016 Intel Corporation
835a6ed4fSzhanghailiang *
935a6ed4fSzhanghailiang * This work is licensed under the terms of the GNU GPL, version 2 or
1035a6ed4fSzhanghailiang * later. See the COPYING file in the top-level directory.
1135a6ed4fSzhanghailiang */
1235a6ed4fSzhanghailiang
1335a6ed4fSzhanghailiang #include "qemu/osdep.h"
140b827d5eSzhanghailiang #include "sysemu/sysemu.h"
15e688df6bSMarkus Armbruster #include "qapi/error.h"
169af23989SMarkus Armbruster #include "qapi/qapi-commands-migration.h"
176666c96aSJuan Quintela #include "migration.h"
1808a0aee1SJuan Quintela #include "qemu-file.h"
1920a519a0SJuan Quintela #include "savevm.h"
2035a6ed4fSzhanghailiang #include "migration/colo.h"
21a91246c9Szhanghailiang #include "io/channel-buffer.h"
220b827d5eSzhanghailiang #include "trace.h"
2356ba83d2Szhanghailiang #include "qemu/error-report.h"
24db725815SMarkus Armbruster #include "qemu/main-loop.h"
25d4842052SMarkus Armbruster #include "qemu/rcu.h"
26d89e666eSzhanghailiang #include "migration/failover.h"
270393031aSzhanghailiang #include "migration/ram.h"
28b0262955SPaolo Bonzini #include "block/replication.h"
29131b2153SZhang Chen #include "net/colo-compare.h"
30131b2153SZhang Chen #include "net/colo.h"
318e48ac95SZhang Chen #include "block/block.h"
329ecff6d6Szhanghailiang #include "qapi/qapi-events-migration.h"
333f6df99dSZhang Chen #include "sysemu/cpus.h"
3454d31236SMarkus Armbruster #include "sysemu/runstate.h"
357b343530Szhanghailiang #include "net/filter.h"
361f0776f1SJuan Quintela #include "options.h"
3735a6ed4fSzhanghailiang
38a8664ba5Szhanghailiang static bool vmstate_loading;
39131b2153SZhang Chen static Notifier packets_compare_notifier;
40a8664ba5Szhanghailiang
/* Users need to know the COLO mode after a COLO failover */
425ed0decaSZhang Chen static COLOMode last_colo_mode;
435ed0decaSZhang Chen
44a91246c9Szhanghailiang #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
45a91246c9Szhanghailiang
migration_in_colo_state(void)460b827d5eSzhanghailiang bool migration_in_colo_state(void)
470b827d5eSzhanghailiang {
480b827d5eSzhanghailiang MigrationState *s = migrate_get_current();
490b827d5eSzhanghailiang
500b827d5eSzhanghailiang return (s->state == MIGRATION_STATUS_COLO);
510b827d5eSzhanghailiang }
520b827d5eSzhanghailiang
migration_incoming_in_colo_state(void)5325d0c16fSzhanghailiang bool migration_incoming_in_colo_state(void)
5425d0c16fSzhanghailiang {
5525d0c16fSzhanghailiang MigrationIncomingState *mis = migration_incoming_get_current();
5625d0c16fSzhanghailiang
5725d0c16fSzhanghailiang return mis && (mis->state == MIGRATION_STATUS_COLO);
5825d0c16fSzhanghailiang }
5925d0c16fSzhanghailiang
colo_runstate_is_stopped(void)60b3f7f0c5Szhanghailiang static bool colo_runstate_is_stopped(void)
61b3f7f0c5Szhanghailiang {
62b3f7f0c5Szhanghailiang return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
63b3f7f0c5Szhanghailiang }
64b3f7f0c5Szhanghailiang
colo_checkpoint_notify(void)657395127fSSteve Sistare static void colo_checkpoint_notify(void)
664332ffcdSVladimir Sementsov-Ogievskiy {
677395127fSSteve Sistare MigrationState *s = migrate_get_current();
684332ffcdSVladimir Sementsov-Ogievskiy int64_t next_notify_time;
694332ffcdSVladimir Sementsov-Ogievskiy
704332ffcdSVladimir Sementsov-Ogievskiy qemu_event_set(&s->colo_checkpoint_event);
714332ffcdSVladimir Sementsov-Ogievskiy s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
724332ffcdSVladimir Sementsov-Ogievskiy next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay();
734332ffcdSVladimir Sementsov-Ogievskiy timer_mod(s->colo_delay_timer, next_notify_time);
744332ffcdSVladimir Sementsov-Ogievskiy }
754332ffcdSVladimir Sementsov-Ogievskiy
/*
 * Timer callback adapter: the opaque argument is unused, the callback
 * simply triggers a checkpoint notification.
 */
static void colo_checkpoint_notify_timer(void *opaque)
{
    colo_checkpoint_notify();
}
807395127fSSteve Sistare
/*
 * Trigger a checkpoint notification (which also re-arms the delay timer),
 * but only while the outgoing migration is in the COLO state.
 */
void colo_checkpoint_delay_set(void)
{
    if (!migration_in_colo_state()) {
        return;
    }
    colo_checkpoint_notify();
}
874332ffcdSVladimir Sementsov-Ogievskiy
secondary_vm_do_failover(void)889d2db376Szhanghailiang static void secondary_vm_do_failover(void)
899d2db376Szhanghailiang {
903ebb9c4fSZhang Chen /* COLO needs enable block-replication */
919d2db376Szhanghailiang int old_state;
929d2db376Szhanghailiang MigrationIncomingState *mis = migration_incoming_get_current();
938e48ac95SZhang Chen Error *local_err = NULL;
949d2db376Szhanghailiang
95a8664ba5Szhanghailiang /* Can not do failover during the process of VM's loading VMstate, Or
96a8664ba5Szhanghailiang * it will break the secondary VM.
97a8664ba5Szhanghailiang */
98a8664ba5Szhanghailiang if (vmstate_loading) {
99a8664ba5Szhanghailiang old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
100a8664ba5Szhanghailiang FAILOVER_STATUS_RELAUNCH);
101a8664ba5Szhanghailiang if (old_state != FAILOVER_STATUS_ACTIVE) {
102a8664ba5Szhanghailiang error_report("Unknown error while do failover for secondary VM,"
103977c736fSMarkus Armbruster "old_state: %s", FailoverStatus_str(old_state));
104a8664ba5Szhanghailiang }
105a8664ba5Szhanghailiang return;
106a8664ba5Szhanghailiang }
107a8664ba5Szhanghailiang
1089d2db376Szhanghailiang migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
1099d2db376Szhanghailiang MIGRATION_STATUS_COMPLETED);
1109d2db376Szhanghailiang
1118e48ac95SZhang Chen replication_stop_all(true, &local_err);
1128e48ac95SZhang Chen if (local_err) {
1138e48ac95SZhang Chen error_report_err(local_err);
11427d07fcfSVladimir Sementsov-Ogievskiy local_err = NULL;
1158e48ac95SZhang Chen }
1168e48ac95SZhang Chen
1177b343530Szhanghailiang /* Notify all filters of all NIC to do checkpoint */
1187b343530Szhanghailiang colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
1197b343530Szhanghailiang if (local_err) {
1207b343530Szhanghailiang error_report_err(local_err);
1217b343530Szhanghailiang }
1227b343530Szhanghailiang
1239d2db376Szhanghailiang if (!autostart) {
1249d2db376Szhanghailiang error_report("\"-S\" qemu option will be ignored in secondary side");
1259d2db376Szhanghailiang /* recover runstate to normal migration finish state */
1269d2db376Szhanghailiang autostart = true;
1279d2db376Szhanghailiang }
128c937b9a6Szhanghailiang /*
129c937b9a6Szhanghailiang * Make sure COLO incoming thread not block in recv or send,
130c937b9a6Szhanghailiang * If mis->from_src_file and mis->to_src_file use the same fd,
131c937b9a6Szhanghailiang * The second shutdown() will return -1, we ignore this value,
132c937b9a6Szhanghailiang * It is harmless.
133c937b9a6Szhanghailiang */
134c937b9a6Szhanghailiang if (mis->from_src_file) {
135c937b9a6Szhanghailiang qemu_file_shutdown(mis->from_src_file);
136c937b9a6Szhanghailiang }
137c937b9a6Szhanghailiang if (mis->to_src_file) {
138c937b9a6Szhanghailiang qemu_file_shutdown(mis->to_src_file);
139c937b9a6Szhanghailiang }
1409d2db376Szhanghailiang
1419d2db376Szhanghailiang old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
1429d2db376Szhanghailiang FAILOVER_STATUS_COMPLETED);
1439d2db376Szhanghailiang if (old_state != FAILOVER_STATUS_ACTIVE) {
1449d2db376Szhanghailiang error_report("Incorrect state (%s) while doing failover for "
145977c736fSMarkus Armbruster "secondary VM", FailoverStatus_str(old_state));
1469d2db376Szhanghailiang return;
1479d2db376Szhanghailiang }
148c937b9a6Szhanghailiang /* Notify COLO incoming thread that failover work is finished */
149c937b9a6Szhanghailiang qemu_sem_post(&mis->colo_incoming_sem);
1501fe6ab26SZhang Chen
1519d2db376Szhanghailiang /* For Secondary VM, jump to incoming co */
152dd42ce24SVladimir Sementsov-Ogievskiy if (mis->colo_incoming_co) {
153dd42ce24SVladimir Sementsov-Ogievskiy qemu_coroutine_enter(mis->colo_incoming_co);
1549d2db376Szhanghailiang }
1559d2db376Szhanghailiang }
1569d2db376Szhanghailiang
primary_vm_do_failover(void)157b3f7f0c5Szhanghailiang static void primary_vm_do_failover(void)
158b3f7f0c5Szhanghailiang {
159b3f7f0c5Szhanghailiang MigrationState *s = migrate_get_current();
160b3f7f0c5Szhanghailiang int old_state;
1618e48ac95SZhang Chen Error *local_err = NULL;
162b3f7f0c5Szhanghailiang
163b3f7f0c5Szhanghailiang migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
164b3f7f0c5Szhanghailiang MIGRATION_STATUS_COMPLETED);
1652518aec1Szhanghailiang /*
1662518aec1Szhanghailiang * kick COLO thread which might wait at
1672518aec1Szhanghailiang * qemu_sem_wait(&s->colo_checkpoint_sem).
1682518aec1Szhanghailiang */
1697395127fSSteve Sistare colo_checkpoint_notify();
170b3f7f0c5Szhanghailiang
171c937b9a6Szhanghailiang /*
172c937b9a6Szhanghailiang * Wake up COLO thread which may blocked in recv() or send(),
173c937b9a6Szhanghailiang * The s->rp_state.from_dst_file and s->to_dst_file may use the
174c937b9a6Szhanghailiang * same fd, but we still shutdown the fd for twice, it is harmless.
175c937b9a6Szhanghailiang */
176c937b9a6Szhanghailiang if (s->to_dst_file) {
177c937b9a6Szhanghailiang qemu_file_shutdown(s->to_dst_file);
178c937b9a6Szhanghailiang }
179c937b9a6Szhanghailiang if (s->rp_state.from_dst_file) {
180c937b9a6Szhanghailiang qemu_file_shutdown(s->rp_state.from_dst_file);
181c937b9a6Szhanghailiang }
182c937b9a6Szhanghailiang
183b3f7f0c5Szhanghailiang old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
184b3f7f0c5Szhanghailiang FAILOVER_STATUS_COMPLETED);
185b3f7f0c5Szhanghailiang if (old_state != FAILOVER_STATUS_ACTIVE) {
186b3f7f0c5Szhanghailiang error_report("Incorrect state (%s) while doing failover for Primary VM",
187977c736fSMarkus Armbruster FailoverStatus_str(old_state));
188b3f7f0c5Szhanghailiang return;
189b3f7f0c5Szhanghailiang }
1908e48ac95SZhang Chen
1918e48ac95SZhang Chen replication_stop_all(true, &local_err);
1928e48ac95SZhang Chen if (local_err) {
1938e48ac95SZhang Chen error_report_err(local_err);
1948e48ac95SZhang Chen local_err = NULL;
1958e48ac95SZhang Chen }
1968e48ac95SZhang Chen
197c937b9a6Szhanghailiang /* Notify COLO thread that failover work is finished */
198c937b9a6Szhanghailiang qemu_sem_post(&s->colo_exit_sem);
199b3f7f0c5Szhanghailiang }
200b3f7f0c5Szhanghailiang
get_colo_mode(void)201aad555c2SZhang Chen COLOMode get_colo_mode(void)
202aad555c2SZhang Chen {
203aad555c2SZhang Chen if (migration_in_colo_state()) {
204aad555c2SZhang Chen return COLO_MODE_PRIMARY;
205aad555c2SZhang Chen } else if (migration_incoming_in_colo_state()) {
206aad555c2SZhang Chen return COLO_MODE_SECONDARY;
207aad555c2SZhang Chen } else {
20841b6b779SZhang Chen return COLO_MODE_NONE;
209aad555c2SZhang Chen }
210aad555c2SZhang Chen }
211aad555c2SZhang Chen
colo_do_failover(void)212c0913d1dSZhang Chen void colo_do_failover(void)
213b3f7f0c5Szhanghailiang {
214b3f7f0c5Szhanghailiang /* Make sure VM stopped while failover happened. */
215b3f7f0c5Szhanghailiang if (!colo_runstate_is_stopped()) {
216b3f7f0c5Szhanghailiang vm_stop_force_state(RUN_STATE_COLO);
217b3f7f0c5Szhanghailiang }
218b3f7f0c5Szhanghailiang
2192b9f6bf3SRao, Lei switch (last_colo_mode = get_colo_mode()) {
22082cd368cSZhang Chen case COLO_MODE_PRIMARY:
221b3f7f0c5Szhanghailiang primary_vm_do_failover();
22282cd368cSZhang Chen break;
22382cd368cSZhang Chen case COLO_MODE_SECONDARY:
2249d2db376Szhanghailiang secondary_vm_do_failover();
22582cd368cSZhang Chen break;
22682cd368cSZhang Chen default:
22782cd368cSZhang Chen error_report("colo_do_failover failed because the colo mode"
22882cd368cSZhang Chen " could not be obtained");
229b3f7f0c5Szhanghailiang }
230b3f7f0c5Szhanghailiang }
231b3f7f0c5Szhanghailiang
qmp_xen_set_replication(bool enable,bool primary,bool has_failover,bool failover,Error ** errp)2322c9639ecSZhang Chen void qmp_xen_set_replication(bool enable, bool primary,
2332c9639ecSZhang Chen bool has_failover, bool failover,
2342c9639ecSZhang Chen Error **errp)
2352c9639ecSZhang Chen {
2362c9639ecSZhang Chen ReplicationMode mode = primary ?
2372c9639ecSZhang Chen REPLICATION_MODE_PRIMARY :
2382c9639ecSZhang Chen REPLICATION_MODE_SECONDARY;
2392c9639ecSZhang Chen
2402c9639ecSZhang Chen if (has_failover && enable) {
2412c9639ecSZhang Chen error_setg(errp, "Parameter 'failover' is only for"
2422c9639ecSZhang Chen " stopping replication");
2432c9639ecSZhang Chen return;
2442c9639ecSZhang Chen }
2452c9639ecSZhang Chen
2462c9639ecSZhang Chen if (enable) {
2472c9639ecSZhang Chen replication_start_all(mode, errp);
2482c9639ecSZhang Chen } else {
2492c9639ecSZhang Chen if (!has_failover) {
2502c9639ecSZhang Chen failover = NULL;
2512c9639ecSZhang Chen }
2522c9639ecSZhang Chen replication_stop_all(failover, failover ? NULL : errp);
2532c9639ecSZhang Chen }
2542c9639ecSZhang Chen }
2552c9639ecSZhang Chen
qmp_query_xen_replication_status(Error ** errp)256daa33c52SZhang Chen ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
257daa33c52SZhang Chen {
258daa33c52SZhang Chen Error *err = NULL;
259daa33c52SZhang Chen ReplicationStatus *s = g_new0(ReplicationStatus, 1);
260daa33c52SZhang Chen
261daa33c52SZhang Chen replication_get_error_all(&err);
262daa33c52SZhang Chen if (err) {
263daa33c52SZhang Chen s->error = true;
264daa33c52SZhang Chen s->desc = g_strdup(error_get_pretty(err));
265daa33c52SZhang Chen } else {
266daa33c52SZhang Chen s->error = false;
267daa33c52SZhang Chen }
268daa33c52SZhang Chen
269daa33c52SZhang Chen error_free(err);
270daa33c52SZhang Chen return s;
271daa33c52SZhang Chen }
272daa33c52SZhang Chen
qmp_xen_colo_do_checkpoint(Error ** errp)273daa33c52SZhang Chen void qmp_xen_colo_do_checkpoint(Error **errp)
274daa33c52SZhang Chen {
275735527e1SMarkus Armbruster Error *err = NULL;
276735527e1SMarkus Armbruster
277735527e1SMarkus Armbruster replication_do_checkpoint_all(&err);
278735527e1SMarkus Armbruster if (err) {
279735527e1SMarkus Armbruster error_propagate(errp, err);
280735527e1SMarkus Armbruster return;
281735527e1SMarkus Armbruster }
2820e8818f0SZhang Chen /* Notify all filters of all NIC to do checkpoint */
2830e8818f0SZhang Chen colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
284daa33c52SZhang Chen }
285daa33c52SZhang Chen
qmp_query_colo_status(Error ** errp)286f56c0065SZhang Chen COLOStatus *qmp_query_colo_status(Error **errp)
287f56c0065SZhang Chen {
288f56c0065SZhang Chen COLOStatus *s = g_new0(COLOStatus, 1);
289f56c0065SZhang Chen
290f56c0065SZhang Chen s->mode = get_colo_mode();
2915ed0decaSZhang Chen s->last_mode = last_colo_mode;
292f56c0065SZhang Chen
293f56c0065SZhang Chen switch (failover_get_state()) {
294f56c0065SZhang Chen case FAILOVER_STATUS_NONE:
295f56c0065SZhang Chen s->reason = COLO_EXIT_REASON_NONE;
296f56c0065SZhang Chen break;
2971fe6ab26SZhang Chen case FAILOVER_STATUS_COMPLETED:
298f56c0065SZhang Chen s->reason = COLO_EXIT_REASON_REQUEST;
299f56c0065SZhang Chen break;
300f56c0065SZhang Chen default:
3013a43ac47SZhang Chen if (migration_in_colo_state()) {
3023a43ac47SZhang Chen s->reason = COLO_EXIT_REASON_PROCESSING;
3033a43ac47SZhang Chen } else {
304f56c0065SZhang Chen s->reason = COLO_EXIT_REASON_ERROR;
305f56c0065SZhang Chen }
3063a43ac47SZhang Chen }
307f56c0065SZhang Chen
308f56c0065SZhang Chen return s;
309f56c0065SZhang Chen }
310f56c0065SZhang Chen
colo_send_message(QEMUFile * f,COLOMessage msg,Error ** errp)3114f97558eSzhanghailiang static void colo_send_message(QEMUFile *f, COLOMessage msg,
3124f97558eSzhanghailiang Error **errp)
3134f97558eSzhanghailiang {
3144f97558eSzhanghailiang int ret;
3154f97558eSzhanghailiang
3164f97558eSzhanghailiang if (msg >= COLO_MESSAGE__MAX) {
3174f97558eSzhanghailiang error_setg(errp, "%s: Invalid message", __func__);
3184f97558eSzhanghailiang return;
3194f97558eSzhanghailiang }
3204f97558eSzhanghailiang qemu_put_be32(f, msg);
321be07a0edSJuan Quintela ret = qemu_fflush(f);
3224f97558eSzhanghailiang if (ret < 0) {
3234f97558eSzhanghailiang error_setg_errno(errp, -ret, "Can't send COLO message");
3244f97558eSzhanghailiang }
325977c736fSMarkus Armbruster trace_colo_send_message(COLOMessage_str(msg));
3264f97558eSzhanghailiang }
3274f97558eSzhanghailiang
colo_send_message_value(QEMUFile * f,COLOMessage msg,uint64_t value,Error ** errp)328a91246c9Szhanghailiang static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
329a91246c9Szhanghailiang uint64_t value, Error **errp)
330a91246c9Szhanghailiang {
331a91246c9Szhanghailiang Error *local_err = NULL;
332a91246c9Szhanghailiang int ret;
333a91246c9Szhanghailiang
334a91246c9Szhanghailiang colo_send_message(f, msg, &local_err);
335a91246c9Szhanghailiang if (local_err) {
336a91246c9Szhanghailiang error_propagate(errp, local_err);
337a91246c9Szhanghailiang return;
338a91246c9Szhanghailiang }
339a91246c9Szhanghailiang qemu_put_be64(f, value);
340be07a0edSJuan Quintela ret = qemu_fflush(f);
341a91246c9Szhanghailiang if (ret < 0) {
342a91246c9Szhanghailiang error_setg_errno(errp, -ret, "Failed to send value for message:%s",
343977c736fSMarkus Armbruster COLOMessage_str(msg));
344a91246c9Szhanghailiang }
345a91246c9Szhanghailiang }
346a91246c9Szhanghailiang
colo_receive_message(QEMUFile * f,Error ** errp)3474f97558eSzhanghailiang static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
3484f97558eSzhanghailiang {
3494f97558eSzhanghailiang COLOMessage msg;
3504f97558eSzhanghailiang int ret;
3514f97558eSzhanghailiang
3524f97558eSzhanghailiang msg = qemu_get_be32(f);
3534f97558eSzhanghailiang ret = qemu_file_get_error(f);
3544f97558eSzhanghailiang if (ret < 0) {
3554f97558eSzhanghailiang error_setg_errno(errp, -ret, "Can't receive COLO message");
3564f97558eSzhanghailiang return msg;
3574f97558eSzhanghailiang }
3584f97558eSzhanghailiang if (msg >= COLO_MESSAGE__MAX) {
3594f97558eSzhanghailiang error_setg(errp, "%s: Invalid message", __func__);
3604f97558eSzhanghailiang return msg;
3614f97558eSzhanghailiang }
362977c736fSMarkus Armbruster trace_colo_receive_message(COLOMessage_str(msg));
3634f97558eSzhanghailiang return msg;
3644f97558eSzhanghailiang }
3654f97558eSzhanghailiang
colo_receive_check_message(QEMUFile * f,COLOMessage expect_msg,Error ** errp)3664f97558eSzhanghailiang static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
3674f97558eSzhanghailiang Error **errp)
3684f97558eSzhanghailiang {
3694f97558eSzhanghailiang COLOMessage msg;
3704f97558eSzhanghailiang Error *local_err = NULL;
3714f97558eSzhanghailiang
3724f97558eSzhanghailiang msg = colo_receive_message(f, &local_err);
3734f97558eSzhanghailiang if (local_err) {
3744f97558eSzhanghailiang error_propagate(errp, local_err);
3754f97558eSzhanghailiang return;
3764f97558eSzhanghailiang }
3774f97558eSzhanghailiang if (msg != expect_msg) {
3784f97558eSzhanghailiang error_setg(errp, "Unexpected COLO message %d, expected %d",
3794f97558eSzhanghailiang msg, expect_msg);
3804f97558eSzhanghailiang }
3814f97558eSzhanghailiang }
3824f97558eSzhanghailiang
colo_receive_message_value(QEMUFile * f,uint32_t expect_msg,Error ** errp)3834291d372Szhanghailiang static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
3844291d372Szhanghailiang Error **errp)
3854291d372Szhanghailiang {
3864291d372Szhanghailiang Error *local_err = NULL;
3874291d372Szhanghailiang uint64_t value;
3884291d372Szhanghailiang int ret;
3894291d372Szhanghailiang
3904291d372Szhanghailiang colo_receive_check_message(f, expect_msg, &local_err);
3914291d372Szhanghailiang if (local_err) {
3924291d372Szhanghailiang error_propagate(errp, local_err);
3934291d372Szhanghailiang return 0;
3944291d372Szhanghailiang }
3954291d372Szhanghailiang
3964291d372Szhanghailiang value = qemu_get_be64(f);
3974291d372Szhanghailiang ret = qemu_file_get_error(f);
3984291d372Szhanghailiang if (ret < 0) {
3994291d372Szhanghailiang error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
400977c736fSMarkus Armbruster COLOMessage_str(expect_msg));
4014291d372Szhanghailiang }
4024291d372Szhanghailiang return value;
4034291d372Szhanghailiang }
4044291d372Szhanghailiang
/*
 * Run one checkpoint from the Primary side.
 *
 * Sequence: request a checkpoint and wait for the reply, stop the VM,
 * checkpoint block replication, announce VMSTATE_SEND, save device state
 * into the buffer file @fb (backed by @bioc), send the live state over
 * the migration channel, send the buffered device state (size first),
 * wait for VMSTATE_RECEIVED, notify the packet comparers, wait for
 * VMSTATE_LOADED, and finally restart the VM.
 *
 * The transaction is abandoned if a failover request is seen before the
 * replication checkpoint starts.
 *
 * Returns 0 on success and a negative value on any failure or abort.
 * Errors collected in local_err are reported here, not returned.
 */
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                    COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }
    /* Reset channel-buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    bql_lock();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        bql_unlock();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    bql_unlock();
    trace_colo_vm_state_change("run", "stop");
    /*
     * Failover request bh could be called after vm_stop_force_state(),
     * so the failover state has to be checked again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }
    bql_lock();

    replication_do_checkpoint_all(&local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }
    /* Note: device state is saved into buffer */
    ret = qemu_save_device_state(fb);

    bql_unlock();
    if (ret < 0) {
        goto out;
    }

    if (migrate_auto_converge()) {
        mig_throttle_counter_reset();
    }
    /*
     * Only save VM's live state, which does not include device state.
     * TODO: We may need a timeout mechanism to prevent COLO process
     * to be blocked here.
     */
    qemu_savevm_live_state(s->to_dst_file);

    /*
     * The buffered device state must be complete before its size is
     * announced; a failed flush would make bioc->usage unreliable.
     */
    ret = qemu_fflush(fb);
    if (ret < 0) {
        goto out;
    }

    /*
     * We need the size of the VMstate data in Secondary side,
     * with which we can decide how much data should be read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    ret = qemu_fflush(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_event_reset(&s->colo_checkpoint_event);
    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    bql_lock();
    vm_start();
    bql_unlock();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
        /*
         * ret may already hold 0 from an earlier successful step; an
         * error recorded in local_err must always yield a failure.
         */
        ret = -1;
    }
    return ret;
}
5224f97558eSzhanghailiang
/*
 * Notifier callback installed on packets_compare_notifier: both
 * arguments are unused, the callback just requests a checkpoint.
 */
static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
{
    colo_checkpoint_notify();
}
527131b2153SZhang Chen
/*
 * Main loop of the Primary side COLO thread.
 *
 * Opens the return path from the destination, registers the packet
 * compare notifier, waits for the Secondary to report CHECKPOINT_READY,
 * starts block replication and the VM, and then runs one checkpoint
 * transaction each time the checkpoint event is signalled, for as long as
 * the migration state stays MIGRATION_STATUS_COLO.
 *
 * On exit it emits the COLO_EXIT event, waits on colo_exit_sem, and
 * releases the notifier, delay timer, checkpoint event and return path.
 *
 * Returns immediately, without any of the above, if the COLO mode is not
 * COLO_MODE_PRIMARY.
 */
static void colo_process_checkpoint(MigrationState *s)
{
    QIOChannelBuffer *bioc;
    QEMUFile *fb = NULL;
    Error *local_err = NULL;
    /* Tracks whether packets_compare_notifier needs unregistering */
    bool notifier_registered = false;
    int ret;

    if (get_colo_mode() != COLO_MODE_PRIMARY) {
        error_report("COLO mode must be COLO_MODE_PRIMARY");
        return;
    }

    failover_init_state();

    s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
    if (!s->rp_state.from_dst_file) {
        error_report("Open QEMUFile from_dst_file failed");
        goto out;
    }

    packets_compare_notifier.notify = colo_compare_notify_checkpoint;
    colo_compare_register_notifier(&packets_compare_notifier);
    notifier_registered = true;

    /*
     * Wait for Secondary finish loading VM states and enter COLO
     * restore.
     */
    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_CHECKPOINT_READY, &local_err);
    if (local_err) {
        goto out;
    }
    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    /* Drop the local reference to bioc now that fb wraps the channel */
    object_unref(OBJECT(bioc));

    bql_lock();
    replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }

    vm_start();
    bql_unlock();
    trace_colo_vm_state_change("stop", "run");

    timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
              migrate_checkpoint_delay());

    while (s->state == MIGRATION_STATUS_COLO) {
        if (failover_get_state() != FAILOVER_STATUS_NONE) {
            error_report("failover request");
            goto out;
        }

        qemu_event_wait(&s->colo_checkpoint_event);

        /* The state may have changed while we were waiting */
        if (s->state != MIGRATION_STATUS_COLO) {
            goto out;
        }
        ret = colo_do_checkpoint_transaction(s, bioc, fb);
        if (ret < 0) {
            goto out;
        }
    }

out:
    /* Throw the unreported error message after exited from loop */
    if (local_err) {
        error_report_err(local_err);
    }

    if (fb) {
        qemu_fclose(fb);
    }

    /*
     * There are only two reasons we can get here, some error happened
     * or the user triggered failover.
     */
    switch (failover_get_state()) {
    case FAILOVER_STATUS_COMPLETED:
        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
                                  COLO_EXIT_REASON_REQUEST);
        break;
    default:
        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
                                  COLO_EXIT_REASON_ERROR);
    }

    /* Hope this not to be too long to wait here */
    qemu_sem_wait(&s->colo_exit_sem);
    qemu_sem_destroy(&s->colo_exit_sem);

    /*
     * It is safe to unregister notifier after failover finished.
     * Besides, colo_delay_timer and colo_checkpoint_event can't be
     * released before unregister notifier, or there will be use-after-free
     * error.
     *
     * Only unregister a notifier that was actually registered: the
     * return-path failure above jumps here before registration.
     */
    if (notifier_registered) {
        colo_compare_unregister_notifier(&packets_compare_notifier);
    }
    timer_free(s->colo_delay_timer);
    qemu_event_destroy(&s->colo_checkpoint_event);

    /*
     * Must be called after failover BH is completed,
     * Or the failover BH may shutdown the wrong fd that
     * re-used by other threads after we release here.
     */
    if (s->rp_state.from_dst_file) {
        qemu_fclose(s->rp_state.from_dst_file);
        s->rp_state.from_dst_file = NULL;
    }
}
6430b827d5eSzhanghailiang
/*
 * Set up the Primary side COLO resources (exit semaphore, checkpoint
 * event, delay timer) and run the checkpoint loop.
 *
 * The BQL is released on entry and re-acquired before returning, so the
 * caller presumably holds it -- NOTE(review): confirm against callers.
 */
void migrate_start_colo_process(MigrationState *s)
{
    bql_unlock();

    qemu_sem_init(&s->colo_exit_sem, 0);
    qemu_event_init(&s->colo_checkpoint_event, false);
    s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
                                       colo_checkpoint_notify_timer, NULL);

    colo_process_checkpoint(s);

    bql_lock();
}
65525d0c16fSzhanghailiang
colo_incoming_process_checkpoint(MigrationIncomingState * mis,QEMUFile * fb,QIOChannelBuffer * bioc,Error ** errp)6566ad8ad38Szhanghailiang static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
6576ad8ad38Szhanghailiang QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
6586ad8ad38Szhanghailiang {
6596ad8ad38Szhanghailiang uint64_t total_size;
6606ad8ad38Szhanghailiang uint64_t value;
6616ad8ad38Szhanghailiang Error *local_err = NULL;
6626ad8ad38Szhanghailiang int ret;
6636ad8ad38Szhanghailiang
664195801d7SStefan Hajnoczi bql_lock();
6656ad8ad38Szhanghailiang vm_stop_force_state(RUN_STATE_COLO);
666195801d7SStefan Hajnoczi bql_unlock();
6679c5c8ff2SRao, Lei trace_colo_vm_state_change("run", "stop");
6686ad8ad38Szhanghailiang
6696ad8ad38Szhanghailiang /* FIXME: This is unnecessary for periodic checkpoint mode */
6706ad8ad38Szhanghailiang colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
6716ad8ad38Szhanghailiang &local_err);
6726ad8ad38Szhanghailiang if (local_err) {
6736ad8ad38Szhanghailiang error_propagate(errp, local_err);
6746ad8ad38Szhanghailiang return;
6756ad8ad38Szhanghailiang }
6766ad8ad38Szhanghailiang
6776ad8ad38Szhanghailiang colo_receive_check_message(mis->from_src_file,
6786ad8ad38Szhanghailiang COLO_MESSAGE_VMSTATE_SEND, &local_err);
6796ad8ad38Szhanghailiang if (local_err) {
6806ad8ad38Szhanghailiang error_propagate(errp, local_err);
6816ad8ad38Szhanghailiang return;
6826ad8ad38Szhanghailiang }
6836ad8ad38Szhanghailiang
684195801d7SStefan Hajnoczi bql_lock();
685786d8b8eSLukas Straub cpu_synchronize_all_states();
6866ad8ad38Szhanghailiang ret = qemu_loadvm_state_main(mis->from_src_file, mis);
687195801d7SStefan Hajnoczi bql_unlock();
6886ad8ad38Szhanghailiang
6896ad8ad38Szhanghailiang if (ret < 0) {
6906ad8ad38Szhanghailiang error_setg(errp, "Load VM's live state (ram) error");
6916ad8ad38Szhanghailiang return;
6926ad8ad38Szhanghailiang }
6936ad8ad38Szhanghailiang
6946ad8ad38Szhanghailiang value = colo_receive_message_value(mis->from_src_file,
6956ad8ad38Szhanghailiang COLO_MESSAGE_VMSTATE_SIZE, &local_err);
6966ad8ad38Szhanghailiang if (local_err) {
6976ad8ad38Szhanghailiang error_propagate(errp, local_err);
6986ad8ad38Szhanghailiang return;
6996ad8ad38Szhanghailiang }
7006ad8ad38Szhanghailiang
7016ad8ad38Szhanghailiang /*
7026ad8ad38Szhanghailiang * Read VM device state data into channel buffer,
7036ad8ad38Szhanghailiang * It's better to re-use the memory allocated.
7046ad8ad38Szhanghailiang * Here we need to handle the channel buffer directly.
7056ad8ad38Szhanghailiang */
7066ad8ad38Szhanghailiang if (value > bioc->capacity) {
7076ad8ad38Szhanghailiang bioc->capacity = value;
7086ad8ad38Szhanghailiang bioc->data = g_realloc(bioc->data, bioc->capacity);
7096ad8ad38Szhanghailiang }
7106ad8ad38Szhanghailiang total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
7116ad8ad38Szhanghailiang if (total_size != value) {
7126ad8ad38Szhanghailiang error_setg(errp, "Got %" PRIu64 " VMState data, less than expected"
7136ad8ad38Szhanghailiang " %" PRIu64, total_size, value);
7146ad8ad38Szhanghailiang return;
7156ad8ad38Szhanghailiang }
7166ad8ad38Szhanghailiang bioc->usage = total_size;
7176ad8ad38Szhanghailiang qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
7186ad8ad38Szhanghailiang
7196ad8ad38Szhanghailiang colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
7206ad8ad38Szhanghailiang &local_err);
7216ad8ad38Szhanghailiang if (local_err) {
7226ad8ad38Szhanghailiang error_propagate(errp, local_err);
7236ad8ad38Szhanghailiang return;
7246ad8ad38Szhanghailiang }
7256ad8ad38Szhanghailiang
726195801d7SStefan Hajnoczi bql_lock();
7276ad8ad38Szhanghailiang vmstate_loading = true;
72824fa16f8SLukas Straub colo_flush_ram_cache();
7296ad8ad38Szhanghailiang ret = qemu_load_device_state(fb);
7306ad8ad38Szhanghailiang if (ret < 0) {
7316ad8ad38Szhanghailiang error_setg(errp, "COLO: load device state failed");
73292c932deSLukas Straub vmstate_loading = false;
733195801d7SStefan Hajnoczi bql_unlock();
7346ad8ad38Szhanghailiang return;
7356ad8ad38Szhanghailiang }
7366ad8ad38Szhanghailiang
7376ad8ad38Szhanghailiang replication_get_error_all(&local_err);
7386ad8ad38Szhanghailiang if (local_err) {
7396ad8ad38Szhanghailiang error_propagate(errp, local_err);
74092c932deSLukas Straub vmstate_loading = false;
741195801d7SStefan Hajnoczi bql_unlock();
7426ad8ad38Szhanghailiang return;
7436ad8ad38Szhanghailiang }
7446ad8ad38Szhanghailiang
7456ad8ad38Szhanghailiang /* discard colo disk buffer */
7466ad8ad38Szhanghailiang replication_do_checkpoint_all(&local_err);
7476ad8ad38Szhanghailiang if (local_err) {
7486ad8ad38Szhanghailiang error_propagate(errp, local_err);
74992c932deSLukas Straub vmstate_loading = false;
750195801d7SStefan Hajnoczi bql_unlock();
7516ad8ad38Szhanghailiang return;
7526ad8ad38Szhanghailiang }
7536ad8ad38Szhanghailiang /* Notify all filters of all NIC to do checkpoint */
7546ad8ad38Szhanghailiang colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
7556ad8ad38Szhanghailiang
7566ad8ad38Szhanghailiang if (local_err) {
7576ad8ad38Szhanghailiang error_propagate(errp, local_err);
75892c932deSLukas Straub vmstate_loading = false;
759195801d7SStefan Hajnoczi bql_unlock();
7606ad8ad38Szhanghailiang return;
7616ad8ad38Szhanghailiang }
7626ad8ad38Szhanghailiang
7636ad8ad38Szhanghailiang vmstate_loading = false;
7646ad8ad38Szhanghailiang vm_start();
765195801d7SStefan Hajnoczi bql_unlock();
7669c5c8ff2SRao, Lei trace_colo_vm_state_change("stop", "run");
7676ad8ad38Szhanghailiang
7686ad8ad38Szhanghailiang if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
7696ad8ad38Szhanghailiang return;
7706ad8ad38Szhanghailiang }
7716ad8ad38Szhanghailiang
7726ad8ad38Szhanghailiang colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
7736ad8ad38Szhanghailiang &local_err);
7746ad8ad38Szhanghailiang error_propagate(errp, local_err);
7756ad8ad38Szhanghailiang }
7766ad8ad38Szhanghailiang
colo_wait_handle_message(MigrationIncomingState * mis,QEMUFile * fb,QIOChannelBuffer * bioc,Error ** errp)7776ad8ad38Szhanghailiang static void colo_wait_handle_message(MigrationIncomingState *mis,
7786ad8ad38Szhanghailiang QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
7794f97558eSzhanghailiang {
7804f97558eSzhanghailiang COLOMessage msg;
7814f97558eSzhanghailiang Error *local_err = NULL;
7824f97558eSzhanghailiang
7836ad8ad38Szhanghailiang msg = colo_receive_message(mis->from_src_file, &local_err);
7844f97558eSzhanghailiang if (local_err) {
7854f97558eSzhanghailiang error_propagate(errp, local_err);
7864f97558eSzhanghailiang return;
7874f97558eSzhanghailiang }
7884f97558eSzhanghailiang
7894f97558eSzhanghailiang switch (msg) {
7904f97558eSzhanghailiang case COLO_MESSAGE_CHECKPOINT_REQUEST:
7916ad8ad38Szhanghailiang colo_incoming_process_checkpoint(mis, fb, bioc, errp);
7924f97558eSzhanghailiang break;
7934f97558eSzhanghailiang default:
7944f97558eSzhanghailiang error_setg(errp, "Got unknown COLO message: %d", msg);
7954f97558eSzhanghailiang break;
7964f97558eSzhanghailiang }
7974f97558eSzhanghailiang }
7984f97558eSzhanghailiang
colo_shutdown(void)799795969abSRao, Lei void colo_shutdown(void)
800795969abSRao, Lei {
801795969abSRao, Lei MigrationIncomingState *mis = NULL;
802795969abSRao, Lei MigrationState *s = NULL;
803795969abSRao, Lei
804795969abSRao, Lei switch (get_colo_mode()) {
805795969abSRao, Lei case COLO_MODE_PRIMARY:
806795969abSRao, Lei s = migrate_get_current();
807795969abSRao, Lei qemu_event_set(&s->colo_checkpoint_event);
808795969abSRao, Lei qemu_sem_post(&s->colo_exit_sem);
809795969abSRao, Lei break;
810795969abSRao, Lei case COLO_MODE_SECONDARY:
811795969abSRao, Lei mis = migration_incoming_get_current();
812795969abSRao, Lei qemu_sem_post(&mis->colo_incoming_sem);
813795969abSRao, Lei break;
814795969abSRao, Lei default:
815795969abSRao, Lei break;
816795969abSRao, Lei }
817795969abSRao, Lei }
818795969abSRao, Lei
colo_process_incoming_thread(void * opaque)819d0a14a2bSVladimir Sementsov-Ogievskiy static void *colo_process_incoming_thread(void *opaque)
82025d0c16fSzhanghailiang {
82125d0c16fSzhanghailiang MigrationIncomingState *mis = opaque;
8224291d372Szhanghailiang QEMUFile *fb = NULL;
8234291d372Szhanghailiang QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
8244f97558eSzhanghailiang Error *local_err = NULL;
82525d0c16fSzhanghailiang
82674637e6fSLidong Chen rcu_register_thread();
827c937b9a6Szhanghailiang qemu_sem_init(&mis->colo_incoming_sem, 0);
828c937b9a6Szhanghailiang
82925d0c16fSzhanghailiang migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
83025d0c16fSzhanghailiang MIGRATION_STATUS_COLO);
83125d0c16fSzhanghailiang
8322b9f6bf3SRao, Lei if (get_colo_mode() != COLO_MODE_SECONDARY) {
8335ed0decaSZhang Chen error_report("COLO mode must be COLO_MODE_SECONDARY");
8345ed0decaSZhang Chen return NULL;
8355ed0decaSZhang Chen }
8365ed0decaSZhang Chen
8372cc637f1SLi Zhijian /* Make sure all file formats throw away their mutable metadata */
8382cc637f1SLi Zhijian bql_lock();
8392cc637f1SLi Zhijian bdrv_activate_all(&local_err);
8402cc637f1SLi Zhijian bql_unlock();
8413dc27facSLi Zhijian if (local_err) {
8422cc637f1SLi Zhijian error_report_err(local_err);
8432cc637f1SLi Zhijian return NULL;
8442cc637f1SLi Zhijian }
8452cc637f1SLi Zhijian
846aef06085Szhanghailiang failover_init_state();
847aef06085Szhanghailiang
84856ba83d2Szhanghailiang mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
84956ba83d2Szhanghailiang if (!mis->to_src_file) {
85056ba83d2Szhanghailiang error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
85156ba83d2Szhanghailiang goto out;
85256ba83d2Szhanghailiang }
85356ba83d2Szhanghailiang /*
85456ba83d2Szhanghailiang * Note: the communication between Primary side and Secondary side
85556ba83d2Szhanghailiang * should be sequential, we set the fd to unblocked in migration incoming
85656ba83d2Szhanghailiang * coroutine, and here we are in the COLO incoming thread, so it is ok to
85756ba83d2Szhanghailiang * set the fd back to blocked.
85856ba83d2Szhanghailiang */
85956ba83d2Szhanghailiang qemu_file_set_blocking(mis->from_src_file, true);
86056ba83d2Szhanghailiang
8610393031aSzhanghailiang colo_incoming_start_dirty_log();
8620393031aSzhanghailiang
8634291d372Szhanghailiang bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
86477ef2dc1SDaniel P. Berrangé fb = qemu_file_new_input(QIO_CHANNEL(bioc));
8654291d372Szhanghailiang object_unref(OBJECT(bioc));
8664291d372Szhanghailiang
867195801d7SStefan Hajnoczi bql_lock();
8688e48ac95SZhang Chen replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
8698e48ac95SZhang Chen if (local_err) {
870195801d7SStefan Hajnoczi bql_unlock();
8718e48ac95SZhang Chen goto out;
8728e48ac95SZhang Chen }
873131b2153SZhang Chen vm_start();
874195801d7SStefan Hajnoczi bql_unlock();
8759c5c8ff2SRao, Lei trace_colo_vm_state_change("stop", "run");
876131b2153SZhang Chen
8774f97558eSzhanghailiang colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
8784f97558eSzhanghailiang &local_err);
8794f97558eSzhanghailiang if (local_err) {
8804f97558eSzhanghailiang goto out;
8814f97558eSzhanghailiang }
8824f97558eSzhanghailiang
8834f97558eSzhanghailiang while (mis->state == MIGRATION_STATUS_COLO) {
8846ad8ad38Szhanghailiang colo_wait_handle_message(mis, fb, bioc, &local_err);
8854f97558eSzhanghailiang if (local_err) {
8866ad8ad38Szhanghailiang error_report_err(local_err);
8876ad8ad38Szhanghailiang break;
8884f97558eSzhanghailiang }
88992c932deSLukas Straub
89092c932deSLukas Straub if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
89192c932deSLukas Straub failover_set_state(FAILOVER_STATUS_RELAUNCH,
89292c932deSLukas Straub FAILOVER_STATUS_NONE);
89392c932deSLukas Straub failover_request_active(NULL);
89492c932deSLukas Straub break;
89592c932deSLukas Straub }
89692c932deSLukas Straub
8979d2db376Szhanghailiang if (failover_get_state() != FAILOVER_STATUS_NONE) {
8989d2db376Szhanghailiang error_report("failover request");
8996ad8ad38Szhanghailiang break;
9004f97558eSzhanghailiang }
9014f97558eSzhanghailiang }
90225d0c16fSzhanghailiang
90356ba83d2Szhanghailiang out:
9043a43ac47SZhang Chen /*
9053a43ac47SZhang Chen * There are only two reasons we can get here, some error happened
9063a43ac47SZhang Chen * or the user triggered failover.
9073a43ac47SZhang Chen */
9089ecff6d6Szhanghailiang switch (failover_get_state()) {
9091fe6ab26SZhang Chen case FAILOVER_STATUS_COMPLETED:
9109ecff6d6Szhanghailiang qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
9119ecff6d6Szhanghailiang COLO_EXIT_REASON_REQUEST);
9129ecff6d6Szhanghailiang break;
9139ecff6d6Szhanghailiang default:
9143a43ac47SZhang Chen qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
9153a43ac47SZhang Chen COLO_EXIT_REASON_ERROR);
9169ecff6d6Szhanghailiang }
9179ecff6d6Szhanghailiang
9184291d372Szhanghailiang if (fb) {
9194291d372Szhanghailiang qemu_fclose(fb);
9204291d372Szhanghailiang }
9214291d372Szhanghailiang
922c937b9a6Szhanghailiang /* Hope this not to be too long to loop here */
923c937b9a6Szhanghailiang qemu_sem_wait(&mis->colo_incoming_sem);
924c937b9a6Szhanghailiang qemu_sem_destroy(&mis->colo_incoming_sem);
92525d0c16fSzhanghailiang
92674637e6fSLidong Chen rcu_unregister_thread();
92725d0c16fSzhanghailiang return NULL;
92825d0c16fSzhanghailiang }
929d0a14a2bSVladimir Sementsov-Ogievskiy
colo_incoming_co(void)930787ea49eSLi Zhijian void coroutine_fn colo_incoming_co(void)
931d0a14a2bSVladimir Sementsov-Ogievskiy {
932d0a14a2bSVladimir Sementsov-Ogievskiy MigrationIncomingState *mis = migration_incoming_get_current();
933d0a14a2bSVladimir Sementsov-Ogievskiy QemuThread th;
934d0a14a2bSVladimir Sementsov-Ogievskiy
935195801d7SStefan Hajnoczi assert(bql_locked());
936787ea49eSLi Zhijian assert(migration_incoming_colo_enabled());
937d0a14a2bSVladimir Sementsov-Ogievskiy
938*e620b1e4SPeter Xu qemu_thread_create(&th, MIGRATION_THREAD_DST_COLO,
939*e620b1e4SPeter Xu colo_process_incoming_thread,
940d0a14a2bSVladimir Sementsov-Ogievskiy mis, QEMU_THREAD_JOINABLE);
941d0a14a2bSVladimir Sementsov-Ogievskiy
942d0a14a2bSVladimir Sementsov-Ogievskiy mis->colo_incoming_co = qemu_coroutine_self();
943d0a14a2bSVladimir Sementsov-Ogievskiy qemu_coroutine_yield();
944d0a14a2bSVladimir Sementsov-Ogievskiy mis->colo_incoming_co = NULL;
945d0a14a2bSVladimir Sementsov-Ogievskiy
946195801d7SStefan Hajnoczi bql_unlock();
947d0a14a2bSVladimir Sementsov-Ogievskiy /* Wait checkpoint incoming thread exit before free resource */
948d0a14a2bSVladimir Sementsov-Ogievskiy qemu_thread_join(&th);
949195801d7SStefan Hajnoczi bql_lock();
950d0a14a2bSVladimir Sementsov-Ogievskiy
951a4a411fbSStefan Hajnoczi /* We hold the global BQL, so it is safe here */
952d0a14a2bSVladimir Sementsov-Ogievskiy colo_release_ram_cache();
953d0a14a2bSVladimir Sementsov-Ogievskiy }
954