xref: /openbmc/qemu/migration/colo.c (revision cbad45511840077dafb6e1d1bc2e228baabecff5)
135a6ed4fSzhanghailiang /*
235a6ed4fSzhanghailiang  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
335a6ed4fSzhanghailiang  * (a.k.a. Fault Tolerance or Continuous Replication)
435a6ed4fSzhanghailiang  *
535a6ed4fSzhanghailiang  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
635a6ed4fSzhanghailiang  * Copyright (c) 2016 FUJITSU LIMITED
735a6ed4fSzhanghailiang  * Copyright (c) 2016 Intel Corporation
835a6ed4fSzhanghailiang  *
935a6ed4fSzhanghailiang  * This work is licensed under the terms of the GNU GPL, version 2 or
1035a6ed4fSzhanghailiang  * later.  See the COPYING file in the top-level directory.
1135a6ed4fSzhanghailiang  */
1235a6ed4fSzhanghailiang 
1335a6ed4fSzhanghailiang #include "qemu/osdep.h"
140b827d5eSzhanghailiang #include "sysemu/sysemu.h"
15e688df6bSMarkus Armbruster #include "qapi/error.h"
169af23989SMarkus Armbruster #include "qapi/qapi-commands-migration.h"
176666c96aSJuan Quintela #include "migration.h"
1808a0aee1SJuan Quintela #include "qemu-file.h"
1920a519a0SJuan Quintela #include "savevm.h"
2035a6ed4fSzhanghailiang #include "migration/colo.h"
21a91246c9Szhanghailiang #include "io/channel-buffer.h"
220b827d5eSzhanghailiang #include "trace.h"
2356ba83d2Szhanghailiang #include "qemu/error-report.h"
24db725815SMarkus Armbruster #include "qemu/main-loop.h"
25d4842052SMarkus Armbruster #include "qemu/rcu.h"
26d89e666eSzhanghailiang #include "migration/failover.h"
270393031aSzhanghailiang #include "migration/ram.h"
28b0262955SPaolo Bonzini #include "block/replication.h"
29131b2153SZhang Chen #include "net/colo-compare.h"
30131b2153SZhang Chen #include "net/colo.h"
318e48ac95SZhang Chen #include "block/block.h"
329ecff6d6Szhanghailiang #include "qapi/qapi-events-migration.h"
333f6df99dSZhang Chen #include "sysemu/cpus.h"
3454d31236SMarkus Armbruster #include "sysemu/runstate.h"
357b343530Szhanghailiang #include "net/filter.h"
361f0776f1SJuan Quintela #include "options.h"
3735a6ed4fSzhanghailiang 
38a8664ba5Szhanghailiang static bool vmstate_loading;
39131b2153SZhang Chen static Notifier packets_compare_notifier;
40a8664ba5Szhanghailiang 
415ed0decaSZhang Chen /* User need to know colo mode after COLO failover */
425ed0decaSZhang Chen static COLOMode last_colo_mode;
435ed0decaSZhang Chen 
44a91246c9Szhanghailiang #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
45a91246c9Szhanghailiang 
migration_in_colo_state(void)460b827d5eSzhanghailiang bool migration_in_colo_state(void)
470b827d5eSzhanghailiang {
480b827d5eSzhanghailiang     MigrationState *s = migrate_get_current();
490b827d5eSzhanghailiang 
500b827d5eSzhanghailiang     return (s->state == MIGRATION_STATUS_COLO);
510b827d5eSzhanghailiang }
520b827d5eSzhanghailiang 
migration_incoming_in_colo_state(void)5325d0c16fSzhanghailiang bool migration_incoming_in_colo_state(void)
5425d0c16fSzhanghailiang {
5525d0c16fSzhanghailiang     MigrationIncomingState *mis = migration_incoming_get_current();
5625d0c16fSzhanghailiang 
5725d0c16fSzhanghailiang     return mis && (mis->state == MIGRATION_STATUS_COLO);
5825d0c16fSzhanghailiang }
5925d0c16fSzhanghailiang 
colo_runstate_is_stopped(void)60b3f7f0c5Szhanghailiang static bool colo_runstate_is_stopped(void)
61b3f7f0c5Szhanghailiang {
62b3f7f0c5Szhanghailiang     return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
63b3f7f0c5Szhanghailiang }
64b3f7f0c5Szhanghailiang 
colo_checkpoint_notify(void)657395127fSSteve Sistare static void colo_checkpoint_notify(void)
664332ffcdSVladimir Sementsov-Ogievskiy {
677395127fSSteve Sistare     MigrationState *s = migrate_get_current();
684332ffcdSVladimir Sementsov-Ogievskiy     int64_t next_notify_time;
694332ffcdSVladimir Sementsov-Ogievskiy 
704332ffcdSVladimir Sementsov-Ogievskiy     qemu_event_set(&s->colo_checkpoint_event);
714332ffcdSVladimir Sementsov-Ogievskiy     s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
724332ffcdSVladimir Sementsov-Ogievskiy     next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay();
734332ffcdSVladimir Sementsov-Ogievskiy     timer_mod(s->colo_delay_timer, next_notify_time);
744332ffcdSVladimir Sementsov-Ogievskiy }
754332ffcdSVladimir Sementsov-Ogievskiy 
/*
 * Timer callback adapter around colo_checkpoint_notify().
 *
 * @opaque: callback argument, not used.
 */
static void colo_checkpoint_notify_timer(void *opaque)
{
    (void)opaque;

    colo_checkpoint_notify();
}
807395127fSSteve Sistare 
/*
 * Trigger a checkpoint notification, which also re-arms the delay timer,
 * but only while the outgoing migration is in the COLO state.
 */
void colo_checkpoint_delay_set(void)
{
    if (!migration_in_colo_state()) {
        return;
    }
    colo_checkpoint_notify();
}
874332ffcdSVladimir Sementsov-Ogievskiy 
secondary_vm_do_failover(void)889d2db376Szhanghailiang static void secondary_vm_do_failover(void)
899d2db376Szhanghailiang {
903ebb9c4fSZhang Chen /* COLO needs enable block-replication */
919d2db376Szhanghailiang     int old_state;
929d2db376Szhanghailiang     MigrationIncomingState *mis = migration_incoming_get_current();
938e48ac95SZhang Chen     Error *local_err = NULL;
949d2db376Szhanghailiang 
95a8664ba5Szhanghailiang     /* Can not do failover during the process of VM's loading VMstate, Or
96a8664ba5Szhanghailiang      * it will break the secondary VM.
97a8664ba5Szhanghailiang      */
98a8664ba5Szhanghailiang     if (vmstate_loading) {
99a8664ba5Szhanghailiang         old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
100a8664ba5Szhanghailiang                         FAILOVER_STATUS_RELAUNCH);
101a8664ba5Szhanghailiang         if (old_state != FAILOVER_STATUS_ACTIVE) {
102a8664ba5Szhanghailiang             error_report("Unknown error while do failover for secondary VM,"
103977c736fSMarkus Armbruster                          "old_state: %s", FailoverStatus_str(old_state));
104a8664ba5Szhanghailiang         }
105a8664ba5Szhanghailiang         return;
106a8664ba5Szhanghailiang     }
107a8664ba5Szhanghailiang 
1089d2db376Szhanghailiang     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
1099d2db376Szhanghailiang                       MIGRATION_STATUS_COMPLETED);
1109d2db376Szhanghailiang 
1118e48ac95SZhang Chen     replication_stop_all(true, &local_err);
1128e48ac95SZhang Chen     if (local_err) {
1138e48ac95SZhang Chen         error_report_err(local_err);
11427d07fcfSVladimir Sementsov-Ogievskiy         local_err = NULL;
1158e48ac95SZhang Chen     }
1168e48ac95SZhang Chen 
1177b343530Szhanghailiang     /* Notify all filters of all NIC to do checkpoint */
1187b343530Szhanghailiang     colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
1197b343530Szhanghailiang     if (local_err) {
1207b343530Szhanghailiang         error_report_err(local_err);
1217b343530Szhanghailiang     }
1227b343530Szhanghailiang 
1239d2db376Szhanghailiang     if (!autostart) {
1249d2db376Szhanghailiang         error_report("\"-S\" qemu option will be ignored in secondary side");
1259d2db376Szhanghailiang         /* recover runstate to normal migration finish state */
1269d2db376Szhanghailiang         autostart = true;
1279d2db376Szhanghailiang     }
128c937b9a6Szhanghailiang     /*
129c937b9a6Szhanghailiang      * Make sure COLO incoming thread not block in recv or send,
130c937b9a6Szhanghailiang      * If mis->from_src_file and mis->to_src_file use the same fd,
131c937b9a6Szhanghailiang      * The second shutdown() will return -1, we ignore this value,
132c937b9a6Szhanghailiang      * It is harmless.
133c937b9a6Szhanghailiang      */
134c937b9a6Szhanghailiang     if (mis->from_src_file) {
135c937b9a6Szhanghailiang         qemu_file_shutdown(mis->from_src_file);
136c937b9a6Szhanghailiang     }
137c937b9a6Szhanghailiang     if (mis->to_src_file) {
138c937b9a6Szhanghailiang         qemu_file_shutdown(mis->to_src_file);
139c937b9a6Szhanghailiang     }
1409d2db376Szhanghailiang 
1419d2db376Szhanghailiang     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
1429d2db376Szhanghailiang                                    FAILOVER_STATUS_COMPLETED);
1439d2db376Szhanghailiang     if (old_state != FAILOVER_STATUS_ACTIVE) {
1449d2db376Szhanghailiang         error_report("Incorrect state (%s) while doing failover for "
145977c736fSMarkus Armbruster                      "secondary VM", FailoverStatus_str(old_state));
1469d2db376Szhanghailiang         return;
1479d2db376Szhanghailiang     }
148c937b9a6Szhanghailiang     /* Notify COLO incoming thread that failover work is finished */
149c937b9a6Szhanghailiang     qemu_sem_post(&mis->colo_incoming_sem);
1501fe6ab26SZhang Chen 
1519d2db376Szhanghailiang     /* For Secondary VM, jump to incoming co */
152dd42ce24SVladimir Sementsov-Ogievskiy     if (mis->colo_incoming_co) {
153dd42ce24SVladimir Sementsov-Ogievskiy         qemu_coroutine_enter(mis->colo_incoming_co);
1549d2db376Szhanghailiang     }
1559d2db376Szhanghailiang }
1569d2db376Szhanghailiang 
primary_vm_do_failover(void)157b3f7f0c5Szhanghailiang static void primary_vm_do_failover(void)
158b3f7f0c5Szhanghailiang {
159b3f7f0c5Szhanghailiang     MigrationState *s = migrate_get_current();
160b3f7f0c5Szhanghailiang     int old_state;
1618e48ac95SZhang Chen     Error *local_err = NULL;
162b3f7f0c5Szhanghailiang 
163b3f7f0c5Szhanghailiang     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
164b3f7f0c5Szhanghailiang                       MIGRATION_STATUS_COMPLETED);
1652518aec1Szhanghailiang     /*
1662518aec1Szhanghailiang      * kick COLO thread which might wait at
1672518aec1Szhanghailiang      * qemu_sem_wait(&s->colo_checkpoint_sem).
1682518aec1Szhanghailiang      */
1697395127fSSteve Sistare     colo_checkpoint_notify();
170b3f7f0c5Szhanghailiang 
171c937b9a6Szhanghailiang     /*
172c937b9a6Szhanghailiang      * Wake up COLO thread which may blocked in recv() or send(),
173c937b9a6Szhanghailiang      * The s->rp_state.from_dst_file and s->to_dst_file may use the
174c937b9a6Szhanghailiang      * same fd, but we still shutdown the fd for twice, it is harmless.
175c937b9a6Szhanghailiang      */
176c937b9a6Szhanghailiang     if (s->to_dst_file) {
177c937b9a6Szhanghailiang         qemu_file_shutdown(s->to_dst_file);
178c937b9a6Szhanghailiang     }
179c937b9a6Szhanghailiang     if (s->rp_state.from_dst_file) {
180c937b9a6Szhanghailiang         qemu_file_shutdown(s->rp_state.from_dst_file);
181c937b9a6Szhanghailiang     }
182c937b9a6Szhanghailiang 
183b3f7f0c5Szhanghailiang     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
184b3f7f0c5Szhanghailiang                                    FAILOVER_STATUS_COMPLETED);
185b3f7f0c5Szhanghailiang     if (old_state != FAILOVER_STATUS_ACTIVE) {
186b3f7f0c5Szhanghailiang         error_report("Incorrect state (%s) while doing failover for Primary VM",
187977c736fSMarkus Armbruster                      FailoverStatus_str(old_state));
188b3f7f0c5Szhanghailiang         return;
189b3f7f0c5Szhanghailiang     }
1908e48ac95SZhang Chen 
1918e48ac95SZhang Chen     replication_stop_all(true, &local_err);
1928e48ac95SZhang Chen     if (local_err) {
1938e48ac95SZhang Chen         error_report_err(local_err);
1948e48ac95SZhang Chen         local_err = NULL;
1958e48ac95SZhang Chen     }
1968e48ac95SZhang Chen 
197c937b9a6Szhanghailiang     /* Notify COLO thread that failover work is finished */
198c937b9a6Szhanghailiang     qemu_sem_post(&s->colo_exit_sem);
199b3f7f0c5Szhanghailiang }
200b3f7f0c5Szhanghailiang 
get_colo_mode(void)201aad555c2SZhang Chen COLOMode get_colo_mode(void)
202aad555c2SZhang Chen {
203aad555c2SZhang Chen     if (migration_in_colo_state()) {
204aad555c2SZhang Chen         return COLO_MODE_PRIMARY;
205aad555c2SZhang Chen     } else if (migration_incoming_in_colo_state()) {
206aad555c2SZhang Chen         return COLO_MODE_SECONDARY;
207aad555c2SZhang Chen     } else {
20841b6b779SZhang Chen         return COLO_MODE_NONE;
209aad555c2SZhang Chen     }
210aad555c2SZhang Chen }
211aad555c2SZhang Chen 
colo_do_failover(void)212c0913d1dSZhang Chen void colo_do_failover(void)
213b3f7f0c5Szhanghailiang {
214b3f7f0c5Szhanghailiang     /* Make sure VM stopped while failover happened. */
215b3f7f0c5Szhanghailiang     if (!colo_runstate_is_stopped()) {
216b3f7f0c5Szhanghailiang         vm_stop_force_state(RUN_STATE_COLO);
217b3f7f0c5Szhanghailiang     }
218b3f7f0c5Szhanghailiang 
2192b9f6bf3SRao, Lei     switch (last_colo_mode = get_colo_mode()) {
22082cd368cSZhang Chen     case COLO_MODE_PRIMARY:
221b3f7f0c5Szhanghailiang         primary_vm_do_failover();
22282cd368cSZhang Chen         break;
22382cd368cSZhang Chen     case COLO_MODE_SECONDARY:
2249d2db376Szhanghailiang         secondary_vm_do_failover();
22582cd368cSZhang Chen         break;
22682cd368cSZhang Chen     default:
22782cd368cSZhang Chen         error_report("colo_do_failover failed because the colo mode"
22882cd368cSZhang Chen                      " could not be obtained");
229b3f7f0c5Szhanghailiang     }
230b3f7f0c5Szhanghailiang }
231b3f7f0c5Szhanghailiang 
qmp_xen_set_replication(bool enable,bool primary,bool has_failover,bool failover,Error ** errp)2322c9639ecSZhang Chen void qmp_xen_set_replication(bool enable, bool primary,
2332c9639ecSZhang Chen                              bool has_failover, bool failover,
2342c9639ecSZhang Chen                              Error **errp)
2352c9639ecSZhang Chen {
2362c9639ecSZhang Chen     ReplicationMode mode = primary ?
2372c9639ecSZhang Chen                            REPLICATION_MODE_PRIMARY :
2382c9639ecSZhang Chen                            REPLICATION_MODE_SECONDARY;
2392c9639ecSZhang Chen 
2402c9639ecSZhang Chen     if (has_failover && enable) {
2412c9639ecSZhang Chen         error_setg(errp, "Parameter 'failover' is only for"
2422c9639ecSZhang Chen                    " stopping replication");
2432c9639ecSZhang Chen         return;
2442c9639ecSZhang Chen     }
2452c9639ecSZhang Chen 
2462c9639ecSZhang Chen     if (enable) {
2472c9639ecSZhang Chen         replication_start_all(mode, errp);
2482c9639ecSZhang Chen     } else {
2492c9639ecSZhang Chen         if (!has_failover) {
2502c9639ecSZhang Chen             failover = NULL;
2512c9639ecSZhang Chen         }
2522c9639ecSZhang Chen         replication_stop_all(failover, failover ? NULL : errp);
2532c9639ecSZhang Chen     }
2542c9639ecSZhang Chen }
2552c9639ecSZhang Chen 
qmp_query_xen_replication_status(Error ** errp)256daa33c52SZhang Chen ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
257daa33c52SZhang Chen {
258daa33c52SZhang Chen     Error *err = NULL;
259daa33c52SZhang Chen     ReplicationStatus *s = g_new0(ReplicationStatus, 1);
260daa33c52SZhang Chen 
261daa33c52SZhang Chen     replication_get_error_all(&err);
262daa33c52SZhang Chen     if (err) {
263daa33c52SZhang Chen         s->error = true;
264daa33c52SZhang Chen         s->desc = g_strdup(error_get_pretty(err));
265daa33c52SZhang Chen     } else {
266daa33c52SZhang Chen         s->error = false;
267daa33c52SZhang Chen     }
268daa33c52SZhang Chen 
269daa33c52SZhang Chen     error_free(err);
270daa33c52SZhang Chen     return s;
271daa33c52SZhang Chen }
272daa33c52SZhang Chen 
qmp_xen_colo_do_checkpoint(Error ** errp)273daa33c52SZhang Chen void qmp_xen_colo_do_checkpoint(Error **errp)
274daa33c52SZhang Chen {
275735527e1SMarkus Armbruster     Error *err = NULL;
276735527e1SMarkus Armbruster 
277735527e1SMarkus Armbruster     replication_do_checkpoint_all(&err);
278735527e1SMarkus Armbruster     if (err) {
279735527e1SMarkus Armbruster         error_propagate(errp, err);
280735527e1SMarkus Armbruster         return;
281735527e1SMarkus Armbruster     }
2820e8818f0SZhang Chen     /* Notify all filters of all NIC to do checkpoint */
2830e8818f0SZhang Chen     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
284daa33c52SZhang Chen }
285daa33c52SZhang Chen 
qmp_query_colo_status(Error ** errp)286f56c0065SZhang Chen COLOStatus *qmp_query_colo_status(Error **errp)
287f56c0065SZhang Chen {
288f56c0065SZhang Chen     COLOStatus *s = g_new0(COLOStatus, 1);
289f56c0065SZhang Chen 
290f56c0065SZhang Chen     s->mode = get_colo_mode();
2915ed0decaSZhang Chen     s->last_mode = last_colo_mode;
292f56c0065SZhang Chen 
293f56c0065SZhang Chen     switch (failover_get_state()) {
294f56c0065SZhang Chen     case FAILOVER_STATUS_NONE:
295f56c0065SZhang Chen         s->reason = COLO_EXIT_REASON_NONE;
296f56c0065SZhang Chen         break;
2971fe6ab26SZhang Chen     case FAILOVER_STATUS_COMPLETED:
298f56c0065SZhang Chen         s->reason = COLO_EXIT_REASON_REQUEST;
299f56c0065SZhang Chen         break;
300f56c0065SZhang Chen     default:
3013a43ac47SZhang Chen         if (migration_in_colo_state()) {
3023a43ac47SZhang Chen             s->reason = COLO_EXIT_REASON_PROCESSING;
3033a43ac47SZhang Chen         } else {
304f56c0065SZhang Chen             s->reason = COLO_EXIT_REASON_ERROR;
305f56c0065SZhang Chen         }
3063a43ac47SZhang Chen     }
307f56c0065SZhang Chen 
308f56c0065SZhang Chen     return s;
309f56c0065SZhang Chen }
310f56c0065SZhang Chen 
colo_send_message(QEMUFile * f,COLOMessage msg,Error ** errp)3114f97558eSzhanghailiang static void colo_send_message(QEMUFile *f, COLOMessage msg,
3124f97558eSzhanghailiang                               Error **errp)
3134f97558eSzhanghailiang {
3144f97558eSzhanghailiang     int ret;
3154f97558eSzhanghailiang 
3164f97558eSzhanghailiang     if (msg >= COLO_MESSAGE__MAX) {
3174f97558eSzhanghailiang         error_setg(errp, "%s: Invalid message", __func__);
3184f97558eSzhanghailiang         return;
3194f97558eSzhanghailiang     }
3204f97558eSzhanghailiang     qemu_put_be32(f, msg);
321be07a0edSJuan Quintela     ret = qemu_fflush(f);
3224f97558eSzhanghailiang     if (ret < 0) {
3234f97558eSzhanghailiang         error_setg_errno(errp, -ret, "Can't send COLO message");
3244f97558eSzhanghailiang     }
325977c736fSMarkus Armbruster     trace_colo_send_message(COLOMessage_str(msg));
3264f97558eSzhanghailiang }
3274f97558eSzhanghailiang 
colo_send_message_value(QEMUFile * f,COLOMessage msg,uint64_t value,Error ** errp)328a91246c9Szhanghailiang static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
329a91246c9Szhanghailiang                                     uint64_t value, Error **errp)
330a91246c9Szhanghailiang {
331a91246c9Szhanghailiang     Error *local_err = NULL;
332a91246c9Szhanghailiang     int ret;
333a91246c9Szhanghailiang 
334a91246c9Szhanghailiang     colo_send_message(f, msg, &local_err);
335a91246c9Szhanghailiang     if (local_err) {
336a91246c9Szhanghailiang         error_propagate(errp, local_err);
337a91246c9Szhanghailiang         return;
338a91246c9Szhanghailiang     }
339a91246c9Szhanghailiang     qemu_put_be64(f, value);
340be07a0edSJuan Quintela     ret = qemu_fflush(f);
341a91246c9Szhanghailiang     if (ret < 0) {
342a91246c9Szhanghailiang         error_setg_errno(errp, -ret, "Failed to send value for message:%s",
343977c736fSMarkus Armbruster                          COLOMessage_str(msg));
344a91246c9Szhanghailiang     }
345a91246c9Szhanghailiang }
346a91246c9Szhanghailiang 
colo_receive_message(QEMUFile * f,Error ** errp)3474f97558eSzhanghailiang static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
3484f97558eSzhanghailiang {
3494f97558eSzhanghailiang     COLOMessage msg;
3504f97558eSzhanghailiang     int ret;
3514f97558eSzhanghailiang 
3524f97558eSzhanghailiang     msg = qemu_get_be32(f);
3534f97558eSzhanghailiang     ret = qemu_file_get_error(f);
3544f97558eSzhanghailiang     if (ret < 0) {
3554f97558eSzhanghailiang         error_setg_errno(errp, -ret, "Can't receive COLO message");
3564f97558eSzhanghailiang         return msg;
3574f97558eSzhanghailiang     }
3584f97558eSzhanghailiang     if (msg >= COLO_MESSAGE__MAX) {
3594f97558eSzhanghailiang         error_setg(errp, "%s: Invalid message", __func__);
3604f97558eSzhanghailiang         return msg;
3614f97558eSzhanghailiang     }
362977c736fSMarkus Armbruster     trace_colo_receive_message(COLOMessage_str(msg));
3634f97558eSzhanghailiang     return msg;
3644f97558eSzhanghailiang }
3654f97558eSzhanghailiang 
colo_receive_check_message(QEMUFile * f,COLOMessage expect_msg,Error ** errp)3664f97558eSzhanghailiang static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
3674f97558eSzhanghailiang                                        Error **errp)
3684f97558eSzhanghailiang {
3694f97558eSzhanghailiang     COLOMessage msg;
3704f97558eSzhanghailiang     Error *local_err = NULL;
3714f97558eSzhanghailiang 
3724f97558eSzhanghailiang     msg = colo_receive_message(f, &local_err);
3734f97558eSzhanghailiang     if (local_err) {
3744f97558eSzhanghailiang         error_propagate(errp, local_err);
3754f97558eSzhanghailiang         return;
3764f97558eSzhanghailiang     }
3774f97558eSzhanghailiang     if (msg != expect_msg) {
3784f97558eSzhanghailiang         error_setg(errp, "Unexpected COLO message %d, expected %d",
3794f97558eSzhanghailiang                           msg, expect_msg);
3804f97558eSzhanghailiang     }
3814f97558eSzhanghailiang }
3824f97558eSzhanghailiang 
colo_receive_message_value(QEMUFile * f,uint32_t expect_msg,Error ** errp)3834291d372Szhanghailiang static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
3844291d372Szhanghailiang                                            Error **errp)
3854291d372Szhanghailiang {
3864291d372Szhanghailiang     Error *local_err = NULL;
3874291d372Szhanghailiang     uint64_t value;
3884291d372Szhanghailiang     int ret;
3894291d372Szhanghailiang 
3904291d372Szhanghailiang     colo_receive_check_message(f, expect_msg, &local_err);
3914291d372Szhanghailiang     if (local_err) {
3924291d372Szhanghailiang         error_propagate(errp, local_err);
3934291d372Szhanghailiang         return 0;
3944291d372Szhanghailiang     }
3954291d372Szhanghailiang 
3964291d372Szhanghailiang     value = qemu_get_be64(f);
3974291d372Szhanghailiang     ret = qemu_file_get_error(f);
3984291d372Szhanghailiang     if (ret < 0) {
3994291d372Szhanghailiang         error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
400977c736fSMarkus Armbruster                          COLOMessage_str(expect_msg));
4014291d372Szhanghailiang     }
4024291d372Szhanghailiang     return value;
4034291d372Szhanghailiang }
4044291d372Szhanghailiang 
/*
 * Run one checkpoint transaction on the Primary side.
 *
 * Sequence, as implemented below:
 *   1. send CHECKPOINT_REQUEST, expect CHECKPOINT_REPLY
 *   2. reset the channel buffer, stop the VM (unless failover started)
 *   3. checkpoint block replication, send VMSTATE_SEND
 *   4. save device state into @fb, save live state to s->to_dst_file
 *   5. send VMSTATE_SIZE with the buffer usage, then the buffer itself
 *   6. expect VMSTATE_RECEIVED, reset the checkpoint event, notify the
 *      packet comparers, expect VMSTATE_LOADED
 *   7. restart the VM
 *
 * @s:    migration state providing the channels to/from the destination
 * @bioc: channel buffer backing @fb; holds the saved device state
 * @fb:   QEMUFile the device state is written to
 *
 * Returns 0 on success and a negative value when any step failed or a
 * failover was detected.  Errors recorded in local_err are reported here.
 */
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                    COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }
    /* Reset channel-buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    bql_lock();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        bql_unlock();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    bql_unlock();
    trace_colo_vm_state_change("run", "stop");
    /*
     * The failover request bh could have run after vm_stop_force_state(),
     * so the failover state has to be checked again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }
    bql_lock();

    replication_do_checkpoint_all(&local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }
    /* Note: device state is saved into buffer */
    ret = qemu_save_device_state(fb);

    bql_unlock();
    if (ret < 0) {
        goto out;
    }

    if (migrate_auto_converge()) {
        mig_throttle_counter_reset();
    }
    /*
     * Only save VM's live state, which does not include device state.
     * TODO: We may need a timeout mechanism to prevent the COLO process
     * from being blocked here.
     */
    qemu_savevm_live_state(s->to_dst_file);

    qemu_fflush(fb);

    /*
     * The Secondary side needs the size of the VMstate data to decide
     * how much data should be read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    ret = qemu_fflush(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_event_reset(&s->colo_checkpoint_event);
    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    bql_lock();
    vm_start();
    bql_unlock();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
        /*
         * ret may hold the successful result of an earlier step (device
         * state save or flush); a recorded error must still fail the
         * transaction so the caller's "ret < 0" check sees it.
         */
        ret = -1;
    }
    return ret;
}
5224f97558eSzhanghailiang 
/*
 * Notifier callback installed on packets_compare_notifier: requests a
 * checkpoint.
 *
 * @notifier: the notifier that fired, not used
 * @data:     notification payload, not used
 */
static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
{
    (void)notifier;
    (void)data;

    colo_checkpoint_notify();
}
527131b2153SZhang Chen 
colo_process_checkpoint(MigrationState * s)5280b827d5eSzhanghailiang static void colo_process_checkpoint(MigrationState *s)
5290b827d5eSzhanghailiang {
530a91246c9Szhanghailiang     QIOChannelBuffer *bioc;
531a91246c9Szhanghailiang     QEMUFile *fb = NULL;
5324f97558eSzhanghailiang     Error *local_err = NULL;
5334f97558eSzhanghailiang     int ret;
5344f97558eSzhanghailiang 
5352b9f6bf3SRao, Lei     if (get_colo_mode() != COLO_MODE_PRIMARY) {
5365ed0decaSZhang Chen         error_report("COLO mode must be COLO_MODE_PRIMARY");
5375ed0decaSZhang Chen         return;
5385ed0decaSZhang Chen     }
5395ed0decaSZhang Chen 
540aef06085Szhanghailiang     failover_init_state();
541aef06085Szhanghailiang 
54256ba83d2Szhanghailiang     s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
54356ba83d2Szhanghailiang     if (!s->rp_state.from_dst_file) {
54456ba83d2Szhanghailiang         error_report("Open QEMUFile from_dst_file failed");
54556ba83d2Szhanghailiang         goto out;
54656ba83d2Szhanghailiang     }
54756ba83d2Szhanghailiang 
548131b2153SZhang Chen     packets_compare_notifier.notify = colo_compare_notify_checkpoint;
549131b2153SZhang Chen     colo_compare_register_notifier(&packets_compare_notifier);
550131b2153SZhang Chen 
5514f97558eSzhanghailiang     /*
5524f97558eSzhanghailiang      * Wait for Secondary finish loading VM states and enter COLO
5534f97558eSzhanghailiang      * restore.
5544f97558eSzhanghailiang      */
5554f97558eSzhanghailiang     colo_receive_check_message(s->rp_state.from_dst_file,
5564f97558eSzhanghailiang                        COLO_MESSAGE_CHECKPOINT_READY, &local_err);
5574f97558eSzhanghailiang     if (local_err) {
5584f97558eSzhanghailiang         goto out;
5594f97558eSzhanghailiang     }
560a91246c9Szhanghailiang     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
56177ef2dc1SDaniel P. Berrangé     fb = qemu_file_new_output(QIO_CHANNEL(bioc));
562a91246c9Szhanghailiang     object_unref(OBJECT(bioc));
5634f97558eSzhanghailiang 
564195801d7SStefan Hajnoczi     bql_lock();
5658e48ac95SZhang Chen     replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
5668e48ac95SZhang Chen     if (local_err) {
567195801d7SStefan Hajnoczi         bql_unlock();
5688e48ac95SZhang Chen         goto out;
5698e48ac95SZhang Chen     }
5708e48ac95SZhang Chen 
5710b827d5eSzhanghailiang     vm_start();
572195801d7SStefan Hajnoczi     bql_unlock();
5730b827d5eSzhanghailiang     trace_colo_vm_state_change("stop", "run");
5740b827d5eSzhanghailiang 
5750e0f0479SZhang Chen     timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
576f94a858fSJuan Quintela               migrate_checkpoint_delay());
577479125d5Szhanghailiang 
5784f97558eSzhanghailiang     while (s->state == MIGRATION_STATUS_COLO) {
579b3f7f0c5Szhanghailiang         if (failover_get_state() != FAILOVER_STATUS_NONE) {
580b3f7f0c5Szhanghailiang             error_report("failover request");
581b3f7f0c5Szhanghailiang             goto out;
582b3f7f0c5Szhanghailiang         }
583b3f7f0c5Szhanghailiang 
584bb70b66eSLukas Straub         qemu_event_wait(&s->colo_checkpoint_event);
58518cc23d7Szhanghailiang 
5862518aec1Szhanghailiang         if (s->state != MIGRATION_STATUS_COLO) {
5872518aec1Szhanghailiang             goto out;
5882518aec1Szhanghailiang         }
589a91246c9Szhanghailiang         ret = colo_do_checkpoint_transaction(s, bioc, fb);
5904f97558eSzhanghailiang         if (ret < 0) {
5914f97558eSzhanghailiang             goto out;
5924f97558eSzhanghailiang         }
5934f97558eSzhanghailiang     }
5940b827d5eSzhanghailiang 
59556ba83d2Szhanghailiang out:
5964f97558eSzhanghailiang     /* Throw the unreported error message after exited from loop */
5974f97558eSzhanghailiang     if (local_err) {
5984f97558eSzhanghailiang         error_report_err(local_err);
5994f97558eSzhanghailiang     }
6004f97558eSzhanghailiang 
601a91246c9Szhanghailiang     if (fb) {
602a91246c9Szhanghailiang         qemu_fclose(fb);
603a91246c9Szhanghailiang     }
604a91246c9Szhanghailiang 
6059ecff6d6Szhanghailiang     /*
6069ecff6d6Szhanghailiang      * There are only two reasons we can get here, some error happened
6079ecff6d6Szhanghailiang      * or the user triggered failover.
6089ecff6d6Szhanghailiang      */
6099ecff6d6Szhanghailiang     switch (failover_get_state()) {
6101fe6ab26SZhang Chen     case FAILOVER_STATUS_COMPLETED:
6119ecff6d6Szhanghailiang         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
6129ecff6d6Szhanghailiang                                   COLO_EXIT_REASON_REQUEST);
6139ecff6d6Szhanghailiang         break;
6149ecff6d6Szhanghailiang     default:
6153a43ac47SZhang Chen         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
6163a43ac47SZhang Chen                                   COLO_EXIT_REASON_ERROR);
6179ecff6d6Szhanghailiang     }
6189ecff6d6Szhanghailiang 
619c937b9a6Szhanghailiang     /* Hope this not to be too long to wait here */
620c937b9a6Szhanghailiang     qemu_sem_wait(&s->colo_exit_sem);
621c937b9a6Szhanghailiang     qemu_sem_destroy(&s->colo_exit_sem);
622131b2153SZhang Chen 
623131b2153SZhang Chen     /*
624131b2153SZhang Chen      * It is safe to unregister notifier after failover finished.
625131b2153SZhang Chen      * Besides, colo_delay_timer and colo_checkpoint_sem can't be
6263a4452d8Szhaolichang      * released before unregister notifier, or there will be use-after-free
627131b2153SZhang Chen      * error.
628131b2153SZhang Chen      */
629131b2153SZhang Chen     colo_compare_unregister_notifier(&packets_compare_notifier);
630131b2153SZhang Chen     timer_free(s->colo_delay_timer);
631bb70b66eSLukas Straub     qemu_event_destroy(&s->colo_checkpoint_event);
632131b2153SZhang Chen 
633c937b9a6Szhanghailiang     /*
634c937b9a6Szhanghailiang      * Must be called after failover BH is completed,
635c937b9a6Szhanghailiang      * Or the failover BH may shutdown the wrong fd that
636c937b9a6Szhanghailiang      * re-used by other threads after we release here.
637c937b9a6Szhanghailiang      */
63856ba83d2Szhanghailiang     if (s->rp_state.from_dst_file) {
63956ba83d2Szhanghailiang         qemu_fclose(s->rp_state.from_dst_file);
640ac183dacSRao, Lei         s->rp_state.from_dst_file = NULL;
64156ba83d2Szhanghailiang     }
6420b827d5eSzhanghailiang }
6430b827d5eSzhanghailiang 
/*
 * Set up the Primary-side COLO state held in @s and run the checkpoint loop.
 *
 * The BQL is dropped on entry and taken again right before returning
 * (callers are presumably holding it -- confirm against call sites).
 * While it is dropped, the checkpoint event, the delay timer and the exit
 * semaphore are initialised, then colo_process_checkpoint() is invoked.
 */
void migrate_start_colo_process(MigrationState *s)
{
    bql_unlock();

    /* Event starts out unset. */
    qemu_event_init(&s->colo_checkpoint_event, false);

    /* Timer callback receives a NULL opaque pointer. */
    s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
                                       colo_checkpoint_notify_timer,
                                       NULL);

    qemu_sem_init(&s->colo_exit_sem, 0);

    colo_process_checkpoint(s);

    bql_lock();
}
65525d0c16fSzhanghailiang 
colo_incoming_process_checkpoint(MigrationIncomingState * mis,QEMUFile * fb,QIOChannelBuffer * bioc,Error ** errp)6566ad8ad38Szhanghailiang static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
6576ad8ad38Szhanghailiang                       QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
6586ad8ad38Szhanghailiang {
6596ad8ad38Szhanghailiang     uint64_t total_size;
6606ad8ad38Szhanghailiang     uint64_t value;
6616ad8ad38Szhanghailiang     Error *local_err = NULL;
6626ad8ad38Szhanghailiang     int ret;
6636ad8ad38Szhanghailiang 
664195801d7SStefan Hajnoczi     bql_lock();
6656ad8ad38Szhanghailiang     vm_stop_force_state(RUN_STATE_COLO);
666195801d7SStefan Hajnoczi     bql_unlock();
6679c5c8ff2SRao, Lei     trace_colo_vm_state_change("run", "stop");
6686ad8ad38Szhanghailiang 
6696ad8ad38Szhanghailiang     /* FIXME: This is unnecessary for periodic checkpoint mode */
6706ad8ad38Szhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
6716ad8ad38Szhanghailiang                  &local_err);
6726ad8ad38Szhanghailiang     if (local_err) {
6736ad8ad38Szhanghailiang         error_propagate(errp, local_err);
6746ad8ad38Szhanghailiang         return;
6756ad8ad38Szhanghailiang     }
6766ad8ad38Szhanghailiang 
6776ad8ad38Szhanghailiang     colo_receive_check_message(mis->from_src_file,
6786ad8ad38Szhanghailiang                        COLO_MESSAGE_VMSTATE_SEND, &local_err);
6796ad8ad38Szhanghailiang     if (local_err) {
6806ad8ad38Szhanghailiang         error_propagate(errp, local_err);
6816ad8ad38Szhanghailiang         return;
6826ad8ad38Szhanghailiang     }
6836ad8ad38Szhanghailiang 
684195801d7SStefan Hajnoczi     bql_lock();
685786d8b8eSLukas Straub     cpu_synchronize_all_states();
6866ad8ad38Szhanghailiang     ret = qemu_loadvm_state_main(mis->from_src_file, mis);
687195801d7SStefan Hajnoczi     bql_unlock();
6886ad8ad38Szhanghailiang 
6896ad8ad38Szhanghailiang     if (ret < 0) {
6906ad8ad38Szhanghailiang         error_setg(errp, "Load VM's live state (ram) error");
6916ad8ad38Szhanghailiang         return;
6926ad8ad38Szhanghailiang     }
6936ad8ad38Szhanghailiang 
6946ad8ad38Szhanghailiang     value = colo_receive_message_value(mis->from_src_file,
6956ad8ad38Szhanghailiang                              COLO_MESSAGE_VMSTATE_SIZE, &local_err);
6966ad8ad38Szhanghailiang     if (local_err) {
6976ad8ad38Szhanghailiang         error_propagate(errp, local_err);
6986ad8ad38Szhanghailiang         return;
6996ad8ad38Szhanghailiang     }
7006ad8ad38Szhanghailiang 
7016ad8ad38Szhanghailiang     /*
7026ad8ad38Szhanghailiang      * Read VM device state data into channel buffer,
7036ad8ad38Szhanghailiang      * It's better to re-use the memory allocated.
7046ad8ad38Szhanghailiang      * Here we need to handle the channel buffer directly.
7056ad8ad38Szhanghailiang      */
7066ad8ad38Szhanghailiang     if (value > bioc->capacity) {
7076ad8ad38Szhanghailiang         bioc->capacity = value;
7086ad8ad38Szhanghailiang         bioc->data = g_realloc(bioc->data, bioc->capacity);
7096ad8ad38Szhanghailiang     }
7106ad8ad38Szhanghailiang     total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
7116ad8ad38Szhanghailiang     if (total_size != value) {
7126ad8ad38Szhanghailiang         error_setg(errp, "Got %" PRIu64 " VMState data, less than expected"
7136ad8ad38Szhanghailiang                     " %" PRIu64, total_size, value);
7146ad8ad38Szhanghailiang         return;
7156ad8ad38Szhanghailiang     }
7166ad8ad38Szhanghailiang     bioc->usage = total_size;
7176ad8ad38Szhanghailiang     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
7186ad8ad38Szhanghailiang 
7196ad8ad38Szhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
7206ad8ad38Szhanghailiang                  &local_err);
7216ad8ad38Szhanghailiang     if (local_err) {
7226ad8ad38Szhanghailiang         error_propagate(errp, local_err);
7236ad8ad38Szhanghailiang         return;
7246ad8ad38Szhanghailiang     }
7256ad8ad38Szhanghailiang 
726195801d7SStefan Hajnoczi     bql_lock();
7276ad8ad38Szhanghailiang     vmstate_loading = true;
72824fa16f8SLukas Straub     colo_flush_ram_cache();
7296ad8ad38Szhanghailiang     ret = qemu_load_device_state(fb);
7306ad8ad38Szhanghailiang     if (ret < 0) {
7316ad8ad38Szhanghailiang         error_setg(errp, "COLO: load device state failed");
73292c932deSLukas Straub         vmstate_loading = false;
733195801d7SStefan Hajnoczi         bql_unlock();
7346ad8ad38Szhanghailiang         return;
7356ad8ad38Szhanghailiang     }
7366ad8ad38Szhanghailiang 
7376ad8ad38Szhanghailiang     replication_get_error_all(&local_err);
7386ad8ad38Szhanghailiang     if (local_err) {
7396ad8ad38Szhanghailiang         error_propagate(errp, local_err);
74092c932deSLukas Straub         vmstate_loading = false;
741195801d7SStefan Hajnoczi         bql_unlock();
7426ad8ad38Szhanghailiang         return;
7436ad8ad38Szhanghailiang     }
7446ad8ad38Szhanghailiang 
7456ad8ad38Szhanghailiang     /* discard colo disk buffer */
7466ad8ad38Szhanghailiang     replication_do_checkpoint_all(&local_err);
7476ad8ad38Szhanghailiang     if (local_err) {
7486ad8ad38Szhanghailiang         error_propagate(errp, local_err);
74992c932deSLukas Straub         vmstate_loading = false;
750195801d7SStefan Hajnoczi         bql_unlock();
7516ad8ad38Szhanghailiang         return;
7526ad8ad38Szhanghailiang     }
7536ad8ad38Szhanghailiang     /* Notify all filters of all NIC to do checkpoint */
7546ad8ad38Szhanghailiang     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
7556ad8ad38Szhanghailiang 
7566ad8ad38Szhanghailiang     if (local_err) {
7576ad8ad38Szhanghailiang         error_propagate(errp, local_err);
75892c932deSLukas Straub         vmstate_loading = false;
759195801d7SStefan Hajnoczi         bql_unlock();
7606ad8ad38Szhanghailiang         return;
7616ad8ad38Szhanghailiang     }
7626ad8ad38Szhanghailiang 
7636ad8ad38Szhanghailiang     vmstate_loading = false;
7646ad8ad38Szhanghailiang     vm_start();
765195801d7SStefan Hajnoczi     bql_unlock();
7669c5c8ff2SRao, Lei     trace_colo_vm_state_change("stop", "run");
7676ad8ad38Szhanghailiang 
7686ad8ad38Szhanghailiang     if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
7696ad8ad38Szhanghailiang         return;
7706ad8ad38Szhanghailiang     }
7716ad8ad38Szhanghailiang 
7726ad8ad38Szhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
7736ad8ad38Szhanghailiang                  &local_err);
7746ad8ad38Szhanghailiang     error_propagate(errp, local_err);
7756ad8ad38Szhanghailiang }
7766ad8ad38Szhanghailiang 
colo_wait_handle_message(MigrationIncomingState * mis,QEMUFile * fb,QIOChannelBuffer * bioc,Error ** errp)7776ad8ad38Szhanghailiang static void colo_wait_handle_message(MigrationIncomingState *mis,
7786ad8ad38Szhanghailiang                 QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
7794f97558eSzhanghailiang {
7804f97558eSzhanghailiang     COLOMessage msg;
7814f97558eSzhanghailiang     Error *local_err = NULL;
7824f97558eSzhanghailiang 
7836ad8ad38Szhanghailiang     msg = colo_receive_message(mis->from_src_file, &local_err);
7844f97558eSzhanghailiang     if (local_err) {
7854f97558eSzhanghailiang         error_propagate(errp, local_err);
7864f97558eSzhanghailiang         return;
7874f97558eSzhanghailiang     }
7884f97558eSzhanghailiang 
7894f97558eSzhanghailiang     switch (msg) {
7904f97558eSzhanghailiang     case COLO_MESSAGE_CHECKPOINT_REQUEST:
7916ad8ad38Szhanghailiang         colo_incoming_process_checkpoint(mis, fb, bioc, errp);
7924f97558eSzhanghailiang         break;
7934f97558eSzhanghailiang     default:
7944f97558eSzhanghailiang         error_setg(errp, "Got unknown COLO message: %d", msg);
7954f97558eSzhanghailiang         break;
7964f97558eSzhanghailiang     }
7974f97558eSzhanghailiang }
7984f97558eSzhanghailiang 
colo_shutdown(void)799795969abSRao, Lei void colo_shutdown(void)
800795969abSRao, Lei {
801795969abSRao, Lei     MigrationIncomingState *mis = NULL;
802795969abSRao, Lei     MigrationState *s = NULL;
803795969abSRao, Lei 
804795969abSRao, Lei     switch (get_colo_mode()) {
805795969abSRao, Lei     case COLO_MODE_PRIMARY:
806795969abSRao, Lei         s = migrate_get_current();
807795969abSRao, Lei         qemu_event_set(&s->colo_checkpoint_event);
808795969abSRao, Lei         qemu_sem_post(&s->colo_exit_sem);
809795969abSRao, Lei         break;
810795969abSRao, Lei     case COLO_MODE_SECONDARY:
811795969abSRao, Lei         mis = migration_incoming_get_current();
812795969abSRao, Lei         qemu_sem_post(&mis->colo_incoming_sem);
813795969abSRao, Lei         break;
814795969abSRao, Lei     default:
815795969abSRao, Lei         break;
816795969abSRao, Lei     }
817795969abSRao, Lei }
818795969abSRao, Lei 
colo_process_incoming_thread(void * opaque)819d0a14a2bSVladimir Sementsov-Ogievskiy static void *colo_process_incoming_thread(void *opaque)
82025d0c16fSzhanghailiang {
82125d0c16fSzhanghailiang     MigrationIncomingState *mis = opaque;
8224291d372Szhanghailiang     QEMUFile *fb = NULL;
8234291d372Szhanghailiang     QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
8244f97558eSzhanghailiang     Error *local_err = NULL;
82525d0c16fSzhanghailiang 
82674637e6fSLidong Chen     rcu_register_thread();
827c937b9a6Szhanghailiang     qemu_sem_init(&mis->colo_incoming_sem, 0);
828c937b9a6Szhanghailiang 
82925d0c16fSzhanghailiang     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
83025d0c16fSzhanghailiang                       MIGRATION_STATUS_COLO);
83125d0c16fSzhanghailiang 
8322b9f6bf3SRao, Lei     if (get_colo_mode() != COLO_MODE_SECONDARY) {
8335ed0decaSZhang Chen         error_report("COLO mode must be COLO_MODE_SECONDARY");
8345ed0decaSZhang Chen         return NULL;
8355ed0decaSZhang Chen     }
8365ed0decaSZhang Chen 
8372cc637f1SLi Zhijian     /* Make sure all file formats throw away their mutable metadata */
8382cc637f1SLi Zhijian     bql_lock();
8392cc637f1SLi Zhijian     bdrv_activate_all(&local_err);
8402cc637f1SLi Zhijian     bql_unlock();
8413dc27facSLi Zhijian     if (local_err) {
8422cc637f1SLi Zhijian         error_report_err(local_err);
8432cc637f1SLi Zhijian         return NULL;
8442cc637f1SLi Zhijian     }
8452cc637f1SLi Zhijian 
846aef06085Szhanghailiang     failover_init_state();
847aef06085Szhanghailiang 
84856ba83d2Szhanghailiang     mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
84956ba83d2Szhanghailiang     if (!mis->to_src_file) {
85056ba83d2Szhanghailiang         error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
85156ba83d2Szhanghailiang         goto out;
85256ba83d2Szhanghailiang     }
85356ba83d2Szhanghailiang     /*
85456ba83d2Szhanghailiang      * Note: the communication between Primary side and Secondary side
85556ba83d2Szhanghailiang      * should be sequential, we set the fd to unblocked in migration incoming
85656ba83d2Szhanghailiang      * coroutine, and here we are in the COLO incoming thread, so it is ok to
85756ba83d2Szhanghailiang      * set the fd back to blocked.
85856ba83d2Szhanghailiang      */
85956ba83d2Szhanghailiang     qemu_file_set_blocking(mis->from_src_file, true);
86056ba83d2Szhanghailiang 
8610393031aSzhanghailiang     colo_incoming_start_dirty_log();
8620393031aSzhanghailiang 
8634291d372Szhanghailiang     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
86477ef2dc1SDaniel P. Berrangé     fb = qemu_file_new_input(QIO_CHANNEL(bioc));
8654291d372Szhanghailiang     object_unref(OBJECT(bioc));
8664291d372Szhanghailiang 
867195801d7SStefan Hajnoczi     bql_lock();
8688e48ac95SZhang Chen     replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
8698e48ac95SZhang Chen     if (local_err) {
870195801d7SStefan Hajnoczi         bql_unlock();
8718e48ac95SZhang Chen         goto out;
8728e48ac95SZhang Chen     }
873131b2153SZhang Chen     vm_start();
874195801d7SStefan Hajnoczi     bql_unlock();
8759c5c8ff2SRao, Lei     trace_colo_vm_state_change("stop", "run");
876131b2153SZhang Chen 
8774f97558eSzhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
8784f97558eSzhanghailiang                       &local_err);
8794f97558eSzhanghailiang     if (local_err) {
8804f97558eSzhanghailiang         goto out;
8814f97558eSzhanghailiang     }
8824f97558eSzhanghailiang 
8834f97558eSzhanghailiang     while (mis->state == MIGRATION_STATUS_COLO) {
8846ad8ad38Szhanghailiang         colo_wait_handle_message(mis, fb, bioc, &local_err);
8854f97558eSzhanghailiang         if (local_err) {
8866ad8ad38Szhanghailiang             error_report_err(local_err);
8876ad8ad38Szhanghailiang             break;
8884f97558eSzhanghailiang         }
88992c932deSLukas Straub 
89092c932deSLukas Straub         if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
89192c932deSLukas Straub             failover_set_state(FAILOVER_STATUS_RELAUNCH,
89292c932deSLukas Straub                             FAILOVER_STATUS_NONE);
89392c932deSLukas Straub             failover_request_active(NULL);
89492c932deSLukas Straub             break;
89592c932deSLukas Straub         }
89692c932deSLukas Straub 
8979d2db376Szhanghailiang         if (failover_get_state() != FAILOVER_STATUS_NONE) {
8989d2db376Szhanghailiang             error_report("failover request");
8996ad8ad38Szhanghailiang             break;
9004f97558eSzhanghailiang         }
9014f97558eSzhanghailiang     }
90225d0c16fSzhanghailiang 
90356ba83d2Szhanghailiang out:
9043a43ac47SZhang Chen     /*
9053a43ac47SZhang Chen      * There are only two reasons we can get here, some error happened
9063a43ac47SZhang Chen      * or the user triggered failover.
9073a43ac47SZhang Chen      */
9089ecff6d6Szhanghailiang     switch (failover_get_state()) {
9091fe6ab26SZhang Chen     case FAILOVER_STATUS_COMPLETED:
9109ecff6d6Szhanghailiang         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
9119ecff6d6Szhanghailiang                                   COLO_EXIT_REASON_REQUEST);
9129ecff6d6Szhanghailiang         break;
9139ecff6d6Szhanghailiang     default:
9143a43ac47SZhang Chen         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
9153a43ac47SZhang Chen                                   COLO_EXIT_REASON_ERROR);
9169ecff6d6Szhanghailiang     }
9179ecff6d6Szhanghailiang 
9184291d372Szhanghailiang     if (fb) {
9194291d372Szhanghailiang         qemu_fclose(fb);
9204291d372Szhanghailiang     }
9214291d372Szhanghailiang 
922c937b9a6Szhanghailiang     /* Hope this not to be too long to loop here */
923c937b9a6Szhanghailiang     qemu_sem_wait(&mis->colo_incoming_sem);
924c937b9a6Szhanghailiang     qemu_sem_destroy(&mis->colo_incoming_sem);
92525d0c16fSzhanghailiang 
92674637e6fSLidong Chen     rcu_unregister_thread();
92725d0c16fSzhanghailiang     return NULL;
92825d0c16fSzhanghailiang }
929d0a14a2bSVladimir Sementsov-Ogievskiy 
colo_incoming_co(void)930787ea49eSLi Zhijian void coroutine_fn colo_incoming_co(void)
931d0a14a2bSVladimir Sementsov-Ogievskiy {
932d0a14a2bSVladimir Sementsov-Ogievskiy     MigrationIncomingState *mis = migration_incoming_get_current();
933d0a14a2bSVladimir Sementsov-Ogievskiy     QemuThread th;
934d0a14a2bSVladimir Sementsov-Ogievskiy 
935195801d7SStefan Hajnoczi     assert(bql_locked());
936787ea49eSLi Zhijian     assert(migration_incoming_colo_enabled());
937d0a14a2bSVladimir Sementsov-Ogievskiy 
938*e620b1e4SPeter Xu     qemu_thread_create(&th, MIGRATION_THREAD_DST_COLO,
939*e620b1e4SPeter Xu                        colo_process_incoming_thread,
940d0a14a2bSVladimir Sementsov-Ogievskiy                        mis, QEMU_THREAD_JOINABLE);
941d0a14a2bSVladimir Sementsov-Ogievskiy 
942d0a14a2bSVladimir Sementsov-Ogievskiy     mis->colo_incoming_co = qemu_coroutine_self();
943d0a14a2bSVladimir Sementsov-Ogievskiy     qemu_coroutine_yield();
944d0a14a2bSVladimir Sementsov-Ogievskiy     mis->colo_incoming_co = NULL;
945d0a14a2bSVladimir Sementsov-Ogievskiy 
946195801d7SStefan Hajnoczi     bql_unlock();
947d0a14a2bSVladimir Sementsov-Ogievskiy     /* Wait checkpoint incoming thread exit before free resource */
948d0a14a2bSVladimir Sementsov-Ogievskiy     qemu_thread_join(&th);
949195801d7SStefan Hajnoczi     bql_lock();
950d0a14a2bSVladimir Sementsov-Ogievskiy 
951a4a411fbSStefan Hajnoczi     /* We hold the global BQL, so it is safe here */
952d0a14a2bSVladimir Sementsov-Ogievskiy     colo_release_ram_cache();
953d0a14a2bSVladimir Sementsov-Ogievskiy }
954