17dce4e6fSZhang Chen /* 27dce4e6fSZhang Chen * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) 37dce4e6fSZhang Chen * (a.k.a. Fault Tolerance or Continuous Replication) 47dce4e6fSZhang Chen * 57dce4e6fSZhang Chen * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 67dce4e6fSZhang Chen * Copyright (c) 2016 FUJITSU LIMITED 77dce4e6fSZhang Chen * Copyright (c) 2016 Intel Corporation 87dce4e6fSZhang Chen * 97dce4e6fSZhang Chen * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> 107dce4e6fSZhang Chen * 117dce4e6fSZhang Chen * This work is licensed under the terms of the GNU GPL, version 2 or 127dce4e6fSZhang Chen * later. See the COPYING file in the top-level directory. 137dce4e6fSZhang Chen */ 147dce4e6fSZhang Chen 157dce4e6fSZhang Chen #include "qemu/osdep.h" 167dce4e6fSZhang Chen #include "qemu/error-report.h" 1759509ec1SZhang Chen #include "trace.h" 187dce4e6fSZhang Chen #include "qemu-common.h" 197dce4e6fSZhang Chen #include "qapi/error.h" 207dce4e6fSZhang Chen #include "net/net.h" 21f4b61836SZhang Chen #include "net/eth.h" 227dce4e6fSZhang Chen #include "qom/object_interfaces.h" 237dce4e6fSZhang Chen #include "qemu/iov.h" 247dce4e6fSZhang Chen #include "qom/object.h" 257dce4e6fSZhang Chen #include "net/queue.h" 264d43a603SMarc-André Lureau #include "chardev/char-fe.h" 277dce4e6fSZhang Chen #include "qemu/sockets.h" 28f27f01dbSMichael S. Tsirkin #include "colo.h" 29dd321ecfSWang Yong #include "sysemu/iothread.h" 30*0ffcece3SZhang Chen #include "net/colo-compare.h" 31*0ffcece3SZhang Chen #include "migration/colo.h" 327dce4e6fSZhang Chen 337dce4e6fSZhang Chen #define TYPE_COLO_COMPARE "colo-compare" 347dce4e6fSZhang Chen #define COLO_COMPARE(obj) \ 357dce4e6fSZhang Chen OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) 367dce4e6fSZhang Chen 37*0ffcece3SZhang Chen static QTAILQ_HEAD(, CompareState) net_compares = 38*0ffcece3SZhang Chen QTAILQ_HEAD_INITIALIZER(net_compares); 39*0ffcece3SZhang Chen 400682e15bSZhang Chen #define COMPARE_READ_LEN_MAX NET_BUFSIZE 41b6540d40SZhang Chen #define MAX_QUEUE_SIZE 1024 42b6540d40SZhang Chen 43f449c9e5SMao Zhongyi #define COLO_COMPARE_FREE_PRIMARY 0x01 44f449c9e5SMao Zhongyi #define COLO_COMPARE_FREE_SECONDARY 0x02 45f449c9e5SMao Zhongyi 460682e15bSZhang Chen /* TODO: Should be configurable */ 470682e15bSZhang Chen #define REGULAR_PACKET_CHECK_MS 3000 480682e15bSZhang Chen 49*0ffcece3SZhang Chen static QemuMutex event_mtx; 50*0ffcece3SZhang Chen static QemuCond event_complete_cond; 51*0ffcece3SZhang Chen static int event_unhandled_count; 52*0ffcece3SZhang Chen 5359509ec1SZhang Chen /* 5461c5f469SZhang Chen * + CompareState ++ 5561c5f469SZhang Chen * | | 5661c5f469SZhang Chen * +---------------+ +---------------+ +---------------+ 5761c5f469SZhang Chen * | conn list + - > conn + ------- > conn + -- > ...... 5861c5f469SZhang Chen * +---------------+ +---------------+ +---------------+ 5961c5f469SZhang Chen * | | | | | | 6061c5f469SZhang Chen * +---------------+ +---v----+ +---v----+ +---v----+ +---v----+ 6161c5f469SZhang Chen * |primary | |secondary |primary | |secondary 6261c5f469SZhang Chen * |packet | |packet + |packet | |packet + 6361c5f469SZhang Chen * +--------+ +--------+ +--------+ +--------+ 6461c5f469SZhang Chen * | | | | 6561c5f469SZhang Chen * +---v----+ +---v----+ +---v----+ +---v----+ 6661c5f469SZhang Chen * |primary | |secondary |primary | |secondary 6761c5f469SZhang Chen * |packet | |packet + |packet | |packet + 6861c5f469SZhang Chen * +--------+ +--------+ +--------+ +--------+ 6961c5f469SZhang Chen * | | | | 7061c5f469SZhang Chen * +---v----+ +---v----+ +---v----+ +---v----+ 7161c5f469SZhang Chen * |primary | |secondary |primary | |secondary 7261c5f469SZhang Chen * |packet | |packet + |packet | |packet + 7361c5f469SZhang Chen * +--------+ +--------+ +--------+ +--------+ 7459509ec1SZhang Chen */ 757dce4e6fSZhang Chen typedef struct CompareState { 767dce4e6fSZhang Chen Object parent; 777dce4e6fSZhang Chen 787dce4e6fSZhang Chen char *pri_indev; 797dce4e6fSZhang Chen char *sec_indev; 807dce4e6fSZhang Chen char *outdev; 8132a6ebecSMarc-André Lureau CharBackend chr_pri_in; 8232a6ebecSMarc-André Lureau CharBackend chr_sec_in; 8332a6ebecSMarc-André Lureau CharBackend chr_out; 847dce4e6fSZhang Chen SocketReadState pri_rs; 857dce4e6fSZhang Chen SocketReadState sec_rs; 86aa3a7032SZhang Chen bool vnet_hdr; 8759509ec1SZhang Chen 8861c5f469SZhang Chen /* 8961c5f469SZhang Chen * Record the connection that through the NIC 9061c5f469SZhang Chen * Element type: Connection 91b6540d40SZhang Chen */ 92b6540d40SZhang Chen GQueue conn_list; 9361c5f469SZhang Chen /* Record the connection without repetition */ 9459509ec1SZhang Chen GHashTable *connection_track_table; 95dfd917a9Szhanghailiang 96dd321ecfSWang Yong IOThread *iothread; 97b43decb0Szhanghailiang GMainContext *worker_context; 98dd321ecfSWang Yong QEMUTimer *packet_check_timer; 99*0ffcece3SZhang Chen 100*0ffcece3SZhang Chen QEMUBH *event_bh; 101*0ffcece3SZhang Chen enum colo_event event; 102*0ffcece3SZhang Chen 103*0ffcece3SZhang Chen QTAILQ_ENTRY(CompareState) next; 1047dce4e6fSZhang Chen } CompareState; 1057dce4e6fSZhang Chen 1067dce4e6fSZhang Chen typedef struct CompareClass { 1077dce4e6fSZhang Chen ObjectClass parent_class; 1087dce4e6fSZhang Chen } CompareClass; 1097dce4e6fSZhang Chen 11059509ec1SZhang Chen enum { 11159509ec1SZhang Chen PRIMARY_IN = 0, 11259509ec1SZhang Chen SECONDARY_IN, 11359509ec1SZhang Chen }; 11459509ec1SZhang Chen 1153037e7a5SZhang Chen static int compare_chr_send(CompareState *s, 11659509ec1SZhang Chen const uint8_t *buf, 117aa3a7032SZhang Chen uint32_t size, 118aa3a7032SZhang Chen uint32_t vnet_hdr_len); 11959509ec1SZhang Chen 120a935cc31SZhang Chen static gint seq_sorter(Packet *a, Packet *b, gpointer data) 121a935cc31SZhang Chen { 122a935cc31SZhang Chen struct tcphdr *atcp, *btcp; 123a935cc31SZhang Chen 124a935cc31SZhang Chen atcp = (struct tcphdr *)(a->transport_header); 125a935cc31SZhang Chen btcp = (struct tcphdr *)(b->transport_header); 126a935cc31SZhang Chen return ntohl(atcp->th_seq) - ntohl(btcp->th_seq); 127a935cc31SZhang Chen } 128a935cc31SZhang Chen 129f449c9e5SMao Zhongyi static void fill_pkt_tcp_info(void *data, uint32_t *max_ack) 130f449c9e5SMao Zhongyi { 131f449c9e5SMao Zhongyi Packet *pkt = data; 132f449c9e5SMao Zhongyi struct tcphdr *tcphd; 133f449c9e5SMao Zhongyi 134f449c9e5SMao Zhongyi tcphd = (struct tcphdr *)pkt->transport_header; 135f449c9e5SMao Zhongyi 136f449c9e5SMao Zhongyi pkt->tcp_seq = ntohl(tcphd->th_seq); 137f449c9e5SMao Zhongyi pkt->tcp_ack = ntohl(tcphd->th_ack); 138f449c9e5SMao Zhongyi *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack; 139f449c9e5SMao Zhongyi pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data 140f449c9e5SMao Zhongyi + (tcphd->th_off << 2) - pkt->vnet_hdr_len; 141f449c9e5SMao Zhongyi pkt->payload_size = pkt->size - pkt->header_size; 142f449c9e5SMao Zhongyi pkt->seq_end = pkt->tcp_seq + pkt->payload_size; 143f449c9e5SMao Zhongyi pkt->flags = tcphd->th_flags; 144f449c9e5SMao Zhongyi } 145f449c9e5SMao Zhongyi 14659509ec1SZhang Chen /* 1478850d4caSMao Zhongyi * Return 1 on success, if return 0 means the 1488850d4caSMao Zhongyi * packet will be dropped 1498850d4caSMao Zhongyi */ 150f449c9e5SMao Zhongyi static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack) 1518850d4caSMao Zhongyi { 1528850d4caSMao Zhongyi if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) { 1538850d4caSMao Zhongyi if (pkt->ip->ip_p == IPPROTO_TCP) { 154f449c9e5SMao Zhongyi fill_pkt_tcp_info(pkt, max_ack); 1558850d4caSMao Zhongyi g_queue_insert_sorted(queue, 1568850d4caSMao Zhongyi pkt, 1578850d4caSMao Zhongyi (GCompareDataFunc)seq_sorter, 1588850d4caSMao Zhongyi NULL); 1598850d4caSMao Zhongyi } else { 1608850d4caSMao Zhongyi g_queue_push_tail(queue, pkt); 1618850d4caSMao Zhongyi } 1628850d4caSMao Zhongyi return 1; 1638850d4caSMao Zhongyi } 1648850d4caSMao Zhongyi return 0; 1658850d4caSMao Zhongyi } 1668850d4caSMao Zhongyi 1678850d4caSMao Zhongyi /* 16859509ec1SZhang Chen * Return 0 on success, if return -1 means the pkt 16959509ec1SZhang Chen * is unsupported(arp and ipv6) and will be sent later 17059509ec1SZhang Chen */ 1718ec14402SMao Zhongyi static int packet_enqueue(CompareState *s, int mode, Connection **con) 17259509ec1SZhang Chen { 173b6540d40SZhang Chen ConnectionKey key; 17459509ec1SZhang Chen Packet *pkt = NULL; 175b6540d40SZhang Chen Connection *conn; 17659509ec1SZhang Chen 17759509ec1SZhang Chen if (mode == PRIMARY_IN) { 178ada1a33fSZhang Chen pkt = packet_new(s->pri_rs.buf, 179ada1a33fSZhang Chen s->pri_rs.packet_len, 180ada1a33fSZhang Chen s->pri_rs.vnet_hdr_len); 18159509ec1SZhang Chen } else { 182ada1a33fSZhang Chen pkt = packet_new(s->sec_rs.buf, 183ada1a33fSZhang Chen s->sec_rs.packet_len, 184ada1a33fSZhang Chen s->sec_rs.vnet_hdr_len); 18559509ec1SZhang Chen } 18659509ec1SZhang Chen 18759509ec1SZhang Chen if (parse_packet_early(pkt)) { 18859509ec1SZhang Chen packet_destroy(pkt, NULL); 18959509ec1SZhang Chen pkt = NULL; 19059509ec1SZhang Chen return -1; 19159509ec1SZhang Chen } 192b6540d40SZhang Chen fill_connection_key(pkt, &key); 19359509ec1SZhang Chen 194b6540d40SZhang Chen conn = connection_get(s->connection_track_table, 195b6540d40SZhang Chen &key, 196b6540d40SZhang Chen &s->conn_list); 19759509ec1SZhang Chen 198b6540d40SZhang Chen if (!conn->processing) { 199b6540d40SZhang Chen g_queue_push_tail(&s->conn_list, conn); 200b6540d40SZhang Chen conn->processing = true; 201b6540d40SZhang Chen } 202b6540d40SZhang Chen 203b6540d40SZhang Chen if (mode == PRIMARY_IN) { 204f449c9e5SMao Zhongyi if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) { 205b6540d40SZhang Chen error_report("colo compare primary queue size too big," 206b6540d40SZhang Chen "drop packet"); 207b6540d40SZhang Chen } 208b6540d40SZhang Chen } else { 209f449c9e5SMao Zhongyi if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) { 210b6540d40SZhang Chen error_report("colo compare secondary queue size too big," 211b6540d40SZhang Chen "drop packet"); 212b6540d40SZhang Chen } 213b6540d40SZhang Chen } 2144d366235SMao Zhongyi *con = conn; 21559509ec1SZhang Chen 21659509ec1SZhang Chen return 0; 21759509ec1SZhang Chen } 21859509ec1SZhang Chen 219f449c9e5SMao Zhongyi static inline bool after(uint32_t seq1, uint32_t seq2) 220f449c9e5SMao Zhongyi { 221f449c9e5SMao Zhongyi return (int32_t)(seq1 - seq2) > 0; 222f449c9e5SMao Zhongyi } 223f449c9e5SMao Zhongyi 224f449c9e5SMao Zhongyi static void colo_release_primary_pkt(CompareState *s, Packet *pkt) 225f449c9e5SMao Zhongyi { 226f449c9e5SMao Zhongyi int ret; 227f449c9e5SMao Zhongyi ret = compare_chr_send(s, 228f449c9e5SMao Zhongyi pkt->data, 229f449c9e5SMao Zhongyi pkt->size, 230f449c9e5SMao Zhongyi pkt->vnet_hdr_len); 231f449c9e5SMao Zhongyi if (ret < 0) { 232f449c9e5SMao Zhongyi error_report("colo send primary packet failed"); 233f449c9e5SMao Zhongyi } 234f449c9e5SMao Zhongyi trace_colo_compare_main("packet same and release packet"); 235f449c9e5SMao Zhongyi packet_destroy(pkt, NULL); 236f449c9e5SMao Zhongyi } 237f449c9e5SMao Zhongyi 2380682e15bSZhang Chen /* 2390682e15bSZhang Chen * The IP packets sent by primary and secondary 2400682e15bSZhang Chen * will be compared in here 2410682e15bSZhang Chen * TODO support ip fragment, Out-Of-Order 2420682e15bSZhang Chen * return: 0 means packet same 2430682e15bSZhang Chen * > 0 || < 0 means packet different 2440682e15bSZhang Chen */ 2459394133fSMao Zhongyi static int colo_compare_packet_payload(Packet *ppkt, 2466f5009c3SZhang Chen Packet *spkt, 2479394133fSMao Zhongyi uint16_t poffset, 2489394133fSMao Zhongyi uint16_t soffset, 2499394133fSMao Zhongyi uint16_t len) 2509394133fSMao Zhongyi 2510682e15bSZhang Chen { 252d87aa138SStefan Hajnoczi if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { 253e630b2bfSZhang Chen char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20]; 254e630b2bfSZhang Chen 255e630b2bfSZhang Chen strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src)); 256e630b2bfSZhang Chen strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst)); 257e630b2bfSZhang Chen strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src)); 258e630b2bfSZhang Chen strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst)); 259e630b2bfSZhang Chen 260e630b2bfSZhang Chen trace_colo_compare_ip_info(ppkt->size, pri_ip_src, 261e630b2bfSZhang Chen pri_ip_dst, spkt->size, 262e630b2bfSZhang Chen sec_ip_src, sec_ip_dst); 263e630b2bfSZhang Chen } 2640682e15bSZhang Chen 2659394133fSMao Zhongyi return memcmp(ppkt->data + poffset, spkt->data + soffset, len); 2660682e15bSZhang Chen } 2670682e15bSZhang Chen 268f4b61836SZhang Chen /* 269f449c9e5SMao Zhongyi * return true means that the payload is consist and 270f449c9e5SMao Zhongyi * need to make the next comparison, false means do 271f449c9e5SMao Zhongyi * the checkpoint 272f4b61836SZhang Chen */ 273f449c9e5SMao Zhongyi static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt, 274f449c9e5SMao Zhongyi int8_t *mark, uint32_t max_ack) 2750682e15bSZhang Chen { 276f449c9e5SMao Zhongyi *mark = 0; 277f4b61836SZhang Chen 278f449c9e5SMao Zhongyi if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) { 279f449c9e5SMao Zhongyi if (colo_compare_packet_payload(ppkt, spkt, 280f449c9e5SMao Zhongyi ppkt->header_size, spkt->header_size, 281f449c9e5SMao Zhongyi ppkt->payload_size)) { 282f449c9e5SMao Zhongyi *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY; 283f449c9e5SMao Zhongyi return true; 284f449c9e5SMao Zhongyi } 285f449c9e5SMao Zhongyi } 286f449c9e5SMao Zhongyi if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) { 287f449c9e5SMao Zhongyi if (colo_compare_packet_payload(ppkt, spkt, 288f449c9e5SMao Zhongyi ppkt->header_size, spkt->header_size, 289f449c9e5SMao Zhongyi ppkt->payload_size)) { 290f449c9e5SMao Zhongyi *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY; 291f449c9e5SMao Zhongyi return true; 292f449c9e5SMao Zhongyi } 293f4b61836SZhang Chen } 294f4b61836SZhang Chen 295f449c9e5SMao Zhongyi /* one part of secondary packet payload still need to be compared */ 296f449c9e5SMao Zhongyi if (!after(ppkt->seq_end, spkt->seq_end)) { 297f449c9e5SMao Zhongyi if (colo_compare_packet_payload(ppkt, spkt, 298f449c9e5SMao Zhongyi ppkt->header_size + ppkt->offset, 299f449c9e5SMao Zhongyi spkt->header_size + spkt->offset, 300f449c9e5SMao Zhongyi ppkt->payload_size - ppkt->offset)) { 301f449c9e5SMao Zhongyi if (!after(ppkt->tcp_ack, max_ack)) { 302f449c9e5SMao Zhongyi *mark = COLO_COMPARE_FREE_PRIMARY; 303f449c9e5SMao Zhongyi spkt->offset += ppkt->payload_size - ppkt->offset; 304f449c9e5SMao Zhongyi return true; 3056efeb328SZhang Chen } else { 306f449c9e5SMao Zhongyi /* secondary guest hasn't ack the data, don't send 307f449c9e5SMao Zhongyi * out this packet 308f449c9e5SMao Zhongyi */ 309f449c9e5SMao Zhongyi return false; 310f449c9e5SMao Zhongyi } 311f449c9e5SMao Zhongyi } 312f449c9e5SMao Zhongyi } else { 313f449c9e5SMao Zhongyi /* primary packet is longer than secondary packet, compare 314f449c9e5SMao Zhongyi * the same part and mark the primary packet offset 315f449c9e5SMao Zhongyi */ 316f449c9e5SMao Zhongyi if (colo_compare_packet_payload(ppkt, spkt, 317f449c9e5SMao Zhongyi ppkt->header_size + ppkt->offset, 318f449c9e5SMao Zhongyi spkt->header_size + spkt->offset, 319f449c9e5SMao Zhongyi spkt->payload_size - spkt->offset)) { 320f449c9e5SMao Zhongyi *mark = COLO_COMPARE_FREE_SECONDARY; 321f449c9e5SMao Zhongyi ppkt->offset += spkt->payload_size - spkt->offset; 322f449c9e5SMao Zhongyi return true; 323f449c9e5SMao Zhongyi } 3246efeb328SZhang Chen } 325f4b61836SZhang Chen 326f449c9e5SMao Zhongyi return false; 327f449c9e5SMao Zhongyi } 3282dfe5113SAlex Bennée 329f449c9e5SMao Zhongyi static void colo_compare_tcp(CompareState *s, Connection *conn) 330f449c9e5SMao Zhongyi { 331f449c9e5SMao Zhongyi Packet *ppkt = NULL, *spkt = NULL; 332f449c9e5SMao Zhongyi int8_t mark; 333f583dca9SZhang Chen 334f449c9e5SMao Zhongyi /* 335f449c9e5SMao Zhongyi * If ppkt and spkt have the same payload, but ppkt's ACK 336f449c9e5SMao Zhongyi * is greater than spkt's ACK, in this case we can not 337f449c9e5SMao Zhongyi * send the ppkt because it will cause the secondary guest 338f449c9e5SMao Zhongyi * to miss sending some data in the next. Therefore, we 339f449c9e5SMao Zhongyi * record the maximum ACK in the current queue at both 340f449c9e5SMao Zhongyi * primary side and secondary side. Only when the ack is 341f449c9e5SMao Zhongyi * less than the smaller of the two maximum ack, then we 342f449c9e5SMao Zhongyi * can ensure that the packet's payload is acknowledged by 343f449c9e5SMao Zhongyi * primary and secondary. 344f449c9e5SMao Zhongyi */ 345f449c9e5SMao Zhongyi uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack; 346f583dca9SZhang Chen 347f449c9e5SMao Zhongyi pri: 348f449c9e5SMao Zhongyi if (g_queue_is_empty(&conn->primary_list)) { 349f449c9e5SMao Zhongyi return; 350f449c9e5SMao Zhongyi } 351f449c9e5SMao Zhongyi ppkt = g_queue_pop_head(&conn->primary_list); 352f449c9e5SMao Zhongyi sec: 353f449c9e5SMao Zhongyi if (g_queue_is_empty(&conn->secondary_list)) { 354f449c9e5SMao Zhongyi g_queue_push_head(&conn->primary_list, ppkt); 355f449c9e5SMao Zhongyi return; 356f449c9e5SMao Zhongyi } 357f449c9e5SMao Zhongyi spkt = g_queue_pop_head(&conn->secondary_list); 35851b9d495SPeter Maydell 359f449c9e5SMao Zhongyi if (ppkt->tcp_seq == ppkt->seq_end) { 360f449c9e5SMao Zhongyi colo_release_primary_pkt(s, ppkt); 361f449c9e5SMao Zhongyi ppkt = NULL; 362f449c9e5SMao Zhongyi } 363f449c9e5SMao Zhongyi 364f449c9e5SMao Zhongyi if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) { 365f449c9e5SMao Zhongyi trace_colo_compare_main("pri: this packet has compared"); 366f449c9e5SMao Zhongyi colo_release_primary_pkt(s, ppkt); 367f449c9e5SMao Zhongyi ppkt = NULL; 368f449c9e5SMao Zhongyi } 369f449c9e5SMao Zhongyi 370f449c9e5SMao Zhongyi if (spkt->tcp_seq == spkt->seq_end) { 371f449c9e5SMao Zhongyi packet_destroy(spkt, NULL); 372f449c9e5SMao Zhongyi if (!ppkt) { 373f449c9e5SMao Zhongyi goto pri; 374f449c9e5SMao Zhongyi } else { 375f449c9e5SMao Zhongyi goto sec; 376f449c9e5SMao Zhongyi } 377f449c9e5SMao Zhongyi } else { 378f449c9e5SMao Zhongyi if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) { 379f449c9e5SMao Zhongyi trace_colo_compare_main("sec: this packet has compared"); 380f449c9e5SMao Zhongyi packet_destroy(spkt, NULL); 381f449c9e5SMao Zhongyi if (!ppkt) { 382f449c9e5SMao Zhongyi goto pri; 383f449c9e5SMao Zhongyi } else { 384f449c9e5SMao Zhongyi goto sec; 385f449c9e5SMao Zhongyi } 386f449c9e5SMao Zhongyi } 387f449c9e5SMao Zhongyi if (!ppkt) { 388f449c9e5SMao Zhongyi g_queue_push_head(&conn->secondary_list, spkt); 389f449c9e5SMao Zhongyi goto pri; 390f449c9e5SMao Zhongyi } 391f449c9e5SMao Zhongyi } 392f449c9e5SMao Zhongyi 393f449c9e5SMao Zhongyi if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) { 394f449c9e5SMao Zhongyi trace_colo_compare_tcp_info("pri", 395f449c9e5SMao Zhongyi ppkt->tcp_seq, ppkt->tcp_ack, 396f449c9e5SMao Zhongyi ppkt->header_size, ppkt->payload_size, 397f449c9e5SMao Zhongyi ppkt->offset, ppkt->flags); 398f449c9e5SMao Zhongyi 399f449c9e5SMao Zhongyi trace_colo_compare_tcp_info("sec", 400f449c9e5SMao Zhongyi spkt->tcp_seq, spkt->tcp_ack, 401f449c9e5SMao Zhongyi spkt->header_size, spkt->payload_size, 402f449c9e5SMao Zhongyi spkt->offset, spkt->flags); 403f449c9e5SMao Zhongyi 404f449c9e5SMao Zhongyi if (mark == COLO_COMPARE_FREE_PRIMARY) { 405f449c9e5SMao Zhongyi conn->compare_seq = ppkt->seq_end; 406f449c9e5SMao Zhongyi colo_release_primary_pkt(s, ppkt); 407f449c9e5SMao Zhongyi g_queue_push_head(&conn->secondary_list, spkt); 408f449c9e5SMao Zhongyi goto pri; 409f449c9e5SMao Zhongyi } 410f449c9e5SMao Zhongyi if (mark == COLO_COMPARE_FREE_SECONDARY) { 411f449c9e5SMao Zhongyi conn->compare_seq = spkt->seq_end; 412f449c9e5SMao Zhongyi packet_destroy(spkt, NULL); 413f449c9e5SMao Zhongyi goto sec; 414f449c9e5SMao Zhongyi } 415f449c9e5SMao Zhongyi if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) { 416f449c9e5SMao Zhongyi conn->compare_seq = ppkt->seq_end; 417f449c9e5SMao Zhongyi colo_release_primary_pkt(s, ppkt); 418f449c9e5SMao Zhongyi packet_destroy(spkt, NULL); 419f449c9e5SMao Zhongyi goto pri; 420f449c9e5SMao Zhongyi } 421f449c9e5SMao Zhongyi } else { 422f449c9e5SMao Zhongyi g_queue_push_head(&conn->primary_list, ppkt); 423f449c9e5SMao Zhongyi g_queue_push_head(&conn->secondary_list, spkt); 424f583dca9SZhang Chen 4252061c14cSZhang Chen qemu_hexdump((char *)ppkt->data, stderr, 4262061c14cSZhang Chen "colo-compare ppkt", ppkt->size); 4272061c14cSZhang Chen qemu_hexdump((char *)spkt->data, stderr, 4282061c14cSZhang Chen "colo-compare spkt", spkt->size); 429f449c9e5SMao Zhongyi 430f449c9e5SMao Zhongyi /* 431f449c9e5SMao Zhongyi * colo_compare_inconsistent_notify(); 432f449c9e5SMao Zhongyi * TODO: notice to checkpoint(); 433f449c9e5SMao Zhongyi */ 434f449c9e5SMao Zhongyi } 435f4b61836SZhang Chen } 436f4b61836SZhang Chen 437f4b61836SZhang Chen 438f4b61836SZhang Chen /* 439f4b61836SZhang Chen * Called from the compare thread on the primary 440f4b61836SZhang Chen * for compare udp packet 441f4b61836SZhang Chen */ 442f4b61836SZhang Chen static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) 443f4b61836SZhang Chen { 4449394133fSMao Zhongyi uint16_t network_header_length = ppkt->ip->ip_hl << 2; 4459394133fSMao Zhongyi uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len; 446f4b61836SZhang Chen 447f4b61836SZhang Chen trace_colo_compare_main("compare udp"); 4482ad7ca4cSZhang Chen 4496efeb328SZhang Chen /* 4506efeb328SZhang Chen * Because of ppkt and spkt are both in the same connection, 4516efeb328SZhang Chen * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are 4526efeb328SZhang Chen * same with spkt. In addition, IP header's Identification is a random 4536efeb328SZhang Chen * field, we can handle it in IP fragmentation function later. 4546efeb328SZhang Chen * COLO just concern the response net packet payload from primary guest 4556efeb328SZhang Chen * and secondary guest are same or not, So we ignored all IP header include 4566efeb328SZhang Chen * other field like TOS,TTL,IP Checksum. we only need to compare 4576efeb328SZhang Chen * the ip payload here. 4586efeb328SZhang Chen */ 4599394133fSMao Zhongyi if (ppkt->size != spkt->size) { 4609394133fSMao Zhongyi trace_colo_compare_main("UDP: payload size of packets are different"); 4619394133fSMao Zhongyi return -1; 4629394133fSMao Zhongyi } 4639394133fSMao Zhongyi if (colo_compare_packet_payload(ppkt, spkt, offset, offset, 4649394133fSMao Zhongyi ppkt->size - offset)) { 465f4b61836SZhang Chen trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size); 466f4b61836SZhang Chen trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size); 467d87aa138SStefan Hajnoczi if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { 4681723a7f7SZhang Chen qemu_hexdump((char *)ppkt->data, stderr, "colo-compare pri pkt", 4691723a7f7SZhang Chen ppkt->size); 4701723a7f7SZhang Chen qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt", 4711723a7f7SZhang Chen spkt->size); 4721723a7f7SZhang Chen } 4739394133fSMao Zhongyi return -1; 4749394133fSMao Zhongyi } else { 4759394133fSMao Zhongyi return 0; 476f4b61836SZhang Chen } 477f4b61836SZhang Chen } 478f4b61836SZhang Chen 479f4b61836SZhang Chen /* 480f4b61836SZhang Chen * Called from the compare thread on the primary 481f4b61836SZhang Chen * for compare icmp packet 482f4b61836SZhang Chen */ 483f4b61836SZhang Chen static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) 484f4b61836SZhang Chen { 4859394133fSMao Zhongyi uint16_t network_header_length = ppkt->ip->ip_hl << 2; 4869394133fSMao Zhongyi uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len; 4876efeb328SZhang Chen 488f4b61836SZhang Chen trace_colo_compare_main("compare icmp"); 489f4b61836SZhang Chen 4906efeb328SZhang Chen /* 4916efeb328SZhang Chen * Because of ppkt and spkt are both in the same connection, 4926efeb328SZhang Chen * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are 4936efeb328SZhang Chen * same with spkt. In addition, IP header's Identification is a random 4946efeb328SZhang Chen * field, we can handle it in IP fragmentation function later. 4956efeb328SZhang Chen * COLO just concern the response net packet payload from primary guest 4966efeb328SZhang Chen * and secondary guest are same or not, So we ignored all IP header include 4976efeb328SZhang Chen * other field like TOS,TTL,IP Checksum. we only need to compare 4986efeb328SZhang Chen * the ip payload here. 4996efeb328SZhang Chen */ 5009394133fSMao Zhongyi if (ppkt->size != spkt->size) { 5019394133fSMao Zhongyi trace_colo_compare_main("ICMP: payload size of packets are different"); 5029394133fSMao Zhongyi return -1; 5039394133fSMao Zhongyi } 5049394133fSMao Zhongyi if (colo_compare_packet_payload(ppkt, spkt, offset, offset, 5059394133fSMao Zhongyi ppkt->size - offset)) { 506f4b61836SZhang Chen trace_colo_compare_icmp_miscompare("primary pkt size", 507f4b61836SZhang Chen ppkt->size); 508f4b61836SZhang Chen trace_colo_compare_icmp_miscompare("Secondary pkt size", 509f4b61836SZhang Chen spkt->size); 510d87aa138SStefan Hajnoczi if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { 5111723a7f7SZhang Chen qemu_hexdump((char *)ppkt->data, stderr, "colo-compare pri pkt", 5121723a7f7SZhang Chen ppkt->size); 5131723a7f7SZhang Chen qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt", 514f4b61836SZhang Chen spkt->size); 5151723a7f7SZhang Chen } 516f4b61836SZhang Chen return -1; 517f4b61836SZhang Chen } else { 518f4b61836SZhang Chen return 0; 519f4b61836SZhang Chen } 520f4b61836SZhang Chen } 521f4b61836SZhang Chen 522f4b61836SZhang Chen /* 523f4b61836SZhang Chen * Called from the compare thread on the primary 524f4b61836SZhang Chen * for compare other packet 525f4b61836SZhang Chen */ 526f4b61836SZhang Chen static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) 527f4b61836SZhang Chen { 5289394133fSMao Zhongyi uint16_t offset = ppkt->vnet_hdr_len; 5299394133fSMao Zhongyi 530f4b61836SZhang Chen trace_colo_compare_main("compare other"); 531d87aa138SStefan Hajnoczi if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { 532e630b2bfSZhang Chen char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20]; 533e630b2bfSZhang Chen 534e630b2bfSZhang Chen strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src)); 535e630b2bfSZhang Chen strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst)); 536e630b2bfSZhang Chen strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src)); 537e630b2bfSZhang Chen strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst)); 538e630b2bfSZhang Chen 539e630b2bfSZhang Chen trace_colo_compare_ip_info(ppkt->size, pri_ip_src, 540e630b2bfSZhang Chen pri_ip_dst, spkt->size, 541e630b2bfSZhang Chen sec_ip_src, sec_ip_dst); 542e630b2bfSZhang Chen } 543e630b2bfSZhang Chen 5449394133fSMao Zhongyi if (ppkt->size != spkt->size) { 5459394133fSMao Zhongyi trace_colo_compare_main("Other: payload size of packets are different"); 5469394133fSMao Zhongyi return -1; 5479394133fSMao Zhongyi } 5489394133fSMao Zhongyi return colo_compare_packet_payload(ppkt, spkt, offset, offset, 5499394133fSMao Zhongyi ppkt->size - offset); 5500682e15bSZhang Chen } 5510682e15bSZhang Chen 5520682e15bSZhang Chen static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time) 5530682e15bSZhang Chen { 5540682e15bSZhang Chen int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST); 5550682e15bSZhang Chen 5560682e15bSZhang Chen if ((now - pkt->creation_ms) > (*check_time)) { 5570682e15bSZhang Chen trace_colo_old_packet_check_found(pkt->creation_ms); 5580682e15bSZhang Chen return 0; 5590682e15bSZhang Chen } else { 5600682e15bSZhang Chen return 1; 5610682e15bSZhang Chen } 5620682e15bSZhang Chen } 5630682e15bSZhang Chen 564d25a7dabSZhang Chen static int colo_old_packet_check_one_conn(Connection *conn, 5650682e15bSZhang Chen void *user_data) 5660682e15bSZhang Chen { 5670682e15bSZhang Chen GList *result = NULL; 5680682e15bSZhang Chen int64_t check_time = REGULAR_PACKET_CHECK_MS; 5690682e15bSZhang Chen 5700682e15bSZhang Chen result = g_queue_find_custom(&conn->primary_list, 5710682e15bSZhang Chen &check_time, 5720682e15bSZhang Chen (GCompareFunc)colo_old_packet_check_one); 5730682e15bSZhang Chen 5740682e15bSZhang Chen if (result) { 57561c5f469SZhang Chen /* Do checkpoint will flush old packet */ 57661c5f469SZhang Chen /* 57761c5f469SZhang Chen * TODO: Notify colo frame to do checkpoint. 57861c5f469SZhang Chen * colo_compare_inconsistent_notify(); 57961c5f469SZhang Chen */ 580d25a7dabSZhang Chen return 0; 5810682e15bSZhang Chen } 582d25a7dabSZhang Chen 583d25a7dabSZhang Chen return 1; 5840682e15bSZhang Chen } 5850682e15bSZhang Chen 5860682e15bSZhang Chen /* 5870682e15bSZhang Chen * Look for old packets that the secondary hasn't matched, 5880682e15bSZhang Chen * if we have some then we have to checkpoint to wake 5890682e15bSZhang Chen * the secondary up. 5900682e15bSZhang Chen */ 5910682e15bSZhang Chen static void colo_old_packet_check(void *opaque) 5920682e15bSZhang Chen { 5930682e15bSZhang Chen CompareState *s = opaque; 5940682e15bSZhang Chen 595d25a7dabSZhang Chen /* 596d25a7dabSZhang Chen * If we find one old packet, stop finding job and notify 597d25a7dabSZhang Chen * COLO frame do checkpoint. 598d25a7dabSZhang Chen */ 599d25a7dabSZhang Chen g_queue_find_custom(&s->conn_list, NULL, 600d25a7dabSZhang Chen (GCompareFunc)colo_old_packet_check_one_conn); 6010682e15bSZhang Chen } 6020682e15bSZhang Chen 603f449c9e5SMao Zhongyi static void colo_compare_packet(CompareState *s, Connection *conn, 604f449c9e5SMao Zhongyi int (*HandlePacket)(Packet *spkt, 605f449c9e5SMao Zhongyi Packet *ppkt)) 6060682e15bSZhang Chen { 6070682e15bSZhang Chen Packet *pkt = NULL; 6080682e15bSZhang Chen GList *result = NULL; 6090682e15bSZhang Chen 6100682e15bSZhang Chen while (!g_queue_is_empty(&conn->primary_list) && 6110682e15bSZhang Chen !g_queue_is_empty(&conn->secondary_list)) { 612626bba98SZhang Chen pkt = g_queue_pop_head(&conn->primary_list); 6130682e15bSZhang Chen result = g_queue_find_custom(&conn->secondary_list, 614f449c9e5SMao Zhongyi pkt, (GCompareFunc)HandlePacket); 6150682e15bSZhang Chen 6160682e15bSZhang Chen if (result) { 617f449c9e5SMao Zhongyi colo_release_primary_pkt(s, pkt); 6180682e15bSZhang Chen g_queue_remove(&conn->secondary_list, result->data); 6190682e15bSZhang Chen } else { 6200682e15bSZhang Chen /* 6210682e15bSZhang Chen * If one packet arrive late, the secondary_list or 6220682e15bSZhang Chen * primary_list will be empty, so we can't compare it 6230682e15bSZhang Chen * until next comparison. 6240682e15bSZhang Chen */ 6250682e15bSZhang Chen trace_colo_compare_main("packet different"); 626626bba98SZhang Chen g_queue_push_head(&conn->primary_list, pkt); 6270682e15bSZhang Chen /* TODO: colo_notify_checkpoint();*/ 6280682e15bSZhang Chen break; 6290682e15bSZhang Chen } 6300682e15bSZhang Chen } 6310682e15bSZhang Chen } 6320682e15bSZhang Chen 633f449c9e5SMao Zhongyi /* 634f449c9e5SMao Zhongyi * Called from the compare thread on the primary 635f449c9e5SMao Zhongyi * for compare packet with secondary list of the 636f449c9e5SMao Zhongyi * specified connection when a new packet was 637f449c9e5SMao Zhongyi * queued to it. 638f449c9e5SMao Zhongyi */ 639f449c9e5SMao Zhongyi static void colo_compare_connection(void *opaque, void *user_data) 640f449c9e5SMao Zhongyi { 641f449c9e5SMao Zhongyi CompareState *s = user_data; 642f449c9e5SMao Zhongyi Connection *conn = opaque; 643f449c9e5SMao Zhongyi 644f449c9e5SMao Zhongyi switch (conn->ip_proto) { 645f449c9e5SMao Zhongyi case IPPROTO_TCP: 646f449c9e5SMao Zhongyi colo_compare_tcp(s, conn); 647f449c9e5SMao Zhongyi break; 648f449c9e5SMao Zhongyi case IPPROTO_UDP: 649f449c9e5SMao Zhongyi colo_compare_packet(s, conn, colo_packet_compare_udp); 650f449c9e5SMao Zhongyi break; 651f449c9e5SMao Zhongyi case IPPROTO_ICMP: 652f449c9e5SMao Zhongyi colo_compare_packet(s, conn, colo_packet_compare_icmp); 653f449c9e5SMao Zhongyi break; 654f449c9e5SMao Zhongyi default: 655f449c9e5SMao Zhongyi colo_compare_packet(s, conn, colo_packet_compare_other); 656f449c9e5SMao Zhongyi break; 657f449c9e5SMao Zhongyi } 658f449c9e5SMao Zhongyi } 659f449c9e5SMao Zhongyi 6603037e7a5SZhang Chen static int compare_chr_send(CompareState *s, 66159509ec1SZhang Chen const uint8_t *buf, 662aa3a7032SZhang Chen uint32_t size, 663aa3a7032SZhang Chen uint32_t vnet_hdr_len) 66459509ec1SZhang Chen { 66559509ec1SZhang Chen int ret = 0; 66659509ec1SZhang Chen uint32_t len = htonl(size); 66759509ec1SZhang Chen 66859509ec1SZhang Chen if (!size) { 66959509ec1SZhang Chen return 0; 67059509ec1SZhang Chen } 67159509ec1SZhang Chen 6723037e7a5SZhang Chen ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); 67359509ec1SZhang Chen if (ret != sizeof(len)) { 67459509ec1SZhang Chen goto err; 67559509ec1SZhang Chen } 67659509ec1SZhang Chen 677aa3a7032SZhang Chen if (s->vnet_hdr) { 678aa3a7032SZhang Chen /* 679aa3a7032SZhang Chen * We send vnet header len make other module(like filter-redirector) 680aa3a7032SZhang Chen * know how to parse net packet correctly. 681aa3a7032SZhang Chen */ 682aa3a7032SZhang Chen len = htonl(vnet_hdr_len); 683aa3a7032SZhang Chen ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); 684aa3a7032SZhang Chen if (ret != sizeof(len)) { 685aa3a7032SZhang Chen goto err; 686aa3a7032SZhang Chen } 687aa3a7032SZhang Chen } 688aa3a7032SZhang Chen 6893037e7a5SZhang Chen ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); 69059509ec1SZhang Chen if (ret != size) { 69159509ec1SZhang Chen goto err; 69259509ec1SZhang Chen } 69359509ec1SZhang Chen 69459509ec1SZhang Chen return 0; 69559509ec1SZhang Chen 69659509ec1SZhang Chen err: 69759509ec1SZhang Chen return ret < 0 ? ret : -EIO; 69859509ec1SZhang Chen } 69959509ec1SZhang Chen 7000682e15bSZhang Chen static int compare_chr_can_read(void *opaque) 7010682e15bSZhang Chen { 7020682e15bSZhang Chen return COMPARE_READ_LEN_MAX; 7030682e15bSZhang Chen } 7040682e15bSZhang Chen 7050682e15bSZhang Chen /* 7060682e15bSZhang Chen * Called from the main thread on the primary for packets 7070682e15bSZhang Chen * arriving over the socket from the primary. 7080682e15bSZhang Chen */ 7090682e15bSZhang Chen static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size) 7100682e15bSZhang Chen { 7110682e15bSZhang Chen CompareState *s = COLO_COMPARE(opaque); 7120682e15bSZhang Chen int ret; 7130682e15bSZhang Chen 7140682e15bSZhang Chen ret = net_fill_rstate(&s->pri_rs, buf, size); 7150682e15bSZhang Chen if (ret == -1) { 71681517ba3SAnton Nefedov qemu_chr_fe_set_handlers(&s->chr_pri_in, NULL, NULL, NULL, NULL, 71739ab61c6SMarc-André Lureau NULL, NULL, true); 7180682e15bSZhang Chen error_report("colo-compare primary_in error"); 7190682e15bSZhang Chen } 7200682e15bSZhang Chen } 7210682e15bSZhang Chen 7220682e15bSZhang Chen /* 7230682e15bSZhang Chen * Called from the main thread on the primary for packets 7240682e15bSZhang Chen * arriving over the socket from the secondary. 7250682e15bSZhang Chen */ 7260682e15bSZhang Chen static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size) 7270682e15bSZhang Chen { 7280682e15bSZhang Chen CompareState *s = COLO_COMPARE(opaque); 7290682e15bSZhang Chen int ret; 7300682e15bSZhang Chen 7310682e15bSZhang Chen ret = net_fill_rstate(&s->sec_rs, buf, size); 7320682e15bSZhang Chen if (ret == -1) { 73381517ba3SAnton Nefedov qemu_chr_fe_set_handlers(&s->chr_sec_in, NULL, NULL, NULL, NULL, 73439ab61c6SMarc-André Lureau NULL, NULL, true); 7350682e15bSZhang Chen error_report("colo-compare secondary_in error"); 7360682e15bSZhang Chen } 7370682e15bSZhang Chen } 7380682e15bSZhang Chen 73966d2a242Szhanghailiang /* 74066d2a242Szhanghailiang * Check old packet regularly so it can watch for any packets 74166d2a242Szhanghailiang * that the secondary hasn't produced equivalents of. 74266d2a242Szhanghailiang */ 743dd321ecfSWang Yong static void check_old_packet_regular(void *opaque) 74466d2a242Szhanghailiang { 74566d2a242Szhanghailiang CompareState *s = opaque; 74666d2a242Szhanghailiang 74766d2a242Szhanghailiang /* if have old packet we will notify checkpoint */ 74866d2a242Szhanghailiang colo_old_packet_check(s); 749dd321ecfSWang Yong timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 750dd321ecfSWang Yong REGULAR_PACKET_CHECK_MS); 75166d2a242Szhanghailiang } 75266d2a242Szhanghailiang 753*0ffcece3SZhang Chen /* Public API, Used for COLO frame to notify compare event */ 754*0ffcece3SZhang Chen void colo_notify_compares_event(void *opaque, int event, Error **errp) 755*0ffcece3SZhang Chen { 756*0ffcece3SZhang Chen CompareState *s; 757*0ffcece3SZhang Chen 758*0ffcece3SZhang Chen qemu_mutex_lock(&event_mtx); 759*0ffcece3SZhang Chen QTAILQ_FOREACH(s, &net_compares, next) { 760*0ffcece3SZhang Chen s->event = event; 761*0ffcece3SZhang Chen qemu_bh_schedule(s->event_bh); 762*0ffcece3SZhang Chen event_unhandled_count++; 763*0ffcece3SZhang Chen } 764*0ffcece3SZhang Chen /* Wait all compare threads to finish handling this event */ 765*0ffcece3SZhang Chen while (event_unhandled_count > 0) { 766*0ffcece3SZhang Chen qemu_cond_wait(&event_complete_cond, &event_mtx); 767*0ffcece3SZhang Chen } 768*0ffcece3SZhang Chen 769*0ffcece3SZhang Chen qemu_mutex_unlock(&event_mtx); 770*0ffcece3SZhang Chen } 771*0ffcece3SZhang Chen 772dd321ecfSWang Yong static void colo_compare_timer_init(CompareState *s) 7730682e15bSZhang Chen { 774dd321ecfSWang Yong AioContext *ctx = iothread_get_aio_context(s->iothread); 7750682e15bSZhang Chen 776dd321ecfSWang Yong s->packet_check_timer = aio_timer_new(ctx, QEMU_CLOCK_VIRTUAL, 777dd321ecfSWang Yong SCALE_MS, check_old_packet_regular, 778dd321ecfSWang Yong s); 779dd321ecfSWang Yong timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 780dd321ecfSWang Yong REGULAR_PACKET_CHECK_MS); 781dd321ecfSWang Yong } 782dd321ecfSWang Yong 783dd321ecfSWang Yong static void colo_compare_timer_del(CompareState *s) 784dd321ecfSWang Yong { 785dd321ecfSWang Yong if (s->packet_check_timer) { 786dd321ecfSWang Yong timer_del(s->packet_check_timer); 787dd321ecfSWang Yong timer_free(s->packet_check_timer); 788dd321ecfSWang Yong s->packet_check_timer = NULL; 789dd321ecfSWang Yong } 790dd321ecfSWang Yong } 791dd321ecfSWang Yong 792*0ffcece3SZhang Chen static void colo_flush_packets(void *opaque, void *user_data); 793*0ffcece3SZhang Chen 794*0ffcece3SZhang Chen static void colo_compare_handle_event(void *opaque) 795*0ffcece3SZhang Chen { 796*0ffcece3SZhang Chen CompareState *s = opaque; 797*0ffcece3SZhang Chen 798*0ffcece3SZhang Chen switch (s->event) { 799*0ffcece3SZhang Chen case COLO_EVENT_CHECKPOINT: 800*0ffcece3SZhang Chen g_queue_foreach(&s->conn_list, colo_flush_packets, s); 801*0ffcece3SZhang Chen break; 802*0ffcece3SZhang Chen case COLO_EVENT_FAILOVER: 803*0ffcece3SZhang Chen break; 804*0ffcece3SZhang Chen default: 805*0ffcece3SZhang Chen break; 806*0ffcece3SZhang Chen } 807*0ffcece3SZhang Chen 808*0ffcece3SZhang Chen assert(event_unhandled_count > 0); 809*0ffcece3SZhang Chen 810*0ffcece3SZhang Chen qemu_mutex_lock(&event_mtx); 811*0ffcece3SZhang Chen event_unhandled_count--; 812*0ffcece3SZhang Chen qemu_cond_broadcast(&event_complete_cond); 813*0ffcece3SZhang Chen qemu_mutex_unlock(&event_mtx); 814*0ffcece3SZhang Chen } 815*0ffcece3SZhang Chen 816dd321ecfSWang Yong static void colo_compare_iothread(CompareState *s) 817dd321ecfSWang Yong { 818dd321ecfSWang Yong object_ref(OBJECT(s->iothread)); 819dd321ecfSWang Yong s->worker_context = iothread_get_g_main_context(s->iothread); 8200682e15bSZhang Chen 8215345fdb4SMarc-André Lureau qemu_chr_fe_set_handlers(&s->chr_pri_in, compare_chr_can_read, 82281517ba3SAnton Nefedov compare_pri_chr_in, NULL, NULL, 82381517ba3SAnton Nefedov s, s->worker_context, true); 8245345fdb4SMarc-André Lureau qemu_chr_fe_set_handlers(&s->chr_sec_in, compare_chr_can_read, 82581517ba3SAnton Nefedov compare_sec_chr_in, NULL, NULL, 82681517ba3SAnton Nefedov s, s->worker_context, true); 8270682e15bSZhang Chen 828dd321ecfSWang Yong colo_compare_timer_init(s); 829*0ffcece3SZhang Chen s->event_bh = qemu_bh_new(colo_compare_handle_event, s); 8300682e15bSZhang Chen } 8310682e15bSZhang Chen 8327dce4e6fSZhang Chen static char *compare_get_pri_indev(Object *obj, Error **errp) 8337dce4e6fSZhang Chen { 8347dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 8357dce4e6fSZhang Chen 8367dce4e6fSZhang Chen return g_strdup(s->pri_indev); 8377dce4e6fSZhang Chen } 8387dce4e6fSZhang Chen 8397dce4e6fSZhang Chen static void compare_set_pri_indev(Object *obj, const char *value, Error **errp) 8407dce4e6fSZhang Chen { 8417dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 8427dce4e6fSZhang Chen 8437dce4e6fSZhang Chen g_free(s->pri_indev); 8447dce4e6fSZhang Chen s->pri_indev = g_strdup(value); 8457dce4e6fSZhang Chen } 8467dce4e6fSZhang Chen 8477dce4e6fSZhang Chen static char *compare_get_sec_indev(Object *obj, Error **errp) 8487dce4e6fSZhang Chen { 8497dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 8507dce4e6fSZhang Chen 8517dce4e6fSZhang Chen return g_strdup(s->sec_indev); 8527dce4e6fSZhang Chen } 8537dce4e6fSZhang Chen 8547dce4e6fSZhang Chen static void compare_set_sec_indev(Object *obj, const char *value, Error **errp) 8557dce4e6fSZhang Chen { 8567dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 8577dce4e6fSZhang Chen 8587dce4e6fSZhang Chen g_free(s->sec_indev); 8597dce4e6fSZhang Chen s->sec_indev = g_strdup(value); 8607dce4e6fSZhang Chen } 8617dce4e6fSZhang Chen 8627dce4e6fSZhang Chen static char *compare_get_outdev(Object *obj, Error **errp) 8637dce4e6fSZhang Chen { 8647dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 8657dce4e6fSZhang Chen 8667dce4e6fSZhang Chen return g_strdup(s->outdev); 8677dce4e6fSZhang Chen } 8687dce4e6fSZhang Chen 8697dce4e6fSZhang Chen static void compare_set_outdev(Object *obj, const char *value, Error **errp) 8707dce4e6fSZhang Chen { 8717dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 8727dce4e6fSZhang Chen 8737dce4e6fSZhang Chen g_free(s->outdev); 8747dce4e6fSZhang Chen s->outdev = g_strdup(value); 8757dce4e6fSZhang Chen } 8767dce4e6fSZhang Chen 877aa3a7032SZhang Chen static bool compare_get_vnet_hdr(Object *obj, Error **errp) 878aa3a7032SZhang Chen { 879aa3a7032SZhang Chen CompareState *s = COLO_COMPARE(obj); 880aa3a7032SZhang Chen 881aa3a7032SZhang Chen return s->vnet_hdr; 882aa3a7032SZhang Chen } 883aa3a7032SZhang Chen 884aa3a7032SZhang Chen static void compare_set_vnet_hdr(Object *obj, 885aa3a7032SZhang Chen bool value, 886aa3a7032SZhang Chen Error **errp) 887aa3a7032SZhang Chen { 888aa3a7032SZhang Chen CompareState *s = COLO_COMPARE(obj); 889aa3a7032SZhang Chen 890aa3a7032SZhang Chen s->vnet_hdr = value; 891aa3a7032SZhang Chen } 892aa3a7032SZhang Chen 8937dce4e6fSZhang Chen static void compare_pri_rs_finalize(SocketReadState *pri_rs) 8947dce4e6fSZhang Chen { 89559509ec1SZhang Chen CompareState *s = container_of(pri_rs, CompareState, pri_rs); 8968ec14402SMao Zhongyi Connection *conn = NULL; 89759509ec1SZhang Chen 8988ec14402SMao Zhongyi if (packet_enqueue(s, PRIMARY_IN, &conn)) { 89959509ec1SZhang Chen trace_colo_compare_main("primary: unsupported packet in"); 900aa3a7032SZhang Chen compare_chr_send(s, 901aa3a7032SZhang Chen pri_rs->buf, 902aa3a7032SZhang Chen pri_rs->packet_len, 903aa3a7032SZhang Chen pri_rs->vnet_hdr_len); 9040682e15bSZhang Chen } else { 9053463218cSMao Zhongyi /* compare packet in the specified connection */ 9068ec14402SMao Zhongyi colo_compare_connection(conn, s); 90759509ec1SZhang Chen } 9087dce4e6fSZhang Chen } 9097dce4e6fSZhang Chen 9107dce4e6fSZhang Chen static void compare_sec_rs_finalize(SocketReadState *sec_rs) 9117dce4e6fSZhang Chen { 91259509ec1SZhang Chen CompareState *s = container_of(sec_rs, CompareState, sec_rs); 9138ec14402SMao Zhongyi Connection *conn = NULL; 91459509ec1SZhang Chen 9158ec14402SMao Zhongyi if (packet_enqueue(s, SECONDARY_IN, &conn)) { 91659509ec1SZhang Chen trace_colo_compare_main("secondary: unsupported packet in"); 9170682e15bSZhang Chen } else { 9183463218cSMao Zhongyi /* compare packet in the specified connection */ 9198ec14402SMao Zhongyi colo_compare_connection(conn, s); 92059509ec1SZhang Chen } 9217dce4e6fSZhang Chen } 9227dce4e6fSZhang Chen 9237dce4e6fSZhang Chen 9247dce4e6fSZhang Chen /* 9257dce4e6fSZhang Chen * Return 0 is success. 9267dce4e6fSZhang Chen * Return 1 is failed. 9277dce4e6fSZhang Chen */ 9280ec7b3e7SMarc-André Lureau static int find_and_check_chardev(Chardev **chr, 9297dce4e6fSZhang Chen char *chr_name, 9307dce4e6fSZhang Chen Error **errp) 9317dce4e6fSZhang Chen { 9327dce4e6fSZhang Chen *chr = qemu_chr_find(chr_name); 9337dce4e6fSZhang Chen if (*chr == NULL) { 9347dce4e6fSZhang Chen error_setg(errp, "Device '%s' not found", 9357dce4e6fSZhang Chen chr_name); 9367dce4e6fSZhang Chen return 1; 9377dce4e6fSZhang Chen } 9387dce4e6fSZhang Chen 9390a73336dSDaniel P. Berrange if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) { 9400a73336dSDaniel P. Berrange error_setg(errp, "chardev \"%s\" is not reconnectable", 9417dce4e6fSZhang Chen chr_name); 9427dce4e6fSZhang Chen return 1; 9437dce4e6fSZhang Chen } 944fbf3cc3aSMarc-André Lureau 9457dce4e6fSZhang Chen return 0; 9467dce4e6fSZhang Chen } 9477dce4e6fSZhang Chen 9487dce4e6fSZhang Chen /* 9497dce4e6fSZhang Chen * Called from the main thread on the primary 9507dce4e6fSZhang Chen * to setup colo-compare. 9517dce4e6fSZhang Chen */ 9527dce4e6fSZhang Chen static void colo_compare_complete(UserCreatable *uc, Error **errp) 9537dce4e6fSZhang Chen { 9547dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(uc); 9550ec7b3e7SMarc-André Lureau Chardev *chr; 9567dce4e6fSZhang Chen 957dd321ecfSWang Yong if (!s->pri_indev || !s->sec_indev || !s->outdev || !s->iothread) { 9587dce4e6fSZhang Chen error_setg(errp, "colo compare needs 'primary_in' ," 959dd321ecfSWang Yong "'secondary_in','outdev','iothread' property set"); 9607dce4e6fSZhang Chen return; 9617dce4e6fSZhang Chen } else if (!strcmp(s->pri_indev, s->outdev) || 9627dce4e6fSZhang Chen !strcmp(s->sec_indev, s->outdev) || 9637dce4e6fSZhang Chen !strcmp(s->pri_indev, s->sec_indev)) { 9647dce4e6fSZhang Chen error_setg(errp, "'indev' and 'outdev' could not be same " 9657dce4e6fSZhang Chen "for compare module"); 9667dce4e6fSZhang Chen return; 9677dce4e6fSZhang Chen } 9687dce4e6fSZhang Chen 9695345fdb4SMarc-André Lureau if (find_and_check_chardev(&chr, s->pri_indev, errp) || 9705345fdb4SMarc-André Lureau !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) { 9717dce4e6fSZhang Chen return; 9727dce4e6fSZhang Chen } 9737dce4e6fSZhang Chen 9745345fdb4SMarc-André Lureau if (find_and_check_chardev(&chr, s->sec_indev, errp) || 9755345fdb4SMarc-André Lureau !qemu_chr_fe_init(&s->chr_sec_in, chr, errp)) { 9767dce4e6fSZhang Chen return; 9777dce4e6fSZhang Chen } 9787dce4e6fSZhang Chen 9795345fdb4SMarc-André Lureau if (find_and_check_chardev(&chr, s->outdev, errp) || 9805345fdb4SMarc-André Lureau !qemu_chr_fe_init(&s->chr_out, chr, errp)) { 9817dce4e6fSZhang Chen return; 9827dce4e6fSZhang Chen } 9837dce4e6fSZhang Chen 984aa3a7032SZhang Chen net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr); 985aa3a7032SZhang Chen net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr); 9867dce4e6fSZhang Chen 987*0ffcece3SZhang Chen QTAILQ_INSERT_TAIL(&net_compares, s, next); 988*0ffcece3SZhang Chen 989b6540d40SZhang Chen g_queue_init(&s->conn_list); 990b6540d40SZhang Chen 991*0ffcece3SZhang Chen qemu_mutex_init(&event_mtx); 992*0ffcece3SZhang Chen qemu_cond_init(&event_complete_cond); 993*0ffcece3SZhang Chen 994b6540d40SZhang Chen s->connection_track_table = g_hash_table_new_full(connection_key_hash, 995b6540d40SZhang Chen connection_key_equal, 996b6540d40SZhang Chen g_free, 997b6540d40SZhang Chen connection_destroy); 99859509ec1SZhang Chen 999dd321ecfSWang Yong colo_compare_iothread(s); 10007dce4e6fSZhang Chen return; 10017dce4e6fSZhang Chen } 10027dce4e6fSZhang Chen 1003dfd917a9Szhanghailiang static void colo_flush_packets(void *opaque, void *user_data) 1004dfd917a9Szhanghailiang { 1005dfd917a9Szhanghailiang CompareState *s = user_data; 1006dfd917a9Szhanghailiang Connection *conn = opaque; 1007dfd917a9Szhanghailiang Packet *pkt = NULL; 1008dfd917a9Szhanghailiang 1009dfd917a9Szhanghailiang while (!g_queue_is_empty(&conn->primary_list)) { 1010dfd917a9Szhanghailiang pkt = g_queue_pop_head(&conn->primary_list); 1011aa3a7032SZhang Chen compare_chr_send(s, 1012aa3a7032SZhang Chen pkt->data, 1013aa3a7032SZhang Chen pkt->size, 1014aa3a7032SZhang Chen pkt->vnet_hdr_len); 1015dfd917a9Szhanghailiang packet_destroy(pkt, NULL); 1016dfd917a9Szhanghailiang } 1017dfd917a9Szhanghailiang while (!g_queue_is_empty(&conn->secondary_list)) { 1018dfd917a9Szhanghailiang pkt = g_queue_pop_head(&conn->secondary_list); 1019dfd917a9Szhanghailiang packet_destroy(pkt, NULL); 1020dfd917a9Szhanghailiang } 1021dfd917a9Szhanghailiang } 1022dfd917a9Szhanghailiang 10237dce4e6fSZhang Chen static void colo_compare_class_init(ObjectClass *oc, void *data) 10247dce4e6fSZhang Chen { 10257dce4e6fSZhang Chen UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 10267dce4e6fSZhang Chen 10277dce4e6fSZhang Chen ucc->complete = colo_compare_complete; 10287dce4e6fSZhang Chen } 10297dce4e6fSZhang Chen 10307dce4e6fSZhang Chen static void colo_compare_init(Object *obj) 10317dce4e6fSZhang Chen { 1032aa3a7032SZhang Chen CompareState *s = COLO_COMPARE(obj); 1033aa3a7032SZhang Chen 10347dce4e6fSZhang Chen object_property_add_str(obj, "primary_in", 10357dce4e6fSZhang Chen compare_get_pri_indev, compare_set_pri_indev, 10367dce4e6fSZhang Chen NULL); 10377dce4e6fSZhang Chen object_property_add_str(obj, "secondary_in", 10387dce4e6fSZhang Chen compare_get_sec_indev, compare_set_sec_indev, 10397dce4e6fSZhang Chen NULL); 10407dce4e6fSZhang Chen object_property_add_str(obj, "outdev", 10417dce4e6fSZhang Chen compare_get_outdev, compare_set_outdev, 10427dce4e6fSZhang Chen NULL); 1043dd321ecfSWang Yong object_property_add_link(obj, "iothread", TYPE_IOTHREAD, 1044dd321ecfSWang Yong (Object **)&s->iothread, 1045dd321ecfSWang Yong object_property_allow_set_link, 1046265b578cSMarc-André Lureau OBJ_PROP_LINK_STRONG, NULL); 1047aa3a7032SZhang Chen 1048aa3a7032SZhang Chen s->vnet_hdr = false; 1049aa3a7032SZhang Chen object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr, 1050aa3a7032SZhang Chen compare_set_vnet_hdr, NULL); 10517dce4e6fSZhang Chen } 10527dce4e6fSZhang Chen 10537dce4e6fSZhang Chen static void colo_compare_finalize(Object *obj) 10547dce4e6fSZhang Chen { 10557dce4e6fSZhang Chen CompareState *s = COLO_COMPARE(obj); 1056*0ffcece3SZhang Chen CompareState *tmp = NULL; 10577dce4e6fSZhang Chen 10581ce2610cSMarc-André Lureau qemu_chr_fe_deinit(&s->chr_pri_in, false); 10591ce2610cSMarc-André Lureau qemu_chr_fe_deinit(&s->chr_sec_in, false); 10601ce2610cSMarc-André Lureau qemu_chr_fe_deinit(&s->chr_out, false); 1061dd321ecfSWang Yong if (s->iothread) { 1062dd321ecfSWang Yong colo_compare_timer_del(s); 1063dd321ecfSWang Yong } 1064*0ffcece3SZhang Chen 1065*0ffcece3SZhang Chen qemu_bh_delete(s->event_bh); 1066*0ffcece3SZhang Chen 1067*0ffcece3SZhang Chen QTAILQ_FOREACH(tmp, &net_compares, next) { 1068*0ffcece3SZhang Chen if (tmp == s) { 1069*0ffcece3SZhang Chen QTAILQ_REMOVE(&net_compares, s, next); 1070*0ffcece3SZhang Chen break; 1071*0ffcece3SZhang Chen } 1072*0ffcece3SZhang Chen } 1073*0ffcece3SZhang Chen 1074dfd917a9Szhanghailiang /* Release all unhandled packets after compare thead exited */ 1075dfd917a9Szhanghailiang g_queue_foreach(&s->conn_list, colo_flush_packets, s); 1076dfd917a9Szhanghailiang 1077727c2d76SZhang Chen g_queue_clear(&s->conn_list); 1078b6540d40SZhang Chen 1079dd321ecfSWang Yong if (s->connection_track_table) { 1080dfd917a9Szhanghailiang g_hash_table_destroy(s->connection_track_table); 1081dd321ecfSWang Yong } 1082dd321ecfSWang Yong 1083dd321ecfSWang Yong if (s->iothread) { 1084dd321ecfSWang Yong object_unref(OBJECT(s->iothread)); 1085dd321ecfSWang Yong } 1086*0ffcece3SZhang Chen 1087*0ffcece3SZhang Chen qemu_mutex_destroy(&event_mtx); 1088*0ffcece3SZhang Chen qemu_cond_destroy(&event_complete_cond); 1089*0ffcece3SZhang Chen 10907dce4e6fSZhang Chen g_free(s->pri_indev); 10917dce4e6fSZhang Chen g_free(s->sec_indev); 10927dce4e6fSZhang Chen g_free(s->outdev); 10937dce4e6fSZhang Chen } 10947dce4e6fSZhang Chen 10957dce4e6fSZhang Chen static const TypeInfo colo_compare_info = { 10967dce4e6fSZhang Chen .name = TYPE_COLO_COMPARE, 10977dce4e6fSZhang Chen .parent = TYPE_OBJECT, 10987dce4e6fSZhang Chen .instance_size = sizeof(CompareState), 10997dce4e6fSZhang Chen .instance_init = colo_compare_init, 11007dce4e6fSZhang Chen .instance_finalize = colo_compare_finalize, 11017dce4e6fSZhang Chen .class_size = sizeof(CompareClass), 11027dce4e6fSZhang Chen .class_init = colo_compare_class_init, 11037dce4e6fSZhang Chen .interfaces = (InterfaceInfo[]) { 11047dce4e6fSZhang Chen { TYPE_USER_CREATABLE }, 11057dce4e6fSZhang Chen { } 11067dce4e6fSZhang Chen } 11077dce4e6fSZhang Chen }; 11087dce4e6fSZhang Chen 11097dce4e6fSZhang Chen static void register_types(void) 11107dce4e6fSZhang Chen { 11117dce4e6fSZhang Chen type_register_static(&colo_compare_info); 11127dce4e6fSZhang Chen } 11137dce4e6fSZhang Chen 11147dce4e6fSZhang Chen type_init(register_types); 1115