1 /* 2 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) 3 * (a.k.a. Fault Tolerance or Continuous Replication) 4 * 5 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 6 * Copyright (c) 2016 FUJITSU LIMITED 7 * Copyright (c) 2016 Intel Corporation 8 * 9 * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> 10 * 11 * This work is licensed under the terms of the GNU GPL, version 2 or 12 * later. See the COPYING file in the top-level directory. 13 */ 14 15 #include "qemu/osdep.h" 16 #include "trace.h" 17 #include "colo.h" 18 #include "util.h" 19 20 uint32_t connection_key_hash(const void *opaque) 21 { 22 const ConnectionKey *key = opaque; 23 uint32_t a, b, c; 24 25 /* Jenkins hash */ 26 a = b = c = JHASH_INITVAL + sizeof(*key); 27 a += key->src.s_addr; 28 b += key->dst.s_addr; 29 c += (key->src_port | key->dst_port << 16); 30 __jhash_mix(a, b, c); 31 32 a += key->ip_proto; 33 __jhash_final(a, b, c); 34 35 return c; 36 } 37 38 int connection_key_equal(const void *key1, const void *key2) 39 { 40 return memcmp(key1, key2, sizeof(ConnectionKey)) == 0; 41 } 42 43 int parse_packet_early(Packet *pkt) 44 { 45 int network_length; 46 static const uint8_t vlan[] = {0x81, 0x00}; 47 uint8_t *data = pkt->data + pkt->vnet_hdr_len; 48 uint16_t l3_proto; 49 ssize_t l2hdr_len = eth_get_l2_hdr_length(data); 50 51 if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) { 52 trace_colo_proxy_main("pkt->size < ETH_HLEN"); 53 return 1; 54 } 55 56 /* 57 * TODO: support vlan. 58 */ 59 if (!memcmp(&data[12], vlan, sizeof(vlan))) { 60 trace_colo_proxy_main("COLO-proxy don't support vlan"); 61 return 1; 62 } 63 64 pkt->network_header = data + l2hdr_len; 65 66 const struct iovec l2vec = { 67 .iov_base = (void *) data, 68 .iov_len = l2hdr_len 69 }; 70 l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len); 71 72 if (l3_proto != ETH_P_IP) { 73 return 1; 74 } 75 76 network_length = pkt->ip->ip_hl * 4; 77 if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) { 78 trace_colo_proxy_main("pkt->size < network_header + network_length"); 79 return 1; 80 } 81 pkt->transport_header = pkt->network_header + network_length; 82 83 return 0; 84 } 85 86 void extract_ip_and_port(uint32_t tmp_ports, ConnectionKey *key, 87 Packet *pkt, bool reverse) 88 { 89 if (reverse) { 90 key->src = pkt->ip->ip_dst; 91 key->dst = pkt->ip->ip_src; 92 key->src_port = ntohs(tmp_ports & 0xffff); 93 key->dst_port = ntohs(tmp_ports >> 16); 94 } else { 95 key->src = pkt->ip->ip_src; 96 key->dst = pkt->ip->ip_dst; 97 key->src_port = ntohs(tmp_ports >> 16); 98 key->dst_port = ntohs(tmp_ports & 0xffff); 99 } 100 } 101 102 void fill_connection_key(Packet *pkt, ConnectionKey *key, bool reverse) 103 { 104 uint32_t tmp_ports = 0; 105 106 key->ip_proto = pkt->ip->ip_p; 107 108 switch (key->ip_proto) { 109 case IPPROTO_TCP: 110 case IPPROTO_UDP: 111 case IPPROTO_DCCP: 112 case IPPROTO_ESP: 113 case IPPROTO_SCTP: 114 case IPPROTO_UDPLITE: 115 tmp_ports = *(uint32_t *)(pkt->transport_header); 116 break; 117 case IPPROTO_AH: 118 tmp_ports = *(uint32_t *)(pkt->transport_header + 4); 119 break; 120 default: 121 break; 122 } 123 124 extract_ip_and_port(tmp_ports, key, pkt, reverse); 125 } 126 127 Connection *connection_new(ConnectionKey *key) 128 { 129 Connection *conn = g_slice_new0(Connection); 130 131 conn->ip_proto = key->ip_proto; 132 conn->processing = false; 133 conn->tcp_state = TCPS_CLOSED; 134 g_queue_init(&conn->primary_list); 135 g_queue_init(&conn->secondary_list); 136 137 return conn; 138 } 139 140 void connection_destroy(void *opaque) 141 { 142 Connection *conn = opaque; 143 144 g_queue_foreach(&conn->primary_list, packet_destroy, NULL); 145 g_queue_clear(&conn->primary_list); 146 g_queue_foreach(&conn->secondary_list, packet_destroy, NULL); 147 g_queue_clear(&conn->secondary_list); 148 g_slice_free(Connection, conn); 149 } 150 151 Packet *packet_new(const void *data, int size, int vnet_hdr_len) 152 { 153 Packet *pkt = g_slice_new0(Packet); 154 155 pkt->data = g_memdup(data, size); 156 pkt->size = size; 157 pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); 158 pkt->vnet_hdr_len = vnet_hdr_len; 159 160 return pkt; 161 } 162 163 /* 164 * packet_new_nocopy will not copy data, so the caller can't release 165 * the data. And it will be released in packet_destroy. 166 */ 167 Packet *packet_new_nocopy(void *data, int size, int vnet_hdr_len) 168 { 169 Packet *pkt = g_slice_new0(Packet); 170 171 pkt->data = data; 172 pkt->size = size; 173 pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); 174 pkt->vnet_hdr_len = vnet_hdr_len; 175 176 return pkt; 177 } 178 179 void packet_destroy(void *opaque, void *user_data) 180 { 181 Packet *pkt = opaque; 182 183 g_free(pkt->data); 184 g_slice_free(Packet, pkt); 185 } 186 187 void packet_destroy_partial(void *opaque, void *user_data) 188 { 189 Packet *pkt = opaque; 190 191 g_slice_free(Packet, pkt); 192 } 193 194 /* 195 * Clear hashtable, stop this hash growing really huge 196 */ 197 void connection_hashtable_reset(GHashTable *connection_track_table) 198 { 199 g_hash_table_remove_all(connection_track_table); 200 } 201 202 /* if not found, create a new connection and add to hash table */ 203 Connection *connection_get(GHashTable *connection_track_table, 204 ConnectionKey *key, 205 GQueue *conn_list) 206 { 207 Connection *conn = g_hash_table_lookup(connection_track_table, key); 208 209 if (conn == NULL) { 210 ConnectionKey *new_key = g_memdup(key, sizeof(*key)); 211 212 conn = connection_new(key); 213 214 if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) { 215 trace_colo_proxy_main("colo proxy connection hashtable full," 216 " clear it"); 217 connection_hashtable_reset(connection_track_table); 218 /* 219 * clear the conn_list 220 */ 221 while (!g_queue_is_empty(conn_list)) { 222 connection_destroy(g_queue_pop_head(conn_list)); 223 } 224 } 225 226 g_hash_table_insert(connection_track_table, new_key, conn); 227 } 228 229 return conn; 230 } 231 232 bool connection_has_tracked(GHashTable *connection_track_table, 233 ConnectionKey *key) 234 { 235 Connection *conn = g_hash_table_lookup(connection_track_table, key); 236 237 return conn ? true : false; 238 } 239