1 /* 2 * QEMU network structures definitions and helper functions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "net/eth.h" 20 #include "net/checksum.h" 21 #include "qemu-common.h" 22 #include "net/tap.h" 23 24 void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, 25 uint16_t vlan_ethtype, bool *is_new) 26 { 27 struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); 28 29 switch (be16_to_cpu(ehdr->h_proto)) { 30 case ETH_P_VLAN: 31 case ETH_P_DVLAN: 32 /* vlan hdr exists */ 33 *is_new = false; 34 break; 35 36 default: 37 /* No VLAN header, put a new one */ 38 vhdr->h_proto = ehdr->h_proto; 39 ehdr->h_proto = cpu_to_be16(vlan_ethtype); 40 *is_new = true; 41 break; 42 } 43 vhdr->h_tci = cpu_to_be16(vlan_tag); 44 } 45 46 uint8_t 47 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) 48 { 49 uint8_t ecn_state = 0; 50 51 if (l3_proto == ETH_P_IP) { 52 struct ip_header *iphdr = (struct ip_header *) l3_hdr; 53 54 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { 55 if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) { 56 ecn_state = VIRTIO_NET_HDR_GSO_ECN; 57 } 58 if (l4proto == IP_PROTO_TCP) { 59 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state; 60 } else if (l4proto == IP_PROTO_UDP) { 61 return VIRTIO_NET_HDR_GSO_UDP | ecn_state; 62 } 63 } 64 } else if (l3_proto == ETH_P_IPV6) { 65 struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr; 66 67 if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) { 68 ecn_state = VIRTIO_NET_HDR_GSO_ECN; 69 } 70 71 if (l4proto == IP_PROTO_TCP) { 72 return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; 73 } 74 } 75 76 /* Unsupported offload */ 77 g_assert_not_reached(); 78 79 return VIRTIO_NET_HDR_GSO_NONE | ecn_state; 80 } 81 82 uint16_t 83 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len) 84 { 85 uint16_t proto; 86 size_t copied; 87 size_t size = iov_size(l2hdr_iov, iovcnt); 88 size_t proto_offset = l2hdr_len - sizeof(proto); 89 90 if (size < proto_offset) { 91 return ETH_P_UNKNOWN; 92 } 93 94 copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset, 95 &proto, sizeof(proto)); 96 97 return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN; 98 } 99 100 static bool 101 _eth_copy_chunk(size_t input_size, 102 const struct iovec *iov, int iovcnt, 103 size_t offset, size_t length, 104 void *buffer) 105 { 106 size_t copied; 107 108 if (input_size < offset) { 109 return false; 110 } 111 112 copied = iov_to_buf(iov, iovcnt, offset, buffer, length); 113 114 if (copied < length) { 115 return false; 116 } 117 118 return true; 119 } 120 121 static bool 122 _eth_tcp_has_data(bool is_ip4, 123 const struct ip_header *ip4_hdr, 124 const struct ip6_header *ip6_hdr, 125 size_t full_ip6hdr_len, 126 const struct tcp_header *tcp) 127 { 128 uint32_t l4len; 129 130 if (is_ip4) { 131 l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr); 132 } else { 133 size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 134 l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len; 135 } 136 137 return l4len > TCP_HEADER_DATA_OFFSET(tcp); 138 } 139 140 void eth_get_protocols(const struct iovec *iov, int iovcnt, 141 bool *isip4, bool *isip6, 142 bool *isudp, bool *istcp, 143 size_t *l3hdr_off, 144 size_t *l4hdr_off, 145 size_t *l5hdr_off, 146 eth_ip6_hdr_info *ip6hdr_info, 147 eth_ip4_hdr_info *ip4hdr_info, 148 eth_l4_hdr_info *l4hdr_info) 149 { 150 int proto; 151 bool fragment = false; 152 size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); 153 size_t input_size = iov_size(iov, iovcnt); 154 size_t copied; 155 156 *isip4 = *isip6 = *isudp = *istcp = false; 157 158 proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); 159 160 *l3hdr_off = l2hdr_len; 161 162 if (proto == ETH_P_IP) { 163 struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; 164 165 if (input_size < l2hdr_len) { 166 return; 167 } 168 169 copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); 170 171 *isip4 = true; 172 173 if (copied < sizeof(*iphdr)) { 174 return; 175 } 176 177 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { 178 if (iphdr->ip_p == IP_PROTO_TCP) { 179 *istcp = true; 180 } else if (iphdr->ip_p == IP_PROTO_UDP) { 181 *isudp = true; 182 } 183 } 184 185 ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); 186 *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); 187 188 fragment = ip4hdr_info->fragment; 189 } else if (proto == ETH_P_IPV6) { 190 191 *isip6 = true; 192 if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, 193 ip6hdr_info)) { 194 if (ip6hdr_info->l4proto == IP_PROTO_TCP) { 195 *istcp = true; 196 } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) { 197 *isudp = true; 198 } 199 } else { 200 return; 201 } 202 203 *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; 204 fragment = ip6hdr_info->fragment; 205 } 206 207 if (!fragment) { 208 if (*istcp) { 209 *istcp = _eth_copy_chunk(input_size, 210 iov, iovcnt, 211 *l4hdr_off, sizeof(l4hdr_info->hdr.tcp), 212 &l4hdr_info->hdr.tcp); 213 214 if (*istcp) { 215 *l5hdr_off = *l4hdr_off + 216 TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp); 217 218 l4hdr_info->has_tcp_data = 219 _eth_tcp_has_data(proto == ETH_P_IP, 220 &ip4hdr_info->ip4_hdr, 221 &ip6hdr_info->ip6_hdr, 222 *l4hdr_off - *l3hdr_off, 223 &l4hdr_info->hdr.tcp); 224 } 225 } else if (*isudp) { 226 *isudp = _eth_copy_chunk(input_size, 227 iov, iovcnt, 228 *l4hdr_off, sizeof(l4hdr_info->hdr.udp), 229 &l4hdr_info->hdr.udp); 230 *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); 231 } 232 } 233 } 234 235 size_t 236 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, 237 uint8_t *new_ehdr_buf, 238 uint16_t *payload_offset, uint16_t *tci) 239 { 240 struct vlan_header vlan_hdr; 241 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; 242 243 size_t copied = iov_to_buf(iov, iovcnt, iovoff, 244 new_ehdr, sizeof(*new_ehdr)); 245 246 if (copied < sizeof(*new_ehdr)) { 247 return 0; 248 } 249 250 switch (be16_to_cpu(new_ehdr->h_proto)) { 251 case ETH_P_VLAN: 252 case ETH_P_DVLAN: 253 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), 254 &vlan_hdr, sizeof(vlan_hdr)); 255 256 if (copied < sizeof(vlan_hdr)) { 257 return 0; 258 } 259 260 new_ehdr->h_proto = vlan_hdr.h_proto; 261 262 *tci = be16_to_cpu(vlan_hdr.h_tci); 263 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); 264 265 if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { 266 267 copied = iov_to_buf(iov, iovcnt, *payload_offset, 268 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr)); 269 270 if (copied < sizeof(vlan_hdr)) { 271 return 0; 272 } 273 274 *payload_offset += sizeof(vlan_hdr); 275 276 return sizeof(struct eth_header) + sizeof(struct vlan_header); 277 } else { 278 return sizeof(struct eth_header); 279 } 280 default: 281 return 0; 282 } 283 } 284 285 size_t 286 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, 287 uint16_t vet, uint8_t *new_ehdr_buf, 288 uint16_t *payload_offset, uint16_t *tci) 289 { 290 struct vlan_header vlan_hdr; 291 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; 292 293 size_t copied = iov_to_buf(iov, iovcnt, iovoff, 294 new_ehdr, sizeof(*new_ehdr)); 295 296 if (copied < sizeof(*new_ehdr)) { 297 return 0; 298 } 299 300 if (be16_to_cpu(new_ehdr->h_proto) == vet) { 301 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), 302 &vlan_hdr, sizeof(vlan_hdr)); 303 304 if (copied < sizeof(vlan_hdr)) { 305 return 0; 306 } 307 308 new_ehdr->h_proto = vlan_hdr.h_proto; 309 310 *tci = be16_to_cpu(vlan_hdr.h_tci); 311 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); 312 return sizeof(struct eth_header); 313 } 314 315 return 0; 316 } 317 318 void 319 eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, 320 void *l3hdr, size_t l3hdr_len, 321 size_t l3payload_len, 322 size_t frag_offset, bool more_frags) 323 { 324 const struct iovec l2vec = { 325 .iov_base = (void *) l2hdr, 326 .iov_len = l2hdr_len 327 }; 328 329 if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) { 330 uint16_t orig_flags; 331 struct ip_header *iphdr = (struct ip_header *) l3hdr; 332 uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; 333 uint16_t new_ip_off; 334 335 assert(frag_offset % IP_FRAG_UNIT_SIZE == 0); 336 assert((frag_off_units & ~IP_OFFMASK) == 0); 337 338 orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF); 339 new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0); 340 iphdr->ip_off = cpu_to_be16(new_ip_off); 341 iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len); 342 } 343 } 344 345 void 346 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) 347 { 348 struct ip_header *iphdr = (struct ip_header *) l3hdr; 349 iphdr->ip_sum = 0; 350 iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len)); 351 } 352 353 uint32_t 354 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, 355 uint16_t csl, 356 uint32_t *cso) 357 { 358 struct ip_pseudo_header ipph; 359 ipph.ip_src = iphdr->ip_src; 360 ipph.ip_dst = iphdr->ip_dst; 361 ipph.ip_payload = cpu_to_be16(csl); 362 ipph.ip_proto = iphdr->ip_p; 363 ipph.zeros = 0; 364 *cso = sizeof(ipph); 365 return net_checksum_add(*cso, (uint8_t *) &ipph); 366 } 367 368 uint32_t 369 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, 370 uint16_t csl, 371 uint8_t l4_proto, 372 uint32_t *cso) 373 { 374 struct ip6_pseudo_header ipph; 375 ipph.ip6_src = iphdr->ip6_src; 376 ipph.ip6_dst = iphdr->ip6_dst; 377 ipph.len = cpu_to_be16(csl); 378 ipph.zero[0] = 0; 379 ipph.zero[1] = 0; 380 ipph.zero[2] = 0; 381 ipph.next_hdr = l4_proto; 382 *cso = sizeof(ipph); 383 return net_checksum_add(*cso, (uint8_t *)&ipph); 384 } 385 386 static bool 387 eth_is_ip6_extension_header_type(uint8_t hdr_type) 388 { 389 switch (hdr_type) { 390 case IP6_HOP_BY_HOP: 391 case IP6_ROUTING: 392 case IP6_FRAGMENT: 393 case IP6_ESP: 394 case IP6_AUTHENTICATION: 395 case IP6_DESTINATON: 396 case IP6_MOBILITY: 397 return true; 398 default: 399 return false; 400 } 401 } 402 403 static bool 404 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, 405 size_t rthdr_offset, 406 struct ip6_ext_hdr *ext_hdr, 407 struct in6_address *dst_addr) 408 { 409 struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; 410 411 if ((rthdr->rtype == 2) && 412 (rthdr->len == sizeof(struct in6_address) / 8) && 413 (rthdr->segleft == 1)) { 414 415 size_t input_size = iov_size(pkt, pkt_frags); 416 size_t bytes_read; 417 418 if (input_size < rthdr_offset + sizeof(*ext_hdr)) { 419 return false; 420 } 421 422 bytes_read = iov_to_buf(pkt, pkt_frags, 423 rthdr_offset + sizeof(*ext_hdr), 424 dst_addr, sizeof(*dst_addr)); 425 426 return bytes_read == sizeof(dst_addr); 427 } 428 429 return false; 430 } 431 432 static bool 433 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags, 434 size_t dsthdr_offset, 435 struct ip6_ext_hdr *ext_hdr, 436 struct in6_address *src_addr) 437 { 438 size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr); 439 struct ip6_option_hdr opthdr; 440 size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr); 441 442 while (bytes_left > sizeof(opthdr)) { 443 size_t input_size = iov_size(pkt, pkt_frags); 444 size_t bytes_read, optlen; 445 446 if (input_size < opt_offset) { 447 return false; 448 } 449 450 bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset, 451 &opthdr, sizeof(opthdr)); 452 453 if (bytes_read != sizeof(opthdr)) { 454 return false; 455 } 456 457 optlen = (opthdr.type == IP6_OPT_PAD1) ? 1 458 : (opthdr.len + sizeof(opthdr)); 459 460 if (optlen > bytes_left) { 461 return false; 462 } 463 464 if (opthdr.type == IP6_OPT_HOME) { 465 size_t input_size = iov_size(pkt, pkt_frags); 466 467 if (input_size < opt_offset + sizeof(opthdr)) { 468 return false; 469 } 470 471 bytes_read = iov_to_buf(pkt, pkt_frags, 472 opt_offset + sizeof(opthdr), 473 src_addr, sizeof(*src_addr)); 474 475 return bytes_read == sizeof(src_addr); 476 } 477 478 opt_offset += optlen; 479 bytes_left -= optlen; 480 } 481 482 return false; 483 } 484 485 bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, 486 size_t ip6hdr_off, eth_ip6_hdr_info *info) 487 { 488 struct ip6_ext_hdr ext_hdr; 489 size_t bytes_read; 490 uint8_t curr_ext_hdr_type; 491 size_t input_size = iov_size(pkt, pkt_frags); 492 493 info->rss_ex_dst_valid = false; 494 info->rss_ex_src_valid = false; 495 info->fragment = false; 496 497 if (input_size < ip6hdr_off) { 498 return false; 499 } 500 501 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, 502 &info->ip6_hdr, sizeof(info->ip6_hdr)); 503 if (bytes_read < sizeof(info->ip6_hdr)) { 504 return false; 505 } 506 507 info->full_hdr_len = sizeof(struct ip6_header); 508 509 curr_ext_hdr_type = info->ip6_hdr.ip6_nxt; 510 511 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) { 512 info->l4proto = info->ip6_hdr.ip6_nxt; 513 info->has_ext_hdrs = false; 514 return true; 515 } 516 517 info->has_ext_hdrs = true; 518 519 do { 520 if (input_size < ip6hdr_off + info->full_hdr_len) { 521 return false; 522 } 523 524 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len, 525 &ext_hdr, sizeof(ext_hdr)); 526 527 if (bytes_read < sizeof(ext_hdr)) { 528 return false; 529 } 530 531 if (curr_ext_hdr_type == IP6_ROUTING) { 532 info->rss_ex_dst_valid = 533 _eth_get_rss_ex_dst_addr(pkt, pkt_frags, 534 ip6hdr_off + info->full_hdr_len, 535 &ext_hdr, &info->rss_ex_dst); 536 } else if (curr_ext_hdr_type == IP6_DESTINATON) { 537 info->rss_ex_src_valid = 538 _eth_get_rss_ex_src_addr(pkt, pkt_frags, 539 ip6hdr_off + info->full_hdr_len, 540 &ext_hdr, &info->rss_ex_src); 541 } else if (curr_ext_hdr_type == IP6_FRAGMENT) { 542 info->fragment = true; 543 } 544 545 info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; 546 curr_ext_hdr_type = ext_hdr.ip6r_nxt; 547 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type)); 548 549 info->l4proto = ext_hdr.ip6r_nxt; 550 return true; 551 } 552