/*
 * QEMU network structures definitions and helper functions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"

void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
                            uint16_t vlan_tag, uint16_t vlan_ethtype)
{
    struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);

    memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);
    vhdr->h_tci = cpu_to_be16(vlan_tag);
    vhdr->h_proto = ehdr->h_proto;
    ehdr->h_proto = cpu_to_be16(vlan_ethtype);
    *ehdr_size += sizeof(*vhdr);
}

uint8_t
eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
{
    uint8_t ecn_state = 0;

    if (l3_proto == ETH_P_IP) {
        struct ip_header *iphdr = (struct ip_header *) l3_hdr;

        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
            if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
                ecn_state = VIRTIO_NET_HDR_GSO_ECN;
            }
            if (l4proto == IP_PROTO_TCP) {
                return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
            } else if (l4proto == IP_PROTO_UDP) {
                return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
            }
        }
    } else if (l3_proto == ETH_P_IPV6) {
        struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;

        if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
            ecn_state = VIRTIO_NET_HDR_GSO_ECN;
        }

        if (l4proto == IP_PROTO_TCP) {
            return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
        }
    }
    qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
                  "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);

    return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
}

uint16_t
eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
{
    uint16_t proto;
    size_t copied;
    size_t size = iov_size(l2hdr_iov, iovcnt);
    size_t proto_offset = l2hdr_len - sizeof(proto);

    if (size < proto_offset) {
        return ETH_P_UNKNOWN;
    }

    copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
                        &proto, sizeof(proto));

    return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
}
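
/*
 * Usage sketch (illustrative only; the frame buffer below is hypothetical):
 * query the EtherType of an untagged frame held in a single iovec element.
 * For VLAN-tagged frames, pass the real L2 header length (e.g. from
 * eth_get_l2_hdr_length_iov()) instead of ETH_HLEN.
 *
 *     uint8_t frame[1514];
 *     struct iovec iov = {
 *         .iov_base = frame,
 *         .iov_len  = sizeof(frame),
 *     };
 *     uint16_t proto = eth_get_l3_proto(&iov, 1, ETH_HLEN);
 *     if (proto == ETH_P_IP) {
 *         // IPv4 header starts at offset ETH_HLEN
 *     }
 */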

static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    size_t copied;

    if (input_size < offset) {
        return false;
    }

    copied = iov_to_buf(iov, iovcnt, offset, buffer, length);

    if (copied < length) {
        return false;
    }

    return true;
}

static bool
_eth_tcp_has_data(bool is_ip4,
                  const struct ip_header *ip4_hdr,
                  const struct ip6_header *ip6_hdr,
                  size_t full_ip6hdr_len,
                  const struct tcp_header *tcp)
{
    uint32_t l4len;

    if (is_ip4) {
        l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
    } else {
        size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
        l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
    }

    return l4len > TCP_HEADER_DATA_OFFSET(tcp);
}

void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
                       bool *hasip4, bool *hasip6,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info *l4hdr_info)
{
    int proto;
    bool fragment = false;
    size_t input_size = iov_size(iov, iovcnt);
    size_t copied;
    uint8_t ip_p;

    *hasip4 = *hasip6 = false;
    *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
    l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;

    proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);

    if (proto == ETH_P_IP) {
        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;

        if (input_size < *l3hdr_off) {
            return;
        }

        copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr));
        if (copied < sizeof(*iphdr) ||
            IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
            return;
        }

        *hasip4 = true;
        ip_p = iphdr->ip_p;
        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
        *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr);

        fragment = ip4hdr_info->fragment;
    } else if (proto == ETH_P_IPV6) {
        if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
            return;
        }

        *hasip6 = true;
        ip_p = ip6hdr_info->l4proto;
        *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
        fragment = ip6hdr_info->fragment;
    } else {
        return;
    }

    if (fragment) {
        return;
    }

    switch (ip_p) {
    case IP_PROTO_TCP:
        if (_eth_copy_chunk(input_size,
                            iov, iovcnt,
                            *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
                            &l4hdr_info->hdr.tcp)) {
            l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
            *l5hdr_off = *l4hdr_off +
                TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

            l4hdr_info->has_tcp_data =
                _eth_tcp_has_data(proto == ETH_P_IP,
                                  &ip4hdr_info->ip4_hdr,
                                  &ip6hdr_info->ip6_hdr,
                                  *l4hdr_off - *l3hdr_off,
                                  &l4hdr_info->hdr.tcp);
        }
        break;

    case IP_PROTO_UDP:
        if (_eth_copy_chunk(input_size,
                            iov, iovcnt,
                            *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
                            &l4hdr_info->hdr.udp)) {
            l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
        }
        break;

    case IP_PROTO_SCTP:
        l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP;
        break;
    }
}
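
/*
 * Usage sketch (illustrative; 'iov' and 'iovcnt' are assumed to describe a
 * received frame): classify a packet before deciding how to checksum or
 * segment it.
 *
 *     bool hasip4, hasip6;
 *     size_t l3off, l4off, l5off;
 *     eth_ip4_hdr_info ip4info;
 *     eth_ip6_hdr_info ip6info;
 *     eth_l4_hdr_info l4info;
 *
 *     eth_get_protocols(iov, iovcnt, 0, &hasip4, &hasip6,
 *                       &l3off, &l4off, &l5off,
 *                       &ip6info, &ip4info, &l4info);
 *     if (hasip4 && l4info.proto == ETH_L4_HDR_PROTO_TCP) {
 *         // TCP payload (if any) starts at offset l5off
 *     }
 *
 * Note that *l5hdr_off is only written when a TCP or UDP header was
 * actually copied out, so check l4hdr_info->proto before trusting it.
 */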

size_t
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
               void *new_ehdr_buf,
               uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    struct eth_header *new_ehdr = new_ehdr_buf;

    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
                               new_ehdr, sizeof(*new_ehdr));

    if (copied < sizeof(*new_ehdr)) {
        return 0;
    }

    switch (be16_to_cpu(new_ehdr->h_proto)) {
    case ETH_P_VLAN:
    case ETH_P_DVLAN:
        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
                            &vlan_hdr, sizeof(vlan_hdr));

        if (copied < sizeof(vlan_hdr)) {
            return 0;
        }

        new_ehdr->h_proto = vlan_hdr.h_proto;

        *tci = be16_to_cpu(vlan_hdr.h_tci);
        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);

        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {

            copied = iov_to_buf(iov, iovcnt, *payload_offset,
                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));

            if (copied < sizeof(vlan_hdr)) {
                return 0;
            }

            *payload_offset += sizeof(vlan_hdr);

            return sizeof(struct eth_header) + sizeof(struct vlan_header);
        } else {
            return sizeof(struct eth_header);
        }
    default:
        return 0;
    }
}

size_t
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
                  uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
                  uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    uint16_t *new_ehdr_proto;
    size_t new_ehdr_size;
    size_t copied;

    switch (index) {
    case 0:
        new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto;
        new_ehdr_size = sizeof(struct eth_header);
        copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
        break;

    case 1:
        new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto;
        new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
        copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
        if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) {
            return 0;
        }
        break;

    default:
        return 0;
    }

    if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) {
        return 0;
    }

    copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size,
                        &vlan_hdr, sizeof(vlan_hdr));
    if (copied < sizeof(vlan_hdr)) {
        return 0;
    }

    *new_ehdr_proto = vlan_hdr.h_proto;
    *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr);
    *tci = be16_to_cpu(vlan_hdr.h_tci);

    return new_ehdr_size;
}

void
eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
{
    struct ip_header *iphdr = (struct ip_header *) l3hdr;
    iphdr->ip_sum = 0;
    iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
}

uint32_t
eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
                             uint16_t csl,
                             uint32_t *cso)
{
    struct ip_pseudo_header ipph;
    ipph.ip_src = iphdr->ip_src;
    ipph.ip_dst = iphdr->ip_dst;
    ipph.ip_payload = cpu_to_be16(csl);
    ipph.ip_proto = iphdr->ip_p;
    ipph.zeros = 0;
    *cso = sizeof(ipph);
    return net_checksum_add(*cso, (uint8_t *) &ipph);
}

uint32_t
eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
                             uint16_t csl,
                             uint8_t l4_proto,
                             uint32_t *cso)
{
    struct ip6_pseudo_header ipph;
    ipph.ip6_src = iphdr->ip6_src;
    ipph.ip6_dst = iphdr->ip6_dst;
    ipph.len = cpu_to_be16(csl);
    ipph.zero[0] = 0;
    ipph.zero[1] = 0;
    ipph.zero[2] = 0;
    ipph.next_hdr = l4_proto;
    *cso = sizeof(ipph);
    return net_checksum_add(*cso, (uint8_t *)&ipph);
}
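
/*
 * Sketch (illustrative; 'buf' is a hypothetical contiguous IPv4+TCP packet
 * starting at the IP header, with the TCP checksum field zeroed): the
 * pseudo-header sum returned above is folded together with the sum of the
 * L4 header and payload, and the finished value is stored big-endian in
 * the TCP checksum field.
 *
 *     struct ip_header *ip = (struct ip_header *) buf;
 *     size_t hlen = IP_HDR_GET_LEN(ip);
 *     uint16_t csl = be16_to_cpu(ip->ip_len) - hlen;
 *     uint32_t cso;
 *     uint32_t sum = eth_calc_ip4_pseudo_hdr_csum(ip, csl, &cso);
 *     sum += net_checksum_add(csl, buf + hlen);
 *     uint16_t csum = net_checksum_finish(sum);
 */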

static bool
eth_is_ip6_extension_header_type(uint8_t hdr_type)
{
    switch (hdr_type) {
    case IP6_HOP_BY_HOP:
    case IP6_ROUTING:
    case IP6_FRAGMENT:
    case IP6_AUTHENTICATION:
    case IP6_DESTINATON:
    case IP6_MOBILITY:
        return true;
    default:
        return false;
    }
}

static bool
_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
                         size_t ext_hdr_offset,
                         struct ip6_ext_hdr *ext_hdr,
                         struct in6_address *dst_addr)
{
    struct ip6_ext_hdr_routing rt_hdr;
    size_t input_size = iov_size(pkt, pkt_frags);
    size_t bytes_read;

    if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
                            &rt_hdr, sizeof(rt_hdr));
    assert(bytes_read == sizeof(rt_hdr));
    if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
        return false;
    }
    bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
                            dst_addr, sizeof(*dst_addr));
    assert(bytes_read == sizeof(*dst_addr));

    return true;
}

static bool
_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
                         size_t dsthdr_offset,
                         struct ip6_ext_hdr *ext_hdr,
                         struct in6_address *src_addr)
{
    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
    struct ip6_option_hdr opthdr;
    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);

    while (bytes_left > sizeof(opthdr)) {
        size_t input_size = iov_size(pkt, pkt_frags);
        size_t bytes_read, optlen;

        if (input_size < opt_offset) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
                                &opthdr, sizeof(opthdr));

        if (bytes_read != sizeof(opthdr)) {
            return false;
        }

        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
                 : (opthdr.len + sizeof(opthdr));

        if (optlen > bytes_left) {
            return false;
        }

        if (opthdr.type == IP6_OPT_HOME) {
            size_t input_size = iov_size(pkt, pkt_frags);

            if (input_size < opt_offset + sizeof(opthdr)) {
                return false;
            }

            bytes_read = iov_to_buf(pkt, pkt_frags,
                                    opt_offset + sizeof(opthdr),
                                    src_addr, sizeof(*src_addr));

            return bytes_read == sizeof(*src_addr);
        }

        opt_offset += optlen;
        bytes_left -= optlen;
    }

    return false;
}
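
/*
 * Parse an IPv6 header (plus any extension headers) starting at ip6hdr_off
 * into 'info'. Walks the extension-header chain to find the final L4
 * protocol, records whether the packet is a fragment, and captures the
 * alternative source/destination addresses used for RSS hashing of IPv6
 * packets with extension headers (a type-2 routing header and the Home
 * Address destination option, per the helpers above). Returns false if the
 * input runs out before the chain is fully parsed.
 */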
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    struct ip6_ext_hdr ext_hdr;
    size_t bytes_read;
    uint8_t curr_ext_hdr_type;
    size_t input_size = iov_size(pkt, pkt_frags);

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (input_size < ip6hdr_off) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                            &info->ip6_hdr, sizeof(info->ip6_hdr));
    if (bytes_read < sizeof(info->ip6_hdr)) {
        return false;
    }

    info->full_hdr_len = sizeof(struct ip6_header);

    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;

    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
        info->l4proto = info->ip6_hdr.ip6_nxt;
        info->has_ext_hdrs = false;
        return true;
    }

    info->has_ext_hdrs = true;

    do {
        if (input_size < ip6hdr_off + info->full_hdr_len) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
                                &ext_hdr, sizeof(ext_hdr));

        if (bytes_read < sizeof(ext_hdr)) {
            return false;
        }

        if (curr_ext_hdr_type == IP6_ROUTING) {
            if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
                info->rss_ex_dst_valid =
                    _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
                                             ip6hdr_off + info->full_hdr_len,
                                             &ext_hdr, &info->rss_ex_dst);
            }
        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
                                         ip6hdr_off + info->full_hdr_len,
                                         &ext_hdr, &info->rss_ex_src);
        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
            info->fragment = true;
        }

        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));

    info->l4proto = ext_hdr.ip6r_nxt;
    return true;
}

bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
                         const void *pkt, size_t pkt_size)
{
    assert(padded_buflen && *padded_buflen >= ETH_ZLEN);

    if (pkt_size >= ETH_ZLEN) {
        return false;
    }

    /* pad to minimum Ethernet frame length */
    memcpy(padded_pkt, pkt, pkt_size);
    memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
    *padded_buflen = ETH_ZLEN;

    return true;
}
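
/*
 * Usage sketch (illustrative; 'pkt' and 'pkt_size' are hypothetical):
 * device models that must deliver minimum-size frames pad short packets
 * on receive, redirecting the packet pointer to the padded copy only
 * when padding was actually applied.
 *
 *     uint8_t padded[ETH_ZLEN];
 *     size_t padded_len = sizeof(padded);
 *
 *     if (eth_pad_short_frame(padded, &padded_len, pkt, pkt_size)) {
 *         pkt = padded;
 *         pkt_size = padded_len;
 *     }
 *     // pkt/pkt_size now describe a frame of at least ETH_ZLEN bytes
 */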