1 /* 2 * QEMU network structures definitions and helper functions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "net/eth.h" 20 #include "net/checksum.h" 21 #include "net/tap.h" 22 23 void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, 24 uint16_t vlan_ethtype, bool *is_new) 25 { 26 struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); 27 28 switch (be16_to_cpu(ehdr->h_proto)) { 29 case ETH_P_VLAN: 30 case ETH_P_DVLAN: 31 /* vlan hdr exists */ 32 *is_new = false; 33 break; 34 35 default: 36 /* No VLAN header, put a new one */ 37 vhdr->h_proto = ehdr->h_proto; 38 ehdr->h_proto = cpu_to_be16(vlan_ethtype); 39 *is_new = true; 40 break; 41 } 42 vhdr->h_tci = cpu_to_be16(vlan_tag); 43 } 44 45 uint8_t 46 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) 47 { 48 uint8_t ecn_state = 0; 49 50 if (l3_proto == ETH_P_IP) { 51 struct ip_header *iphdr = (struct ip_header *) l3_hdr; 52 53 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { 54 if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) { 55 ecn_state = VIRTIO_NET_HDR_GSO_ECN; 56 } 57 if (l4proto == IP_PROTO_TCP) { 58 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state; 59 } else if (l4proto == IP_PROTO_UDP) { 60 return VIRTIO_NET_HDR_GSO_UDP | ecn_state; 61 } 62 } 63 } else if (l3_proto == ETH_P_IPV6) { 64 struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr; 65 66 if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) { 67 ecn_state = VIRTIO_NET_HDR_GSO_ECN; 68 } 69 70 if (l4proto == IP_PROTO_TCP) { 71 return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; 72 } 73 } 74 75 /* Unsupported offload */ 76 g_assert_not_reached(); 77 78 return VIRTIO_NET_HDR_GSO_NONE | ecn_state; 79 } 80 81 uint16_t 82 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len) 83 { 84 uint16_t proto; 85 size_t copied; 86 size_t size = iov_size(l2hdr_iov, iovcnt); 87 size_t proto_offset = l2hdr_len - sizeof(proto); 88 89 if (size < proto_offset) { 90 return ETH_P_UNKNOWN; 91 } 92 93 copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset, 94 &proto, sizeof(proto)); 95 96 return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN; 97 } 98 99 static bool 100 _eth_copy_chunk(size_t input_size, 101 const struct iovec *iov, int iovcnt, 102 size_t offset, size_t length, 103 void *buffer) 104 { 105 size_t copied; 106 107 if (input_size < offset) { 108 return false; 109 } 110 111 copied = iov_to_buf(iov, iovcnt, offset, buffer, length); 112 113 if (copied < length) { 114 return false; 115 } 116 117 return true; 118 } 119 120 static bool 121 _eth_tcp_has_data(bool is_ip4, 122 const struct ip_header *ip4_hdr, 123 const struct ip6_header *ip6_hdr, 124 size_t full_ip6hdr_len, 125 const struct tcp_header *tcp) 126 { 127 uint32_t l4len; 128 129 if (is_ip4) { 130 l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr); 131 } else { 132 size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 133 l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len; 134 } 135 136 return l4len > TCP_HEADER_DATA_OFFSET(tcp); 137 } 138 139 void eth_get_protocols(const struct iovec *iov, int iovcnt, 140 bool *isip4, bool *isip6, 141 bool *isudp, bool *istcp, 142 size_t *l3hdr_off, 143 size_t *l4hdr_off, 144 size_t *l5hdr_off, 145 eth_ip6_hdr_info *ip6hdr_info, 146 eth_ip4_hdr_info *ip4hdr_info, 147 eth_l4_hdr_info *l4hdr_info) 148 { 149 int proto; 150 bool fragment = false; 151 size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); 152 size_t input_size = iov_size(iov, iovcnt); 153 size_t copied; 154 155 *isip4 = *isip6 = *isudp = *istcp = false; 156 157 proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); 158 159 *l3hdr_off = l2hdr_len; 160 161 if (proto == ETH_P_IP) { 162 struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; 163 164 if (input_size < l2hdr_len) { 165 return; 166 } 167 168 copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); 169 170 *isip4 = true; 171 172 if (copied < sizeof(*iphdr)) { 173 return; 174 } 175 176 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { 177 if (iphdr->ip_p == IP_PROTO_TCP) { 178 *istcp = true; 179 } else if (iphdr->ip_p == IP_PROTO_UDP) { 180 *isudp = true; 181 } 182 } 183 184 ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); 185 *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); 186 187 fragment = ip4hdr_info->fragment; 188 } else if (proto == ETH_P_IPV6) { 189 190 *isip6 = true; 191 if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, 192 ip6hdr_info)) { 193 if (ip6hdr_info->l4proto == IP_PROTO_TCP) { 194 *istcp = true; 195 } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) { 196 *isudp = true; 197 } 198 } else { 199 return; 200 } 201 202 *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; 203 fragment = ip6hdr_info->fragment; 204 } 205 206 if (!fragment) { 207 if (*istcp) { 208 *istcp = _eth_copy_chunk(input_size, 209 iov, iovcnt, 210 *l4hdr_off, sizeof(l4hdr_info->hdr.tcp), 211 &l4hdr_info->hdr.tcp); 212 213 if (*istcp) { 214 *l5hdr_off = *l4hdr_off + 215 TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp); 216 217 l4hdr_info->has_tcp_data = 218 _eth_tcp_has_data(proto == ETH_P_IP, 219 &ip4hdr_info->ip4_hdr, 220 &ip6hdr_info->ip6_hdr, 221 *l4hdr_off - *l3hdr_off, 222 &l4hdr_info->hdr.tcp); 223 } 224 } else if (*isudp) { 225 *isudp = _eth_copy_chunk(input_size, 226 iov, iovcnt, 227 *l4hdr_off, sizeof(l4hdr_info->hdr.udp), 228 &l4hdr_info->hdr.udp); 229 *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); 230 } 231 } 232 } 233 234 size_t 235 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, 236 uint8_t *new_ehdr_buf, 237 uint16_t *payload_offset, uint16_t *tci) 238 { 239 struct vlan_header vlan_hdr; 240 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; 241 242 size_t copied = iov_to_buf(iov, iovcnt, iovoff, 243 new_ehdr, sizeof(*new_ehdr)); 244 245 if (copied < sizeof(*new_ehdr)) { 246 return 0; 247 } 248 249 switch (be16_to_cpu(new_ehdr->h_proto)) { 250 case ETH_P_VLAN: 251 case ETH_P_DVLAN: 252 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), 253 &vlan_hdr, sizeof(vlan_hdr)); 254 255 if (copied < sizeof(vlan_hdr)) { 256 return 0; 257 } 258 259 new_ehdr->h_proto = vlan_hdr.h_proto; 260 261 *tci = be16_to_cpu(vlan_hdr.h_tci); 262 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); 263 264 if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { 265 266 copied = iov_to_buf(iov, iovcnt, *payload_offset, 267 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr)); 268 269 if (copied < sizeof(vlan_hdr)) { 270 return 0; 271 } 272 273 *payload_offset += sizeof(vlan_hdr); 274 275 return sizeof(struct eth_header) + sizeof(struct vlan_header); 276 } else { 277 return sizeof(struct eth_header); 278 } 279 default: 280 return 0; 281 } 282 } 283 284 size_t 285 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, 286 uint16_t vet, uint8_t *new_ehdr_buf, 287 uint16_t *payload_offset, uint16_t *tci) 288 { 289 struct vlan_header vlan_hdr; 290 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; 291 292 size_t copied = iov_to_buf(iov, iovcnt, iovoff, 293 new_ehdr, sizeof(*new_ehdr)); 294 295 if (copied < sizeof(*new_ehdr)) { 296 return 0; 297 } 298 299 if (be16_to_cpu(new_ehdr->h_proto) == vet) { 300 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), 301 &vlan_hdr, sizeof(vlan_hdr)); 302 303 if (copied < sizeof(vlan_hdr)) { 304 return 0; 305 } 306 307 new_ehdr->h_proto = vlan_hdr.h_proto; 308 309 *tci = be16_to_cpu(vlan_hdr.h_tci); 310 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); 311 return sizeof(struct eth_header); 312 } 313 314 return 0; 315 } 316 317 void 318 eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, 319 void *l3hdr, size_t l3hdr_len, 320 size_t l3payload_len, 321 size_t frag_offset, bool more_frags) 322 { 323 const struct iovec l2vec = { 324 .iov_base = (void *) l2hdr, 325 .iov_len = l2hdr_len 326 }; 327 328 if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) { 329 uint16_t orig_flags; 330 struct ip_header *iphdr = (struct ip_header *) l3hdr; 331 uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; 332 uint16_t new_ip_off; 333 334 assert(frag_offset % IP_FRAG_UNIT_SIZE == 0); 335 assert((frag_off_units & ~IP_OFFMASK) == 0); 336 337 orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF); 338 new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0); 339 iphdr->ip_off = cpu_to_be16(new_ip_off); 340 iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len); 341 } 342 } 343 344 void 345 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) 346 { 347 struct ip_header *iphdr = (struct ip_header *) l3hdr; 348 iphdr->ip_sum = 0; 349 iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len)); 350 } 351 352 uint32_t 353 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, 354 uint16_t csl, 355 uint32_t *cso) 356 { 357 struct ip_pseudo_header ipph; 358 ipph.ip_src = iphdr->ip_src; 359 ipph.ip_dst = iphdr->ip_dst; 360 ipph.ip_payload = cpu_to_be16(csl); 361 ipph.ip_proto = iphdr->ip_p; 362 ipph.zeros = 0; 363 *cso = sizeof(ipph); 364 return net_checksum_add(*cso, (uint8_t *) &ipph); 365 } 366 367 uint32_t 368 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, 369 uint16_t csl, 370 uint8_t l4_proto, 371 uint32_t *cso) 372 { 373 struct ip6_pseudo_header ipph; 374 ipph.ip6_src = iphdr->ip6_src; 375 ipph.ip6_dst = iphdr->ip6_dst; 376 ipph.len = cpu_to_be16(csl); 377 ipph.zero[0] = 0; 378 ipph.zero[1] = 0; 379 ipph.zero[2] = 0; 380 ipph.next_hdr = l4_proto; 381 *cso = sizeof(ipph); 382 return net_checksum_add(*cso, (uint8_t *)&ipph); 383 } 384 385 static bool 386 eth_is_ip6_extension_header_type(uint8_t hdr_type) 387 { 388 switch (hdr_type) { 389 case IP6_HOP_BY_HOP: 390 case IP6_ROUTING: 391 case IP6_FRAGMENT: 392 case IP6_ESP: 393 case IP6_AUTHENTICATION: 394 case IP6_DESTINATON: 395 case IP6_MOBILITY: 396 return true; 397 default: 398 return false; 399 } 400 } 401 402 static bool 403 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, 404 size_t rthdr_offset, 405 struct ip6_ext_hdr *ext_hdr, 406 struct in6_address *dst_addr) 407 { 408 struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; 409 410 if ((rthdr->rtype == 2) && 411 (rthdr->len == sizeof(struct in6_address) / 8) && 412 (rthdr->segleft == 1)) { 413 414 size_t input_size = iov_size(pkt, pkt_frags); 415 size_t bytes_read; 416 417 if (input_size < rthdr_offset + sizeof(*ext_hdr)) { 418 return false; 419 } 420 421 bytes_read = iov_to_buf(pkt, pkt_frags, 422 rthdr_offset + sizeof(*ext_hdr), 423 dst_addr, sizeof(*dst_addr)); 424 425 return bytes_read == sizeof(*dst_addr); 426 } 427 428 return false; 429 } 430 431 static bool 432 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags, 433 size_t dsthdr_offset, 434 struct ip6_ext_hdr *ext_hdr, 435 struct in6_address *src_addr) 436 { 437 size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr); 438 struct ip6_option_hdr opthdr; 439 size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr); 440 441 while (bytes_left > sizeof(opthdr)) { 442 size_t input_size = iov_size(pkt, pkt_frags); 443 size_t bytes_read, optlen; 444 445 if (input_size < opt_offset) { 446 return false; 447 } 448 449 bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset, 450 &opthdr, sizeof(opthdr)); 451 452 if (bytes_read != sizeof(opthdr)) { 453 return false; 454 } 455 456 optlen = (opthdr.type == IP6_OPT_PAD1) ? 1 457 : (opthdr.len + sizeof(opthdr)); 458 459 if (optlen > bytes_left) { 460 return false; 461 } 462 463 if (opthdr.type == IP6_OPT_HOME) { 464 size_t input_size = iov_size(pkt, pkt_frags); 465 466 if (input_size < opt_offset + sizeof(opthdr)) { 467 return false; 468 } 469 470 bytes_read = iov_to_buf(pkt, pkt_frags, 471 opt_offset + sizeof(opthdr), 472 src_addr, sizeof(*src_addr)); 473 474 return bytes_read == sizeof(*src_addr); 475 } 476 477 opt_offset += optlen; 478 bytes_left -= optlen; 479 } 480 481 return false; 482 } 483 484 bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, 485 size_t ip6hdr_off, eth_ip6_hdr_info *info) 486 { 487 struct ip6_ext_hdr ext_hdr; 488 size_t bytes_read; 489 uint8_t curr_ext_hdr_type; 490 size_t input_size = iov_size(pkt, pkt_frags); 491 492 info->rss_ex_dst_valid = false; 493 info->rss_ex_src_valid = false; 494 info->fragment = false; 495 496 if (input_size < ip6hdr_off) { 497 return false; 498 } 499 500 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, 501 &info->ip6_hdr, sizeof(info->ip6_hdr)); 502 if (bytes_read < sizeof(info->ip6_hdr)) { 503 return false; 504 } 505 506 info->full_hdr_len = sizeof(struct ip6_header); 507 508 curr_ext_hdr_type = info->ip6_hdr.ip6_nxt; 509 510 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) { 511 info->l4proto = info->ip6_hdr.ip6_nxt; 512 info->has_ext_hdrs = false; 513 return true; 514 } 515 516 info->has_ext_hdrs = true; 517 518 do { 519 if (input_size < ip6hdr_off + info->full_hdr_len) { 520 return false; 521 } 522 523 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len, 524 &ext_hdr, sizeof(ext_hdr)); 525 526 if (bytes_read < sizeof(ext_hdr)) { 527 return false; 528 } 529 530 if (curr_ext_hdr_type == IP6_ROUTING) { 531 info->rss_ex_dst_valid = 532 _eth_get_rss_ex_dst_addr(pkt, pkt_frags, 533 ip6hdr_off + info->full_hdr_len, 534 &ext_hdr, &info->rss_ex_dst); 535 } else if (curr_ext_hdr_type == IP6_DESTINATON) { 536 info->rss_ex_src_valid = 537 _eth_get_rss_ex_src_addr(pkt, pkt_frags, 538 ip6hdr_off + info->full_hdr_len, 539 &ext_hdr, &info->rss_ex_src); 540 } else if (curr_ext_hdr_type == IP6_FRAGMENT) { 541 info->fragment = true; 542 } 543 544 info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; 545 curr_ext_hdr_type = ext_hdr.ip6r_nxt; 546 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type)); 547 548 info->l4proto = ext_hdr.ip6r_nxt; 549 return true; 550 } 551