1 /* 2 * QEMU RX packets abstractions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "trace.h" 20 #include "net_rx_pkt.h" 21 #include "net/checksum.h" 22 #include "net/tap.h" 23 24 struct NetRxPkt { 25 struct virtio_net_hdr virt_hdr; 26 uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)]; 27 struct iovec *vec; 28 uint16_t vec_len_total; 29 uint16_t vec_len; 30 uint32_t tot_len; 31 uint16_t tci; 32 size_t ehdr_buf_len; 33 bool has_virt_hdr; 34 eth_pkt_types_e packet_type; 35 36 /* Analysis results */ 37 bool isip4; 38 bool isip6; 39 bool isudp; 40 bool istcp; 41 42 size_t l3hdr_off; 43 size_t l4hdr_off; 44 size_t l5hdr_off; 45 46 eth_ip6_hdr_info ip6hdr_info; 47 eth_ip4_hdr_info ip4hdr_info; 48 eth_l4_hdr_info l4hdr_info; 49 }; 50 51 void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) 52 { 53 struct NetRxPkt *p = g_malloc0(sizeof *p); 54 p->has_virt_hdr = has_virt_hdr; 55 p->vec = NULL; 56 p->vec_len_total = 0; 57 *pkt = p; 58 } 59 60 void net_rx_pkt_uninit(struct NetRxPkt *pkt) 61 { 62 if (pkt->vec_len_total != 0) { 63 g_free(pkt->vec); 64 } 65 66 g_free(pkt); 67 } 68 69 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) 70 { 71 assert(pkt); 72 return &pkt->virt_hdr; 73 } 74 75 static inline void 76 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, 77 int new_iov_len) 78 { 79 if (pkt->vec_len_total < new_iov_len) { 80 g_free(pkt->vec); 81 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); 82 pkt->vec_len_total = new_iov_len; 83 } 84 } 85 86 static void 87 net_rx_pkt_pull_data(struct NetRxPkt *pkt, 88 const struct iovec *iov, int iovcnt, 89 size_t ploff) 90 { 91 uint32_t pllen = iov_size(iov, iovcnt) - ploff; 92 93 if (pkt->ehdr_buf_len) { 94 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); 95 96 pkt->vec[0].iov_base = pkt->ehdr_buf; 97 pkt->vec[0].iov_len = pkt->ehdr_buf_len; 98 99 pkt->tot_len = pllen + pkt->ehdr_buf_len; 100 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, 101 iov, iovcnt, ploff, pllen) + 1; 102 } else { 103 net_rx_pkt_iovec_realloc(pkt, iovcnt); 104 105 pkt->tot_len = pllen; 106 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, 107 iov, iovcnt, ploff, pkt->tot_len); 108 } 109 110 eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, 111 &pkt->isudp, &pkt->istcp, 112 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 113 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 114 115 trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, 116 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); 117 } 118 119 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, 120 const struct iovec *iov, int iovcnt, 121 size_t iovoff, bool strip_vlan) 122 { 123 uint16_t tci = 0; 124 uint16_t ploff = iovoff; 125 assert(pkt); 126 127 if (strip_vlan) { 128 pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, 129 &ploff, &tci); 130 } else { 131 pkt->ehdr_buf_len = 0; 132 } 133 134 pkt->tci = tci; 135 136 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 137 } 138 139 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, 140 const struct iovec *iov, int iovcnt, 141 size_t iovoff, bool strip_vlan, 142 uint16_t vet) 143 { 144 uint16_t tci = 0; 145 uint16_t ploff = iovoff; 146 assert(pkt); 147 148 if (strip_vlan) { 149 pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, 150 pkt->ehdr_buf, 151 &ploff, &tci); 152 } else { 153 pkt->ehdr_buf_len = 0; 154 } 155 156 pkt->tci = tci; 157 158 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 159 } 160 161 void net_rx_pkt_dump(struct NetRxPkt *pkt) 162 { 163 #ifdef NET_RX_PKT_DEBUG 164 assert(pkt); 165 166 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n", 167 pkt->tot_len, pkt->ehdr_buf_len, pkt->tci); 168 #endif 169 } 170 171 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, 172 eth_pkt_types_e packet_type) 173 { 174 assert(pkt); 175 176 pkt->packet_type = packet_type; 177 178 } 179 180 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) 181 { 182 assert(pkt); 183 184 return pkt->packet_type; 185 } 186 187 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) 188 { 189 assert(pkt); 190 191 return pkt->tot_len; 192 } 193 194 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, 195 size_t len) 196 { 197 const struct iovec iov = { 198 .iov_base = (void *)data, 199 .iov_len = len 200 }; 201 202 assert(pkt); 203 204 eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, 205 &pkt->isudp, &pkt->istcp, 206 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 207 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 208 } 209 210 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, 211 bool *isip4, bool *isip6, 212 bool *isudp, bool *istcp) 213 { 214 assert(pkt); 215 216 *isip4 = pkt->isip4; 217 *isip6 = pkt->isip6; 218 *isudp = pkt->isudp; 219 *istcp = pkt->istcp; 220 } 221 222 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) 223 { 224 assert(pkt); 225 return pkt->l3hdr_off; 226 } 227 228 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) 229 { 230 assert(pkt); 231 return pkt->l4hdr_off; 232 } 233 234 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) 235 { 236 assert(pkt); 237 return pkt->l5hdr_off; 238 } 239 240 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) 241 { 242 return &pkt->ip6hdr_info; 243 } 244 245 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) 246 { 247 return &pkt->ip4hdr_info; 248 } 249 250 eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) 251 { 252 return &pkt->l4hdr_info; 253 } 254 255 static inline void 256 _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, 257 void *ptr, size_t size) 258 { 259 memcpy(&rss_input[*bytes_written], ptr, size); 260 trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); 261 *bytes_written += size; 262 } 263 264 static inline void 265 _net_rx_rss_prepare_ip4(uint8_t *rss_input, 266 struct NetRxPkt *pkt, 267 size_t *bytes_written) 268 { 269 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; 270 271 _net_rx_rss_add_chunk(rss_input, bytes_written, 272 &ip4_hdr->ip_src, sizeof(uint32_t)); 273 274 _net_rx_rss_add_chunk(rss_input, bytes_written, 275 &ip4_hdr->ip_dst, sizeof(uint32_t)); 276 } 277 278 static inline void 279 _net_rx_rss_prepare_ip6(uint8_t *rss_input, 280 struct NetRxPkt *pkt, 281 bool ipv6ex, size_t *bytes_written) 282 { 283 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; 284 285 _net_rx_rss_add_chunk(rss_input, bytes_written, 286 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src 287 : &ip6info->ip6_hdr.ip6_src, 288 sizeof(struct in6_address)); 289 290 _net_rx_rss_add_chunk(rss_input, bytes_written, 291 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst 292 : &ip6info->ip6_hdr.ip6_dst, 293 sizeof(struct in6_address)); 294 } 295 296 static inline void 297 _net_rx_rss_prepare_tcp(uint8_t *rss_input, 298 struct NetRxPkt *pkt, 299 size_t *bytes_written) 300 { 301 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; 302 303 _net_rx_rss_add_chunk(rss_input, bytes_written, 304 &tcphdr->th_sport, sizeof(uint16_t)); 305 306 _net_rx_rss_add_chunk(rss_input, bytes_written, 307 &tcphdr->th_dport, sizeof(uint16_t)); 308 } 309 310 static inline void 311 _net_rx_rss_prepare_udp(uint8_t *rss_input, 312 struct NetRxPkt *pkt, 313 size_t *bytes_written) 314 { 315 struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp; 316 317 _net_rx_rss_add_chunk(rss_input, bytes_written, 318 &udphdr->uh_sport, sizeof(uint16_t)); 319 320 _net_rx_rss_add_chunk(rss_input, bytes_written, 321 &udphdr->uh_dport, sizeof(uint16_t)); 322 } 323 324 uint32_t 325 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, 326 NetRxPktRssType type, 327 uint8_t *key) 328 { 329 uint8_t rss_input[36]; 330 size_t rss_length = 0; 331 uint32_t rss_hash = 0; 332 net_toeplitz_key key_data; 333 334 switch (type) { 335 case NetPktRssIpV4: 336 assert(pkt->isip4); 337 trace_net_rx_pkt_rss_ip4(); 338 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 339 break; 340 case NetPktRssIpV4Tcp: 341 assert(pkt->isip4); 342 assert(pkt->istcp); 343 trace_net_rx_pkt_rss_ip4_tcp(); 344 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 345 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 346 break; 347 case NetPktRssIpV6Tcp: 348 assert(pkt->isip6); 349 assert(pkt->istcp); 350 trace_net_rx_pkt_rss_ip6_tcp(); 351 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 352 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 353 break; 354 case NetPktRssIpV6: 355 assert(pkt->isip6); 356 trace_net_rx_pkt_rss_ip6(); 357 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 358 break; 359 case NetPktRssIpV6Ex: 360 assert(pkt->isip6); 361 trace_net_rx_pkt_rss_ip6_ex(); 362 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 363 break; 364 case NetPktRssIpV6TcpEx: 365 assert(pkt->isip6); 366 assert(pkt->istcp); 367 trace_net_rx_pkt_rss_ip6_ex_tcp(); 368 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 369 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 370 break; 371 case NetPktRssIpV4Udp: 372 assert(pkt->isip4); 373 assert(pkt->isudp); 374 trace_net_rx_pkt_rss_ip4_udp(); 375 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 376 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length); 377 break; 378 case NetPktRssIpV6Udp: 379 assert(pkt->isip6); 380 assert(pkt->isudp); 381 trace_net_rx_pkt_rss_ip6_udp(); 382 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 383 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length); 384 break; 385 case NetPktRssIpV6UdpEx: 386 assert(pkt->isip6); 387 assert(pkt->isudp); 388 trace_net_rx_pkt_rss_ip6_ex_udp(); 389 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 390 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length); 391 break; 392 default: 393 assert(false); 394 break; 395 } 396 397 net_toeplitz_key_init(&key_data, key); 398 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); 399 400 trace_net_rx_pkt_rss_hash(rss_length, rss_hash); 401 402 return rss_hash; 403 } 404 405 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) 406 { 407 assert(pkt); 408 409 if (pkt->isip4) { 410 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); 411 } 412 413 return 0; 414 } 415 416 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) 417 { 418 assert(pkt); 419 420 if (pkt->istcp) { 421 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; 422 } 423 424 return false; 425 } 426 427 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) 428 { 429 assert(pkt); 430 431 if (pkt->istcp) { 432 return pkt->l4hdr_info.has_tcp_data; 433 } 434 435 return false; 436 } 437 438 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) 439 { 440 assert(pkt); 441 442 return pkt->vec; 443 } 444 445 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) 446 { 447 assert(pkt); 448 449 return pkt->vec_len; 450 } 451 452 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, 453 struct virtio_net_hdr *vhdr) 454 { 455 assert(pkt); 456 457 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); 458 } 459 460 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, 461 const struct iovec *iov, int iovcnt) 462 { 463 assert(pkt); 464 465 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); 466 } 467 468 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) 469 { 470 assert(pkt); 471 472 return pkt->ehdr_buf_len ? true : false; 473 } 474 475 bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt) 476 { 477 assert(pkt); 478 479 return pkt->has_virt_hdr; 480 } 481 482 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) 483 { 484 assert(pkt); 485 486 return pkt->tci; 487 } 488 489 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) 490 { 491 uint32_t cntr; 492 uint16_t csum; 493 uint32_t csl; 494 495 trace_net_rx_pkt_l3_csum_validate_entry(); 496 497 if (!pkt->isip4) { 498 trace_net_rx_pkt_l3_csum_validate_not_ip4(); 499 return false; 500 } 501 502 csl = pkt->l4hdr_off - pkt->l3hdr_off; 503 504 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, 505 pkt->l3hdr_off, 506 csl, 0); 507 508 csum = net_checksum_finish(cntr); 509 510 *csum_valid = (csum == 0); 511 512 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, 513 cntr, csum, *csum_valid); 514 515 return true; 516 } 517 518 static uint16_t 519 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) 520 { 521 uint32_t cntr; 522 uint16_t csum; 523 uint16_t csl; 524 uint32_t cso; 525 526 trace_net_rx_pkt_l4_csum_calc_entry(); 527 528 if (pkt->isip4) { 529 if (pkt->isudp) { 530 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 531 trace_net_rx_pkt_l4_csum_calc_ip4_udp(); 532 } else { 533 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - 534 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); 535 trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); 536 } 537 538 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, 539 csl, &cso); 540 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 541 } else { 542 if (pkt->isudp) { 543 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 544 trace_net_rx_pkt_l4_csum_calc_ip6_udp(); 545 } else { 546 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; 547 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; 548 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 549 550 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - 551 ip6opts_len; 552 trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); 553 } 554 555 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, 556 pkt->ip6hdr_info.l4proto, &cso); 557 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 558 } 559 560 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, 561 pkt->l4hdr_off, csl, cso); 562 563 csum = net_checksum_finish_nozero(cntr); 564 565 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); 566 567 return csum; 568 } 569 570 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) 571 { 572 uint16_t csum; 573 574 trace_net_rx_pkt_l4_csum_validate_entry(); 575 576 if (!pkt->istcp && !pkt->isudp) { 577 trace_net_rx_pkt_l4_csum_validate_not_xxp(); 578 return false; 579 } 580 581 if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { 582 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); 583 return false; 584 } 585 586 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 587 trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); 588 return false; 589 } 590 591 csum = _net_rx_pkt_calc_l4_csum(pkt); 592 593 *csum_valid = ((csum == 0) || (csum == 0xFFFF)); 594 595 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); 596 597 return true; 598 } 599 600 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) 601 { 602 uint16_t csum = 0; 603 uint32_t l4_cso; 604 605 trace_net_rx_pkt_l4_csum_fix_entry(); 606 607 if (pkt->istcp) { 608 l4_cso = offsetof(struct tcp_header, th_sum); 609 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); 610 } else if (pkt->isudp) { 611 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { 612 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); 613 return false; 614 } 615 l4_cso = offsetof(struct udp_header, uh_sum); 616 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); 617 } else { 618 trace_net_rx_pkt_l4_csum_fix_not_xxp(); 619 return false; 620 } 621 622 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 623 trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); 624 return false; 625 } 626 627 /* Set zero to checksum word */ 628 iov_from_buf(pkt->vec, pkt->vec_len, 629 pkt->l4hdr_off + l4_cso, 630 &csum, sizeof(csum)); 631 632 /* Calculate L4 checksum */ 633 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); 634 635 /* Set calculated checksum to checksum word */ 636 iov_from_buf(pkt->vec, pkt->vec_len, 637 pkt->l4hdr_off + l4_cso, 638 &csum, sizeof(csum)); 639 640 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); 641 642 return true; 643 } 644