1 /* 2 * QEMU RX packets abstractions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "trace.h" 20 #include "net_rx_pkt.h" 21 #include "net/checksum.h" 22 #include "net/tap.h" 23 24 struct NetRxPkt { 25 struct virtio_net_hdr virt_hdr; 26 uint8_t ehdr_buf[sizeof(struct eth_header)]; 27 struct iovec *vec; 28 uint16_t vec_len_total; 29 uint16_t vec_len; 30 uint32_t tot_len; 31 uint16_t tci; 32 bool vlan_stripped; 33 bool has_virt_hdr; 34 eth_pkt_types_e packet_type; 35 36 /* Analysis results */ 37 bool isip4; 38 bool isip6; 39 bool isudp; 40 bool istcp; 41 42 size_t l3hdr_off; 43 size_t l4hdr_off; 44 size_t l5hdr_off; 45 46 eth_ip6_hdr_info ip6hdr_info; 47 eth_ip4_hdr_info ip4hdr_info; 48 eth_l4_hdr_info l4hdr_info; 49 }; 50 51 void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) 52 { 53 struct NetRxPkt *p = g_malloc0(sizeof *p); 54 p->has_virt_hdr = has_virt_hdr; 55 p->vec = NULL; 56 p->vec_len_total = 0; 57 *pkt = p; 58 } 59 60 void net_rx_pkt_uninit(struct NetRxPkt *pkt) 61 { 62 if (pkt->vec_len_total != 0) { 63 g_free(pkt->vec); 64 } 65 66 g_free(pkt); 67 } 68 69 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) 70 { 71 assert(pkt); 72 return &pkt->virt_hdr; 73 } 74 75 static inline void 76 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, 77 int new_iov_len) 78 { 79 if (pkt->vec_len_total < new_iov_len) { 80 g_free(pkt->vec); 81 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); 82 pkt->vec_len_total = new_iov_len; 83 } 84 } 85 86 static void 87 net_rx_pkt_pull_data(struct NetRxPkt *pkt, 88 const struct iovec *iov, int iovcnt, 89 size_t ploff) 90 { 91 if (pkt->vlan_stripped) { 92 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); 93 94 pkt->vec[0].iov_base = pkt->ehdr_buf; 95 pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf); 96 97 pkt->tot_len = 98 iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header); 99 100 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, 101 iov, iovcnt, ploff, pkt->tot_len); 102 } else { 103 net_rx_pkt_iovec_realloc(pkt, iovcnt); 104 105 pkt->tot_len = iov_size(iov, iovcnt) - ploff; 106 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, 107 iov, iovcnt, ploff, pkt->tot_len); 108 } 109 110 eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, 111 &pkt->isudp, &pkt->istcp, 112 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 113 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 114 115 trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, 116 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); 117 } 118 119 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, 120 const struct iovec *iov, int iovcnt, 121 size_t iovoff, bool strip_vlan) 122 { 123 uint16_t tci = 0; 124 uint16_t ploff = iovoff; 125 assert(pkt); 126 pkt->vlan_stripped = false; 127 128 if (strip_vlan) { 129 pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, 130 &ploff, &tci); 131 } 132 133 pkt->tci = tci; 134 135 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 136 } 137 138 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, 139 const struct iovec *iov, int iovcnt, 140 size_t iovoff, bool strip_vlan, 141 uint16_t vet) 142 { 143 uint16_t tci = 0; 144 uint16_t ploff = iovoff; 145 assert(pkt); 146 pkt->vlan_stripped = false; 147 148 if (strip_vlan) { 149 pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, 150 pkt->ehdr_buf, 151 &ploff, &tci); 152 } 153 154 pkt->tci = tci; 155 156 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 157 } 158 159 void net_rx_pkt_dump(struct NetRxPkt *pkt) 160 { 161 #ifdef NET_RX_PKT_DEBUG 162 NetRxPkt *pkt = (NetRxPkt *)pkt; 163 assert(pkt); 164 165 printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n", 166 pkt->tot_len, pkt->vlan_stripped, pkt->tci); 167 #endif 168 } 169 170 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, 171 eth_pkt_types_e packet_type) 172 { 173 assert(pkt); 174 175 pkt->packet_type = packet_type; 176 177 } 178 179 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) 180 { 181 assert(pkt); 182 183 return pkt->packet_type; 184 } 185 186 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) 187 { 188 assert(pkt); 189 190 return pkt->tot_len; 191 } 192 193 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, 194 size_t len) 195 { 196 const struct iovec iov = { 197 .iov_base = (void *)data, 198 .iov_len = len 199 }; 200 201 assert(pkt); 202 203 eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, 204 &pkt->isudp, &pkt->istcp, 205 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 206 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 207 } 208 209 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, 210 bool *isip4, bool *isip6, 211 bool *isudp, bool *istcp) 212 { 213 assert(pkt); 214 215 *isip4 = pkt->isip4; 216 *isip6 = pkt->isip6; 217 *isudp = pkt->isudp; 218 *istcp = pkt->istcp; 219 } 220 221 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) 222 { 223 assert(pkt); 224 return pkt->l3hdr_off; 225 } 226 227 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) 228 { 229 assert(pkt); 230 return pkt->l4hdr_off; 231 } 232 233 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) 234 { 235 assert(pkt); 236 return pkt->l5hdr_off; 237 } 238 239 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) 240 { 241 return &pkt->ip6hdr_info; 242 } 243 244 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) 245 { 246 return &pkt->ip4hdr_info; 247 } 248 249 eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) 250 { 251 return &pkt->l4hdr_info; 252 } 253 254 static inline void 255 _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, 256 void *ptr, size_t size) 257 { 258 memcpy(&rss_input[*bytes_written], ptr, size); 259 trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); 260 *bytes_written += size; 261 } 262 263 static inline void 264 _net_rx_rss_prepare_ip4(uint8_t *rss_input, 265 struct NetRxPkt *pkt, 266 size_t *bytes_written) 267 { 268 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; 269 270 _net_rx_rss_add_chunk(rss_input, bytes_written, 271 &ip4_hdr->ip_src, sizeof(uint32_t)); 272 273 _net_rx_rss_add_chunk(rss_input, bytes_written, 274 &ip4_hdr->ip_dst, sizeof(uint32_t)); 275 } 276 277 static inline void 278 _net_rx_rss_prepare_ip6(uint8_t *rss_input, 279 struct NetRxPkt *pkt, 280 bool ipv6ex, size_t *bytes_written) 281 { 282 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; 283 284 _net_rx_rss_add_chunk(rss_input, bytes_written, 285 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src 286 : &ip6info->ip6_hdr.ip6_src, 287 sizeof(struct in6_address)); 288 289 _net_rx_rss_add_chunk(rss_input, bytes_written, 290 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst 291 : &ip6info->ip6_hdr.ip6_dst, 292 sizeof(struct in6_address)); 293 } 294 295 static inline void 296 _net_rx_rss_prepare_tcp(uint8_t *rss_input, 297 struct NetRxPkt *pkt, 298 size_t *bytes_written) 299 { 300 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; 301 302 _net_rx_rss_add_chunk(rss_input, bytes_written, 303 &tcphdr->th_sport, sizeof(uint16_t)); 304 305 _net_rx_rss_add_chunk(rss_input, bytes_written, 306 &tcphdr->th_dport, sizeof(uint16_t)); 307 } 308 309 uint32_t 310 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, 311 NetRxPktRssType type, 312 uint8_t *key) 313 { 314 uint8_t rss_input[36]; 315 size_t rss_length = 0; 316 uint32_t rss_hash = 0; 317 net_toeplitz_key key_data; 318 319 switch (type) { 320 case NetPktRssIpV4: 321 assert(pkt->isip4); 322 trace_net_rx_pkt_rss_ip4(); 323 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 324 break; 325 case NetPktRssIpV4Tcp: 326 assert(pkt->isip4); 327 assert(pkt->istcp); 328 trace_net_rx_pkt_rss_ip4_tcp(); 329 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 330 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 331 break; 332 case NetPktRssIpV6Tcp: 333 assert(pkt->isip6); 334 assert(pkt->istcp); 335 trace_net_rx_pkt_rss_ip6_tcp(); 336 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 337 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 338 break; 339 case NetPktRssIpV6: 340 assert(pkt->isip6); 341 trace_net_rx_pkt_rss_ip6(); 342 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 343 break; 344 case NetPktRssIpV6Ex: 345 assert(pkt->isip6); 346 trace_net_rx_pkt_rss_ip6_ex(); 347 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 348 break; 349 default: 350 assert(false); 351 break; 352 } 353 354 net_toeplitz_key_init(&key_data, key); 355 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); 356 357 trace_net_rx_pkt_rss_hash(rss_length, rss_hash); 358 359 return rss_hash; 360 } 361 362 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) 363 { 364 assert(pkt); 365 366 if (pkt->isip4) { 367 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); 368 } 369 370 return 0; 371 } 372 373 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) 374 { 375 assert(pkt); 376 377 if (pkt->istcp) { 378 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; 379 } 380 381 return false; 382 } 383 384 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) 385 { 386 assert(pkt); 387 388 if (pkt->istcp) { 389 return pkt->l4hdr_info.has_tcp_data; 390 } 391 392 return false; 393 } 394 395 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) 396 { 397 assert(pkt); 398 399 return pkt->vec; 400 } 401 402 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) 403 { 404 assert(pkt); 405 406 return pkt->vec_len; 407 } 408 409 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, 410 struct virtio_net_hdr *vhdr) 411 { 412 assert(pkt); 413 414 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); 415 } 416 417 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, 418 const struct iovec *iov, int iovcnt) 419 { 420 assert(pkt); 421 422 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); 423 } 424 425 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) 426 { 427 assert(pkt); 428 429 return pkt->vlan_stripped; 430 } 431 432 bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt) 433 { 434 assert(pkt); 435 436 return pkt->has_virt_hdr; 437 } 438 439 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) 440 { 441 assert(pkt); 442 443 return pkt->tci; 444 } 445 446 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) 447 { 448 uint32_t cntr; 449 uint16_t csum; 450 uint32_t csl; 451 452 trace_net_rx_pkt_l3_csum_validate_entry(); 453 454 if (!pkt->isip4) { 455 trace_net_rx_pkt_l3_csum_validate_not_ip4(); 456 return false; 457 } 458 459 csl = pkt->l4hdr_off - pkt->l3hdr_off; 460 461 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, 462 pkt->l3hdr_off, 463 csl, 0); 464 465 csum = net_checksum_finish(cntr); 466 467 *csum_valid = (csum == 0); 468 469 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, 470 cntr, csum, *csum_valid); 471 472 return true; 473 } 474 475 static uint16_t 476 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) 477 { 478 uint32_t cntr; 479 uint16_t csum; 480 uint16_t csl; 481 uint32_t cso; 482 483 trace_net_rx_pkt_l4_csum_calc_entry(); 484 485 if (pkt->isip4) { 486 if (pkt->isudp) { 487 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 488 trace_net_rx_pkt_l4_csum_calc_ip4_udp(); 489 } else { 490 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - 491 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); 492 trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); 493 } 494 495 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, 496 csl, &cso); 497 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 498 } else { 499 if (pkt->isudp) { 500 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 501 trace_net_rx_pkt_l4_csum_calc_ip6_udp(); 502 } else { 503 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; 504 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; 505 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 506 507 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - 508 ip6opts_len; 509 trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); 510 } 511 512 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, 513 pkt->ip6hdr_info.l4proto, &cso); 514 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 515 } 516 517 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, 518 pkt->l4hdr_off, csl, cso); 519 520 csum = net_checksum_finish(cntr); 521 522 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); 523 524 return csum; 525 } 526 527 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) 528 { 529 uint16_t csum; 530 531 trace_net_rx_pkt_l4_csum_validate_entry(); 532 533 if (!pkt->istcp && !pkt->isudp) { 534 trace_net_rx_pkt_l4_csum_validate_not_xxp(); 535 return false; 536 } 537 538 if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { 539 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); 540 return false; 541 } 542 543 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 544 trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); 545 return false; 546 } 547 548 csum = _net_rx_pkt_calc_l4_csum(pkt); 549 550 *csum_valid = ((csum == 0) || (csum == 0xFFFF)); 551 552 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); 553 554 return true; 555 } 556 557 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) 558 { 559 uint16_t csum = 0; 560 uint32_t l4_cso; 561 562 trace_net_rx_pkt_l4_csum_fix_entry(); 563 564 if (pkt->istcp) { 565 l4_cso = offsetof(struct tcp_header, th_sum); 566 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); 567 } else if (pkt->isudp) { 568 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { 569 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); 570 return false; 571 } 572 l4_cso = offsetof(struct udp_header, uh_sum); 573 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); 574 } else { 575 trace_net_rx_pkt_l4_csum_fix_not_xxp(); 576 return false; 577 } 578 579 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 580 trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); 581 return false; 582 } 583 584 /* Set zero to checksum word */ 585 iov_from_buf(pkt->vec, pkt->vec_len, 586 pkt->l4hdr_off + l4_cso, 587 &csum, sizeof(csum)); 588 589 /* Calculate L4 checksum */ 590 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); 591 592 /* Set calculated checksum to checksum word */ 593 iov_from_buf(pkt->vec, pkt->vec_len, 594 pkt->l4hdr_off + l4_cso, 595 &csum, sizeof(csum)); 596 597 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); 598 599 return true; 600 } 601