1 /* 2 * QEMU RX packets abstractions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "trace.h" 20 #include "net_rx_pkt.h" 21 #include "net/checksum.h" 22 #include "net/tap.h" 23 24 struct NetRxPkt { 25 struct virtio_net_hdr virt_hdr; 26 uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)]; 27 struct iovec *vec; 28 uint16_t vec_len_total; 29 uint16_t vec_len; 30 uint32_t tot_len; 31 uint16_t tci; 32 size_t ehdr_buf_len; 33 bool has_virt_hdr; 34 eth_pkt_types_e packet_type; 35 36 /* Analysis results */ 37 bool isip4; 38 bool isip6; 39 bool isudp; 40 bool istcp; 41 42 size_t l3hdr_off; 43 size_t l4hdr_off; 44 size_t l5hdr_off; 45 46 eth_ip6_hdr_info ip6hdr_info; 47 eth_ip4_hdr_info ip4hdr_info; 48 eth_l4_hdr_info l4hdr_info; 49 }; 50 51 void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) 52 { 53 struct NetRxPkt *p = g_malloc0(sizeof *p); 54 p->has_virt_hdr = has_virt_hdr; 55 p->vec = NULL; 56 p->vec_len_total = 0; 57 *pkt = p; 58 } 59 60 void net_rx_pkt_uninit(struct NetRxPkt *pkt) 61 { 62 if (pkt->vec_len_total != 0) { 63 g_free(pkt->vec); 64 } 65 66 g_free(pkt); 67 } 68 69 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) 70 { 71 assert(pkt); 72 return &pkt->virt_hdr; 73 } 74 75 static inline void 76 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, 77 int new_iov_len) 78 { 79 if (pkt->vec_len_total < new_iov_len) { 80 g_free(pkt->vec); 81 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); 82 pkt->vec_len_total = new_iov_len; 83 } 84 } 85 86 static void 87 net_rx_pkt_pull_data(struct NetRxPkt *pkt, 88 const struct iovec *iov, int iovcnt, 89 size_t ploff) 90 { 91 uint32_t pllen = iov_size(iov, iovcnt) - ploff; 92 93 if (pkt->ehdr_buf_len) { 94 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); 95 96 pkt->vec[0].iov_base = pkt->ehdr_buf; 97 pkt->vec[0].iov_len = pkt->ehdr_buf_len; 98 99 pkt->tot_len = pllen + pkt->ehdr_buf_len; 100 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, 101 iov, iovcnt, ploff, pllen) + 1; 102 } else { 103 net_rx_pkt_iovec_realloc(pkt, iovcnt); 104 105 pkt->tot_len = pllen; 106 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, 107 iov, iovcnt, ploff, pkt->tot_len); 108 } 109 110 eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, 111 &pkt->isudp, &pkt->istcp, 112 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 113 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 114 115 trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, 116 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); 117 } 118 119 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, 120 const struct iovec *iov, int iovcnt, 121 size_t iovoff, bool strip_vlan) 122 { 123 uint16_t tci = 0; 124 uint16_t ploff = iovoff; 125 assert(pkt); 126 127 if (strip_vlan) { 128 pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, 129 &ploff, &tci); 130 } else { 131 pkt->ehdr_buf_len = 0; 132 } 133 134 pkt->tci = tci; 135 136 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 137 } 138 139 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, 140 const struct iovec *iov, int iovcnt, 141 size_t iovoff, bool strip_vlan, 142 uint16_t vet) 143 { 144 uint16_t tci = 0; 145 uint16_t ploff = iovoff; 146 assert(pkt); 147 148 if (strip_vlan) { 149 pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, 150 pkt->ehdr_buf, 151 &ploff, &tci); 152 } else { 153 pkt->ehdr_buf_len = 0; 154 } 155 156 pkt->tci = tci; 157 158 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 159 } 160 161 void net_rx_pkt_dump(struct NetRxPkt *pkt) 162 { 163 #ifdef NET_RX_PKT_DEBUG 164 assert(pkt); 165 166 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n", 167 pkt->tot_len, pkt->ehdr_buf_len, pkt->tci); 168 #endif 169 } 170 171 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, 172 eth_pkt_types_e packet_type) 173 { 174 assert(pkt); 175 176 pkt->packet_type = packet_type; 177 178 } 179 180 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) 181 { 182 assert(pkt); 183 184 return pkt->packet_type; 185 } 186 187 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) 188 { 189 assert(pkt); 190 191 return pkt->tot_len; 192 } 193 194 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, 195 size_t len) 196 { 197 const struct iovec iov = { 198 .iov_base = (void *)data, 199 .iov_len = len 200 }; 201 202 assert(pkt); 203 204 eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, 205 &pkt->isudp, &pkt->istcp, 206 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 207 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 208 } 209 210 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, 211 bool *isip4, bool *isip6, 212 bool *isudp, bool *istcp) 213 { 214 assert(pkt); 215 216 *isip4 = pkt->isip4; 217 *isip6 = pkt->isip6; 218 *isudp = pkt->isudp; 219 *istcp = pkt->istcp; 220 } 221 222 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) 223 { 224 assert(pkt); 225 return pkt->l3hdr_off; 226 } 227 228 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) 229 { 230 assert(pkt); 231 return pkt->l4hdr_off; 232 } 233 234 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) 235 { 236 assert(pkt); 237 return pkt->l5hdr_off; 238 } 239 240 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) 241 { 242 return &pkt->ip6hdr_info; 243 } 244 245 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) 246 { 247 return &pkt->ip4hdr_info; 248 } 249 250 eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) 251 { 252 return &pkt->l4hdr_info; 253 } 254 255 static inline void 256 _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, 257 void *ptr, size_t size) 258 { 259 memcpy(&rss_input[*bytes_written], ptr, size); 260 trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); 261 *bytes_written += size; 262 } 263 264 static inline void 265 _net_rx_rss_prepare_ip4(uint8_t *rss_input, 266 struct NetRxPkt *pkt, 267 size_t *bytes_written) 268 { 269 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; 270 271 _net_rx_rss_add_chunk(rss_input, bytes_written, 272 &ip4_hdr->ip_src, sizeof(uint32_t)); 273 274 _net_rx_rss_add_chunk(rss_input, bytes_written, 275 &ip4_hdr->ip_dst, sizeof(uint32_t)); 276 } 277 278 static inline void 279 _net_rx_rss_prepare_ip6(uint8_t *rss_input, 280 struct NetRxPkt *pkt, 281 bool ipv6ex, size_t *bytes_written) 282 { 283 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; 284 285 _net_rx_rss_add_chunk(rss_input, bytes_written, 286 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src 287 : &ip6info->ip6_hdr.ip6_src, 288 sizeof(struct in6_address)); 289 290 _net_rx_rss_add_chunk(rss_input, bytes_written, 291 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst 292 : &ip6info->ip6_hdr.ip6_dst, 293 sizeof(struct in6_address)); 294 } 295 296 static inline void 297 _net_rx_rss_prepare_tcp(uint8_t *rss_input, 298 struct NetRxPkt *pkt, 299 size_t *bytes_written) 300 { 301 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; 302 303 _net_rx_rss_add_chunk(rss_input, bytes_written, 304 &tcphdr->th_sport, sizeof(uint16_t)); 305 306 _net_rx_rss_add_chunk(rss_input, bytes_written, 307 &tcphdr->th_dport, sizeof(uint16_t)); 308 } 309 310 uint32_t 311 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, 312 NetRxPktRssType type, 313 uint8_t *key) 314 { 315 uint8_t rss_input[36]; 316 size_t rss_length = 0; 317 uint32_t rss_hash = 0; 318 net_toeplitz_key key_data; 319 320 switch (type) { 321 case NetPktRssIpV4: 322 assert(pkt->isip4); 323 trace_net_rx_pkt_rss_ip4(); 324 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 325 break; 326 case NetPktRssIpV4Tcp: 327 assert(pkt->isip4); 328 assert(pkt->istcp); 329 trace_net_rx_pkt_rss_ip4_tcp(); 330 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 331 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 332 break; 333 case NetPktRssIpV6Tcp: 334 assert(pkt->isip6); 335 assert(pkt->istcp); 336 trace_net_rx_pkt_rss_ip6_tcp(); 337 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 338 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 339 break; 340 case NetPktRssIpV6: 341 assert(pkt->isip6); 342 trace_net_rx_pkt_rss_ip6(); 343 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 344 break; 345 case NetPktRssIpV6Ex: 346 assert(pkt->isip6); 347 trace_net_rx_pkt_rss_ip6_ex(); 348 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 349 break; 350 default: 351 assert(false); 352 break; 353 } 354 355 net_toeplitz_key_init(&key_data, key); 356 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); 357 358 trace_net_rx_pkt_rss_hash(rss_length, rss_hash); 359 360 return rss_hash; 361 } 362 363 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) 364 { 365 assert(pkt); 366 367 if (pkt->isip4) { 368 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); 369 } 370 371 return 0; 372 } 373 374 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) 375 { 376 assert(pkt); 377 378 if (pkt->istcp) { 379 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; 380 } 381 382 return false; 383 } 384 385 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) 386 { 387 assert(pkt); 388 389 if (pkt->istcp) { 390 return pkt->l4hdr_info.has_tcp_data; 391 } 392 393 return false; 394 } 395 396 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) 397 { 398 assert(pkt); 399 400 return pkt->vec; 401 } 402 403 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) 404 { 405 assert(pkt); 406 407 return pkt->vec_len; 408 } 409 410 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, 411 struct virtio_net_hdr *vhdr) 412 { 413 assert(pkt); 414 415 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); 416 } 417 418 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, 419 const struct iovec *iov, int iovcnt) 420 { 421 assert(pkt); 422 423 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); 424 } 425 426 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) 427 { 428 assert(pkt); 429 430 return pkt->ehdr_buf_len ? true : false; 431 } 432 433 bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt) 434 { 435 assert(pkt); 436 437 return pkt->has_virt_hdr; 438 } 439 440 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) 441 { 442 assert(pkt); 443 444 return pkt->tci; 445 } 446 447 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) 448 { 449 uint32_t cntr; 450 uint16_t csum; 451 uint32_t csl; 452 453 trace_net_rx_pkt_l3_csum_validate_entry(); 454 455 if (!pkt->isip4) { 456 trace_net_rx_pkt_l3_csum_validate_not_ip4(); 457 return false; 458 } 459 460 csl = pkt->l4hdr_off - pkt->l3hdr_off; 461 462 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, 463 pkt->l3hdr_off, 464 csl, 0); 465 466 csum = net_checksum_finish(cntr); 467 468 *csum_valid = (csum == 0); 469 470 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, 471 cntr, csum, *csum_valid); 472 473 return true; 474 } 475 476 static uint16_t 477 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) 478 { 479 uint32_t cntr; 480 uint16_t csum; 481 uint16_t csl; 482 uint32_t cso; 483 484 trace_net_rx_pkt_l4_csum_calc_entry(); 485 486 if (pkt->isip4) { 487 if (pkt->isudp) { 488 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 489 trace_net_rx_pkt_l4_csum_calc_ip4_udp(); 490 } else { 491 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - 492 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); 493 trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); 494 } 495 496 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, 497 csl, &cso); 498 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 499 } else { 500 if (pkt->isudp) { 501 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 502 trace_net_rx_pkt_l4_csum_calc_ip6_udp(); 503 } else { 504 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; 505 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; 506 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 507 508 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - 509 ip6opts_len; 510 trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); 511 } 512 513 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, 514 pkt->ip6hdr_info.l4proto, &cso); 515 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 516 } 517 518 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, 519 pkt->l4hdr_off, csl, cso); 520 521 csum = net_checksum_finish(cntr); 522 523 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); 524 525 return csum; 526 } 527 528 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) 529 { 530 uint16_t csum; 531 532 trace_net_rx_pkt_l4_csum_validate_entry(); 533 534 if (!pkt->istcp && !pkt->isudp) { 535 trace_net_rx_pkt_l4_csum_validate_not_xxp(); 536 return false; 537 } 538 539 if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { 540 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); 541 return false; 542 } 543 544 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 545 trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); 546 return false; 547 } 548 549 csum = _net_rx_pkt_calc_l4_csum(pkt); 550 551 *csum_valid = ((csum == 0) || (csum == 0xFFFF)); 552 553 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); 554 555 return true; 556 } 557 558 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) 559 { 560 uint16_t csum = 0; 561 uint32_t l4_cso; 562 563 trace_net_rx_pkt_l4_csum_fix_entry(); 564 565 if (pkt->istcp) { 566 l4_cso = offsetof(struct tcp_header, th_sum); 567 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); 568 } else if (pkt->isudp) { 569 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { 570 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); 571 return false; 572 } 573 l4_cso = offsetof(struct udp_header, uh_sum); 574 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); 575 } else { 576 trace_net_rx_pkt_l4_csum_fix_not_xxp(); 577 return false; 578 } 579 580 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 581 trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); 582 return false; 583 } 584 585 /* Set zero to checksum word */ 586 iov_from_buf(pkt->vec, pkt->vec_len, 587 pkt->l4hdr_off + l4_cso, 588 &csum, sizeof(csum)); 589 590 /* Calculate L4 checksum */ 591 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); 592 593 /* Set calculated checksum to checksum word */ 594 iov_from_buf(pkt->vec, pkt->vec_len, 595 pkt->l4hdr_off + l4_cso, 596 &csum, sizeof(csum)); 597 598 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); 599 600 return true; 601 } 602