/*
 * QEMU TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/crc32c.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
#include "hw/pci/pci_device.h"
#include "net_tx_pkt.h"

enum {
    NET_TX_PKT_VHDR_FRAG = 0,
    NET_TX_PKT_L2HDR_FRAG,
    NET_TX_PKT_L3HDR_FRAG,
    NET_TX_PKT_PL_START_FRAG
};

/* TX packet private context */
struct NetTxPkt {
    struct virtio_net_hdr virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    struct {
        struct eth_header eth;
        struct vlan_header vlan[3];
    } l2_hdr;
    union {
        struct ip_header ip;
        struct ip6_header ip6;
        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
    } l3_hdr;

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;
    uint8_t l4proto;
};

void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
{
    struct NetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);

    p->raw = g_new(struct iovec, max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof p->virt_hdr;
    p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr;

    *pkt = p;
}

void net_tx_pkt_uninit(struct NetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}

void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
    uint16_t csum;
    assert(pkt);

    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

    pkt->l3_hdr.ip.ip_sum = 0;
    csum = net_raw_checksum(pkt->l3_hdr.octets,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}

void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t cntr, cso;
    assert(pkt);
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_UDP) {
        /* Calculate IP header checksum */
        net_tx_pkt_update_ip_hdr_checksum(pkt);

        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
                                            IP_PROTO_TCP, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else {
        return;
    }

    iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}
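/*
 * Recompute the SCTP CRC32C checksum over the whole payload and write it
 * back into the checksum field (four bytes at offset 8 of the SCTP common
 * header). Returns false if the payload is too short to hold that field.
 */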
bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
{
    uint32_t csum = 0;
    struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;

    if (iov_size(pl_start_frag, pkt->payload_frags) < 8 + sizeof(csum)) {
        return false;
    }

    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag, pkt->payload_frags));
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    return true;
}

static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
}

static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    assert(pkt);

    l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < sizeof(struct eth_header)) {
        l2_hdr->iov_len = 0;
        return false;
    }

    l2_hdr->iov_len = sizeof(struct eth_header);
    switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
    case ETH_P_VLAN:
        l2_hdr->iov_len += sizeof(struct vlan_header);
        break;
    case ETH_P_DVLAN:
        l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
        break;
    }

    if (bytes_read < l2_hdr->iov_len) {
        l2_hdr->iov_len = 0;
        l3_hdr->iov_len = 0;
        pkt->packet_type = ETH_PKT_UCAST;
        return false;
    } else {
        l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
        l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
        pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    }

    l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));

        if (bytes_read < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);

        if (l3_hdr->iov_len < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base);

        if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) {
            /* copy optional IPv4 header data if any */
            bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                    l2_hdr->iov_len + sizeof(struct ip_header),
                                    l3_hdr->iov_base + sizeof(struct ip_header),
                                    l3_hdr->iov_len - sizeof(struct ip_header));
            if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
                l3_hdr->iov_len = 0;
                return false;
            }
        }

        break;

    case ETH_P_IPV6:
    {
        eth_ip6_hdr_info hdrinfo;

        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &hdrinfo)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = hdrinfo.l4proto;
        full_ip6hdr_len = hdrinfo.full_hdr_len;

        if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) {
            l3_hdr->iov_len = 0;
            return false;
        }

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);

        if (bytes_read < full_ip6hdr_len) {
            l3_hdr->iov_len = 0;
            return false;
        } else {
            l3_hdr->iov_len = full_ip6hdr_len;
        }
        break;
    }
    default:
        l3_hdr->iov_len = 0;
        break;
    }

    net_tx_pkt_calculate_hdr_len(pkt);
    return true;
}
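/*
 * Point the payload iovec entries at the raw fragments, skipping the
 * hdr_len bytes that net_tx_pkt_parse_headers() copied out as the L2/L3
 * headers.
 */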
static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
{
    pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
    pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, pkt->payload_len);
}

bool net_tx_pkt_parse(struct NetTxPkt *pkt)
{
    if (net_tx_pkt_parse_headers(pkt)) {
        net_tx_pkt_rebuild_payload(pkt);
        return true;
    } else {
        return false;
    }
}

struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
{
    assert(pkt);
    return &pkt->virt_hdr;
}

static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
                                       bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        goto func_exit;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

func_exit:
    return rc;
}

bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
    bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    size_t bytes_read;
    assert(pkt);

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = gso_size;
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        bytes_read = iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr));
        if (bytes_read < sizeof(l4hdr) ||
            l4hdr.th_off * sizeof(uint32_t) < sizeof(l4hdr)) {
            return false;
        }

        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = gso_size;
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            if (pkt->payload_len < sizeof(struct tcp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            if (pkt->payload_len < sizeof(struct udp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }

    return true;
}

void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
    uint16_t vlan, uint16_t vlan_ethtype)
{
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
                           &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
                           vlan, vlan_ethtype);

    pkt->hdr_len += sizeof(struct vlan_header);
}
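/*
 * Typical calling sequence for a device model (illustrative sketch only;
 * "dev", "nc" and the descriptor fields are placeholders, not part of this
 * API):
 *
 *   net_tx_pkt_init(&pkt, max_frags);
 *   ...
 *   net_tx_pkt_add_raw_fragment_pci(pkt, dev, desc_addr, desc_len);
 *   net_tx_pkt_parse(pkt);
 *   net_tx_pkt_build_vheader(pkt, tso_enable, csum_enable, gso_size);
 *   net_tx_pkt_send(pkt, nc);
 *   net_tx_pkt_reset(pkt, net_tx_pkt_unmap_frag_pci, dev);
 *   ...
 *   net_tx_pkt_uninit(pkt);
 */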
bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
{
    struct iovec *ventry;
    assert(pkt);

    if (pkt->raw_frags >= pkt->max_raw_frags) {
        return false;
    }

    ventry = &pkt->raw[pkt->raw_frags];
    ventry->iov_base = base;
    ventry->iov_len = len;
    pkt->raw_frags++;

    return true;
}

bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
{
    return pkt->raw_frags > 0;
}

eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->packet_type;
}

size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->hdr_len + pkt->payload_len;
}

void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %zu, "
        "l3hdr_len: %zu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}

void net_tx_pkt_reset(struct NetTxPkt *pkt,
                      NetTxPktFreeFrag callback, void *context)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    assert(pkt->vec);

    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    if (pkt->max_raw_frags > 0) {
        assert(pkt->raw);
        for (i = 0; i < pkt->raw_frags; i++) {
            assert(pkt->raw[i].iov_base);
            callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
        }
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->l4proto = 0;
}

void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
{
    pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
}

bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
                                     dma_addr_t pa, size_t len)
{
    dma_addr_t mapped_len = len;
    void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE);
    if (!base) {
        return false;
    }

    if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
        net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
        return false;
    }

    return true;
}

static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
                                  struct iovec *iov, uint32_t iov_len,
                                  uint16_t csl)
{
    uint32_t csum_cntr;
    uint16_t csum = 0;
    uint32_t cso;
    /* offset of the L4 checksum field from the start of the L2 header */
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
    uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csum_cntr = 0;
    cso = 0;
    /* add pseudo header to csum */
    if (l3_proto == ETH_P_IP) {
        csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, csl, &cso);
    } else if (l3_proto == ETH_P_IPV6) {
        csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, csl, pkt->l4proto, &cso);
    }

    /* data checksum */
    csum_cntr +=
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}
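/*
 * Upper bound on the number of iovec entries a single software-built
 * fragment may occupy; net_tx_pkt_fetch_fragment() stops filling the
 * destination array once this many entries are in use.
 */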
#define NET_MAX_FRAG_SG_LIST (64)

static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
    int *src_idx, size_t *src_offset, size_t src_len,
    struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    while (fetched < src_len) {

        /* no more place in fragment iov */
        if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
                                    src_len - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}

static void net_tx_pkt_sendv(
    void *opaque, const struct iovec *iov, int iov_cnt,
    const struct iovec *virt_iov, int virt_iov_cnt)
{
    NetClientState *nc = opaque;

    if (qemu_get_using_vnet_hdr(nc->peer)) {
        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
    } else {
        qemu_sendv_packet(nc, iov, iov_cnt);
    }
}

static bool net_tx_pkt_tcp_fragment_init(struct NetTxPkt *pkt,
                                         struct iovec *fragment,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx,
                                         size_t *src_offset,
                                         size_t *src_len)
{
    struct iovec *l4 = fragment + NET_TX_PKT_PL_START_FRAG;
    size_t bytes_read = 0;
    struct tcp_hdr *th;

    if (!pkt->payload_frags) {
        return false;
    }

    l4->iov_len = pkt->virt_hdr.hdr_len - pkt->hdr_len;
    l4->iov_base = g_malloc(l4->iov_len);

    *src_idx = NET_TX_PKT_PL_START_FRAG;
    while (pkt->vec[*src_idx].iov_len < l4->iov_len - bytes_read) {
        memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
               pkt->vec[*src_idx].iov_len);

        bytes_read += pkt->vec[*src_idx].iov_len;

        (*src_idx)++;
        if (*src_idx >= pkt->payload_frags + NET_TX_PKT_PL_START_FRAG) {
            g_free(l4->iov_base);
            return false;
        }
    }

    *src_offset = l4->iov_len - bytes_read;
    memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
           *src_offset);

    th = l4->iov_base;
    th->th_flags &= ~(TH_FIN | TH_PUSH);

    *pl_idx = NET_TX_PKT_PL_START_FRAG + 1;
    *l4hdr_len = l4->iov_len;
    *src_len = pkt->virt_hdr.gso_size;

    return true;
}

static void net_tx_pkt_tcp_fragment_deinit(struct iovec *fragment)
{
    g_free(fragment[NET_TX_PKT_PL_START_FRAG].iov_base);
}

static void net_tx_pkt_tcp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_len,
                                        uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct ip6_header *ip6 = l3hdr->iov_base;
    size_t len = l3hdr->iov_len + l4hdr->iov_len + fragment_len;

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
        ip->ip_len = cpu_to_be16(len);
        eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV6:
        len -= sizeof(struct ip6_header);
        ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = cpu_to_be16(len);
        break;
    }
}
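/*
 * Advance per-segment TCP state between fragments: bump the IPv4
 * identification field, move the sequence number forward by the payload
 * bytes just sent and clear CWR on the following segments.
 */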
static void net_tx_pkt_tcp_fragment_advance(struct NetTxPkt *pkt,
                                            struct iovec *fragment,
                                            size_t fragment_len,
                                            uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct tcp_hdr *th = l4hdr->iov_base;

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4) {
        ip->ip_id = cpu_to_be16(be16_to_cpu(ip->ip_id) + 1);
    }

    th->th_seq = cpu_to_be32(be32_to_cpu(th->th_seq) + fragment_len);
    th->th_flags &= ~TH_CWR;
}

static void net_tx_pkt_udp_fragment_init(struct NetTxPkt *pkt,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx, size_t *src_offset,
                                         size_t *src_len)
{
    *pl_idx = NET_TX_PKT_PL_START_FRAG;
    *l4hdr_len = 0;
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    *src_offset = 0;
    *src_len = IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size);
}

static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_offset,
                                        size_t fragment_len)
{
    bool more_frags = fragment_offset + fragment_len < pkt->payload_len;
    uint16_t orig_flags;
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    uint16_t frag_off_units = fragment_offset / IP_FRAG_UNIT_SIZE;
    uint16_t new_ip_off;

    assert(fragment_offset % IP_FRAG_UNIT_SIZE == 0);
    assert((frag_off_units & ~IP_OFFMASK) == 0);

    orig_flags = be16_to_cpu(ip->ip_off) & ~(IP_OFFMASK | IP_MF);
    new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
    ip->ip_off = cpu_to_be16(new_ip_off);
    ip->ip_len = cpu_to_be16(l3hdr->iov_len + fragment_len);

    eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
}
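/*
 * Software GSO fallback: carve the payload into gso_size sized chunks and
 * hand each chunk to the callback together with a copy of the L2/L3 (and,
 * for TCP, L4) headers, patching lengths, checksums, IP id and TCP sequence
 * numbers per fragment.
 */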
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
                                           NetTxPktSend callback,
                                           void *context)
{
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    struct iovec fragment[NET_MAX_FRAG_SG_LIST];
    size_t fragment_len;
    size_t l4hdr_len;
    size_t src_len;

    int src_idx, dst_idx, pl_idx;
    size_t src_offset;
    size_t fragment_offset = 0;
    struct virtio_net_hdr virt_hdr = {
        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
                 VIRTIO_NET_HDR_F_DATA_VALID : 0
    };

    /* Copy headers */
    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
    fragment[NET_TX_PKT_L2HDR_FRAG] = pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    fragment[NET_TX_PKT_L3HDR_FRAG] = pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        if (!net_tx_pkt_tcp_fragment_init(pkt, fragment, &pl_idx, &l4hdr_len,
                                          &src_idx, &src_offset, &src_len)) {
            return false;
        }
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                              pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                              pkt->payload_len);
        net_tx_pkt_udp_fragment_init(pkt, &pl_idx, &l4hdr_len,
                                     &src_idx, &src_offset, &src_len);
        break;

    default:
        abort();
    }

    /* Put as much data as possible and send */
    while (true) {
        dst_idx = pl_idx;
        fragment_len = net_tx_pkt_fetch_fragment(pkt,
            &src_idx, &src_offset, src_len, fragment, &dst_idx);
        if (!fragment_len) {
            break;
        }

        switch (gso_type) {
        case VIRTIO_NET_HDR_GSO_TCPV4:
        case VIRTIO_NET_HDR_GSO_TCPV6:
            net_tx_pkt_tcp_fragment_fix(pkt, fragment, fragment_len, gso_type);
            net_tx_pkt_do_sw_csum(pkt, fragment + NET_TX_PKT_L2HDR_FRAG,
                                  dst_idx - NET_TX_PKT_L2HDR_FRAG,
                                  l4hdr_len + fragment_len);
            break;

        case VIRTIO_NET_HDR_GSO_UDP:
            net_tx_pkt_udp_fragment_fix(pkt, fragment, fragment_offset,
                                        fragment_len);
            break;
        }

        callback(context,
                 fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
                 fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);

        if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
            gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
            net_tx_pkt_tcp_fragment_advance(pkt, fragment, fragment_len,
                                            gso_type);
        }

        fragment_offset += fragment_len;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        net_tx_pkt_tcp_fragment_deinit(fragment);
    }

    return true;
}

bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
    bool offload = qemu_get_using_vnet_hdr(nc->peer);
    return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
}

bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
                            NetTxPktSend callback, void *context)
{
    assert(pkt);

    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    /*
     * Since the underlying infrastructure does not support IP datagrams
     * longer than 64K, drop such packets and do not even try to send them.
     */
    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN - pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
            pkt->virt_hdr.flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
            net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                  pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                                  pkt->payload_len);
        }

        net_tx_pkt_fix_ip6_payload_len(pkt);
        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);
        return true;
    }

    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}
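/*
 * If the IPv6 payload length field was left at zero, fill it in with the
 * actual payload length, provided it fits into 64K (see the TODOs below
 * about jumbograms).
 */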
void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
    struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
        /*
         * TODO: if qemu would support >64K packets - add jumbo option check
         * something like this:
         * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
         */
        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
            if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
            }
            /*
             * TODO: if qemu would support >64K packets
             * add jumbo option for packets greater than 65,535 bytes
             */
        }
    }
}