/*
 * QEMU TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/crc32c.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
#include "hw/pci/pci_device.h"
#include "net_tx_pkt.h"

enum {
    NET_TX_PKT_VHDR_FRAG = 0,
    NET_TX_PKT_L2HDR_FRAG,
    NET_TX_PKT_L3HDR_FRAG,
    NET_TX_PKT_PL_START_FRAG
};

/* TX packet private context */
struct NetTxPkt {
    struct virtio_net_hdr virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    struct {
        struct eth_header eth;
        struct vlan_header vlan[3];
    } l2_hdr;
    union {
        struct ip_header ip;
        struct ip6_header ip6;
        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
    } l3_hdr;

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;
    uint8_t l4proto;
};

void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
{
    struct NetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);

    p->raw = g_new(struct iovec, max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof p->virt_hdr;
    p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr;

    *pkt = p;
}

void net_tx_pkt_uninit(struct NetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}
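/*
 * Typical caller lifecycle, as a sketch only: the device, descriptor and
 * NetClientState variables below are illustrative placeholders, not part of
 * this file's API.
 *
 *     struct NetTxPkt *pkt;
 *
 *     net_tx_pkt_init(&pkt, max_frags);
 *     ...
 *     net_tx_pkt_add_raw_fragment_pci(pkt, pci_dev, desc_addr, desc_len);
 *     if (net_tx_pkt_parse(pkt)) {
 *         net_tx_pkt_build_vheader(pkt, tso_enable, csum_enable, gso_size);
 *         net_tx_pkt_send(pkt, nc);
 *     }
 *     net_tx_pkt_reset(pkt, net_tx_pkt_unmap_frag_pci, pci_dev);
 *     ...
 *     net_tx_pkt_uninit(pkt);
 */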
void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
    uint16_t csum;
    assert(pkt);

    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

    pkt->l3_hdr.ip.ip_sum = 0;
    csum = net_raw_checksum(pkt->l3_hdr.octets,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}

void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t cntr, cso;
    assert(pkt);
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_UDP) {
        /* Calculate IP header checksum */
        net_tx_pkt_update_ip_hdr_checksum(pkt);

        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
                                            IP_PROTO_TCP, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else {
        return;
    }

    iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}

bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
{
    uint32_t csum = 0;
    struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;

    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum,
                     sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag,
                                  pkt->payload_frags));
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum,
                     sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    return true;
}

static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
}
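/*
 * Header parsing overview: net_tx_pkt_parse_headers() copies the Ethernet
 * header (plus any VLAN tags) into vec[NET_TX_PKT_L2HDR_FRAG] and the
 * IPv4/IPv6 header (including options or extension headers) into
 * vec[NET_TX_PKT_L3HDR_FRAG].  The guest buffers in pkt->raw are left
 * untouched; net_tx_pkt_rebuild_payload() then points the payload iovecs
 * at the raw data past the parsed headers.
 */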
static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    assert(pkt);

    l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < sizeof(struct eth_header)) {
        l2_hdr->iov_len = 0;
        return false;
    }

    l2_hdr->iov_len = sizeof(struct eth_header);
    switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
    case ETH_P_VLAN:
        l2_hdr->iov_len += sizeof(struct vlan_header);
        break;
    case ETH_P_DVLAN:
        l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
        break;
    }

    if (bytes_read < l2_hdr->iov_len) {
        l2_hdr->iov_len = 0;
        l3_hdr->iov_len = 0;
        pkt->packet_type = ETH_PKT_UCAST;
        return false;
    } else {
        l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
        l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
        pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    }

    l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));

        if (bytes_read < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);

        if (l3_hdr->iov_len < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base);

        if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) {
            /* copy optional IPv4 header data if any */
            bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                    l2_hdr->iov_len + sizeof(struct ip_header),
                                    l3_hdr->iov_base + sizeof(struct ip_header),
                                    l3_hdr->iov_len - sizeof(struct ip_header));
            if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
                l3_hdr->iov_len = 0;
                return false;
            }
        }

        break;

    case ETH_P_IPV6:
    {
        eth_ip6_hdr_info hdrinfo;

        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &hdrinfo)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = hdrinfo.l4proto;
        full_ip6hdr_len = hdrinfo.full_hdr_len;

        if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) {
            l3_hdr->iov_len = 0;
            return false;
        }

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);

        if (bytes_read < full_ip6hdr_len) {
            l3_hdr->iov_len = 0;
            return false;
        } else {
            l3_hdr->iov_len = full_ip6hdr_len;
        }
        break;
    }
    default:
        l3_hdr->iov_len = 0;
        break;
    }

    net_tx_pkt_calculate_hdr_len(pkt);
    return true;
}

static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
{
    pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
    pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, pkt->payload_len);
}

bool net_tx_pkt_parse(struct NetTxPkt *pkt)
{
    if (net_tx_pkt_parse_headers(pkt)) {
        net_tx_pkt_rebuild_payload(pkt);
        return true;
    } else {
        return false;
    }
}

struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
{
    assert(pkt);
    return &pkt->virt_hdr;
}

static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
                                       bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        goto func_exit;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

func_exit:
    return rc;
}
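/*
 * net_tx_pkt_build_vheader() fills in pkt->virt_hdr from the parsed headers.
 * As an illustration only (the numbers are assumed, not taken from this
 * file): for a TCPv4 packet with 20-byte IP and TCP headers and a gso_size
 * of 1460 it would end up with roughly
 *
 *     virt_hdr.gso_type    = VIRTIO_NET_HDR_GSO_TCPV4;
 *     virt_hdr.hdr_len     = 14 + 20 + 20;
 *     virt_hdr.gso_size    = 1460;
 *     virt_hdr.flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 *     virt_hdr.csum_start  = 14 + 20;
 *     virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
 */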
bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
    bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    size_t bytes_read;
    assert(pkt);

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = gso_size;
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        bytes_read = iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr));
        if (bytes_read < sizeof(l4hdr) ||
            l4hdr.th_off * sizeof(uint32_t) < sizeof(l4hdr)) {
            return false;
        }

        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = gso_size;
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            if (pkt->payload_len < sizeof(struct tcp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            if (pkt->payload_len < sizeof(struct udp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }

    return true;
}

void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
    uint16_t vlan, uint16_t vlan_ethtype)
{
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
                           &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
                           vlan, vlan_ethtype);

    pkt->hdr_len += sizeof(struct vlan_header);
}

bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
{
    struct iovec *ventry;
    assert(pkt);

    if (pkt->raw_frags >= pkt->max_raw_frags) {
        return false;
    }

    ventry = &pkt->raw[pkt->raw_frags];
    ventry->iov_base = base;
    ventry->iov_len = len;
    pkt->raw_frags++;

    return true;
}

bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
{
    return pkt->raw_frags > 0;
}

eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->packet_type;
}

size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->hdr_len + pkt->payload_len;
}

void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %zu, "
        "l3hdr_len: %zu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}

void net_tx_pkt_reset(struct NetTxPkt *pkt,
                      NetTxPktFreeFrag callback, void *context)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    assert(pkt->vec);

    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    if (pkt->max_raw_frags > 0) {
        assert(pkt->raw);
        for (i = 0; i < pkt->raw_frags; i++) {
            assert(pkt->raw[i].iov_base);
            callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
        }
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->l4proto = 0;
}

void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
{
    pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
}

bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
                                     dma_addr_t pa, size_t len)
{
    dma_addr_t mapped_len = len;
    void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE);
    if (!base) {
        return false;
    }

    if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
        net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
        return false;
    }

    return true;
}
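/*
 * net_tx_pkt_do_sw_csum() computes the L4 checksum in software for peers
 * that cannot consume the virtio-net header: it zeroes the checksum field,
 * accumulates the IPv4/IPv6 pseudo header, adds the payload data starting
 * at csum_start, and writes the finished 16-bit sum back into the packet.
 */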
static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
                                  struct iovec *iov, uint32_t iov_len,
                                  uint16_t csl)
{
    uint32_t csum_cntr;
    uint16_t csum = 0;
    uint32_t cso;
    /* L4 checksum offset within the packet, not counting the vhdr */
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
    uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csum_cntr = 0;
    cso = 0;
    /* add pseudo header to csum */
    if (l3_proto == ETH_P_IP) {
        csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
            csl, &cso);
    } else if (l3_proto == ETH_P_IPV6) {
        csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
            csl, pkt->l4proto, &cso);
    }

    /* data checksum */
    csum_cntr +=
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}

#define NET_MAX_FRAG_SG_LIST (64)

static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
    int *src_idx, size_t *src_offset, size_t src_len,
    struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    while (fetched < src_len) {

        /* no more place in fragment iov */
        if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
            src_len - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}

static void net_tx_pkt_sendv(
    void *opaque, const struct iovec *iov, int iov_cnt,
    const struct iovec *virt_iov, int virt_iov_cnt)
{
    NetClientState *nc = opaque;

    if (qemu_get_using_vnet_hdr(nc->peer)) {
        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
    } else {
        qemu_sendv_packet(nc, iov, iov_cnt);
    }
}
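/*
 * The helpers below implement software segmentation for peers that do not
 * accept GSO packets.  For TCP, one copy of the L4 header is kept in a
 * scratch buffer (net_tx_pkt_tcp_fragment_init()) and its sequence number
 * and IPv4 ID are advanced between segments; for UDP over IPv4 the datagram
 * is split into IP fragments whose offsets and MF flag are patched by
 * net_tx_pkt_udp_fragment_fix().
 */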
static bool net_tx_pkt_tcp_fragment_init(struct NetTxPkt *pkt,
                                         struct iovec *fragment,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx,
                                         size_t *src_offset,
                                         size_t *src_len)
{
    struct iovec *l4 = fragment + NET_TX_PKT_PL_START_FRAG;
    size_t bytes_read = 0;
    struct tcp_hdr *th;

    if (!pkt->payload_frags) {
        return false;
    }

    l4->iov_len = pkt->virt_hdr.hdr_len - pkt->hdr_len;
    l4->iov_base = g_malloc(l4->iov_len);

    *src_idx = NET_TX_PKT_PL_START_FRAG;
    while (pkt->vec[*src_idx].iov_len < l4->iov_len - bytes_read) {
        memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
               pkt->vec[*src_idx].iov_len);

        bytes_read += pkt->vec[*src_idx].iov_len;

        (*src_idx)++;
        if (*src_idx >= pkt->payload_frags + NET_TX_PKT_PL_START_FRAG) {
            g_free(l4->iov_base);
            return false;
        }
    }

    *src_offset = l4->iov_len - bytes_read;
    memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
           *src_offset);

    th = l4->iov_base;
    th->th_flags &= ~(TH_FIN | TH_PUSH);

    *pl_idx = NET_TX_PKT_PL_START_FRAG + 1;
    *l4hdr_len = l4->iov_len;
    *src_len = pkt->virt_hdr.gso_size;

    return true;
}

static void net_tx_pkt_tcp_fragment_deinit(struct iovec *fragment)
{
    g_free(fragment[NET_TX_PKT_PL_START_FRAG].iov_base);
}

static void net_tx_pkt_tcp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_len,
                                        uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct ip6_header *ip6 = l3hdr->iov_base;
    size_t len = l3hdr->iov_len + l4hdr->iov_len + fragment_len;

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
        ip->ip_len = cpu_to_be16(len);
        eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV6:
        len -= sizeof(struct ip6_header);
        ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = cpu_to_be16(len);
        break;
    }
}

static void net_tx_pkt_tcp_fragment_advance(struct NetTxPkt *pkt,
                                            struct iovec *fragment,
                                            size_t fragment_len,
                                            uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct tcp_hdr *th = l4hdr->iov_base;

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4) {
        ip->ip_id = cpu_to_be16(be16_to_cpu(ip->ip_id) + 1);
    }

    th->th_seq = cpu_to_be32(be32_to_cpu(th->th_seq) + fragment_len);
    th->th_flags &= ~TH_CWR;
}

static void net_tx_pkt_udp_fragment_init(struct NetTxPkt *pkt,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx, size_t *src_offset,
                                         size_t *src_len)
{
    *pl_idx = NET_TX_PKT_PL_START_FRAG;
    *l4hdr_len = 0;
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    *src_offset = 0;
    *src_len = IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size);
}

static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_offset,
                                        size_t fragment_len)
{
    bool more_frags = fragment_offset + fragment_len < pkt->payload_len;
    uint16_t orig_flags;
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    uint16_t frag_off_units = fragment_offset / IP_FRAG_UNIT_SIZE;
    uint16_t new_ip_off;

    assert(fragment_offset % IP_FRAG_UNIT_SIZE == 0);
    assert((frag_off_units & ~IP_OFFMASK) == 0);

    orig_flags = be16_to_cpu(ip->ip_off) & ~(IP_OFFMASK | IP_MF);
    new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
    ip->ip_off = cpu_to_be16(new_ip_off);
    ip->ip_len = cpu_to_be16(l3hdr->iov_len + fragment_len);

    eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
}
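/*
 * Software GSO main loop: a scratch iovec array shares the L2/L3 header
 * copies of the original packet, net_tx_pkt_fetch_fragment() fills the
 * payload slots with the next chunk of payload (bounded by gso_size), the
 * per-protocol fix-ups patch lengths and checksums, and the callback is
 * invoked once per resulting packet.
 */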
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
                                           NetTxPktSend callback,
                                           void *context)
{
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    struct iovec fragment[NET_MAX_FRAG_SG_LIST];
    size_t fragment_len;
    size_t l4hdr_len;
    size_t src_len;

    int src_idx, dst_idx, pl_idx;
    size_t src_offset;
    size_t fragment_offset = 0;
    struct virtio_net_hdr virt_hdr = {
        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
                 VIRTIO_NET_HDR_F_DATA_VALID : 0
    };

    /* Copy headers */
    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
    fragment[NET_TX_PKT_L2HDR_FRAG] = pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    fragment[NET_TX_PKT_L3HDR_FRAG] = pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        if (!net_tx_pkt_tcp_fragment_init(pkt, fragment, &pl_idx, &l4hdr_len,
                                          &src_idx, &src_offset, &src_len)) {
            return false;
        }
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                              pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                              pkt->payload_len);
        net_tx_pkt_udp_fragment_init(pkt, &pl_idx, &l4hdr_len,
                                     &src_idx, &src_offset, &src_len);
        break;

    default:
        abort();
    }

    /* Put as much data as possible and send */
    while (true) {
        dst_idx = pl_idx;
        fragment_len = net_tx_pkt_fetch_fragment(pkt,
            &src_idx, &src_offset, src_len, fragment, &dst_idx);
        if (!fragment_len) {
            break;
        }

        switch (gso_type) {
        case VIRTIO_NET_HDR_GSO_TCPV4:
        case VIRTIO_NET_HDR_GSO_TCPV6:
            net_tx_pkt_tcp_fragment_fix(pkt, fragment, fragment_len, gso_type);
            net_tx_pkt_do_sw_csum(pkt, fragment + NET_TX_PKT_L2HDR_FRAG,
                                  dst_idx - NET_TX_PKT_L2HDR_FRAG,
                                  l4hdr_len + fragment_len);
            break;

        case VIRTIO_NET_HDR_GSO_UDP:
            net_tx_pkt_udp_fragment_fix(pkt, fragment, fragment_offset,
                                        fragment_len);
            break;
        }

        callback(context,
                 fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
                 fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);

        if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
            gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
            net_tx_pkt_tcp_fragment_advance(pkt, fragment, fragment_len,
                                            gso_type);
        }

        fragment_offset += fragment_len;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        net_tx_pkt_tcp_fragment_deinit(fragment);
    }

    return true;
}

bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
    bool offload = qemu_get_using_vnet_hdr(nc->peer);
    return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
}

bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
                            NetTxPktSend callback, void *context)
{
    assert(pkt);

    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    /*
     * Since the underlying infrastructure does not support IP datagrams
     * longer than 64K, we should drop such packets and not even try to send.
     */
    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
            pkt->virt_hdr.flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
            net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                  pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                                  pkt->payload_len);
        }

        net_tx_pkt_fix_ip6_payload_len(pkt);
        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);
        return true;
    }

    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}
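/*
 * Guests may leave ip6_plen at zero for offloaded packets; the helper below
 * restores a real payload length before the packet is handed to the peer,
 * as long as the payload fits in the 16-bit field.  Larger packets would
 * need the IPv6 jumbo payload option, which is not implemented here (see
 * the TODO notes below).
 */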
void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
    struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
        /*
         * TODO: if QEMU ever supports >64K packets, add a jumbo option
         * check, something like:
         * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
         */
        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
            if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
            }
            /*
             * TODO: if QEMU ever supports >64K packets, add the jumbo
             * payload option for packets greater than 65,535 bytes.
             */
        }
    }
}