1 /*
2 * QEMU TX packets abstractions
3 *
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5 *
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
7 *
8 * Authors:
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
15 *
16 */
17
18 #include "qemu/osdep.h"
19 #include "qemu/crc32c.h"
20 #include "net/eth.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "net/net.h"
24 #include "hw/pci/pci_device.h"
25 #include "net_tx_pkt.h"
26
/*
 * Fixed slots of the header fragments inside NetTxPkt::vec;
 * payload fragments follow starting at NET_TX_PKT_PL_START_FRAG.
 */
enum {
    NET_TX_PKT_VHDR_FRAG = 0,   /* virtio-net header */
    NET_TX_PKT_L2HDR_FRAG,      /* Ethernet (+ optional VLAN tags) header */
    NET_TX_PKT_L3HDR_FRAG,      /* IPv4 / IPv6 header */
    NET_TX_PKT_PL_START_FRAG    /* first payload fragment */
};
33
34 /* TX packet private context */
/* TX packet private context */
struct NetTxPkt {
    struct virtio_net_hdr virt_hdr;   /* offload metadata sent ahead of the frame */

    struct iovec *raw;                /* guest fragments as provided (headers included) */
    uint32_t raw_frags;               /* entries currently used in 'raw' */
    uint32_t max_raw_frags;           /* capacity of 'raw' */

    /* vhdr/l2/l3 header slots followed by payload fragments, see enum above */
    struct iovec *vec;

    struct {
        struct eth_header eth;
        struct vlan_header vlan[3];   /* room for up to three stacked VLAN tags */
    } l2_hdr;
    union {
        struct ip_header ip;
        struct ip6_header ip6;
        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
    } l3_hdr;

    uint32_t payload_len;             /* bytes of L4 data (raw size minus headers) */

    uint32_t payload_frags;           /* payload entries currently used in 'vec' */
    uint32_t max_payload_frags;       /* payload capacity of 'vec' */

    uint16_t hdr_len;                 /* combined L2 + L3 header length */
    eth_pkt_types_e packet_type;      /* ucast/mcast/bcast, set while parsing */
    uint8_t l4proto;                  /* L4 protocol number from the IP header */
};
63
/*
 * Allocate a TX packet context able to hold up to @max_frags payload
 * fragments and store it in *@pkt.  The virtio/L2/L3 header slots of
 * the assembled iovec are wired to the embedded header storage.
 */
void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
{
    struct NetTxPkt *ctx = g_malloc0(sizeof(*ctx));

    ctx->max_payload_frags = max_frags;
    ctx->max_raw_frags = max_frags;

    ctx->raw = g_new(struct iovec, max_frags);
    ctx->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);

    ctx->vec[NET_TX_PKT_VHDR_FRAG] = (struct iovec) {
        .iov_base = &ctx->virt_hdr,
        .iov_len = sizeof(ctx->virt_hdr),
    };
    ctx->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &ctx->l2_hdr;
    ctx->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &ctx->l3_hdr;

    *pkt = ctx;
}
81
net_tx_pkt_uninit(struct NetTxPkt * pkt)82 void net_tx_pkt_uninit(struct NetTxPkt *pkt)
83 {
84 if (pkt) {
85 g_free(pkt->vec);
86 g_free(pkt->raw);
87 g_free(pkt);
88 }
89 }
90
/*
 * Refresh the IPv4 total-length field and header checksum in place.
 * Assumes the L3 header slot currently holds an IPv4 header.
 */
void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
    uint16_t csum;
    assert(pkt);

    /* total length = IP header + L4 payload */
    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

    /* the checksum is computed with its own field zeroed first */
    pkt->l3_hdr.ip.ip_sum = 0;
    csum = net_raw_checksum(pkt->l3_hdr.octets,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}
104
/*
 * Prepare checksums for a GSO packet: refresh the IPv4 header checksum
 * (TCPv4/UDP only) and seed the L4 checksum field with the IP pseudo
 * header checksum, so the receiver of the vnet header can complete the
 * TCP/UDP checksum over the data.
 */
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t cntr, cso;
    assert(pkt);
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

    /* datagrams beyond 64K cannot be represented in the IP length fields */
    if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_UDP) {
        /* Calculate IP header checksum */
        net_tx_pkt_update_ip_hdr_checksum(pkt);

        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
                                            IP_PROTO_TCP, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else {
        /* no checksum seeding needed for other gso types */
        return;
    }

    /* store the seed at csum_offset inside the L4 payload */
    iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}
138
/*
 * Recompute the CRC32c checksum of an SCTP payload in place.
 * Offset 8 is the checksum field of the SCTP common header.
 *
 * Returns false if the payload is too short to hold the checksum field.
 */
bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
{
    uint32_t csum = 0;
    struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;

    /* zero the checksum field before computing the CRC over the packet */
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag, pkt->payload_frags));
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    return true;
}
155
/* Cache the combined L2 + L3 header length in pkt->hdr_len */
static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
}
161
net_tx_pkt_parse_headers(struct NetTxPkt * pkt)162 static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
163 {
164 struct iovec *l2_hdr, *l3_hdr;
165 size_t bytes_read;
166 size_t full_ip6hdr_len;
167 uint16_t l3_proto;
168
169 assert(pkt);
170
171 l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
172 l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];
173
174 bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
175 ETH_MAX_L2_HDR_LEN);
176 if (bytes_read < sizeof(struct eth_header)) {
177 l2_hdr->iov_len = 0;
178 return false;
179 }
180
181 l2_hdr->iov_len = sizeof(struct eth_header);
182 switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
183 case ETH_P_VLAN:
184 l2_hdr->iov_len += sizeof(struct vlan_header);
185 break;
186 case ETH_P_DVLAN:
187 l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
188 break;
189 }
190
191 if (bytes_read < l2_hdr->iov_len) {
192 l2_hdr->iov_len = 0;
193 l3_hdr->iov_len = 0;
194 pkt->packet_type = ETH_PKT_UCAST;
195 return false;
196 } else {
197 l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
198 l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
199 pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
200 }
201
202 l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);
203
204 switch (l3_proto) {
205 case ETH_P_IP:
206 bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
207 l3_hdr->iov_base, sizeof(struct ip_header));
208
209 if (bytes_read < sizeof(struct ip_header)) {
210 l3_hdr->iov_len = 0;
211 return false;
212 }
213
214 l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);
215
216 if (l3_hdr->iov_len < sizeof(struct ip_header)) {
217 l3_hdr->iov_len = 0;
218 return false;
219 }
220
221 pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base);
222
223 if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) {
224 /* copy optional IPv4 header data if any*/
225 bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
226 l2_hdr->iov_len + sizeof(struct ip_header),
227 l3_hdr->iov_base + sizeof(struct ip_header),
228 l3_hdr->iov_len - sizeof(struct ip_header));
229 if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
230 l3_hdr->iov_len = 0;
231 return false;
232 }
233 }
234
235 break;
236
237 case ETH_P_IPV6:
238 {
239 eth_ip6_hdr_info hdrinfo;
240
241 if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
242 &hdrinfo)) {
243 l3_hdr->iov_len = 0;
244 return false;
245 }
246
247 pkt->l4proto = hdrinfo.l4proto;
248 full_ip6hdr_len = hdrinfo.full_hdr_len;
249
250 if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) {
251 l3_hdr->iov_len = 0;
252 return false;
253 }
254
255 bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
256 l3_hdr->iov_base, full_ip6hdr_len);
257
258 if (bytes_read < full_ip6hdr_len) {
259 l3_hdr->iov_len = 0;
260 return false;
261 } else {
262 l3_hdr->iov_len = full_ip6hdr_len;
263 }
264 break;
265 }
266 default:
267 l3_hdr->iov_len = 0;
268 break;
269 }
270
271 net_tx_pkt_calculate_hdr_len(pkt);
272 return true;
273 }
274
/*
 * Rebuild the payload part of pkt->vec as references into the raw
 * fragments, skipping the first hdr_len bytes (no data is copied).
 */
static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
{
    pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
    pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, pkt->payload_len);
}
283
net_tx_pkt_parse(struct NetTxPkt * pkt)284 bool net_tx_pkt_parse(struct NetTxPkt *pkt)
285 {
286 if (net_tx_pkt_parse_headers(pkt)) {
287 net_tx_pkt_rebuild_payload(pkt);
288 return true;
289 } else {
290 return false;
291 }
292 }
293
net_tx_pkt_get_vhdr(struct NetTxPkt * pkt)294 struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
295 {
296 assert(pkt);
297 return &pkt->virt_hdr;
298 }
299
net_tx_pkt_get_gso_type(struct NetTxPkt * pkt,bool tso_enable)300 static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
301 bool tso_enable)
302 {
303 uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
304 uint16_t l3_proto;
305
306 l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
307 pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);
308
309 if (!tso_enable) {
310 goto func_exit;
311 }
312
313 rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
314 pkt->l4proto);
315
316 func_exit:
317 return rc;
318 }
319
/*
 * Populate pkt->virt_hdr (gso_type, hdr_len, gso_size and the checksum
 * offload fields) from the parsed headers.
 *
 * @tso_enable:  allow a GSO type other than NONE
 * @csum_enable: request TCP/UDP checksum offload; must be set whenever
 *               @tso_enable is
 * @gso_size:    segment size advertised for GSO packets
 *
 * Returns false if the payload is too short for the L4 header implied
 * by the selected GSO type / protocol.
 */
bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
    bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    size_t bytes_read;
    assert(pkt);

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = gso_size;
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        /* hdr_len must cover the actual TCP header incl. options */
        bytes_read = iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr));
        if (bytes_read < sizeof(l4hdr) ||
            l4hdr.th_off * sizeof(uint32_t) < sizeof(l4hdr)) {
            return false;
        }

        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = gso_size;
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            if (pkt->payload_len < sizeof(struct tcp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            if (pkt->payload_len < sizeof(struct udp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            /* other L4 protocols: no checksum offload requested */
            break;
        }
    }

    return true;
}
385
/*
 * Insert a VLAN tag (@vlan, ethertype @vlan_ethtype) into the L2
 * header and grow the cached header length accordingly.
 */
void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
    uint16_t vlan, uint16_t vlan_ethtype)
{
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
                           &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
                           vlan, vlan_ethtype);

    pkt->hdr_len += sizeof(struct vlan_header);
}
397
net_tx_pkt_add_raw_fragment(struct NetTxPkt * pkt,void * base,size_t len)398 bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
399 {
400 struct iovec *ventry;
401 assert(pkt);
402
403 if (pkt->raw_frags >= pkt->max_raw_frags) {
404 return false;
405 }
406
407 ventry = &pkt->raw[pkt->raw_frags];
408 ventry->iov_base = base;
409 ventry->iov_len = len;
410 pkt->raw_frags++;
411
412 return true;
413 }
414
net_tx_pkt_has_fragments(struct NetTxPkt * pkt)415 bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
416 {
417 return pkt->raw_frags > 0;
418 }
419
net_tx_pkt_get_packet_type(struct NetTxPkt * pkt)420 eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
421 {
422 assert(pkt);
423
424 return pkt->packet_type;
425 }
426
net_tx_pkt_get_total_len(struct NetTxPkt * pkt)427 size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
428 {
429 assert(pkt);
430
431 return pkt->hdr_len + pkt->payload_len;
432 }
433
/* Print a one-line summary of the packet (NET_TX_PKT_DEBUG builds only) */
void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    /* iov_len is size_t: %zu is the portable conversion, not %lu */
    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %zu, "
        "l3hdr_len: %zu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}
445
/*
 * Return the packet to its freshly-initialized state.
 *
 * Every raw fragment is released through @callback (e.g.
 * net_tx_pkt_unmap_frag_pci) with @context as its first argument.
 * A NULL @pkt is tolerated because reset may run before init.
 */
void net_tx_pkt_reset(struct NetTxPkt *pkt,
                      NetTxPktFreeFrag callback, void *context)
{
    /* unsigned to match raw_frags and avoid a sign-compare warning */
    uint32_t i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    assert(pkt->vec);

    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    if (pkt->max_raw_frags > 0) {
        assert(pkt->raw);
        for (i = 0; i < pkt->raw_frags; i++) {
            assert(pkt->raw[i].iov_base);
            callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
        }
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->l4proto = 0;
}
475
/*
 * NetTxPktFreeFrag callback: unmap a fragment previously mapped with
 * pci_dma_map().  @context is the PCIDevice that owns the mapping.
 */
void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
{
    pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
}
480
/*
 * Map @len bytes of guest memory at @pa for device reads and attach
 * the mapping as a raw fragment.  If the mapping is partial or the
 * fragment list is full, the mapping is undone and false is returned.
 */
bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
                                     dma_addr_t pa, size_t len)
{
    dma_addr_t mapped_len = len;
    void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE);
    if (!base) {
        return false;
    }

    if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
        net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
        return false;
    }

    return true;
}
497
/*
 * Compute the L4 (TCP/UDP) checksum in software and write it into the
 * packet described by @iov.
 *
 * @iov:     packet iovec starting at the L2 header (no virtio header)
 * @iov_len: number of entries in @iov
 * @csl:     length of the L4 data covered by the checksum
 */
static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
                                  struct iovec *iov, uint32_t iov_len,
                                  uint16_t csl)
{
    uint32_t csum_cntr;
    uint16_t csum = 0;
    uint32_t cso;
    /* offset of the L4 checksum field from the start of the L2 header */
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
    uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csum_cntr = 0;
    cso = 0;
    /* add pseudo header to csum */
    if (l3_proto == ETH_P_IP) {
        csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, &cso);
    } else if (l3_proto == ETH_P_IPV6) {
        csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, pkt->l4proto, &cso);
    }

    /* data checksum */
    csum_cntr +=
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}
534
/* Upper bound on iovec entries used for one software-segmented fragment */
#define NET_MAX_FRAG_SG_LIST (64)
536
/*
 * Describe up to @src_len bytes of payload from pkt->vec (resuming at
 * *@src_idx / *@src_offset) as entries of @dst starting at *@dst_idx.
 * No data is copied; only iovec references are created.  All cursors
 * are advanced so the next call continues where this one stopped.
 *
 * Returns the number of bytes described (0 when the payload or @dst
 * space is exhausted).
 */
static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
    int *src_idx, size_t *src_offset, size_t src_len,
    struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    while (fetched < src_len) {

        /* no more place in fragment iov */
        if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
            break;
        }


        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
            src_len - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        /* source entry fully consumed: move to the next one */
        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}
574
/*
 * NetTxPktSend callback that forwards a packet to a NetClientState.
 * Sends the iovec including the virtio header when the peer consumes
 * vnet headers, and the header-less iovec otherwise.
 */
static void net_tx_pkt_sendv(
    void *opaque, const struct iovec *iov, int iov_cnt,
    const struct iovec *virt_iov, int virt_iov_cnt)
{
    NetClientState *nc = opaque;

    if (qemu_get_using_vnet_hdr(nc->peer)) {
        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
    } else {
        qemu_sendv_packet(nc, iov, iov_cnt);
    }
}
587
/*
 * Set up TCP segmentation state for net_tx_pkt_do_sw_fragmentation().
 *
 * Copies the TCP header (virt_hdr.hdr_len - hdr_len bytes) out of the
 * payload into a freshly allocated buffer placed at
 * fragment[NET_TX_PKT_PL_START_FRAG] (released later by
 * net_tx_pkt_tcp_fragment_deinit()), clears FIN/PSH so intermediate
 * segments don't carry them, and initializes the source cursor
 * (*src_idx/*src_offset) to the first payload byte after that header.
 * *src_len is set to the per-segment data size (gso_size).
 *
 * Returns false when there is no payload or it is shorter than the
 * L4 header.
 */
static bool net_tx_pkt_tcp_fragment_init(struct NetTxPkt *pkt,
                                         struct iovec *fragment,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx,
                                         size_t *src_offset,
                                         size_t *src_len)
{
    struct iovec *l4 = fragment + NET_TX_PKT_PL_START_FRAG;
    size_t bytes_read = 0;
    struct tcp_hdr *th;

    if (!pkt->payload_frags) {
        return false;
    }

    l4->iov_len = pkt->virt_hdr.hdr_len - pkt->hdr_len;
    l4->iov_base = g_malloc(l4->iov_len);

    /* gather the L4 header, which may span several payload fragments */
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    while (pkt->vec[*src_idx].iov_len < l4->iov_len - bytes_read) {
        memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
               pkt->vec[*src_idx].iov_len);

        bytes_read += pkt->vec[*src_idx].iov_len;

        (*src_idx)++;
        if (*src_idx >= pkt->payload_frags + NET_TX_PKT_PL_START_FRAG) {
            /* payload exhausted before the full L4 header was read */
            g_free(l4->iov_base);
            return false;
        }
    }

    *src_offset = l4->iov_len - bytes_read;
    memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
           *src_offset);

    th = l4->iov_base;
    /* FIN/PSH belong only to the final segment */
    th->th_flags &= ~(TH_FIN | TH_PUSH);

    *pl_idx = NET_TX_PKT_PL_START_FRAG + 1;
    *l4hdr_len = l4->iov_len;
    *src_len = pkt->virt_hdr.gso_size;

    return true;
}
634
/* Release the TCP header copy allocated by net_tx_pkt_tcp_fragment_init() */
static void net_tx_pkt_tcp_fragment_deinit(struct iovec *fragment)
{
    g_free(fragment[NET_TX_PKT_PL_START_FRAG].iov_base);
}
639
/*
 * Patch the per-segment L3 length fields: IPv4 total length plus
 * header checksum, or IPv6 payload length (which excludes the fixed
 * IPv6 header).
 */
static void net_tx_pkt_tcp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_len,
                                        uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct ip6_header *ip6 = l3hdr->iov_base;
    size_t len = l3hdr->iov_len + l4hdr->iov_len + fragment_len;

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
        ip->ip_len = cpu_to_be16(len);
        eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV6:
        len -= sizeof(struct ip6_header);
        ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = cpu_to_be16(len);
        break;
    }
}
663
/*
 * Advance the TCP/IP state after a segment has been emitted: bump the
 * IPv4 identification, move the sequence number past the segment data
 * and clear CWR on all but the first segment.
 */
static void net_tx_pkt_tcp_fragment_advance(struct NetTxPkt *pkt,
                                            struct iovec *fragment,
                                            size_t fragment_len,
                                            uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct tcp_hdr *th = l4hdr->iov_base;

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4) {
        ip->ip_id = cpu_to_be16(be16_to_cpu(ip->ip_id) + 1);
    }

    th->th_seq = cpu_to_be32(be32_to_cpu(th->th_seq) + fragment_len);
    th->th_flags &= ~TH_CWR;
}
681
/*
 * Initialize cursors for IPv4 fragmentation of a UDP datagram.
 * Unlike TCP there is no per-fragment L4 header; each fragment carries
 * gso_size bytes rounded to the IP fragment alignment.
 */
static void net_tx_pkt_udp_fragment_init(struct NetTxPkt *pkt,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx, size_t *src_offset,
                                         size_t *src_len)
{
    *pl_idx = NET_TX_PKT_PL_START_FRAG;
    *l4hdr_len = 0;
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    *src_offset = 0;
    *src_len = IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size);
}
694
/*
 * Rewrite the IPv4 header for one fragment of a UDP datagram:
 * fragment offset, more-fragments flag, total length and header
 * checksum.
 */
static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_offset,
                                        size_t fragment_len)
{
    bool more_frags = fragment_offset + fragment_len < pkt->payload_len;
    uint16_t orig_flags;
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    /* ip_off stores the offset in 8-byte units */
    uint16_t frag_off_units = fragment_offset / IP_FRAG_UNIT_SIZE;
    uint16_t new_ip_off;

    assert(fragment_offset % IP_FRAG_UNIT_SIZE == 0);
    assert((frag_off_units & ~IP_OFFMASK) == 0);

    /* preserve original flag bits other than offset and MF */
    orig_flags = be16_to_cpu(ip->ip_off) & ~(IP_OFFMASK | IP_MF);
    new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
    ip->ip_off = cpu_to_be16(new_ip_off);
    ip->ip_len = cpu_to_be16(l3hdr->iov_len + fragment_len);

    eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
}
717
/*
 * Split a GSO packet into gso_size-sized pieces in software and emit
 * each piece through @callback.
 *
 * TCP: a private copy of the TCP header is prepended to every segment
 * and its seq / IPv4 id / flags are advanced between segments.
 * UDP: the whole datagram is checksummed once, then IP-fragmented.
 *
 * Returns false only when TCP segmentation state could not be set up.
 */
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
                                           NetTxPktSend callback,
                                           void *context)
{
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    struct iovec fragment[NET_MAX_FRAG_SG_LIST];
    size_t fragment_len;
    size_t l4hdr_len;
    size_t src_len;

    int src_idx, dst_idx, pl_idx;
    size_t src_offset;
    size_t fragment_offset = 0;
    /* emitted pieces carry no offload request, only a validity hint */
    struct virtio_net_hdr virt_hdr = {
        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
                 VIRTIO_NET_HDR_F_DATA_VALID : 0
    };

    /* Copy headers */
    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
    fragment[NET_TX_PKT_L2HDR_FRAG] = pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    fragment[NET_TX_PKT_L3HDR_FRAG] = pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        if (!net_tx_pkt_tcp_fragment_init(pkt, fragment, &pl_idx, &l4hdr_len,
                                          &src_idx, &src_offset, &src_len)) {
            return false;
        }
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        /* UDP checksum covers the whole datagram, compute it up front */
        net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                              pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                              pkt->payload_len);
        net_tx_pkt_udp_fragment_init(pkt, &pl_idx, &l4hdr_len,
                                     &src_idx, &src_offset, &src_len);
        break;

    default:
        abort();
    }

    /* Put as much data as possible and send */
    while (true) {
        dst_idx = pl_idx;
        fragment_len = net_tx_pkt_fetch_fragment(pkt,
            &src_idx, &src_offset, src_len, fragment, &dst_idx);
        if (!fragment_len) {
            break;
        }

        switch (gso_type) {
        case VIRTIO_NET_HDR_GSO_TCPV4:
        case VIRTIO_NET_HDR_GSO_TCPV6:
            net_tx_pkt_tcp_fragment_fix(pkt, fragment, fragment_len, gso_type);
            /* per-segment TCP checksum over header copy + data */
            net_tx_pkt_do_sw_csum(pkt, fragment + NET_TX_PKT_L2HDR_FRAG,
                                  dst_idx - NET_TX_PKT_L2HDR_FRAG,
                                  l4hdr_len + fragment_len);
            break;

        case VIRTIO_NET_HDR_GSO_UDP:
            net_tx_pkt_udp_fragment_fix(pkt, fragment, fragment_offset,
                                        fragment_len);
            break;
        }

        callback(context,
                 fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
                 fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);

        if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
            gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
            net_tx_pkt_tcp_fragment_advance(pkt, fragment, fragment_len,
                                            gso_type);
        }

        fragment_offset += fragment_len;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        net_tx_pkt_tcp_fragment_deinit(fragment);
    }

    return true;
}
808
net_tx_pkt_send(struct NetTxPkt * pkt,NetClientState * nc)809 bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
810 {
811 bool offload = qemu_get_using_vnet_hdr(nc->peer);
812 return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
813 }
814
/*
 * Send the packet via @callback.
 *
 * @offload: true when the receiver interprets the virtio header itself
 *           (checksum/GSO done downstream); otherwise checksums are
 *           computed in software here and GSO packets are segmented
 *           by net_tx_pkt_do_sw_fragmentation().
 *
 * Returns false if the packet is dropped (oversized IP datagram or
 * failed segmentation setup).
 */
bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
                            NetTxPktSend callback, void *context)
{
    assert(pkt);

    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    /*
     * Since underlying infrastructure does not support IP datagrams longer
     * than 64K we should drop such packets and don't even try to send
     */
    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        /* without offload a requested checksum must be filled in here */
        if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
            net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                  pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                                  pkt->payload_len);
        }

        net_tx_pkt_fix_ip6_payload_len(pkt);
        /* emit both views: with and without the leading virtio header */
        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);
        return true;
    }

    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}
851
/*
 * Fill in a zero IPv6 payload-length field with the actual payload
 * size, as far as it fits in 16 bits.  No-op for non-IPv6 packets or
 * when the field is already set.
 */
void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
    struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
        /*
         * TODO: if qemu would support >64K packets - add jumbo option check
         * something like that:
         * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
         */
        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
            if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
            }
            /*
             * TODO: if qemu would support >64K packets
             * add jumbo option for packets greater then 65,535 bytes
             */
        }
    }
}
872