xref: /openbmc/qemu/net/eth.c (revision 5d7e601df37d8bdd490472fd4cfe3e4ca258df09)
1 /*
2  * QEMU network structures definitions and helper functions
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2 or later.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "net/eth.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 
eth_setup_vlan_headers(struct eth_header * ehdr,size_t * ehdr_size,uint16_t vlan_tag,uint16_t vlan_ethtype)24 void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
25                             uint16_t vlan_tag, uint16_t vlan_ethtype)
26 {
27     struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28 
29     memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);
30     vhdr->h_tci = cpu_to_be16(vlan_tag);
31     vhdr->h_proto = ehdr->h_proto;
32     ehdr->h_proto = cpu_to_be16(vlan_ethtype);
33     *ehdr_size += sizeof(*vhdr);
34 }
35 
36 uint8_t
eth_get_gso_type(uint16_t l3_proto,uint8_t * l3_hdr,uint8_t l4proto)37 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
38 {
39     uint8_t ecn_state = 0;
40 
41     if (l3_proto == ETH_P_IP) {
42         struct ip_header *iphdr = (struct ip_header *) l3_hdr;
43 
44         if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
45             if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
46                 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
47             }
48             if (l4proto == IP_PROTO_TCP) {
49                 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
50             } else if (l4proto == IP_PROTO_UDP) {
51                 return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
52             }
53         }
54     } else if (l3_proto == ETH_P_IPV6) {
55         struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
56 
57         if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
58             ecn_state = VIRTIO_NET_HDR_GSO_ECN;
59         }
60 
61         if (l4proto == IP_PROTO_TCP) {
62             return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
63         }
64     }
65     qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
66         "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
67 
68     return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
69 }
70 
71 uint16_t
eth_get_l3_proto(const struct iovec * l2hdr_iov,int iovcnt,size_t l2hdr_len)72 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
73 {
74     uint16_t proto;
75     size_t copied;
76     size_t size = iov_size(l2hdr_iov, iovcnt);
77     size_t proto_offset = l2hdr_len - sizeof(proto);
78 
79     if (size < proto_offset) {
80         return ETH_P_UNKNOWN;
81     }
82 
83     copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
84                         &proto, sizeof(proto));
85 
86     return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
87 }
88 
/*
 * Copy @length bytes starting at @offset of an I/O vector into @buffer.
 *
 * @input_size: total size of the vector, as precomputed by the caller
 *
 * Returns true only if @offset does not lie beyond @input_size and
 * iov_to_buf() delivered at least @length bytes.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    if (offset > input_size) {
        return false;
    }

    return iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
109 
110 static bool
_eth_tcp_has_data(bool is_ip4,const struct ip_header * ip4_hdr,const struct ip6_header * ip6_hdr,size_t full_ip6hdr_len,const struct tcp_header * tcp)111 _eth_tcp_has_data(bool is_ip4,
112                   const struct ip_header  *ip4_hdr,
113                   const struct ip6_header *ip6_hdr,
114                   size_t full_ip6hdr_len,
115                   const struct tcp_header *tcp)
116 {
117     uint32_t l4len;
118 
119     if (is_ip4) {
120         l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
121     } else {
122         size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
123         l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
124     }
125 
126     return l4len > TCP_HEADER_DATA_OFFSET(tcp);
127 }
128 
/*
 * Locate and parse the L3 and L4 headers of an Ethernet frame.
 *
 * @iov, @iovcnt: I/O vector holding the frame
 * @iovoff:       offset of the Ethernet header inside the vector
 * @hasip4:       out: true if a valid IPv4 header was found
 * @hasip6:       out: true if an IPv6 header was parsed successfully
 * @l3hdr_off:    out: offset of the L3 header (always written)
 * @l4hdr_off:    out: offset of the L4 header; written only once the L3
 *                header has been accepted
 * @l5hdr_off:    out: offset of the data following the L4 header; written
 *                only for TCP and UDP whose header could be copied
 * @ip6hdr_info:  out: IPv6 parsing results
 * @ip4hdr_info:  out: copy of the IPv4 header plus its fragment flag
 * @l4hdr_info:   out: L4 protocol id and a copy of the TCP/UDP header;
 *                proto stays ETH_L4_HDR_PROTO_INVALID unless TCP, UDP or
 *                SCTP is recognised
 *
 * Fragmented packets are not inspected past L3.
 */
void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
                       bool *hasip4, bool *hasip6,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info  *l4hdr_info)
{
    size_t total_len = iov_size(iov, iovcnt);
    bool is_fragment;
    uint8_t l4_id;
    int l3_proto;

    *hasip4 = *hasip6 = false;
    *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
    l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;

    l3_proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);

    if (l3_proto == ETH_P_IPV6) {
        if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
            return;
        }

        *hasip6 = true;
        l4_id = ip6hdr_info->l4proto;
        *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
        is_fragment = ip6hdr_info->fragment;
    } else if (l3_proto == ETH_P_IP) {
        struct ip_header *ip4 = &ip4hdr_info->ip4_hdr;

        if (*l3hdr_off > total_len) {
            return;
        }

        if (iov_to_buf(iov, iovcnt, *l3hdr_off, ip4, sizeof(*ip4))
            < sizeof(*ip4)) {
            return;
        }

        /* EtherType said IPv4; make sure the header agrees. */
        if (IP_HEADER_VERSION(ip4) != IP_HEADER_VERSION_4) {
            return;
        }

        *hasip4 = true;
        l4_id = ip4->ip_p;
        ip4hdr_info->fragment = IP4_IS_FRAGMENT(ip4);
        *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(ip4);

        is_fragment = ip4hdr_info->fragment;
    } else {
        return;
    }

    if (is_fragment) {
        return;
    }

    if (l4_id == IP_PROTO_TCP) {
        if (!_eth_copy_chunk(total_len,
                             iov, iovcnt,
                             *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
                             &l4hdr_info->hdr.tcp)) {
            return;
        }

        l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
        *l5hdr_off = *l4hdr_off +
            TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

        /* *l4hdr_off - *l3hdr_off is the full L3 header length. */
        l4hdr_info->has_tcp_data =
            _eth_tcp_has_data(l3_proto == ETH_P_IP,
                              &ip4hdr_info->ip4_hdr,
                              &ip6hdr_info->ip6_hdr,
                              *l4hdr_off - *l3hdr_off,
                              &l4hdr_info->hdr.tcp);
    } else if (l4_id == IP_PROTO_UDP) {
        if (!_eth_copy_chunk(total_len,
                             iov, iovcnt,
                             *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
                             &l4hdr_info->hdr.udp)) {
            return;
        }

        l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
        *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
    } else if (l4_id == IP_PROTO_SCTP) {
        /* SCTP is only identified; its header is not copied. */
        l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP;
    }
}
220 
221 size_t
eth_strip_vlan(const struct iovec * iov,int iovcnt,size_t iovoff,void * new_ehdr_buf,uint16_t * payload_offset,uint16_t * tci)222 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
223                void *new_ehdr_buf,
224                uint16_t *payload_offset, uint16_t *tci)
225 {
226     struct vlan_header vlan_hdr;
227     struct eth_header *new_ehdr = new_ehdr_buf;
228 
229     size_t copied = iov_to_buf(iov, iovcnt, iovoff,
230                                new_ehdr, sizeof(*new_ehdr));
231 
232     if (copied < sizeof(*new_ehdr)) {
233         return 0;
234     }
235 
236     switch (be16_to_cpu(new_ehdr->h_proto)) {
237     case ETH_P_VLAN:
238     case ETH_P_DVLAN:
239         copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
240                             &vlan_hdr, sizeof(vlan_hdr));
241 
242         if (copied < sizeof(vlan_hdr)) {
243             return 0;
244         }
245 
246         new_ehdr->h_proto = vlan_hdr.h_proto;
247 
248         *tci = be16_to_cpu(vlan_hdr.h_tci);
249         *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
250 
251         if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
252 
253             copied = iov_to_buf(iov, iovcnt, *payload_offset,
254                                 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
255 
256             if (copied < sizeof(vlan_hdr)) {
257                 return 0;
258             }
259 
260             *payload_offset += sizeof(vlan_hdr);
261 
262             return sizeof(struct eth_header) + sizeof(struct vlan_header);
263         } else {
264             return sizeof(struct eth_header);
265         }
266     default:
267         return 0;
268     }
269 }
270 
271 size_t
eth_strip_vlan_ex(const struct iovec * iov,int iovcnt,size_t iovoff,int index,uint16_t vet,uint16_t vet_ext,void * new_ehdr_buf,uint16_t * payload_offset,uint16_t * tci)272 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
273                   uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
274                   uint16_t *payload_offset, uint16_t *tci)
275 {
276     struct vlan_header vlan_hdr;
277     uint16_t *new_ehdr_proto;
278     size_t new_ehdr_size;
279     size_t copied;
280 
281     switch (index) {
282     case 0:
283         new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto;
284         new_ehdr_size = sizeof(struct eth_header);
285         copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
286         break;
287 
288     case 1:
289         new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto;
290         new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
291         copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
292         if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) {
293             return 0;
294         }
295         break;
296 
297     default:
298         return 0;
299     }
300 
301     if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) {
302         return 0;
303     }
304 
305     copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size,
306                         &vlan_hdr, sizeof(vlan_hdr));
307     if (copied < sizeof(vlan_hdr)) {
308         return 0;
309     }
310 
311     *new_ehdr_proto = vlan_hdr.h_proto;
312     *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr);
313     *tci = be16_to_cpu(vlan_hdr.h_tci);
314 
315     return new_ehdr_size;
316 }
317 
318 void
eth_fix_ip4_checksum(void * l3hdr,size_t l3hdr_len)319 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
320 {
321     struct ip_header *iphdr = (struct ip_header *) l3hdr;
322     iphdr->ip_sum = 0;
323     iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
324 }
325 
326 uint32_t
eth_calc_ip4_pseudo_hdr_csum(struct ip_header * iphdr,uint16_t csl,uint32_t * cso)327 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
328                              uint16_t csl,
329                              uint32_t *cso)
330 {
331     struct ip_pseudo_header ipph;
332     ipph.ip_src = iphdr->ip_src;
333     ipph.ip_dst = iphdr->ip_dst;
334     ipph.ip_payload = cpu_to_be16(csl);
335     ipph.ip_proto = iphdr->ip_p;
336     ipph.zeros = 0;
337     *cso = sizeof(ipph);
338     return net_checksum_add(*cso, (uint8_t *) &ipph);
339 }
340 
341 uint32_t
eth_calc_ip6_pseudo_hdr_csum(struct ip6_header * iphdr,uint16_t csl,uint8_t l4_proto,uint32_t * cso)342 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
343                              uint16_t csl,
344                              uint8_t l4_proto,
345                              uint32_t *cso)
346 {
347     struct ip6_pseudo_header ipph;
348     ipph.ip6_src = iphdr->ip6_src;
349     ipph.ip6_dst = iphdr->ip6_dst;
350     ipph.len = cpu_to_be16(csl);
351     ipph.zero[0] = 0;
352     ipph.zero[1] = 0;
353     ipph.zero[2] = 0;
354     ipph.next_hdr = l4_proto;
355     *cso = sizeof(ipph);
356     return net_checksum_add(*cso, (uint8_t *)&ipph);
357 }
358 
359 static bool
eth_is_ip6_extension_header_type(uint8_t hdr_type)360 eth_is_ip6_extension_header_type(uint8_t hdr_type)
361 {
362     switch (hdr_type) {
363     case IP6_HOP_BY_HOP:
364     case IP6_ROUTING:
365     case IP6_FRAGMENT:
366     case IP6_AUTHENTICATION:
367     case IP6_DESTINATON:
368     case IP6_MOBILITY:
369         return true;
370     default:
371         return false;
372     }
373 }
374 
375 static bool
_eth_get_rss_ex_dst_addr(const struct iovec * pkt,int pkt_frags,size_t ext_hdr_offset,struct ip6_ext_hdr * ext_hdr,struct in6_address * dst_addr)376 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
377                         size_t ext_hdr_offset,
378                         struct ip6_ext_hdr *ext_hdr,
379                         struct in6_address *dst_addr)
380 {
381     struct ip6_ext_hdr_routing rt_hdr;
382     size_t input_size = iov_size(pkt, pkt_frags);
383     size_t bytes_read;
384 
385     if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
386         return false;
387     }
388 
389     bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
390                             &rt_hdr, sizeof(rt_hdr));
391     assert(bytes_read == sizeof(rt_hdr));
392     if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
393         return false;
394     }
395     bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
396                             dst_addr, sizeof(*dst_addr));
397     assert(bytes_read == sizeof(*dst_addr));
398 
399     return true;
400 }
401 
402 static bool
_eth_get_rss_ex_src_addr(const struct iovec * pkt,int pkt_frags,size_t dsthdr_offset,struct ip6_ext_hdr * ext_hdr,struct in6_address * src_addr)403 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
404                         size_t dsthdr_offset,
405                         struct ip6_ext_hdr *ext_hdr,
406                         struct in6_address *src_addr)
407 {
408     size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
409     struct ip6_option_hdr opthdr;
410     size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
411 
412     while (bytes_left > sizeof(opthdr)) {
413         size_t input_size = iov_size(pkt, pkt_frags);
414         size_t bytes_read, optlen;
415 
416         if (input_size < opt_offset) {
417             return false;
418         }
419 
420         bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
421                                 &opthdr, sizeof(opthdr));
422 
423         if (bytes_read != sizeof(opthdr)) {
424             return false;
425         }
426 
427         optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
428                                                : (opthdr.len + sizeof(opthdr));
429 
430         if (optlen > bytes_left) {
431             return false;
432         }
433 
434         if (opthdr.type == IP6_OPT_HOME) {
435             if (input_size < opt_offset + sizeof(opthdr)) {
436                 return false;
437             }
438 
439             bytes_read = iov_to_buf(pkt, pkt_frags,
440                                     opt_offset + sizeof(opthdr),
441                                     src_addr, sizeof(*src_addr));
442 
443             return bytes_read == sizeof(*src_addr);
444         }
445 
446         opt_offset += optlen;
447         bytes_left -= optlen;
448     }
449 
450     return false;
451 }
452 
/*
 * Parse an IPv6 header and its chain of extension headers.
 *
 * @pkt, @pkt_frags: I/O vector holding the packet
 * @ip6hdr_off:      offset of the IPv6 header inside the vector
 * @info:            out: parsing results
 *
 * On success returns true with @info holding a copy of the fixed header,
 * the combined length of fixed and extension headers (full_hdr_len), the
 * first non-extension next-header value (l4proto), whether extension
 * headers were present, whether a fragment header was seen, and the
 * addresses recovered from routing / destination options headers
 * together with their validity flags.
 *
 * Returns false if the fixed header or any extension header cannot be
 * read in full; @info is then only partially filled in.
 */
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    struct ip6_ext_hdr ext;
    uint8_t next_hdr;
    size_t pkt_len = iov_size(pkt, pkt_frags);

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (ip6hdr_off > pkt_len) {
        return false;
    }

    if (iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                   &info->ip6_hdr, sizeof(info->ip6_hdr))
        < sizeof(info->ip6_hdr)) {
        return false;
    }

    info->full_hdr_len = sizeof(struct ip6_header);

    next_hdr = info->ip6_hdr.ip6_nxt;
    info->has_ext_hdrs = eth_is_ip6_extension_header_type(next_hdr);

    /* Walk the chain until a non-extension next-header value shows up. */
    while (eth_is_ip6_extension_header_type(next_hdr)) {
        size_t ext_off = ip6hdr_off + info->full_hdr_len;

        if (pkt_len < ext_off) {
            return false;
        }

        if (iov_to_buf(pkt, pkt_frags, ext_off, &ext, sizeof(ext))
            < sizeof(ext)) {
            return false;
        }

        switch (next_hdr) {
        case IP6_ROUTING:
            /* Only a routing header carrying exactly one address counts. */
            if (ext.ip6r_len == sizeof(struct in6_address) / 8) {
                info->rss_ex_dst_valid =
                    _eth_get_rss_ex_dst_addr(pkt, pkt_frags, ext_off,
                                             &ext, &info->rss_ex_dst);
            }
            break;

        case IP6_DESTINATON:
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags, ext_off,
                                         &ext, &info->rss_ex_src);
            break;

        case IP6_FRAGMENT:
            info->fragment = true;
            break;

        default:
            break;
        }

        info->full_hdr_len += (ext.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        next_hdr = ext.ip6r_nxt;
    }

    info->l4proto = next_hdr;
    return true;
}
522 
eth_pad_short_frame(uint8_t * padded_pkt,size_t * padded_buflen,const void * pkt,size_t pkt_size)523 bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
524                          const void *pkt, size_t pkt_size)
525 {
526     assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
527 
528     if (pkt_size >= ETH_ZLEN) {
529         return false;
530     }
531 
532     /* pad to minimum Ethernet frame length */
533     memcpy(padded_pkt, pkt, pkt_size);
534     memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
535     *padded_buflen = ETH_ZLEN;
536 
537     return true;
538 }
539