xref: /openbmc/qemu/net/eth.c (revision 5d7e601df37d8bdd490472fd4cfe3e4ca258df09)
1  /*
2   * QEMU network structures definitions and helper functions
3   *
4   * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5   *
6   * Developed by Daynix Computing LTD (http://www.daynix.com)
7   *
8   * Authors:
9   * Dmitry Fleytman <dmitry@daynix.com>
10   * Tamir Shomer <tamirs@daynix.com>
11   * Yan Vugenfirer <yan@daynix.com>
12   *
13   * This work is licensed under the terms of the GNU GPL, version 2 or later.
14   * See the COPYING file in the top-level directory.
15   *
16   */
17  
18  #include "qemu/osdep.h"
19  #include "qemu/log.h"
20  #include "net/eth.h"
21  #include "net/checksum.h"
22  #include "net/tap.h"
23  
eth_setup_vlan_headers(struct eth_header * ehdr,size_t * ehdr_size,uint16_t vlan_tag,uint16_t vlan_ethtype)24  void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
25                              uint16_t vlan_tag, uint16_t vlan_ethtype)
26  {
27      struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28  
29      memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);
30      vhdr->h_tci = cpu_to_be16(vlan_tag);
31      vhdr->h_proto = ehdr->h_proto;
32      ehdr->h_proto = cpu_to_be16(vlan_ethtype);
33      *ehdr_size += sizeof(*vhdr);
34  }
35  
36  uint8_t
eth_get_gso_type(uint16_t l3_proto,uint8_t * l3_hdr,uint8_t l4proto)37  eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
38  {
39      uint8_t ecn_state = 0;
40  
41      if (l3_proto == ETH_P_IP) {
42          struct ip_header *iphdr = (struct ip_header *) l3_hdr;
43  
44          if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
45              if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
46                  ecn_state = VIRTIO_NET_HDR_GSO_ECN;
47              }
48              if (l4proto == IP_PROTO_TCP) {
49                  return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
50              } else if (l4proto == IP_PROTO_UDP) {
51                  return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
52              }
53          }
54      } else if (l3_proto == ETH_P_IPV6) {
55          struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
56  
57          if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
58              ecn_state = VIRTIO_NET_HDR_GSO_ECN;
59          }
60  
61          if (l4proto == IP_PROTO_TCP) {
62              return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
63          }
64      }
65      qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
66          "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
67  
68      return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
69  }
70  
71  uint16_t
eth_get_l3_proto(const struct iovec * l2hdr_iov,int iovcnt,size_t l2hdr_len)72  eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
73  {
74      uint16_t proto;
75      size_t copied;
76      size_t size = iov_size(l2hdr_iov, iovcnt);
77      size_t proto_offset = l2hdr_len - sizeof(proto);
78  
79      if (size < proto_offset) {
80          return ETH_P_UNKNOWN;
81      }
82  
83      copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
84                          &proto, sizeof(proto));
85  
86      return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
87  }
88  
/*
 * Copy @length bytes located at @offset of the vector into @buffer.
 *
 * @input_size is the total size of the vector as computed by the caller.
 * Returns true only if @offset does not lie beyond the data and the whole
 * chunk could be copied; nothing is copied when @offset is out of range.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    return input_size >= offset &&
           iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
109  
110  static bool
_eth_tcp_has_data(bool is_ip4,const struct ip_header * ip4_hdr,const struct ip6_header * ip6_hdr,size_t full_ip6hdr_len,const struct tcp_header * tcp)111  _eth_tcp_has_data(bool is_ip4,
112                    const struct ip_header  *ip4_hdr,
113                    const struct ip6_header *ip6_hdr,
114                    size_t full_ip6hdr_len,
115                    const struct tcp_header *tcp)
116  {
117      uint32_t l4len;
118  
119      if (is_ip4) {
120          l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
121      } else {
122          size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
123          l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
124      }
125  
126      return l4len > TCP_HEADER_DATA_OFFSET(tcp);
127  }
128  
/*
 * Locate and copy out the L3 and L4 headers of a packet.
 *
 * @iov, @iovcnt: scatter/gather vector holding the packet
 * @iovoff:       offset of the Ethernet header inside the vector
 * @hasip4:       out: set when a version 4 IPv4 header was read
 * @hasip6:       out: set when the IPv6 header chain was parsed
 * @l3hdr_off:    out: offset of the L3 header (always written)
 * @l4hdr_off:    out: offset of the L4 header (written only with an L3 hit)
 * @l5hdr_off:    out: offset past the L4 header (written only for TCP/UDP
 *                headers that were copied successfully)
 * @ip6hdr_info:  out: filled by eth_parse_ipv6_hdr() for IPv6
 * @ip4hdr_info:  out: IPv4 header copy and fragment flag
 * @l4hdr_info:   out: L4 protocol id and header copy; proto stays
 *                ETH_L4_HDR_PROTO_INVALID when no L4 header is recognised
 *
 * Fragmented packets stop after L3: no L4 header is examined for them.
 */
void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
                       bool *hasip4, bool *hasip6,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info  *l4hdr_info)
{
    size_t total = iov_size(iov, iovcnt);
    bool is_fragment = false;
    uint8_t l4_id;
    int l3_type;

    *hasip4 = *hasip6 = false;
    *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
    l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;

    l3_type = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);

    if (l3_type == ETH_P_IP) {
        struct ip_header *ip4 = &ip4hdr_info->ip4_hdr;
        size_t got;

        if (total < *l3hdr_off) {
            return;
        }

        got = iov_to_buf(iov, iovcnt, *l3hdr_off, ip4, sizeof(*ip4));
        if (got < sizeof(*ip4)) {
            return;
        }
        if (IP_HEADER_VERSION(ip4) != IP_HEADER_VERSION_4) {
            return;
        }

        *hasip4 = true;
        l4_id = ip4->ip_p;
        ip4hdr_info->fragment = IP4_IS_FRAGMENT(ip4);
        *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(ip4);

        is_fragment = ip4hdr_info->fragment;
    } else if (l3_type == ETH_P_IPV6) {
        if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
            return;
        }

        *hasip6 = true;
        l4_id = ip6hdr_info->l4proto;
        *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
        is_fragment = ip6hdr_info->fragment;
    } else {
        /* Neither IPv4 nor IPv6: nothing more to report. */
        return;
    }

    if (is_fragment) {
        return;
    }

    switch (l4_id) {
    case IP_PROTO_TCP:
        if (!_eth_copy_chunk(total, iov, iovcnt, *l4hdr_off,
                             sizeof(l4hdr_info->hdr.tcp),
                             &l4hdr_info->hdr.tcp)) {
            break;
        }

        l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
        *l5hdr_off = *l4hdr_off +
            TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

        l4hdr_info->has_tcp_data =
            _eth_tcp_has_data(l3_type == ETH_P_IP,
                              &ip4hdr_info->ip4_hdr,
                              &ip6hdr_info->ip6_hdr,
                              *l4hdr_off - *l3hdr_off,
                              &l4hdr_info->hdr.tcp);
        break;

    case IP_PROTO_UDP:
        if (!_eth_copy_chunk(total, iov, iovcnt, *l4hdr_off,
                             sizeof(l4hdr_info->hdr.udp),
                             &l4hdr_info->hdr.udp)) {
            break;
        }

        l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
        *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
        break;

    case IP_PROTO_SCTP:
        /* Only the protocol id is recorded; the SCTP header is not copied. */
        l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP;
        break;
    }
}
220  
221  size_t
eth_strip_vlan(const struct iovec * iov,int iovcnt,size_t iovoff,void * new_ehdr_buf,uint16_t * payload_offset,uint16_t * tci)222  eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
223                 void *new_ehdr_buf,
224                 uint16_t *payload_offset, uint16_t *tci)
225  {
226      struct vlan_header vlan_hdr;
227      struct eth_header *new_ehdr = new_ehdr_buf;
228  
229      size_t copied = iov_to_buf(iov, iovcnt, iovoff,
230                                 new_ehdr, sizeof(*new_ehdr));
231  
232      if (copied < sizeof(*new_ehdr)) {
233          return 0;
234      }
235  
236      switch (be16_to_cpu(new_ehdr->h_proto)) {
237      case ETH_P_VLAN:
238      case ETH_P_DVLAN:
239          copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
240                              &vlan_hdr, sizeof(vlan_hdr));
241  
242          if (copied < sizeof(vlan_hdr)) {
243              return 0;
244          }
245  
246          new_ehdr->h_proto = vlan_hdr.h_proto;
247  
248          *tci = be16_to_cpu(vlan_hdr.h_tci);
249          *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
250  
251          if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
252  
253              copied = iov_to_buf(iov, iovcnt, *payload_offset,
254                                  PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
255  
256              if (copied < sizeof(vlan_hdr)) {
257                  return 0;
258              }
259  
260              *payload_offset += sizeof(vlan_hdr);
261  
262              return sizeof(struct eth_header) + sizeof(struct vlan_header);
263          } else {
264              return sizeof(struct eth_header);
265          }
266      default:
267          return 0;
268      }
269  }
270  
271  size_t
eth_strip_vlan_ex(const struct iovec * iov,int iovcnt,size_t iovoff,int index,uint16_t vet,uint16_t vet_ext,void * new_ehdr_buf,uint16_t * payload_offset,uint16_t * tci)272  eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
273                    uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
274                    uint16_t *payload_offset, uint16_t *tci)
275  {
276      struct vlan_header vlan_hdr;
277      uint16_t *new_ehdr_proto;
278      size_t new_ehdr_size;
279      size_t copied;
280  
281      switch (index) {
282      case 0:
283          new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto;
284          new_ehdr_size = sizeof(struct eth_header);
285          copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
286          break;
287  
288      case 1:
289          new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto;
290          new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
291          copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
292          if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) {
293              return 0;
294          }
295          break;
296  
297      default:
298          return 0;
299      }
300  
301      if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) {
302          return 0;
303      }
304  
305      copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size,
306                          &vlan_hdr, sizeof(vlan_hdr));
307      if (copied < sizeof(vlan_hdr)) {
308          return 0;
309      }
310  
311      *new_ehdr_proto = vlan_hdr.h_proto;
312      *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr);
313      *tci = be16_to_cpu(vlan_hdr.h_tci);
314  
315      return new_ehdr_size;
316  }
317  
318  void
eth_fix_ip4_checksum(void * l3hdr,size_t l3hdr_len)319  eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
320  {
321      struct ip_header *iphdr = (struct ip_header *) l3hdr;
322      iphdr->ip_sum = 0;
323      iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
324  }
325  
326  uint32_t
eth_calc_ip4_pseudo_hdr_csum(struct ip_header * iphdr,uint16_t csl,uint32_t * cso)327  eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
328                               uint16_t csl,
329                               uint32_t *cso)
330  {
331      struct ip_pseudo_header ipph;
332      ipph.ip_src = iphdr->ip_src;
333      ipph.ip_dst = iphdr->ip_dst;
334      ipph.ip_payload = cpu_to_be16(csl);
335      ipph.ip_proto = iphdr->ip_p;
336      ipph.zeros = 0;
337      *cso = sizeof(ipph);
338      return net_checksum_add(*cso, (uint8_t *) &ipph);
339  }
340  
341  uint32_t
eth_calc_ip6_pseudo_hdr_csum(struct ip6_header * iphdr,uint16_t csl,uint8_t l4_proto,uint32_t * cso)342  eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
343                               uint16_t csl,
344                               uint8_t l4_proto,
345                               uint32_t *cso)
346  {
347      struct ip6_pseudo_header ipph;
348      ipph.ip6_src = iphdr->ip6_src;
349      ipph.ip6_dst = iphdr->ip6_dst;
350      ipph.len = cpu_to_be16(csl);
351      ipph.zero[0] = 0;
352      ipph.zero[1] = 0;
353      ipph.zero[2] = 0;
354      ipph.next_hdr = l4_proto;
355      *cso = sizeof(ipph);
356      return net_checksum_add(*cso, (uint8_t *)&ipph);
357  }
358  
359  static bool
eth_is_ip6_extension_header_type(uint8_t hdr_type)360  eth_is_ip6_extension_header_type(uint8_t hdr_type)
361  {
362      switch (hdr_type) {
363      case IP6_HOP_BY_HOP:
364      case IP6_ROUTING:
365      case IP6_FRAGMENT:
366      case IP6_AUTHENTICATION:
367      case IP6_DESTINATON:
368      case IP6_MOBILITY:
369          return true;
370      default:
371          return false;
372      }
373  }
374  
375  static bool
_eth_get_rss_ex_dst_addr(const struct iovec * pkt,int pkt_frags,size_t ext_hdr_offset,struct ip6_ext_hdr * ext_hdr,struct in6_address * dst_addr)376  _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
377                          size_t ext_hdr_offset,
378                          struct ip6_ext_hdr *ext_hdr,
379                          struct in6_address *dst_addr)
380  {
381      struct ip6_ext_hdr_routing rt_hdr;
382      size_t input_size = iov_size(pkt, pkt_frags);
383      size_t bytes_read;
384  
385      if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
386          return false;
387      }
388  
389      bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
390                              &rt_hdr, sizeof(rt_hdr));
391      assert(bytes_read == sizeof(rt_hdr));
392      if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
393          return false;
394      }
395      bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
396                              dst_addr, sizeof(*dst_addr));
397      assert(bytes_read == sizeof(*dst_addr));
398  
399      return true;
400  }
401  
402  static bool
_eth_get_rss_ex_src_addr(const struct iovec * pkt,int pkt_frags,size_t dsthdr_offset,struct ip6_ext_hdr * ext_hdr,struct in6_address * src_addr)403  _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
404                          size_t dsthdr_offset,
405                          struct ip6_ext_hdr *ext_hdr,
406                          struct in6_address *src_addr)
407  {
408      size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
409      struct ip6_option_hdr opthdr;
410      size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
411  
412      while (bytes_left > sizeof(opthdr)) {
413          size_t input_size = iov_size(pkt, pkt_frags);
414          size_t bytes_read, optlen;
415  
416          if (input_size < opt_offset) {
417              return false;
418          }
419  
420          bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
421                                  &opthdr, sizeof(opthdr));
422  
423          if (bytes_read != sizeof(opthdr)) {
424              return false;
425          }
426  
427          optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
428                                                 : (opthdr.len + sizeof(opthdr));
429  
430          if (optlen > bytes_left) {
431              return false;
432          }
433  
434          if (opthdr.type == IP6_OPT_HOME) {
435              if (input_size < opt_offset + sizeof(opthdr)) {
436                  return false;
437              }
438  
439              bytes_read = iov_to_buf(pkt, pkt_frags,
440                                      opt_offset + sizeof(opthdr),
441                                      src_addr, sizeof(*src_addr));
442  
443              return bytes_read == sizeof(*src_addr);
444          }
445  
446          opt_offset += optlen;
447          bytes_left -= optlen;
448      }
449  
450      return false;
451  }
452  
/*
 * Parse the IPv6 header and the chain of extension headers behind it.
 *
 * @pkt, @pkt_frags: scatter/gather vector holding the packet
 * @ip6hdr_off:      vector offset of the fixed IPv6 header
 * @info:            out: header copy, total header length, L4 protocol,
 *                   fragment flag and the optional RSS addresses
 *
 * Returns false if the packet ends before the fixed header or before one
 * of the extension headers; @info may be partially filled in that case.
 * On success info->l4proto holds the first next-header value that is not
 * a recognised extension header and info->full_hdr_len covers the fixed
 * header plus all extension headers walked.
 */
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    size_t total = iov_size(pkt, pkt_frags);
    struct ip6_ext_hdr ext;
    uint8_t next_type;
    bool more;

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (total < ip6hdr_off) {
        return false;
    }

    if (iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                   &info->ip6_hdr, sizeof(info->ip6_hdr)) <
        sizeof(info->ip6_hdr)) {
        return false;
    }

    info->full_hdr_len = sizeof(struct ip6_header);
    next_type = info->ip6_hdr.ip6_nxt;

    more = eth_is_ip6_extension_header_type(next_type);
    info->has_ext_hdrs = more;

    while (more) {
        size_t ext_off = ip6hdr_off + info->full_hdr_len;

        if (total < ext_off) {
            return false;
        }

        if (iov_to_buf(pkt, pkt_frags, ext_off,
                       &ext, sizeof(ext)) < sizeof(ext)) {
            return false;
        }

        switch (next_type) {
        case IP6_ROUTING:
            /* Only a routing header sized for exactly one address counts. */
            if (ext.ip6r_len == sizeof(struct in6_address) / 8) {
                info->rss_ex_dst_valid =
                    _eth_get_rss_ex_dst_addr(pkt, pkt_frags, ext_off,
                                             &ext, &info->rss_ex_dst);
            }
            break;

        case IP6_DESTINATON:
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags, ext_off,
                                         &ext, &info->rss_ex_src);
            break;

        case IP6_FRAGMENT:
            info->fragment = true;
            break;

        default:
            break;
        }

        info->full_hdr_len += (ext.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        next_type = ext.ip6r_nxt;
        more = eth_is_ip6_extension_header_type(next_type);
    }

    info->l4proto = next_type;
    return true;
}
522  
eth_pad_short_frame(uint8_t * padded_pkt,size_t * padded_buflen,const void * pkt,size_t pkt_size)523  bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
524                           const void *pkt, size_t pkt_size)
525  {
526      assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
527  
528      if (pkt_size >= ETH_ZLEN) {
529          return false;
530      }
531  
532      /* pad to minimum Ethernet frame length */
533      memcpy(padded_pkt, pkt, pkt_size);
534      memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
535      *padded_buflen = ETH_ZLEN;
536  
537      return true;
538  }
539