xref: /openbmc/qemu/net/eth.c (revision c49b67f72b1a875ca355a170759303b0faa959ca)
1  /*
2   * QEMU network structures definitions and helper functions
3   *
4   * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5   *
6   * Developed by Daynix Computing LTD (http://www.daynix.com)
7   *
8   * Authors:
9   * Dmitry Fleytman <dmitry@daynix.com>
10   * Tamir Shomer <tamirs@daynix.com>
11   * Yan Vugenfirer <yan@daynix.com>
12   *
13   * This work is licensed under the terms of the GNU GPL, version 2 or later.
14   * See the COPYING file in the top-level directory.
15   *
16   */
17  
18  #include "qemu/osdep.h"
19  #include "qemu/log.h"
20  #include "net/eth.h"
21  #include "net/checksum.h"
22  #include "net/tap.h"
23  
24  void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
25      uint16_t vlan_ethtype, bool *is_new)
26  {
27      struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28  
29      switch (be16_to_cpu(ehdr->h_proto)) {
30      case ETH_P_VLAN:
31      case ETH_P_DVLAN:
32          /* vlan hdr exists */
33          *is_new = false;
34          break;
35  
36      default:
37          /* No VLAN header, put a new one */
38          vhdr->h_proto = ehdr->h_proto;
39          ehdr->h_proto = cpu_to_be16(vlan_ethtype);
40          *is_new = true;
41          break;
42      }
43      vhdr->h_tci = cpu_to_be16(vlan_tag);
44  }
45  
46  uint8_t
47  eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
48  {
49      uint8_t ecn_state = 0;
50  
51      if (l3_proto == ETH_P_IP) {
52          struct ip_header *iphdr = (struct ip_header *) l3_hdr;
53  
54          if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
55              if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
56                  ecn_state = VIRTIO_NET_HDR_GSO_ECN;
57              }
58              if (l4proto == IP_PROTO_TCP) {
59                  return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
60              } else if (l4proto == IP_PROTO_UDP) {
61                  return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
62              }
63          }
64      } else if (l3_proto == ETH_P_IPV6) {
65          struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
66  
67          if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
68              ecn_state = VIRTIO_NET_HDR_GSO_ECN;
69          }
70  
71          if (l4proto == IP_PROTO_TCP) {
72              return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
73          }
74      }
75      qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
76          "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
77  
78      return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
79  }
80  
81  uint16_t
82  eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
83  {
84      uint16_t proto;
85      size_t copied;
86      size_t size = iov_size(l2hdr_iov, iovcnt);
87      size_t proto_offset = l2hdr_len - sizeof(proto);
88  
89      if (size < proto_offset) {
90          return ETH_P_UNKNOWN;
91      }
92  
93      copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
94                          &proto, sizeof(proto));
95  
96      return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
97  }
98  
99  static bool
100  _eth_copy_chunk(size_t input_size,
101                  const struct iovec *iov, int iovcnt,
102                  size_t offset, size_t length,
103                  void *buffer)
104  {
105      size_t copied;
106  
107      if (input_size < offset) {
108          return false;
109      }
110  
111      copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
112  
113      if (copied < length) {
114          return false;
115      }
116  
117      return true;
118  }
119  
120  static bool
121  _eth_tcp_has_data(bool is_ip4,
122                    const struct ip_header  *ip4_hdr,
123                    const struct ip6_header *ip6_hdr,
124                    size_t full_ip6hdr_len,
125                    const struct tcp_header *tcp)
126  {
127      uint32_t l4len;
128  
129      if (is_ip4) {
130          l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
131      } else {
132          size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
133          l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
134      }
135  
136      return l4len > TCP_HEADER_DATA_OFFSET(tcp);
137  }
138  
139  void eth_get_protocols(const struct iovec *iov, int iovcnt,
140                         bool *isip4, bool *isip6,
141                         bool *isudp, bool *istcp,
142                         size_t *l3hdr_off,
143                         size_t *l4hdr_off,
144                         size_t *l5hdr_off,
145                         eth_ip6_hdr_info *ip6hdr_info,
146                         eth_ip4_hdr_info *ip4hdr_info,
147                         eth_l4_hdr_info  *l4hdr_info)
148  {
149      int proto;
150      bool fragment = false;
151      size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
152      size_t input_size = iov_size(iov, iovcnt);
153      size_t copied;
154  
155      *isip4 = *isip6 = *isudp = *istcp = false;
156  
157      proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
158  
159      *l3hdr_off = l2hdr_len;
160  
161      if (proto == ETH_P_IP) {
162          struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
163  
164          if (input_size < l2hdr_len) {
165              return;
166          }
167  
168          copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
169  
170          *isip4 = true;
171  
172          if (copied < sizeof(*iphdr)) {
173              return;
174          }
175  
176          if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
177              if (iphdr->ip_p == IP_PROTO_TCP) {
178                  *istcp = true;
179              } else if (iphdr->ip_p == IP_PROTO_UDP) {
180                  *isudp = true;
181              }
182          }
183  
184          ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
185          *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
186  
187          fragment = ip4hdr_info->fragment;
188      } else if (proto == ETH_P_IPV6) {
189  
190          *isip6 = true;
191          if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
192                                 ip6hdr_info)) {
193              if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
194                  *istcp = true;
195              } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
196                  *isudp = true;
197              }
198          } else {
199              return;
200          }
201  
202          *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
203          fragment = ip6hdr_info->fragment;
204      }
205  
206      if (!fragment) {
207          if (*istcp) {
208              *istcp = _eth_copy_chunk(input_size,
209                                       iov, iovcnt,
210                                       *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
211                                       &l4hdr_info->hdr.tcp);
212  
213              if (*istcp) {
214                  *l5hdr_off = *l4hdr_off +
215                      TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
216  
217                  l4hdr_info->has_tcp_data =
218                      _eth_tcp_has_data(proto == ETH_P_IP,
219                                        &ip4hdr_info->ip4_hdr,
220                                        &ip6hdr_info->ip6_hdr,
221                                        *l4hdr_off - *l3hdr_off,
222                                        &l4hdr_info->hdr.tcp);
223              }
224          } else if (*isudp) {
225              *isudp = _eth_copy_chunk(input_size,
226                                       iov, iovcnt,
227                                       *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
228                                       &l4hdr_info->hdr.udp);
229              *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
230          }
231      }
232  }
233  
234  size_t
235  eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
236                 uint8_t *new_ehdr_buf,
237                 uint16_t *payload_offset, uint16_t *tci)
238  {
239      struct vlan_header vlan_hdr;
240      struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
241  
242      size_t copied = iov_to_buf(iov, iovcnt, iovoff,
243                                 new_ehdr, sizeof(*new_ehdr));
244  
245      if (copied < sizeof(*new_ehdr)) {
246          return 0;
247      }
248  
249      switch (be16_to_cpu(new_ehdr->h_proto)) {
250      case ETH_P_VLAN:
251      case ETH_P_DVLAN:
252          copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
253                              &vlan_hdr, sizeof(vlan_hdr));
254  
255          if (copied < sizeof(vlan_hdr)) {
256              return 0;
257          }
258  
259          new_ehdr->h_proto = vlan_hdr.h_proto;
260  
261          *tci = be16_to_cpu(vlan_hdr.h_tci);
262          *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
263  
264          if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
265  
266              copied = iov_to_buf(iov, iovcnt, *payload_offset,
267                                  PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
268  
269              if (copied < sizeof(vlan_hdr)) {
270                  return 0;
271              }
272  
273              *payload_offset += sizeof(vlan_hdr);
274  
275              return sizeof(struct eth_header) + sizeof(struct vlan_header);
276          } else {
277              return sizeof(struct eth_header);
278          }
279      default:
280          return 0;
281      }
282  }
283  
284  size_t
285  eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
286                    uint16_t vet, uint8_t *new_ehdr_buf,
287                    uint16_t *payload_offset, uint16_t *tci)
288  {
289      struct vlan_header vlan_hdr;
290      struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
291  
292      size_t copied = iov_to_buf(iov, iovcnt, iovoff,
293                                 new_ehdr, sizeof(*new_ehdr));
294  
295      if (copied < sizeof(*new_ehdr)) {
296          return 0;
297      }
298  
299      if (be16_to_cpu(new_ehdr->h_proto) == vet) {
300          copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
301                              &vlan_hdr, sizeof(vlan_hdr));
302  
303          if (copied < sizeof(vlan_hdr)) {
304              return 0;
305          }
306  
307          new_ehdr->h_proto = vlan_hdr.h_proto;
308  
309          *tci = be16_to_cpu(vlan_hdr.h_tci);
310          *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
311          return sizeof(struct eth_header);
312      }
313  
314      return 0;
315  }
316  
317  void
318  eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
319                              void *l3hdr, size_t l3hdr_len,
320                              size_t l3payload_len,
321                              size_t frag_offset, bool more_frags)
322  {
323      const struct iovec l2vec = {
324          .iov_base = (void *) l2hdr,
325          .iov_len = l2hdr_len
326      };
327  
328      if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
329          uint16_t orig_flags;
330          struct ip_header *iphdr = (struct ip_header *) l3hdr;
331          uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
332          uint16_t new_ip_off;
333  
334          assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
335          assert((frag_off_units & ~IP_OFFMASK) == 0);
336  
337          orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
338          new_ip_off = frag_off_units | orig_flags  | (more_frags ? IP_MF : 0);
339          iphdr->ip_off = cpu_to_be16(new_ip_off);
340          iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
341      }
342  }
343  
344  void
345  eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
346  {
347      struct ip_header *iphdr = (struct ip_header *) l3hdr;
348      iphdr->ip_sum = 0;
349      iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
350  }
351  
352  uint32_t
353  eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
354                               uint16_t csl,
355                               uint32_t *cso)
356  {
357      struct ip_pseudo_header ipph;
358      ipph.ip_src = iphdr->ip_src;
359      ipph.ip_dst = iphdr->ip_dst;
360      ipph.ip_payload = cpu_to_be16(csl);
361      ipph.ip_proto = iphdr->ip_p;
362      ipph.zeros = 0;
363      *cso = sizeof(ipph);
364      return net_checksum_add(*cso, (uint8_t *) &ipph);
365  }
366  
367  uint32_t
368  eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
369                               uint16_t csl,
370                               uint8_t l4_proto,
371                               uint32_t *cso)
372  {
373      struct ip6_pseudo_header ipph;
374      ipph.ip6_src = iphdr->ip6_src;
375      ipph.ip6_dst = iphdr->ip6_dst;
376      ipph.len = cpu_to_be16(csl);
377      ipph.zero[0] = 0;
378      ipph.zero[1] = 0;
379      ipph.zero[2] = 0;
380      ipph.next_hdr = l4_proto;
381      *cso = sizeof(ipph);
382      return net_checksum_add(*cso, (uint8_t *)&ipph);
383  }
384  
385  static bool
386  eth_is_ip6_extension_header_type(uint8_t hdr_type)
387  {
388      switch (hdr_type) {
389      case IP6_HOP_BY_HOP:
390      case IP6_ROUTING:
391      case IP6_FRAGMENT:
392      case IP6_AUTHENTICATION:
393      case IP6_DESTINATON:
394      case IP6_MOBILITY:
395          return true;
396      default:
397          return false;
398      }
399  }
400  
401  static bool
402  _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
403                          size_t ext_hdr_offset,
404                          struct ip6_ext_hdr *ext_hdr,
405                          struct in6_address *dst_addr)
406  {
407      struct ip6_ext_hdr_routing rt_hdr;
408      size_t input_size = iov_size(pkt, pkt_frags);
409      size_t bytes_read;
410  
411      if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
412          return false;
413      }
414  
415      bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
416                              &rt_hdr, sizeof(rt_hdr));
417      assert(bytes_read == sizeof(rt_hdr));
418      if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
419          return false;
420      }
421      bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
422                              dst_addr, sizeof(*dst_addr));
423      assert(bytes_read == sizeof(*dst_addr));
424  
425      return true;
426  }
427  
428  static bool
429  _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
430                          size_t dsthdr_offset,
431                          struct ip6_ext_hdr *ext_hdr,
432                          struct in6_address *src_addr)
433  {
434      size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
435      struct ip6_option_hdr opthdr;
436      size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
437  
438      while (bytes_left > sizeof(opthdr)) {
439          size_t input_size = iov_size(pkt, pkt_frags);
440          size_t bytes_read, optlen;
441  
442          if (input_size < opt_offset) {
443              return false;
444          }
445  
446          bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
447                                  &opthdr, sizeof(opthdr));
448  
449          if (bytes_read != sizeof(opthdr)) {
450              return false;
451          }
452  
453          optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
454                                                 : (opthdr.len + sizeof(opthdr));
455  
456          if (optlen > bytes_left) {
457              return false;
458          }
459  
460          if (opthdr.type == IP6_OPT_HOME) {
461              size_t input_size = iov_size(pkt, pkt_frags);
462  
463              if (input_size < opt_offset + sizeof(opthdr)) {
464                  return false;
465              }
466  
467              bytes_read = iov_to_buf(pkt, pkt_frags,
468                                      opt_offset + sizeof(opthdr),
469                                      src_addr, sizeof(*src_addr));
470  
471              return bytes_read == sizeof(*src_addr);
472          }
473  
474          opt_offset += optlen;
475          bytes_left -= optlen;
476      }
477  
478      return false;
479  }
480  
481  bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
482                          size_t ip6hdr_off, eth_ip6_hdr_info *info)
483  {
484      struct ip6_ext_hdr ext_hdr;
485      size_t bytes_read;
486      uint8_t curr_ext_hdr_type;
487      size_t input_size = iov_size(pkt, pkt_frags);
488  
489      info->rss_ex_dst_valid = false;
490      info->rss_ex_src_valid = false;
491      info->fragment = false;
492  
493      if (input_size < ip6hdr_off) {
494          return false;
495      }
496  
497      bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
498                              &info->ip6_hdr, sizeof(info->ip6_hdr));
499      if (bytes_read < sizeof(info->ip6_hdr)) {
500          return false;
501      }
502  
503      info->full_hdr_len = sizeof(struct ip6_header);
504  
505      curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
506  
507      if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
508          info->l4proto = info->ip6_hdr.ip6_nxt;
509          info->has_ext_hdrs = false;
510          return true;
511      }
512  
513      info->has_ext_hdrs = true;
514  
515      do {
516          if (input_size < ip6hdr_off + info->full_hdr_len) {
517              return false;
518          }
519  
520          bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
521                                  &ext_hdr, sizeof(ext_hdr));
522  
523          if (bytes_read < sizeof(ext_hdr)) {
524              return false;
525          }
526  
527          if (curr_ext_hdr_type == IP6_ROUTING) {
528              if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
529                  info->rss_ex_dst_valid =
530                      _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
531                                               ip6hdr_off + info->full_hdr_len,
532                                               &ext_hdr, &info->rss_ex_dst);
533              }
534          } else if (curr_ext_hdr_type == IP6_DESTINATON) {
535              info->rss_ex_src_valid =
536                  _eth_get_rss_ex_src_addr(pkt, pkt_frags,
537                                           ip6hdr_off + info->full_hdr_len,
538                                           &ext_hdr, &info->rss_ex_src);
539          } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
540              info->fragment = true;
541          }
542  
543          info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
544          curr_ext_hdr_type = ext_hdr.ip6r_nxt;
545      } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
546  
547      info->l4proto = ext_hdr.ip6r_nxt;
548      return true;
549  }
550  
551  bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
552                           const void *pkt, size_t pkt_size)
553  {
554      assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
555  
556      if (pkt_size >= ETH_ZLEN) {
557          return false;
558      }
559  
560      /* pad to minimum Ethernet frame length */
561      memcpy(padded_pkt, pkt, pkt_size);
562      memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
563      *padded_buflen = ETH_ZLEN;
564  
565      return true;
566  }
567