xref: /openbmc/qemu/net/eth.c (revision 8e6c718a)
1 /*
2  * QEMU network structures definitions and helper functions
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2 or later.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "net/eth.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 
24 void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
25     uint16_t vlan_ethtype, bool *is_new)
26 {
27     struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28 
29     switch (be16_to_cpu(ehdr->h_proto)) {
30     case ETH_P_VLAN:
31     case ETH_P_DVLAN:
32         /* vlan hdr exists */
33         *is_new = false;
34         break;
35 
36     default:
37         /* No VLAN header, put a new one */
38         vhdr->h_proto = ehdr->h_proto;
39         ehdr->h_proto = cpu_to_be16(vlan_ethtype);
40         *is_new = true;
41         break;
42     }
43     vhdr->h_tci = cpu_to_be16(vlan_tag);
44 }
45 
46 uint8_t
47 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
48 {
49     uint8_t ecn_state = 0;
50 
51     if (l3_proto == ETH_P_IP) {
52         struct ip_header *iphdr = (struct ip_header *) l3_hdr;
53 
54         if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
55             if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
56                 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
57             }
58             if (l4proto == IP_PROTO_TCP) {
59                 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
60             } else if (l4proto == IP_PROTO_UDP) {
61                 return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
62             }
63         }
64     } else if (l3_proto == ETH_P_IPV6) {
65         struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
66 
67         if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
68             ecn_state = VIRTIO_NET_HDR_GSO_ECN;
69         }
70 
71         if (l4proto == IP_PROTO_TCP) {
72             return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
73         }
74     }
75     qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
76         "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
77 
78     return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
79 }
80 
81 uint16_t
82 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
83 {
84     uint16_t proto;
85     size_t copied;
86     size_t size = iov_size(l2hdr_iov, iovcnt);
87     size_t proto_offset = l2hdr_len - sizeof(proto);
88 
89     if (size < proto_offset) {
90         return ETH_P_UNKNOWN;
91     }
92 
93     copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
94                         &proto, sizeof(proto));
95 
96     return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
97 }
98 
/*
 * Copy @length bytes starting at @offset of the iovec into @buffer.
 *
 * @input_size is the total iovec size as computed by the caller.
 * Returns true only when @offset does not exceed @input_size and the
 * full @length bytes were copied.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    if (offset > input_size) {
        return false;
    }

    return iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
119 
120 static bool
121 _eth_tcp_has_data(bool is_ip4,
122                   const struct ip_header  *ip4_hdr,
123                   const struct ip6_header *ip6_hdr,
124                   size_t full_ip6hdr_len,
125                   const struct tcp_header *tcp)
126 {
127     uint32_t l4len;
128 
129     if (is_ip4) {
130         l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
131     } else {
132         size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
133         l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
134     }
135 
136     return l4len > TCP_HEADER_DATA_OFFSET(tcp);
137 }
138 
139 void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
140                        bool *hasip4, bool *hasip6,
141                        size_t *l3hdr_off,
142                        size_t *l4hdr_off,
143                        size_t *l5hdr_off,
144                        eth_ip6_hdr_info *ip6hdr_info,
145                        eth_ip4_hdr_info *ip4hdr_info,
146                        eth_l4_hdr_info  *l4hdr_info)
147 {
148     int proto;
149     bool fragment = false;
150     size_t input_size = iov_size(iov, iovcnt);
151     size_t copied;
152     uint8_t ip_p;
153 
154     *hasip4 = *hasip6 = false;
155     *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
156     l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;
157 
158     proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);
159 
160     if (proto == ETH_P_IP) {
161         struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
162 
163         if (input_size < *l3hdr_off) {
164             return;
165         }
166 
167         copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr));
168         if (copied < sizeof(*iphdr) ||
169             IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
170             return;
171         }
172 
173         *hasip4 = true;
174         ip_p = iphdr->ip_p;
175         ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
176         *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr);
177 
178         fragment = ip4hdr_info->fragment;
179     } else if (proto == ETH_P_IPV6) {
180         if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
181             return;
182         }
183 
184         *hasip6 = true;
185         ip_p = ip6hdr_info->l4proto;
186         *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
187         fragment = ip6hdr_info->fragment;
188     } else {
189         return;
190     }
191 
192     if (fragment) {
193         return;
194     }
195 
196     switch (ip_p) {
197     case IP_PROTO_TCP:
198         if (_eth_copy_chunk(input_size,
199                             iov, iovcnt,
200                             *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
201                             &l4hdr_info->hdr.tcp)) {
202             l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
203             *l5hdr_off = *l4hdr_off +
204                 TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
205 
206             l4hdr_info->has_tcp_data =
207                 _eth_tcp_has_data(proto == ETH_P_IP,
208                                   &ip4hdr_info->ip4_hdr,
209                                   &ip6hdr_info->ip6_hdr,
210                                   *l4hdr_off - *l3hdr_off,
211                                   &l4hdr_info->hdr.tcp);
212         }
213         break;
214 
215     case IP_PROTO_UDP:
216         if (_eth_copy_chunk(input_size,
217                             iov, iovcnt,
218                             *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
219                             &l4hdr_info->hdr.udp)) {
220             l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
221             *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
222         }
223         break;
224     }
225 }
226 
227 size_t
228 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
229                uint8_t *new_ehdr_buf,
230                uint16_t *payload_offset, uint16_t *tci)
231 {
232     struct vlan_header vlan_hdr;
233     struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
234 
235     size_t copied = iov_to_buf(iov, iovcnt, iovoff,
236                                new_ehdr, sizeof(*new_ehdr));
237 
238     if (copied < sizeof(*new_ehdr)) {
239         return 0;
240     }
241 
242     switch (be16_to_cpu(new_ehdr->h_proto)) {
243     case ETH_P_VLAN:
244     case ETH_P_DVLAN:
245         copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
246                             &vlan_hdr, sizeof(vlan_hdr));
247 
248         if (copied < sizeof(vlan_hdr)) {
249             return 0;
250         }
251 
252         new_ehdr->h_proto = vlan_hdr.h_proto;
253 
254         *tci = be16_to_cpu(vlan_hdr.h_tci);
255         *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
256 
257         if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
258 
259             copied = iov_to_buf(iov, iovcnt, *payload_offset,
260                                 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
261 
262             if (copied < sizeof(vlan_hdr)) {
263                 return 0;
264             }
265 
266             *payload_offset += sizeof(vlan_hdr);
267 
268             return sizeof(struct eth_header) + sizeof(struct vlan_header);
269         } else {
270             return sizeof(struct eth_header);
271         }
272     default:
273         return 0;
274     }
275 }
276 
277 size_t
278 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
279                   uint16_t vet, uint8_t *new_ehdr_buf,
280                   uint16_t *payload_offset, uint16_t *tci)
281 {
282     struct vlan_header vlan_hdr;
283     struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
284 
285     size_t copied = iov_to_buf(iov, iovcnt, iovoff,
286                                new_ehdr, sizeof(*new_ehdr));
287 
288     if (copied < sizeof(*new_ehdr)) {
289         return 0;
290     }
291 
292     if (be16_to_cpu(new_ehdr->h_proto) == vet) {
293         copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
294                             &vlan_hdr, sizeof(vlan_hdr));
295 
296         if (copied < sizeof(vlan_hdr)) {
297             return 0;
298         }
299 
300         new_ehdr->h_proto = vlan_hdr.h_proto;
301 
302         *tci = be16_to_cpu(vlan_hdr.h_tci);
303         *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
304         return sizeof(struct eth_header);
305     }
306 
307     return 0;
308 }
309 
310 void
311 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
312 {
313     struct ip_header *iphdr = (struct ip_header *) l3hdr;
314     iphdr->ip_sum = 0;
315     iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
316 }
317 
318 uint32_t
319 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
320                              uint16_t csl,
321                              uint32_t *cso)
322 {
323     struct ip_pseudo_header ipph;
324     ipph.ip_src = iphdr->ip_src;
325     ipph.ip_dst = iphdr->ip_dst;
326     ipph.ip_payload = cpu_to_be16(csl);
327     ipph.ip_proto = iphdr->ip_p;
328     ipph.zeros = 0;
329     *cso = sizeof(ipph);
330     return net_checksum_add(*cso, (uint8_t *) &ipph);
331 }
332 
333 uint32_t
334 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
335                              uint16_t csl,
336                              uint8_t l4_proto,
337                              uint32_t *cso)
338 {
339     struct ip6_pseudo_header ipph;
340     ipph.ip6_src = iphdr->ip6_src;
341     ipph.ip6_dst = iphdr->ip6_dst;
342     ipph.len = cpu_to_be16(csl);
343     ipph.zero[0] = 0;
344     ipph.zero[1] = 0;
345     ipph.zero[2] = 0;
346     ipph.next_hdr = l4_proto;
347     *cso = sizeof(ipph);
348     return net_checksum_add(*cso, (uint8_t *)&ipph);
349 }
350 
351 static bool
352 eth_is_ip6_extension_header_type(uint8_t hdr_type)
353 {
354     switch (hdr_type) {
355     case IP6_HOP_BY_HOP:
356     case IP6_ROUTING:
357     case IP6_FRAGMENT:
358     case IP6_AUTHENTICATION:
359     case IP6_DESTINATON:
360     case IP6_MOBILITY:
361         return true;
362     default:
363         return false;
364     }
365 }
366 
367 static bool
368 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
369                         size_t ext_hdr_offset,
370                         struct ip6_ext_hdr *ext_hdr,
371                         struct in6_address *dst_addr)
372 {
373     struct ip6_ext_hdr_routing rt_hdr;
374     size_t input_size = iov_size(pkt, pkt_frags);
375     size_t bytes_read;
376 
377     if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
378         return false;
379     }
380 
381     bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
382                             &rt_hdr, sizeof(rt_hdr));
383     assert(bytes_read == sizeof(rt_hdr));
384     if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
385         return false;
386     }
387     bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
388                             dst_addr, sizeof(*dst_addr));
389     assert(bytes_read == sizeof(*dst_addr));
390 
391     return true;
392 }
393 
394 static bool
395 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
396                         size_t dsthdr_offset,
397                         struct ip6_ext_hdr *ext_hdr,
398                         struct in6_address *src_addr)
399 {
400     size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
401     struct ip6_option_hdr opthdr;
402     size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
403 
404     while (bytes_left > sizeof(opthdr)) {
405         size_t input_size = iov_size(pkt, pkt_frags);
406         size_t bytes_read, optlen;
407 
408         if (input_size < opt_offset) {
409             return false;
410         }
411 
412         bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
413                                 &opthdr, sizeof(opthdr));
414 
415         if (bytes_read != sizeof(opthdr)) {
416             return false;
417         }
418 
419         optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
420                                                : (opthdr.len + sizeof(opthdr));
421 
422         if (optlen > bytes_left) {
423             return false;
424         }
425 
426         if (opthdr.type == IP6_OPT_HOME) {
427             size_t input_size = iov_size(pkt, pkt_frags);
428 
429             if (input_size < opt_offset + sizeof(opthdr)) {
430                 return false;
431             }
432 
433             bytes_read = iov_to_buf(pkt, pkt_frags,
434                                     opt_offset + sizeof(opthdr),
435                                     src_addr, sizeof(*src_addr));
436 
437             return bytes_read == sizeof(*src_addr);
438         }
439 
440         opt_offset += optlen;
441         bytes_left -= optlen;
442     }
443 
444     return false;
445 }
446 
447 bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
448                         size_t ip6hdr_off, eth_ip6_hdr_info *info)
449 {
450     struct ip6_ext_hdr ext_hdr;
451     size_t bytes_read;
452     uint8_t curr_ext_hdr_type;
453     size_t input_size = iov_size(pkt, pkt_frags);
454 
455     info->rss_ex_dst_valid = false;
456     info->rss_ex_src_valid = false;
457     info->fragment = false;
458 
459     if (input_size < ip6hdr_off) {
460         return false;
461     }
462 
463     bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
464                             &info->ip6_hdr, sizeof(info->ip6_hdr));
465     if (bytes_read < sizeof(info->ip6_hdr)) {
466         return false;
467     }
468 
469     info->full_hdr_len = sizeof(struct ip6_header);
470 
471     curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
472 
473     if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
474         info->l4proto = info->ip6_hdr.ip6_nxt;
475         info->has_ext_hdrs = false;
476         return true;
477     }
478 
479     info->has_ext_hdrs = true;
480 
481     do {
482         if (input_size < ip6hdr_off + info->full_hdr_len) {
483             return false;
484         }
485 
486         bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
487                                 &ext_hdr, sizeof(ext_hdr));
488 
489         if (bytes_read < sizeof(ext_hdr)) {
490             return false;
491         }
492 
493         if (curr_ext_hdr_type == IP6_ROUTING) {
494             if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
495                 info->rss_ex_dst_valid =
496                     _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
497                                              ip6hdr_off + info->full_hdr_len,
498                                              &ext_hdr, &info->rss_ex_dst);
499             }
500         } else if (curr_ext_hdr_type == IP6_DESTINATON) {
501             info->rss_ex_src_valid =
502                 _eth_get_rss_ex_src_addr(pkt, pkt_frags,
503                                          ip6hdr_off + info->full_hdr_len,
504                                          &ext_hdr, &info->rss_ex_src);
505         } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
506             info->fragment = true;
507         }
508 
509         info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
510         curr_ext_hdr_type = ext_hdr.ip6r_nxt;
511     } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
512 
513     info->l4proto = ext_hdr.ip6r_nxt;
514     return true;
515 }
516 
517 bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
518                          const void *pkt, size_t pkt_size)
519 {
520     assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
521 
522     if (pkt_size >= ETH_ZLEN) {
523         return false;
524     }
525 
526     /* pad to minimum Ethernet frame length */
527     memcpy(padded_pkt, pkt, pkt_size);
528     memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
529     *padded_buflen = ETH_ZLEN;
530 
531     return true;
532 }
533