xref: /openbmc/qemu/net/eth.c (revision ac06724a715864942e2b5e28f92d5d5421f0a0b0)
1 /*
2  * QEMU network structures definitions and helper functions
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2 or later.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "net/eth.h"
20 #include "net/checksum.h"
21 #include "qemu-common.h"
22 #include "net/tap.h"
23 
24 void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
25     uint16_t vlan_ethtype, bool *is_new)
26 {
27     struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28 
29     switch (be16_to_cpu(ehdr->h_proto)) {
30     case ETH_P_VLAN:
31     case ETH_P_DVLAN:
32         /* vlan hdr exists */
33         *is_new = false;
34         break;
35 
36     default:
37         /* No VLAN header, put a new one */
38         vhdr->h_proto = ehdr->h_proto;
39         ehdr->h_proto = cpu_to_be16(vlan_ethtype);
40         *is_new = true;
41         break;
42     }
43     vhdr->h_tci = cpu_to_be16(vlan_tag);
44 }
45 
46 uint8_t
47 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
48 {
49     uint8_t ecn_state = 0;
50 
51     if (l3_proto == ETH_P_IP) {
52         struct ip_header *iphdr = (struct ip_header *) l3_hdr;
53 
54         if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
55             if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
56                 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
57             }
58             if (l4proto == IP_PROTO_TCP) {
59                 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
60             } else if (l4proto == IP_PROTO_UDP) {
61                 return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
62             }
63         }
64     } else if (l3_proto == ETH_P_IPV6) {
65         struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
66 
67         if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
68             ecn_state = VIRTIO_NET_HDR_GSO_ECN;
69         }
70 
71         if (l4proto == IP_PROTO_TCP) {
72             return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
73         }
74     }
75 
76     /* Unsupported offload */
77     g_assert_not_reached();
78 
79     return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
80 }
81 
82 uint16_t
83 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
84 {
85     uint16_t proto;
86     size_t copied;
87     size_t size = iov_size(l2hdr_iov, iovcnt);
88     size_t proto_offset = l2hdr_len - sizeof(proto);
89 
90     if (size < proto_offset) {
91         return ETH_P_UNKNOWN;
92     }
93 
94     copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
95                         &proto, sizeof(proto));
96 
97     return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
98 }
99 
/*
 * Copy @length bytes starting at @offset of the vector into @buffer.
 *
 * @input_size: total size of the data described by @iov
 *
 * Returns true only when @offset does not exceed @input_size and at least
 * @length bytes were copied; no copy is attempted when the offset check
 * fails.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    return (input_size >= offset) &&
           (iov_to_buf(iov, iovcnt, offset, buffer, length) >= length);
}
120 
121 static bool
122 _eth_tcp_has_data(bool is_ip4,
123                   const struct ip_header  *ip4_hdr,
124                   const struct ip6_header *ip6_hdr,
125                   size_t full_ip6hdr_len,
126                   const struct tcp_header *tcp)
127 {
128     uint32_t l4len;
129 
130     if (is_ip4) {
131         l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
132     } else {
133         size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
134         l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
135     }
136 
137     return l4len > TCP_HEADER_DATA_OFFSET(tcp);
138 }
139 
140 void eth_get_protocols(const struct iovec *iov, int iovcnt,
141                        bool *isip4, bool *isip6,
142                        bool *isudp, bool *istcp,
143                        size_t *l3hdr_off,
144                        size_t *l4hdr_off,
145                        size_t *l5hdr_off,
146                        eth_ip6_hdr_info *ip6hdr_info,
147                        eth_ip4_hdr_info *ip4hdr_info,
148                        eth_l4_hdr_info  *l4hdr_info)
149 {
150     int proto;
151     bool fragment = false;
152     size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
153     size_t input_size = iov_size(iov, iovcnt);
154     size_t copied;
155 
156     *isip4 = *isip6 = *isudp = *istcp = false;
157 
158     proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
159 
160     *l3hdr_off = l2hdr_len;
161 
162     if (proto == ETH_P_IP) {
163         struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
164 
165         if (input_size < l2hdr_len) {
166             return;
167         }
168 
169         copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
170 
171         *isip4 = true;
172 
173         if (copied < sizeof(*iphdr)) {
174             return;
175         }
176 
177         if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
178             if (iphdr->ip_p == IP_PROTO_TCP) {
179                 *istcp = true;
180             } else if (iphdr->ip_p == IP_PROTO_UDP) {
181                 *isudp = true;
182             }
183         }
184 
185         ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
186         *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
187 
188         fragment = ip4hdr_info->fragment;
189     } else if (proto == ETH_P_IPV6) {
190 
191         *isip6 = true;
192         if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
193                                ip6hdr_info)) {
194             if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
195                 *istcp = true;
196             } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
197                 *isudp = true;
198             }
199         } else {
200             return;
201         }
202 
203         *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
204         fragment = ip6hdr_info->fragment;
205     }
206 
207     if (!fragment) {
208         if (*istcp) {
209             *istcp = _eth_copy_chunk(input_size,
210                                      iov, iovcnt,
211                                      *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
212                                      &l4hdr_info->hdr.tcp);
213 
214             if (*istcp) {
215                 *l5hdr_off = *l4hdr_off +
216                     TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
217 
218                 l4hdr_info->has_tcp_data =
219                     _eth_tcp_has_data(proto == ETH_P_IP,
220                                       &ip4hdr_info->ip4_hdr,
221                                       &ip6hdr_info->ip6_hdr,
222                                       *l4hdr_off - *l3hdr_off,
223                                       &l4hdr_info->hdr.tcp);
224             }
225         } else if (*isudp) {
226             *isudp = _eth_copy_chunk(input_size,
227                                      iov, iovcnt,
228                                      *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
229                                      &l4hdr_info->hdr.udp);
230             *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
231         }
232     }
233 }
234 
235 size_t
236 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
237                uint8_t *new_ehdr_buf,
238                uint16_t *payload_offset, uint16_t *tci)
239 {
240     struct vlan_header vlan_hdr;
241     struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
242 
243     size_t copied = iov_to_buf(iov, iovcnt, iovoff,
244                                new_ehdr, sizeof(*new_ehdr));
245 
246     if (copied < sizeof(*new_ehdr)) {
247         return 0;
248     }
249 
250     switch (be16_to_cpu(new_ehdr->h_proto)) {
251     case ETH_P_VLAN:
252     case ETH_P_DVLAN:
253         copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
254                             &vlan_hdr, sizeof(vlan_hdr));
255 
256         if (copied < sizeof(vlan_hdr)) {
257             return 0;
258         }
259 
260         new_ehdr->h_proto = vlan_hdr.h_proto;
261 
262         *tci = be16_to_cpu(vlan_hdr.h_tci);
263         *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
264 
265         if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
266 
267             copied = iov_to_buf(iov, iovcnt, *payload_offset,
268                                 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
269 
270             if (copied < sizeof(vlan_hdr)) {
271                 return 0;
272             }
273 
274             *payload_offset += sizeof(vlan_hdr);
275 
276             return sizeof(struct eth_header) + sizeof(struct vlan_header);
277         } else {
278             return sizeof(struct eth_header);
279         }
280     default:
281         return 0;
282     }
283 }
284 
285 size_t
286 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
287                   uint16_t vet, uint8_t *new_ehdr_buf,
288                   uint16_t *payload_offset, uint16_t *tci)
289 {
290     struct vlan_header vlan_hdr;
291     struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
292 
293     size_t copied = iov_to_buf(iov, iovcnt, iovoff,
294                                new_ehdr, sizeof(*new_ehdr));
295 
296     if (copied < sizeof(*new_ehdr)) {
297         return 0;
298     }
299 
300     if (be16_to_cpu(new_ehdr->h_proto) == vet) {
301         copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
302                             &vlan_hdr, sizeof(vlan_hdr));
303 
304         if (copied < sizeof(vlan_hdr)) {
305             return 0;
306         }
307 
308         new_ehdr->h_proto = vlan_hdr.h_proto;
309 
310         *tci = be16_to_cpu(vlan_hdr.h_tci);
311         *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
312         return sizeof(struct eth_header);
313     }
314 
315     return 0;
316 }
317 
318 void
319 eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
320                             void *l3hdr, size_t l3hdr_len,
321                             size_t l3payload_len,
322                             size_t frag_offset, bool more_frags)
323 {
324     const struct iovec l2vec = {
325         .iov_base = (void *) l2hdr,
326         .iov_len = l2hdr_len
327     };
328 
329     if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
330         uint16_t orig_flags;
331         struct ip_header *iphdr = (struct ip_header *) l3hdr;
332         uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
333         uint16_t new_ip_off;
334 
335         assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
336         assert((frag_off_units & ~IP_OFFMASK) == 0);
337 
338         orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
339         new_ip_off = frag_off_units | orig_flags  | (more_frags ? IP_MF : 0);
340         iphdr->ip_off = cpu_to_be16(new_ip_off);
341         iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
342     }
343 }
344 
345 void
346 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
347 {
348     struct ip_header *iphdr = (struct ip_header *) l3hdr;
349     iphdr->ip_sum = 0;
350     iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
351 }
352 
353 uint32_t
354 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
355                              uint16_t csl,
356                              uint32_t *cso)
357 {
358     struct ip_pseudo_header ipph;
359     ipph.ip_src = iphdr->ip_src;
360     ipph.ip_dst = iphdr->ip_dst;
361     ipph.ip_payload = cpu_to_be16(csl);
362     ipph.ip_proto = iphdr->ip_p;
363     ipph.zeros = 0;
364     *cso = sizeof(ipph);
365     return net_checksum_add(*cso, (uint8_t *) &ipph);
366 }
367 
368 uint32_t
369 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
370                              uint16_t csl,
371                              uint8_t l4_proto,
372                              uint32_t *cso)
373 {
374     struct ip6_pseudo_header ipph;
375     ipph.ip6_src = iphdr->ip6_src;
376     ipph.ip6_dst = iphdr->ip6_dst;
377     ipph.len = cpu_to_be16(csl);
378     ipph.zero[0] = 0;
379     ipph.zero[1] = 0;
380     ipph.zero[2] = 0;
381     ipph.next_hdr = l4_proto;
382     *cso = sizeof(ipph);
383     return net_checksum_add(*cso, (uint8_t *)&ipph);
384 }
385 
386 static bool
387 eth_is_ip6_extension_header_type(uint8_t hdr_type)
388 {
389     switch (hdr_type) {
390     case IP6_HOP_BY_HOP:
391     case IP6_ROUTING:
392     case IP6_FRAGMENT:
393     case IP6_ESP:
394     case IP6_AUTHENTICATION:
395     case IP6_DESTINATON:
396     case IP6_MOBILITY:
397         return true;
398     default:
399         return false;
400     }
401 }
402 
403 static bool
404 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
405                         size_t rthdr_offset,
406                         struct ip6_ext_hdr *ext_hdr,
407                         struct in6_address *dst_addr)
408 {
409     struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
410 
411     if ((rthdr->rtype == 2) &&
412         (rthdr->len == sizeof(struct in6_address) / 8) &&
413         (rthdr->segleft == 1)) {
414 
415         size_t input_size = iov_size(pkt, pkt_frags);
416         size_t bytes_read;
417 
418         if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
419             return false;
420         }
421 
422         bytes_read = iov_to_buf(pkt, pkt_frags,
423                                 rthdr_offset + sizeof(*ext_hdr),
424                                 dst_addr, sizeof(*dst_addr));
425 
426         return bytes_read == sizeof(dst_addr);
427     }
428 
429     return false;
430 }
431 
432 static bool
433 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
434                         size_t dsthdr_offset,
435                         struct ip6_ext_hdr *ext_hdr,
436                         struct in6_address *src_addr)
437 {
438     size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
439     struct ip6_option_hdr opthdr;
440     size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
441 
442     while (bytes_left > sizeof(opthdr)) {
443         size_t input_size = iov_size(pkt, pkt_frags);
444         size_t bytes_read, optlen;
445 
446         if (input_size < opt_offset) {
447             return false;
448         }
449 
450         bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
451                                 &opthdr, sizeof(opthdr));
452 
453         if (bytes_read != sizeof(opthdr)) {
454             return false;
455         }
456 
457         optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
458                                                : (opthdr.len + sizeof(opthdr));
459 
460         if (optlen > bytes_left) {
461             return false;
462         }
463 
464         if (opthdr.type == IP6_OPT_HOME) {
465             size_t input_size = iov_size(pkt, pkt_frags);
466 
467             if (input_size < opt_offset + sizeof(opthdr)) {
468                 return false;
469             }
470 
471             bytes_read = iov_to_buf(pkt, pkt_frags,
472                                     opt_offset + sizeof(opthdr),
473                                     src_addr, sizeof(*src_addr));
474 
475             return bytes_read == sizeof(src_addr);
476         }
477 
478         opt_offset += optlen;
479         bytes_left -= optlen;
480     }
481 
482     return false;
483 }
484 
485 bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
486                         size_t ip6hdr_off, eth_ip6_hdr_info *info)
487 {
488     struct ip6_ext_hdr ext_hdr;
489     size_t bytes_read;
490     uint8_t curr_ext_hdr_type;
491     size_t input_size = iov_size(pkt, pkt_frags);
492 
493     info->rss_ex_dst_valid = false;
494     info->rss_ex_src_valid = false;
495     info->fragment = false;
496 
497     if (input_size < ip6hdr_off) {
498         return false;
499     }
500 
501     bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
502                             &info->ip6_hdr, sizeof(info->ip6_hdr));
503     if (bytes_read < sizeof(info->ip6_hdr)) {
504         return false;
505     }
506 
507     info->full_hdr_len = sizeof(struct ip6_header);
508 
509     curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
510 
511     if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
512         info->l4proto = info->ip6_hdr.ip6_nxt;
513         info->has_ext_hdrs = false;
514         return true;
515     }
516 
517     info->has_ext_hdrs = true;
518 
519     do {
520         if (input_size < ip6hdr_off + info->full_hdr_len) {
521             return false;
522         }
523 
524         bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
525                                 &ext_hdr, sizeof(ext_hdr));
526 
527         if (bytes_read < sizeof(ext_hdr)) {
528             return false;
529         }
530 
531         if (curr_ext_hdr_type == IP6_ROUTING) {
532             info->rss_ex_dst_valid =
533                 _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
534                                          ip6hdr_off + info->full_hdr_len,
535                                          &ext_hdr, &info->rss_ex_dst);
536         } else if (curr_ext_hdr_type == IP6_DESTINATON) {
537             info->rss_ex_src_valid =
538                 _eth_get_rss_ex_src_addr(pkt, pkt_frags,
539                                          ip6hdr_off + info->full_hdr_len,
540                                          &ext_hdr, &info->rss_ex_src);
541         } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
542             info->fragment = true;
543         }
544 
545         info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
546         curr_ext_hdr_type = ext_hdr.ip6r_nxt;
547     } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
548 
549     info->l4proto = ext_hdr.ip6r_nxt;
550     return true;
551 }
552