1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * GENEVE: Generic Network Virtualization Encapsulation
4 *
5 * Copyright (c) 2015 Red Hat, Inc.
6 */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/ethtool.h>
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/etherdevice.h>
14 #include <linux/hash.h>
15 #include <net/ipv6_stubs.h>
16 #include <net/dst_metadata.h>
17 #include <net/gro_cells.h>
18 #include <net/rtnetlink.h>
19 #include <net/geneve.h>
20 #include <net/gro.h>
21 #include <net/protocol.h>
22
23 #define GENEVE_NETDEV_VER "0.6"
24
25 #define GENEVE_N_VID (1u << 24)
26 #define GENEVE_VID_MASK (GENEVE_N_VID - 1)
27
28 #define VNI_HASH_BITS 10
29 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
30
31 static bool log_ecn_error = true;
32 module_param(log_ecn_error, bool, 0644);
33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
34
35 #define GENEVE_VER 0
36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
39
40 /* per-network namespace private data for this module */
41 struct geneve_net {
42 struct list_head geneve_list;
43 struct list_head sock_list;
44 };
45
46 static unsigned int geneve_net_id;
47
48 struct geneve_dev_node {
49 struct hlist_node hlist;
50 struct geneve_dev *geneve;
51 };
52
53 struct geneve_config {
54 struct ip_tunnel_info info;
55 bool collect_md;
56 bool use_udp6_rx_checksums;
57 bool ttl_inherit;
58 enum ifla_geneve_df df;
59 bool inner_proto_inherit;
60 };
61
62 /* Pseudo network device */
63 struct geneve_dev {
64 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
65 #if IS_ENABLED(CONFIG_IPV6)
66 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */
67 #endif
68 struct net *net; /* netns for packet i/o */
69 struct net_device *dev; /* netdev for geneve tunnel */
70 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
71 #if IS_ENABLED(CONFIG_IPV6)
72 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
73 #endif
74 struct list_head next; /* geneve's per namespace list */
75 struct gro_cells gro_cells;
76 struct geneve_config cfg;
77 };
78
79 struct geneve_sock {
80 bool collect_md;
81 struct list_head list;
82 struct socket *sock;
83 struct rcu_head rcu;
84 int refcnt;
85 struct hlist_head vni_list[VNI_HASH_SIZE];
86 };
87
88 static inline __u32 geneve_net_vni_hash(u8 vni[3])
89 {
90 __u32 vnid;
91
92 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
93 return hash_32(vnid, VNI_HASH_BITS);
94 }
95
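/* Convert 24 bit VNI to 64 bit tunnel ID: the VNI ends up in the
 * low-order three bytes of the big-endian tunnel ID.
 */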
96 static __be64 vni_to_tunnel_id(const __u8 *vni)
97 {
98 #ifdef __BIG_ENDIAN
99 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
100 #else
101 return (__force __be64)(((__force u64)vni[0] << 40) |
102 ((__force u64)vni[1] << 48) |
103 ((__force u64)vni[2] << 56));
104 #endif
105 }
106
107 /* Convert 64 bit tunnel ID to 24 bit VNI. */
108 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
109 {
110 #ifdef __BIG_ENDIAN
111 vni[0] = (__force __u8)(tun_id >> 16);
112 vni[1] = (__force __u8)(tun_id >> 8);
113 vni[2] = (__force __u8)tun_id;
114 #else
115 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
116 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
117 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
118 #endif
119 }
120
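/* The VNI occupies the last three bytes of the 8 byte tunnel ID,
 * so compare it against tun_id[5..7].
 */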
121 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
122 {
123 return !memcmp(vni, &tun_id[5], 3);
124 }
125
126 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
127 {
128 return gs->sock->sk->sk_family;
129 }
130
131 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
132 __be32 addr, u8 vni[])
133 {
134 struct hlist_head *vni_list_head;
135 struct geneve_dev_node *node;
136 __u32 hash;
137
138 /* Find the device for this VNI */
139 hash = geneve_net_vni_hash(vni);
140 vni_list_head = &gs->vni_list[hash];
141 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
142 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
143 addr == node->geneve->cfg.info.key.u.ipv4.dst)
144 return node->geneve;
145 }
146 return NULL;
147 }
148
149 #if IS_ENABLED(CONFIG_IPV6)
150 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
151 struct in6_addr addr6, u8 vni[])
152 {
153 struct hlist_head *vni_list_head;
154 struct geneve_dev_node *node;
155 __u32 hash;
156
157 /* Find the device for this VNI */
158 hash = geneve_net_vni_hash(vni);
159 vni_list_head = &gs->vni_list[hash];
160 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
161 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
162 ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
163 return node->geneve;
164 }
165 return NULL;
166 }
167 #endif
168
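/* The Geneve header starts immediately after the outer UDP header. */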
169 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
170 {
171 return (struct genevehdr *)(udp_hdr(skb) + 1);
172 }
173
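/* Map an incoming frame to its geneve netdev: collect_md sockets are
 * keyed on an all-zero VNI/address, otherwise the outer source address
 * and the VNI from the Geneve header are used.
 */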
174 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
175 struct sk_buff *skb)
176 {
177 static u8 zero_vni[3];
178 u8 *vni;
179
180 if (geneve_get_sk_family(gs) == AF_INET) {
181 struct iphdr *iph;
182 __be32 addr;
183
184 iph = ip_hdr(skb); /* outer IP header... */
185
186 if (gs->collect_md) {
187 vni = zero_vni;
188 addr = 0;
189 } else {
190 vni = geneve_hdr(skb)->vni;
191 addr = iph->saddr;
192 }
193
194 return geneve_lookup(gs, addr, vni);
195 #if IS_ENABLED(CONFIG_IPV6)
196 } else if (geneve_get_sk_family(gs) == AF_INET6) {
197 static struct in6_addr zero_addr6;
198 struct ipv6hdr *ip6h;
199 struct in6_addr addr6;
200
201 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
202
203 if (gs->collect_md) {
204 vni = zero_vni;
205 addr6 = zero_addr6;
206 } else {
207 vni = geneve_hdr(skb)->vni;
208 addr6 = ip6h->saddr;
209 }
210
211 return geneve6_lookup(gs, addr6, vni);
212 #endif
213 }
214 return NULL;
215 }
216
217 /* geneve receive/decap routine */
218 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
219 struct sk_buff *skb)
220 {
221 struct genevehdr *gnvh = geneve_hdr(skb);
222 struct metadata_dst *tun_dst = NULL;
223 unsigned int len;
224 int nh, err = 0;
225 void *oiph;
226
227 if (ip_tunnel_collect_metadata() || gs->collect_md) {
228 __be16 flags;
229
230 flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
231 (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
232
233 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
234 vni_to_tunnel_id(gnvh->vni),
235 gnvh->opt_len * 4);
236 if (!tun_dst) {
237 geneve->dev->stats.rx_dropped++;
238 goto drop;
239 }
240 /* Update tunnel dst according to Geneve options. */
241 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
242 gnvh->options, gnvh->opt_len * 4,
243 TUNNEL_GENEVE_OPT);
244 } else {
245 /* Drop packets w/ critical options,
246 * since we don't support any...
247 */
248 if (gnvh->critical) {
249 geneve->dev->stats.rx_frame_errors++;
250 geneve->dev->stats.rx_errors++;
251 goto drop;
252 }
253 }
254
255 if (tun_dst)
256 skb_dst_set(skb, &tun_dst->dst);
257
258 if (gnvh->proto_type == htons(ETH_P_TEB)) {
259 skb_reset_mac_header(skb);
260 skb->protocol = eth_type_trans(skb, geneve->dev);
261 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
262
263 /* Ignore packet loops (and multicast echo) */
264 if (ether_addr_equal(eth_hdr(skb)->h_source,
265 geneve->dev->dev_addr)) {
266 geneve->dev->stats.rx_errors++;
267 goto drop;
268 }
269 } else {
270 skb_reset_mac_header(skb);
271 skb->dev = geneve->dev;
272 skb->pkt_type = PACKET_HOST;
273 }
274
275 /* Save offset of outer header relative to skb->head,
276 * because we are going to reset the network header to the inner header
277 * and might change skb->head.
278 */
279 nh = skb_network_header(skb) - skb->head;
280
281 skb_reset_network_header(skb);
282
283 if (!pskb_inet_may_pull(skb)) {
284 DEV_STATS_INC(geneve->dev, rx_length_errors);
285 DEV_STATS_INC(geneve->dev, rx_errors);
286 goto drop;
287 }
288
289 /* Get the outer header. */
290 oiph = skb->head + nh;
291
292 if (geneve_get_sk_family(gs) == AF_INET)
293 err = IP_ECN_decapsulate(oiph, skb);
294 #if IS_ENABLED(CONFIG_IPV6)
295 else
296 err = IP6_ECN_decapsulate(oiph, skb);
297 #endif
298
299 if (unlikely(err)) {
300 if (log_ecn_error) {
301 if (geneve_get_sk_family(gs) == AF_INET)
302 net_info_ratelimited("non-ECT from %pI4 "
303 "with TOS=%#x\n",
304 &((struct iphdr *)oiph)->saddr,
305 ((struct iphdr *)oiph)->tos);
306 #if IS_ENABLED(CONFIG_IPV6)
307 else
308 net_info_ratelimited("non-ECT from %pI6\n",
309 &((struct ipv6hdr *)oiph)->saddr);
310 #endif
311 }
312 if (err > 1) {
313 ++geneve->dev->stats.rx_frame_errors;
314 ++geneve->dev->stats.rx_errors;
315 goto drop;
316 }
317 }
318
319 len = skb->len;
320 err = gro_cells_receive(&geneve->gro_cells, skb);
321 if (likely(err == NET_RX_SUCCESS))
322 dev_sw_netstats_rx_add(geneve->dev, len);
323
324 return;
325 drop:
326 /* Consume bad packet */
327 kfree_skb(skb);
328 }
329
330 /* Set up per-CPU stats, GRO cells and the dst cache when the device is created */
331 static int geneve_init(struct net_device *dev)
332 {
333 struct geneve_dev *geneve = netdev_priv(dev);
334 int err;
335
336 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
337 if (!dev->tstats)
338 return -ENOMEM;
339
340 err = gro_cells_init(&geneve->gro_cells, dev);
341 if (err) {
342 free_percpu(dev->tstats);
343 return err;
344 }
345
346 err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
347 if (err) {
348 free_percpu(dev->tstats);
349 gro_cells_destroy(&geneve->gro_cells);
350 return err;
351 }
352 netdev_lockdep_set_classes(dev);
353 return 0;
354 }
355
356 static void geneve_uninit(struct net_device *dev)
357 {
358 struct geneve_dev *geneve = netdev_priv(dev);
359
360 dst_cache_destroy(&geneve->cfg.info.dst_cache);
361 gro_cells_destroy(&geneve->gro_cells);
362 free_percpu(dev->tstats);
363 }
364
365 /* Callback from net/ipv4/udp.c to receive packets */
366 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
367 {
368 struct genevehdr *geneveh;
369 struct geneve_dev *geneve;
370 struct geneve_sock *gs;
371 __be16 inner_proto;
372 int opts_len;
373
374 /* Need UDP and Geneve header to be present */
375 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
376 goto drop;
377
378 /* Drop packets whose version field is not GENEVE_VER */
379 geneveh = geneve_hdr(skb);
380 if (unlikely(geneveh->ver != GENEVE_VER))
381 goto drop;
382
383 gs = rcu_dereference_sk_user_data(sk);
384 if (!gs)
385 goto drop;
386
387 geneve = geneve_lookup_skb(gs, skb);
388 if (!geneve)
389 goto drop;
390
391 inner_proto = geneveh->proto_type;
392
393 if (unlikely((!geneve->cfg.inner_proto_inherit &&
394 inner_proto != htons(ETH_P_TEB)))) {
395 geneve->dev->stats.rx_dropped++;
396 goto drop;
397 }
398
399 opts_len = geneveh->opt_len * 4;
400 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
401 !net_eq(geneve->net, dev_net(geneve->dev)))) {
402 geneve->dev->stats.rx_dropped++;
403 goto drop;
404 }
405
406 geneve_rx(geneve, gs, skb);
407 return 0;
408
409 drop:
410 /* Consume bad packet */
411 kfree_skb(skb);
412 return 0;
413 }
414
415 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
416 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
417 {
418 struct genevehdr *geneveh;
419 struct geneve_sock *gs;
420 u8 zero_vni[3] = { 0 };
421 u8 *vni = zero_vni;
422
423 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
424 return -EINVAL;
425
426 geneveh = geneve_hdr(skb);
427 if (geneveh->ver != GENEVE_VER)
428 return -EINVAL;
429
430 if (geneveh->proto_type != htons(ETH_P_TEB))
431 return -EINVAL;
432
433 gs = rcu_dereference_sk_user_data(sk);
434 if (!gs)
435 return -ENOENT;
436
437 if (geneve_get_sk_family(gs) == AF_INET) {
438 struct iphdr *iph = ip_hdr(skb);
439 __be32 addr4 = 0;
440
441 if (!gs->collect_md) {
442 vni = geneve_hdr(skb)->vni;
443 addr4 = iph->daddr;
444 }
445
446 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
447 }
448
449 #if IS_ENABLED(CONFIG_IPV6)
450 if (geneve_get_sk_family(gs) == AF_INET6) {
451 struct ipv6hdr *ip6h = ipv6_hdr(skb);
452 struct in6_addr addr6;
453
454 memset(&addr6, 0, sizeof(struct in6_addr));
455
456 if (!gs->collect_md) {
457 vni = geneve_hdr(skb)->vni;
458 addr6 = ip6h->daddr;
459 }
460
461 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
462 }
463 #endif
464
465 return -EPFNOSUPPORT;
466 }
467
468 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
469 __be16 port, bool ipv6_rx_csum)
470 {
471 struct socket *sock;
472 struct udp_port_cfg udp_conf;
473 int err;
474
475 memset(&udp_conf, 0, sizeof(udp_conf));
476
477 if (ipv6) {
478 udp_conf.family = AF_INET6;
479 udp_conf.ipv6_v6only = 1;
480 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
481 } else {
482 udp_conf.family = AF_INET;
483 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
484 }
485
486 udp_conf.local_udp_port = port;
487
488 /* Open UDP socket */
489 err = udp_sock_create(net, &udp_conf, &sock);
490 if (err < 0)
491 return ERR_PTR(err);
492
493 udp_allow_gso(sock->sk);
494 return sock;
495 }
496
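/* Total Geneve header length: fixed header plus options
 * (opt_len is in 4 byte multiples).
 */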
497 static int geneve_hlen(struct genevehdr *gh)
498 {
499 return sizeof(*gh) + gh->opt_len * 4;
500 }
501
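/* GRO receive: only aggregate packets whose Geneve headers (including
 * options) match, then hand off to the inner protocol's GRO handler.
 */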
502 static struct sk_buff *geneve_gro_receive(struct sock *sk,
503 struct list_head *head,
504 struct sk_buff *skb)
505 {
506 struct sk_buff *pp = NULL;
507 struct sk_buff *p;
508 struct genevehdr *gh, *gh2;
509 unsigned int hlen, gh_len, off_gnv;
510 const struct packet_offload *ptype;
511 __be16 type;
512 int flush = 1;
513
514 off_gnv = skb_gro_offset(skb);
515 hlen = off_gnv + sizeof(*gh);
516 gh = skb_gro_header(skb, hlen, off_gnv);
517 if (unlikely(!gh))
518 goto out;
519
520 if (gh->ver != GENEVE_VER || gh->oam)
521 goto out;
522 gh_len = geneve_hlen(gh);
523
524 hlen = off_gnv + gh_len;
525 if (skb_gro_header_hard(skb, hlen)) {
526 gh = skb_gro_header_slow(skb, hlen, off_gnv);
527 if (unlikely(!gh))
528 goto out;
529 }
530
531 list_for_each_entry(p, head, list) {
532 if (!NAPI_GRO_CB(p)->same_flow)
533 continue;
534
535 gh2 = (struct genevehdr *)(p->data + off_gnv);
536 if (gh->opt_len != gh2->opt_len ||
537 memcmp(gh, gh2, gh_len)) {
538 NAPI_GRO_CB(p)->same_flow = 0;
539 continue;
540 }
541 }
542
543 skb_gro_pull(skb, gh_len);
544 skb_gro_postpull_rcsum(skb, gh, gh_len);
545 type = gh->proto_type;
546 if (likely(type == htons(ETH_P_TEB)))
547 return call_gro_receive(eth_gro_receive, head, skb);
548
549 ptype = gro_find_receive_by_type(type);
550 if (!ptype)
551 goto out;
552
553 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
554 flush = 0;
555
556 out:
557 skb_gro_flush_final(skb, pp, flush);
558
559 return pp;
560 }
561
562 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
563 int nhoff)
564 {
565 struct genevehdr *gh;
566 struct packet_offload *ptype;
567 __be16 type;
568 int gh_len;
569 int err = -ENOSYS;
570
571 gh = (struct genevehdr *)(skb->data + nhoff);
572 gh_len = geneve_hlen(gh);
573 type = gh->proto_type;
574
575 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
576 if (likely(type == htons(ETH_P_TEB)))
577 return eth_gro_complete(skb, nhoff + gh_len);
578
579 ptype = gro_find_complete_by_type(type);
580 if (ptype)
581 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
582
583 skb_set_inner_mac_header(skb, nhoff + gh_len);
584
585 return err;
586 }
587
588 /* Create new listen socket if needed */
589 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
590 bool ipv6, bool ipv6_rx_csum)
591 {
592 struct geneve_net *gn = net_generic(net, geneve_net_id);
593 struct geneve_sock *gs;
594 struct socket *sock;
595 struct udp_tunnel_sock_cfg tunnel_cfg;
596 int h;
597
598 gs = kzalloc(sizeof(*gs), GFP_KERNEL);
599 if (!gs)
600 return ERR_PTR(-ENOMEM);
601
602 sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
603 if (IS_ERR(sock)) {
604 kfree(gs);
605 return ERR_CAST(sock);
606 }
607
608 gs->sock = sock;
609 gs->refcnt = 1;
610 for (h = 0; h < VNI_HASH_SIZE; ++h)
611 INIT_HLIST_HEAD(&gs->vni_list[h]);
612
613 /* Initialize the geneve udp offloads structure */
614 udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
615
616 /* Mark socket as an encapsulation socket */
617 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
618 tunnel_cfg.sk_user_data = gs;
619 tunnel_cfg.encap_type = 1;
620 tunnel_cfg.gro_receive = geneve_gro_receive;
621 tunnel_cfg.gro_complete = geneve_gro_complete;
622 tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
623 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
624 tunnel_cfg.encap_destroy = NULL;
625 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
626 list_add(&gs->list, &gn->sock_list);
627 return gs;
628 }
629
630 static void __geneve_sock_release(struct geneve_sock *gs)
631 {
632 if (!gs || --gs->refcnt)
633 return;
634
635 list_del(&gs->list);
636 udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
637 udp_tunnel_sock_release(gs->sock);
638 kfree_rcu(gs, rcu);
639 }
640
641 static void geneve_sock_release(struct geneve_dev *geneve)
642 {
643 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
644 #if IS_ENABLED(CONFIG_IPV6)
645 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
646
647 rcu_assign_pointer(geneve->sock6, NULL);
648 #endif
649
650 rcu_assign_pointer(geneve->sock4, NULL);
651 synchronize_net();
652
653 __geneve_sock_release(gs4);
654 #if IS_ENABLED(CONFIG_IPV6)
655 __geneve_sock_release(gs6);
656 #endif
657 }
658
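/* Find an existing geneve socket in this netns bound to the given
 * address family and local UDP port.
 */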
659 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
660 sa_family_t family,
661 __be16 dst_port)
662 {
663 struct geneve_sock *gs;
664
665 list_for_each_entry(gs, &gn->sock_list, list) {
666 if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
667 geneve_get_sk_family(gs) == family) {
668 return gs;
669 }
670 }
671 return NULL;
672 }
673
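/* Attach the device to a (possibly shared) geneve socket for the given
 * address family and hash it into the socket's VNI table.
 */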
674 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
675 {
676 struct net *net = geneve->net;
677 struct geneve_net *gn = net_generic(net, geneve_net_id);
678 struct geneve_dev_node *node;
679 struct geneve_sock *gs;
680 __u8 vni[3];
681 __u32 hash;
682
683 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst);
684 if (gs) {
685 gs->refcnt++;
686 goto out;
687 }
688
689 gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
690 geneve->cfg.use_udp6_rx_checksums);
691 if (IS_ERR(gs))
692 return PTR_ERR(gs);
693
694 out:
695 gs->collect_md = geneve->cfg.collect_md;
696 #if IS_ENABLED(CONFIG_IPV6)
697 if (ipv6) {
698 rcu_assign_pointer(geneve->sock6, gs);
699 node = &geneve->hlist6;
700 } else
701 #endif
702 {
703 rcu_assign_pointer(geneve->sock4, gs);
704 node = &geneve->hlist4;
705 }
706 node->geneve = geneve;
707
708 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
709 hash = geneve_net_vni_hash(vni);
710 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
711 return 0;
712 }
713
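/* collect_md (externally controlled) devices open sockets for both
 * address families; otherwise only the configured remote's family is used.
 */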
714 static int geneve_open(struct net_device *dev)
715 {
716 struct geneve_dev *geneve = netdev_priv(dev);
717 bool metadata = geneve->cfg.collect_md;
718 bool ipv4, ipv6;
719 int ret = 0;
720
721 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
722 ipv4 = !ipv6 || metadata;
723 #if IS_ENABLED(CONFIG_IPV6)
724 if (ipv6) {
725 ret = geneve_sock_add(geneve, true);
726 if (ret < 0 && ret != -EAFNOSUPPORT)
727 ipv4 = false;
728 }
729 #endif
730 if (ipv4)
731 ret = geneve_sock_add(geneve, false);
732 if (ret < 0)
733 geneve_sock_release(geneve);
734
735 return ret;
736 }
737
738 static int geneve_stop(struct net_device *dev)
739 {
740 struct geneve_dev *geneve = netdev_priv(dev);
741
742 hlist_del_init_rcu(&geneve->hlist4.hlist);
743 #if IS_ENABLED(CONFIG_IPV6)
744 hlist_del_init_rcu(&geneve->hlist6.hlist);
745 #endif
746 geneve_sock_release(geneve);
747 return 0;
748 }
749
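/* Fill in the Geneve header (version, option length, flags, VNI and
 * inner protocol) and copy any tunnel options behind it.
 */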
750 static void geneve_build_header(struct genevehdr *geneveh,
751 const struct ip_tunnel_info *info,
752 __be16 inner_proto)
753 {
754 geneveh->ver = GENEVE_VER;
755 geneveh->opt_len = info->options_len / 4;
756 geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
757 geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
758 geneveh->rsvd1 = 0;
759 tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
760 geneveh->proto_type = inner_proto;
761 geneveh->rsvd2 = 0;
762
763 if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
764 ip_tunnel_info_opts_get(geneveh->options, info);
765 }
766
767 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
768 const struct ip_tunnel_info *info,
769 bool xnet, int ip_hdr_len,
770 bool inner_proto_inherit)
771 {
772 bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
773 struct genevehdr *gnvh;
774 __be16 inner_proto;
775 int min_headroom;
776 int err;
777
778 skb_reset_mac_header(skb);
779 skb_scrub_packet(skb, xnet);
780
781 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
782 GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
783 err = skb_cow_head(skb, min_headroom);
784 if (unlikely(err))
785 goto free_dst;
786
787 err = udp_tunnel_handle_offloads(skb, udp_sum);
788 if (err)
789 goto free_dst;
790
791 gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
792 inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
793 geneve_build_header(gnvh, info, inner_proto);
794 skb_set_inner_protocol(skb, inner_proto);
795 return 0;
796
797 free_dst:
798 dst_release(dst);
799 return err;
800 }
801
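/* Route lookup for the IPv4 underlay. The per-tunnel dst cache is
 * bypassed when the TOS is inherited from the inner packet.
 */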
802 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
803 struct net_device *dev,
804 struct geneve_sock *gs4,
805 struct flowi4 *fl4,
806 const struct ip_tunnel_info *info,
807 __be16 dport, __be16 sport,
808 __u8 *full_tos)
809 {
810 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
811 struct geneve_dev *geneve = netdev_priv(dev);
812 struct dst_cache *dst_cache;
813 struct rtable *rt = NULL;
814 __u8 tos;
815
816 if (!gs4)
817 return ERR_PTR(-EIO);
818
819 memset(fl4, 0, sizeof(*fl4));
820 fl4->flowi4_mark = skb->mark;
821 fl4->flowi4_proto = IPPROTO_UDP;
822 fl4->daddr = info->key.u.ipv4.dst;
823 fl4->saddr = info->key.u.ipv4.src;
824 fl4->fl4_dport = dport;
825 fl4->fl4_sport = sport;
826 fl4->flowi4_flags = info->key.flow_flags;
827
828 tos = info->key.tos;
829 if ((tos == 1) && !geneve->cfg.collect_md) {
830 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
831 use_cache = false;
832 }
833 fl4->flowi4_tos = RT_TOS(tos);
834 if (full_tos)
835 *full_tos = tos;
836
837 dst_cache = (struct dst_cache *)&info->dst_cache;
838 if (use_cache) {
839 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
840 if (rt)
841 return rt;
842 }
843 rt = ip_route_output_key(geneve->net, fl4);
844 if (IS_ERR(rt)) {
845 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
846 return ERR_PTR(-ENETUNREACH);
847 }
848 if (rt->dst.dev == dev) { /* is this necessary? */
849 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
850 ip_rt_put(rt);
851 return ERR_PTR(-ELOOP);
852 }
853 if (use_cache)
854 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
855 return rt;
856 }
857
858 #if IS_ENABLED(CONFIG_IPV6)
859 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
860 struct net_device *dev,
861 struct geneve_sock *gs6,
862 struct flowi6 *fl6,
863 const struct ip_tunnel_info *info,
864 __be16 dport, __be16 sport)
865 {
866 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
867 struct geneve_dev *geneve = netdev_priv(dev);
868 struct dst_entry *dst = NULL;
869 struct dst_cache *dst_cache;
870 __u8 prio;
871
872 if (!gs6)
873 return ERR_PTR(-EIO);
874
875 memset(fl6, 0, sizeof(*fl6));
876 fl6->flowi6_mark = skb->mark;
877 fl6->flowi6_proto = IPPROTO_UDP;
878 fl6->daddr = info->key.u.ipv6.dst;
879 fl6->saddr = info->key.u.ipv6.src;
880 fl6->fl6_dport = dport;
881 fl6->fl6_sport = sport;
882
883 prio = info->key.tos;
884 if ((prio == 1) && !geneve->cfg.collect_md) {
885 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
886 use_cache = false;
887 }
888
889 fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label);
890 dst_cache = (struct dst_cache *)&info->dst_cache;
891 if (use_cache) {
892 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
893 if (dst)
894 return dst;
895 }
896 dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
897 NULL);
898 if (IS_ERR(dst)) {
899 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
900 return ERR_PTR(-ENETUNREACH);
901 }
902 if (dst->dev == dev) { /* is this necessary? */
903 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
904 dst_release(dst);
905 return ERR_PTR(-ELOOP);
906 }
907
908 if (use_cache)
909 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
910 return dst;
911 }
912 #endif
913
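/* Encapsulate and transmit one skb over the IPv4 underlay. If the
 * route's tunnel PMTU is exceeded, the skb is reinjected locally so the
 * stack can generate an ICMP error, and -EMSGSIZE is returned.
 */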
914 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
915 struct geneve_dev *geneve,
916 const struct ip_tunnel_info *info)
917 {
918 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
919 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
920 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
921 const struct ip_tunnel_key *key = &info->key;
922 struct rtable *rt;
923 struct flowi4 fl4;
924 __u8 full_tos;
925 __u8 tos, ttl;
926 __be16 df = 0;
927 __be16 sport;
928 int err;
929
930 if (!skb_vlan_inet_prepare(skb, inner_proto_inherit))
931 return -EINVAL;
932
933 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
934 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
935 geneve->cfg.info.key.tp_dst, sport, &full_tos);
936 if (IS_ERR(rt))
937 return PTR_ERR(rt);
938
939 err = skb_tunnel_check_pmtu(skb, &rt->dst,
940 GENEVE_IPV4_HLEN + info->options_len,
941 netif_is_any_bridge_port(dev));
942 if (err < 0) {
943 dst_release(&rt->dst);
944 return err;
945 } else if (err) {
946 struct ip_tunnel_info *info;
947
948 info = skb_tunnel_info(skb);
949 if (info) {
950 struct ip_tunnel_info *unclone;
951
952 unclone = skb_tunnel_info_unclone(skb);
953 if (unlikely(!unclone)) {
954 dst_release(&rt->dst);
955 return -ENOMEM;
956 }
957
958 unclone->key.u.ipv4.dst = fl4.saddr;
959 unclone->key.u.ipv4.src = fl4.daddr;
960 }
961
962 if (!pskb_may_pull(skb, ETH_HLEN)) {
963 dst_release(&rt->dst);
964 return -EINVAL;
965 }
966
967 skb->protocol = eth_type_trans(skb, geneve->dev);
968 __netif_rx(skb);
969 dst_release(&rt->dst);
970 return -EMSGSIZE;
971 }
972
973 if (geneve->cfg.collect_md) {
974 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
975 ttl = key->ttl;
976
977 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
978 } else {
979 tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb);
980 if (geneve->cfg.ttl_inherit)
981 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
982 else
983 ttl = key->ttl;
984 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
985
986 if (geneve->cfg.df == GENEVE_DF_SET) {
987 df = htons(IP_DF);
988 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
989 struct ethhdr *eth = skb_eth_hdr(skb);
990
991 if (ntohs(eth->h_proto) == ETH_P_IPV6) {
992 df = htons(IP_DF);
993 } else if (ntohs(eth->h_proto) == ETH_P_IP) {
994 struct iphdr *iph = ip_hdr(skb);
995
996 if (iph->frag_off & htons(IP_DF))
997 df = htons(IP_DF);
998 }
999 }
1000 }
1001
1002 err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr),
1003 inner_proto_inherit);
1004 if (unlikely(err))
1005 return err;
1006
1007 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
1008 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
1009 !net_eq(geneve->net, dev_net(geneve->dev)),
1010 !(info->key.tun_flags & TUNNEL_CSUM));
1011 return 0;
1012 }
1013
1014 #if IS_ENABLED(CONFIG_IPV6)
1015 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
1016 struct geneve_dev *geneve,
1017 const struct ip_tunnel_info *info)
1018 {
1019 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
1020 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
1021 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
1022 const struct ip_tunnel_key *key = &info->key;
1023 struct dst_entry *dst = NULL;
1024 struct flowi6 fl6;
1025 __u8 prio, ttl;
1026 __be16 sport;
1027 int err;
1028
1029 if (!skb_vlan_inet_prepare(skb, inner_proto_inherit))
1030 return -EINVAL;
1031
1032 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
1033 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
1034 geneve->cfg.info.key.tp_dst, sport);
1035 if (IS_ERR(dst))
1036 return PTR_ERR(dst);
1037
1038 err = skb_tunnel_check_pmtu(skb, dst,
1039 GENEVE_IPV6_HLEN + info->options_len,
1040 netif_is_any_bridge_port(dev));
1041 if (err < 0) {
1042 dst_release(dst);
1043 return err;
1044 } else if (err) {
1045 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1046
1047 if (info) {
1048 struct ip_tunnel_info *unclone;
1049
1050 unclone = skb_tunnel_info_unclone(skb);
1051 if (unlikely(!unclone)) {
1052 dst_release(dst);
1053 return -ENOMEM;
1054 }
1055
1056 unclone->key.u.ipv6.dst = fl6.saddr;
1057 unclone->key.u.ipv6.src = fl6.daddr;
1058 }
1059
1060 if (!pskb_may_pull(skb, ETH_HLEN)) {
1061 dst_release(dst);
1062 return -EINVAL;
1063 }
1064
1065 skb->protocol = eth_type_trans(skb, geneve->dev);
1066 __netif_rx(skb);
1067 dst_release(dst);
1068 return -EMSGSIZE;
1069 }
1070
1071 if (geneve->cfg.collect_md) {
1072 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
1073 ttl = key->ttl;
1074 } else {
1075 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
1076 ip_hdr(skb), skb);
1077 if (geneve->cfg.ttl_inherit)
1078 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
1079 else
1080 ttl = key->ttl;
1081 ttl = ttl ? : ip6_dst_hoplimit(dst);
1082 }
1083 err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr),
1084 inner_proto_inherit);
1085 if (unlikely(err))
1086 return err;
1087
1088 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
1089 &fl6.saddr, &fl6.daddr, prio, ttl,
1090 info->key.label, sport, geneve->cfg.info.key.tp_dst,
1091 !(info->key.tun_flags & TUNNEL_CSUM));
1092 return 0;
1093 }
1094 #endif
1095
1096 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
1097 {
1098 struct geneve_dev *geneve = netdev_priv(dev);
1099 struct ip_tunnel_info *info = NULL;
1100 int err;
1101
1102 if (geneve->cfg.collect_md) {
1103 info = skb_tunnel_info(skb);
1104 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
1105 netdev_dbg(dev, "no tunnel metadata\n");
1106 dev_kfree_skb(skb);
1107 dev->stats.tx_dropped++;
1108 return NETDEV_TX_OK;
1109 }
1110 } else {
1111 info = &geneve->cfg.info;
1112 }
1113
1114 rcu_read_lock();
1115 #if IS_ENABLED(CONFIG_IPV6)
1116 if (info->mode & IP_TUNNEL_INFO_IPV6)
1117 err = geneve6_xmit_skb(skb, dev, geneve, info);
1118 else
1119 #endif
1120 err = geneve_xmit_skb(skb, dev, geneve, info);
1121 rcu_read_unlock();
1122
1123 if (likely(!err))
1124 return NETDEV_TX_OK;
1125
1126 if (err != -EMSGSIZE)
1127 dev_kfree_skb(skb);
1128
1129 if (err == -ELOOP)
1130 dev->stats.collisions++;
1131 else if (err == -ENETUNREACH)
1132 dev->stats.tx_carrier_errors++;
1133
1134 dev->stats.tx_errors++;
1135 return NETDEV_TX_OK;
1136 }
1137
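/* Clamp the requested MTU to the device's min/max range instead of
 * rejecting out-of-range values.
 */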
1138 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
1139 {
1140 if (new_mtu > dev->max_mtu)
1141 new_mtu = dev->max_mtu;
1142 else if (new_mtu < dev->min_mtu)
1143 new_mtu = dev->min_mtu;
1144
1145 dev->mtu = new_mtu;
1146 return 0;
1147 }
1148
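/* ndo_fill_metadata_dst: do a route lookup so the tunnel metadata gets
 * the source address and source port that a real transmit would use.
 */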
1149 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1150 {
1151 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1152 struct geneve_dev *geneve = netdev_priv(dev);
1153 __be16 sport;
1154
1155 if (ip_tunnel_info_af(info) == AF_INET) {
1156 struct rtable *rt;
1157 struct flowi4 fl4;
1158
1159 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
1160 sport = udp_flow_src_port(geneve->net, skb,
1161 1, USHRT_MAX, true);
1162
1163 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
1164 geneve->cfg.info.key.tp_dst, sport, NULL);
1165 if (IS_ERR(rt))
1166 return PTR_ERR(rt);
1167
1168 ip_rt_put(rt);
1169 info->key.u.ipv4.src = fl4.saddr;
1170 #if IS_ENABLED(CONFIG_IPV6)
1171 } else if (ip_tunnel_info_af(info) == AF_INET6) {
1172 struct dst_entry *dst;
1173 struct flowi6 fl6;
1174
1175 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
1176 sport = udp_flow_src_port(geneve->net, skb,
1177 1, USHRT_MAX, true);
1178
1179 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
1180 geneve->cfg.info.key.tp_dst, sport);
1181 if (IS_ERR(dst))
1182 return PTR_ERR(dst);
1183
1184 dst_release(dst);
1185 info->key.u.ipv6.src = fl6.saddr;
1186 #endif
1187 } else {
1188 return -EINVAL;
1189 }
1190
1191 info->key.tp_src = sport;
1192 info->key.tp_dst = geneve->cfg.info.key.tp_dst;
1193 return 0;
1194 }
1195
1196 static const struct net_device_ops geneve_netdev_ops = {
1197 .ndo_init = geneve_init,
1198 .ndo_uninit = geneve_uninit,
1199 .ndo_open = geneve_open,
1200 .ndo_stop = geneve_stop,
1201 .ndo_start_xmit = geneve_xmit,
1202 .ndo_get_stats64 = dev_get_tstats64,
1203 .ndo_change_mtu = geneve_change_mtu,
1204 .ndo_validate_addr = eth_validate_addr,
1205 .ndo_set_mac_address = eth_mac_addr,
1206 .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
1207 };
1208
1209 static void geneve_get_drvinfo(struct net_device *dev,
1210 struct ethtool_drvinfo *drvinfo)
1211 {
1212 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
1213 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
1214 }
1215
1216 static const struct ethtool_ops geneve_ethtool_ops = {
1217 .get_drvinfo = geneve_get_drvinfo,
1218 .get_link = ethtool_op_get_link,
1219 };
1220
1221 /* Info for udev, that this is a virtual tunnel endpoint */
1222 static struct device_type geneve_type = {
1223 .name = "geneve",
1224 };
1225
1226 /* Calls the ndo_udp_tunnel_add of the caller in order to
1227 * supply the listening GENEVE udp ports. Callers are expected
1228 * to implement the ndo_udp_tunnel_add.
1229 */
1230 static void geneve_offload_rx_ports(struct net_device *dev, bool push)
1231 {
1232 struct net *net = dev_net(dev);
1233 struct geneve_net *gn = net_generic(net, geneve_net_id);
1234 struct geneve_sock *gs;
1235
1236 rcu_read_lock();
1237 list_for_each_entry_rcu(gs, &gn->sock_list, list) {
1238 if (push) {
1239 udp_tunnel_push_rx_port(dev, gs->sock,
1240 UDP_TUNNEL_TYPE_GENEVE);
1241 } else {
1242 udp_tunnel_drop_rx_port(dev, gs->sock,
1243 UDP_TUNNEL_TYPE_GENEVE);
1244 }
1245 }
1246 rcu_read_unlock();
1247 }
1248
1249 /* Initialize the device structure. */
1250 static void geneve_setup(struct net_device *dev)
1251 {
1252 ether_setup(dev);
1253
1254 dev->netdev_ops = &geneve_netdev_ops;
1255 dev->ethtool_ops = &geneve_ethtool_ops;
1256 dev->needs_free_netdev = true;
1257
1258 SET_NETDEV_DEVTYPE(dev, &geneve_type);
1259
1260 dev->features |= NETIF_F_LLTX;
1261 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
1262 dev->features |= NETIF_F_RXCSUM;
1263 dev->features |= NETIF_F_GSO_SOFTWARE;
1264
1265 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
1266 dev->hw_features |= NETIF_F_RXCSUM;
1267 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1268
1269 /* MTU range: 68 - (something less than 65535) */
1270 dev->min_mtu = ETH_MIN_MTU;
1271 /* The max_mtu calculation does not take account of GENEVE
1272 * options, to avoid excluding potentially valid
1273 * configurations. This will be further reduced by IPvX hdr size.
1274 */
1275 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1276
1277 netif_keep_dst(dev);
1278 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1279 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
1280 eth_hw_addr_random(dev);
1281 }
1282
1283 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1284 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
1285 [IFLA_GENEVE_ID] = { .type = NLA_U32 },
1286 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
1287 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
1288 [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
1289 [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
1290 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
1291 [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
1292 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
1293 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
1294 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1295 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
1296 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
1297 [IFLA_GENEVE_DF] = { .type = NLA_U8 },
1298 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG },
1299 };
1300
1301 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1302 struct netlink_ext_ack *extack)
1303 {
1304 if (tb[IFLA_ADDRESS]) {
1305 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1306 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1307 "Provided link layer address is not Ethernet");
1308 return -EINVAL;
1309 }
1310
1311 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1312 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1313 "Provided Ethernet address is not unicast");
1314 return -EADDRNOTAVAIL;
1315 }
1316 }
1317
1318 if (!data) {
1319 NL_SET_ERR_MSG(extack,
1320 "Not enough attributes provided to perform the operation");
1321 return -EINVAL;
1322 }
1323
1324 if (data[IFLA_GENEVE_ID]) {
1325 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1326
1327 if (vni >= GENEVE_N_VID) {
1328 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
1329 "Geneve ID must be lower than 16777216");
1330 return -ERANGE;
1331 }
1332 }
1333
1334 if (data[IFLA_GENEVE_DF]) {
1335 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
1336
1337 if (df < 0 || df > GENEVE_DF_MAX) {
1338 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
1339 "Invalid DF attribute");
1340 return -EINVAL;
1341 }
1342 }
1343
1344 return 0;
1345 }
1346
1347 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
1348 const struct ip_tunnel_info *info,
1349 bool *tun_on_same_port,
1350 bool *tun_collect_md)
1351 {
1352 struct geneve_dev *geneve, *t = NULL;
1353
1354 *tun_on_same_port = false;
1355 *tun_collect_md = false;
1356 list_for_each_entry(geneve, &gn->geneve_list, next) {
1357 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
1358 *tun_collect_md = geneve->cfg.collect_md;
1359 *tun_on_same_port = true;
1360 }
1361 if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
1362 info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
1363 !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
1364 t = geneve;
1365 }
1366 return t;
1367 }
1368
1369 static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1370 {
1371 return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
1372 info->key.ttl || info->key.label || info->key.tp_src ||
1373 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
1374 }
1375
1376 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
1377 struct ip_tunnel_info *b)
1378 {
1379 if (ip_tunnel_info_af(a) == AF_INET)
1380 return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
1381 else
1382 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
1383 }
1384
1385 static int geneve_configure(struct net *net, struct net_device *dev,
1386 struct netlink_ext_ack *extack,
1387 const struct geneve_config *cfg)
1388 {
1389 struct geneve_net *gn = net_generic(net, geneve_net_id);
1390 struct geneve_dev *t, *geneve = netdev_priv(dev);
1391 const struct ip_tunnel_info *info = &cfg->info;
1392 bool tun_collect_md, tun_on_same_port;
1393 int err, encap_len;
1394
1395 if (cfg->collect_md && !is_tnl_info_zero(info)) {
1396 NL_SET_ERR_MSG(extack,
1397 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
1398 return -EINVAL;
1399 }
1400
1401 geneve->net = net;
1402 geneve->dev = dev;
1403
1404 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
1405 if (t)
1406 return -EBUSY;
1407
1408 /* make enough headroom for basic scenario */
1409 encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
1410 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
1411 encap_len += sizeof(struct iphdr);
1412 dev->max_mtu -= sizeof(struct iphdr);
1413 } else {
1414 encap_len += sizeof(struct ipv6hdr);
1415 dev->max_mtu -= sizeof(struct ipv6hdr);
1416 }
1417 dev->needed_headroom = encap_len + ETH_HLEN;
1418
1419 if (cfg->collect_md) {
1420 if (tun_on_same_port) {
1421 NL_SET_ERR_MSG(extack,
1422 "There can be only one externally controlled device on a destination port");
1423 return -EPERM;
1424 }
1425 } else {
1426 if (tun_collect_md) {
1427 NL_SET_ERR_MSG(extack,
1428 "There already exists an externally controlled device on this destination port");
1429 return -EPERM;
1430 }
1431 }
1432
1433 dst_cache_reset(&geneve->cfg.info.dst_cache);
1434 memcpy(&geneve->cfg, cfg, sizeof(*cfg));
1435
1436 if (geneve->cfg.inner_proto_inherit) {
1437 dev->header_ops = NULL;
1438 dev->type = ARPHRD_NONE;
1439 dev->hard_header_len = 0;
1440 dev->addr_len = 0;
1441 dev->flags = IFF_POINTOPOINT | IFF_NOARP;
1442 }
1443
1444 err = register_netdevice(dev);
1445 if (err)
1446 return err;
1447
1448 list_add(&geneve->next, &gn->geneve_list);
1449 return 0;
1450 }
1451
1452 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
1453 {
1454 memset(info, 0, sizeof(*info));
1455 info->key.tp_dst = htons(dst_port);
1456 }
1457
1458 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
1459 struct netlink_ext_ack *extack,
1460 struct geneve_config *cfg, bool changelink)
1461 {
1462 struct ip_tunnel_info *info = &cfg->info;
1463 int attrtype;
1464
1465 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
1466 NL_SET_ERR_MSG(extack,
1467 "Cannot specify both IPv4 and IPv6 Remote addresses");
1468 return -EINVAL;
1469 }
1470
1471 if (data[IFLA_GENEVE_REMOTE]) {
1472 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
1473 attrtype = IFLA_GENEVE_REMOTE;
1474 goto change_notsup;
1475 }
1476
1477 info->key.u.ipv4.dst =
1478 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
1479
1480 if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
1481 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
1482 "Remote IPv4 address cannot be Multicast");
1483 return -EINVAL;
1484 }
1485 }
1486
1487 if (data[IFLA_GENEVE_REMOTE6]) {
1488 #if IS_ENABLED(CONFIG_IPV6)
1489 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
1490 attrtype = IFLA_GENEVE_REMOTE6;
1491 goto change_notsup;
1492 }
1493
1494 info->mode = IP_TUNNEL_INFO_IPV6;
1495 info->key.u.ipv6.dst =
1496 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1497
1498 if (ipv6_addr_type(&info->key.u.ipv6.dst) &
1499 IPV6_ADDR_LINKLOCAL) {
1500 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1501 "Remote IPv6 address cannot be link-local");
1502 return -EINVAL;
1503 }
1504 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
1505 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1506 "Remote IPv6 address cannot be Multicast");
1507 return -EINVAL;
1508 }
1509 info->key.tun_flags |= TUNNEL_CSUM;
1510 cfg->use_udp6_rx_checksums = true;
1511 #else
1512 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1513 "IPv6 support not enabled in the kernel");
1514 return -EPFNOSUPPORT;
1515 #endif
1516 }
1517
1518 if (data[IFLA_GENEVE_ID]) {
1519 __u32 vni;
1520 __u8 tvni[3];
1521 __be64 tunid;
1522
1523 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1524 tvni[0] = (vni & 0x00ff0000) >> 16;
1525 tvni[1] = (vni & 0x0000ff00) >> 8;
1526 tvni[2] = vni & 0x000000ff;
1527
1528 tunid = vni_to_tunnel_id(tvni);
1529 if (changelink && (tunid != info->key.tun_id)) {
1530 attrtype = IFLA_GENEVE_ID;
1531 goto change_notsup;
1532 }
1533 info->key.tun_id = tunid;
1534 }
1535
1536 if (data[IFLA_GENEVE_TTL_INHERIT]) {
1537 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
1538 cfg->ttl_inherit = true;
1539 else
1540 cfg->ttl_inherit = false;
1541 } else if (data[IFLA_GENEVE_TTL]) {
1542 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
1543 cfg->ttl_inherit = false;
1544 }
1545
1546 if (data[IFLA_GENEVE_TOS])
1547 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
1548
1549 if (data[IFLA_GENEVE_DF])
1550 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);
1551
1552 if (data[IFLA_GENEVE_LABEL]) {
1553 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
1554 IPV6_FLOWLABEL_MASK;
1555 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
1556 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
1557 "Label attribute only applies for IPv6 Geneve devices");
1558 return -EINVAL;
1559 }
1560 }
1561
1562 if (data[IFLA_GENEVE_PORT]) {
1563 if (changelink) {
1564 attrtype = IFLA_GENEVE_PORT;
1565 goto change_notsup;
1566 }
1567 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
1568 }
1569
1570 if (data[IFLA_GENEVE_COLLECT_METADATA]) {
1571 if (changelink) {
1572 attrtype = IFLA_GENEVE_COLLECT_METADATA;
1573 goto change_notsup;
1574 }
1575 cfg->collect_md = true;
1576 }
1577
1578 if (data[IFLA_GENEVE_UDP_CSUM]) {
1579 if (changelink) {
1580 attrtype = IFLA_GENEVE_UDP_CSUM;
1581 goto change_notsup;
1582 }
1583 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
1584 info->key.tun_flags |= TUNNEL_CSUM;
1585 }
1586
1587 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
1588 #if IS_ENABLED(CONFIG_IPV6)
1589 if (changelink) {
1590 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
1591 goto change_notsup;
1592 }
1593 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
1594 info->key.tun_flags &= ~TUNNEL_CSUM;
1595 #else
1596 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
1597 "IPv6 support not enabled in the kernel");
1598 return -EPFNOSUPPORT;
1599 #endif
1600 }
1601
1602 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
1603 #if IS_ENABLED(CONFIG_IPV6)
1604 if (changelink) {
1605 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
1606 goto change_notsup;
1607 }
1608 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
1609 cfg->use_udp6_rx_checksums = false;
1610 #else
1611 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
1612 "IPv6 support not enabled in the kernel");
1613 return -EPFNOSUPPORT;
1614 #endif
1615 }
1616
1617 if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
1618 if (changelink) {
1619 attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
1620 goto change_notsup;
1621 }
1622 cfg->inner_proto_inherit = true;
1623 }
1624
1625 return 0;
1626 change_notsup:
1627 NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
1628 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
1629 return -EOPNOTSUPP;
1630 }
1631
1632 static void geneve_link_config(struct net_device *dev,
1633 struct ip_tunnel_info *info, struct nlattr *tb[])
1634 {
1635 struct geneve_dev *geneve = netdev_priv(dev);
1636 int ldev_mtu = 0;
1637
1638 if (tb[IFLA_MTU]) {
1639 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1640 return;
1641 }
1642
1643 switch (ip_tunnel_info_af(info)) {
1644 case AF_INET: {
1645 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
1646 struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
1647
1648 if (!IS_ERR(rt) && rt->dst.dev) {
1649 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
1650 ip_rt_put(rt);
1651 }
1652 break;
1653 }
1654 #if IS_ENABLED(CONFIG_IPV6)
1655 case AF_INET6: {
1656 struct rt6_info *rt;
1657
1658 if (!__in6_dev_get(dev))
1659 break;
1660
1661 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
1662 NULL, 0);
1663
1664 if (rt && rt->dst.dev)
1665 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
1666 ip6_rt_put(rt);
1667 break;
1668 }
1669 #endif
1670 }
1671
1672 if (ldev_mtu <= 0)
1673 return;
1674
1675 geneve_change_mtu(dev, ldev_mtu - info->options_len);
1676 }
1677
1678 static int geneve_newlink(struct net *net, struct net_device *dev,
1679 struct nlattr *tb[], struct nlattr *data[],
1680 struct netlink_ext_ack *extack)
1681 {
1682 struct geneve_config cfg = {
1683 .df = GENEVE_DF_UNSET,
1684 .use_udp6_rx_checksums = false,
1685 .ttl_inherit = false,
1686 .collect_md = false,
1687 };
1688 int err;
1689
1690 init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
1691 err = geneve_nl2info(tb, data, extack, &cfg, false);
1692 if (err)
1693 return err;
1694
1695 err = geneve_configure(net, dev, extack, &cfg);
1696 if (err)
1697 return err;
1698
1699 geneve_link_config(dev, &cfg.info, tb);
1700
1701 return 0;
1702 }
1703
1704 /* Quiesces the geneve device data path for both TX and RX.
1705 *
1706 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
1707 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
1708 * to complete for the existing set of in-flight packets to be transmitted,
1709 * then we would have quiesced the transmit data path. All the future packets
1710 * will get dropped until we unquiesce the data path.
1711 *
1712 * On receive geneve dereferences the geneve_sock stashed in the socket. So,
1713 * if we set that to NULL under RCU and wait for synchronize_net() to
1714 * complete, then we would have quiesced the receive data path.
1715 */
1716 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
1717 struct geneve_sock **gs6)
1718 {
1719 *gs4 = rtnl_dereference(geneve->sock4);
1720 rcu_assign_pointer(geneve->sock4, NULL);
1721 if (*gs4)
1722 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
1723 #if IS_ENABLED(CONFIG_IPV6)
1724 *gs6 = rtnl_dereference(geneve->sock6);
1725 rcu_assign_pointer(geneve->sock6, NULL);
1726 if (*gs6)
1727 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
1728 #else
1729 *gs6 = NULL;
1730 #endif
1731 synchronize_net();
1732 }
1733
1734 /* Resumes the geneve device data path for both TX and RX. */
1735 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
1736 struct geneve_sock __maybe_unused *gs6)
1737 {
1738 rcu_assign_pointer(geneve->sock4, gs4);
1739 if (gs4)
1740 rcu_assign_sk_user_data(gs4->sock->sk, gs4);
1741 #if IS_ENABLED(CONFIG_IPV6)
1742 rcu_assign_pointer(geneve->sock6, gs6);
1743 if (gs6)
1744 rcu_assign_sk_user_data(gs6->sock->sk, gs6);
1745 #endif
1746 synchronize_net();
1747 }
1748
1749 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
1750 struct nlattr *data[],
1751 struct netlink_ext_ack *extack)
1752 {
1753 struct geneve_dev *geneve = netdev_priv(dev);
1754 struct geneve_sock *gs4, *gs6;
1755 struct geneve_config cfg;
1756 int err;
1757
1758 /* If the geneve device is configured for metadata (or externally
1759 * controlled, for example, OVS), then nothing can be changed.
1760 */
1761 if (geneve->cfg.collect_md)
1762 return -EOPNOTSUPP;
1763
1764 /* Start with the existing info. */
1765 memcpy(&cfg, &geneve->cfg, sizeof(cfg));
1766 err = geneve_nl2info(tb, data, extack, &cfg, true);
1767 if (err)
1768 return err;
1769
1770 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
1771 dst_cache_reset(&cfg.info.dst_cache);
1772 geneve_link_config(dev, &cfg.info, tb);
1773 }
1774
1775 geneve_quiesce(geneve, &gs4, &gs6);
1776 memcpy(&geneve->cfg, &cfg, sizeof(cfg));
1777 geneve_unquiesce(geneve, gs4, gs6);
1778
1779 return 0;
1780 }
1781
1782 static void geneve_dellink(struct net_device *dev, struct list_head *head)
1783 {
1784 struct geneve_dev *geneve = netdev_priv(dev);
1785
1786 list_del(&geneve->next);
1787 unregister_netdevice_queue(dev, head);
1788 }
1789
1790 static size_t geneve_get_size(const struct net_device *dev)
1791 {
1792 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
1793 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
1794 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
1795 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
1796 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
1797 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
1798 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
1799 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
1800 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
1801 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1802 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1803 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
1804 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
1805 0;
1806 }
1807
1808 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
1809 {
1810 struct geneve_dev *geneve = netdev_priv(dev);
1811 struct ip_tunnel_info *info = &geneve->cfg.info;
1812 bool ttl_inherit = geneve->cfg.ttl_inherit;
1813 bool metadata = geneve->cfg.collect_md;
1814 __u8 tmp_vni[3];
1815 __u32 vni;
1816
1817 tunnel_id_to_vni(info->key.tun_id, tmp_vni);
1818 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
1819 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
1820 goto nla_put_failure;
1821
1822 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
1823 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
1824 info->key.u.ipv4.dst))
1825 goto nla_put_failure;
1826 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
1827 !!(info->key.tun_flags & TUNNEL_CSUM)))
1828 goto nla_put_failure;
1829
1830 #if IS_ENABLED(CONFIG_IPV6)
1831 } else if (!metadata) {
1832 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
1833 &info->key.u.ipv6.dst))
1834 goto nla_put_failure;
1835 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
1836 !(info->key.tun_flags & TUNNEL_CSUM)))
1837 goto nla_put_failure;
1838 #endif
1839 }
1840
1841 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
1842 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
1843 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
1844 goto nla_put_failure;
1845
1846 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
1847 goto nla_put_failure;
1848
1849 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
1850 goto nla_put_failure;
1851
1852 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
1853 goto nla_put_failure;
1854
1855 #if IS_ENABLED(CONFIG_IPV6)
1856 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
1857 !geneve->cfg.use_udp6_rx_checksums))
1858 goto nla_put_failure;
1859 #endif
1860
1861 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
1862 goto nla_put_failure;
1863
1864 if (geneve->cfg.inner_proto_inherit &&
1865 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
1866 goto nla_put_failure;
1867
1868 return 0;
1869
1870 nla_put_failure:
1871 return -EMSGSIZE;
1872 }
1873
1874 static struct rtnl_link_ops geneve_link_ops __read_mostly = {
1875 .kind = "geneve",
1876 .maxtype = IFLA_GENEVE_MAX,
1877 .policy = geneve_policy,
1878 .priv_size = sizeof(struct geneve_dev),
1879 .setup = geneve_setup,
1880 .validate = geneve_validate,
1881 .newlink = geneve_newlink,
1882 .changelink = geneve_changelink,
1883 .dellink = geneve_dellink,
1884 .get_size = geneve_get_size,
1885 .fill_info = geneve_fill_info,
1886 };
1887
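/* Create and register a metadata-based (collect_md) geneve device bound
 * to the given destination UDP port.
 */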
1888 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
1889 u8 name_assign_type, u16 dst_port)
1890 {
1891 struct nlattr *tb[IFLA_MAX + 1];
1892 struct net_device *dev;
1893 LIST_HEAD(list_kill);
1894 int err;
1895 struct geneve_config cfg = {
1896 .df = GENEVE_DF_UNSET,
1897 .use_udp6_rx_checksums = true,
1898 .ttl_inherit = false,
1899 .collect_md = true,
1900 };
1901
1902 memset(tb, 0, sizeof(tb));
1903 dev = rtnl_create_link(net, name, name_assign_type,
1904 &geneve_link_ops, tb, NULL);
1905 if (IS_ERR(dev))
1906 return dev;
1907
1908 init_tnl_info(&cfg.info, dst_port);
1909 err = geneve_configure(net, dev, NULL, &cfg);
1910 if (err) {
1911 free_netdev(dev);
1912 return ERR_PTR(err);
1913 }
1914
1915 /* openvswitch users expect packet sizes to be unrestricted,
1916 * so set the largest MTU we can.
1917 */
1918 err = geneve_change_mtu(dev, IP_MAX_MTU);
1919 if (err)
1920 goto err;
1921
1922 err = rtnl_configure_link(dev, NULL, 0, NULL);
1923 if (err < 0)
1924 goto err;
1925
1926 return dev;
1927 err:
1928 geneve_dellink(dev, &list_kill);
1929 unregister_netdevice_many(&list_kill);
1930 return ERR_PTR(err);
1931 }
1932 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
1933
1934 static int geneve_netdevice_event(struct notifier_block *unused,
1935 unsigned long event, void *ptr)
1936 {
1937 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1938
1939 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
1940 geneve_offload_rx_ports(dev, true);
1941 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
1942 geneve_offload_rx_ports(dev, false);
1943
1944 return NOTIFY_DONE;
1945 }
1946
1947 static struct notifier_block geneve_notifier_block __read_mostly = {
1948 .notifier_call = geneve_netdevice_event,
1949 };
1950
1951 static __net_init int geneve_init_net(struct net *net)
1952 {
1953 struct geneve_net *gn = net_generic(net, geneve_net_id);
1954
1955 INIT_LIST_HEAD(&gn->geneve_list);
1956 INIT_LIST_HEAD(&gn->sock_list);
1957 return 0;
1958 }
1959
1960 static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
1961 {
1962 struct geneve_net *gn = net_generic(net, geneve_net_id);
1963 struct geneve_dev *geneve, *next;
1964 struct net_device *dev, *aux;
1965
1966 /* gather any geneve devices that were moved into this ns */
1967 for_each_netdev_safe(net, dev, aux)
1968 if (dev->rtnl_link_ops == &geneve_link_ops)
1969 unregister_netdevice_queue(dev, head);
1970
1971 /* now gather any other geneve devices that were created in this ns */
1972 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1973 /* If geneve->dev is in the same netns, it was already added
1974 * to the list by the previous loop.
1975 */
1976 if (!net_eq(dev_net(geneve->dev), net))
1977 unregister_netdevice_queue(geneve->dev, head);
1978 }
1979 }
1980
1981 static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
1982 {
1983 struct net *net;
1984 LIST_HEAD(list);
1985
1986 rtnl_lock();
1987 list_for_each_entry(net, net_list, exit_list)
1988 geneve_destroy_tunnels(net, &list);
1989
1990 /* unregister the devices gathered above */
1991 unregister_netdevice_many(&list);
1992 rtnl_unlock();
1993
1994 list_for_each_entry(net, net_list, exit_list) {
1995 const struct geneve_net *gn = net_generic(net, geneve_net_id);
1996
1997 WARN_ON_ONCE(!list_empty(&gn->sock_list));
1998 }
1999 }
2000
2001 static struct pernet_operations geneve_net_ops = {
2002 .init = geneve_init_net,
2003 .exit_batch = geneve_exit_batch_net,
2004 .id = &geneve_net_id,
2005 .size = sizeof(struct geneve_net),
2006 };
2007
2008 static int __init geneve_init_module(void)
2009 {
2010 int rc;
2011
2012 rc = register_pernet_subsys(&geneve_net_ops);
2013 if (rc)
2014 goto out1;
2015
2016 rc = register_netdevice_notifier(&geneve_notifier_block);
2017 if (rc)
2018 goto out2;
2019
2020 rc = rtnl_link_register(&geneve_link_ops);
2021 if (rc)
2022 goto out3;
2023
2024 return 0;
2025 out3:
2026 unregister_netdevice_notifier(&geneve_notifier_block);
2027 out2:
2028 unregister_pernet_subsys(&geneve_net_ops);
2029 out1:
2030 return rc;
2031 }
2032 late_initcall(geneve_init_module);
2033
2034 static void __exit geneve_cleanup_module(void)
2035 {
2036 rtnl_link_unregister(&geneve_link_ops);
2037 unregister_netdevice_notifier(&geneve_notifier_block);
2038 unregister_pernet_subsys(&geneve_net_ops);
2039 }
2040 module_exit(geneve_cleanup_module);
2041
2042 MODULE_LICENSE("GPL");
2043 MODULE_VERSION(GENEVE_NETDEV_VER);
2044 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
2045 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
2046 MODULE_ALIAS_RTNL_LINK("geneve");
2047