1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * GENEVE: Generic Network Virtualization Encapsulation 4 * 5 * Copyright (c) 2015 Red Hat, Inc. 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/ethtool.h> 11 #include <linux/kernel.h> 12 #include <linux/module.h> 13 #include <linux/etherdevice.h> 14 #include <linux/hash.h> 15 #include <net/ipv6_stubs.h> 16 #include <net/dst_metadata.h> 17 #include <net/gro_cells.h> 18 #include <net/rtnetlink.h> 19 #include <net/geneve.h> 20 #include <net/gro.h> 21 #include <net/protocol.h> 22 23 #define GENEVE_NETDEV_VER "0.6" 24 25 #define GENEVE_N_VID (1u << 24) 26 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 27 28 #define VNI_HASH_BITS 10 29 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 30 31 static bool log_ecn_error = true; 32 module_param(log_ecn_error, bool, 0644); 33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 34 35 #define GENEVE_VER 0 36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) 38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) 39 40 /* per-network namespace private data for this module */ 41 struct geneve_net { 42 struct list_head geneve_list; 43 struct list_head sock_list; 44 }; 45 46 static unsigned int geneve_net_id; 47 48 struct geneve_dev_node { 49 struct hlist_node hlist; 50 struct geneve_dev *geneve; 51 }; 52 53 struct geneve_config { 54 struct ip_tunnel_info info; 55 bool collect_md; 56 bool use_udp6_rx_checksums; 57 bool ttl_inherit; 58 enum ifla_geneve_df df; 59 bool inner_proto_inherit; 60 }; 61 62 /* Pseudo network device */ 63 struct geneve_dev { 64 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ 65 #if IS_ENABLED(CONFIG_IPV6) 66 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ 67 #endif 68 struct net *net; /* netns for packet i/o */ 69 struct net_device *dev; /* netdev for geneve tunnel */ 70 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ 71 #if IS_ENABLED(CONFIG_IPV6) 72 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ 73 #endif 74 struct list_head next; /* geneve's per namespace list */ 75 struct gro_cells gro_cells; 76 struct geneve_config cfg; 77 }; 78 79 struct geneve_sock { 80 bool collect_md; 81 struct list_head list; 82 struct socket *sock; 83 struct rcu_head rcu; 84 int refcnt; 85 struct hlist_head vni_list[VNI_HASH_SIZE]; 86 }; 87 88 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 89 { 90 __u32 vnid; 91 92 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 93 return hash_32(vnid, VNI_HASH_BITS); 94 } 95 96 static __be64 vni_to_tunnel_id(const __u8 *vni) 97 { 98 #ifdef __BIG_ENDIAN 99 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 100 #else 101 return (__force __be64)(((__force u64)vni[0] << 40) | 102 ((__force u64)vni[1] << 48) | 103 ((__force u64)vni[2] << 56)); 104 #endif 105 } 106 107 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 108 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 109 { 110 #ifdef __BIG_ENDIAN 111 vni[0] = (__force __u8)(tun_id >> 16); 112 vni[1] = (__force __u8)(tun_id >> 8); 113 vni[2] = (__force __u8)tun_id; 114 #else 115 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 116 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 117 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 118 #endif 119 } 120 121 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) 122 { 123 return !memcmp(vni, &tun_id[5], 3); 124 } 125 126 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) 127 { 128 return gs->sock->sk->sk_family; 129 } 130 131 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 132 __be32 addr, u8 vni[]) 133 { 134 struct hlist_head *vni_list_head; 135 struct geneve_dev_node *node; 136 __u32 hash; 137 138 /* Find the device for this VNI */ 139 hash = geneve_net_vni_hash(vni); 140 vni_list_head = &gs->vni_list[hash]; 141 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 142 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 143 addr == node->geneve->cfg.info.key.u.ipv4.dst) 144 return node->geneve; 145 } 146 return NULL; 147 } 148 149 #if IS_ENABLED(CONFIG_IPV6) 150 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 151 struct in6_addr addr6, u8 vni[]) 152 { 153 struct hlist_head *vni_list_head; 154 struct geneve_dev_node *node; 155 __u32 hash; 156 157 /* Find the device for this VNI */ 158 hash = geneve_net_vni_hash(vni); 159 vni_list_head = &gs->vni_list[hash]; 160 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 161 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 162 ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) 163 return node->geneve; 164 } 165 return NULL; 166 } 167 #endif 168 169 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 170 { 171 return (struct genevehdr *)(udp_hdr(skb) + 1); 172 } 173 174 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, 175 struct sk_buff *skb) 176 { 177 static u8 zero_vni[3]; 178 u8 *vni; 179 180 if (geneve_get_sk_family(gs) == AF_INET) { 181 struct iphdr *iph; 182 __be32 addr; 183 184 iph = ip_hdr(skb); /* outer IP header... */ 185 186 if (gs->collect_md) { 187 vni = zero_vni; 188 addr = 0; 189 } else { 190 vni = geneve_hdr(skb)->vni; 191 addr = iph->saddr; 192 } 193 194 return geneve_lookup(gs, addr, vni); 195 #if IS_ENABLED(CONFIG_IPV6) 196 } else if (geneve_get_sk_family(gs) == AF_INET6) { 197 static struct in6_addr zero_addr6; 198 struct ipv6hdr *ip6h; 199 struct in6_addr addr6; 200 201 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 202 203 if (gs->collect_md) { 204 vni = zero_vni; 205 addr6 = zero_addr6; 206 } else { 207 vni = geneve_hdr(skb)->vni; 208 addr6 = ip6h->saddr; 209 } 210 211 return geneve6_lookup(gs, addr6, vni); 212 #endif 213 } 214 return NULL; 215 } 216 217 /* geneve receive/decap routine */ 218 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, 219 struct sk_buff *skb) 220 { 221 struct genevehdr *gnvh = geneve_hdr(skb); 222 struct metadata_dst *tun_dst = NULL; 223 unsigned int len; 224 int nh, err = 0; 225 void *oiph; 226 227 if (ip_tunnel_collect_metadata() || gs->collect_md) { 228 __be16 flags; 229 230 flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | 231 (gnvh->critical ? TUNNEL_CRIT_OPT : 0); 232 233 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, 234 vni_to_tunnel_id(gnvh->vni), 235 gnvh->opt_len * 4); 236 if (!tun_dst) { 237 geneve->dev->stats.rx_dropped++; 238 goto drop; 239 } 240 /* Update tunnel dst according to Geneve options. */ 241 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 242 gnvh->options, gnvh->opt_len * 4, 243 TUNNEL_GENEVE_OPT); 244 } else { 245 /* Drop packets w/ critical options, 246 * since we don't support any... 247 */ 248 if (gnvh->critical) { 249 geneve->dev->stats.rx_frame_errors++; 250 geneve->dev->stats.rx_errors++; 251 goto drop; 252 } 253 } 254 255 if (tun_dst) 256 skb_dst_set(skb, &tun_dst->dst); 257 258 if (gnvh->proto_type == htons(ETH_P_TEB)) { 259 skb_reset_mac_header(skb); 260 skb->protocol = eth_type_trans(skb, geneve->dev); 261 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 262 263 /* Ignore packet loops (and multicast echo) */ 264 if (ether_addr_equal(eth_hdr(skb)->h_source, 265 geneve->dev->dev_addr)) { 266 geneve->dev->stats.rx_errors++; 267 goto drop; 268 } 269 } else { 270 skb_reset_mac_header(skb); 271 skb->dev = geneve->dev; 272 skb->pkt_type = PACKET_HOST; 273 } 274 275 /* Save offset of outer header relative to skb->head, 276 * because we are going to reset the network header to the inner header 277 * and might change skb->head. 278 */ 279 nh = skb_network_header(skb) - skb->head; 280 281 skb_reset_network_header(skb); 282 283 if (!pskb_inet_may_pull(skb)) { 284 DEV_STATS_INC(geneve->dev, rx_length_errors); 285 DEV_STATS_INC(geneve->dev, rx_errors); 286 goto drop; 287 } 288 289 /* Get the outer header. */ 290 oiph = skb->head + nh; 291 292 if (geneve_get_sk_family(gs) == AF_INET) 293 err = IP_ECN_decapsulate(oiph, skb); 294 #if IS_ENABLED(CONFIG_IPV6) 295 else 296 err = IP6_ECN_decapsulate(oiph, skb); 297 #endif 298 299 if (unlikely(err)) { 300 if (log_ecn_error) { 301 if (geneve_get_sk_family(gs) == AF_INET) 302 net_info_ratelimited("non-ECT from %pI4 " 303 "with TOS=%#x\n", 304 &((struct iphdr *)oiph)->saddr, 305 ((struct iphdr *)oiph)->tos); 306 #if IS_ENABLED(CONFIG_IPV6) 307 else 308 net_info_ratelimited("non-ECT from %pI6\n", 309 &((struct ipv6hdr *)oiph)->saddr); 310 #endif 311 } 312 if (err > 1) { 313 ++geneve->dev->stats.rx_frame_errors; 314 ++geneve->dev->stats.rx_errors; 315 goto drop; 316 } 317 } 318 319 len = skb->len; 320 err = gro_cells_receive(&geneve->gro_cells, skb); 321 if (likely(err == NET_RX_SUCCESS)) 322 dev_sw_netstats_rx_add(geneve->dev, len); 323 324 return; 325 drop: 326 /* Consume bad packet */ 327 kfree_skb(skb); 328 } 329 330 /* Setup stats when device is created */ 331 static int geneve_init(struct net_device *dev) 332 { 333 struct geneve_dev *geneve = netdev_priv(dev); 334 int err; 335 336 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 337 if (!dev->tstats) 338 return -ENOMEM; 339 340 err = gro_cells_init(&geneve->gro_cells, dev); 341 if (err) { 342 free_percpu(dev->tstats); 343 return err; 344 } 345 346 err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); 347 if (err) { 348 free_percpu(dev->tstats); 349 gro_cells_destroy(&geneve->gro_cells); 350 return err; 351 } 352 netdev_lockdep_set_classes(dev); 353 return 0; 354 } 355 356 static void geneve_uninit(struct net_device *dev) 357 { 358 struct geneve_dev *geneve = netdev_priv(dev); 359 360 dst_cache_destroy(&geneve->cfg.info.dst_cache); 361 gro_cells_destroy(&geneve->gro_cells); 362 free_percpu(dev->tstats); 363 } 364 365 /* Callback from net/ipv4/udp.c to receive packets */ 366 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 367 { 368 struct genevehdr *geneveh; 369 struct geneve_dev *geneve; 370 struct geneve_sock *gs; 371 __be16 inner_proto; 372 int opts_len; 373 374 /* Need UDP and Geneve header to be present */ 375 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 376 goto drop; 377 378 /* Return packets with reserved bits set */ 379 geneveh = geneve_hdr(skb); 380 if (unlikely(geneveh->ver != GENEVE_VER)) 381 goto drop; 382 383 gs = rcu_dereference_sk_user_data(sk); 384 if (!gs) 385 goto drop; 386 387 geneve = geneve_lookup_skb(gs, skb); 388 if (!geneve) 389 goto drop; 390 391 inner_proto = geneveh->proto_type; 392 393 if (unlikely((!geneve->cfg.inner_proto_inherit && 394 inner_proto != htons(ETH_P_TEB)))) { 395 geneve->dev->stats.rx_dropped++; 396 goto drop; 397 } 398 399 opts_len = geneveh->opt_len * 4; 400 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, 401 !net_eq(geneve->net, dev_net(geneve->dev)))) { 402 geneve->dev->stats.rx_dropped++; 403 goto drop; 404 } 405 406 geneve_rx(geneve, gs, skb); 407 return 0; 408 409 drop: 410 /* Consume bad packet */ 411 kfree_skb(skb); 412 return 0; 413 } 414 415 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ 416 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) 417 { 418 struct genevehdr *geneveh; 419 struct geneve_sock *gs; 420 u8 zero_vni[3] = { 0 }; 421 u8 *vni = zero_vni; 422 423 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) 424 return -EINVAL; 425 426 geneveh = geneve_hdr(skb); 427 if (geneveh->ver != GENEVE_VER) 428 return -EINVAL; 429 430 if (geneveh->proto_type != htons(ETH_P_TEB)) 431 return -EINVAL; 432 433 gs = rcu_dereference_sk_user_data(sk); 434 if (!gs) 435 return -ENOENT; 436 437 if (geneve_get_sk_family(gs) == AF_INET) { 438 struct iphdr *iph = ip_hdr(skb); 439 __be32 addr4 = 0; 440 441 if (!gs->collect_md) { 442 vni = geneve_hdr(skb)->vni; 443 addr4 = iph->daddr; 444 } 445 446 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; 447 } 448 449 #if IS_ENABLED(CONFIG_IPV6) 450 if (geneve_get_sk_family(gs) == AF_INET6) { 451 struct ipv6hdr *ip6h = ipv6_hdr(skb); 452 struct in6_addr addr6; 453 454 memset(&addr6, 0, sizeof(struct in6_addr)); 455 456 if (!gs->collect_md) { 457 vni = geneve_hdr(skb)->vni; 458 addr6 = ip6h->daddr; 459 } 460 461 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT; 462 } 463 #endif 464 465 return -EPFNOSUPPORT; 466 } 467 468 static struct socket *geneve_create_sock(struct net *net, bool ipv6, 469 __be16 port, bool ipv6_rx_csum) 470 { 471 struct socket *sock; 472 struct udp_port_cfg udp_conf; 473 int err; 474 475 memset(&udp_conf, 0, sizeof(udp_conf)); 476 477 if (ipv6) { 478 udp_conf.family = AF_INET6; 479 udp_conf.ipv6_v6only = 1; 480 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum; 481 } else { 482 udp_conf.family = AF_INET; 483 udp_conf.local_ip.s_addr = htonl(INADDR_ANY); 484 } 485 486 udp_conf.local_udp_port = port; 487 488 /* Open UDP socket */ 489 err = udp_sock_create(net, &udp_conf, &sock); 490 if (err < 0) 491 return ERR_PTR(err); 492 493 udp_allow_gso(sock->sk); 494 return sock; 495 } 496 497 static int geneve_hlen(struct genevehdr *gh) 498 { 499 return sizeof(*gh) + gh->opt_len * 4; 500 } 501 502 static struct sk_buff *geneve_gro_receive(struct sock *sk, 503 struct list_head *head, 504 struct sk_buff *skb) 505 { 506 struct sk_buff *pp = NULL; 507 struct sk_buff *p; 508 struct genevehdr *gh, *gh2; 509 unsigned int hlen, gh_len, off_gnv; 510 const struct packet_offload *ptype; 511 __be16 type; 512 int flush = 1; 513 514 off_gnv = skb_gro_offset(skb); 515 hlen = off_gnv + sizeof(*gh); 516 gh = skb_gro_header(skb, hlen, off_gnv); 517 if (unlikely(!gh)) 518 goto out; 519 520 if (gh->ver != GENEVE_VER || gh->oam) 521 goto out; 522 gh_len = geneve_hlen(gh); 523 524 hlen = off_gnv + gh_len; 525 if (skb_gro_header_hard(skb, hlen)) { 526 gh = skb_gro_header_slow(skb, hlen, off_gnv); 527 if (unlikely(!gh)) 528 goto out; 529 } 530 531 list_for_each_entry(p, head, list) { 532 if (!NAPI_GRO_CB(p)->same_flow) 533 continue; 534 535 gh2 = (struct genevehdr *)(p->data + off_gnv); 536 if (gh->opt_len != gh2->opt_len || 537 memcmp(gh, gh2, gh_len)) { 538 NAPI_GRO_CB(p)->same_flow = 0; 539 continue; 540 } 541 } 542 543 skb_gro_pull(skb, gh_len); 544 skb_gro_postpull_rcsum(skb, gh, gh_len); 545 type = gh->proto_type; 546 if (likely(type == htons(ETH_P_TEB))) 547 return call_gro_receive(eth_gro_receive, head, skb); 548 549 ptype = gro_find_receive_by_type(type); 550 if (!ptype) 551 goto out; 552 553 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); 554 flush = 0; 555 556 out: 557 skb_gro_flush_final(skb, pp, flush); 558 559 return pp; 560 } 561 562 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, 563 int nhoff) 564 { 565 struct genevehdr *gh; 566 struct packet_offload *ptype; 567 __be16 type; 568 int gh_len; 569 int err = -ENOSYS; 570 571 gh = (struct genevehdr *)(skb->data + nhoff); 572 gh_len = geneve_hlen(gh); 573 type = gh->proto_type; 574 575 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ 576 if (likely(type == htons(ETH_P_TEB))) 577 return eth_gro_complete(skb, nhoff + gh_len); 578 579 ptype = gro_find_complete_by_type(type); 580 if (ptype) 581 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 582 583 skb_set_inner_mac_header(skb, nhoff + gh_len); 584 585 return err; 586 } 587 588 /* Create new listen socket if needed */ 589 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, 590 bool ipv6, bool ipv6_rx_csum) 591 { 592 struct geneve_net *gn = net_generic(net, geneve_net_id); 593 struct geneve_sock *gs; 594 struct socket *sock; 595 struct udp_tunnel_sock_cfg tunnel_cfg; 596 int h; 597 598 gs = kzalloc(sizeof(*gs), GFP_KERNEL); 599 if (!gs) 600 return ERR_PTR(-ENOMEM); 601 602 sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum); 603 if (IS_ERR(sock)) { 604 kfree(gs); 605 return ERR_CAST(sock); 606 } 607 608 gs->sock = sock; 609 gs->refcnt = 1; 610 for (h = 0; h < VNI_HASH_SIZE; ++h) 611 INIT_HLIST_HEAD(&gs->vni_list[h]); 612 613 /* Initialize the geneve udp offloads structure */ 614 udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); 615 616 /* Mark socket as an encapsulation socket */ 617 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 618 tunnel_cfg.sk_user_data = gs; 619 tunnel_cfg.encap_type = 1; 620 tunnel_cfg.gro_receive = geneve_gro_receive; 621 tunnel_cfg.gro_complete = geneve_gro_complete; 622 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 623 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; 624 tunnel_cfg.encap_destroy = NULL; 625 setup_udp_tunnel_sock(net, sock, &tunnel_cfg); 626 list_add(&gs->list, &gn->sock_list); 627 return gs; 628 } 629 630 static void __geneve_sock_release(struct geneve_sock *gs) 631 { 632 if (!gs || --gs->refcnt) 633 return; 634 635 list_del(&gs->list); 636 udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); 637 udp_tunnel_sock_release(gs->sock); 638 kfree_rcu(gs, rcu); 639 } 640 641 static void geneve_sock_release(struct geneve_dev *geneve) 642 { 643 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); 644 #if IS_ENABLED(CONFIG_IPV6) 645 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); 646 647 rcu_assign_pointer(geneve->sock6, NULL); 648 #endif 649 650 rcu_assign_pointer(geneve->sock4, NULL); 651 synchronize_net(); 652 653 __geneve_sock_release(gs4); 654 #if IS_ENABLED(CONFIG_IPV6) 655 __geneve_sock_release(gs6); 656 #endif 657 } 658 659 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, 660 sa_family_t family, 661 __be16 dst_port) 662 { 663 struct geneve_sock *gs; 664 665 list_for_each_entry(gs, &gn->sock_list, list) { 666 if (inet_sk(gs->sock->sk)->inet_sport == dst_port && 667 geneve_get_sk_family(gs) == family) { 668 return gs; 669 } 670 } 671 return NULL; 672 } 673 674 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 675 { 676 struct net *net = geneve->net; 677 struct geneve_net *gn = net_generic(net, geneve_net_id); 678 struct geneve_dev_node *node; 679 struct geneve_sock *gs; 680 __u8 vni[3]; 681 __u32 hash; 682 683 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst); 684 if (gs) { 685 gs->refcnt++; 686 goto out; 687 } 688 689 gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6, 690 geneve->cfg.use_udp6_rx_checksums); 691 if (IS_ERR(gs)) 692 return PTR_ERR(gs); 693 694 out: 695 gs->collect_md = geneve->cfg.collect_md; 696 #if IS_ENABLED(CONFIG_IPV6) 697 if (ipv6) { 698 rcu_assign_pointer(geneve->sock6, gs); 699 node = &geneve->hlist6; 700 } else 701 #endif 702 { 703 rcu_assign_pointer(geneve->sock4, gs); 704 node = &geneve->hlist4; 705 } 706 node->geneve = geneve; 707 708 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); 709 hash = geneve_net_vni_hash(vni); 710 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); 711 return 0; 712 } 713 714 static int geneve_open(struct net_device *dev) 715 { 716 struct geneve_dev *geneve = netdev_priv(dev); 717 bool metadata = geneve->cfg.collect_md; 718 bool ipv4, ipv6; 719 int ret = 0; 720 721 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata; 722 ipv4 = !ipv6 || metadata; 723 #if IS_ENABLED(CONFIG_IPV6) 724 if (ipv6) { 725 ret = geneve_sock_add(geneve, true); 726 if (ret < 0 && ret != -EAFNOSUPPORT) 727 ipv4 = false; 728 } 729 #endif 730 if (ipv4) 731 ret = geneve_sock_add(geneve, false); 732 if (ret < 0) 733 geneve_sock_release(geneve); 734 735 return ret; 736 } 737 738 static int geneve_stop(struct net_device *dev) 739 { 740 struct geneve_dev *geneve = netdev_priv(dev); 741 742 hlist_del_init_rcu(&geneve->hlist4.hlist); 743 #if IS_ENABLED(CONFIG_IPV6) 744 hlist_del_init_rcu(&geneve->hlist6.hlist); 745 #endif 746 geneve_sock_release(geneve); 747 return 0; 748 } 749 750 static void geneve_build_header(struct genevehdr *geneveh, 751 const struct ip_tunnel_info *info, 752 __be16 inner_proto) 753 { 754 geneveh->ver = GENEVE_VER; 755 geneveh->opt_len = info->options_len / 4; 756 geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM); 757 geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT); 758 geneveh->rsvd1 = 0; 759 tunnel_id_to_vni(info->key.tun_id, geneveh->vni); 760 geneveh->proto_type = inner_proto; 761 geneveh->rsvd2 = 0; 762 763 if (info->key.tun_flags & TUNNEL_GENEVE_OPT) 764 ip_tunnel_info_opts_get(geneveh->options, info); 765 } 766 767 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, 768 const struct ip_tunnel_info *info, 769 bool xnet, int ip_hdr_len, 770 bool inner_proto_inherit) 771 { 772 bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); 773 struct genevehdr *gnvh; 774 __be16 inner_proto; 775 int min_headroom; 776 int err; 777 778 skb_reset_mac_header(skb); 779 skb_scrub_packet(skb, xnet); 780 781 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + 782 GENEVE_BASE_HLEN + info->options_len + ip_hdr_len; 783 err = skb_cow_head(skb, min_headroom); 784 if (unlikely(err)) 785 goto free_dst; 786 787 err = udp_tunnel_handle_offloads(skb, udp_sum); 788 if (err) 789 goto free_dst; 790 791 gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len); 792 inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB); 793 geneve_build_header(gnvh, info, inner_proto); 794 skb_set_inner_protocol(skb, inner_proto); 795 return 0; 796 797 free_dst: 798 dst_release(dst); 799 return err; 800 } 801 802 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, 803 struct net_device *dev, 804 struct geneve_sock *gs4, 805 struct flowi4 *fl4, 806 const struct ip_tunnel_info *info, 807 __be16 dport, __be16 sport, 808 __u8 *full_tos) 809 { 810 bool use_cache = ip_tunnel_dst_cache_usable(skb, info); 811 struct geneve_dev *geneve = netdev_priv(dev); 812 struct dst_cache *dst_cache; 813 struct rtable *rt = NULL; 814 __u8 tos; 815 816 if (!gs4) 817 return ERR_PTR(-EIO); 818 819 memset(fl4, 0, sizeof(*fl4)); 820 fl4->flowi4_mark = skb->mark; 821 fl4->flowi4_proto = IPPROTO_UDP; 822 fl4->daddr = info->key.u.ipv4.dst; 823 fl4->saddr = info->key.u.ipv4.src; 824 fl4->fl4_dport = dport; 825 fl4->fl4_sport = sport; 826 fl4->flowi4_flags = info->key.flow_flags; 827 828 tos = info->key.tos; 829 if ((tos == 1) && !geneve->cfg.collect_md) { 830 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb); 831 use_cache = false; 832 } 833 fl4->flowi4_tos = RT_TOS(tos); 834 if (full_tos) 835 *full_tos = tos; 836 837 dst_cache = (struct dst_cache *)&info->dst_cache; 838 if (use_cache) { 839 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); 840 if (rt) 841 return rt; 842 } 843 rt = ip_route_output_key(geneve->net, fl4); 844 if (IS_ERR(rt)) { 845 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); 846 return ERR_PTR(-ENETUNREACH); 847 } 848 if (rt->dst.dev == dev) { /* is this necessary? */ 849 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); 850 ip_rt_put(rt); 851 return ERR_PTR(-ELOOP); 852 } 853 if (use_cache) 854 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); 855 return rt; 856 } 857 858 #if IS_ENABLED(CONFIG_IPV6) 859 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, 860 struct net_device *dev, 861 struct geneve_sock *gs6, 862 struct flowi6 *fl6, 863 const struct ip_tunnel_info *info, 864 __be16 dport, __be16 sport) 865 { 866 bool use_cache = ip_tunnel_dst_cache_usable(skb, info); 867 struct geneve_dev *geneve = netdev_priv(dev); 868 struct dst_entry *dst = NULL; 869 struct dst_cache *dst_cache; 870 __u8 prio; 871 872 if (!gs6) 873 return ERR_PTR(-EIO); 874 875 memset(fl6, 0, sizeof(*fl6)); 876 fl6->flowi6_mark = skb->mark; 877 fl6->flowi6_proto = IPPROTO_UDP; 878 fl6->daddr = info->key.u.ipv6.dst; 879 fl6->saddr = info->key.u.ipv6.src; 880 fl6->fl6_dport = dport; 881 fl6->fl6_sport = sport; 882 883 prio = info->key.tos; 884 if ((prio == 1) && !geneve->cfg.collect_md) { 885 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb); 886 use_cache = false; 887 } 888 889 fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label); 890 dst_cache = (struct dst_cache *)&info->dst_cache; 891 if (use_cache) { 892 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); 893 if (dst) 894 return dst; 895 } 896 dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6, 897 NULL); 898 if (IS_ERR(dst)) { 899 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); 900 return ERR_PTR(-ENETUNREACH); 901 } 902 if (dst->dev == dev) { /* is this necessary? */ 903 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr); 904 dst_release(dst); 905 return ERR_PTR(-ELOOP); 906 } 907 908 if (use_cache) 909 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr); 910 return dst; 911 } 912 #endif 913 914 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 915 struct geneve_dev *geneve, 916 const struct ip_tunnel_info *info) 917 { 918 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 919 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 920 const struct ip_tunnel_key *key = &info->key; 921 struct rtable *rt; 922 struct flowi4 fl4; 923 __u8 full_tos; 924 __u8 tos, ttl; 925 __be16 df = 0; 926 __be16 sport; 927 int err; 928 929 if (!skb_vlan_inet_prepare(skb)) 930 return -EINVAL; 931 932 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 933 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, 934 geneve->cfg.info.key.tp_dst, sport, &full_tos); 935 if (IS_ERR(rt)) 936 return PTR_ERR(rt); 937 938 err = skb_tunnel_check_pmtu(skb, &rt->dst, 939 GENEVE_IPV4_HLEN + info->options_len, 940 netif_is_any_bridge_port(dev)); 941 if (err < 0) { 942 dst_release(&rt->dst); 943 return err; 944 } else if (err) { 945 struct ip_tunnel_info *info; 946 947 info = skb_tunnel_info(skb); 948 if (info) { 949 struct ip_tunnel_info *unclone; 950 951 unclone = skb_tunnel_info_unclone(skb); 952 if (unlikely(!unclone)) { 953 dst_release(&rt->dst); 954 return -ENOMEM; 955 } 956 957 unclone->key.u.ipv4.dst = fl4.saddr; 958 unclone->key.u.ipv4.src = fl4.daddr; 959 } 960 961 if (!pskb_may_pull(skb, ETH_HLEN)) { 962 dst_release(&rt->dst); 963 return -EINVAL; 964 } 965 966 skb->protocol = eth_type_trans(skb, geneve->dev); 967 __netif_rx(skb); 968 dst_release(&rt->dst); 969 return -EMSGSIZE; 970 } 971 972 if (geneve->cfg.collect_md) { 973 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); 974 ttl = key->ttl; 975 976 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 977 } else { 978 tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb); 979 if (geneve->cfg.ttl_inherit) 980 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 981 else 982 ttl = key->ttl; 983 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 984 985 if (geneve->cfg.df == GENEVE_DF_SET) { 986 df = htons(IP_DF); 987 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { 988 struct ethhdr *eth = eth_hdr(skb); 989 990 if (ntohs(eth->h_proto) == ETH_P_IPV6) { 991 df = htons(IP_DF); 992 } else if (ntohs(eth->h_proto) == ETH_P_IP) { 993 struct iphdr *iph = ip_hdr(skb); 994 995 if (iph->frag_off & htons(IP_DF)) 996 df = htons(IP_DF); 997 } 998 } 999 } 1000 1001 err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), 1002 geneve->cfg.inner_proto_inherit); 1003 if (unlikely(err)) 1004 return err; 1005 1006 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, 1007 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, 1008 !net_eq(geneve->net, dev_net(geneve->dev)), 1009 !(info->key.tun_flags & TUNNEL_CSUM)); 1010 return 0; 1011 } 1012 1013 #if IS_ENABLED(CONFIG_IPV6) 1014 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1015 struct geneve_dev *geneve, 1016 const struct ip_tunnel_info *info) 1017 { 1018 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 1019 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1020 const struct ip_tunnel_key *key = &info->key; 1021 struct dst_entry *dst = NULL; 1022 struct flowi6 fl6; 1023 __u8 prio, ttl; 1024 __be16 sport; 1025 int err; 1026 1027 if (!skb_vlan_inet_prepare(skb)) 1028 return -EINVAL; 1029 1030 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 1031 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, 1032 geneve->cfg.info.key.tp_dst, sport); 1033 if (IS_ERR(dst)) 1034 return PTR_ERR(dst); 1035 1036 err = skb_tunnel_check_pmtu(skb, dst, 1037 GENEVE_IPV6_HLEN + info->options_len, 1038 netif_is_any_bridge_port(dev)); 1039 if (err < 0) { 1040 dst_release(dst); 1041 return err; 1042 } else if (err) { 1043 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1044 1045 if (info) { 1046 struct ip_tunnel_info *unclone; 1047 1048 unclone = skb_tunnel_info_unclone(skb); 1049 if (unlikely(!unclone)) { 1050 dst_release(dst); 1051 return -ENOMEM; 1052 } 1053 1054 unclone->key.u.ipv6.dst = fl6.saddr; 1055 unclone->key.u.ipv6.src = fl6.daddr; 1056 } 1057 1058 if (!pskb_may_pull(skb, ETH_HLEN)) { 1059 dst_release(dst); 1060 return -EINVAL; 1061 } 1062 1063 skb->protocol = eth_type_trans(skb, geneve->dev); 1064 __netif_rx(skb); 1065 dst_release(dst); 1066 return -EMSGSIZE; 1067 } 1068 1069 if (geneve->cfg.collect_md) { 1070 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); 1071 ttl = key->ttl; 1072 } else { 1073 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), 1074 ip_hdr(skb), skb); 1075 if (geneve->cfg.ttl_inherit) 1076 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1077 else 1078 ttl = key->ttl; 1079 ttl = ttl ? : ip6_dst_hoplimit(dst); 1080 } 1081 err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), 1082 geneve->cfg.inner_proto_inherit); 1083 if (unlikely(err)) 1084 return err; 1085 1086 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, 1087 &fl6.saddr, &fl6.daddr, prio, ttl, 1088 info->key.label, sport, geneve->cfg.info.key.tp_dst, 1089 !(info->key.tun_flags & TUNNEL_CSUM)); 1090 return 0; 1091 } 1092 #endif 1093 1094 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1095 { 1096 struct geneve_dev *geneve = netdev_priv(dev); 1097 struct ip_tunnel_info *info = NULL; 1098 int err; 1099 1100 if (geneve->cfg.collect_md) { 1101 info = skb_tunnel_info(skb); 1102 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 1103 netdev_dbg(dev, "no tunnel metadata\n"); 1104 dev_kfree_skb(skb); 1105 dev->stats.tx_dropped++; 1106 return NETDEV_TX_OK; 1107 } 1108 } else { 1109 info = &geneve->cfg.info; 1110 } 1111 1112 rcu_read_lock(); 1113 #if IS_ENABLED(CONFIG_IPV6) 1114 if (info->mode & IP_TUNNEL_INFO_IPV6) 1115 err = geneve6_xmit_skb(skb, dev, geneve, info); 1116 else 1117 #endif 1118 err = geneve_xmit_skb(skb, dev, geneve, info); 1119 rcu_read_unlock(); 1120 1121 if (likely(!err)) 1122 return NETDEV_TX_OK; 1123 1124 if (err != -EMSGSIZE) 1125 dev_kfree_skb(skb); 1126 1127 if (err == -ELOOP) 1128 dev->stats.collisions++; 1129 else if (err == -ENETUNREACH) 1130 dev->stats.tx_carrier_errors++; 1131 1132 dev->stats.tx_errors++; 1133 return NETDEV_TX_OK; 1134 } 1135 1136 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1137 { 1138 if (new_mtu > dev->max_mtu) 1139 new_mtu = dev->max_mtu; 1140 else if (new_mtu < dev->min_mtu) 1141 new_mtu = dev->min_mtu; 1142 1143 dev->mtu = new_mtu; 1144 return 0; 1145 } 1146 1147 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1148 { 1149 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1150 struct geneve_dev *geneve = netdev_priv(dev); 1151 __be16 sport; 1152 1153 if (ip_tunnel_info_af(info) == AF_INET) { 1154 struct rtable *rt; 1155 struct flowi4 fl4; 1156 1157 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1158 sport = udp_flow_src_port(geneve->net, skb, 1159 1, USHRT_MAX, true); 1160 1161 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, 1162 geneve->cfg.info.key.tp_dst, sport, NULL); 1163 if (IS_ERR(rt)) 1164 return PTR_ERR(rt); 1165 1166 ip_rt_put(rt); 1167 info->key.u.ipv4.src = fl4.saddr; 1168 #if IS_ENABLED(CONFIG_IPV6) 1169 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1170 struct dst_entry *dst; 1171 struct flowi6 fl6; 1172 1173 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1174 sport = udp_flow_src_port(geneve->net, skb, 1175 1, USHRT_MAX, true); 1176 1177 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, 1178 geneve->cfg.info.key.tp_dst, sport); 1179 if (IS_ERR(dst)) 1180 return PTR_ERR(dst); 1181 1182 dst_release(dst); 1183 info->key.u.ipv6.src = fl6.saddr; 1184 #endif 1185 } else { 1186 return -EINVAL; 1187 } 1188 1189 info->key.tp_src = sport; 1190 info->key.tp_dst = geneve->cfg.info.key.tp_dst; 1191 return 0; 1192 } 1193 1194 static const struct net_device_ops geneve_netdev_ops = { 1195 .ndo_init = geneve_init, 1196 .ndo_uninit = geneve_uninit, 1197 .ndo_open = geneve_open, 1198 .ndo_stop = geneve_stop, 1199 .ndo_start_xmit = geneve_xmit, 1200 .ndo_get_stats64 = dev_get_tstats64, 1201 .ndo_change_mtu = geneve_change_mtu, 1202 .ndo_validate_addr = eth_validate_addr, 1203 .ndo_set_mac_address = eth_mac_addr, 1204 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1205 }; 1206 1207 static void geneve_get_drvinfo(struct net_device *dev, 1208 struct ethtool_drvinfo *drvinfo) 1209 { 1210 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1211 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1212 } 1213 1214 static const struct ethtool_ops geneve_ethtool_ops = { 1215 .get_drvinfo = geneve_get_drvinfo, 1216 .get_link = ethtool_op_get_link, 1217 }; 1218 1219 /* Info for udev, that this is a virtual tunnel endpoint */ 1220 static struct device_type geneve_type = { 1221 .name = "geneve", 1222 }; 1223 1224 /* Calls the ndo_udp_tunnel_add of the caller in order to 1225 * supply the listening GENEVE udp ports. Callers are expected 1226 * to implement the ndo_udp_tunnel_add. 1227 */ 1228 static void geneve_offload_rx_ports(struct net_device *dev, bool push) 1229 { 1230 struct net *net = dev_net(dev); 1231 struct geneve_net *gn = net_generic(net, geneve_net_id); 1232 struct geneve_sock *gs; 1233 1234 rcu_read_lock(); 1235 list_for_each_entry_rcu(gs, &gn->sock_list, list) { 1236 if (push) { 1237 udp_tunnel_push_rx_port(dev, gs->sock, 1238 UDP_TUNNEL_TYPE_GENEVE); 1239 } else { 1240 udp_tunnel_drop_rx_port(dev, gs->sock, 1241 UDP_TUNNEL_TYPE_GENEVE); 1242 } 1243 } 1244 rcu_read_unlock(); 1245 } 1246 1247 /* Initialize the device structure. */ 1248 static void geneve_setup(struct net_device *dev) 1249 { 1250 ether_setup(dev); 1251 1252 dev->netdev_ops = &geneve_netdev_ops; 1253 dev->ethtool_ops = &geneve_ethtool_ops; 1254 dev->needs_free_netdev = true; 1255 1256 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1257 1258 dev->features |= NETIF_F_LLTX; 1259 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1260 dev->features |= NETIF_F_RXCSUM; 1261 dev->features |= NETIF_F_GSO_SOFTWARE; 1262 1263 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1264 dev->hw_features |= NETIF_F_RXCSUM; 1265 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1266 1267 /* MTU range: 68 - (something less than 65535) */ 1268 dev->min_mtu = ETH_MIN_MTU; 1269 /* The max_mtu calculation does not take account of GENEVE 1270 * options, to avoid excluding potentially valid 1271 * configurations. This will be further reduced by IPvX hdr size. 1272 */ 1273 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; 1274 1275 netif_keep_dst(dev); 1276 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1277 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1278 eth_hw_addr_random(dev); 1279 } 1280 1281 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1282 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, 1283 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1284 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, 1285 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1286 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1287 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1288 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, 1289 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1290 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1291 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1292 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1293 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1294 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, 1295 [IFLA_GENEVE_DF] = { .type = NLA_U8 }, 1296 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, 1297 }; 1298 1299 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], 1300 struct netlink_ext_ack *extack) 1301 { 1302 if (tb[IFLA_ADDRESS]) { 1303 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { 1304 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1305 "Provided link layer address is not Ethernet"); 1306 return -EINVAL; 1307 } 1308 1309 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { 1310 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1311 "Provided Ethernet address is not unicast"); 1312 return -EADDRNOTAVAIL; 1313 } 1314 } 1315 1316 if (!data) { 1317 NL_SET_ERR_MSG(extack, 1318 "Not enough attributes provided to perform the operation"); 1319 return -EINVAL; 1320 } 1321 1322 if (data[IFLA_GENEVE_ID]) { 1323 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1324 1325 if (vni >= GENEVE_N_VID) { 1326 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], 1327 "Geneve ID must be lower than 16777216"); 1328 return -ERANGE; 1329 } 1330 } 1331 1332 if (data[IFLA_GENEVE_DF]) { 1333 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); 1334 1335 if (df < 0 || df > GENEVE_DF_MAX) { 1336 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], 1337 "Invalid DF attribute"); 1338 return -EINVAL; 1339 } 1340 } 1341 1342 return 0; 1343 } 1344 1345 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, 1346 const struct ip_tunnel_info *info, 1347 bool *tun_on_same_port, 1348 bool *tun_collect_md) 1349 { 1350 struct geneve_dev *geneve, *t = NULL; 1351 1352 *tun_on_same_port = false; 1353 *tun_collect_md = false; 1354 list_for_each_entry(geneve, &gn->geneve_list, next) { 1355 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) { 1356 *tun_collect_md = geneve->cfg.collect_md; 1357 *tun_on_same_port = true; 1358 } 1359 if (info->key.tun_id == geneve->cfg.info.key.tun_id && 1360 info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1361 !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) 1362 t = geneve; 1363 } 1364 return t; 1365 } 1366 1367 static bool is_tnl_info_zero(const struct ip_tunnel_info *info) 1368 { 1369 return !(info->key.tun_id || info->key.tun_flags || info->key.tos || 1370 info->key.ttl || info->key.label || info->key.tp_src || 1371 memchr_inv(&info->key.u, 0, sizeof(info->key.u))); 1372 } 1373 1374 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, 1375 struct ip_tunnel_info *b) 1376 { 1377 if (ip_tunnel_info_af(a) == AF_INET) 1378 return a->key.u.ipv4.dst == b->key.u.ipv4.dst; 1379 else 1380 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); 1381 } 1382 1383 static int geneve_configure(struct net *net, struct net_device *dev, 1384 struct netlink_ext_ack *extack, 1385 const struct geneve_config *cfg) 1386 { 1387 struct geneve_net *gn = net_generic(net, geneve_net_id); 1388 struct geneve_dev *t, *geneve = netdev_priv(dev); 1389 const struct ip_tunnel_info *info = &cfg->info; 1390 bool tun_collect_md, tun_on_same_port; 1391 int err, encap_len; 1392 1393 if (cfg->collect_md && !is_tnl_info_zero(info)) { 1394 NL_SET_ERR_MSG(extack, 1395 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); 1396 return -EINVAL; 1397 } 1398 1399 geneve->net = net; 1400 geneve->dev = dev; 1401 1402 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md); 1403 if (t) 1404 return -EBUSY; 1405 1406 /* make enough headroom for basic scenario */ 1407 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1408 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { 1409 encap_len += sizeof(struct iphdr); 1410 dev->max_mtu -= sizeof(struct iphdr); 1411 } else { 1412 encap_len += sizeof(struct ipv6hdr); 1413 dev->max_mtu -= sizeof(struct ipv6hdr); 1414 } 1415 dev->needed_headroom = encap_len + ETH_HLEN; 1416 1417 if (cfg->collect_md) { 1418 if (tun_on_same_port) { 1419 NL_SET_ERR_MSG(extack, 1420 "There can be only one externally controlled device on a destination port"); 1421 return -EPERM; 1422 } 1423 } else { 1424 if (tun_collect_md) { 1425 NL_SET_ERR_MSG(extack, 1426 "There already exists an externally controlled device on this destination port"); 1427 return -EPERM; 1428 } 1429 } 1430 1431 dst_cache_reset(&geneve->cfg.info.dst_cache); 1432 memcpy(&geneve->cfg, cfg, sizeof(*cfg)); 1433 1434 if (geneve->cfg.inner_proto_inherit) { 1435 dev->header_ops = NULL; 1436 dev->type = ARPHRD_NONE; 1437 dev->hard_header_len = 0; 1438 dev->addr_len = 0; 1439 dev->flags = IFF_POINTOPOINT | IFF_NOARP; 1440 } 1441 1442 err = register_netdevice(dev); 1443 if (err) 1444 return err; 1445 1446 list_add(&geneve->next, &gn->geneve_list); 1447 return 0; 1448 } 1449 1450 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) 1451 { 1452 memset(info, 0, sizeof(*info)); 1453 info->key.tp_dst = htons(dst_port); 1454 } 1455 1456 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], 1457 struct netlink_ext_ack *extack, 1458 struct geneve_config *cfg, bool changelink) 1459 { 1460 struct ip_tunnel_info *info = &cfg->info; 1461 int attrtype; 1462 1463 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) { 1464 NL_SET_ERR_MSG(extack, 1465 "Cannot specify both IPv4 and IPv6 Remote addresses"); 1466 return -EINVAL; 1467 } 1468 1469 if (data[IFLA_GENEVE_REMOTE]) { 1470 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { 1471 attrtype = IFLA_GENEVE_REMOTE; 1472 goto change_notsup; 1473 } 1474 1475 info->key.u.ipv4.dst = 1476 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 1477 1478 if (ipv4_is_multicast(info->key.u.ipv4.dst)) { 1479 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], 1480 "Remote IPv4 address cannot be Multicast"); 1481 return -EINVAL; 1482 } 1483 } 1484 1485 if (data[IFLA_GENEVE_REMOTE6]) { 1486 #if IS_ENABLED(CONFIG_IPV6) 1487 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { 1488 attrtype = IFLA_GENEVE_REMOTE6; 1489 goto change_notsup; 1490 } 1491 1492 info->mode = IP_TUNNEL_INFO_IPV6; 1493 info->key.u.ipv6.dst = 1494 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 1495 1496 if (ipv6_addr_type(&info->key.u.ipv6.dst) & 1497 IPV6_ADDR_LINKLOCAL) { 1498 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 1499 "Remote IPv6 address cannot be link-local"); 1500 return -EINVAL; 1501 } 1502 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) { 1503 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 1504 "Remote IPv6 address cannot be Multicast"); 1505 return -EINVAL; 1506 } 1507 info->key.tun_flags |= TUNNEL_CSUM; 1508 cfg->use_udp6_rx_checksums = true; 1509 #else 1510 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 1511 "IPv6 support not enabled in the kernel"); 1512 return -EPFNOSUPPORT; 1513 #endif 1514 } 1515 1516 if (data[IFLA_GENEVE_ID]) { 1517 __u32 vni; 1518 __u8 tvni[3]; 1519 __be64 tunid; 1520 1521 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1522 tvni[0] = (vni & 0x00ff0000) >> 16; 1523 tvni[1] = (vni & 0x0000ff00) >> 8; 1524 tvni[2] = vni & 0x000000ff; 1525 1526 tunid = vni_to_tunnel_id(tvni); 1527 if (changelink && (tunid != info->key.tun_id)) { 1528 attrtype = IFLA_GENEVE_ID; 1529 goto change_notsup; 1530 } 1531 info->key.tun_id = tunid; 1532 } 1533 1534 if (data[IFLA_GENEVE_TTL_INHERIT]) { 1535 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) 1536 cfg->ttl_inherit = true; 1537 else 1538 cfg->ttl_inherit = false; 1539 } else if (data[IFLA_GENEVE_TTL]) { 1540 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 1541 cfg->ttl_inherit = false; 1542 } 1543 1544 if (data[IFLA_GENEVE_TOS]) 1545 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 1546 1547 if (data[IFLA_GENEVE_DF]) 1548 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); 1549 1550 if (data[IFLA_GENEVE_LABEL]) { 1551 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & 1552 IPV6_FLOWLABEL_MASK; 1553 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { 1554 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], 1555 "Label attribute only applies for IPv6 Geneve devices"); 1556 return -EINVAL; 1557 } 1558 } 1559 1560 if (data[IFLA_GENEVE_PORT]) { 1561 if (changelink) { 1562 attrtype = IFLA_GENEVE_PORT; 1563 goto change_notsup; 1564 } 1565 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); 1566 } 1567 1568 if (data[IFLA_GENEVE_COLLECT_METADATA]) { 1569 if (changelink) { 1570 attrtype = IFLA_GENEVE_COLLECT_METADATA; 1571 goto change_notsup; 1572 } 1573 cfg->collect_md = true; 1574 } 1575 1576 if (data[IFLA_GENEVE_UDP_CSUM]) { 1577 if (changelink) { 1578 attrtype = IFLA_GENEVE_UDP_CSUM; 1579 goto change_notsup; 1580 } 1581 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 1582 info->key.tun_flags |= TUNNEL_CSUM; 1583 } 1584 1585 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { 1586 #if IS_ENABLED(CONFIG_IPV6) 1587 if (changelink) { 1588 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX; 1589 goto change_notsup; 1590 } 1591 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 1592 info->key.tun_flags &= ~TUNNEL_CSUM; 1593 #else 1594 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], 1595 "IPv6 support not enabled in the kernel"); 1596 return -EPFNOSUPPORT; 1597 #endif 1598 } 1599 1600 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { 1601 #if IS_ENABLED(CONFIG_IPV6) 1602 if (changelink) { 1603 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; 1604 goto change_notsup; 1605 } 1606 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 1607 cfg->use_udp6_rx_checksums = false; 1608 #else 1609 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], 1610 "IPv6 support not enabled in the kernel"); 1611 return -EPFNOSUPPORT; 1612 #endif 1613 } 1614 1615 if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { 1616 if (changelink) { 1617 attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; 1618 goto change_notsup; 1619 } 1620 cfg->inner_proto_inherit = true; 1621 } 1622 1623 return 0; 1624 change_notsup: 1625 NL_SET_ERR_MSG_ATTR(extack, data[attrtype], 1626 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported"); 1627 return -EOPNOTSUPP; 1628 } 1629 1630 static void geneve_link_config(struct net_device *dev, 1631 struct ip_tunnel_info *info, struct nlattr *tb[]) 1632 { 1633 struct geneve_dev *geneve = netdev_priv(dev); 1634 int ldev_mtu = 0; 1635 1636 if (tb[IFLA_MTU]) { 1637 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); 1638 return; 1639 } 1640 1641 switch (ip_tunnel_info_af(info)) { 1642 case AF_INET: { 1643 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; 1644 struct rtable *rt = ip_route_output_key(geneve->net, &fl4); 1645 1646 if (!IS_ERR(rt) && rt->dst.dev) { 1647 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; 1648 ip_rt_put(rt); 1649 } 1650 break; 1651 } 1652 #if IS_ENABLED(CONFIG_IPV6) 1653 case AF_INET6: { 1654 struct rt6_info *rt; 1655 1656 if (!__in6_dev_get(dev)) 1657 break; 1658 1659 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, 1660 NULL, 0); 1661 1662 if (rt && rt->dst.dev) 1663 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; 1664 ip6_rt_put(rt); 1665 break; 1666 } 1667 #endif 1668 } 1669 1670 if (ldev_mtu <= 0) 1671 return; 1672 1673 geneve_change_mtu(dev, ldev_mtu - info->options_len); 1674 } 1675 1676 static int geneve_newlink(struct net *net, struct net_device *dev, 1677 struct nlattr *tb[], struct nlattr *data[], 1678 struct netlink_ext_ack *extack) 1679 { 1680 struct geneve_config cfg = { 1681 .df = GENEVE_DF_UNSET, 1682 .use_udp6_rx_checksums = false, 1683 .ttl_inherit = false, 1684 .collect_md = false, 1685 }; 1686 int err; 1687 1688 init_tnl_info(&cfg.info, GENEVE_UDP_PORT); 1689 err = geneve_nl2info(tb, data, extack, &cfg, false); 1690 if (err) 1691 return err; 1692 1693 err = geneve_configure(net, dev, extack, &cfg); 1694 if (err) 1695 return err; 1696 1697 geneve_link_config(dev, &cfg.info, tb); 1698 1699 return 0; 1700 } 1701 1702 /* Quiesces the geneve device data path for both TX and RX. 1703 * 1704 * On transmit geneve checks for non-NULL geneve_sock before it proceeds. 1705 * So, if we set that socket to NULL under RCU and wait for synchronize_net() 1706 * to complete for the existing set of in-flight packets to be transmitted, 1707 * then we would have quiesced the transmit data path. All the future packets 1708 * will get dropped until we unquiesce the data path. 1709 * 1710 * On receive geneve dereference the geneve_sock stashed in the socket. So, 1711 * if we set that to NULL under RCU and wait for synchronize_net() to 1712 * complete, then we would have quiesced the receive data path. 1713 */ 1714 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, 1715 struct geneve_sock **gs6) 1716 { 1717 *gs4 = rtnl_dereference(geneve->sock4); 1718 rcu_assign_pointer(geneve->sock4, NULL); 1719 if (*gs4) 1720 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL); 1721 #if IS_ENABLED(CONFIG_IPV6) 1722 *gs6 = rtnl_dereference(geneve->sock6); 1723 rcu_assign_pointer(geneve->sock6, NULL); 1724 if (*gs6) 1725 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL); 1726 #else 1727 *gs6 = NULL; 1728 #endif 1729 synchronize_net(); 1730 } 1731 1732 /* Resumes the geneve device data path for both TX and RX. */ 1733 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, 1734 struct geneve_sock __maybe_unused *gs6) 1735 { 1736 rcu_assign_pointer(geneve->sock4, gs4); 1737 if (gs4) 1738 rcu_assign_sk_user_data(gs4->sock->sk, gs4); 1739 #if IS_ENABLED(CONFIG_IPV6) 1740 rcu_assign_pointer(geneve->sock6, gs6); 1741 if (gs6) 1742 rcu_assign_sk_user_data(gs6->sock->sk, gs6); 1743 #endif 1744 synchronize_net(); 1745 } 1746 1747 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], 1748 struct nlattr *data[], 1749 struct netlink_ext_ack *extack) 1750 { 1751 struct geneve_dev *geneve = netdev_priv(dev); 1752 struct geneve_sock *gs4, *gs6; 1753 struct geneve_config cfg; 1754 int err; 1755 1756 /* If the geneve device is configured for metadata (or externally 1757 * controlled, for example, OVS), then nothing can be changed. 1758 */ 1759 if (geneve->cfg.collect_md) 1760 return -EOPNOTSUPP; 1761 1762 /* Start with the existing info. */ 1763 memcpy(&cfg, &geneve->cfg, sizeof(cfg)); 1764 err = geneve_nl2info(tb, data, extack, &cfg, true); 1765 if (err) 1766 return err; 1767 1768 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { 1769 dst_cache_reset(&cfg.info.dst_cache); 1770 geneve_link_config(dev, &cfg.info, tb); 1771 } 1772 1773 geneve_quiesce(geneve, &gs4, &gs6); 1774 memcpy(&geneve->cfg, &cfg, sizeof(cfg)); 1775 geneve_unquiesce(geneve, gs4, gs6); 1776 1777 return 0; 1778 } 1779 1780 static void geneve_dellink(struct net_device *dev, struct list_head *head) 1781 { 1782 struct geneve_dev *geneve = netdev_priv(dev); 1783 1784 list_del(&geneve->next); 1785 unregister_netdevice_queue(dev, head); 1786 } 1787 1788 static size_t geneve_get_size(const struct net_device *dev) 1789 { 1790 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 1791 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 1792 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 1793 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 1794 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ 1795 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ 1796 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 1797 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 1798 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 1799 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 1800 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 1801 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ 1802 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ 1803 0; 1804 } 1805 1806 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 1807 { 1808 struct geneve_dev *geneve = netdev_priv(dev); 1809 struct ip_tunnel_info *info = &geneve->cfg.info; 1810 bool ttl_inherit = geneve->cfg.ttl_inherit; 1811 bool metadata = geneve->cfg.collect_md; 1812 __u8 tmp_vni[3]; 1813 __u32 vni; 1814 1815 tunnel_id_to_vni(info->key.tun_id, tmp_vni); 1816 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; 1817 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 1818 goto nla_put_failure; 1819 1820 if (!metadata && ip_tunnel_info_af(info) == AF_INET) { 1821 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 1822 info->key.u.ipv4.dst)) 1823 goto nla_put_failure; 1824 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 1825 !!(info->key.tun_flags & TUNNEL_CSUM))) 1826 goto nla_put_failure; 1827 1828 #if IS_ENABLED(CONFIG_IPV6) 1829 } else if (!metadata) { 1830 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 1831 &info->key.u.ipv6.dst)) 1832 goto nla_put_failure; 1833 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 1834 !(info->key.tun_flags & TUNNEL_CSUM))) 1835 goto nla_put_failure; 1836 #endif 1837 } 1838 1839 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || 1840 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || 1841 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) 1842 goto nla_put_failure; 1843 1844 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) 1845 goto nla_put_failure; 1846 1847 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) 1848 goto nla_put_failure; 1849 1850 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 1851 goto nla_put_failure; 1852 1853 #if IS_ENABLED(CONFIG_IPV6) 1854 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1855 !geneve->cfg.use_udp6_rx_checksums)) 1856 goto nla_put_failure; 1857 #endif 1858 1859 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) 1860 goto nla_put_failure; 1861 1862 if (geneve->cfg.inner_proto_inherit && 1863 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) 1864 goto nla_put_failure; 1865 1866 return 0; 1867 1868 nla_put_failure: 1869 return -EMSGSIZE; 1870 } 1871 1872 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 1873 .kind = "geneve", 1874 .maxtype = IFLA_GENEVE_MAX, 1875 .policy = geneve_policy, 1876 .priv_size = sizeof(struct geneve_dev), 1877 .setup = geneve_setup, 1878 .validate = geneve_validate, 1879 .newlink = geneve_newlink, 1880 .changelink = geneve_changelink, 1881 .dellink = geneve_dellink, 1882 .get_size = geneve_get_size, 1883 .fill_info = geneve_fill_info, 1884 }; 1885 1886 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 1887 u8 name_assign_type, u16 dst_port) 1888 { 1889 struct nlattr *tb[IFLA_MAX + 1]; 1890 struct net_device *dev; 1891 LIST_HEAD(list_kill); 1892 int err; 1893 struct geneve_config cfg = { 1894 .df = GENEVE_DF_UNSET, 1895 .use_udp6_rx_checksums = true, 1896 .ttl_inherit = false, 1897 .collect_md = true, 1898 }; 1899 1900 memset(tb, 0, sizeof(tb)); 1901 dev = rtnl_create_link(net, name, name_assign_type, 1902 &geneve_link_ops, tb, NULL); 1903 if (IS_ERR(dev)) 1904 return dev; 1905 1906 init_tnl_info(&cfg.info, dst_port); 1907 err = geneve_configure(net, dev, NULL, &cfg); 1908 if (err) { 1909 free_netdev(dev); 1910 return ERR_PTR(err); 1911 } 1912 1913 /* openvswitch users expect packet sizes to be unrestricted, 1914 * so set the largest MTU we can. 1915 */ 1916 err = geneve_change_mtu(dev, IP_MAX_MTU); 1917 if (err) 1918 goto err; 1919 1920 err = rtnl_configure_link(dev, NULL, 0, NULL); 1921 if (err < 0) 1922 goto err; 1923 1924 return dev; 1925 err: 1926 geneve_dellink(dev, &list_kill); 1927 unregister_netdevice_many(&list_kill); 1928 return ERR_PTR(err); 1929 } 1930 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 1931 1932 static int geneve_netdevice_event(struct notifier_block *unused, 1933 unsigned long event, void *ptr) 1934 { 1935 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1936 1937 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) 1938 geneve_offload_rx_ports(dev, true); 1939 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) 1940 geneve_offload_rx_ports(dev, false); 1941 1942 return NOTIFY_DONE; 1943 } 1944 1945 static struct notifier_block geneve_notifier_block __read_mostly = { 1946 .notifier_call = geneve_netdevice_event, 1947 }; 1948 1949 static __net_init int geneve_init_net(struct net *net) 1950 { 1951 struct geneve_net *gn = net_generic(net, geneve_net_id); 1952 1953 INIT_LIST_HEAD(&gn->geneve_list); 1954 INIT_LIST_HEAD(&gn->sock_list); 1955 return 0; 1956 } 1957 1958 static void geneve_destroy_tunnels(struct net *net, struct list_head *head) 1959 { 1960 struct geneve_net *gn = net_generic(net, geneve_net_id); 1961 struct geneve_dev *geneve, *next; 1962 struct net_device *dev, *aux; 1963 1964 /* gather any geneve devices that were moved into this ns */ 1965 for_each_netdev_safe(net, dev, aux) 1966 if (dev->rtnl_link_ops == &geneve_link_ops) 1967 unregister_netdevice_queue(dev, head); 1968 1969 /* now gather any other geneve devices that were created in this ns */ 1970 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { 1971 /* If geneve->dev is in the same netns, it was already added 1972 * to the list by the previous loop. 1973 */ 1974 if (!net_eq(dev_net(geneve->dev), net)) 1975 unregister_netdevice_queue(geneve->dev, head); 1976 } 1977 } 1978 1979 static void __net_exit geneve_exit_batch_net(struct list_head *net_list) 1980 { 1981 struct net *net; 1982 LIST_HEAD(list); 1983 1984 rtnl_lock(); 1985 list_for_each_entry(net, net_list, exit_list) 1986 geneve_destroy_tunnels(net, &list); 1987 1988 /* unregister the devices gathered above */ 1989 unregister_netdevice_many(&list); 1990 rtnl_unlock(); 1991 1992 list_for_each_entry(net, net_list, exit_list) { 1993 const struct geneve_net *gn = net_generic(net, geneve_net_id); 1994 1995 WARN_ON_ONCE(!list_empty(&gn->sock_list)); 1996 } 1997 } 1998 1999 static struct pernet_operations geneve_net_ops = { 2000 .init = geneve_init_net, 2001 .exit_batch = geneve_exit_batch_net, 2002 .id = &geneve_net_id, 2003 .size = sizeof(struct geneve_net), 2004 }; 2005 2006 static int __init geneve_init_module(void) 2007 { 2008 int rc; 2009 2010 rc = register_pernet_subsys(&geneve_net_ops); 2011 if (rc) 2012 goto out1; 2013 2014 rc = register_netdevice_notifier(&geneve_notifier_block); 2015 if (rc) 2016 goto out2; 2017 2018 rc = rtnl_link_register(&geneve_link_ops); 2019 if (rc) 2020 goto out3; 2021 2022 return 0; 2023 out3: 2024 unregister_netdevice_notifier(&geneve_notifier_block); 2025 out2: 2026 unregister_pernet_subsys(&geneve_net_ops); 2027 out1: 2028 return rc; 2029 } 2030 late_initcall(geneve_init_module); 2031 2032 static void __exit geneve_cleanup_module(void) 2033 { 2034 rtnl_link_unregister(&geneve_link_ops); 2035 unregister_netdevice_notifier(&geneve_notifier_block); 2036 unregister_pernet_subsys(&geneve_net_ops); 2037 } 2038 module_exit(geneve_cleanup_module); 2039 2040 MODULE_LICENSE("GPL"); 2041 MODULE_VERSION(GENEVE_NETDEV_VER); 2042 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 2043 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 2044 MODULE_ALIAS_RTNL_LINK("geneve"); 2045