// SPDX-License-Identifier: GPL-2.0-only
/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/hash.h>
#include <net/ipv6_stubs.h>
#include <net/dst_metadata.h>
#include <net/gro_cells.h>
#include <net/rtnetlink.h>
#include <net/geneve.h>
#include <net/gro.h>
#include <net/protocol.h>

#define GENEVE_NETDEV_VER	"0.6"

#define GENEVE_N_VID		(1u << 24)
#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)

#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

#define GENEVE_VER 0
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)

/* per-network namespace private data for this module */
struct geneve_net {
	struct list_head	geneve_list;
	struct list_head	sock_list;
};

static unsigned int geneve_net_id;

struct geneve_dev_node {
	struct hlist_node hlist;
	struct geneve_dev *geneve;
};

struct geneve_config {
	struct ip_tunnel_info	info;
	bool			collect_md;
	bool			use_udp6_rx_checksums;
	bool			ttl_inherit;
	enum ifla_geneve_df	df;
	bool			inner_proto_inherit;
};

/* Pseudo network device */
struct geneve_dev {
	struct geneve_dev_node hlist4;	/* vni hash table for IPv4 socket */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_dev_node hlist6;	/* vni hash table for IPv6 socket */
#endif
	struct net	   *net;	/* netns for packet i/o */
	struct net_device  *dev;	/* netdev for geneve tunnel */
	struct geneve_sock __rcu *sock4;	/* IPv4 socket used for geneve tunnel */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_sock __rcu *sock6;	/* IPv6 socket used for geneve tunnel */
#endif
	struct list_head   next;	/* geneve's per namespace list */
	struct gro_cells   gro_cells;
	struct geneve_config cfg;
};

struct geneve_sock {
	bool			collect_md;
	struct list_head	list;
	struct socket		*sock;
	struct rcu_head		rcu;
	int			refcnt;
	struct hlist_head	vni_list[VNI_HASH_SIZE];
};

static inline __u32 geneve_net_vni_hash(u8 vni[3])
{
	__u32 vnid;

	vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
	return hash_32(vnid, VNI_HASH_BITS);
}

static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
	return (vni[0] << 16) | (vni[1] << 8) | vni[2];
#else
	return (__force __be64)(((__force u64)vni[0] << 40) |
				((__force u64)vni[1] << 48) |
				((__force u64)vni[2] << 56));
#endif
}

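/*
 * Annotation on the VNI <-> tunnel id mapping above: on both byte orders the
 * 24-bit VNI ends up in bytes 5..7 of the network-order 64-bit tunnel id
 * (its three least significant bytes), which is exactly what
 * eq_tun_id_and_vni() below relies on when it memcmp()s against &tun_id[5].
 */
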
/* Convert 64 bit tunnel ID to 24 bit VNI. */
static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
{
#ifdef __BIG_ENDIAN
	vni[0] = (__force __u8)(tun_id >> 16);
	vni[1] = (__force __u8)(tun_id >> 8);
	vni[2] = (__force __u8)tun_id;
#else
	vni[0] = (__force __u8)((__force u64)tun_id >> 40);
	vni[1] = (__force __u8)((__force u64)tun_id >> 48);
	vni[2] = (__force __u8)((__force u64)tun_id >> 56);
#endif
}

static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
{
	return !memcmp(vni, &tun_id[5], 3);
}

static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
{
	return gs->sock->sk->sk_family;
}

static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
					__be32 addr, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev_node *node;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
		    addr == node->geneve->cfg.info.key.u.ipv4.dst)
			return node->geneve;
	}
	return NULL;
}

#if IS_ENABLED(CONFIG_IPV6)
static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
					 struct in6_addr addr6, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev_node *node;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
		    ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
			return node->geneve;
	}
	return NULL;
}
#endif

static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}

static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
					    struct sk_buff *skb)
{
	static u8 zero_vni[3];
	u8 *vni;

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph;
		__be32 addr;

		iph = ip_hdr(skb); /* outer IP header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr = 0;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr = iph->saddr;
		}

		return geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (geneve_get_sk_family(gs) == AF_INET6) {
		static struct in6_addr zero_addr6;
		struct ipv6hdr *ip6h;
		struct in6_addr addr6;

		ip6h = ipv6_hdr(skb); /* outer IPv6 header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr6 = zero_addr6;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr6 = ip6h->saddr;
		}

		return geneve6_lookup(gs, addr6, vni);
#endif
	}
	return NULL;
}

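/*
 * Annotation: in collect_md (externally controlled) mode there is a single
 * device per UDP socket, hashed under the all-zeroes VNI with a zero remote
 * address, so geneve_lookup_skb() above deliberately wildcards both fields
 * instead of matching on the received header.
 */
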
/* geneve receive/decap routine */
static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
		      struct sk_buff *skb)
{
	struct genevehdr *gnvh = geneve_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	unsigned int len;
	int nh, err = 0;
	void *oiph;

	if (ip_tunnel_collect_metadata() || gs->collect_md) {
		__be16 flags;

		flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
			(gnvh->critical ? TUNNEL_CRIT_OPT : 0);

		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
					 vni_to_tunnel_id(gnvh->vni),
					 gnvh->opt_len * 4);
		if (!tun_dst) {
			geneve->dev->stats.rx_dropped++;
			goto drop;
		}
		/* Update tunnel dst according to Geneve options. */
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					gnvh->options, gnvh->opt_len * 4,
					TUNNEL_GENEVE_OPT);
	} else {
		/* Drop packets w/ critical options,
		 * since we don't support any...
		 */
		if (gnvh->critical) {
			geneve->dev->stats.rx_frame_errors++;
			geneve->dev->stats.rx_errors++;
			goto drop;
		}
	}

	if (tun_dst)
		skb_dst_set(skb, &tun_dst->dst);

	if (gnvh->proto_type == htons(ETH_P_TEB)) {
		skb_reset_mac_header(skb);
		skb->protocol = eth_type_trans(skb, geneve->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

		/* Ignore packet loops (and multicast echo) */
		if (ether_addr_equal(eth_hdr(skb)->h_source,
				     geneve->dev->dev_addr)) {
			geneve->dev->stats.rx_errors++;
			goto drop;
		}
	} else {
		skb_reset_mac_header(skb);
		skb->dev = geneve->dev;
		skb->pkt_type = PACKET_HOST;
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_reset_network_header(skb);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(geneve->dev, rx_length_errors);
		DEV_STATS_INC(geneve->dev, rx_errors);
		goto drop;
	}

	/* Get the outer header. */
	oiph = skb->head + nh;

	if (geneve_get_sk_family(gs) == AF_INET)
		err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
	else
		err = IP6_ECN_decapsulate(oiph, skb);
#endif

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (geneve_get_sk_family(gs) == AF_INET)
				net_info_ratelimited("non-ECT from %pI4 "
						     "with TOS=%#x\n",
						     &((struct iphdr *)oiph)->saddr,
						     ((struct iphdr *)oiph)->tos);
#if IS_ENABLED(CONFIG_IPV6)
			else
				net_info_ratelimited("non-ECT from %pI6\n",
						     &((struct ipv6hdr *)oiph)->saddr);
#endif
		}
		if (err > 1) {
			++geneve->dev->stats.rx_frame_errors;
			++geneve->dev->stats.rx_errors;
			goto drop;
		}
	}

	len = skb->len;
	err = gro_cells_receive(&geneve->gro_cells, skb);
	if (likely(err == NET_RX_SUCCESS))
		dev_sw_netstats_rx_add(geneve->dev, len);

	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
}

/* Setup stats when device is created */
static int geneve_init(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	int err;

	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&geneve->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		gro_cells_destroy(&geneve->gro_cells);
		return err;
	}
	netdev_lockdep_set_classes(dev);
	return 0;
}

static void geneve_uninit(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	dst_cache_destroy(&geneve->cfg.info.dst_cache);
	gro_cells_destroy(&geneve->gro_cells);
	free_percpu(dev->tstats);
}

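/*
 * Annotation: geneve_udp_encap_recv() and geneve_udp_encap_err_lookup() below
 * are installed on the UDP tunnel socket in geneve_socket_create(); they run
 * from the UDP receive path under the RCU read lock, which is why they use
 * rcu_dereference_sk_user_data() to fetch the geneve_sock.
 */
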
/* Callback from net/ipv4/udp.c to receive packets */
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct genevehdr *geneveh;
	struct geneve_dev *geneve;
	struct geneve_sock *gs;
	__be16 inner_proto;
	int opts_len;

	/* Need UDP and Geneve header to be present */
	if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
		goto drop;

	/* Return packets with reserved bits set */
	geneveh = geneve_hdr(skb);
	if (unlikely(geneveh->ver != GENEVE_VER))
		goto drop;

	gs = rcu_dereference_sk_user_data(sk);
	if (!gs)
		goto drop;

	geneve = geneve_lookup_skb(gs, skb);
	if (!geneve)
		goto drop;

	inner_proto = geneveh->proto_type;

	if (unlikely((!geneve->cfg.inner_proto_inherit &&
		      inner_proto != htons(ETH_P_TEB)))) {
		geneve->dev->stats.rx_dropped++;
		goto drop;
	}

	opts_len = geneveh->opt_len * 4;
	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
				 !net_eq(geneve->net, dev_net(geneve->dev)))) {
		geneve->dev->stats.rx_dropped++;
		goto drop;
	}

	geneve_rx(geneve, gs, skb);
	return 0;

drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;
}

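/*
 * Annotation: the err_lookup callback below is consulted when an ICMP error
 * arrives for the tunnel's UDP port. Returning 0 indicates the embedded
 * packet belongs to a known Geneve tunnel, so the error can be handled
 * rather than ignored.
 */
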
/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
{
	struct genevehdr *geneveh;
	struct geneve_sock *gs;
	u8 zero_vni[3] = { 0 };
	u8 *vni = zero_vni;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
		return -EINVAL;

	geneveh = geneve_hdr(skb);
	if (geneveh->ver != GENEVE_VER)
		return -EINVAL;

	if (geneveh->proto_type != htons(ETH_P_TEB))
		return -EINVAL;

	gs = rcu_dereference_sk_user_data(sk);
	if (!gs)
		return -ENOENT;

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph = ip_hdr(skb);
		__be32 addr4 = 0;

		if (!gs->collect_md) {
			vni = geneve_hdr(skb)->vni;
			addr4 = iph->daddr;
		}

		return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (geneve_get_sk_family(gs) == AF_INET6) {
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		struct in6_addr addr6;

		memset(&addr6, 0, sizeof(struct in6_addr));

		if (!gs->collect_md) {
			vni = geneve_hdr(skb)->vni;
			addr6 = ip6h->daddr;
		}

		return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
	}
#endif

	return -EPFNOSUPPORT;
}

static struct socket *geneve_create_sock(struct net *net, bool ipv6,
					 __be16 port, bool ipv6_rx_csum)
{
	struct socket *sock;
	struct udp_port_cfg udp_conf;
	int err;

	memset(&udp_conf, 0, sizeof(udp_conf));

	if (ipv6) {
		udp_conf.family = AF_INET6;
		udp_conf.ipv6_v6only = 1;
		udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
	} else {
		udp_conf.family = AF_INET;
		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
	}

	udp_conf.local_udp_port = port;

	/* Open UDP socket */
	err = udp_sock_create(net, &udp_conf, &sock);
	if (err < 0)
		return ERR_PTR(err);

	udp_allow_gso(sock->sk);
	return sock;
}

static int geneve_hlen(struct genevehdr *gh)
{
	return sizeof(*gh) + gh->opt_len * 4;
}

static struct sk_buff *geneve_gro_receive(struct sock *sk,
					  struct list_head *head,
					  struct sk_buff *skb)
{
	struct sk_buff *pp = NULL;
	struct sk_buff *p;
	struct genevehdr *gh, *gh2;
	unsigned int hlen, gh_len, off_gnv;
	const struct packet_offload *ptype;
	__be16 type;
	int flush = 1;

	off_gnv = skb_gro_offset(skb);
	hlen = off_gnv + sizeof(*gh);
	gh = skb_gro_header(skb, hlen, off_gnv);
	if (unlikely(!gh))
		goto out;

	if (gh->ver != GENEVE_VER || gh->oam)
		goto out;
	gh_len = geneve_hlen(gh);

	hlen = off_gnv + gh_len;
	if (skb_gro_header_hard(skb, hlen)) {
		gh = skb_gro_header_slow(skb, hlen, off_gnv);
		if (unlikely(!gh))
			goto out;
	}

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		gh2 = (struct genevehdr *)(p->data + off_gnv);
		if (gh->opt_len != gh2->opt_len ||
		    memcmp(gh, gh2, gh_len)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, gh_len);
	skb_gro_postpull_rcsum(skb, gh, gh_len);
	type = gh->proto_type;
	if (likely(type == htons(ETH_P_TEB)))
		return call_gro_receive(eth_gro_receive, head, skb);

	ptype = gro_find_receive_by_type(type);
	if (!ptype)
		goto out;

	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
	flush = 0;

out:
	skb_gro_flush_final(skb, pp, flush);

	return pp;
}

static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
			       int nhoff)
{
	struct genevehdr *gh;
	struct packet_offload *ptype;
	__be16 type;
	int gh_len;
	int err = -ENOSYS;

	gh = (struct genevehdr *)(skb->data + nhoff);
	gh_len = geneve_hlen(gh);
	type = gh->proto_type;

	/* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
	if (likely(type == htons(ETH_P_TEB)))
		return eth_gro_complete(skb, nhoff + gh_len);

	ptype = gro_find_complete_by_type(type);
	if (ptype)
		err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);

	skb_set_inner_mac_header(skb, nhoff + gh_len);

	return err;
}

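/*
 * Annotation on GRO: geneve_gro_receive() only lets two packets aggregate
 * when their Geneve headers (version, flags, VNI and the full option block)
 * compare byte-for-byte equal; any difference clears same_flow and the
 * packets are kept apart.
 */
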
/* Create new listen socket if needed */
static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
						bool ipv6, bool ipv6_rx_csum)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;
	struct socket *sock;
	struct udp_tunnel_sock_cfg tunnel_cfg;
	int h;

	gs = kzalloc(sizeof(*gs), GFP_KERNEL);
	if (!gs)
		return ERR_PTR(-ENOMEM);

	sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
	if (IS_ERR(sock)) {
		kfree(gs);
		return ERR_CAST(sock);
	}

	gs->sock = sock;
	gs->refcnt = 1;
	for (h = 0; h < VNI_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&gs->vni_list[h]);

	/* Initialize the geneve udp offloads structure */
	udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);

	/* Mark socket as an encapsulation socket */
	memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
	tunnel_cfg.sk_user_data = gs;
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.gro_receive = geneve_gro_receive;
	tunnel_cfg.gro_complete = geneve_gro_complete;
	tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
	tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
	tunnel_cfg.encap_destroy = NULL;
	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
	list_add(&gs->list, &gn->sock_list);
	return gs;
}

static void __geneve_sock_release(struct geneve_sock *gs)
{
	if (!gs || --gs->refcnt)
		return;

	list_del(&gs->list);
	udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
	udp_tunnel_sock_release(gs->sock);
	kfree_rcu(gs, rcu);
}

static void geneve_sock_release(struct geneve_dev *geneve)
{
	struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);

	rcu_assign_pointer(geneve->sock6, NULL);
#endif

	rcu_assign_pointer(geneve->sock4, NULL);
	synchronize_net();

	__geneve_sock_release(gs4);
#if IS_ENABLED(CONFIG_IPV6)
	__geneve_sock_release(gs6);
#endif
}

static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
					    sa_family_t family,
					    __be16 dst_port)
{
	struct geneve_sock *gs;

	list_for_each_entry(gs, &gn->sock_list, list) {
		if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
		    geneve_get_sk_family(gs) == family) {
			return gs;
		}
	}
	return NULL;
}

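/*
 * Annotation: UDP sockets are shared per (namespace, family, port).
 * geneve_sock_add() below first tries geneve_find_sock() and only creates a
 * new socket when nothing matches, bumping refcnt otherwise; the refcnt and
 * sock_list manipulation happens under RTNL.
 */
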
static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
{
	struct net *net = geneve->net;
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev_node *node;
	struct geneve_sock *gs;
	__u8 vni[3];
	__u32 hash;

	gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET,
			      geneve->cfg.info.key.tp_dst);
	if (gs) {
		gs->refcnt++;
		goto out;
	}

	gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
				  geneve->cfg.use_udp6_rx_checksums);
	if (IS_ERR(gs))
		return PTR_ERR(gs);

out:
	gs->collect_md = geneve->cfg.collect_md;
#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6) {
		rcu_assign_pointer(geneve->sock6, gs);
		node = &geneve->hlist6;
	} else
#endif
	{
		rcu_assign_pointer(geneve->sock4, gs);
		node = &geneve->hlist4;
	}
	node->geneve = geneve;

	tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
	hash = geneve_net_vni_hash(vni);
	hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
	return 0;
}

static int geneve_open(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	bool metadata = geneve->cfg.collect_md;
	bool ipv4, ipv6;
	int ret = 0;

	ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
	ipv4 = !ipv6 || metadata;
#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6) {
		ret = geneve_sock_add(geneve, true);
		if (ret < 0 && ret != -EAFNOSUPPORT)
			ipv4 = false;
	}
#endif
	if (ipv4)
		ret = geneve_sock_add(geneve, false);
	if (ret < 0)
		geneve_sock_release(geneve);

	return ret;
}

static int geneve_stop(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	hlist_del_init_rcu(&geneve->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
	hlist_del_init_rcu(&geneve->hlist6.hlist);
#endif
	geneve_sock_release(geneve);
	return 0;
}

static void geneve_build_header(struct genevehdr *geneveh,
				const struct ip_tunnel_info *info,
				__be16 inner_proto)
{
	geneveh->ver = GENEVE_VER;
	geneveh->opt_len = info->options_len / 4;
	geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
	geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
	geneveh->rsvd1 = 0;
	tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
	geneveh->proto_type = inner_proto;
	geneveh->rsvd2 = 0;

	if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
		ip_tunnel_info_opts_get(geneveh->options, info);
}

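/*
 * Annotation: Geneve carries its option length in 4-byte words, which is why
 * geneve_build_header() divides options_len by 4 and the receive path
 * multiplies opt_len by 4 when pulling headers.
 */
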
static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
			    const struct ip_tunnel_info *info,
			    bool xnet, int ip_hdr_len,
			    bool inner_proto_inherit)
{
	bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
	struct genevehdr *gnvh;
	__be16 inner_proto;
	int min_headroom;
	int err;

	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, xnet);

	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
		       GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		goto free_dst;

	err = udp_tunnel_handle_offloads(skb, udp_sum);
	if (err)
		goto free_dst;

	gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
	inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
	geneve_build_header(gnvh, info, inner_proto);
	skb_set_inner_protocol(skb, inner_proto);
	return 0;

free_dst:
	dst_release(dst);
	return err;
}

static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
				       struct net_device *dev,
				       struct geneve_sock *gs4,
				       struct flowi4 *fl4,
				       const struct ip_tunnel_info *info,
				       __be16 dport, __be16 sport,
				       __u8 *full_tos)
{
	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
	struct geneve_dev *geneve = netdev_priv(dev);
	struct dst_cache *dst_cache;
	struct rtable *rt = NULL;
	__u8 tos;

	if (!gs4)
		return ERR_PTR(-EIO);

	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_mark = skb->mark;
	fl4->flowi4_proto = IPPROTO_UDP;
	fl4->daddr = info->key.u.ipv4.dst;
	fl4->saddr = info->key.u.ipv4.src;
	fl4->fl4_dport = dport;
	fl4->fl4_sport = sport;
	fl4->flowi4_flags = info->key.flow_flags;

	tos = info->key.tos;
	if ((tos == 1) && !geneve->cfg.collect_md) {
		tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
		use_cache = false;
	}
	fl4->flowi4_tos = RT_TOS(tos);
	if (full_tos)
		*full_tos = tos;

	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
		if (rt)
			return rt;
	}
	rt = ip_route_output_key(geneve->net, fl4);
	if (IS_ERR(rt)) {
		netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (rt->dst.dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
		ip_rt_put(rt);
		return ERR_PTR(-ELOOP);
	}
	if (use_cache)
		dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
	return rt;
}

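/*
 * Annotation: in both route lookups a configured TOS/traffic-class value of 1
 * acts as an "inherit" sentinel: the DSCP is copied from the inner packet
 * instead, and the dst cache is bypassed because the result then varies per
 * packet.
 */
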
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
					   struct net_device *dev,
					   struct geneve_sock *gs6,
					   struct flowi6 *fl6,
					   const struct ip_tunnel_info *info,
					   __be16 dport, __be16 sport)
{
	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
	struct geneve_dev *geneve = netdev_priv(dev);
	struct dst_entry *dst = NULL;
	struct dst_cache *dst_cache;
	__u8 prio;

	if (!gs6)
		return ERR_PTR(-EIO);

	memset(fl6, 0, sizeof(*fl6));
	fl6->flowi6_mark = skb->mark;
	fl6->flowi6_proto = IPPROTO_UDP;
	fl6->daddr = info->key.u.ipv6.dst;
	fl6->saddr = info->key.u.ipv6.src;
	fl6->fl6_dport = dport;
	fl6->fl6_sport = sport;

	prio = info->key.tos;
	if ((prio == 1) && !geneve->cfg.collect_md) {
		prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
		use_cache = false;
	}

	fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label);
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
		if (dst)
			return dst;
	}
	dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}

	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
	return dst;
}
#endif

static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
			   struct geneve_dev *geneve,
			   const struct ip_tunnel_info *info)
{
	bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
	bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
	struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
	const struct ip_tunnel_key *key = &info->key;
	struct rtable *rt;
	struct flowi4 fl4;
	__u8 full_tos;
	__u8 tos, ttl;
	__be16 df = 0;
	__be16 sport;
	int err;

	if (!skb_vlan_inet_prepare(skb, inner_proto_inherit))
		return -EINVAL;

	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
	rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
			      geneve->cfg.info.key.tp_dst, sport, &full_tos);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	err = skb_tunnel_check_pmtu(skb, &rt->dst,
				    GENEVE_IPV4_HLEN + info->options_len,
				    netif_is_any_bridge_port(dev));
	if (err < 0) {
		dst_release(&rt->dst);
		return err;
	} else if (err) {
		struct ip_tunnel_info *info;

		info = skb_tunnel_info(skb);
		if (info) {
			struct ip_tunnel_info *unclone;

			unclone = skb_tunnel_info_unclone(skb);
			if (unlikely(!unclone)) {
				dst_release(&rt->dst);
				return -ENOMEM;
			}

			unclone->key.u.ipv4.dst = fl4.saddr;
			unclone->key.u.ipv4.src = fl4.daddr;
		}

		if (!pskb_may_pull(skb, ETH_HLEN)) {
			dst_release(&rt->dst);
			return -EINVAL;
		}

		skb->protocol = eth_type_trans(skb, geneve->dev);
		__netif_rx(skb);
		dst_release(&rt->dst);
		return -EMSGSIZE;
	}

	if (geneve->cfg.collect_md) {
		tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
		ttl = key->ttl;

		df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
	} else {
		tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb);
		if (geneve->cfg.ttl_inherit)
			ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
		else
			ttl = key->ttl;
		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);

		if (geneve->cfg.df == GENEVE_DF_SET) {
			df = htons(IP_DF);
		} else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
			struct ethhdr *eth = eth_hdr(skb);

			if (ntohs(eth->h_proto) == ETH_P_IPV6) {
				df = htons(IP_DF);
			} else if (ntohs(eth->h_proto) == ETH_P_IP) {
				struct iphdr *iph = ip_hdr(skb);

				if (iph->frag_off & htons(IP_DF))
					df = htons(IP_DF);
			}
		}
	}

	err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr),
			       inner_proto_inherit);
	if (unlikely(err))
		return err;

	udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
			    tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
			    !net_eq(geneve->net, dev_net(geneve->dev)),
			    !(info->key.tun_flags & TUNNEL_CSUM));
	return 0;
}

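/*
 * Annotation on PMTU handling in both xmit paths: a positive return from
 * skb_tunnel_check_pmtu() indicates the skb has been turned into an ICMP(v6)
 * "packet too big" reply. The driver then swaps the tunnel endpoints in the
 * (unshared) metadata and loops the skb back in through __netif_rx(), while
 * -EMSGSIZE tells geneve_xmit() not to free the skb again.
 */
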
#if IS_ENABLED(CONFIG_IPV6)
static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
			    struct geneve_dev *geneve,
			    const struct ip_tunnel_info *info)
{
	bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
	bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
	struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
	const struct ip_tunnel_key *key = &info->key;
	struct dst_entry *dst = NULL;
	struct flowi6 fl6;
	__u8 prio, ttl;
	__be16 sport;
	int err;

	if (!skb_vlan_inet_prepare(skb, inner_proto_inherit))
		return -EINVAL;

	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
	dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
				geneve->cfg.info.key.tp_dst, sport);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	err = skb_tunnel_check_pmtu(skb, dst,
				    GENEVE_IPV6_HLEN + info->options_len,
				    netif_is_any_bridge_port(dev));
	if (err < 0) {
		dst_release(dst);
		return err;
	} else if (err) {
		struct ip_tunnel_info *info = skb_tunnel_info(skb);

		if (info) {
			struct ip_tunnel_info *unclone;

			unclone = skb_tunnel_info_unclone(skb);
			if (unlikely(!unclone)) {
				dst_release(dst);
				return -ENOMEM;
			}

			unclone->key.u.ipv6.dst = fl6.saddr;
			unclone->key.u.ipv6.src = fl6.daddr;
		}

		if (!pskb_may_pull(skb, ETH_HLEN)) {
			dst_release(dst);
			return -EINVAL;
		}

		skb->protocol = eth_type_trans(skb, geneve->dev);
		__netif_rx(skb);
		dst_release(dst);
		return -EMSGSIZE;
	}

	if (geneve->cfg.collect_md) {
		prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
		ttl = key->ttl;
	} else {
		prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
					   ip_hdr(skb), skb);
		if (geneve->cfg.ttl_inherit)
			ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
		else
			ttl = key->ttl;
		ttl = ttl ? : ip6_dst_hoplimit(dst);
	}
	err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr),
			       inner_proto_inherit);
	if (unlikely(err))
		return err;

	udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
			     &fl6.saddr, &fl6.daddr, prio, ttl,
			     info->key.label, sport, geneve->cfg.info.key.tp_dst,
			     !(info->key.tun_flags & TUNNEL_CSUM));
	return 0;
}
#endif

static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct ip_tunnel_info *info = NULL;
	int err;

	if (geneve->cfg.collect_md) {
		info = skb_tunnel_info(skb);
		if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
			netdev_dbg(dev, "no tunnel metadata\n");
			dev_kfree_skb(skb);
			dev->stats.tx_dropped++;
			return NETDEV_TX_OK;
		}
	} else {
		info = &geneve->cfg.info;
	}

	rcu_read_lock();
#if IS_ENABLED(CONFIG_IPV6)
	if (info->mode & IP_TUNNEL_INFO_IPV6)
		err = geneve6_xmit_skb(skb, dev, geneve, info);
	else
#endif
		err = geneve_xmit_skb(skb, dev, geneve, info);
	rcu_read_unlock();

	if (likely(!err))
		return NETDEV_TX_OK;

	if (err != -EMSGSIZE)
		dev_kfree_skb(skb);

	if (err == -ELOOP)
		dev->stats.collisions++;
	else if (err == -ENETUNREACH)
		dev->stats.tx_carrier_errors++;

	dev->stats.tx_errors++;
	return NETDEV_TX_OK;
}

static int geneve_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu > dev->max_mtu)
		new_mtu = dev->max_mtu;
	else if (new_mtu < dev->min_mtu)
		new_mtu = dev->min_mtu;

	dev->mtu = new_mtu;
	return 0;
}

static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct geneve_dev *geneve = netdev_priv(dev);
	__be16 sport;

	if (ip_tunnel_info_af(info) == AF_INET) {
		struct rtable *rt;
		struct flowi4 fl4;

		struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
		sport = udp_flow_src_port(geneve->net, skb,
					  1, USHRT_MAX, true);

		rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
				      geneve->cfg.info.key.tp_dst, sport, NULL);
		if (IS_ERR(rt))
			return PTR_ERR(rt);

		ip_rt_put(rt);
		info->key.u.ipv4.src = fl4.saddr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (ip_tunnel_info_af(info) == AF_INET6) {
		struct dst_entry *dst;
		struct flowi6 fl6;

		struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
		sport = udp_flow_src_port(geneve->net, skb,
					  1, USHRT_MAX, true);

		dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
					geneve->cfg.info.key.tp_dst, sport);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		dst_release(dst);
		info->key.u.ipv6.src = fl6.saddr;
#endif
	} else {
		return -EINVAL;
	}

	info->key.tp_src = sport;
	info->key.tp_dst = geneve->cfg.info.key.tp_dst;
	return 0;
}

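/*
 * Annotation: .ndo_fill_metadata_dst lets metadata-based users (lwtunnel
 * routes, Open vSwitch and similar) pre-populate the tunnel key with the
 * source address and UDP source port this device would pick, by running the
 * same route lookup as the transmit path without sending anything.
 */
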
static const struct net_device_ops geneve_netdev_ops = {
	.ndo_init		= geneve_init,
	.ndo_uninit		= geneve_uninit,
	.ndo_open		= geneve_open,
	.ndo_stop		= geneve_stop,
	.ndo_start_xmit		= geneve_xmit,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_change_mtu		= geneve_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_fill_metadata_dst	= geneve_fill_metadata_dst,
};

static void geneve_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *drvinfo)
{
	strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
	strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
}

static const struct ethtool_ops geneve_ethtool_ops = {
	.get_drvinfo	= geneve_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

/* Info for udev, that this is a virtual tunnel endpoint */
static struct device_type geneve_type = {
	.name = "geneve",
};

/* Calls the ndo_udp_tunnel_add of the caller in order to
 * supply the listening GENEVE udp ports. Callers are expected
 * to implement the ndo_udp_tunnel_add.
 */
static void geneve_offload_rx_ports(struct net_device *dev, bool push)
{
	struct net *net = dev_net(dev);
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;

	rcu_read_lock();
	list_for_each_entry_rcu(gs, &gn->sock_list, list) {
		if (push) {
			udp_tunnel_push_rx_port(dev, gs->sock,
						UDP_TUNNEL_TYPE_GENEVE);
		} else {
			udp_tunnel_drop_rx_port(dev, gs->sock,
						UDP_TUNNEL_TYPE_GENEVE);
		}
	}
	rcu_read_unlock();
}

/* Initialize the device structure. */
static void geneve_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops = &geneve_netdev_ops;
	dev->ethtool_ops = &geneve_ethtool_ops;
	dev->needs_free_netdev = true;

	SET_NETDEV_DEVTYPE(dev, &geneve_type);

	dev->features |= NETIF_F_LLTX;
	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
	dev->features |= NETIF_F_RXCSUM;
	dev->features |= NETIF_F_GSO_SOFTWARE;

	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

	/* MTU range: 68 - (something less than 65535) */
	dev->min_mtu = ETH_MIN_MTU;
	/* The max_mtu calculation does not take account of GENEVE
	 * options, to avoid excluding potentially valid
	 * configurations. This will be further reduced by IPvX hdr size.
	 */
	dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;

	netif_keep_dst(dev);
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
	eth_hw_addr_random(dev);
}

static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
	[IFLA_GENEVE_UNSPEC]		= { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
	[IFLA_GENEVE_ID]		= { .type = NLA_U32 },
	[IFLA_GENEVE_REMOTE]		= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GENEVE_REMOTE6]		= { .len = sizeof(struct in6_addr) },
	[IFLA_GENEVE_TTL]		= { .type = NLA_U8 },
	[IFLA_GENEVE_TOS]		= { .type = NLA_U8 },
	[IFLA_GENEVE_LABEL]		= { .type = NLA_U32 },
	[IFLA_GENEVE_PORT]		= { .type = NLA_U16 },
	[IFLA_GENEVE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GENEVE_UDP_CSUM]		= { .type = NLA_U8 },
	[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
	[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
	[IFLA_GENEVE_TTL_INHERIT]	= { .type = NLA_U8 },
	[IFLA_GENEVE_DF]		= { .type = NLA_U8 },
	[IFLA_GENEVE_INNER_PROTO_INHERIT]	= { .type = NLA_FLAG },
};

static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
					    "Provided link layer address is not Ethernet");
			return -EINVAL;
		}

		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
					    "Provided Ethernet address is not unicast");
			return -EADDRNOTAVAIL;
		}
	}

	if (!data) {
		NL_SET_ERR_MSG(extack,
			       "Not enough attributes provided to perform the operation");
		return -EINVAL;
	}

	if (data[IFLA_GENEVE_ID]) {
		__u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);

		if (vni >= GENEVE_N_VID) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
					    "Geneve ID must be lower than 16777216");
			return -ERANGE;
		}
	}

	if (data[IFLA_GENEVE_DF]) {
		enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);

		if (df < 0 || df > GENEVE_DF_MAX) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
					    "Invalid DF attribute");
			return -EINVAL;
		}
	}

	return 0;
}

static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
					  const struct ip_tunnel_info *info,
					  bool *tun_on_same_port,
					  bool *tun_collect_md)
{
	struct geneve_dev *geneve, *t = NULL;

	*tun_on_same_port = false;
	*tun_collect_md = false;
	list_for_each_entry(geneve, &gn->geneve_list, next) {
		if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
			*tun_collect_md = geneve->cfg.collect_md;
			*tun_on_same_port = true;
		}
		if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
		    info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
		    !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
			t = geneve;
	}
	return t;
}

static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
{
	return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
		 info->key.ttl || info->key.label || info->key.tp_src ||
		 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
}

static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
				  struct ip_tunnel_info *b)
{
	if (ip_tunnel_info_af(a) == AF_INET)
		return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
	else
		return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
}

static int geneve_configure(struct net *net, struct net_device *dev,
			    struct netlink_ext_ack *extack,
			    const struct geneve_config *cfg)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev *t, *geneve = netdev_priv(dev);
	const struct ip_tunnel_info *info = &cfg->info;
	bool tun_collect_md, tun_on_same_port;
	int err, encap_len;

	if (cfg->collect_md && !is_tnl_info_zero(info)) {
		NL_SET_ERR_MSG(extack,
			       "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
		return -EINVAL;
	}

	geneve->net = net;
	geneve->dev = dev;

	t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
	if (t)
		return -EBUSY;

	/* make enough headroom for basic scenario */
	encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
	if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
		encap_len += sizeof(struct iphdr);
		dev->max_mtu -= sizeof(struct iphdr);
	} else {
		encap_len += sizeof(struct ipv6hdr);
		dev->max_mtu -= sizeof(struct ipv6hdr);
	}
	dev->needed_headroom = encap_len + ETH_HLEN;

	if (cfg->collect_md) {
		if (tun_on_same_port) {
			NL_SET_ERR_MSG(extack,
				       "There can be only one externally controlled device on a destination port");
			return -EPERM;
		}
	} else {
		if (tun_collect_md) {
			NL_SET_ERR_MSG(extack,
				       "There already exists an externally controlled device on this destination port");
			return -EPERM;
		}
	}

	dst_cache_reset(&geneve->cfg.info.dst_cache);
	memcpy(&geneve->cfg, cfg, sizeof(*cfg));

	if (geneve->cfg.inner_proto_inherit) {
		dev->header_ops = NULL;
		dev->type = ARPHRD_NONE;
		dev->hard_header_len = 0;
		dev->addr_len = 0;
		dev->flags = IFF_POINTOPOINT | IFF_NOARP;
	}

	err = register_netdevice(dev);
	if (err)
		return err;

	list_add(&geneve->next, &gn->geneve_list);
	return 0;
}

static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
{
	memset(info, 0, sizeof(*info));
	info->key.tp_dst = htons(dst_port);
}

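/*
 * Annotation: geneve_nl2info() is shared by newlink and changelink. On
 * changelink only a subset of attributes may be modified; anything
 * identity-related (VNI, port, address family, metadata mode, checksum
 * settings, inner_proto_inherit) jumps to change_notsup and fails with
 * -EOPNOTSUPP.
 */
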
static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
			  struct netlink_ext_ack *extack,
			  struct geneve_config *cfg, bool changelink)
{
	struct ip_tunnel_info *info = &cfg->info;
	int attrtype;

	if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
		NL_SET_ERR_MSG(extack,
			       "Cannot specify both IPv4 and IPv6 Remote addresses");
		return -EINVAL;
	}

	if (data[IFLA_GENEVE_REMOTE]) {
		if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
			attrtype = IFLA_GENEVE_REMOTE;
			goto change_notsup;
		}

		info->key.u.ipv4.dst =
			nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);

		if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
					    "Remote IPv4 address cannot be Multicast");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_REMOTE6]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
			attrtype = IFLA_GENEVE_REMOTE6;
			goto change_notsup;
		}

		info->mode = IP_TUNNEL_INFO_IPV6;
		info->key.u.ipv6.dst =
			nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);

		if (ipv6_addr_type(&info->key.u.ipv6.dst) &
		    IPV6_ADDR_LINKLOCAL) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
					    "Remote IPv6 address cannot be link-local");
			return -EINVAL;
		}
		if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
					    "Remote IPv6 address cannot be Multicast");
			return -EINVAL;
		}
		info->key.tun_flags |= TUNNEL_CSUM;
		cfg->use_udp6_rx_checksums = true;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_ID]) {
		__u32 vni;
		__u8 tvni[3];
		__be64 tunid;

		vni = nla_get_u32(data[IFLA_GENEVE_ID]);
		tvni[0] = (vni & 0x00ff0000) >> 16;
		tvni[1] = (vni & 0x0000ff00) >> 8;
		tvni[2] = vni & 0x000000ff;

		tunid = vni_to_tunnel_id(tvni);
		if (changelink && (tunid != info->key.tun_id)) {
			attrtype = IFLA_GENEVE_ID;
			goto change_notsup;
		}
		info->key.tun_id = tunid;
	}

	if (data[IFLA_GENEVE_TTL_INHERIT]) {
		if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
			cfg->ttl_inherit = true;
		else
			cfg->ttl_inherit = false;
	} else if (data[IFLA_GENEVE_TTL]) {
		info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
		cfg->ttl_inherit = false;
	}

	if (data[IFLA_GENEVE_TOS])
		info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);

	if (data[IFLA_GENEVE_DF])
		cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);

	if (data[IFLA_GENEVE_LABEL]) {
		info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
				  IPV6_FLOWLABEL_MASK;
		if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
					    "Label attribute only applies for IPv6 Geneve devices");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_PORT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_PORT;
			goto change_notsup;
		}
		info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
	}

	if (data[IFLA_GENEVE_COLLECT_METADATA]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_COLLECT_METADATA;
			goto change_notsup;
		}
		cfg->collect_md = true;
	}

	if (data[IFLA_GENEVE_UDP_CSUM]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_CSUM;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
			info->key.tun_flags |= TUNNEL_CSUM;
	}

	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
			info->key.tun_flags &= ~TUNNEL_CSUM;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
			cfg->use_udp6_rx_checksums = false;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
			goto change_notsup;
		}
		cfg->inner_proto_inherit = true;
	}

	return 0;
change_notsup:
	NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
			    "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
	return -EOPNOTSUPP;
}

static void geneve_link_config(struct net_device *dev,
			       struct ip_tunnel_info *info, struct nlattr *tb[])
{
	struct geneve_dev *geneve = netdev_priv(dev);
	int ldev_mtu = 0;

	if (tb[IFLA_MTU]) {
		geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
		return;
	}

	switch (ip_tunnel_info_af(info)) {
	case AF_INET: {
		struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
		struct rtable *rt = ip_route_output_key(geneve->net, &fl4);

		if (!IS_ERR(rt) && rt->dst.dev) {
			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
			ip_rt_put(rt);
		}
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6: {
		struct rt6_info *rt;

		if (!__in6_dev_get(dev))
			break;

		rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
				NULL, 0);

		if (rt && rt->dst.dev)
			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
		ip6_rt_put(rt);
		break;
	}
#endif
	}

	if (ldev_mtu <= 0)
		return;

	geneve_change_mtu(dev, ldev_mtu - info->options_len);
}

static int geneve_newlink(struct net *net, struct net_device *dev,
			  struct nlattr *tb[], struct nlattr *data[],
			  struct netlink_ext_ack *extack)
{
	struct geneve_config cfg = {
		.df = GENEVE_DF_UNSET,
		.use_udp6_rx_checksums = false,
		.ttl_inherit = false,
		.collect_md = false,
	};
	int err;

	init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
	err = geneve_nl2info(tb, data, extack, &cfg, false);
	if (err)
		return err;

	err = geneve_configure(net, dev, extack, &cfg);
	if (err)
		return err;

	geneve_link_config(dev, &cfg.info, tb);

	return 0;
}

/* Quiesces the geneve device data path for both TX and RX.
 *
 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
 * to complete for the existing set of in-flight packets to be transmitted,
 * then we would have quiesced the transmit data path. All the future packets
 * will get dropped until we unquiesce the data path.
 *
 * On receive, geneve dereferences the geneve_sock stashed in the socket. So,
 * if we set that to NULL under RCU and wait for synchronize_net() to
 * complete, then we would have quiesced the receive data path.
 */
static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
			   struct geneve_sock **gs6)
{
	*gs4 = rtnl_dereference(geneve->sock4);
	rcu_assign_pointer(geneve->sock4, NULL);
	if (*gs4)
		rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
#if IS_ENABLED(CONFIG_IPV6)
	*gs6 = rtnl_dereference(geneve->sock6);
	rcu_assign_pointer(geneve->sock6, NULL);
	if (*gs6)
		rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
#else
	*gs6 = NULL;
#endif
	synchronize_net();
}

/* Resumes the geneve device data path for both TX and RX. */
static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
			     struct geneve_sock __maybe_unused *gs6)
{
	rcu_assign_pointer(geneve->sock4, gs4);
	if (gs4)
		rcu_assign_sk_user_data(gs4->sock->sk, gs4);
#if IS_ENABLED(CONFIG_IPV6)
	rcu_assign_pointer(geneve->sock6, gs6);
	if (gs6)
		rcu_assign_sk_user_data(gs6->sock->sk, gs6);
#endif
	synchronize_net();
}

static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct geneve_sock *gs4, *gs6;
	struct geneve_config cfg;
	int err;

	/* If the geneve device is configured for metadata (or externally
	 * controlled, for example, OVS), then nothing can be changed.
	 */
	if (geneve->cfg.collect_md)
		return -EOPNOTSUPP;

	/* Start with the existing info. */
	memcpy(&cfg, &geneve->cfg, sizeof(cfg));
	err = geneve_nl2info(tb, data, extack, &cfg, true);
	if (err)
		return err;

	if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
		dst_cache_reset(&cfg.info.dst_cache);
		geneve_link_config(dev, &cfg.info, tb);
	}

	geneve_quiesce(geneve, &gs4, &gs6);
	memcpy(&geneve->cfg, &cfg, sizeof(cfg));
	geneve_unquiesce(geneve, gs4, gs6);

	return 0;
}

static void geneve_dellink(struct net_device *dev, struct list_head *head)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	list_del(&geneve->next);
	unregister_netdevice_queue(dev, head);
}

static size_t geneve_get_size(const struct net_device *dev)
{
	return nla_total_size(sizeof(__u32)) +	/* IFLA_GENEVE_ID */
		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_TTL */
		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_TOS */
		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_DF */
		nla_total_size(sizeof(__be32)) +  /* IFLA_GENEVE_LABEL */
		nla_total_size(sizeof(__be16)) +  /* IFLA_GENEVE_PORT */
		nla_total_size(0) +	 /* IFLA_GENEVE_COLLECT_METADATA */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
		nla_total_size(0) +	 /* IFLA_GENEVE_INNER_PROTO_INHERIT */
		0;
}

static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct ip_tunnel_info *info = &geneve->cfg.info;
	bool ttl_inherit = geneve->cfg.ttl_inherit;
	bool metadata = geneve->cfg.collect_md;
	__u8 tmp_vni[3];
	__u32 vni;

	tunnel_id_to_vni(info->key.tun_id, tmp_vni);
	vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
		goto nla_put_failure;

	if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
		if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
				    info->key.u.ipv4.dst))
			goto nla_put_failure;
		if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
			       !!(info->key.tun_flags & TUNNEL_CSUM)))
			goto nla_put_failure;

#if IS_ENABLED(CONFIG_IPV6)
	} else if (!metadata) {
		if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
				     &info->key.u.ipv6.dst))
			goto nla_put_failure;
		if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
			       !(info->key.tun_flags & TUNNEL_CSUM)))
			goto nla_put_failure;
#endif
	}

	if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
	    nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
	    nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
		goto nla_put_failure;

	if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
		goto nla_put_failure;

	if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
		goto nla_put_failure;

#if IS_ENABLED(CONFIG_IPV6)
	if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
		       !geneve->cfg.use_udp6_rx_checksums))
		goto nla_put_failure;
#endif

	if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
		goto nla_put_failure;

	if (geneve->cfg.inner_proto_inherit &&
	    nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static struct rtnl_link_ops geneve_link_ops __read_mostly = {
	.kind		= "geneve",
	.maxtype	= IFLA_GENEVE_MAX,
	.policy		= geneve_policy,
	.priv_size	= sizeof(struct geneve_dev),
	.setup		= geneve_setup,
	.validate	= geneve_validate,
	.newlink	= geneve_newlink,
	.changelink	= geneve_changelink,
	.dellink	= geneve_dellink,
	.get_size	= geneve_get_size,
	.fill_info	= geneve_fill_info,
};

struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
					u8 name_assign_type, u16 dst_port)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	int err;
	struct geneve_config cfg = {
		.df = GENEVE_DF_UNSET,
		.use_udp6_rx_checksums = true,
		.ttl_inherit = false,
		.collect_md = true,
	};

	memset(tb, 0, sizeof(tb));
	dev = rtnl_create_link(net, name, name_assign_type,
			       &geneve_link_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	init_tnl_info(&cfg.info, dst_port);
	err = geneve_configure(net, dev, NULL, &cfg);
	if (err) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = geneve_change_mtu(dev, IP_MAX_MTU);
	if (err)
		goto err;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto err;

	return dev;
err:
	geneve_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(geneve_dev_create_fb);

static int geneve_netdevice_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
		geneve_offload_rx_ports(dev, true);
	else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
		geneve_offload_rx_ports(dev, false);

	return NOTIFY_DONE;
}

static struct notifier_block geneve_notifier_block __read_mostly = {
	.notifier_call = geneve_netdevice_event,
};

static __net_init int geneve_init_net(struct net *net)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);

	INIT_LIST_HEAD(&gn->geneve_list);
	INIT_LIST_HEAD(&gn->sock_list);
	return 0;
}

static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev *geneve, *next;
	struct net_device *dev, *aux;

	/* gather any geneve devices that were moved into this ns */
	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &geneve_link_ops)
			unregister_netdevice_queue(dev, head);

	/* now gather any other geneve devices that were created in this ns */
	list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
		/* If geneve->dev is in the same netns, it was already added
		 * to the list by the previous loop.
		 */
		if (!net_eq(dev_net(geneve->dev), net))
			unregister_netdevice_queue(geneve->dev, head);
	}
}

static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
{
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		geneve_destroy_tunnels(net, &list);

	/* unregister the devices gathered above */
	unregister_netdevice_many(&list);
	rtnl_unlock();

	list_for_each_entry(net, net_list, exit_list) {
		const struct geneve_net *gn = net_generic(net, geneve_net_id);

		WARN_ON_ONCE(!list_empty(&gn->sock_list));
	}
}

static struct pernet_operations geneve_net_ops = {
	.init = geneve_init_net,
	.exit_batch = geneve_exit_batch_net,
	.id   = &geneve_net_id,
	.size = sizeof(struct geneve_net),
};

static int __init geneve_init_module(void)
{
	int rc;

	rc = register_pernet_subsys(&geneve_net_ops);
	if (rc)
		goto out1;

	rc = register_netdevice_notifier(&geneve_notifier_block);
	if (rc)
		goto out2;

	rc = rtnl_link_register(&geneve_link_ops);
	if (rc)
		goto out3;

	return 0;
out3:
	unregister_netdevice_notifier(&geneve_notifier_block);
out2:
	unregister_pernet_subsys(&geneve_net_ops);
out1:
	return rc;
}
late_initcall(geneve_init_module);

static void __exit geneve_cleanup_module(void)
{
	rtnl_link_unregister(&geneve_link_ops);
	unregister_netdevice_notifier(&geneve_notifier_block);
	unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_VERSION(GENEVE_NETDEV_VER);
MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("geneve");