/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/hash.h>
#include <net/dst_metadata.h>
#include <net/gro_cells.h>
#include <net/rtnetlink.h>
#include <net/geneve.h>
#include <net/protocol.h>

/* Driver version string reported through ethtool (geneve_get_drvinfo) */
#define GENEVE_NETDEV_VER	"0.6"

#define GENEVE_UDP_PORT		6081

/* Number of distinct 24-bit VNIs, and the mask covering all of them */
#define GENEVE_N_VID		(1u << 24)
#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)

/* Geometry of the per-socket VNI hash table (geneve_sock.vni_list) */
#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)

/* Module parameter: log ECN decapsulation errors seen in geneve_rx() */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Only Geneve version 0 is accepted on receive and emitted on transmit */
#define GENEVE_VER 0
/* Outer UDP header plus the fixed Geneve header (options not included) */
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))

/* per-network namespace private data for this module */
struct geneve_net {
	struct list_head	geneve_list;	/* geneve_dev entries, linked via ->next */
	struct list_head	sock_list;	/* geneve_sock entries, linked via ->list */
};

/* Key used with net_generic() to fetch this module's struct geneve_net */
static int geneve_net_id;

/* Remote endpoint address, viewable as IPv4, IPv6 or generic sockaddr */
union geneve_addr {
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr sa;
};

/* Address whose family is AF_UNSPEC */
static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, };

/* Pseudo network device */
struct geneve_dev {
	struct hlist_node  hlist;	/* vni hash table */
	struct net	   *net;	/* netns for packet i/o */
	struct net_device  *dev;	/* netdev for geneve tunnel */
	struct geneve_sock *sock4;	/* IPv4 socket used for geneve tunnel */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_sock *sock6;	/* IPv6 socket used for geneve tunnel */
#endif
	u8                 vni[3];	/* virtual network ID for tunnel */
	u8                 ttl;		/*
TTL override */
	u8                 tos;		/* TOS override */
	union geneve_addr  remote;	/* IP address for link partner */
	struct list_head   next;	/* geneve's per namespace list */
	__be32		   label;	/* IPv6 flowlabel override */
	__be16		   dst_port;	/* UDP port, used both as the local
					 * socket port and as the transmit
					 * destination port
					 */
	bool		   collect_md;	/* tunnel parameters come from
					 * per-packet tunnel metadata
					 */
	struct gro_cells   gro_cells;	/* receive queue fed by geneve_rx() */
	u32		   flags;	/* GENEVE_F_* bits below */
	struct dst_cache   dst_cache;	/* cached route to ->remote */
};

/* Geneve device flags */
#define GENEVE_F_UDP_ZERO_CSUM_TX	BIT(0)
#define GENEVE_F_UDP_ZERO_CSUM6_TX	BIT(1)
#define GENEVE_F_UDP_ZERO_CSUM6_RX	BIT(2)

/* One UDP listening socket, shared (refcounted) by the geneve devices
 * that use the same address family and port.
 */
struct geneve_sock {
	bool			collect_md;	/* copied from the last device added */
	struct list_head	list;		/* entry in geneve_net.sock_list */
	struct socket		*sock;
	struct rcu_head		rcu;		/* for kfree_rcu() on release */
	int			refcnt;		/* number of devices using the socket */
	struct hlist_head	vni_list[VNI_HASH_SIZE];	/* devices hashed by VNI */
	u32			flags;		/* copied from the last device added */
};

/* Fold the three VNI bytes into one integer (vni[0] is the most
 * significant byte) and hash it down to a vni_list[] bucket index.
 */
static inline __u32 geneve_net_vni_hash(u8 vni[3])
{
	__u32 vnid;

	vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
	return hash_32(vnid, VNI_HASH_BITS);
}

/* Pack a 3-byte VNI into the low-order 24 bits of a big-endian 64-bit
 * tunnel id. Both branches produce the same in-memory byte sequence;
 * the little-endian branch places the bytes by hand instead of
 * byte-swapping.
 */
static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
	return (vni[0] << 16) | (vni[1] << 8) | vni[2];
#else
	return (__force __be64)(((__force u64)vni[0] << 40) |
				((__force u64)vni[1] << 48) |
				((__force u64)vni[2] << 56));
#endif
}

/* Address family (AF_INET / AF_INET6) of the socket backing @gs */
static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
{
	return gs->sock->sk->sk_family;
}

/* Find the device on @gs whose VNI equals @vni and whose configured
 * IPv4 remote equals @addr. Returns NULL when there is no match.
 * The bucket is walked with the RCU list iterator.
 * NOTE(review): presumably callers run inside an RCU read-side section;
 * confirm against the UDP encap receive path.
 */
static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
					__be32 addr, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev *geneve;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
		if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
		    addr == geneve->remote.sin.sin_addr.s_addr)
			return geneve;
	}
	return NULL;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of geneve_lookup(): match on VNI and IPv6 remote */
static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
					 struct in6_addr addr6, u8 vni[])
{
	struct hlist_head
*vni_list_head;
	struct geneve_dev *geneve;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
		if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
		    ipv6_addr_equal(&addr6, &geneve->remote.sin6.sin6_addr))
			return geneve;
	}
	return NULL;
}
#endif

/* The Geneve header starts right after the outer UDP header */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}

/* Pick the geneve device that should receive @skb, which arrived on @gs.
 *
 * On an ordinary socket the lookup key is the VNI from the Geneve header
 * plus the outer source address. On a collect_md socket the key is an
 * all-zero VNI and an all-zero address (the static locals below are
 * zero-initialized and never written).
 *
 * Returns NULL when no device matches or the socket family is neither
 * AF_INET nor (with IPv6 enabled) AF_INET6.
 */
static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
					    struct sk_buff *skb)
{
	u8 *vni;
	__be32 addr;
	static u8 zero_vni[3];
#if IS_ENABLED(CONFIG_IPV6)
	static struct in6_addr zero_addr6;
#endif

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph;

		iph = ip_hdr(skb); /* outer IP header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr = 0;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr = iph->saddr;
		}

		return geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (geneve_get_sk_family(gs) == AF_INET6) {
		struct ipv6hdr *ip6h;
		struct in6_addr addr6;

		ip6h = ipv6_hdr(skb); /* outer IPv6 header...
*/

		if (gs->collect_md) {
			vni = zero_vni;
			addr6 = zero_addr6;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr6 = ip6h->saddr;
		}

		return geneve6_lookup(gs, addr6, vni);
#endif
	}
	return NULL;
}

/* geneve receive/decap routine
 *
 * Takes ownership of @skb: it is either queued on the device's GRO cells
 * or freed on the drop path. When metadata collection is active, a
 * metadata dst carrying the tunnel id, flags and Geneve options is
 * attached to the skb. Otherwise packets with the critical bit set are
 * dropped.
 */
static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
		      struct sk_buff *skb)
{
	struct genevehdr *gnvh = geneve_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	struct pcpu_sw_netstats *stats;
	int err = 0;
	void *oiph;

	if (ip_tunnel_collect_metadata() || gs->collect_md) {
		__be16 flags;

		/* Mirror the OAM and critical header bits into tunnel flags */
		flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
			(gnvh->oam ? TUNNEL_OAM : 0) |
			(gnvh->critical ? TUNNEL_CRIT_OPT : 0);

		/* opt_len counts 4-byte words; the dst wants a byte count */
		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
					 vni_to_tunnel_id(gnvh->vni),
					 gnvh->opt_len * 4);
		if (!tun_dst)
			goto drop;
		/* Update tunnel dst according to Geneve options. */
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					gnvh->options, gnvh->opt_len * 4);
	} else {
		/* Drop packets w/ critical options,
		 * since we don't support any...
*/
		if (gnvh->critical)
			goto drop;
	}

	/* The inner Ethernet frame starts at the current data pointer */
	skb_reset_mac_header(skb);
	skb->protocol = eth_type_trans(skb, geneve->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	if (tun_dst)
		skb_dst_set(skb, &tun_dst->dst);

	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
		goto drop;

	/* Remember the outer IP header before the network header is
	 * re-pointed at the inner packet; ECN decapsulation needs both.
	 */
	oiph = skb_network_header(skb);
	skb_reset_network_header(skb);

	if (geneve_get_sk_family(gs) == AF_INET)
		err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
	else
		err = IP6_ECN_decapsulate(oiph, skb);
#endif

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (geneve_get_sk_family(gs) == AF_INET)
				net_info_ratelimited("non-ECT from %pI4 "
						     "with TOS=%#x\n",
						     &((struct iphdr *)oiph)->saddr,
						     ((struct iphdr *)oiph)->tos);
#if IS_ENABLED(CONFIG_IPV6)
			else
				net_info_ratelimited("non-ECT from %pI6\n",
						     &((struct ipv6hdr *)oiph)->saddr);
#endif
		}
		/* Only results above 1 are fatal; smaller non-zero values
		 * are logged (above) and the packet is still delivered.
		 */
		if (err > 1) {
			++geneve->dev->stats.rx_frame_errors;
			++geneve->dev->stats.rx_errors;
			goto drop;
		}
	}

	/* Per-CPU receive counters */
	stats = this_cpu_ptr(geneve->dev->tstats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
	u64_stats_update_end(&stats->syncp);

	gro_cells_receive(&geneve->gro_cells, skb);
	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
}

/* Setup stats when device is created
 *
 * Allocates the per-CPU stats, the GRO cells and the dst cache. On any
 * failure everything allocated so far is released again and the error
 * is returned.
 */
static int geneve_init(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	int err;

	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&geneve->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
gro_cells_destroy(&geneve->gro_cells);
		return err;
	}

	return 0;
}

/* Release what geneve_init() allocated, in reverse order */
static void geneve_uninit(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	dst_cache_destroy(&geneve->dst_cache);
	gro_cells_destroy(&geneve->gro_cells);
	free_percpu(dev->tstats);
}

/* Callback from net/ipv4/udp.c to receive packets
 *
 * Validates the fixed Geneve header, finds the target device, strips the
 * UDP + Geneve headers (including options) and hands the inner frame to
 * geneve_rx(). Always returns 0: the skb is consumed on every path,
 * either by geneve_rx() or by kfree_skb() on the drop path.
 */
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct genevehdr *geneveh;
	struct geneve_dev *geneve;
	struct geneve_sock *gs;
	int opts_len;

	/* Need the UDP header and the fixed Geneve header to be present */
	if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
		goto drop;

	/* Drop packets carrying an unsupported Geneve version */
	geneveh = geneve_hdr(skb);
	if (unlikely(geneveh->ver != GENEVE_VER))
		goto drop;

	/* Only Ethernet payloads (Transparent Ethernet Bridging) are handled */
	if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
		goto drop;

	gs = rcu_dereference_sk_user_data(sk);
	if (!gs)
		goto drop;

	geneve = geneve_lookup_skb(gs, skb);
	if (!geneve)
		goto drop;

	/* opt_len is expressed in 4-byte words */
	opts_len = geneveh->opt_len * 4;
	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
				 htons(ETH_P_TEB),
				 !net_eq(geneve->net, dev_net(geneve->dev))))
		goto drop;

	geneve_rx(geneve, gs, skb);
	return 0;

drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;
}

/* Open a kernel UDP socket bound to @port in @net.
 *
 * IPv6 sockets are v6-only and verify receive checksums unless
 * GENEVE_F_UDP_ZERO_CSUM6_RX is set in @flags; IPv4 sockets bind to
 * INADDR_ANY. Returns the socket or an ERR_PTR().
 */
static struct socket *geneve_create_sock(struct net *net, bool ipv6,
					 __be16 port, u32 flags)
{
	struct socket *sock;
	struct udp_port_cfg udp_conf;
	int err;

	memset(&udp_conf, 0, sizeof(udp_conf));

	if (ipv6) {
		udp_conf.family = AF_INET6;
		udp_conf.ipv6_v6only = 1;
		udp_conf.use_udp6_rx_checksums =
		    !(flags & GENEVE_F_UDP_ZERO_CSUM6_RX);
	} else {
		udp_conf.family = AF_INET;
		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
	}

	udp_conf.local_udp_port = port;

	/* Open UDP socket */
	err =
udp_sock_create(net, &udp_conf, &sock);
	if (err < 0)
		return ERR_PTR(err);

	return sock;
}

/* Tell every netdev in the socket's namespace that implements
 * ndo_add_geneve_port about the address family and UDP port of @gs.
 */
static void geneve_notify_add_rx_port(struct geneve_sock *gs)
{
	struct net_device *dev;
	struct sock *sk = gs->sock->sk;
	struct net *net = sock_net(sk);
	sa_family_t sa_family = geneve_get_sk_family(gs);
	__be16 port = inet_sk(sk)->inet_sport;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (dev->netdev_ops->ndo_add_geneve_port)
			dev->netdev_ops->ndo_add_geneve_port(dev, sa_family,
							     port);
	}
	rcu_read_unlock();
}

/* Total Geneve header length in bytes: fixed header plus options
 * (opt_len is expressed in 4-byte words).
 */
static int geneve_hlen(struct genevehdr *gh)
{
	return sizeof(*gh) + gh->opt_len * 4;
}

/* GRO receive hook for Geneve-encapsulated packets.
 *
 * Makes the whole Geneve header (including options) accessible, clears
 * same_flow on held packets whose Geneve header differs from this one,
 * then passes the inner packet to the offload handler registered for
 * the header's protocol type. The packet is marked for flush when the
 * header cannot be read, the version is unsupported, the OAM bit is set
 * or no inner handler exists.
 */
static struct sk_buff **geneve_gro_receive(struct sock *sk,
					   struct sk_buff **head,
					   struct sk_buff *skb)
{
	struct sk_buff *p, **pp = NULL;
	struct genevehdr *gh, *gh2;
	unsigned int hlen, gh_len, off_gnv;
	const struct packet_offload *ptype;
	__be16 type;
	int flush = 1;

	/* First pass: only the fixed header, to learn the option length */
	off_gnv = skb_gro_offset(skb);
	hlen = off_gnv + sizeof(*gh);
	gh = skb_gro_header_fast(skb, off_gnv);
	if (skb_gro_header_hard(skb, hlen)) {
		gh = skb_gro_header_slow(skb, hlen, off_gnv);
		if (unlikely(!gh))
			goto out;
	}

	if (gh->ver != GENEVE_VER || gh->oam)
		goto out;
	gh_len = geneve_hlen(gh);

	/* Second pass: fixed header plus options */
	hlen = off_gnv + gh_len;
	if (skb_gro_header_hard(skb, hlen)) {
		gh = skb_gro_header_slow(skb, hlen, off_gnv);
		if (unlikely(!gh))
			goto out;
	}

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		/* Any difference in the Geneve header means a different flow */
		gh2 = (struct genevehdr *)(p->data + off_gnv);
		if (gh->opt_len != gh2->opt_len ||
		    memcmp(gh, gh2, gh_len)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	type = gh->proto_type;

	rcu_read_lock();
	ptype = gro_find_receive_by_type(type);
	if (!ptype)
		goto out_unlock;

	skb_gro_pull(skb, gh_len);
skb_gro_postpull_rcsum(skb, gh, gh_len);
	pp = ptype->callbacks.gro_receive(head, skb);
	flush = 0;

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}

/* GRO complete hook: delegate to the inner protocol's gro_complete at
 * the offset just past the Geneve header, then record that offset as
 * the inner MAC header. Returns -ENOSYS when no inner handler exists.
 */
static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
			       int nhoff)
{
	struct genevehdr *gh;
	struct packet_offload *ptype;
	__be16 type;
	int gh_len;
	int err = -ENOSYS;

	gh = (struct genevehdr *)(skb->data + nhoff);
	gh_len = geneve_hlen(gh);
	type = gh->proto_type;

	rcu_read_lock();
	ptype = gro_find_complete_by_type(type);
	if (ptype)
		err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);

	rcu_read_unlock();

	skb_set_inner_mac_header(skb, nhoff + gh_len);

	return err;
}

/* Create a new listening socket.
 *
 * Allocates a geneve_sock with refcnt 1 and empty VNI buckets, opens the
 * UDP socket, announces the port to other netdevs, installs the Geneve
 * encap/GRO callbacks on the socket and links the result into the
 * namespace socket list. Returns the geneve_sock or an ERR_PTR().
 * Reuse of an existing socket is handled by the caller, not here.
 */
static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
						bool ipv6, u32 flags)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;
	struct socket *sock;
	struct udp_tunnel_sock_cfg tunnel_cfg;
	int h;

	gs = kzalloc(sizeof(*gs), GFP_KERNEL);
	if (!gs)
		return ERR_PTR(-ENOMEM);

	sock = geneve_create_sock(net, ipv6, port, flags);
	if (IS_ERR(sock)) {
		kfree(gs);
		return ERR_CAST(sock);
	}

	gs->sock = sock;
	gs->refcnt = 1;
	for (h = 0; h < VNI_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&gs->vni_list[h]);

	/* Announce the new receive port to netdevs that track Geneve ports */
	geneve_notify_add_rx_port(gs);

	/* Mark socket as an encapsulation socket */
	memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
	tunnel_cfg.sk_user_data = gs;
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.gro_receive = geneve_gro_receive;
	tunnel_cfg.gro_complete = geneve_gro_complete;
	tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
	tunnel_cfg.encap_destroy = NULL;
	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
	list_add(&gs->list,
&gn->sock_list);
	return gs;
}

/* Tell every netdev in the socket's namespace that implements
 * ndo_del_geneve_port that the port of @gs is going away.
 */
static void geneve_notify_del_rx_port(struct geneve_sock *gs)
{
	struct net_device *dev;
	struct sock *sk = gs->sock->sk;
	struct net *net = sock_net(sk);
	sa_family_t sa_family = geneve_get_sk_family(gs);
	__be16 port = inet_sk(sk)->inet_sport;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (dev->netdev_ops->ndo_del_geneve_port)
			dev->netdev_ops->ndo_del_geneve_port(dev, sa_family,
							     port);
	}

	rcu_read_unlock();
}

/* Drop one reference on @gs (NULL is tolerated). When the last reference
 * goes away the socket is unlinked from the namespace list, the port
 * removal is announced, the UDP socket is released and the structure is
 * freed via kfree_rcu().
 */
static void __geneve_sock_release(struct geneve_sock *gs)
{
	if (!gs || --gs->refcnt)
		return;

	list_del(&gs->list);
	geneve_notify_del_rx_port(gs);
	udp_tunnel_sock_release(gs->sock);
	kfree_rcu(gs, rcu);
}

/* Drop the device's references on its IPv4 and IPv6 sockets.
 * The sock4/sock6 pointers themselves are left untouched.
 */
static void geneve_sock_release(struct geneve_dev *geneve)
{
	__geneve_sock_release(geneve->sock4);
#if IS_ENABLED(CONFIG_IPV6)
	__geneve_sock_release(geneve->sock6);
#endif
}

/* Look for an existing socket in @gn with the given address family and
 * local UDP port. Returns NULL when none exists.
 */
static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
					    sa_family_t family,
					    __be16 dst_port)
{
	struct geneve_sock *gs;

	list_for_each_entry(gs, &gn->sock_list, list) {
		if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
		    geneve_get_sk_family(gs) == family) {
			return gs;
		}
	}
	return NULL;
}

/* Attach @geneve to a socket of the requested family on its dst_port,
 * reusing an existing socket (refcnt++) or creating a new one, and hash
 * the device into that socket's VNI table.
 */
static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
{
	struct net *net = geneve->net;
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;
	__u32 hash;

	gs = geneve_find_sock(gn, ipv6 ?
AF_INET6 : AF_INET, geneve->dst_port);
	if (gs) {
		gs->refcnt++;
		goto out;
	}

	gs = geneve_socket_create(net, geneve->dst_port, ipv6, geneve->flags);
	if (IS_ERR(gs))
		return PTR_ERR(gs);

out:
	/* The socket adopts the mode and flags of the device just added */
	gs->collect_md = geneve->collect_md;
	gs->flags = geneve->flags;
#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6)
		geneve->sock6 = gs;
	else
#endif
		geneve->sock4 = gs;

	hash = geneve_net_vni_hash(geneve->vni);
	hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
	return 0;
}

/* ndo_open: attach the device to its socket(s).
 *
 * An IPv6 socket is used when the remote is IPv6 or the device is in
 * collect_md mode (and IPv6 is enabled); an IPv4 socket is used when the
 * remote is not IPv6 or the device is in collect_md mode. On failure the
 * references taken so far are dropped.
 *
 * NOTE(review): in collect_md mode with IPv6 enabled, geneve_sock_add()
 * runs twice and links the single geneve->hlist node into two different
 * sockets' vni_list tables; the second hlist_add_head_rcu() overwrites
 * the node's links from the first. Likewise the failure path releases
 * the sockets without unhashing the node. Both need a per-family hash
 * node to fix properly.
 */
static int geneve_open(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	bool ipv6 = geneve->remote.sa.sa_family == AF_INET6;
	bool metadata = geneve->collect_md;
	int ret = 0;

	geneve->sock4 = NULL;
#if IS_ENABLED(CONFIG_IPV6)
	geneve->sock6 = NULL;
	if (ipv6 || metadata)
		ret = geneve_sock_add(geneve, true);
#endif
	if (!ret && (!ipv6 || metadata))
		ret = geneve_sock_add(geneve, false);
	if (ret < 0)
		geneve_sock_release(geneve);

	return ret;
}

/* ndo_stop: unhash the device and drop its socket references */
static int geneve_stop(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	if (!hlist_unhashed(&geneve->hlist))
		hlist_del_rcu(&geneve->hlist);
	geneve_sock_release(geneve);
	return 0;
}

/* Fill in a Geneve header at @geneveh.
 *
 * @tun_flags:   TUNNEL_OAM / TUNNEL_CRIT_OPT select the OAM and critical bits
 * @vni:         3-byte virtual network identifier
 * @options_len: length of @options in bytes; stored in the header in
 *               4-byte words
 * @options:     option bytes copied right behind the fixed header
 *
 * The payload type is always ETH_P_TEB and reserved fields are zeroed.
 */
static void geneve_build_header(struct genevehdr *geneveh,
				__be16 tun_flags, u8 vni[3],
				u8 options_len, u8 *options)
{
	geneveh->ver = GENEVE_VER;
	geneveh->opt_len = options_len / 4;
	geneveh->oam = !!(tun_flags & TUNNEL_OAM);
	geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
	geneveh->rsvd1 = 0;
	memcpy(geneveh->vni, vni, 3);
	geneveh->proto_type = htons(ETH_P_TEB);
	geneveh->rsvd2 = 0;

	memcpy(geneveh->options, options, options_len);
}

/* Prepare @skb for IPv4 Geneve transmission: scrub it, make sure there
 * is headroom for the outer headers, set up tunnel offloads and push the
 * Geneve header. On failure the route @rt is released and the error is
 * returned; the skb is left for the caller to free.
 */
static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb,
			    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
			    u32 flags, bool xnet)
{
struct genevehdr *gnvh;
	int min_headroom;
	int err;
	bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX);

	skb_scrub_packet(skb, xnet);

	/* Room for link layer, outer IPv4, UDP, Geneve and options */
	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr);
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		goto free_rt;

	err = udp_tunnel_handle_offloads(skb, udp_sum);
	if (err)
		goto free_rt;

	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
	geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
	return 0;

free_rt:
	ip_rt_put(rt);
	return err;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of geneve_build_skb(): same steps, but headroom is
 * sized for an outer IPv6 header, the checksum choice follows
 * GENEVE_F_UDP_ZERO_CSUM6_TX, and @dst is released on failure.
 */
static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb,
			     __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
			     u32 flags, bool xnet)
{
	struct genevehdr *gnvh;
	int min_headroom;
	int err;
	bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM6_TX);

	skb_scrub_packet(skb, xnet);

	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr);
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		goto free_dst;

	err = udp_tunnel_handle_offloads(skb, udp_sum);
	if (err)
		goto free_dst;

	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
	geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
	return 0;

free_dst:
	dst_release(dst);
	return err;
}
#endif

/* Resolve the IPv4 route for an outgoing packet and fill in @fl4.
 *
 * With @info the flow comes from the tunnel metadata and its dst cache
 * is used; without it the flow comes from the device configuration and
 * the device's dst cache is used. A device TOS of 1 means "take the TOS
 * from the inner packet", which also disables caching.
 *
 * Returns the route, ERR_PTR(-ENETUNREACH) when no route exists, or
 * ERR_PTR(-ELOOP) when the route points back at this device.
 */
static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
				       struct net_device *dev,
				       struct flowi4 *fl4,
				       struct ip_tunnel_info *info)
{
	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
	struct geneve_dev *geneve = netdev_priv(dev);
	struct dst_cache *dst_cache;
	struct rtable *rt = NULL;
__u8 tos; 758 759 memset(fl4, 0, sizeof(*fl4)); 760 fl4->flowi4_mark = skb->mark; 761 fl4->flowi4_proto = IPPROTO_UDP; 762 763 if (info) { 764 fl4->daddr = info->key.u.ipv4.dst; 765 fl4->saddr = info->key.u.ipv4.src; 766 fl4->flowi4_tos = RT_TOS(info->key.tos); 767 dst_cache = &info->dst_cache; 768 } else { 769 tos = geneve->tos; 770 if (tos == 1) { 771 const struct iphdr *iip = ip_hdr(skb); 772 773 tos = ip_tunnel_get_dsfield(iip, skb); 774 use_cache = false; 775 } 776 777 fl4->flowi4_tos = RT_TOS(tos); 778 fl4->daddr = geneve->remote.sin.sin_addr.s_addr; 779 dst_cache = &geneve->dst_cache; 780 } 781 782 if (use_cache) { 783 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); 784 if (rt) 785 return rt; 786 } 787 788 rt = ip_route_output_key(geneve->net, fl4); 789 if (IS_ERR(rt)) { 790 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); 791 return ERR_PTR(-ENETUNREACH); 792 } 793 if (rt->dst.dev == dev) { /* is this necessary? */ 794 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); 795 ip_rt_put(rt); 796 return ERR_PTR(-ELOOP); 797 } 798 if (use_cache) 799 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); 800 return rt; 801 } 802 803 #if IS_ENABLED(CONFIG_IPV6) 804 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, 805 struct net_device *dev, 806 struct flowi6 *fl6, 807 struct ip_tunnel_info *info) 808 { 809 bool use_cache = ip_tunnel_dst_cache_usable(skb, info); 810 struct geneve_dev *geneve = netdev_priv(dev); 811 struct geneve_sock *gs6 = geneve->sock6; 812 struct dst_entry *dst = NULL; 813 struct dst_cache *dst_cache; 814 __u8 prio; 815 816 memset(fl6, 0, sizeof(*fl6)); 817 fl6->flowi6_mark = skb->mark; 818 fl6->flowi6_proto = IPPROTO_UDP; 819 820 if (info) { 821 fl6->daddr = info->key.u.ipv6.dst; 822 fl6->saddr = info->key.u.ipv6.src; 823 fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos), 824 info->key.label); 825 dst_cache = &info->dst_cache; 826 } else { 827 prio = geneve->tos; 828 if (prio == 1) { 829 const struct iphdr *iip = 
ip_hdr(skb); 830 831 prio = ip_tunnel_get_dsfield(iip, skb); 832 use_cache = false; 833 } 834 835 fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio), 836 geneve->label); 837 fl6->daddr = geneve->remote.sin6.sin6_addr; 838 dst_cache = &geneve->dst_cache; 839 } 840 841 if (use_cache) { 842 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); 843 if (dst) 844 return dst; 845 } 846 847 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { 848 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); 849 return ERR_PTR(-ENETUNREACH); 850 } 851 if (dst->dev == dev) { /* is this necessary? */ 852 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr); 853 dst_release(dst); 854 return ERR_PTR(-ELOOP); 855 } 856 857 if (use_cache) 858 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr); 859 return dst; 860 } 861 #endif 862 863 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 864 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 865 { 866 #ifdef __BIG_ENDIAN 867 vni[0] = (__force __u8)(tun_id >> 16); 868 vni[1] = (__force __u8)(tun_id >> 8); 869 vni[2] = (__force __u8)tun_id; 870 #else 871 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 872 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 873 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 874 #endif 875 } 876 877 static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 878 struct ip_tunnel_info *info) 879 { 880 struct geneve_dev *geneve = netdev_priv(dev); 881 struct geneve_sock *gs4 = geneve->sock4; 882 struct rtable *rt = NULL; 883 const struct iphdr *iip; /* interior IP header */ 884 int err = -EINVAL; 885 struct flowi4 fl4; 886 __u8 tos, ttl; 887 __be16 sport; 888 __be16 df; 889 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 890 u32 flags = geneve->flags; 891 892 if (geneve->collect_md) { 893 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 894 netdev_dbg(dev, "no tunnel metadata\n"); 895 goto tx_error; 896 } 897 if (info && ip_tunnel_info_af(info) != AF_INET) 
898 goto tx_error; 899 } 900 901 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 902 if (IS_ERR(rt)) { 903 err = PTR_ERR(rt); 904 goto tx_error; 905 } 906 907 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 908 skb_reset_mac_header(skb); 909 910 iip = ip_hdr(skb); 911 912 if (info) { 913 const struct ip_tunnel_key *key = &info->key; 914 u8 *opts = NULL; 915 u8 vni[3]; 916 917 tunnel_id_to_vni(key->tun_id, vni); 918 if (info->options_len) 919 opts = ip_tunnel_info_opts(info); 920 921 if (key->tun_flags & TUNNEL_CSUM) 922 flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX; 923 else 924 flags |= GENEVE_F_UDP_ZERO_CSUM_TX; 925 926 err = geneve_build_skb(rt, skb, key->tun_flags, vni, 927 info->options_len, opts, flags, xnet); 928 if (unlikely(err)) 929 goto tx_error; 930 931 tos = ip_tunnel_ecn_encap(key->tos, iip, skb); 932 ttl = key->ttl; 933 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 934 } else { 935 err = geneve_build_skb(rt, skb, 0, geneve->vni, 936 0, NULL, flags, xnet); 937 if (unlikely(err)) 938 goto tx_error; 939 940 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); 941 ttl = geneve->ttl; 942 if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) 943 ttl = 1; 944 ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); 945 df = 0; 946 } 947 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, 948 tos, ttl, df, sport, geneve->dst_port, 949 !net_eq(geneve->net, dev_net(geneve->dev)), 950 !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX)); 951 952 return NETDEV_TX_OK; 953 954 tx_error: 955 dev_kfree_skb(skb); 956 957 if (err == -ELOOP) 958 dev->stats.collisions++; 959 else if (err == -ENETUNREACH) 960 dev->stats.tx_carrier_errors++; 961 else 962 dev->stats.tx_errors++; 963 return NETDEV_TX_OK; 964 } 965 966 #if IS_ENABLED(CONFIG_IPV6) 967 static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 968 struct ip_tunnel_info *info) 969 { 970 struct geneve_dev *geneve = netdev_priv(dev); 971 struct geneve_sock *gs6 = geneve->sock6; 972 struct dst_entry *dst = NULL; 973 const struct iphdr *iip; /* interior IP header */ 974 int err = -EINVAL; 975 struct flowi6 fl6; 976 __u8 prio, ttl; 977 __be16 sport; 978 __be32 label; 979 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 980 u32 flags = geneve->flags; 981 982 if (geneve->collect_md) { 983 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 984 netdev_dbg(dev, "no tunnel metadata\n"); 985 goto tx_error; 986 } 987 } 988 989 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 990 if (IS_ERR(dst)) { 991 err = PTR_ERR(dst); 992 goto tx_error; 993 } 994 995 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 996 skb_reset_mac_header(skb); 997 998 iip = ip_hdr(skb); 999 1000 if (info) { 1001 const struct ip_tunnel_key *key = &info->key; 1002 u8 *opts = NULL; 1003 u8 vni[3]; 1004 1005 tunnel_id_to_vni(key->tun_id, vni); 1006 if (info->options_len) 1007 opts = ip_tunnel_info_opts(info); 1008 1009 if (key->tun_flags & TUNNEL_CSUM) 1010 flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX; 1011 else 1012 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 1013 1014 err = geneve6_build_skb(dst, skb, key->tun_flags, vni, 1015 info->options_len, opts, 1016 flags, xnet); 1017 if (unlikely(err)) 1018 goto 
tx_error; 1019 1020 prio = ip_tunnel_ecn_encap(key->tos, iip, skb); 1021 ttl = key->ttl; 1022 label = info->key.label; 1023 } else { 1024 err = geneve6_build_skb(dst, skb, 0, geneve->vni, 1025 0, NULL, flags, xnet); 1026 if (unlikely(err)) 1027 goto tx_error; 1028 1029 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), 1030 iip, skb); 1031 ttl = geneve->ttl; 1032 if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) 1033 ttl = 1; 1034 ttl = ttl ? : ip6_dst_hoplimit(dst); 1035 label = geneve->label; 1036 } 1037 1038 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, 1039 &fl6.saddr, &fl6.daddr, prio, ttl, label, 1040 sport, geneve->dst_port, 1041 !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); 1042 return NETDEV_TX_OK; 1043 1044 tx_error: 1045 dev_kfree_skb(skb); 1046 1047 if (err == -ELOOP) 1048 dev->stats.collisions++; 1049 else if (err == -ENETUNREACH) 1050 dev->stats.tx_carrier_errors++; 1051 else 1052 dev->stats.tx_errors++; 1053 return NETDEV_TX_OK; 1054 } 1055 #endif 1056 1057 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1058 { 1059 struct geneve_dev *geneve = netdev_priv(dev); 1060 struct ip_tunnel_info *info = NULL; 1061 1062 if (geneve->collect_md) 1063 info = skb_tunnel_info(skb); 1064 1065 #if IS_ENABLED(CONFIG_IPV6) 1066 if ((info && ip_tunnel_info_af(info) == AF_INET6) || 1067 (!info && geneve->remote.sa.sa_family == AF_INET6)) 1068 return geneve6_xmit_skb(skb, dev, info); 1069 #endif 1070 return geneve_xmit_skb(skb, dev, info); 1071 } 1072 1073 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) 1074 { 1075 /* The max_mtu calculation does not take account of GENEVE 1076 * options, to avoid excluding potentially valid 1077 * configurations. 
*/
	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
		- dev->hard_header_len;

	if (new_mtu < 68)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}

/* ndo_change_mtu: strict variant, out-of-range values are rejected */
static int geneve_change_mtu(struct net_device *dev, int new_mtu)
{
	return __geneve_change_mtu(dev, new_mtu, true);
}

/* ndo_fill_metadata_dst: complete the skb's tunnel metadata with what
 * this device would use on transmit.
 *
 * Performs the route lookup for the metadata's address family, stores
 * the resulting source address in the tunnel key and fills in the UDP
 * source and destination ports. The route reference is dropped again
 * right away; only the flow result is kept. Returns 0, the lookup
 * error, or -EINVAL for an unsupported address family.
 */
static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct geneve_dev *geneve = netdev_priv(dev);
	struct rtable *rt;
	struct flowi4 fl4;
#if IS_ENABLED(CONFIG_IPV6)
	struct dst_entry *dst;
	struct flowi6 fl6;
#endif

	if (ip_tunnel_info_af(info) == AF_INET) {
		rt = geneve_get_v4_rt(skb, dev, &fl4, info);
		if (IS_ERR(rt))
			return PTR_ERR(rt);

		ip_rt_put(rt);
		info->key.u.ipv4.src = fl4.saddr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (ip_tunnel_info_af(info) == AF_INET6) {
		dst = geneve_get_v6_dst(skb, dev, &fl6, info);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		dst_release(dst);
		info->key.u.ipv6.src = fl6.saddr;
#endif
	} else {
		return -EINVAL;
	}

	/* Same source-port selection as the transmit path */
	info->key.tp_src = udp_flow_src_port(geneve->net, skb,
					     1, USHRT_MAX, true);
	info->key.tp_dst = geneve->dst_port;
	return 0;
}

/* Network device operations implemented by the geneve device */
static const struct net_device_ops geneve_netdev_ops = {
	.ndo_init		= geneve_init,
	.ndo_uninit		= geneve_uninit,
	.ndo_open		= geneve_open,
	.ndo_stop		= geneve_stop,
	.ndo_start_xmit		= geneve_xmit,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_change_mtu		= geneve_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_fill_metadata_dst	= geneve_fill_metadata_dst,
};

/* ethtool get_drvinfo: report the driver name and version string */
static void
geneve_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *drvinfo)
{
	strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
	strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
}

/* ethtool operations implemented by the geneve device */
static const struct ethtool_ops geneve_ethtool_ops = {
	.get_drvinfo	= geneve_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

/* Info for udev, that this is a virtual tunnel endpoint */
static struct device_type geneve_type = {
	.name = "geneve",
};

/* Calls the ndo_add_geneve_port of the caller in order to
 * supply the listening GENEVE udp ports. Callers are expected
 * to implement the ndo_add_geneve_port.
 *
 * Does nothing when @dev has no ndo_add_geneve_port. Otherwise the
 * callback is invoked once per socket on the namespace socket list,
 * with that socket's address family and local port.
 */
static void geneve_push_rx_ports(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;
	sa_family_t sa_family;
	struct sock *sk;
	__be16 port;

	if (!dev->netdev_ops->ndo_add_geneve_port)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(gs, &gn->sock_list, list) {
		sk = gs->sock->sk;
		sa_family = sk->sk_family;
		port = inet_sk(sk)->inet_sport;
		dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port);
	}
	rcu_read_unlock();
}

/* Initialize the device structure.
*/ 1195 static void geneve_setup(struct net_device *dev) 1196 { 1197 ether_setup(dev); 1198 1199 dev->netdev_ops = &geneve_netdev_ops; 1200 dev->ethtool_ops = &geneve_ethtool_ops; 1201 dev->destructor = free_netdev; 1202 1203 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1204 1205 dev->features |= NETIF_F_LLTX; 1206 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; 1207 dev->features |= NETIF_F_RXCSUM; 1208 dev->features |= NETIF_F_GSO_SOFTWARE; 1209 1210 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; 1211 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1212 1213 netif_keep_dst(dev); 1214 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1215 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1216 eth_hw_addr_random(dev); 1217 } 1218 1219 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1220 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1221 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1222 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1223 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1224 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1225 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, 1226 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1227 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1228 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1229 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1230 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1231 }; 1232 1233 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) 1234 { 1235 if (tb[IFLA_ADDRESS]) { 1236 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1237 return -EINVAL; 1238 1239 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1240 return -EADDRNOTAVAIL; 1241 } 1242 1243 if (!data) 1244 return -EINVAL; 1245 1246 if (data[IFLA_GENEVE_ID]) { 1247 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1248 1249 if (vni >= GENEVE_VID_MASK) 1250 return -ERANGE; 1251 } 1252 1253 return 0; 1254 } 1255 1256 static struct geneve_dev *geneve_find_dev(struct 
geneve_net *gn, 1257 __be16 dst_port, 1258 union geneve_addr *remote, 1259 u8 vni[], 1260 bool *tun_on_same_port, 1261 bool *tun_collect_md) 1262 { 1263 struct geneve_dev *geneve, *t; 1264 1265 *tun_on_same_port = false; 1266 *tun_collect_md = false; 1267 t = NULL; 1268 list_for_each_entry(geneve, &gn->geneve_list, next) { 1269 if (geneve->dst_port == dst_port) { 1270 *tun_collect_md = geneve->collect_md; 1271 *tun_on_same_port = true; 1272 } 1273 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 1274 !memcmp(remote, &geneve->remote, sizeof(geneve->remote)) && 1275 dst_port == geneve->dst_port) 1276 t = geneve; 1277 } 1278 return t; 1279 } 1280 1281 static int geneve_configure(struct net *net, struct net_device *dev, 1282 union geneve_addr *remote, 1283 __u32 vni, __u8 ttl, __u8 tos, __be32 label, 1284 __be16 dst_port, bool metadata, u32 flags) 1285 { 1286 struct geneve_net *gn = net_generic(net, geneve_net_id); 1287 struct geneve_dev *t, *geneve = netdev_priv(dev); 1288 bool tun_collect_md, tun_on_same_port; 1289 int err, encap_len; 1290 1291 if (!remote) 1292 return -EINVAL; 1293 if (metadata && 1294 (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl || label)) 1295 return -EINVAL; 1296 1297 geneve->net = net; 1298 geneve->dev = dev; 1299 1300 geneve->vni[0] = (vni & 0x00ff0000) >> 16; 1301 geneve->vni[1] = (vni & 0x0000ff00) >> 8; 1302 geneve->vni[2] = vni & 0x000000ff; 1303 1304 if ((remote->sa.sa_family == AF_INET && 1305 IN_MULTICAST(ntohl(remote->sin.sin_addr.s_addr))) || 1306 (remote->sa.sa_family == AF_INET6 && 1307 ipv6_addr_is_multicast(&remote->sin6.sin6_addr))) 1308 return -EINVAL; 1309 if (label && remote->sa.sa_family != AF_INET6) 1310 return -EINVAL; 1311 1312 geneve->remote = *remote; 1313 1314 geneve->ttl = ttl; 1315 geneve->tos = tos; 1316 geneve->label = label; 1317 geneve->dst_port = dst_port; 1318 geneve->collect_md = metadata; 1319 geneve->flags = flags; 1320 1321 t = geneve_find_dev(gn, dst_port, remote, geneve->vni, 1322 
&tun_on_same_port, &tun_collect_md); 1323 if (t) 1324 return -EBUSY; 1325 1326 /* make enough headroom for basic scenario */ 1327 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1328 if (remote->sa.sa_family == AF_INET) 1329 encap_len += sizeof(struct iphdr); 1330 else 1331 encap_len += sizeof(struct ipv6hdr); 1332 dev->needed_headroom = encap_len + ETH_HLEN; 1333 1334 if (metadata) { 1335 if (tun_on_same_port) 1336 return -EPERM; 1337 } else { 1338 if (tun_collect_md) 1339 return -EPERM; 1340 } 1341 1342 dst_cache_reset(&geneve->dst_cache); 1343 1344 err = register_netdevice(dev); 1345 if (err) 1346 return err; 1347 1348 list_add(&geneve->next, &gn->geneve_list); 1349 return 0; 1350 } 1351 1352 static int geneve_newlink(struct net *net, struct net_device *dev, 1353 struct nlattr *tb[], struct nlattr *data[]) 1354 { 1355 __be16 dst_port = htons(GENEVE_UDP_PORT); 1356 __u8 ttl = 0, tos = 0; 1357 bool metadata = false; 1358 union geneve_addr remote = geneve_remote_unspec; 1359 __be32 label = 0; 1360 __u32 vni = 0; 1361 u32 flags = 0; 1362 1363 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) 1364 return -EINVAL; 1365 1366 if (data[IFLA_GENEVE_REMOTE]) { 1367 remote.sa.sa_family = AF_INET; 1368 remote.sin.sin_addr.s_addr = 1369 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 1370 } 1371 1372 if (data[IFLA_GENEVE_REMOTE6]) { 1373 if (!IS_ENABLED(CONFIG_IPV6)) 1374 return -EPFNOSUPPORT; 1375 1376 remote.sa.sa_family = AF_INET6; 1377 remote.sin6.sin6_addr = 1378 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 1379 1380 if (ipv6_addr_type(&remote.sin6.sin6_addr) & 1381 IPV6_ADDR_LINKLOCAL) { 1382 netdev_dbg(dev, "link-local remote is unsupported\n"); 1383 return -EINVAL; 1384 } 1385 } 1386 1387 if (data[IFLA_GENEVE_ID]) 1388 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1389 1390 if (data[IFLA_GENEVE_TTL]) 1391 ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 1392 1393 if (data[IFLA_GENEVE_TOS]) 1394 tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 1395 1396 if (data[IFLA_GENEVE_LABEL]) 1397 label 
= nla_get_be32(data[IFLA_GENEVE_LABEL]) & 1398 IPV6_FLOWLABEL_MASK; 1399 1400 if (data[IFLA_GENEVE_PORT]) 1401 dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]); 1402 1403 if (data[IFLA_GENEVE_COLLECT_METADATA]) 1404 metadata = true; 1405 1406 if (data[IFLA_GENEVE_UDP_CSUM] && 1407 !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 1408 flags |= GENEVE_F_UDP_ZERO_CSUM_TX; 1409 1410 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && 1411 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 1412 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 1413 1414 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] && 1415 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 1416 flags |= GENEVE_F_UDP_ZERO_CSUM6_RX; 1417 1418 return geneve_configure(net, dev, &remote, vni, ttl, tos, label, 1419 dst_port, metadata, flags); 1420 } 1421 1422 static void geneve_dellink(struct net_device *dev, struct list_head *head) 1423 { 1424 struct geneve_dev *geneve = netdev_priv(dev); 1425 1426 list_del(&geneve->next); 1427 unregister_netdevice_queue(dev, head); 1428 } 1429 1430 static size_t geneve_get_size(const struct net_device *dev) 1431 { 1432 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 1433 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 1434 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 1435 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 1436 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ 1437 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 1438 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 1439 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 1440 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 1441 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 1442 0; 1443 } 1444 1445 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 1446 { 1447 struct geneve_dev *geneve = netdev_priv(dev); 1448 __u32 vni; 1449 1450 vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; 1451 if 
(nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 1452 goto nla_put_failure; 1453 1454 if (geneve->remote.sa.sa_family == AF_INET) { 1455 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 1456 geneve->remote.sin.sin_addr.s_addr)) 1457 goto nla_put_failure; 1458 #if IS_ENABLED(CONFIG_IPV6) 1459 } else { 1460 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 1461 &geneve->remote.sin6.sin6_addr)) 1462 goto nla_put_failure; 1463 #endif 1464 } 1465 1466 if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || 1467 nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos) || 1468 nla_put_be32(skb, IFLA_GENEVE_LABEL, geneve->label)) 1469 goto nla_put_failure; 1470 1471 if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port)) 1472 goto nla_put_failure; 1473 1474 if (geneve->collect_md) { 1475 if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 1476 goto nla_put_failure; 1477 } 1478 1479 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 1480 !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) || 1481 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 1482 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || 1483 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1484 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_RX))) 1485 goto nla_put_failure; 1486 1487 return 0; 1488 1489 nla_put_failure: 1490 return -EMSGSIZE; 1491 } 1492 1493 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 1494 .kind = "geneve", 1495 .maxtype = IFLA_GENEVE_MAX, 1496 .policy = geneve_policy, 1497 .priv_size = sizeof(struct geneve_dev), 1498 .setup = geneve_setup, 1499 .validate = geneve_validate, 1500 .newlink = geneve_newlink, 1501 .dellink = geneve_dellink, 1502 .get_size = geneve_get_size, 1503 .fill_info = geneve_fill_info, 1504 }; 1505 1506 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 1507 u8 name_assign_type, u16 dst_port) 1508 { 1509 struct nlattr *tb[IFLA_MAX + 1]; 1510 struct net_device *dev; 1511 int err; 1512 1513 memset(tb, 0, sizeof(tb)); 1514 dev = rtnl_create_link(net, name, name_assign_type, 1515 
&geneve_link_ops, tb); 1516 if (IS_ERR(dev)) 1517 return dev; 1518 1519 err = geneve_configure(net, dev, &geneve_remote_unspec, 1520 0, 0, 0, 0, htons(dst_port), true, 1521 GENEVE_F_UDP_ZERO_CSUM6_RX); 1522 if (err) 1523 goto err; 1524 1525 /* openvswitch users expect packet sizes to be unrestricted, 1526 * so set the largest MTU we can. 1527 */ 1528 err = __geneve_change_mtu(dev, IP_MAX_MTU, false); 1529 if (err) 1530 goto err; 1531 1532 return dev; 1533 1534 err: 1535 free_netdev(dev); 1536 return ERR_PTR(err); 1537 } 1538 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 1539 1540 static int geneve_netdevice_event(struct notifier_block *unused, 1541 unsigned long event, void *ptr) 1542 { 1543 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1544 1545 if (event == NETDEV_OFFLOAD_PUSH_GENEVE) 1546 geneve_push_rx_ports(dev); 1547 1548 return NOTIFY_DONE; 1549 } 1550 1551 static struct notifier_block geneve_notifier_block __read_mostly = { 1552 .notifier_call = geneve_netdevice_event, 1553 }; 1554 1555 static __net_init int geneve_init_net(struct net *net) 1556 { 1557 struct geneve_net *gn = net_generic(net, geneve_net_id); 1558 1559 INIT_LIST_HEAD(&gn->geneve_list); 1560 INIT_LIST_HEAD(&gn->sock_list); 1561 return 0; 1562 } 1563 1564 static void __net_exit geneve_exit_net(struct net *net) 1565 { 1566 struct geneve_net *gn = net_generic(net, geneve_net_id); 1567 struct geneve_dev *geneve, *next; 1568 struct net_device *dev, *aux; 1569 LIST_HEAD(list); 1570 1571 rtnl_lock(); 1572 1573 /* gather any geneve devices that were moved into this ns */ 1574 for_each_netdev_safe(net, dev, aux) 1575 if (dev->rtnl_link_ops == &geneve_link_ops) 1576 unregister_netdevice_queue(dev, &list); 1577 1578 /* now gather any other geneve devices that were created in this ns */ 1579 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { 1580 /* If geneve->dev is in the same netns, it was already added 1581 * to the list by the previous loop. 
1582 */ 1583 if (!net_eq(dev_net(geneve->dev), net)) 1584 unregister_netdevice_queue(geneve->dev, &list); 1585 } 1586 1587 /* unregister the devices gathered above */ 1588 unregister_netdevice_many(&list); 1589 rtnl_unlock(); 1590 } 1591 1592 static struct pernet_operations geneve_net_ops = { 1593 .init = geneve_init_net, 1594 .exit = geneve_exit_net, 1595 .id = &geneve_net_id, 1596 .size = sizeof(struct geneve_net), 1597 }; 1598 1599 static int __init geneve_init_module(void) 1600 { 1601 int rc; 1602 1603 rc = register_pernet_subsys(&geneve_net_ops); 1604 if (rc) 1605 goto out1; 1606 1607 rc = register_netdevice_notifier(&geneve_notifier_block); 1608 if (rc) 1609 goto out2; 1610 1611 rc = rtnl_link_register(&geneve_link_ops); 1612 if (rc) 1613 goto out3; 1614 1615 return 0; 1616 1617 out3: 1618 unregister_netdevice_notifier(&geneve_notifier_block); 1619 out2: 1620 unregister_pernet_subsys(&geneve_net_ops); 1621 out1: 1622 return rc; 1623 } 1624 late_initcall(geneve_init_module); 1625 1626 static void __exit geneve_cleanup_module(void) 1627 { 1628 rtnl_link_unregister(&geneve_link_ops); 1629 unregister_netdevice_notifier(&geneve_notifier_block); 1630 unregister_pernet_subsys(&geneve_net_ops); 1631 } 1632 module_exit(geneve_cleanup_module); 1633 1634 MODULE_LICENSE("GPL"); 1635 MODULE_VERSION(GENEVE_NETDEV_VER); 1636 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 1637 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 1638 MODULE_ALIAS_RTNL_LINK("geneve"); 1639