1 /* 2 * GENEVE: Generic Network Virtualization Encapsulation 3 * 4 * Copyright (c) 2015 Red Hat, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12 13 #include <linux/kernel.h> 14 #include <linux/module.h> 15 #include <linux/netdevice.h> 16 #include <linux/etherdevice.h> 17 #include <linux/hash.h> 18 #include <net/dst_metadata.h> 19 #include <net/gro_cells.h> 20 #include <net/rtnetlink.h> 21 #include <net/geneve.h> 22 #include <net/protocol.h> 23 24 #define GENEVE_NETDEV_VER "0.6" 25 26 #define GENEVE_UDP_PORT 6081 27 28 #define GENEVE_N_VID (1u << 24) 29 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 30 31 #define VNI_HASH_BITS 10 32 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 33 34 static bool log_ecn_error = true; 35 module_param(log_ecn_error, bool, 0644); 36 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 37 38 #define GENEVE_VER 0 39 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 40 41 /* per-network namespace private data for this module */ 42 struct geneve_net { 43 struct list_head geneve_list; 44 struct list_head sock_list; 45 }; 46 47 static int geneve_net_id; 48 49 union geneve_addr { 50 struct sockaddr_in sin; 51 struct sockaddr_in6 sin6; 52 struct sockaddr sa; 53 }; 54 55 static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, }; 56 57 /* Pseudo network device */ 58 struct geneve_dev { 59 struct hlist_node hlist; /* vni hash table */ 60 struct net *net; /* netns for packet i/o */ 61 struct net_device *dev; /* netdev for geneve tunnel */ 62 struct geneve_sock *sock4; /* IPv4 socket used for geneve tunnel */ 63 #if IS_ENABLED(CONFIG_IPV6) 64 struct geneve_sock *sock6; /* IPv6 socket used for geneve tunnel */ 65 #endif 66 u8 vni[3]; /* virtual network ID for tunnel */ 67 u8 ttl; /* TTL override */ 68 u8 tos; /* TOS override */ 69 union geneve_addr remote; /* IP address for link partner */ 70 struct list_head next; /* geneve's per namespace list */ 71 __be16 dst_port; 72 bool collect_md; 73 struct gro_cells gro_cells; 74 u32 flags; 75 }; 76 77 /* Geneve device flags */ 78 #define GENEVE_F_UDP_CSUM BIT(0) 79 #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1) 80 #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2) 81 82 struct geneve_sock { 83 bool collect_md; 84 struct list_head list; 85 struct socket *sock; 86 struct rcu_head rcu; 87 int refcnt; 88 struct udp_offload udp_offloads; 89 struct hlist_head vni_list[VNI_HASH_SIZE]; 90 u32 flags; 91 }; 92 93 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 94 { 95 __u32 vnid; 96 97 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 98 return hash_32(vnid, VNI_HASH_BITS); 99 } 100 101 static __be64 vni_to_tunnel_id(const __u8 *vni) 102 { 103 #ifdef __BIG_ENDIAN 104 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 105 #else 106 return (__force __be64)(((__force u64)vni[0] << 40) | 107 ((__force u64)vni[1] << 48) | 108 ((__force u64)vni[2] << 56)); 109 #endif 110 } 111 112 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 113 __be32 addr, u8 vni[]) 114 { 115 struct hlist_head *vni_list_head; 116 struct geneve_dev *geneve; 117 __u32 hash; 118 119 /* Find the device for this VNI */ 120 hash = geneve_net_vni_hash(vni); 121 vni_list_head = &gs->vni_list[hash]; 122 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { 123 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 124 addr == geneve->remote.sin.sin_addr.s_addr) 125 return geneve; 126 } 127 return NULL; 128 } 129 130 #if IS_ENABLED(CONFIG_IPV6) 131 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 132 struct in6_addr addr6, u8 vni[]) 133 { 134 struct hlist_head *vni_list_head; 135 struct geneve_dev *geneve; 136 __u32 hash; 137 138 /* Find the device for this VNI */ 139 hash = geneve_net_vni_hash(vni); 140 vni_list_head = &gs->vni_list[hash]; 141 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { 142 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 143 ipv6_addr_equal(&addr6, &geneve->remote.sin6.sin6_addr)) 144 return geneve; 145 } 146 return NULL; 147 } 148 #endif 149 150 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 151 { 152 return (struct genevehdr *)(udp_hdr(skb) + 1); 153 } 154 155 /* geneve receive/decap routine */ 156 static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) 157 { 158 struct genevehdr *gnvh = geneve_hdr(skb); 159 struct metadata_dst *tun_dst = NULL; 160 struct geneve_dev *geneve = NULL; 161 struct pcpu_sw_netstats *stats; 162 struct iphdr *iph = NULL; 163 __be32 addr; 164 static u8 zero_vni[3]; 165 u8 *vni; 166 int err = 0; 167 sa_family_t sa_family; 168 #if IS_ENABLED(CONFIG_IPV6) 169 struct ipv6hdr *ip6h = NULL; 170 struct in6_addr addr6; 171 static struct in6_addr zero_addr6; 172 #endif 173 174 sa_family = gs->sock->sk->sk_family; 175 176 if (sa_family == AF_INET) { 177 iph = ip_hdr(skb); /* outer IP header... */ 178 179 if (gs->collect_md) { 180 vni = zero_vni; 181 addr = 0; 182 } else { 183 vni = gnvh->vni; 184 185 addr = iph->saddr; 186 } 187 188 geneve = geneve_lookup(gs, addr, vni); 189 #if IS_ENABLED(CONFIG_IPV6) 190 } else if (sa_family == AF_INET6) { 191 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 192 193 if (gs->collect_md) { 194 vni = zero_vni; 195 addr6 = zero_addr6; 196 } else { 197 vni = gnvh->vni; 198 199 addr6 = ip6h->saddr; 200 } 201 202 geneve = geneve6_lookup(gs, addr6, vni); 203 #endif 204 } 205 if (!geneve) 206 goto drop; 207 208 if (ip_tunnel_collect_metadata() || gs->collect_md) { 209 __be16 flags; 210 211 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | 212 (gnvh->oam ? TUNNEL_OAM : 0) | 213 (gnvh->critical ? TUNNEL_CRIT_OPT : 0); 214 215 tun_dst = udp_tun_rx_dst(skb, sa_family, flags, 216 vni_to_tunnel_id(gnvh->vni), 217 gnvh->opt_len * 4); 218 if (!tun_dst) 219 goto drop; 220 /* Update tunnel dst according to Geneve options. */ 221 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 222 gnvh->options, gnvh->opt_len * 4); 223 } else { 224 /* Drop packets w/ critical options, 225 * since we don't support any... 226 */ 227 if (gnvh->critical) 228 goto drop; 229 } 230 231 skb_reset_mac_header(skb); 232 skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); 233 skb->protocol = eth_type_trans(skb, geneve->dev); 234 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 235 236 if (tun_dst) 237 skb_dst_set(skb, &tun_dst->dst); 238 239 /* Ignore packet loops (and multicast echo) */ 240 if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) 241 goto drop; 242 243 skb_reset_network_header(skb); 244 245 if (iph) 246 err = IP_ECN_decapsulate(iph, skb); 247 #if IS_ENABLED(CONFIG_IPV6) 248 if (ip6h) 249 err = IP6_ECN_decapsulate(ip6h, skb); 250 #endif 251 252 if (unlikely(err)) { 253 if (log_ecn_error) { 254 if (iph) 255 net_info_ratelimited("non-ECT from %pI4 " 256 "with TOS=%#x\n", 257 &iph->saddr, iph->tos); 258 #if IS_ENABLED(CONFIG_IPV6) 259 if (ip6h) 260 net_info_ratelimited("non-ECT from %pI6\n", 261 &ip6h->saddr); 262 #endif 263 } 264 if (err > 1) { 265 ++geneve->dev->stats.rx_frame_errors; 266 ++geneve->dev->stats.rx_errors; 267 goto drop; 268 } 269 } 270 271 stats = this_cpu_ptr(geneve->dev->tstats); 272 u64_stats_update_begin(&stats->syncp); 273 stats->rx_packets++; 274 stats->rx_bytes += skb->len; 275 u64_stats_update_end(&stats->syncp); 276 277 gro_cells_receive(&geneve->gro_cells, skb); 278 return; 279 drop: 280 /* Consume bad packet */ 281 kfree_skb(skb); 282 } 283 284 /* Setup stats when device is created */ 285 static int geneve_init(struct net_device *dev) 286 { 287 struct geneve_dev *geneve = netdev_priv(dev); 288 int err; 289 290 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 291 if (!dev->tstats) 292 return -ENOMEM; 293 294 err = gro_cells_init(&geneve->gro_cells, dev); 295 if (err) { 296 free_percpu(dev->tstats); 297 return err; 298 } 299 300 return 0; 301 } 302 303 static void geneve_uninit(struct net_device *dev) 304 { 305 struct geneve_dev *geneve = netdev_priv(dev); 306 307 gro_cells_destroy(&geneve->gro_cells); 308 free_percpu(dev->tstats); 309 } 310 311 /* Callback from net/ipv4/udp.c to receive packets */ 312 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 313 { 314 struct genevehdr *geneveh; 315 struct geneve_sock *gs; 316 int opts_len; 317 318 /* Need Geneve and inner Ethernet header to be present */ 319 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 320 goto error; 321 322 /* Return packets with reserved bits set */ 323 geneveh = geneve_hdr(skb); 324 if (unlikely(geneveh->ver != GENEVE_VER)) 325 goto error; 326 327 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) 328 goto error; 329 330 opts_len = geneveh->opt_len * 4; 331 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, 332 htons(ETH_P_TEB))) 333 goto drop; 334 335 gs = rcu_dereference_sk_user_data(sk); 336 if (!gs) 337 goto drop; 338 339 geneve_rx(gs, skb); 340 return 0; 341 342 drop: 343 /* Consume bad packet */ 344 kfree_skb(skb); 345 return 0; 346 347 error: 348 /* Let the UDP layer deal with the skb */ 349 return 1; 350 } 351 352 static struct socket *geneve_create_sock(struct net *net, bool ipv6, 353 __be16 port, u32 flags) 354 { 355 struct socket *sock; 356 struct udp_port_cfg udp_conf; 357 int err; 358 359 memset(&udp_conf, 0, sizeof(udp_conf)); 360 361 if (ipv6) { 362 udp_conf.family = AF_INET6; 363 udp_conf.ipv6_v6only = 1; 364 udp_conf.use_udp6_rx_checksums = 365 !(flags & GENEVE_F_UDP_ZERO_CSUM6_RX); 366 } else { 367 udp_conf.family = AF_INET; 368 udp_conf.local_ip.s_addr = htonl(INADDR_ANY); 369 } 370 371 udp_conf.local_udp_port = port; 372 373 /* Open UDP socket */ 374 err = udp_sock_create(net, &udp_conf, &sock); 375 if (err < 0) 376 return ERR_PTR(err); 377 378 return sock; 379 } 380 381 static void geneve_notify_add_rx_port(struct geneve_sock *gs) 382 { 383 struct net_device *dev; 384 struct sock *sk = gs->sock->sk; 385 struct net *net = sock_net(sk); 386 sa_family_t sa_family = sk->sk_family; 387 __be16 port = inet_sk(sk)->inet_sport; 388 int err; 389 390 if (sa_family == AF_INET) { 391 err = udp_add_offload(sock_net(sk), &gs->udp_offloads); 392 if (err) 393 pr_warn("geneve: udp_add_offload failed with status %d\n", 394 err); 395 } 396 397 rcu_read_lock(); 398 for_each_netdev_rcu(net, dev) { 399 if (dev->netdev_ops->ndo_add_geneve_port) 400 dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, 401 port); 402 } 403 rcu_read_unlock(); 404 } 405 406 static int geneve_hlen(struct genevehdr *gh) 407 { 408 return sizeof(*gh) + gh->opt_len * 4; 409 } 410 411 static struct sk_buff **geneve_gro_receive(struct sk_buff **head, 412 struct sk_buff *skb, 413 struct udp_offload *uoff) 414 { 415 struct sk_buff *p, **pp = NULL; 416 struct genevehdr *gh, *gh2; 417 unsigned int hlen, gh_len, off_gnv; 418 const struct packet_offload *ptype; 419 __be16 type; 420 int flush = 1; 421 422 off_gnv = skb_gro_offset(skb); 423 hlen = off_gnv + sizeof(*gh); 424 gh = skb_gro_header_fast(skb, off_gnv); 425 if (skb_gro_header_hard(skb, hlen)) { 426 gh = skb_gro_header_slow(skb, hlen, off_gnv); 427 if (unlikely(!gh)) 428 goto out; 429 } 430 431 if (gh->ver != GENEVE_VER || gh->oam) 432 goto out; 433 gh_len = geneve_hlen(gh); 434 435 hlen = off_gnv + gh_len; 436 if (skb_gro_header_hard(skb, hlen)) { 437 gh = skb_gro_header_slow(skb, hlen, off_gnv); 438 if (unlikely(!gh)) 439 goto out; 440 } 441 442 flush = 0; 443 444 for (p = *head; p; p = p->next) { 445 if (!NAPI_GRO_CB(p)->same_flow) 446 continue; 447 448 gh2 = (struct genevehdr *)(p->data + off_gnv); 449 if (gh->opt_len != gh2->opt_len || 450 memcmp(gh, gh2, gh_len)) { 451 NAPI_GRO_CB(p)->same_flow = 0; 452 continue; 453 } 454 } 455 456 type = gh->proto_type; 457 458 rcu_read_lock(); 459 ptype = gro_find_receive_by_type(type); 460 if (!ptype) { 461 flush = 1; 462 goto out_unlock; 463 } 464 465 skb_gro_pull(skb, gh_len); 466 skb_gro_postpull_rcsum(skb, gh, gh_len); 467 pp = ptype->callbacks.gro_receive(head, skb); 468 469 out_unlock: 470 rcu_read_unlock(); 471 out: 472 NAPI_GRO_CB(skb)->flush |= flush; 473 474 return pp; 475 } 476 477 static int geneve_gro_complete(struct sk_buff *skb, int nhoff, 478 struct udp_offload *uoff) 479 { 480 struct genevehdr *gh; 481 struct packet_offload *ptype; 482 __be16 type; 483 int gh_len; 484 int err = -ENOSYS; 485 486 udp_tunnel_gro_complete(skb, nhoff); 487 488 gh = (struct genevehdr *)(skb->data + nhoff); 489 gh_len = geneve_hlen(gh); 490 type = gh->proto_type; 491 492 rcu_read_lock(); 493 ptype = gro_find_complete_by_type(type); 494 if (ptype) 495 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 496 497 rcu_read_unlock(); 498 return err; 499 } 500 501 /* Create new listen socket if needed */ 502 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, 503 bool ipv6, u32 flags) 504 { 505 struct geneve_net *gn = net_generic(net, geneve_net_id); 506 struct geneve_sock *gs; 507 struct socket *sock; 508 struct udp_tunnel_sock_cfg tunnel_cfg; 509 int h; 510 511 gs = kzalloc(sizeof(*gs), GFP_KERNEL); 512 if (!gs) 513 return ERR_PTR(-ENOMEM); 514 515 sock = geneve_create_sock(net, ipv6, port, flags); 516 if (IS_ERR(sock)) { 517 kfree(gs); 518 return ERR_CAST(sock); 519 } 520 521 gs->sock = sock; 522 gs->refcnt = 1; 523 for (h = 0; h < VNI_HASH_SIZE; ++h) 524 INIT_HLIST_HEAD(&gs->vni_list[h]); 525 526 /* Initialize the geneve udp offloads structure */ 527 gs->udp_offloads.port = port; 528 gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; 529 gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; 530 geneve_notify_add_rx_port(gs); 531 532 /* Mark socket as an encapsulation socket */ 533 tunnel_cfg.sk_user_data = gs; 534 tunnel_cfg.encap_type = 1; 535 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 536 tunnel_cfg.encap_destroy = NULL; 537 setup_udp_tunnel_sock(net, sock, &tunnel_cfg); 538 list_add(&gs->list, &gn->sock_list); 539 return gs; 540 } 541 542 static void geneve_notify_del_rx_port(struct geneve_sock *gs) 543 { 544 struct net_device *dev; 545 struct sock *sk = gs->sock->sk; 546 struct net *net = sock_net(sk); 547 sa_family_t sa_family = sk->sk_family; 548 __be16 port = inet_sk(sk)->inet_sport; 549 550 rcu_read_lock(); 551 for_each_netdev_rcu(net, dev) { 552 if (dev->netdev_ops->ndo_del_geneve_port) 553 dev->netdev_ops->ndo_del_geneve_port(dev, sa_family, 554 port); 555 } 556 557 rcu_read_unlock(); 558 559 if (sa_family == AF_INET) 560 udp_del_offload(&gs->udp_offloads); 561 } 562 563 static void __geneve_sock_release(struct geneve_sock *gs) 564 { 565 if (!gs || --gs->refcnt) 566 return; 567 568 list_del(&gs->list); 569 geneve_notify_del_rx_port(gs); 570 udp_tunnel_sock_release(gs->sock); 571 kfree_rcu(gs, rcu); 572 } 573 574 static void geneve_sock_release(struct geneve_dev *geneve) 575 { 576 __geneve_sock_release(geneve->sock4); 577 #if IS_ENABLED(CONFIG_IPV6) 578 __geneve_sock_release(geneve->sock6); 579 #endif 580 } 581 582 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, 583 sa_family_t family, 584 __be16 dst_port) 585 { 586 struct geneve_sock *gs; 587 588 list_for_each_entry(gs, &gn->sock_list, list) { 589 if (inet_sk(gs->sock->sk)->inet_sport == dst_port && 590 inet_sk(gs->sock->sk)->sk.sk_family == family) { 591 return gs; 592 } 593 } 594 return NULL; 595 } 596 597 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 598 { 599 struct net *net = geneve->net; 600 struct geneve_net *gn = net_generic(net, geneve_net_id); 601 struct geneve_sock *gs; 602 __u32 hash; 603 604 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->dst_port); 605 if (gs) { 606 gs->refcnt++; 607 goto out; 608 } 609 610 gs = geneve_socket_create(net, geneve->dst_port, ipv6, geneve->flags); 611 if (IS_ERR(gs)) 612 return PTR_ERR(gs); 613 614 out: 615 gs->collect_md = geneve->collect_md; 616 gs->flags = geneve->flags; 617 #if IS_ENABLED(CONFIG_IPV6) 618 if (ipv6) 619 geneve->sock6 = gs; 620 else 621 #endif 622 geneve->sock4 = gs; 623 624 hash = geneve_net_vni_hash(geneve->vni); 625 hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); 626 return 0; 627 } 628 629 static int geneve_open(struct net_device *dev) 630 { 631 struct geneve_dev *geneve = netdev_priv(dev); 632 bool ipv6 = geneve->remote.sa.sa_family == AF_INET6; 633 bool metadata = geneve->collect_md; 634 int ret = 0; 635 636 geneve->sock4 = NULL; 637 #if IS_ENABLED(CONFIG_IPV6) 638 geneve->sock6 = NULL; 639 if (ipv6 || metadata) 640 ret = geneve_sock_add(geneve, true); 641 #endif 642 if (!ret && (!ipv6 || metadata)) 643 ret = geneve_sock_add(geneve, false); 644 if (ret < 0) 645 geneve_sock_release(geneve); 646 647 return ret; 648 } 649 650 static int geneve_stop(struct net_device *dev) 651 { 652 struct geneve_dev *geneve = netdev_priv(dev); 653 654 if (!hlist_unhashed(&geneve->hlist)) 655 hlist_del_rcu(&geneve->hlist); 656 geneve_sock_release(geneve); 657 return 0; 658 } 659 660 static void geneve_build_header(struct genevehdr *geneveh, 661 __be16 tun_flags, u8 vni[3], 662 u8 options_len, u8 *options) 663 { 664 geneveh->ver = GENEVE_VER; 665 geneveh->opt_len = options_len / 4; 666 geneveh->oam = !!(tun_flags & TUNNEL_OAM); 667 geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); 668 geneveh->rsvd1 = 0; 669 memcpy(geneveh->vni, vni, 3); 670 geneveh->proto_type = htons(ETH_P_TEB); 671 geneveh->rsvd2 = 0; 672 673 memcpy(geneveh->options, options, options_len); 674 } 675 676 static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, 677 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, 678 u32 flags, bool xnet) 679 { 680 struct genevehdr *gnvh; 681 int min_headroom; 682 int err; 683 bool udp_sum = !!(flags & GENEVE_F_UDP_CSUM); 684 685 skb_scrub_packet(skb, xnet); 686 687 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 688 + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr); 689 err = skb_cow_head(skb, min_headroom); 690 if (unlikely(err)) { 691 kfree_skb(skb); 692 goto free_rt; 693 } 694 695 skb = udp_tunnel_handle_offloads(skb, udp_sum); 696 if (IS_ERR(skb)) { 697 err = PTR_ERR(skb); 698 goto free_rt; 699 } 700 701 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); 702 geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); 703 704 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 705 return 0; 706 707 free_rt: 708 ip_rt_put(rt); 709 return err; 710 } 711 712 #if IS_ENABLED(CONFIG_IPV6) 713 static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb, 714 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, 715 u32 flags, bool xnet) 716 { 717 struct genevehdr *gnvh; 718 int min_headroom; 719 int err; 720 bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM6_TX); 721 722 skb_scrub_packet(skb, xnet); 723 724 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len 725 + GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr); 726 err = skb_cow_head(skb, min_headroom); 727 if (unlikely(err)) { 728 kfree_skb(skb); 729 goto free_dst; 730 } 731 732 skb = udp_tunnel_handle_offloads(skb, udp_sum); 733 if (IS_ERR(skb)) { 734 err = PTR_ERR(skb); 735 goto free_dst; 736 } 737 738 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); 739 geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); 740 741 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 742 return 0; 743 744 free_dst: 745 dst_release(dst); 746 return err; 747 } 748 #endif 749 750 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, 751 struct net_device *dev, 752 struct flowi4 *fl4, 753 struct ip_tunnel_info *info) 754 { 755 struct geneve_dev *geneve = netdev_priv(dev); 756 struct rtable *rt = NULL; 757 __u8 tos; 758 759 memset(fl4, 0, sizeof(*fl4)); 760 fl4->flowi4_mark = skb->mark; 761 fl4->flowi4_proto = IPPROTO_UDP; 762 763 if (info) { 764 fl4->daddr = info->key.u.ipv4.dst; 765 fl4->saddr = info->key.u.ipv4.src; 766 fl4->flowi4_tos = RT_TOS(info->key.tos); 767 } else { 768 tos = geneve->tos; 769 if (tos == 1) { 770 const struct iphdr *iip = ip_hdr(skb); 771 772 tos = ip_tunnel_get_dsfield(iip, skb); 773 } 774 775 fl4->flowi4_tos = RT_TOS(tos); 776 fl4->daddr = geneve->remote.sin.sin_addr.s_addr; 777 } 778 779 rt = ip_route_output_key(geneve->net, fl4); 780 if (IS_ERR(rt)) { 781 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); 782 return ERR_PTR(-ENETUNREACH); 783 } 784 if (rt->dst.dev == dev) { /* is this necessary? */ 785 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); 786 ip_rt_put(rt); 787 return ERR_PTR(-ELOOP); 788 } 789 return rt; 790 } 791 792 #if IS_ENABLED(CONFIG_IPV6) 793 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, 794 struct net_device *dev, 795 struct flowi6 *fl6, 796 struct ip_tunnel_info *info) 797 { 798 struct geneve_dev *geneve = netdev_priv(dev); 799 struct geneve_sock *gs6 = geneve->sock6; 800 struct dst_entry *dst = NULL; 801 __u8 prio; 802 803 memset(fl6, 0, sizeof(*fl6)); 804 fl6->flowi6_mark = skb->mark; 805 fl6->flowi6_proto = IPPROTO_UDP; 806 807 if (info) { 808 fl6->daddr = info->key.u.ipv6.dst; 809 fl6->saddr = info->key.u.ipv6.src; 810 fl6->flowi6_tos = RT_TOS(info->key.tos); 811 } else { 812 prio = geneve->tos; 813 if (prio == 1) { 814 const struct iphdr *iip = ip_hdr(skb); 815 816 prio = ip_tunnel_get_dsfield(iip, skb); 817 } 818 819 fl6->flowi6_tos = RT_TOS(prio); 820 fl6->daddr = geneve->remote.sin6.sin6_addr; 821 } 822 823 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { 824 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); 825 return ERR_PTR(-ENETUNREACH); 826 } 827 if (dst->dev == dev) { /* is this necessary? */ 828 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr); 829 dst_release(dst); 830 return ERR_PTR(-ELOOP); 831 } 832 833 return dst; 834 } 835 #endif 836 837 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 838 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 839 { 840 #ifdef __BIG_ENDIAN 841 vni[0] = (__force __u8)(tun_id >> 16); 842 vni[1] = (__force __u8)(tun_id >> 8); 843 vni[2] = (__force __u8)tun_id; 844 #else 845 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 846 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 847 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 848 #endif 849 } 850 851 static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 852 struct ip_tunnel_info *info) 853 { 854 struct geneve_dev *geneve = netdev_priv(dev); 855 struct geneve_sock *gs4 = geneve->sock4; 856 struct rtable *rt = NULL; 857 const struct iphdr *iip; /* interior IP header */ 858 int err = -EINVAL; 859 struct flowi4 fl4; 860 __u8 tos, ttl; 861 __be16 sport; 862 __be16 df; 863 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 864 u32 flags = geneve->flags; 865 866 if (geneve->collect_md) { 867 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 868 netdev_dbg(dev, "no tunnel metadata\n"); 869 goto tx_error; 870 } 871 if (info && ip_tunnel_info_af(info) != AF_INET) 872 goto tx_error; 873 } 874 875 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 876 if (IS_ERR(rt)) { 877 err = PTR_ERR(rt); 878 goto tx_error; 879 } 880 881 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 882 skb_reset_mac_header(skb); 883 884 iip = ip_hdr(skb); 885 886 if (info) { 887 const struct ip_tunnel_key *key = &info->key; 888 u8 *opts = NULL; 889 u8 vni[3]; 890 891 tunnel_id_to_vni(key->tun_id, vni); 892 if (key->tun_flags & TUNNEL_GENEVE_OPT) 893 opts = ip_tunnel_info_opts(info); 894 895 if (key->tun_flags & TUNNEL_CSUM) 896 flags |= GENEVE_F_UDP_CSUM; 897 else 898 flags &= ~GENEVE_F_UDP_CSUM; 899 900 err = geneve_build_skb(rt, skb, key->tun_flags, vni, 901 info->options_len, opts, flags, xnet); 902 if (unlikely(err)) 903 goto err; 904 905 tos = ip_tunnel_ecn_encap(key->tos, iip, skb); 906 ttl = key->ttl; 907 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 908 } else { 909 err = geneve_build_skb(rt, skb, 0, geneve->vni, 910 0, NULL, flags, xnet); 911 if (unlikely(err)) 912 goto err; 913 914 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); 915 ttl = geneve->ttl; 916 if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) 917 ttl = 1; 918 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 919 df = 0; 920 } 921 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, 922 tos, ttl, df, sport, geneve->dst_port, 923 !net_eq(geneve->net, dev_net(geneve->dev)), 924 !(flags & GENEVE_F_UDP_CSUM)); 925 926 return NETDEV_TX_OK; 927 928 tx_error: 929 dev_kfree_skb(skb); 930 err: 931 if (err == -ELOOP) 932 dev->stats.collisions++; 933 else if (err == -ENETUNREACH) 934 dev->stats.tx_carrier_errors++; 935 else 936 dev->stats.tx_errors++; 937 return NETDEV_TX_OK; 938 } 939 940 #if IS_ENABLED(CONFIG_IPV6) 941 static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 942 struct ip_tunnel_info *info) 943 { 944 struct geneve_dev *geneve = netdev_priv(dev); 945 struct geneve_sock *gs6 = geneve->sock6; 946 struct dst_entry *dst = NULL; 947 const struct iphdr *iip; /* interior IP header */ 948 int err = -EINVAL; 949 struct flowi6 fl6; 950 __u8 prio, ttl; 951 __be16 sport; 952 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 953 u32 flags = geneve->flags; 954 955 if (geneve->collect_md) { 956 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 957 netdev_dbg(dev, "no tunnel metadata\n"); 958 goto tx_error; 959 } 960 } 961 962 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 963 if (IS_ERR(dst)) { 964 err = PTR_ERR(dst); 965 goto tx_error; 966 } 967 968 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 969 skb_reset_mac_header(skb); 970 971 iip = ip_hdr(skb); 972 973 if (info) { 974 const struct ip_tunnel_key *key = &info->key; 975 u8 *opts = NULL; 976 u8 vni[3]; 977 978 tunnel_id_to_vni(key->tun_id, vni); 979 if (key->tun_flags & TUNNEL_GENEVE_OPT) 980 opts = ip_tunnel_info_opts(info); 981 982 if (key->tun_flags & TUNNEL_CSUM) 983 flags |= GENEVE_F_UDP_CSUM; 984 else 985 flags &= ~GENEVE_F_UDP_CSUM; 986 987 err = geneve6_build_skb(dst, skb, key->tun_flags, vni, 988 info->options_len, opts, 989 flags, xnet); 990 if (unlikely(err)) 991 goto err; 992 993 prio = ip_tunnel_ecn_encap(key->tos, iip, skb); 994 ttl = key->ttl; 995 } else { 996 err = geneve6_build_skb(dst, skb, 0, geneve->vni, 997 0, NULL, flags, xnet); 998 if (unlikely(err)) 999 goto err; 1000 1001 prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb); 1002 ttl = geneve->ttl; 1003 if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) 1004 ttl = 1; 1005 ttl = ttl ? : ip6_dst_hoplimit(dst); 1006 } 1007 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, 1008 &fl6.saddr, &fl6.daddr, prio, ttl, 1009 sport, geneve->dst_port, 1010 !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); 1011 return NETDEV_TX_OK; 1012 1013 tx_error: 1014 dev_kfree_skb(skb); 1015 err: 1016 if (err == -ELOOP) 1017 dev->stats.collisions++; 1018 else if (err == -ENETUNREACH) 1019 dev->stats.tx_carrier_errors++; 1020 else 1021 dev->stats.tx_errors++; 1022 return NETDEV_TX_OK; 1023 } 1024 #endif 1025 1026 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1027 { 1028 struct geneve_dev *geneve = netdev_priv(dev); 1029 struct ip_tunnel_info *info = NULL; 1030 1031 if (geneve->collect_md) 1032 info = skb_tunnel_info(skb); 1033 1034 #if IS_ENABLED(CONFIG_IPV6) 1035 if ((info && ip_tunnel_info_af(info) == AF_INET6) || 1036 (!info && geneve->remote.sa.sa_family == AF_INET6)) 1037 return geneve6_xmit_skb(skb, dev, info); 1038 #endif 1039 return geneve_xmit_skb(skb, dev, info); 1040 } 1041 1042 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1043 { 1044 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1045 struct geneve_dev *geneve = netdev_priv(dev); 1046 struct rtable *rt; 1047 struct flowi4 fl4; 1048 #if IS_ENABLED(CONFIG_IPV6) 1049 struct dst_entry *dst; 1050 struct flowi6 fl6; 1051 #endif 1052 1053 if (ip_tunnel_info_af(info) == AF_INET) { 1054 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 1055 if (IS_ERR(rt)) 1056 return PTR_ERR(rt); 1057 1058 ip_rt_put(rt); 1059 info->key.u.ipv4.src = fl4.saddr; 1060 #if IS_ENABLED(CONFIG_IPV6) 1061 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1062 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 1063 if (IS_ERR(dst)) 1064 return PTR_ERR(dst); 1065 1066 dst_release(dst); 1067 info->key.u.ipv6.src = fl6.saddr; 1068 #endif 1069 } else { 1070 return -EINVAL; 1071 } 1072 1073 info->key.tp_src = udp_flow_src_port(geneve->net, skb, 1074 1, USHRT_MAX, true); 1075 info->key.tp_dst = geneve->dst_port; 1076 return 0; 1077 } 1078 1079 static const struct net_device_ops geneve_netdev_ops = { 1080 .ndo_init = geneve_init, 1081 .ndo_uninit = geneve_uninit, 1082 .ndo_open = geneve_open, 1083 .ndo_stop = geneve_stop, 1084 .ndo_start_xmit = geneve_xmit, 1085 .ndo_get_stats64 = ip_tunnel_get_stats64, 1086 .ndo_change_mtu = eth_change_mtu, 1087 .ndo_validate_addr = eth_validate_addr, 1088 .ndo_set_mac_address = eth_mac_addr, 1089 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1090 }; 1091 1092 static void geneve_get_drvinfo(struct net_device *dev, 1093 struct ethtool_drvinfo *drvinfo) 1094 { 1095 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1096 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1097 } 1098 1099 static const struct ethtool_ops geneve_ethtool_ops = { 1100 .get_drvinfo = geneve_get_drvinfo, 1101 .get_link = ethtool_op_get_link, 1102 }; 1103 1104 /* Info for udev, that this is a virtual tunnel endpoint */ 1105 static struct device_type geneve_type = { 1106 .name = "geneve", 1107 }; 1108 1109 /* Calls the ndo_add_geneve_port of the caller in order to 1110 * supply the listening GENEVE udp ports. Callers are expected 1111 * to implement the ndo_add_geneve_port. 1112 */ 1113 void geneve_get_rx_port(struct net_device *dev) 1114 { 1115 struct net *net = dev_net(dev); 1116 struct geneve_net *gn = net_generic(net, geneve_net_id); 1117 struct geneve_sock *gs; 1118 sa_family_t sa_family; 1119 struct sock *sk; 1120 __be16 port; 1121 1122 rcu_read_lock(); 1123 list_for_each_entry_rcu(gs, &gn->sock_list, list) { 1124 sk = gs->sock->sk; 1125 sa_family = sk->sk_family; 1126 port = inet_sk(sk)->inet_sport; 1127 dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port); 1128 } 1129 rcu_read_unlock(); 1130 } 1131 EXPORT_SYMBOL_GPL(geneve_get_rx_port); 1132 1133 /* Initialize the device structure. */ 1134 static void geneve_setup(struct net_device *dev) 1135 { 1136 ether_setup(dev); 1137 1138 dev->netdev_ops = &geneve_netdev_ops; 1139 dev->ethtool_ops = &geneve_ethtool_ops; 1140 dev->destructor = free_netdev; 1141 1142 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1143 1144 dev->features |= NETIF_F_LLTX; 1145 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; 1146 dev->features |= NETIF_F_RXCSUM; 1147 dev->features |= NETIF_F_GSO_SOFTWARE; 1148 1149 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; 1150 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1151 1152 netif_keep_dst(dev); 1153 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1154 eth_hw_addr_random(dev); 1155 } 1156 1157 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1158 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1159 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1160 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1161 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1162 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1163 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1164 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1165 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1166 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1167 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1168 }; 1169 1170 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) 1171 { 1172 if (tb[IFLA_ADDRESS]) { 1173 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1174 return -EINVAL; 1175 1176 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1177 return -EADDRNOTAVAIL; 1178 } 1179 1180 if (!data) 1181 return -EINVAL; 1182 1183 if (data[IFLA_GENEVE_ID]) { 1184 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1185 1186 if (vni >= GENEVE_VID_MASK) 1187 return -ERANGE; 1188 } 1189 1190 return 0; 1191 } 1192 1193 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, 1194 __be16 dst_port, 1195 union geneve_addr *remote, 1196 u8 vni[], 1197 bool *tun_on_same_port, 1198 bool *tun_collect_md) 1199 { 1200 struct geneve_dev *geneve, *t; 1201 1202 *tun_on_same_port = false; 1203 *tun_collect_md = false; 1204 t = NULL; 1205 list_for_each_entry(geneve, &gn->geneve_list, next) { 1206 if (geneve->dst_port == dst_port) { 1207 *tun_collect_md = geneve->collect_md; 1208 *tun_on_same_port = true; 1209 } 1210 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 1211 !memcmp(remote, &geneve->remote, sizeof(geneve->remote)) && 1212 dst_port == geneve->dst_port) 1213 t = geneve; 1214 } 1215 return t; 1216 } 1217 1218 static int geneve_configure(struct net *net, struct net_device *dev, 1219 union geneve_addr *remote, 1220 __u32 vni, __u8 ttl, __u8 tos, __be16 dst_port, 1221 bool metadata, u32 flags) 1222 { 1223 struct geneve_net *gn = net_generic(net, geneve_net_id); 1224 struct geneve_dev *t, *geneve = netdev_priv(dev); 1225 bool tun_collect_md, tun_on_same_port; 1226 int err, encap_len; 1227 1228 if (!remote) 1229 return -EINVAL; 1230 if (metadata && 1231 (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl)) 1232 return -EINVAL; 1233 1234 geneve->net = net; 1235 geneve->dev = dev; 1236 1237 geneve->vni[0] = (vni & 0x00ff0000) >> 16; 1238 geneve->vni[1] = (vni & 0x0000ff00) >> 8; 1239 geneve->vni[2] = vni & 0x000000ff; 1240 1241 if ((remote->sa.sa_family == AF_INET && 1242 IN_MULTICAST(ntohl(remote->sin.sin_addr.s_addr))) || 1243 (remote->sa.sa_family == AF_INET6 && 1244 ipv6_addr_is_multicast(&remote->sin6.sin6_addr))) 1245 return -EINVAL; 1246 geneve->remote = *remote; 1247 1248 geneve->ttl = ttl; 1249 geneve->tos = tos; 1250 geneve->dst_port = dst_port; 1251 geneve->collect_md = metadata; 1252 geneve->flags = flags; 1253 1254 t = geneve_find_dev(gn, dst_port, remote, geneve->vni, 1255 &tun_on_same_port, &tun_collect_md); 1256 if (t) 1257 return -EBUSY; 1258 1259 /* make enough headroom for basic scenario */ 1260 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1261 if (remote->sa.sa_family == AF_INET) 1262 encap_len += sizeof(struct iphdr); 1263 else 1264 encap_len += sizeof(struct ipv6hdr); 1265 dev->needed_headroom = encap_len + ETH_HLEN; 1266 1267 if (metadata) { 1268 if (tun_on_same_port) 1269 return -EPERM; 1270 } else { 1271 if (tun_collect_md) 1272 return -EPERM; 1273 } 1274 1275 err = register_netdevice(dev); 1276 if (err) 1277 return err; 1278 1279 list_add(&geneve->next, &gn->geneve_list); 1280 return 0; 1281 } 1282 1283 static int geneve_newlink(struct net *net, struct net_device *dev, 1284 struct nlattr *tb[], struct nlattr *data[]) 1285 { 1286 __be16 dst_port = htons(GENEVE_UDP_PORT); 1287 __u8 ttl = 0, tos = 0; 1288 bool metadata = false; 1289 union geneve_addr remote = geneve_remote_unspec; 1290 __u32 vni = 0; 1291 u32 flags = 0; 1292 1293 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) 1294 return -EINVAL; 1295 1296 if (data[IFLA_GENEVE_REMOTE]) { 1297 remote.sa.sa_family = AF_INET; 1298 remote.sin.sin_addr.s_addr = 1299 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 1300 } 1301 1302 if (data[IFLA_GENEVE_REMOTE6]) { 1303 if (!IS_ENABLED(CONFIG_IPV6)) 1304 return -EPFNOSUPPORT; 1305 1306 remote.sa.sa_family = AF_INET6; 1307 remote.sin6.sin6_addr = 1308 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 1309 1310 if (ipv6_addr_type(&remote.sin6.sin6_addr) & 1311 IPV6_ADDR_LINKLOCAL) { 1312 netdev_dbg(dev, "link-local remote is unsupported\n"); 1313 return -EINVAL; 1314 } 1315 } 1316 1317 if (data[IFLA_GENEVE_ID]) 1318 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1319 1320 if (data[IFLA_GENEVE_TTL]) 1321 ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 1322 1323 if (data[IFLA_GENEVE_TOS]) 1324 tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 1325 1326 if (data[IFLA_GENEVE_PORT]) 1327 dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]); 1328 1329 if (data[IFLA_GENEVE_COLLECT_METADATA]) 1330 metadata = true; 1331 1332 if (data[IFLA_GENEVE_UDP_CSUM] && 1333 nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 1334 flags |= GENEVE_F_UDP_CSUM; 1335 1336 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && 1337 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 1338 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 1339 1340 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] && 1341 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 1342 flags |= GENEVE_F_UDP_ZERO_CSUM6_RX; 1343 1344 return geneve_configure(net, dev, &remote, vni, ttl, tos, dst_port, 1345 metadata, flags); 1346 } 1347 1348 static void geneve_dellink(struct net_device *dev, struct list_head *head) 1349 { 1350 struct geneve_dev *geneve = netdev_priv(dev); 1351 1352 list_del(&geneve->next); 1353 unregister_netdevice_queue(dev, head); 1354 } 1355 1356 static size_t geneve_get_size(const struct net_device *dev) 1357 { 1358 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 1359 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 1360 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 1361 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 1362 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 1363 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 1364 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 1365 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 1366 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 1367 0; 1368 } 1369 1370 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 1371 { 1372 struct geneve_dev *geneve = netdev_priv(dev); 1373 __u32 vni; 1374 1375 vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; 1376 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 1377 goto nla_put_failure; 1378 1379 if (geneve->remote.sa.sa_family == AF_INET) { 1380 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 1381 geneve->remote.sin.sin_addr.s_addr)) 1382 goto nla_put_failure; 1383 #if IS_ENABLED(CONFIG_IPV6) 1384 } else { 1385 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 1386 &geneve->remote.sin6.sin6_addr)) 1387 goto nla_put_failure; 1388 #endif 1389 } 1390 1391 if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || 1392 nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) 1393 goto nla_put_failure; 1394 1395 if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port)) 1396 goto nla_put_failure; 1397 1398 if (geneve->collect_md) { 1399 if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 1400 goto nla_put_failure; 1401 } 1402 1403 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 1404 !!(geneve->flags & GENEVE_F_UDP_CSUM)) || 1405 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 1406 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || 1407 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1408 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_RX))) 1409 goto nla_put_failure; 1410 1411 return 0; 1412 1413 nla_put_failure: 1414 return -EMSGSIZE; 1415 } 1416 1417 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 1418 .kind = "geneve", 1419 .maxtype = IFLA_GENEVE_MAX, 1420 .policy = geneve_policy, 1421 .priv_size = sizeof(struct geneve_dev), 1422 .setup = geneve_setup, 1423 .validate = geneve_validate, 1424 .newlink = geneve_newlink, 1425 .dellink = geneve_dellink, 1426 .get_size = geneve_get_size, 1427 .fill_info = geneve_fill_info, 1428 }; 1429 1430 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 1431 u8 name_assign_type, u16 dst_port) 1432 { 1433 struct nlattr *tb[IFLA_MAX + 1]; 1434 struct net_device *dev; 1435 int err; 1436 1437 memset(tb, 0, sizeof(tb)); 1438 dev = rtnl_create_link(net, name, name_assign_type, 1439 &geneve_link_ops, tb); 1440 if (IS_ERR(dev)) 1441 return dev; 1442 1443 err = geneve_configure(net, dev, &geneve_remote_unspec, 1444 0, 0, 0, htons(dst_port), true, 0); 1445 if (err) { 1446 free_netdev(dev); 1447 return ERR_PTR(err); 1448 } 1449 return dev; 1450 } 1451 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 1452 1453 static __net_init int geneve_init_net(struct net *net) 1454 { 1455 struct geneve_net *gn = net_generic(net, geneve_net_id); 1456 1457 INIT_LIST_HEAD(&gn->geneve_list); 1458 INIT_LIST_HEAD(&gn->sock_list); 1459 return 0; 1460 } 1461 1462 static void __net_exit geneve_exit_net(struct net *net) 1463 { 1464 struct geneve_net *gn = net_generic(net, geneve_net_id); 1465 struct geneve_dev *geneve, *next; 1466 struct net_device *dev, *aux; 1467 LIST_HEAD(list); 1468 1469 rtnl_lock(); 1470 1471 /* gather any geneve devices that were moved into this ns */ 1472 for_each_netdev_safe(net, dev, aux) 1473 if (dev->rtnl_link_ops == &geneve_link_ops) 1474 unregister_netdevice_queue(dev, &list); 1475 1476 /* now gather any other geneve devices that were created in this ns */ 1477 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { 1478 /* If geneve->dev is in the same netns, it was already added 1479 * to the list by the previous loop. 1480 */ 1481 if (!net_eq(dev_net(geneve->dev), net)) 1482 unregister_netdevice_queue(geneve->dev, &list); 1483 } 1484 1485 /* unregister the devices gathered above */ 1486 unregister_netdevice_many(&list); 1487 rtnl_unlock(); 1488 } 1489 1490 static struct pernet_operations geneve_net_ops = { 1491 .init = geneve_init_net, 1492 .exit = geneve_exit_net, 1493 .id = &geneve_net_id, 1494 .size = sizeof(struct geneve_net), 1495 }; 1496 1497 static int __init geneve_init_module(void) 1498 { 1499 int rc; 1500 1501 rc = register_pernet_subsys(&geneve_net_ops); 1502 if (rc) 1503 goto out1; 1504 1505 rc = rtnl_link_register(&geneve_link_ops); 1506 if (rc) 1507 goto out2; 1508 1509 return 0; 1510 out2: 1511 unregister_pernet_subsys(&geneve_net_ops); 1512 out1: 1513 return rc; 1514 } 1515 late_initcall(geneve_init_module); 1516 1517 static void __exit geneve_cleanup_module(void) 1518 { 1519 rtnl_link_unregister(&geneve_link_ops); 1520 unregister_pernet_subsys(&geneve_net_ops); 1521 } 1522 module_exit(geneve_cleanup_module); 1523 1524 MODULE_LICENSE("GPL"); 1525 MODULE_VERSION(GENEVE_NETDEV_VER); 1526 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 1527 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 1528 MODULE_ALIAS_RTNL_LINK("geneve"); 1529