1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> 3 */ 4 5 #include "ipvlan.h" 6 7 static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, 8 struct netlink_ext_ack *extack) 9 { 10 struct ipvl_dev *ipvlan; 11 unsigned int flags; 12 int err; 13 14 ASSERT_RTNL(); 15 if (port->mode != nval) { 16 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 17 flags = ipvlan->dev->flags; 18 if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { 19 err = dev_change_flags(ipvlan->dev, 20 flags | IFF_NOARP, 21 extack); 22 } else { 23 err = dev_change_flags(ipvlan->dev, 24 flags & ~IFF_NOARP, 25 extack); 26 } 27 if (unlikely(err)) 28 goto fail; 29 } 30 if (nval == IPVLAN_MODE_L3S) { 31 /* New mode is L3S */ 32 err = ipvlan_l3s_register(port); 33 if (err) 34 goto fail; 35 } else if (port->mode == IPVLAN_MODE_L3S) { 36 /* Old mode was L3S */ 37 ipvlan_l3s_unregister(port); 38 } 39 port->mode = nval; 40 } 41 return 0; 42 43 fail: 44 /* Undo the flags changes that have been done so far. */ 45 list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) { 46 flags = ipvlan->dev->flags; 47 if (port->mode == IPVLAN_MODE_L3 || 48 port->mode == IPVLAN_MODE_L3S) 49 dev_change_flags(ipvlan->dev, flags | IFF_NOARP, 50 NULL); 51 else 52 dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP, 53 NULL); 54 } 55 56 return err; 57 } 58 59 static int ipvlan_port_create(struct net_device *dev) 60 { 61 struct ipvl_port *port; 62 int err, idx; 63 64 port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); 65 if (!port) 66 return -ENOMEM; 67 68 write_pnet(&port->pnet, dev_net(dev)); 69 port->dev = dev; 70 port->mode = IPVLAN_MODE_L3; 71 INIT_LIST_HEAD(&port->ipvlans); 72 for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) 73 INIT_HLIST_HEAD(&port->hlhead[idx]); 74 75 skb_queue_head_init(&port->backlog); 76 INIT_WORK(&port->wq, ipvlan_process_multicast); 77 ida_init(&port->ida); 78 port->dev_id_start = 1; 79 80 err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); 81 if (err) 82 goto err; 83 84 return 0; 85 86 err: 87 kfree(port); 88 return err; 89 } 90 91 static void ipvlan_port_destroy(struct net_device *dev) 92 { 93 struct ipvl_port *port = ipvlan_port_get_rtnl(dev); 94 struct sk_buff *skb; 95 96 if (port->mode == IPVLAN_MODE_L3S) 97 ipvlan_l3s_unregister(port); 98 netdev_rx_handler_unregister(dev); 99 cancel_work_sync(&port->wq); 100 while ((skb = __skb_dequeue(&port->backlog)) != NULL) { 101 if (skb->dev) 102 dev_put(skb->dev); 103 kfree_skb(skb); 104 } 105 ida_destroy(&port->ida); 106 kfree(port); 107 } 108 109 #define IPVLAN_FEATURES \ 110 (NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ 111 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ 112 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ 113 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) 114 115 #define IPVLAN_STATE_MASK \ 116 ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) 117 118 static int ipvlan_init(struct net_device *dev) 119 { 120 struct ipvl_dev *ipvlan = netdev_priv(dev); 121 struct net_device *phy_dev = ipvlan->phy_dev; 122 struct ipvl_port *port; 123 int err; 124 125 dev->state = (dev->state & ~IPVLAN_STATE_MASK) | 126 (phy_dev->state & IPVLAN_STATE_MASK); 127 dev->features = phy_dev->features & IPVLAN_FEATURES; 128 dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED; 129 dev->hw_enc_features |= dev->features; 130 dev->gso_max_size = phy_dev->gso_max_size; 131 dev->gso_max_segs = phy_dev->gso_max_segs; 132 dev->hard_header_len = phy_dev->hard_header_len; 133 134 ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); 135 if (!ipvlan->pcpu_stats) 136 return -ENOMEM; 137 138 if (!netif_is_ipvlan_port(phy_dev)) { 139 err = ipvlan_port_create(phy_dev); 140 if (err < 0) { 141 free_percpu(ipvlan->pcpu_stats); 142 return err; 143 } 144 } 145 port = ipvlan_port_get_rtnl(phy_dev); 146 port->count += 1; 147 return 0; 148 } 149 150 static void ipvlan_uninit(struct net_device *dev) 151 { 152 struct ipvl_dev *ipvlan = netdev_priv(dev); 153 struct net_device *phy_dev = ipvlan->phy_dev; 154 struct ipvl_port *port; 155 156 free_percpu(ipvlan->pcpu_stats); 157 158 port = ipvlan_port_get_rtnl(phy_dev); 159 port->count -= 1; 160 if (!port->count) 161 ipvlan_port_destroy(port->dev); 162 } 163 164 static int ipvlan_open(struct net_device *dev) 165 { 166 struct ipvl_dev *ipvlan = netdev_priv(dev); 167 struct net_device *phy_dev = ipvlan->phy_dev; 168 struct ipvl_addr *addr; 169 170 if (ipvlan->port->mode == IPVLAN_MODE_L3 || 171 ipvlan->port->mode == IPVLAN_MODE_L3S) 172 dev->flags |= IFF_NOARP; 173 else 174 dev->flags &= ~IFF_NOARP; 175 176 rcu_read_lock(); 177 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) 178 ipvlan_ht_addr_add(ipvlan, addr); 179 rcu_read_unlock(); 180 181 return dev_uc_add(phy_dev, phy_dev->dev_addr); 182 } 183 184 static int ipvlan_stop(struct net_device *dev) 185 { 186 struct ipvl_dev *ipvlan = netdev_priv(dev); 187 struct net_device *phy_dev = ipvlan->phy_dev; 188 struct ipvl_addr *addr; 189 190 dev_uc_unsync(phy_dev, dev); 191 dev_mc_unsync(phy_dev, dev); 192 193 dev_uc_del(phy_dev, phy_dev->dev_addr); 194 195 rcu_read_lock(); 196 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) 197 ipvlan_ht_addr_del(addr); 198 rcu_read_unlock(); 199 200 return 0; 201 } 202 203 static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, 204 struct net_device *dev) 205 { 206 const struct ipvl_dev *ipvlan = netdev_priv(dev); 207 int skblen = skb->len; 208 int ret; 209 210 ret = ipvlan_queue_xmit(skb, dev); 211 if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { 212 struct ipvl_pcpu_stats *pcptr; 213 214 pcptr = this_cpu_ptr(ipvlan->pcpu_stats); 215 216 u64_stats_update_begin(&pcptr->syncp); 217 pcptr->tx_pkts++; 218 pcptr->tx_bytes += skblen; 219 u64_stats_update_end(&pcptr->syncp); 220 } else { 221 this_cpu_inc(ipvlan->pcpu_stats->tx_drps); 222 } 223 return ret; 224 } 225 226 static netdev_features_t ipvlan_fix_features(struct net_device *dev, 227 netdev_features_t features) 228 { 229 struct ipvl_dev *ipvlan = netdev_priv(dev); 230 231 return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES); 232 } 233 234 static void ipvlan_change_rx_flags(struct net_device *dev, int change) 235 { 236 struct ipvl_dev *ipvlan = netdev_priv(dev); 237 struct net_device *phy_dev = ipvlan->phy_dev; 238 239 if (change & IFF_ALLMULTI) 240 dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); 241 } 242 243 static void ipvlan_set_multicast_mac_filter(struct net_device *dev) 244 { 245 struct ipvl_dev *ipvlan = netdev_priv(dev); 246 247 if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { 248 bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); 249 } else { 250 struct netdev_hw_addr *ha; 251 DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); 252 253 bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); 254 netdev_for_each_mc_addr(ha, dev) 255 __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); 256 257 /* Turn-on broadcast bit irrespective of address family, 258 * since broadcast is deferred to a work-queue, hence no 259 * impact on fast-path processing. 260 */ 261 __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters); 262 263 bitmap_copy(ipvlan->mac_filters, mc_filters, 264 IPVLAN_MAC_FILTER_SIZE); 265 } 266 dev_uc_sync(ipvlan->phy_dev, dev); 267 dev_mc_sync(ipvlan->phy_dev, dev); 268 } 269 270 static void ipvlan_get_stats64(struct net_device *dev, 271 struct rtnl_link_stats64 *s) 272 { 273 struct ipvl_dev *ipvlan = netdev_priv(dev); 274 275 if (ipvlan->pcpu_stats) { 276 struct ipvl_pcpu_stats *pcptr; 277 u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; 278 u32 rx_errs = 0, tx_drps = 0; 279 u32 strt; 280 int idx; 281 282 for_each_possible_cpu(idx) { 283 pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); 284 do { 285 strt= u64_stats_fetch_begin_irq(&pcptr->syncp); 286 rx_pkts = pcptr->rx_pkts; 287 rx_bytes = pcptr->rx_bytes; 288 rx_mcast = pcptr->rx_mcast; 289 tx_pkts = pcptr->tx_pkts; 290 tx_bytes = pcptr->tx_bytes; 291 } while (u64_stats_fetch_retry_irq(&pcptr->syncp, 292 strt)); 293 294 s->rx_packets += rx_pkts; 295 s->rx_bytes += rx_bytes; 296 s->multicast += rx_mcast; 297 s->tx_packets += tx_pkts; 298 s->tx_bytes += tx_bytes; 299 300 /* u32 values are updated without syncp protection. */ 301 rx_errs += pcptr->rx_errs; 302 tx_drps += pcptr->tx_drps; 303 } 304 s->rx_errors = rx_errs; 305 s->rx_dropped = rx_errs; 306 s->tx_dropped = tx_drps; 307 } 308 } 309 310 static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) 311 { 312 struct ipvl_dev *ipvlan = netdev_priv(dev); 313 struct net_device *phy_dev = ipvlan->phy_dev; 314 315 return vlan_vid_add(phy_dev, proto, vid); 316 } 317 318 static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, 319 u16 vid) 320 { 321 struct ipvl_dev *ipvlan = netdev_priv(dev); 322 struct net_device *phy_dev = ipvlan->phy_dev; 323 324 vlan_vid_del(phy_dev, proto, vid); 325 return 0; 326 } 327 328 static int ipvlan_get_iflink(const struct net_device *dev) 329 { 330 struct ipvl_dev *ipvlan = netdev_priv(dev); 331 332 return ipvlan->phy_dev->ifindex; 333 } 334 335 static const struct net_device_ops ipvlan_netdev_ops = { 336 .ndo_init = ipvlan_init, 337 .ndo_uninit = ipvlan_uninit, 338 .ndo_open = ipvlan_open, 339 .ndo_stop = ipvlan_stop, 340 .ndo_start_xmit = ipvlan_start_xmit, 341 .ndo_fix_features = ipvlan_fix_features, 342 .ndo_change_rx_flags = ipvlan_change_rx_flags, 343 .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, 344 .ndo_get_stats64 = ipvlan_get_stats64, 345 .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, 346 .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, 347 .ndo_get_iflink = ipvlan_get_iflink, 348 }; 349 350 static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, 351 unsigned short type, const void *daddr, 352 const void *saddr, unsigned len) 353 { 354 const struct ipvl_dev *ipvlan = netdev_priv(dev); 355 struct net_device *phy_dev = ipvlan->phy_dev; 356 357 /* TODO Probably use a different field than dev_addr so that the 358 * mac-address on the virtual device is portable and can be carried 359 * while the packets use the mac-addr on the physical device. 360 */ 361 return dev_hard_header(skb, phy_dev, type, daddr, 362 saddr ? : phy_dev->dev_addr, len); 363 } 364 365 static const struct header_ops ipvlan_header_ops = { 366 .create = ipvlan_hard_header, 367 .parse = eth_header_parse, 368 .cache = eth_header_cache, 369 .cache_update = eth_header_cache_update, 370 }; 371 372 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) 373 { 374 ipvlan->dev->mtu = dev->mtu; 375 } 376 377 static bool netif_is_ipvlan(const struct net_device *dev) 378 { 379 /* both ipvlan and ipvtap devices use the same netdev_ops */ 380 return dev->netdev_ops == &ipvlan_netdev_ops; 381 } 382 383 static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev, 384 struct ethtool_link_ksettings *cmd) 385 { 386 const struct ipvl_dev *ipvlan = netdev_priv(dev); 387 388 return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd); 389 } 390 391 static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, 392 struct ethtool_drvinfo *drvinfo) 393 { 394 strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); 395 strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); 396 } 397 398 static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) 399 { 400 const struct ipvl_dev *ipvlan = netdev_priv(dev); 401 402 return ipvlan->msg_enable; 403 } 404 405 static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) 406 { 407 struct ipvl_dev *ipvlan = netdev_priv(dev); 408 409 ipvlan->msg_enable = value; 410 } 411 412 static const struct ethtool_ops ipvlan_ethtool_ops = { 413 .get_link = ethtool_op_get_link, 414 .get_link_ksettings = ipvlan_ethtool_get_link_ksettings, 415 .get_drvinfo = ipvlan_ethtool_get_drvinfo, 416 .get_msglevel = ipvlan_ethtool_get_msglevel, 417 .set_msglevel = ipvlan_ethtool_set_msglevel, 418 }; 419 420 static int ipvlan_nl_changelink(struct net_device *dev, 421 struct nlattr *tb[], struct nlattr *data[], 422 struct netlink_ext_ack *extack) 423 { 424 struct ipvl_dev *ipvlan = netdev_priv(dev); 425 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 426 int err = 0; 427 428 if (!data) 429 return 0; 430 if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN)) 431 return -EPERM; 432 433 if (data[IFLA_IPVLAN_MODE]) { 434 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 435 436 err = ipvlan_set_port_mode(port, nmode, extack); 437 } 438 439 if (!err && data[IFLA_IPVLAN_FLAGS]) { 440 u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 441 442 if (flags & IPVLAN_F_PRIVATE) 443 ipvlan_mark_private(port); 444 else 445 ipvlan_clear_private(port); 446 447 if (flags & IPVLAN_F_VEPA) 448 ipvlan_mark_vepa(port); 449 else 450 ipvlan_clear_vepa(port); 451 } 452 453 return err; 454 } 455 456 static size_t ipvlan_nl_getsize(const struct net_device *dev) 457 { 458 return (0 459 + nla_total_size(2) /* IFLA_IPVLAN_MODE */ 460 + nla_total_size(2) /* IFLA_IPVLAN_FLAGS */ 461 ); 462 } 463 464 static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[], 465 struct netlink_ext_ack *extack) 466 { 467 if (!data) 468 return 0; 469 470 if (data[IFLA_IPVLAN_MODE]) { 471 u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 472 473 if (mode >= IPVLAN_MODE_MAX) 474 return -EINVAL; 475 } 476 if (data[IFLA_IPVLAN_FLAGS]) { 477 u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 478 479 /* Only two bits are used at this moment. */ 480 if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) 481 return -EINVAL; 482 /* Also both flags can't be active at the same time. */ 483 if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) == 484 (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) 485 return -EINVAL; 486 } 487 488 return 0; 489 } 490 491 static int ipvlan_nl_fillinfo(struct sk_buff *skb, 492 const struct net_device *dev) 493 { 494 struct ipvl_dev *ipvlan = netdev_priv(dev); 495 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 496 int ret = -EINVAL; 497 498 if (!port) 499 goto err; 500 501 ret = -EMSGSIZE; 502 if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) 503 goto err; 504 if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags)) 505 goto err; 506 507 return 0; 508 509 err: 510 return ret; 511 } 512 513 int ipvlan_link_new(struct net *src_net, struct net_device *dev, 514 struct nlattr *tb[], struct nlattr *data[], 515 struct netlink_ext_ack *extack) 516 { 517 struct ipvl_dev *ipvlan = netdev_priv(dev); 518 struct ipvl_port *port; 519 struct net_device *phy_dev; 520 int err; 521 u16 mode = IPVLAN_MODE_L3; 522 523 if (!tb[IFLA_LINK]) 524 return -EINVAL; 525 526 phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); 527 if (!phy_dev) 528 return -ENODEV; 529 530 if (netif_is_ipvlan(phy_dev)) { 531 struct ipvl_dev *tmp = netdev_priv(phy_dev); 532 533 phy_dev = tmp->phy_dev; 534 if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN)) 535 return -EPERM; 536 } else if (!netif_is_ipvlan_port(phy_dev)) { 537 /* Exit early if the underlying link is invalid or busy */ 538 if (phy_dev->type != ARPHRD_ETHER || 539 phy_dev->flags & IFF_LOOPBACK) { 540 netdev_err(phy_dev, 541 "Master is either lo or non-ether device\n"); 542 return -EINVAL; 543 } 544 545 if (netdev_is_rx_handler_busy(phy_dev)) { 546 netdev_err(phy_dev, "Device is already in use.\n"); 547 return -EBUSY; 548 } 549 } 550 551 ipvlan->phy_dev = phy_dev; 552 ipvlan->dev = dev; 553 ipvlan->sfeatures = IPVLAN_FEATURES; 554 if (!tb[IFLA_MTU]) 555 ipvlan_adjust_mtu(ipvlan, phy_dev); 556 INIT_LIST_HEAD(&ipvlan->addrs); 557 spin_lock_init(&ipvlan->addrs_lock); 558 559 /* TODO Probably put random address here to be presented to the 560 * world but keep using the physical-dev address for the outgoing 561 * packets. 562 */ 563 memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN); 564 565 dev->priv_flags |= IFF_NO_RX_HANDLER; 566 567 err = register_netdevice(dev); 568 if (err < 0) 569 return err; 570 571 /* ipvlan_init() would have created the port, if required */ 572 port = ipvlan_port_get_rtnl(phy_dev); 573 ipvlan->port = port; 574 575 /* If the port-id base is at the MAX value, then wrap it around and 576 * begin from 0x1 again. This may be due to a busy system where lots 577 * of slaves are getting created and deleted. 578 */ 579 if (port->dev_id_start == 0xFFFE) 580 port->dev_id_start = 0x1; 581 582 /* Since L2 address is shared among all IPvlan slaves including 583 * master, use unique 16 bit dev-ids to diffentiate among them. 584 * Assign IDs between 0x1 and 0xFFFE (used by the master) to each 585 * slave link [see addrconf_ifid_eui48()]. 586 */ 587 err = ida_simple_get(&port->ida, port->dev_id_start, 0xFFFE, 588 GFP_KERNEL); 589 if (err < 0) 590 err = ida_simple_get(&port->ida, 0x1, port->dev_id_start, 591 GFP_KERNEL); 592 if (err < 0) 593 goto unregister_netdev; 594 dev->dev_id = err; 595 596 /* Increment id-base to the next slot for the future assignment */ 597 port->dev_id_start = err + 1; 598 599 err = netdev_upper_dev_link(phy_dev, dev, extack); 600 if (err) 601 goto remove_ida; 602 603 /* Flags are per port and latest update overrides. User has 604 * to be consistent in setting it just like the mode attribute. 605 */ 606 if (data && data[IFLA_IPVLAN_FLAGS]) 607 port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 608 609 if (data && data[IFLA_IPVLAN_MODE]) 610 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 611 612 err = ipvlan_set_port_mode(port, mode, extack); 613 if (err) 614 goto unlink_netdev; 615 616 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); 617 netif_stacked_transfer_operstate(phy_dev, dev); 618 return 0; 619 620 unlink_netdev: 621 netdev_upper_dev_unlink(phy_dev, dev); 622 remove_ida: 623 ida_simple_remove(&port->ida, dev->dev_id); 624 unregister_netdev: 625 unregister_netdevice(dev); 626 return err; 627 } 628 EXPORT_SYMBOL_GPL(ipvlan_link_new); 629 630 void ipvlan_link_delete(struct net_device *dev, struct list_head *head) 631 { 632 struct ipvl_dev *ipvlan = netdev_priv(dev); 633 struct ipvl_addr *addr, *next; 634 635 spin_lock_bh(&ipvlan->addrs_lock); 636 list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { 637 ipvlan_ht_addr_del(addr); 638 list_del_rcu(&addr->anode); 639 kfree_rcu(addr, rcu); 640 } 641 spin_unlock_bh(&ipvlan->addrs_lock); 642 643 ida_simple_remove(&ipvlan->port->ida, dev->dev_id); 644 list_del_rcu(&ipvlan->pnode); 645 unregister_netdevice_queue(dev, head); 646 netdev_upper_dev_unlink(ipvlan->phy_dev, dev); 647 } 648 EXPORT_SYMBOL_GPL(ipvlan_link_delete); 649 650 void ipvlan_link_setup(struct net_device *dev) 651 { 652 ether_setup(dev); 653 654 dev->max_mtu = ETH_MAX_MTU; 655 dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); 656 dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; 657 dev->netdev_ops = &ipvlan_netdev_ops; 658 dev->needs_free_netdev = true; 659 dev->header_ops = &ipvlan_header_ops; 660 dev->ethtool_ops = &ipvlan_ethtool_ops; 661 } 662 EXPORT_SYMBOL_GPL(ipvlan_link_setup); 663 664 static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = 665 { 666 [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, 667 [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 }, 668 }; 669 670 static struct rtnl_link_ops ipvlan_link_ops = { 671 .kind = "ipvlan", 672 .priv_size = sizeof(struct ipvl_dev), 673 674 .setup = ipvlan_link_setup, 675 .newlink = ipvlan_link_new, 676 .dellink = ipvlan_link_delete, 677 }; 678 679 int ipvlan_link_register(struct rtnl_link_ops *ops) 680 { 681 ops->get_size = ipvlan_nl_getsize; 682 ops->policy = ipvlan_nl_policy; 683 ops->validate = ipvlan_nl_validate; 684 ops->fill_info = ipvlan_nl_fillinfo; 685 ops->changelink = ipvlan_nl_changelink; 686 ops->maxtype = IFLA_IPVLAN_MAX; 687 return rtnl_link_register(ops); 688 } 689 EXPORT_SYMBOL_GPL(ipvlan_link_register); 690 691 static int ipvlan_device_event(struct notifier_block *unused, 692 unsigned long event, void *ptr) 693 { 694 struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); 695 struct netdev_notifier_pre_changeaddr_info *prechaddr_info; 696 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 697 struct ipvl_dev *ipvlan, *next; 698 struct ipvl_port *port; 699 LIST_HEAD(lst_kill); 700 int err; 701 702 if (!netif_is_ipvlan_port(dev)) 703 return NOTIFY_DONE; 704 705 port = ipvlan_port_get_rtnl(dev); 706 707 switch (event) { 708 case NETDEV_CHANGE: 709 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 710 netif_stacked_transfer_operstate(ipvlan->phy_dev, 711 ipvlan->dev); 712 break; 713 714 case NETDEV_REGISTER: { 715 struct net *oldnet, *newnet = dev_net(dev); 716 717 oldnet = read_pnet(&port->pnet); 718 if (net_eq(newnet, oldnet)) 719 break; 720 721 write_pnet(&port->pnet, newnet); 722 723 ipvlan_migrate_l3s_hook(oldnet, newnet); 724 break; 725 } 726 case NETDEV_UNREGISTER: 727 if (dev->reg_state != NETREG_UNREGISTERING) 728 break; 729 730 list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) 731 ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, 732 &lst_kill); 733 unregister_netdevice_many(&lst_kill); 734 break; 735 736 case NETDEV_FEAT_CHANGE: 737 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 738 ipvlan->dev->features = dev->features & IPVLAN_FEATURES; 739 ipvlan->dev->gso_max_size = dev->gso_max_size; 740 ipvlan->dev->gso_max_segs = dev->gso_max_segs; 741 netdev_features_change(ipvlan->dev); 742 } 743 break; 744 745 case NETDEV_CHANGEMTU: 746 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 747 ipvlan_adjust_mtu(ipvlan, dev); 748 break; 749 750 case NETDEV_PRE_CHANGEADDR: 751 prechaddr_info = ptr; 752 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 753 err = dev_pre_changeaddr_notify(ipvlan->dev, 754 prechaddr_info->dev_addr, 755 extack); 756 if (err) 757 return notifier_from_errno(err); 758 } 759 break; 760 761 case NETDEV_CHANGEADDR: 762 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 763 ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr); 764 call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev); 765 } 766 break; 767 768 case NETDEV_PRE_TYPE_CHANGE: 769 /* Forbid underlying device to change its type. */ 770 return NOTIFY_BAD; 771 } 772 return NOTIFY_DONE; 773 } 774 775 /* the caller must held the addrs lock */ 776 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) 777 { 778 struct ipvl_addr *addr; 779 780 addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); 781 if (!addr) 782 return -ENOMEM; 783 784 addr->master = ipvlan; 785 if (!is_v6) { 786 memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr)); 787 addr->atype = IPVL_IPV4; 788 #if IS_ENABLED(CONFIG_IPV6) 789 } else { 790 memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr)); 791 addr->atype = IPVL_IPV6; 792 #endif 793 } 794 795 list_add_tail_rcu(&addr->anode, &ipvlan->addrs); 796 797 /* If the interface is not up, the address will be added to the hash 798 * list by ipvlan_open. 799 */ 800 if (netif_running(ipvlan->dev)) 801 ipvlan_ht_addr_add(ipvlan, addr); 802 803 return 0; 804 } 805 806 static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) 807 { 808 struct ipvl_addr *addr; 809 810 spin_lock_bh(&ipvlan->addrs_lock); 811 addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); 812 if (!addr) { 813 spin_unlock_bh(&ipvlan->addrs_lock); 814 return; 815 } 816 817 ipvlan_ht_addr_del(addr); 818 list_del_rcu(&addr->anode); 819 spin_unlock_bh(&ipvlan->addrs_lock); 820 kfree_rcu(addr, rcu); 821 } 822 823 static bool ipvlan_is_valid_dev(const struct net_device *dev) 824 { 825 struct ipvl_dev *ipvlan = netdev_priv(dev); 826 827 if (!netif_is_ipvlan(dev)) 828 return false; 829 830 if (!ipvlan || !ipvlan->port) 831 return false; 832 833 return true; 834 } 835 836 #if IS_ENABLED(CONFIG_IPV6) 837 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 838 { 839 int ret = -EINVAL; 840 841 spin_lock_bh(&ipvlan->addrs_lock); 842 if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) 843 netif_err(ipvlan, ifup, ipvlan->dev, 844 "Failed to add IPv6=%pI6c addr for %s intf\n", 845 ip6_addr, ipvlan->dev->name); 846 else 847 ret = ipvlan_add_addr(ipvlan, ip6_addr, true); 848 spin_unlock_bh(&ipvlan->addrs_lock); 849 return ret; 850 } 851 852 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 853 { 854 return ipvlan_del_addr(ipvlan, ip6_addr, true); 855 } 856 857 static int ipvlan_addr6_event(struct notifier_block *unused, 858 unsigned long event, void *ptr) 859 { 860 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; 861 struct net_device *dev = (struct net_device *)if6->idev->dev; 862 struct ipvl_dev *ipvlan = netdev_priv(dev); 863 864 if (!ipvlan_is_valid_dev(dev)) 865 return NOTIFY_DONE; 866 867 switch (event) { 868 case NETDEV_UP: 869 if (ipvlan_add_addr6(ipvlan, &if6->addr)) 870 return NOTIFY_BAD; 871 break; 872 873 case NETDEV_DOWN: 874 ipvlan_del_addr6(ipvlan, &if6->addr); 875 break; 876 } 877 878 return NOTIFY_OK; 879 } 880 881 static int ipvlan_addr6_validator_event(struct notifier_block *unused, 882 unsigned long event, void *ptr) 883 { 884 struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; 885 struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; 886 struct ipvl_dev *ipvlan = netdev_priv(dev); 887 888 if (!ipvlan_is_valid_dev(dev)) 889 return NOTIFY_DONE; 890 891 switch (event) { 892 case NETDEV_UP: 893 if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { 894 NL_SET_ERR_MSG(i6vi->extack, 895 "Address already assigned to an ipvlan device"); 896 return notifier_from_errno(-EADDRINUSE); 897 } 898 break; 899 } 900 901 return NOTIFY_OK; 902 } 903 #endif 904 905 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 906 { 907 int ret = -EINVAL; 908 909 spin_lock_bh(&ipvlan->addrs_lock); 910 if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) 911 netif_err(ipvlan, ifup, ipvlan->dev, 912 "Failed to add IPv4=%pI4 on %s intf.\n", 913 ip4_addr, ipvlan->dev->name); 914 else 915 ret = ipvlan_add_addr(ipvlan, ip4_addr, false); 916 spin_unlock_bh(&ipvlan->addrs_lock); 917 return ret; 918 } 919 920 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 921 { 922 return ipvlan_del_addr(ipvlan, ip4_addr, false); 923 } 924 925 static int ipvlan_addr4_event(struct notifier_block *unused, 926 unsigned long event, void *ptr) 927 { 928 struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; 929 struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; 930 struct ipvl_dev *ipvlan = netdev_priv(dev); 931 struct in_addr ip4_addr; 932 933 if (!ipvlan_is_valid_dev(dev)) 934 return NOTIFY_DONE; 935 936 switch (event) { 937 case NETDEV_UP: 938 ip4_addr.s_addr = if4->ifa_address; 939 if (ipvlan_add_addr4(ipvlan, &ip4_addr)) 940 return NOTIFY_BAD; 941 break; 942 943 case NETDEV_DOWN: 944 ip4_addr.s_addr = if4->ifa_address; 945 ipvlan_del_addr4(ipvlan, &ip4_addr); 946 break; 947 } 948 949 return NOTIFY_OK; 950 } 951 952 static int ipvlan_addr4_validator_event(struct notifier_block *unused, 953 unsigned long event, void *ptr) 954 { 955 struct in_validator_info *ivi = (struct in_validator_info *)ptr; 956 struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; 957 struct ipvl_dev *ipvlan = netdev_priv(dev); 958 959 if (!ipvlan_is_valid_dev(dev)) 960 return NOTIFY_DONE; 961 962 switch (event) { 963 case NETDEV_UP: 964 if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { 965 NL_SET_ERR_MSG(ivi->extack, 966 "Address already assigned to an ipvlan device"); 967 return notifier_from_errno(-EADDRINUSE); 968 } 969 break; 970 } 971 972 return NOTIFY_OK; 973 } 974 975 static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { 976 .notifier_call = ipvlan_addr4_event, 977 }; 978 979 static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = { 980 .notifier_call = ipvlan_addr4_validator_event, 981 }; 982 983 static struct notifier_block ipvlan_notifier_block __read_mostly = { 984 .notifier_call = ipvlan_device_event, 985 }; 986 987 #if IS_ENABLED(CONFIG_IPV6) 988 static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { 989 .notifier_call = ipvlan_addr6_event, 990 }; 991 992 static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { 993 .notifier_call = ipvlan_addr6_validator_event, 994 }; 995 #endif 996 997 static int __init ipvlan_init_module(void) 998 { 999 int err; 1000 1001 ipvlan_init_secret(); 1002 register_netdevice_notifier(&ipvlan_notifier_block); 1003 #if IS_ENABLED(CONFIG_IPV6) 1004 register_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1005 register_inet6addr_validator_notifier( 1006 &ipvlan_addr6_vtor_notifier_block); 1007 #endif 1008 register_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1009 register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); 1010 1011 err = ipvlan_l3s_init(); 1012 if (err < 0) 1013 goto error; 1014 1015 err = ipvlan_link_register(&ipvlan_link_ops); 1016 if (err < 0) { 1017 ipvlan_l3s_cleanup(); 1018 goto error; 1019 } 1020 1021 return 0; 1022 error: 1023 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1024 unregister_inetaddr_validator_notifier( 1025 &ipvlan_addr4_vtor_notifier_block); 1026 #if IS_ENABLED(CONFIG_IPV6) 1027 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1028 unregister_inet6addr_validator_notifier( 1029 &ipvlan_addr6_vtor_notifier_block); 1030 #endif 1031 unregister_netdevice_notifier(&ipvlan_notifier_block); 1032 return err; 1033 } 1034 1035 static void __exit ipvlan_cleanup_module(void) 1036 { 1037 rtnl_link_unregister(&ipvlan_link_ops); 1038 ipvlan_l3s_cleanup(); 1039 unregister_netdevice_notifier(&ipvlan_notifier_block); 1040 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1041 unregister_inetaddr_validator_notifier( 1042 &ipvlan_addr4_vtor_notifier_block); 1043 #if IS_ENABLED(CONFIG_IPV6) 1044 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1045 unregister_inet6addr_validator_notifier( 1046 &ipvlan_addr6_vtor_notifier_block); 1047 #endif 1048 } 1049 1050 module_init(ipvlan_init_module); 1051 module_exit(ipvlan_cleanup_module); 1052 1053 MODULE_LICENSE("GPL"); 1054 MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); 1055 MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); 1056 MODULE_ALIAS_RTNL_LINK("ipvlan"); 1057