1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * drivers/net/veth.c 4 * 5 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 6 * 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 9 * 10 */ 11 12 #include <linux/netdevice.h> 13 #include <linux/slab.h> 14 #include <linux/ethtool.h> 15 #include <linux/etherdevice.h> 16 #include <linux/u64_stats_sync.h> 17 18 #include <net/rtnetlink.h> 19 #include <net/dst.h> 20 #include <net/xfrm.h> 21 #include <net/xdp.h> 22 #include <linux/veth.h> 23 #include <linux/module.h> 24 #include <linux/bpf.h> 25 #include <linux/filter.h> 26 #include <linux/ptr_ring.h> 27 #include <linux/bpf_trace.h> 28 #include <linux/net_tstamp.h> 29 #include <net/page_pool/helpers.h> 30 31 #define DRV_NAME "veth" 32 #define DRV_VERSION "1.0" 33 34 #define VETH_XDP_FLAG BIT(0) 35 #define VETH_RING_SIZE 256 36 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 37 38 #define VETH_XDP_TX_BULK_SIZE 16 39 #define VETH_XDP_BATCH 16 40 41 struct veth_stats { 42 u64 rx_drops; 43 /* xdp */ 44 u64 xdp_packets; 45 u64 xdp_bytes; 46 u64 xdp_redirect; 47 u64 xdp_drops; 48 u64 xdp_tx; 49 u64 xdp_tx_err; 50 u64 peer_tq_xdp_xmit; 51 u64 peer_tq_xdp_xmit_err; 52 }; 53 54 struct veth_rq_stats { 55 struct veth_stats vs; 56 struct u64_stats_sync syncp; 57 }; 58 59 struct veth_rq { 60 struct napi_struct xdp_napi; 61 struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */ 62 struct net_device *dev; 63 struct bpf_prog __rcu *xdp_prog; 64 struct xdp_mem_info xdp_mem; 65 struct veth_rq_stats stats; 66 bool rx_notify_masked; 67 struct ptr_ring xdp_ring; 68 struct xdp_rxq_info xdp_rxq; 69 struct page_pool *page_pool; 70 }; 71 72 struct veth_priv { 73 struct net_device __rcu *peer; 74 atomic64_t dropped; 75 struct bpf_prog *_xdp_prog; 76 struct veth_rq *rq; 77 unsigned int requested_headroom; 78 }; 79 80 struct veth_xdp_tx_bq { 81 struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE]; 82 unsigned int count; 83 }; 84 85 /* 86 * ethtool interface 87 */ 88 89 struct veth_q_stat_desc { 90 char desc[ETH_GSTRING_LEN]; 91 size_t offset; 92 }; 93 94 #define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) 95 96 static const struct veth_q_stat_desc veth_rq_stats_desc[] = { 97 { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 98 { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 99 { "drops", VETH_RQ_STAT(rx_drops) }, 100 { "xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, 101 { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 102 { "xdp_tx", VETH_RQ_STAT(xdp_tx) }, 103 { "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, 104 }; 105 106 #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 107 108 static const struct veth_q_stat_desc veth_tq_stats_desc[] = { 109 { "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) }, 110 { "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) }, 111 }; 112 113 #define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc) 114 115 static struct { 116 const char string[ETH_GSTRING_LEN]; 117 } ethtool_stats_keys[] = { 118 { "peer_ifindex" }, 119 }; 120 121 struct veth_xdp_buff { 122 struct xdp_buff xdp; 123 struct sk_buff *skb; 124 }; 125 126 static int veth_get_link_ksettings(struct net_device *dev, 127 struct ethtool_link_ksettings *cmd) 128 { 129 cmd->base.speed = SPEED_10000; 130 cmd->base.duplex = DUPLEX_FULL; 131 cmd->base.port = PORT_TP; 132 cmd->base.autoneg = AUTONEG_DISABLE; 133 return 0; 134 } 135 136 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 137 { 138 
	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
	strscpy(info->version, DRV_VERSION, sizeof(info->version));
}

static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	u8 *p = buf;
	int i, j;

	switch(stringset) {
	case ETH_SS_STATS:
		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		p += sizeof(ethtool_stats_keys);
		for (i = 0; i < dev->real_num_rx_queues; i++)
			for (j = 0; j < VETH_RQ_STATS_LEN; j++)
				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
						i, veth_rq_stats_desc[j].desc);

		for (i = 0; i < dev->real_num_tx_queues; i++)
			for (j = 0; j < VETH_TQ_STATS_LEN; j++)
				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
						i, veth_tq_stats_desc[j].desc);

		page_pool_ethtool_stats_get_strings(p);
		break;
	}
}

static int veth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ethtool_stats_keys) +
		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues +
		       page_pool_ethtool_stats_get_count();
	default:
		return -EOPNOTSUPP;
	}
}

static void veth_get_page_pool_stats(struct net_device *dev, u64 *data)
{
#ifdef CONFIG_PAGE_POOL_STATS
	struct veth_priv *priv = netdev_priv(dev);
	struct page_pool_stats pp_stats = {};
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		if (!priv->rq[i].page_pool)
			continue;
		page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
	}
	page_pool_ethtool_stats_get(data, &pp_stats);
#endif /* CONFIG_PAGE_POOL_STATS */
}

static void veth_get_ethtool_stats(struct net_device *dev,
				   struct ethtool_stats *stats, u64 *data)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int i, j, idx, pp_idx;

	data[0] = peer ?
peer->ifindex : 0; 203 idx = 1; 204 for (i = 0; i < dev->real_num_rx_queues; i++) { 205 const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 206 const void *stats_base = (void *)&rq_stats->vs; 207 unsigned int start; 208 size_t offset; 209 210 do { 211 start = u64_stats_fetch_begin(&rq_stats->syncp); 212 for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 213 offset = veth_rq_stats_desc[j].offset; 214 data[idx + j] = *(u64 *)(stats_base + offset); 215 } 216 } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 217 idx += VETH_RQ_STATS_LEN; 218 } 219 pp_idx = idx; 220 221 if (!peer) 222 goto page_pool_stats; 223 224 rcv_priv = netdev_priv(peer); 225 for (i = 0; i < peer->real_num_rx_queues; i++) { 226 const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; 227 const void *base = (void *)&rq_stats->vs; 228 unsigned int start, tx_idx = idx; 229 size_t offset; 230 231 tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; 232 do { 233 start = u64_stats_fetch_begin(&rq_stats->syncp); 234 for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 235 offset = veth_tq_stats_desc[j].offset; 236 data[tx_idx + j] += *(u64 *)(base + offset); 237 } 238 } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 239 pp_idx = tx_idx + VETH_TQ_STATS_LEN; 240 } 241 242 page_pool_stats: 243 veth_get_page_pool_stats(dev, &data[pp_idx]); 244 } 245 246 static void veth_get_channels(struct net_device *dev, 247 struct ethtool_channels *channels) 248 { 249 channels->tx_count = dev->real_num_tx_queues; 250 channels->rx_count = dev->real_num_rx_queues; 251 channels->max_tx = dev->num_tx_queues; 252 channels->max_rx = dev->num_rx_queues; 253 } 254 255 static int veth_set_channels(struct net_device *dev, 256 struct ethtool_channels *ch); 257 258 static const struct ethtool_ops veth_ethtool_ops = { 259 .get_drvinfo = veth_get_drvinfo, 260 .get_link = ethtool_op_get_link, 261 .get_strings = veth_get_strings, 262 .get_sset_count = veth_get_sset_count, 263 .get_ethtool_stats = veth_get_ethtool_stats, 264 .get_link_ksettings = veth_get_link_ksettings, 265 .get_ts_info = ethtool_op_get_ts_info, 266 .get_channels = veth_get_channels, 267 .set_channels = veth_set_channels, 268 }; 269 270 /* general routines */ 271 272 static bool veth_is_xdp_frame(void *ptr) 273 { 274 return (unsigned long)ptr & VETH_XDP_FLAG; 275 } 276 277 static struct xdp_frame *veth_ptr_to_xdp(void *ptr) 278 { 279 return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 280 } 281 282 static void *veth_xdp_to_ptr(struct xdp_frame *xdp) 283 { 284 return (void *)((unsigned long)xdp | VETH_XDP_FLAG); 285 } 286 287 static void veth_ptr_free(void *ptr) 288 { 289 if (veth_is_xdp_frame(ptr)) 290 xdp_return_frame(veth_ptr_to_xdp(ptr)); 291 else 292 kfree_skb(ptr); 293 } 294 295 static void __veth_xdp_flush(struct veth_rq *rq) 296 { 297 /* Write ptr_ring before reading rx_notify_masked */ 298 smp_mb(); 299 if (!READ_ONCE(rq->rx_notify_masked) && 300 napi_schedule_prep(&rq->xdp_napi)) { 301 WRITE_ONCE(rq->rx_notify_masked, true); 302 __napi_schedule(&rq->xdp_napi); 303 } 304 } 305 306 static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 307 { 308 if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 309 dev_kfree_skb_any(skb); 310 return NET_RX_DROP; 311 } 312 313 return NET_RX_SUCCESS; 314 } 315 316 static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 317 struct veth_rq *rq, bool xdp) 318 { 319 return __dev_forward_skb(dev, skb) ?: xdp ? 
320 veth_xdp_rx(rq, skb) : 321 __netif_rx(skb); 322 } 323 324 /* return true if the specified skb has chances of GRO aggregation 325 * Don't strive for accuracy, but try to avoid GRO overhead in the most 326 * common scenarios. 327 * When XDP is enabled, all traffic is considered eligible, as the xmit 328 * device has TSO off. 329 * When TSO is enabled on the xmit device, we are likely interested only 330 * in UDP aggregation, explicitly check for that if the skb is suspected 331 * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets - 332 * to belong to locally generated UDP traffic. 333 */ 334 static bool veth_skb_is_eligible_for_gro(const struct net_device *dev, 335 const struct net_device *rcv, 336 const struct sk_buff *skb) 337 { 338 return !(dev->features & NETIF_F_ALL_TSO) || 339 (skb->destructor == sock_wfree && 340 rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)); 341 } 342 343 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 344 { 345 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 346 struct veth_rq *rq = NULL; 347 struct net_device *rcv; 348 int length = skb->len; 349 bool use_napi = false; 350 int rxq; 351 352 rcu_read_lock(); 353 rcv = rcu_dereference(priv->peer); 354 if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { 355 kfree_skb(skb); 356 goto drop; 357 } 358 359 rcv_priv = netdev_priv(rcv); 360 rxq = skb_get_queue_mapping(skb); 361 if (rxq < rcv->real_num_rx_queues) { 362 rq = &rcv_priv->rq[rxq]; 363 364 /* The napi pointer is available when an XDP program is 365 * attached or when GRO is enabled 366 * Don't bother with napi/GRO if the skb can't be aggregated 367 */ 368 use_napi = rcu_access_pointer(rq->napi) && 369 veth_skb_is_eligible_for_gro(dev, rcv, skb); 370 } 371 372 skb_tx_timestamp(skb); 373 if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { 374 if (!use_napi) 375 dev_lstats_add(dev, length); 376 } else { 377 drop: 378 atomic64_inc(&priv->dropped); 379 } 380 381 if (use_napi) 382 __veth_xdp_flush(rq); 383 384 rcu_read_unlock(); 385 386 return NETDEV_TX_OK; 387 } 388 389 static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) 390 { 391 struct veth_priv *priv = netdev_priv(dev); 392 393 dev_lstats_read(dev, packets, bytes); 394 return atomic64_read(&priv->dropped); 395 } 396 397 static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) 398 { 399 struct veth_priv *priv = netdev_priv(dev); 400 int i; 401 402 result->peer_tq_xdp_xmit_err = 0; 403 result->xdp_packets = 0; 404 result->xdp_tx_err = 0; 405 result->xdp_bytes = 0; 406 result->rx_drops = 0; 407 for (i = 0; i < dev->num_rx_queues; i++) { 408 u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err; 409 struct veth_rq_stats *stats = &priv->rq[i].stats; 410 unsigned int start; 411 412 do { 413 start = u64_stats_fetch_begin(&stats->syncp); 414 peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; 415 xdp_tx_err = stats->vs.xdp_tx_err; 416 packets = stats->vs.xdp_packets; 417 bytes = stats->vs.xdp_bytes; 418 drops = stats->vs.rx_drops; 419 } while (u64_stats_fetch_retry(&stats->syncp, start)); 420 result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; 421 result->xdp_tx_err += xdp_tx_err; 422 result->xdp_packets += packets; 423 result->xdp_bytes += bytes; 424 result->rx_drops += drops; 425 } 426 } 427 428 static void veth_get_stats64(struct net_device *dev, 429 struct rtnl_link_stats64 *tot) 430 { 431 struct veth_priv *priv = netdev_priv(dev); 432 struct net_device *peer; 433 struct 
veth_stats rx; 434 u64 packets, bytes; 435 436 tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); 437 tot->tx_bytes = bytes; 438 tot->tx_packets = packets; 439 440 veth_stats_rx(&rx, dev); 441 tot->tx_dropped += rx.xdp_tx_err; 442 tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; 443 tot->rx_bytes = rx.xdp_bytes; 444 tot->rx_packets = rx.xdp_packets; 445 446 rcu_read_lock(); 447 peer = rcu_dereference(priv->peer); 448 if (peer) { 449 veth_stats_tx(peer, &packets, &bytes); 450 tot->rx_bytes += bytes; 451 tot->rx_packets += packets; 452 453 veth_stats_rx(&rx, peer); 454 tot->tx_dropped += rx.peer_tq_xdp_xmit_err; 455 tot->rx_dropped += rx.xdp_tx_err; 456 tot->tx_bytes += rx.xdp_bytes; 457 tot->tx_packets += rx.xdp_packets; 458 } 459 rcu_read_unlock(); 460 } 461 462 /* fake multicast ability */ 463 static void veth_set_multicast_list(struct net_device *dev) 464 { 465 } 466 467 static int veth_select_rxq(struct net_device *dev) 468 { 469 return smp_processor_id() % dev->real_num_rx_queues; 470 } 471 472 static struct net_device *veth_peer_dev(struct net_device *dev) 473 { 474 struct veth_priv *priv = netdev_priv(dev); 475 476 /* Callers must be under RCU read side. */ 477 return rcu_dereference(priv->peer); 478 } 479 480 static int veth_xdp_xmit(struct net_device *dev, int n, 481 struct xdp_frame **frames, 482 u32 flags, bool ndo_xmit) 483 { 484 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 485 int i, ret = -ENXIO, nxmit = 0; 486 struct net_device *rcv; 487 unsigned int max_len; 488 struct veth_rq *rq; 489 490 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 491 return -EINVAL; 492 493 rcu_read_lock(); 494 rcv = rcu_dereference(priv->peer); 495 if (unlikely(!rcv)) 496 goto out; 497 498 rcv_priv = netdev_priv(rcv); 499 rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 500 /* The napi pointer is set if NAPI is enabled, which ensures that 501 * xdp_ring is initialized on receive side and the peer device is up. 
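	 * rcu_access_pointer() is enough for the check below: the pointer is
	 * only tested for presence, never dereferenced.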
502 */ 503 if (!rcu_access_pointer(rq->napi)) 504 goto out; 505 506 max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 507 508 spin_lock(&rq->xdp_ring.producer_lock); 509 for (i = 0; i < n; i++) { 510 struct xdp_frame *frame = frames[i]; 511 void *ptr = veth_xdp_to_ptr(frame); 512 513 if (unlikely(xdp_get_frame_len(frame) > max_len || 514 __ptr_ring_produce(&rq->xdp_ring, ptr))) 515 break; 516 nxmit++; 517 } 518 spin_unlock(&rq->xdp_ring.producer_lock); 519 520 if (flags & XDP_XMIT_FLUSH) 521 __veth_xdp_flush(rq); 522 523 ret = nxmit; 524 if (ndo_xmit) { 525 u64_stats_update_begin(&rq->stats.syncp); 526 rq->stats.vs.peer_tq_xdp_xmit += nxmit; 527 rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit; 528 u64_stats_update_end(&rq->stats.syncp); 529 } 530 531 out: 532 rcu_read_unlock(); 533 534 return ret; 535 } 536 537 static int veth_ndo_xdp_xmit(struct net_device *dev, int n, 538 struct xdp_frame **frames, u32 flags) 539 { 540 int err; 541 542 err = veth_xdp_xmit(dev, n, frames, flags, true); 543 if (err < 0) { 544 struct veth_priv *priv = netdev_priv(dev); 545 546 atomic64_add(n, &priv->dropped); 547 } 548 549 return err; 550 } 551 552 static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 553 { 554 int sent, i, err = 0, drops; 555 556 sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false); 557 if (sent < 0) { 558 err = sent; 559 sent = 0; 560 } 561 562 for (i = sent; unlikely(i < bq->count); i++) 563 xdp_return_frame(bq->q[i]); 564 565 drops = bq->count - sent; 566 trace_xdp_bulk_tx(rq->dev, sent, drops, err); 567 568 u64_stats_update_begin(&rq->stats.syncp); 569 rq->stats.vs.xdp_tx += sent; 570 rq->stats.vs.xdp_tx_err += drops; 571 u64_stats_update_end(&rq->stats.syncp); 572 573 bq->count = 0; 574 } 575 576 static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 577 { 578 struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev); 579 struct net_device *rcv; 580 struct veth_rq *rcv_rq; 581 582 rcu_read_lock(); 583 veth_xdp_flush_bq(rq, bq); 584 rcv = rcu_dereference(priv->peer); 585 if (unlikely(!rcv)) 586 goto out; 587 588 rcv_priv = netdev_priv(rcv); 589 rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 590 /* xdp_ring is initialized on receive side? 
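	 * The peer's xdp_prog is assigned only after NAPI and the ring are
	 * set up (see veth_enable_xdp()), so its presence answers the question.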
*/ 591 if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog))) 592 goto out; 593 594 __veth_xdp_flush(rcv_rq); 595 out: 596 rcu_read_unlock(); 597 } 598 599 static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp, 600 struct veth_xdp_tx_bq *bq) 601 { 602 struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); 603 604 if (unlikely(!frame)) 605 return -EOVERFLOW; 606 607 if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE)) 608 veth_xdp_flush_bq(rq, bq); 609 610 bq->q[bq->count++] = frame; 611 612 return 0; 613 } 614 615 static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, 616 struct xdp_frame *frame, 617 struct veth_xdp_tx_bq *bq, 618 struct veth_stats *stats) 619 { 620 struct xdp_frame orig_frame; 621 struct bpf_prog *xdp_prog; 622 623 rcu_read_lock(); 624 xdp_prog = rcu_dereference(rq->xdp_prog); 625 if (likely(xdp_prog)) { 626 struct veth_xdp_buff vxbuf; 627 struct xdp_buff *xdp = &vxbuf.xdp; 628 u32 act; 629 630 xdp_convert_frame_to_buff(frame, xdp); 631 xdp->rxq = &rq->xdp_rxq; 632 vxbuf.skb = NULL; 633 634 act = bpf_prog_run_xdp(xdp_prog, xdp); 635 636 switch (act) { 637 case XDP_PASS: 638 if (xdp_update_frame_from_buff(xdp, frame)) 639 goto err_xdp; 640 break; 641 case XDP_TX: 642 orig_frame = *frame; 643 xdp->rxq->mem = frame->mem; 644 if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 645 trace_xdp_exception(rq->dev, xdp_prog, act); 646 frame = &orig_frame; 647 stats->rx_drops++; 648 goto err_xdp; 649 } 650 stats->xdp_tx++; 651 rcu_read_unlock(); 652 goto xdp_xmit; 653 case XDP_REDIRECT: 654 orig_frame = *frame; 655 xdp->rxq->mem = frame->mem; 656 if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 657 frame = &orig_frame; 658 stats->rx_drops++; 659 goto err_xdp; 660 } 661 stats->xdp_redirect++; 662 rcu_read_unlock(); 663 goto xdp_xmit; 664 default: 665 bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 666 fallthrough; 667 case XDP_ABORTED: 668 trace_xdp_exception(rq->dev, xdp_prog, act); 669 fallthrough; 670 case XDP_DROP: 671 stats->xdp_drops++; 672 goto err_xdp; 673 } 674 } 675 rcu_read_unlock(); 676 677 return frame; 678 err_xdp: 679 rcu_read_unlock(); 680 xdp_return_frame(frame); 681 xdp_xmit: 682 return NULL; 683 } 684 685 /* frames array contains VETH_XDP_BATCH at most */ 686 static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames, 687 int n_xdpf, struct veth_xdp_tx_bq *bq, 688 struct veth_stats *stats) 689 { 690 void *skbs[VETH_XDP_BATCH]; 691 int i; 692 693 if (xdp_alloc_skb_bulk(skbs, n_xdpf, 694 GFP_ATOMIC | __GFP_ZERO) < 0) { 695 for (i = 0; i < n_xdpf; i++) 696 xdp_return_frame(frames[i]); 697 stats->rx_drops += n_xdpf; 698 699 return; 700 } 701 702 for (i = 0; i < n_xdpf; i++) { 703 struct sk_buff *skb = skbs[i]; 704 705 skb = __xdp_build_skb_from_frame(frames[i], skb, 706 rq->dev); 707 if (!skb) { 708 xdp_return_frame(frames[i]); 709 stats->rx_drops++; 710 continue; 711 } 712 napi_gro_receive(&rq->xdp_napi, skb); 713 } 714 } 715 716 static void veth_xdp_get(struct xdp_buff *xdp) 717 { 718 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 719 int i; 720 721 get_page(virt_to_page(xdp->data)); 722 if (likely(!xdp_buff_has_frags(xdp))) 723 return; 724 725 for (i = 0; i < sinfo->nr_frags; i++) 726 __skb_frag_ref(&sinfo->frags[i]); 727 } 728 729 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, 730 struct xdp_buff *xdp, 731 struct sk_buff **pskb) 732 { 733 struct sk_buff *skb = *pskb; 734 u32 frame_sz; 735 736 if (skb_shared(skb) || skb_head_is_locked(skb) || 737 skb_shinfo(skb)->nr_frags || 738 skb_headroom(skb) < 
XDP_PACKET_HEADROOM) { 739 u32 size, len, max_head_size, off; 740 struct sk_buff *nskb; 741 struct page *page; 742 int i, head_off; 743 744 /* We need a private copy of the skb and data buffers since 745 * the ebpf program can modify it. We segment the original skb 746 * into order-0 pages without linearize it. 747 * 748 * Make sure we have enough space for linear and paged area 749 */ 750 max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - 751 VETH_XDP_HEADROOM); 752 if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size) 753 goto drop; 754 755 /* Allocate skb head */ 756 page = page_pool_dev_alloc_pages(rq->page_pool); 757 if (!page) 758 goto drop; 759 760 nskb = napi_build_skb(page_address(page), PAGE_SIZE); 761 if (!nskb) { 762 page_pool_put_full_page(rq->page_pool, page, true); 763 goto drop; 764 } 765 766 skb_reserve(nskb, VETH_XDP_HEADROOM); 767 skb_copy_header(nskb, skb); 768 skb_mark_for_recycle(nskb); 769 770 size = min_t(u32, skb->len, max_head_size); 771 if (skb_copy_bits(skb, 0, nskb->data, size)) { 772 consume_skb(nskb); 773 goto drop; 774 } 775 skb_put(nskb, size); 776 777 head_off = skb_headroom(nskb) - skb_headroom(skb); 778 skb_headers_offset_update(nskb, head_off); 779 780 /* Allocate paged area of new skb */ 781 off = size; 782 len = skb->len - off; 783 784 for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { 785 page = page_pool_dev_alloc_pages(rq->page_pool); 786 if (!page) { 787 consume_skb(nskb); 788 goto drop; 789 } 790 791 size = min_t(u32, len, PAGE_SIZE); 792 skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE); 793 if (skb_copy_bits(skb, off, page_address(page), 794 size)) { 795 consume_skb(nskb); 796 goto drop; 797 } 798 799 len -= size; 800 off += size; 801 } 802 803 consume_skb(skb); 804 skb = nskb; 805 } 806 807 /* SKB "head" area always have tailroom for skb_shared_info */ 808 frame_sz = skb_end_pointer(skb) - skb->head; 809 frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 810 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 811 xdp_prepare_buff(xdp, skb->head, skb_headroom(skb), 812 skb_headlen(skb), true); 813 814 if (skb_is_nonlinear(skb)) { 815 skb_shinfo(skb)->xdp_frags_size = skb->data_len; 816 xdp_buff_set_frags_flag(xdp); 817 } else { 818 xdp_buff_clear_frags_flag(xdp); 819 } 820 *pskb = skb; 821 822 return 0; 823 drop: 824 consume_skb(skb); 825 *pskb = NULL; 826 827 return -ENOMEM; 828 } 829 830 static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, 831 struct sk_buff *skb, 832 struct veth_xdp_tx_bq *bq, 833 struct veth_stats *stats) 834 { 835 void *orig_data, *orig_data_end; 836 struct bpf_prog *xdp_prog; 837 struct veth_xdp_buff vxbuf; 838 struct xdp_buff *xdp = &vxbuf.xdp; 839 u32 act, metalen; 840 int off; 841 842 skb_prepare_for_gro(skb); 843 844 rcu_read_lock(); 845 xdp_prog = rcu_dereference(rq->xdp_prog); 846 if (unlikely(!xdp_prog)) { 847 rcu_read_unlock(); 848 goto out; 849 } 850 851 __skb_push(skb, skb->data - skb_mac_header(skb)); 852 if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb)) 853 goto drop; 854 vxbuf.skb = skb; 855 856 orig_data = xdp->data; 857 orig_data_end = xdp->data_end; 858 859 act = bpf_prog_run_xdp(xdp_prog, xdp); 860 861 switch (act) { 862 case XDP_PASS: 863 break; 864 case XDP_TX: 865 veth_xdp_get(xdp); 866 consume_skb(skb); 867 xdp->rxq->mem = rq->xdp_mem; 868 if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 869 trace_xdp_exception(rq->dev, xdp_prog, act); 870 stats->rx_drops++; 871 goto err_xdp; 872 } 873 stats->xdp_tx++; 874 rcu_read_unlock(); 875 goto xdp_xmit; 876 case XDP_REDIRECT: 877 veth_xdp_get(xdp); 878 
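		/* veth_xdp_get() took extra references on the head page and any
		 * frags above, so the buffer outlives the skb consumed below.
		 */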
consume_skb(skb); 879 xdp->rxq->mem = rq->xdp_mem; 880 if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 881 stats->rx_drops++; 882 goto err_xdp; 883 } 884 stats->xdp_redirect++; 885 rcu_read_unlock(); 886 goto xdp_xmit; 887 default: 888 bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 889 fallthrough; 890 case XDP_ABORTED: 891 trace_xdp_exception(rq->dev, xdp_prog, act); 892 fallthrough; 893 case XDP_DROP: 894 stats->xdp_drops++; 895 goto xdp_drop; 896 } 897 rcu_read_unlock(); 898 899 /* check if bpf_xdp_adjust_head was used */ 900 off = orig_data - xdp->data; 901 if (off > 0) 902 __skb_push(skb, off); 903 else if (off < 0) 904 __skb_pull(skb, -off); 905 906 skb_reset_mac_header(skb); 907 908 /* check if bpf_xdp_adjust_tail was used */ 909 off = xdp->data_end - orig_data_end; 910 if (off != 0) 911 __skb_put(skb, off); /* positive on grow, negative on shrink */ 912 913 /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers 914 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. 915 */ 916 if (xdp_buff_has_frags(xdp)) 917 skb->data_len = skb_shinfo(skb)->xdp_frags_size; 918 else 919 skb->data_len = 0; 920 921 skb->protocol = eth_type_trans(skb, rq->dev); 922 923 metalen = xdp->data - xdp->data_meta; 924 if (metalen) 925 skb_metadata_set(skb, metalen); 926 out: 927 return skb; 928 drop: 929 stats->rx_drops++; 930 xdp_drop: 931 rcu_read_unlock(); 932 kfree_skb(skb); 933 return NULL; 934 err_xdp: 935 rcu_read_unlock(); 936 xdp_return_buff(xdp); 937 xdp_xmit: 938 return NULL; 939 } 940 941 static int veth_xdp_rcv(struct veth_rq *rq, int budget, 942 struct veth_xdp_tx_bq *bq, 943 struct veth_stats *stats) 944 { 945 int i, done = 0, n_xdpf = 0; 946 void *xdpf[VETH_XDP_BATCH]; 947 948 for (i = 0; i < budget; i++) { 949 void *ptr = __ptr_ring_consume(&rq->xdp_ring); 950 951 if (!ptr) 952 break; 953 954 if (veth_is_xdp_frame(ptr)) { 955 /* ndo_xdp_xmit */ 956 struct xdp_frame *frame = veth_ptr_to_xdp(ptr); 957 958 stats->xdp_bytes += xdp_get_frame_len(frame); 959 frame = veth_xdp_rcv_one(rq, frame, bq, stats); 960 if (frame) { 961 /* XDP_PASS */ 962 xdpf[n_xdpf++] = frame; 963 if (n_xdpf == VETH_XDP_BATCH) { 964 veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, 965 bq, stats); 966 n_xdpf = 0; 967 } 968 } 969 } else { 970 /* ndo_start_xmit */ 971 struct sk_buff *skb = ptr; 972 973 stats->xdp_bytes += skb->len; 974 skb = veth_xdp_rcv_skb(rq, skb, bq, stats); 975 if (skb) { 976 if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC)) 977 netif_receive_skb(skb); 978 else 979 napi_gro_receive(&rq->xdp_napi, skb); 980 } 981 } 982 done++; 983 } 984 985 if (n_xdpf) 986 veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, bq, stats); 987 988 u64_stats_update_begin(&rq->stats.syncp); 989 rq->stats.vs.xdp_redirect += stats->xdp_redirect; 990 rq->stats.vs.xdp_bytes += stats->xdp_bytes; 991 rq->stats.vs.xdp_drops += stats->xdp_drops; 992 rq->stats.vs.rx_drops += stats->rx_drops; 993 rq->stats.vs.xdp_packets += done; 994 u64_stats_update_end(&rq->stats.syncp); 995 996 return done; 997 } 998 999 static int veth_poll(struct napi_struct *napi, int budget) 1000 { 1001 struct veth_rq *rq = 1002 container_of(napi, struct veth_rq, xdp_napi); 1003 struct veth_stats stats = {}; 1004 struct veth_xdp_tx_bq bq; 1005 int done; 1006 1007 bq.count = 0; 1008 1009 xdp_set_return_frame_no_direct(); 1010 done = veth_xdp_rcv(rq, budget, &bq, &stats); 1011 1012 if (stats.xdp_redirect > 0) 1013 xdp_do_flush(); 1014 1015 if (done < budget && napi_complete_done(napi, done)) { 1016 /* Write rx_notify_masked before reading ptr_ring */ 1017 
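		/* smp_store_mb() pairs with the smp_mb() in __veth_xdp_flush() */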
smp_store_mb(rq->rx_notify_masked, false); 1018 if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 1019 if (napi_schedule_prep(&rq->xdp_napi)) { 1020 WRITE_ONCE(rq->rx_notify_masked, true); 1021 __napi_schedule(&rq->xdp_napi); 1022 } 1023 } 1024 } 1025 1026 if (stats.xdp_tx > 0) 1027 veth_xdp_flush(rq, &bq); 1028 xdp_clear_return_frame_no_direct(); 1029 1030 return done; 1031 } 1032 1033 static int veth_create_page_pool(struct veth_rq *rq) 1034 { 1035 struct page_pool_params pp_params = { 1036 .order = 0, 1037 .pool_size = VETH_RING_SIZE, 1038 .nid = NUMA_NO_NODE, 1039 .dev = &rq->dev->dev, 1040 }; 1041 1042 rq->page_pool = page_pool_create(&pp_params); 1043 if (IS_ERR(rq->page_pool)) { 1044 int err = PTR_ERR(rq->page_pool); 1045 1046 rq->page_pool = NULL; 1047 return err; 1048 } 1049 1050 return 0; 1051 } 1052 1053 static int __veth_napi_enable_range(struct net_device *dev, int start, int end) 1054 { 1055 struct veth_priv *priv = netdev_priv(dev); 1056 int err, i; 1057 1058 for (i = start; i < end; i++) { 1059 err = veth_create_page_pool(&priv->rq[i]); 1060 if (err) 1061 goto err_page_pool; 1062 } 1063 1064 for (i = start; i < end; i++) { 1065 struct veth_rq *rq = &priv->rq[i]; 1066 1067 err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 1068 if (err) 1069 goto err_xdp_ring; 1070 } 1071 1072 for (i = start; i < end; i++) { 1073 struct veth_rq *rq = &priv->rq[i]; 1074 1075 napi_enable(&rq->xdp_napi); 1076 rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 1077 } 1078 1079 return 0; 1080 1081 err_xdp_ring: 1082 for (i--; i >= start; i--) 1083 ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 1084 err_page_pool: 1085 for (i = start; i < end; i++) { 1086 page_pool_destroy(priv->rq[i].page_pool); 1087 priv->rq[i].page_pool = NULL; 1088 } 1089 1090 return err; 1091 } 1092 1093 static int __veth_napi_enable(struct net_device *dev) 1094 { 1095 return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 1096 } 1097 1098 static void veth_napi_del_range(struct net_device *dev, int start, int end) 1099 { 1100 struct veth_priv *priv = netdev_priv(dev); 1101 int i; 1102 1103 for (i = start; i < end; i++) { 1104 struct veth_rq *rq = &priv->rq[i]; 1105 1106 rcu_assign_pointer(priv->rq[i].napi, NULL); 1107 napi_disable(&rq->xdp_napi); 1108 __netif_napi_del(&rq->xdp_napi); 1109 } 1110 synchronize_net(); 1111 1112 for (i = start; i < end; i++) { 1113 struct veth_rq *rq = &priv->rq[i]; 1114 1115 rq->rx_notify_masked = false; 1116 ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 1117 } 1118 1119 for (i = start; i < end; i++) { 1120 page_pool_destroy(priv->rq[i].page_pool); 1121 priv->rq[i].page_pool = NULL; 1122 } 1123 } 1124 1125 static void veth_napi_del(struct net_device *dev) 1126 { 1127 veth_napi_del_range(dev, 0, dev->real_num_rx_queues); 1128 } 1129 1130 static bool veth_gro_requested(const struct net_device *dev) 1131 { 1132 return !!(dev->wanted_features & NETIF_F_GRO); 1133 } 1134 1135 static int veth_enable_xdp_range(struct net_device *dev, int start, int end, 1136 bool napi_already_on) 1137 { 1138 struct veth_priv *priv = netdev_priv(dev); 1139 int err, i; 1140 1141 for (i = start; i < end; i++) { 1142 struct veth_rq *rq = &priv->rq[i]; 1143 1144 if (!napi_already_on) 1145 netif_napi_add(dev, &rq->xdp_napi, veth_poll); 1146 err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); 1147 if (err < 0) 1148 goto err_rxq_reg; 1149 1150 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 1151 MEM_TYPE_PAGE_SHARED, 1152 NULL); 1153 if (err < 0) 1154 goto err_reg_mem; 
1155 1156 /* Save original mem info as it can be overwritten */ 1157 rq->xdp_mem = rq->xdp_rxq.mem; 1158 } 1159 return 0; 1160 1161 err_reg_mem: 1162 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 1163 err_rxq_reg: 1164 for (i--; i >= start; i--) { 1165 struct veth_rq *rq = &priv->rq[i]; 1166 1167 xdp_rxq_info_unreg(&rq->xdp_rxq); 1168 if (!napi_already_on) 1169 netif_napi_del(&rq->xdp_napi); 1170 } 1171 1172 return err; 1173 } 1174 1175 static void veth_disable_xdp_range(struct net_device *dev, int start, int end, 1176 bool delete_napi) 1177 { 1178 struct veth_priv *priv = netdev_priv(dev); 1179 int i; 1180 1181 for (i = start; i < end; i++) { 1182 struct veth_rq *rq = &priv->rq[i]; 1183 1184 rq->xdp_rxq.mem = rq->xdp_mem; 1185 xdp_rxq_info_unreg(&rq->xdp_rxq); 1186 1187 if (delete_napi) 1188 netif_napi_del(&rq->xdp_napi); 1189 } 1190 } 1191 1192 static int veth_enable_xdp(struct net_device *dev) 1193 { 1194 bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); 1195 struct veth_priv *priv = netdev_priv(dev); 1196 int err, i; 1197 1198 if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 1199 err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); 1200 if (err) 1201 return err; 1202 1203 if (!napi_already_on) { 1204 err = __veth_napi_enable(dev); 1205 if (err) { 1206 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); 1207 return err; 1208 } 1209 1210 if (!veth_gro_requested(dev)) { 1211 /* user-space did not require GRO, but adding XDP 1212 * is supposed to get GRO working 1213 */ 1214 dev->features |= NETIF_F_GRO; 1215 netdev_features_change(dev); 1216 } 1217 } 1218 } 1219 1220 for (i = 0; i < dev->real_num_rx_queues; i++) { 1221 rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 1222 rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 1223 } 1224 1225 return 0; 1226 } 1227 1228 static void veth_disable_xdp(struct net_device *dev) 1229 { 1230 struct veth_priv *priv = netdev_priv(dev); 1231 int i; 1232 1233 for (i = 0; i < dev->real_num_rx_queues; i++) 1234 rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 1235 1236 if (!netif_running(dev) || !veth_gro_requested(dev)) { 1237 veth_napi_del(dev); 1238 1239 /* if user-space did not require GRO, since adding XDP 1240 * enabled it, clear it now 1241 */ 1242 if (!veth_gro_requested(dev) && netif_running(dev)) { 1243 dev->features &= ~NETIF_F_GRO; 1244 netdev_features_change(dev); 1245 } 1246 } 1247 1248 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); 1249 } 1250 1251 static int veth_napi_enable_range(struct net_device *dev, int start, int end) 1252 { 1253 struct veth_priv *priv = netdev_priv(dev); 1254 int err, i; 1255 1256 for (i = start; i < end; i++) { 1257 struct veth_rq *rq = &priv->rq[i]; 1258 1259 netif_napi_add(dev, &rq->xdp_napi, veth_poll); 1260 } 1261 1262 err = __veth_napi_enable_range(dev, start, end); 1263 if (err) { 1264 for (i = start; i < end; i++) { 1265 struct veth_rq *rq = &priv->rq[i]; 1266 1267 netif_napi_del(&rq->xdp_napi); 1268 } 1269 return err; 1270 } 1271 return err; 1272 } 1273 1274 static int veth_napi_enable(struct net_device *dev) 1275 { 1276 return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 1277 } 1278 1279 static void veth_disable_range_safe(struct net_device *dev, int start, int end) 1280 { 1281 struct veth_priv *priv = netdev_priv(dev); 1282 1283 if (start >= end) 1284 return; 1285 1286 if (priv->_xdp_prog) { 1287 veth_napi_del_range(dev, start, end); 1288 veth_disable_xdp_range(dev, start, end, false); 1289 } else if 
(veth_gro_requested(dev)) {
		veth_napi_del_range(dev, start, end);
	}
}

static int veth_enable_range_safe(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err;

	if (start >= end)
		return 0;

	if (priv->_xdp_prog) {
		/* these channels are freshly initialized, napi is not on there even
		 * when GRO is requested
		 */
		err = veth_enable_xdp_range(dev, start, end, false);
		if (err)
			return err;

		err = __veth_napi_enable_range(dev, start, end);
		if (err) {
			/* on error always delete the newly added napis */
			veth_disable_xdp_range(dev, start, end, true);
			return err;
		}
	} else if (veth_gro_requested(dev)) {
		return veth_napi_enable_range(dev, start, end);
	}
	return 0;
}

static void veth_set_xdp_features(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	peer = rtnl_dereference(priv->peer);
	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
		struct veth_priv *priv_peer = netdev_priv(peer);
		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
				     NETDEV_XDP_ACT_REDIRECT |
				     NETDEV_XDP_ACT_RX_SG;

		if (priv_peer->_xdp_prog || veth_gro_requested(peer))
			val |= NETDEV_XDP_ACT_NDO_XMIT |
			       NETDEV_XDP_ACT_NDO_XMIT_SG;
		xdp_set_features_flag(dev, val);
	} else {
		xdp_clear_features_flag(dev);
	}
}

static int veth_set_channels(struct net_device *dev,
			     struct ethtool_channels *ch)
{
	struct veth_priv *priv = netdev_priv(dev);
	unsigned int old_rx_count, new_rx_count;
	struct veth_priv *peer_priv;
	struct net_device *peer;
	int err;

	/* sanity check. Upper bounds are already enforced by the caller */
	if (!ch->rx_count || !ch->tx_count)
		return -EINVAL;

	/* avoid breaking XDP, if that is enabled */
	peer = rtnl_dereference(priv->peer);
	peer_priv = peer ? netdev_priv(peer) : NULL;
	if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
		return -EINVAL;

	if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
		return -EINVAL;

	old_rx_count = dev->real_num_rx_queues;
	new_rx_count = ch->rx_count;
	if (netif_running(dev)) {
		/* turn device off */
		netif_carrier_off(dev);
		if (peer)
			netif_carrier_off(peer);

		/* try to allocate new resources, as needed */
		err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
		if (err)
			goto out;
	}

	err = netif_set_real_num_rx_queues(dev, ch->rx_count);
	if (err)
		goto revert;

	err = netif_set_real_num_tx_queues(dev, ch->tx_count);
	if (err) {
		int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);

		/* this error condition could happen only if rx and tx change
		 * in opposite directions (e.g.
tx nr raises, rx nr decreases) 1389 * and we can't do anything to fully restore the original 1390 * status 1391 */ 1392 if (err2) 1393 pr_warn("Can't restore rx queues config %d -> %d %d", 1394 new_rx_count, old_rx_count, err2); 1395 else 1396 goto revert; 1397 } 1398 1399 out: 1400 if (netif_running(dev)) { 1401 /* note that we need to swap the arguments WRT the enable part 1402 * to identify the range we have to disable 1403 */ 1404 veth_disable_range_safe(dev, new_rx_count, old_rx_count); 1405 netif_carrier_on(dev); 1406 if (peer) 1407 netif_carrier_on(peer); 1408 } 1409 1410 /* update XDP supported features */ 1411 veth_set_xdp_features(dev); 1412 if (peer) 1413 veth_set_xdp_features(peer); 1414 1415 return err; 1416 1417 revert: 1418 new_rx_count = old_rx_count; 1419 old_rx_count = ch->rx_count; 1420 goto out; 1421 } 1422 1423 static int veth_open(struct net_device *dev) 1424 { 1425 struct veth_priv *priv = netdev_priv(dev); 1426 struct net_device *peer = rtnl_dereference(priv->peer); 1427 int err; 1428 1429 if (!peer) 1430 return -ENOTCONN; 1431 1432 if (priv->_xdp_prog) { 1433 err = veth_enable_xdp(dev); 1434 if (err) 1435 return err; 1436 } else if (veth_gro_requested(dev)) { 1437 err = veth_napi_enable(dev); 1438 if (err) 1439 return err; 1440 } 1441 1442 if (peer->flags & IFF_UP) { 1443 netif_carrier_on(dev); 1444 netif_carrier_on(peer); 1445 } 1446 1447 return 0; 1448 } 1449 1450 static int veth_close(struct net_device *dev) 1451 { 1452 struct veth_priv *priv = netdev_priv(dev); 1453 struct net_device *peer = rtnl_dereference(priv->peer); 1454 1455 netif_carrier_off(dev); 1456 if (peer) 1457 netif_carrier_off(peer); 1458 1459 if (priv->_xdp_prog) 1460 veth_disable_xdp(dev); 1461 else if (veth_gro_requested(dev)) 1462 veth_napi_del(dev); 1463 1464 return 0; 1465 } 1466 1467 static int is_valid_veth_mtu(int mtu) 1468 { 1469 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 1470 } 1471 1472 static int veth_alloc_queues(struct net_device *dev) 1473 { 1474 struct veth_priv *priv = netdev_priv(dev); 1475 int i; 1476 1477 priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); 1478 if (!priv->rq) 1479 return -ENOMEM; 1480 1481 for (i = 0; i < dev->num_rx_queues; i++) { 1482 priv->rq[i].dev = dev; 1483 u64_stats_init(&priv->rq[i].stats.syncp); 1484 } 1485 1486 return 0; 1487 } 1488 1489 static void veth_free_queues(struct net_device *dev) 1490 { 1491 struct veth_priv *priv = netdev_priv(dev); 1492 1493 kfree(priv->rq); 1494 } 1495 1496 static int veth_dev_init(struct net_device *dev) 1497 { 1498 int err; 1499 1500 dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); 1501 if (!dev->lstats) 1502 return -ENOMEM; 1503 1504 err = veth_alloc_queues(dev); 1505 if (err) { 1506 free_percpu(dev->lstats); 1507 return err; 1508 } 1509 1510 return 0; 1511 } 1512 1513 static void veth_dev_free(struct net_device *dev) 1514 { 1515 veth_free_queues(dev); 1516 free_percpu(dev->lstats); 1517 } 1518 1519 #ifdef CONFIG_NET_POLL_CONTROLLER 1520 static void veth_poll_controller(struct net_device *dev) 1521 { 1522 /* veth only receives frames when its peer sends one 1523 * Since it has nothing to do with disabling irqs, we are guaranteed 1524 * never to have pending data when we poll for it so 1525 * there is nothing to do here. 
1526 * 1527 * We need this though so netpoll recognizes us as an interface that 1528 * supports polling, which enables bridge devices in virt setups to 1529 * still use netconsole 1530 */ 1531 } 1532 #endif /* CONFIG_NET_POLL_CONTROLLER */ 1533 1534 static int veth_get_iflink(const struct net_device *dev) 1535 { 1536 struct veth_priv *priv = netdev_priv(dev); 1537 struct net_device *peer; 1538 int iflink; 1539 1540 rcu_read_lock(); 1541 peer = rcu_dereference(priv->peer); 1542 iflink = peer ? peer->ifindex : 0; 1543 rcu_read_unlock(); 1544 1545 return iflink; 1546 } 1547 1548 static netdev_features_t veth_fix_features(struct net_device *dev, 1549 netdev_features_t features) 1550 { 1551 struct veth_priv *priv = netdev_priv(dev); 1552 struct net_device *peer; 1553 1554 peer = rtnl_dereference(priv->peer); 1555 if (peer) { 1556 struct veth_priv *peer_priv = netdev_priv(peer); 1557 1558 if (peer_priv->_xdp_prog) 1559 features &= ~NETIF_F_GSO_SOFTWARE; 1560 } 1561 if (priv->_xdp_prog) 1562 features |= NETIF_F_GRO; 1563 1564 return features; 1565 } 1566 1567 static int veth_set_features(struct net_device *dev, 1568 netdev_features_t features) 1569 { 1570 netdev_features_t changed = features ^ dev->features; 1571 struct veth_priv *priv = netdev_priv(dev); 1572 struct net_device *peer; 1573 int err; 1574 1575 if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) 1576 return 0; 1577 1578 peer = rtnl_dereference(priv->peer); 1579 if (features & NETIF_F_GRO) { 1580 err = veth_napi_enable(dev); 1581 if (err) 1582 return err; 1583 1584 if (peer) 1585 xdp_features_set_redirect_target(peer, true); 1586 } else { 1587 if (peer) 1588 xdp_features_clear_redirect_target(peer); 1589 veth_napi_del(dev); 1590 } 1591 return 0; 1592 } 1593 1594 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 1595 { 1596 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 1597 struct net_device *peer; 1598 1599 if (new_hr < 0) 1600 new_hr = 0; 1601 1602 rcu_read_lock(); 1603 peer = rcu_dereference(priv->peer); 1604 if (unlikely(!peer)) 1605 goto out; 1606 1607 peer_priv = netdev_priv(peer); 1608 priv->requested_headroom = new_hr; 1609 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 1610 dev->needed_headroom = new_hr; 1611 peer->needed_headroom = new_hr; 1612 1613 out: 1614 rcu_read_unlock(); 1615 } 1616 1617 static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1618 struct netlink_ext_ack *extack) 1619 { 1620 struct veth_priv *priv = netdev_priv(dev); 1621 struct bpf_prog *old_prog; 1622 struct net_device *peer; 1623 unsigned int max_mtu; 1624 int err; 1625 1626 old_prog = priv->_xdp_prog; 1627 priv->_xdp_prog = prog; 1628 peer = rtnl_dereference(priv->peer); 1629 1630 if (prog) { 1631 if (!peer) { 1632 NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 1633 err = -ENOTCONN; 1634 goto err; 1635 } 1636 1637 max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) - 1638 peer->hard_header_len; 1639 /* Allow increasing the max_mtu if the program supports 1640 * XDP fragments. 
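		 * A multi-buffer frame can carry up to MAX_SKB_FRAGS additional
		 * pages, hence the PAGE_SIZE * MAX_SKB_FRAGS added below.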
1641 */ 1642 if (prog->aux->xdp_has_frags) 1643 max_mtu += PAGE_SIZE * MAX_SKB_FRAGS; 1644 1645 if (peer->mtu > max_mtu) { 1646 NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 1647 err = -ERANGE; 1648 goto err; 1649 } 1650 1651 if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 1652 NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 1653 err = -ENOSPC; 1654 goto err; 1655 } 1656 1657 if (dev->flags & IFF_UP) { 1658 err = veth_enable_xdp(dev); 1659 if (err) { 1660 NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 1661 goto err; 1662 } 1663 } 1664 1665 if (!old_prog) { 1666 peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 1667 peer->max_mtu = max_mtu; 1668 } 1669 1670 xdp_features_set_redirect_target(peer, true); 1671 } 1672 1673 if (old_prog) { 1674 if (!prog) { 1675 if (peer && !veth_gro_requested(dev)) 1676 xdp_features_clear_redirect_target(peer); 1677 1678 if (dev->flags & IFF_UP) 1679 veth_disable_xdp(dev); 1680 1681 if (peer) { 1682 peer->hw_features |= NETIF_F_GSO_SOFTWARE; 1683 peer->max_mtu = ETH_MAX_MTU; 1684 } 1685 } 1686 bpf_prog_put(old_prog); 1687 } 1688 1689 if ((!!old_prog ^ !!prog) && peer) 1690 netdev_update_features(peer); 1691 1692 return 0; 1693 err: 1694 priv->_xdp_prog = old_prog; 1695 1696 return err; 1697 } 1698 1699 static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1700 { 1701 switch (xdp->command) { 1702 case XDP_SETUP_PROG: 1703 return veth_xdp_set(dev, xdp->prog, xdp->extack); 1704 default: 1705 return -EINVAL; 1706 } 1707 } 1708 1709 static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) 1710 { 1711 struct veth_xdp_buff *_ctx = (void *)ctx; 1712 1713 if (!_ctx->skb) 1714 return -ENODATA; 1715 1716 *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; 1717 return 0; 1718 } 1719 1720 static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, 1721 enum xdp_rss_hash_type *rss_type) 1722 { 1723 struct veth_xdp_buff *_ctx = (void *)ctx; 1724 struct sk_buff *skb = _ctx->skb; 1725 1726 if (!skb) 1727 return -ENODATA; 1728 1729 *hash = skb_get_hash(skb); 1730 *rss_type = skb->l4_hash ? 
XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE; 1731 1732 return 0; 1733 } 1734 1735 static const struct net_device_ops veth_netdev_ops = { 1736 .ndo_init = veth_dev_init, 1737 .ndo_open = veth_open, 1738 .ndo_stop = veth_close, 1739 .ndo_start_xmit = veth_xmit, 1740 .ndo_get_stats64 = veth_get_stats64, 1741 .ndo_set_rx_mode = veth_set_multicast_list, 1742 .ndo_set_mac_address = eth_mac_addr, 1743 #ifdef CONFIG_NET_POLL_CONTROLLER 1744 .ndo_poll_controller = veth_poll_controller, 1745 #endif 1746 .ndo_get_iflink = veth_get_iflink, 1747 .ndo_fix_features = veth_fix_features, 1748 .ndo_set_features = veth_set_features, 1749 .ndo_features_check = passthru_features_check, 1750 .ndo_set_rx_headroom = veth_set_rx_headroom, 1751 .ndo_bpf = veth_xdp, 1752 .ndo_xdp_xmit = veth_ndo_xdp_xmit, 1753 .ndo_get_peer_dev = veth_peer_dev, 1754 }; 1755 1756 static const struct xdp_metadata_ops veth_xdp_metadata_ops = { 1757 .xmo_rx_timestamp = veth_xdp_rx_timestamp, 1758 .xmo_rx_hash = veth_xdp_rx_hash, 1759 }; 1760 1761 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1762 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1763 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 1764 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 1765 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 1766 1767 static void veth_setup(struct net_device *dev) 1768 { 1769 ether_setup(dev); 1770 1771 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1772 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1773 dev->priv_flags |= IFF_NO_QUEUE; 1774 dev->priv_flags |= IFF_PHONY_HEADROOM; 1775 1776 dev->netdev_ops = &veth_netdev_ops; 1777 dev->xdp_metadata_ops = &veth_xdp_metadata_ops; 1778 dev->ethtool_ops = &veth_ethtool_ops; 1779 dev->features |= NETIF_F_LLTX; 1780 dev->features |= VETH_FEATURES; 1781 dev->vlan_features = dev->features & 1782 ~(NETIF_F_HW_VLAN_CTAG_TX | 1783 NETIF_F_HW_VLAN_STAG_TX | 1784 NETIF_F_HW_VLAN_CTAG_RX | 1785 NETIF_F_HW_VLAN_STAG_RX); 1786 dev->needs_free_netdev = true; 1787 dev->priv_destructor = veth_dev_free; 1788 dev->max_mtu = ETH_MAX_MTU; 1789 1790 dev->hw_features = VETH_FEATURES; 1791 dev->hw_enc_features = VETH_FEATURES; 1792 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1793 netif_set_tso_max_size(dev, GSO_MAX_SIZE); 1794 } 1795 1796 /* 1797 * netlink interface 1798 */ 1799 1800 static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1801 struct netlink_ext_ack *extack) 1802 { 1803 if (tb[IFLA_ADDRESS]) { 1804 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1805 return -EINVAL; 1806 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1807 return -EADDRNOTAVAIL; 1808 } 1809 if (tb[IFLA_MTU]) { 1810 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 1811 return -EINVAL; 1812 } 1813 return 0; 1814 } 1815 1816 static struct rtnl_link_ops veth_link_ops; 1817 1818 static void veth_disable_gro(struct net_device *dev) 1819 { 1820 dev->features &= ~NETIF_F_GRO; 1821 dev->wanted_features &= ~NETIF_F_GRO; 1822 netdev_update_features(dev); 1823 } 1824 1825 static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) 1826 { 1827 int err; 1828 1829 if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { 1830 err = netif_set_real_num_tx_queues(dev, 1); 1831 if (err) 1832 return err; 1833 } 1834 if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { 1835 err = netif_set_real_num_rx_queues(dev, 1); 1836 if (err) 1837 return err; 1838 } 1839 return 0; 1840 } 1841 1842 static int veth_newlink(struct net *src_net, struct net_device *dev, 1843 struct nlattr *tb[], 
struct nlattr *data[], 1844 struct netlink_ext_ack *extack) 1845 { 1846 int err; 1847 struct net_device *peer; 1848 struct veth_priv *priv; 1849 char ifname[IFNAMSIZ]; 1850 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 1851 unsigned char name_assign_type; 1852 struct ifinfomsg *ifmp; 1853 struct net *net; 1854 1855 /* 1856 * create and register peer first 1857 */ 1858 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1859 struct nlattr *nla_peer; 1860 1861 nla_peer = data[VETH_INFO_PEER]; 1862 ifmp = nla_data(nla_peer); 1863 err = rtnl_nla_parse_ifla(peer_tb, 1864 nla_data(nla_peer) + sizeof(struct ifinfomsg), 1865 nla_len(nla_peer) - sizeof(struct ifinfomsg), 1866 NULL); 1867 if (err < 0) 1868 return err; 1869 1870 err = veth_validate(peer_tb, NULL, extack); 1871 if (err < 0) 1872 return err; 1873 1874 tbp = peer_tb; 1875 } else { 1876 ifmp = NULL; 1877 tbp = tb; 1878 } 1879 1880 if (ifmp && tbp[IFLA_IFNAME]) { 1881 nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 1882 name_assign_type = NET_NAME_USER; 1883 } else { 1884 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 1885 name_assign_type = NET_NAME_ENUM; 1886 } 1887 1888 net = rtnl_link_get_net(src_net, tbp); 1889 if (IS_ERR(net)) 1890 return PTR_ERR(net); 1891 1892 peer = rtnl_create_link(net, ifname, name_assign_type, 1893 &veth_link_ops, tbp, extack); 1894 if (IS_ERR(peer)) { 1895 put_net(net); 1896 return PTR_ERR(peer); 1897 } 1898 1899 if (!ifmp || !tbp[IFLA_ADDRESS]) 1900 eth_hw_addr_random(peer); 1901 1902 if (ifmp && (dev->ifindex != 0)) 1903 peer->ifindex = ifmp->ifi_index; 1904 1905 netif_inherit_tso_max(peer, dev); 1906 1907 err = register_netdevice(peer); 1908 put_net(net); 1909 net = NULL; 1910 if (err < 0) 1911 goto err_register_peer; 1912 1913 /* keep GRO disabled by default to be consistent with the established 1914 * veth behavior 1915 */ 1916 veth_disable_gro(peer); 1917 netif_carrier_off(peer); 1918 1919 err = rtnl_configure_link(peer, ifmp, 0, NULL); 1920 if (err < 0) 1921 goto err_configure_peer; 1922 1923 /* 1924 * register dev last 1925 * 1926 * note, that since we've registered new device the dev's name 1927 * should be re-allocated 1928 */ 1929 1930 if (tb[IFLA_ADDRESS] == NULL) 1931 eth_hw_addr_random(dev); 1932 1933 if (tb[IFLA_IFNAME]) 1934 nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 1935 else 1936 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 1937 1938 err = register_netdevice(dev); 1939 if (err < 0) 1940 goto err_register_dev; 1941 1942 netif_carrier_off(dev); 1943 1944 /* 1945 * tie the deviced together 1946 */ 1947 1948 priv = netdev_priv(dev); 1949 rcu_assign_pointer(priv->peer, peer); 1950 err = veth_init_queues(dev, tb); 1951 if (err) 1952 goto err_queues; 1953 1954 priv = netdev_priv(peer); 1955 rcu_assign_pointer(priv->peer, dev); 1956 err = veth_init_queues(peer, tb); 1957 if (err) 1958 goto err_queues; 1959 1960 veth_disable_gro(dev); 1961 /* update XDP supported features */ 1962 veth_set_xdp_features(dev); 1963 veth_set_xdp_features(peer); 1964 1965 return 0; 1966 1967 err_queues: 1968 unregister_netdevice(dev); 1969 err_register_dev: 1970 /* nothing to do */ 1971 err_configure_peer: 1972 unregister_netdevice(peer); 1973 return err; 1974 1975 err_register_peer: 1976 free_netdev(peer); 1977 return err; 1978 } 1979 1980 static void veth_dellink(struct net_device *dev, struct list_head *head) 1981 { 1982 struct veth_priv *priv; 1983 struct net_device *peer; 1984 1985 priv = netdev_priv(dev); 1986 peer = rtnl_dereference(priv->peer); 1987 1988 /* Note : dellink() is called from 
default_device_exit_batch(),
	 * before a synchronize_rcu() point. The devices are guaranteed
	 * not being freed before one RCU grace period.
	 */
	RCU_INIT_POINTER(priv->peer, NULL);
	unregister_netdevice_queue(dev, head);

	if (peer) {
		priv = netdev_priv(peer);
		RCU_INIT_POINTER(priv->peer, NULL);
		unregister_netdevice_queue(peer, head);
	}
}

static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
};

static struct net *veth_get_link_net(const struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	return peer ? dev_net(peer) : dev_net(dev);
}

static unsigned int veth_get_num_queues(void)
{
	/* enforce the same queue limit as rtnl_create_link */
	int queues = num_possible_cpus();

	if (queues > 4096)
		queues = 4096;
	return queues;
}

static struct rtnl_link_ops veth_link_ops = {
	.kind			= DRV_NAME,
	.priv_size		= sizeof(struct veth_priv),
	.setup			= veth_setup,
	.validate		= veth_validate,
	.newlink		= veth_newlink,
	.dellink		= veth_dellink,
	.policy			= veth_policy,
	.maxtype		= VETH_INFO_MAX,
	.get_link_net		= veth_get_link_net,
	.get_num_tx_queues	= veth_get_num_queues,
	.get_num_rx_queues	= veth_get_num_queues,
};

/*
 * init/fini
 */

static __init int veth_init(void)
{
	return rtnl_link_register(&veth_link_ops);
}

static __exit void veth_exit(void)
{
	rtnl_link_unregister(&veth_link_ops);
}

module_init(veth_init);
module_exit(veth_exit);

MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);