1 /* 2 * Copyright (c) 2009, Microsoft Corporation. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 * 13 * You should have received a copy of the GNU General Public License along with 14 * this program; if not, see <http://www.gnu.org/licenses/>. 15 * 16 * Authors: 17 * Haiyang Zhang <haiyangz@microsoft.com> 18 * Hank Janssen <hjanssen@microsoft.com> 19 */ 20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 21 22 #include <linux/init.h> 23 #include <linux/atomic.h> 24 #include <linux/module.h> 25 #include <linux/highmem.h> 26 #include <linux/device.h> 27 #include <linux/io.h> 28 #include <linux/delay.h> 29 #include <linux/netdevice.h> 30 #include <linux/inetdevice.h> 31 #include <linux/etherdevice.h> 32 #include <linux/skbuff.h> 33 #include <linux/if_vlan.h> 34 #include <linux/in.h> 35 #include <linux/slab.h> 36 #include <net/arp.h> 37 #include <net/route.h> 38 #include <net/sock.h> 39 #include <net/pkt_sched.h> 40 #include <net/checksum.h> 41 #include <net/ip6_checksum.h> 42 43 #include "hyperv_net.h" 44 45 #define RING_SIZE_MIN 64 46 #define LINKCHANGE_INT (2 * HZ) 47 48 static int ring_size = 128; 49 module_param(ring_size, int, S_IRUGO); 50 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); 51 52 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | 53 NETIF_MSG_LINK | NETIF_MSG_IFUP | 54 NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR | 55 NETIF_MSG_TX_ERR; 56 57 static int debug = -1; 58 module_param(debug, int, S_IRUGO); 59 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 60 61 static void netvsc_set_multicast_list(struct net_device *net) 62 { 63 struct net_device_context *net_device_ctx = netdev_priv(net); 64 struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 65 66 rndis_filter_update(nvdev); 67 } 68 69 static int netvsc_open(struct net_device *net) 70 { 71 struct net_device_context *ndev_ctx = netdev_priv(net); 72 struct netvsc_device *nvdev = ndev_ctx->nvdev; 73 struct rndis_device *rdev; 74 int ret = 0; 75 76 netif_carrier_off(net); 77 78 /* Open up the device */ 79 ret = rndis_filter_open(nvdev); 80 if (ret != 0) { 81 netdev_err(net, "unable to open device (ret %d).\n", ret); 82 return ret; 83 } 84 85 netif_tx_wake_all_queues(net); 86 87 rdev = nvdev->extension; 88 if (!rdev->link_state && !ndev_ctx->datapath) 89 netif_carrier_on(net); 90 91 return ret; 92 } 93 94 static int netvsc_close(struct net_device *net) 95 { 96 struct net_device_context *net_device_ctx = netdev_priv(net); 97 struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 98 int ret; 99 u32 aread, i, msec = 10, retry = 0, retry_max = 20; 100 struct vmbus_channel *chn; 101 102 netif_tx_disable(net); 103 104 ret = rndis_filter_close(nvdev); 105 if (ret != 0) { 106 netdev_err(net, "unable to close device (ret %d).\n", ret); 107 return ret; 108 } 109 110 /* Ensure pending bytes in ring are read */ 111 while (true) { 112 aread = 0; 113 for (i = 0; i < nvdev->num_chn; i++) { 114 chn = nvdev->chan_table[i].channel; 115 if (!chn) 116 continue; 117 118 aread = hv_get_bytes_to_read(&chn->inbound); 119 if (aread) 120 break; 121 122 aread = 
hv_get_bytes_to_read(&chn->outbound); 123 if (aread) 124 break; 125 } 126 127 retry++; 128 if (retry > retry_max || aread == 0) 129 break; 130 131 msleep(msec); 132 133 if (msec < 1000) 134 msec *= 2; 135 } 136 137 if (aread) { 138 netdev_err(net, "Ring buffer not empty after closing rndis\n"); 139 ret = -ETIMEDOUT; 140 } 141 142 return ret; 143 } 144 145 static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, 146 int pkt_type) 147 { 148 struct rndis_packet *rndis_pkt; 149 struct rndis_per_packet_info *ppi; 150 151 rndis_pkt = &msg->msg.pkt; 152 rndis_pkt->data_offset += ppi_size; 153 154 ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + 155 rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); 156 157 ppi->size = ppi_size; 158 ppi->type = pkt_type; 159 ppi->ppi_offset = sizeof(struct rndis_per_packet_info); 160 161 rndis_pkt->per_pkt_info_len += ppi_size; 162 163 return ppi; 164 } 165 166 /* Azure hosts don't support non-TCP port numbers in hashing yet. We compute 167 * hash for non-TCP traffic with only IP numbers. 168 */ 169 static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk) 170 { 171 struct flow_keys flow; 172 u32 hash; 173 static u32 hashrnd __read_mostly; 174 175 net_get_random_once(&hashrnd, sizeof(hashrnd)); 176 177 if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) 178 return 0; 179 180 if (flow.basic.ip_proto == IPPROTO_TCP) { 181 return skb_get_hash(skb); 182 } else { 183 if (flow.basic.n_proto == htons(ETH_P_IP)) 184 hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd); 185 else if (flow.basic.n_proto == htons(ETH_P_IPV6)) 186 hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); 187 else 188 hash = 0; 189 190 skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); 191 } 192 193 return hash; 194 } 195 196 static inline int netvsc_get_tx_queue(struct net_device *ndev, 197 struct sk_buff *skb, int old_idx) 198 { 199 const struct net_device_context *ndc = netdev_priv(ndev); 200 struct sock *sk = skb->sk; 201 int q_idx; 202 203 q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) & 204 (VRSS_SEND_TAB_SIZE - 1)]; 205 206 /* If queue index changed record the new value */ 207 if (q_idx != old_idx && 208 sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) 209 sk_tx_queue_set(sk, q_idx); 210 211 return q_idx; 212 } 213 214 /* 215 * Select queue for transmit. 216 * 217 * If a valid queue has already been assigned, then use that. 218 * Otherwise compute tx queue based on hash and the send table. 219 * 220 * This is basically similar to default (__netdev_pick_tx) with the added step 221 * of using the host send_table when no other queue has been assigned. 222 * 223 * TODO support XPS - but get_xps_queue not exported 224 */ 225 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, 226 void *accel_priv, select_queue_fallback_t fallback) 227 { 228 unsigned int num_tx_queues = ndev->real_num_tx_queues; 229 int q_idx = sk_tx_queue_get(skb->sk); 230 231 if (q_idx < 0 || skb->ooo_okay) { 232 /* If forwarding a packet, we use the recorded queue when 233 * available for better cache locality. 
		 */
		if (skb_rx_queue_recorded(skb))
			q_idx = skb_get_rx_queue(skb);
		else
			q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
	}

	while (unlikely(q_idx >= num_tx_queues))
		q_idx -= num_tx_queues;

	return q_idx;
}

static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
		       struct hv_page_buffer *pb)
{
	int j = 0;

	/* Deal with compound pages by ignoring unused part
	 * of the page.
	 */
	page += (offset >> PAGE_SHIFT);
	offset &= ~PAGE_MASK;

	while (len > 0) {
		unsigned long bytes;

		bytes = PAGE_SIZE - offset;
		if (bytes > len)
			bytes = len;
		pb[j].pfn = page_to_pfn(page);
		pb[j].offset = offset;
		pb[j].len = bytes;

		offset += bytes;
		len -= bytes;

		if (offset == PAGE_SIZE && len) {
			page++;
			offset = 0;
			j++;
		}
	}

	return j + 1;
}

static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
			   struct hv_netvsc_packet *packet,
			   struct hv_page_buffer **page_buf)
{
	struct hv_page_buffer *pb = *page_buf;
	u32 slots_used = 0;
	char *data = skb->data;
	int frags = skb_shinfo(skb)->nr_frags;
	int i;

	/* The packet is laid out thus:
	 * 1. hdr: RNDIS header and PPI
	 * 2. skb linear data
	 * 3. skb fragment data
	 */
	if (hdr != NULL)
		slots_used += fill_pg_buf(virt_to_page(hdr),
					  offset_in_page(hdr),
					  len, &pb[slots_used]);

	packet->rmsg_size = len;
	packet->rmsg_pgcnt = slots_used;

	slots_used += fill_pg_buf(virt_to_page(data),
				  offset_in_page(data),
				  skb_headlen(skb), &pb[slots_used]);

	for (i = 0; i < frags; i++) {
		skb_frag_t *frag = skb_shinfo(skb)->frags + i;

		slots_used += fill_pg_buf(skb_frag_page(frag),
					  frag->page_offset,
					  skb_frag_size(frag), &pb[slots_used]);
	}
	return slots_used;
}

/* Estimate number of page buffers needed to transmit
 * Need at most 2 for RNDIS header plus skb body and fragments.
 */
static unsigned int netvsc_get_slots(const struct sk_buff *skb)
{
	return PFN_UP(offset_in_page(skb->data) + skb_headlen(skb))
		+ skb_shinfo(skb)->nr_frags
		+ 2;
}

static u32 net_checksum_info(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *ip = ip_hdr(skb);

		if (ip->protocol == IPPROTO_TCP)
			return TRANSPORT_INFO_IPV4_TCP;
		else if (ip->protocol == IPPROTO_UDP)
			return TRANSPORT_INFO_IPV4_UDP;
	} else {
		struct ipv6hdr *ip6 = ipv6_hdr(skb);

		if (ip6->nexthdr == IPPROTO_TCP)
			return TRANSPORT_INFO_IPV6_TCP;
		else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
			return TRANSPORT_INFO_IPV6_UDP;
	}

	return TRANSPORT_INFO_NOT_IP;
}

static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	struct rndis_per_packet_info *ppi;
	u32 hash;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;

	/* We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If skb is scattered around
	 * more pages we try linearizing it.
366 */ 367 num_data_pgs = netvsc_get_slots(skb); 368 if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { 369 ++net_device_ctx->eth_stats.tx_scattered; 370 371 if (skb_linearize(skb)) 372 goto no_memory; 373 374 num_data_pgs = netvsc_get_slots(skb); 375 if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { 376 ++net_device_ctx->eth_stats.tx_too_big; 377 goto drop; 378 } 379 } 380 381 /* 382 * Place the rndis header in the skb head room and 383 * the skb->cb will be used for hv_netvsc_packet 384 * structure. 385 */ 386 ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); 387 if (ret) 388 goto no_memory; 389 390 /* Use the skb control buffer for building up the packet */ 391 BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > 392 FIELD_SIZEOF(struct sk_buff, cb)); 393 packet = (struct hv_netvsc_packet *)skb->cb; 394 395 packet->q_idx = skb_get_queue_mapping(skb); 396 397 packet->total_data_buflen = skb->len; 398 packet->total_bytes = skb->len; 399 packet->total_packets = 1; 400 401 rndis_msg = (struct rndis_message *)skb->head; 402 403 memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); 404 405 /* Add the rndis header */ 406 rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; 407 rndis_msg->msg_len = packet->total_data_buflen; 408 rndis_pkt = &rndis_msg->msg.pkt; 409 rndis_pkt->data_offset = sizeof(struct rndis_packet); 410 rndis_pkt->data_len = packet->total_data_buflen; 411 rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); 412 413 rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); 414 415 hash = skb_get_hash_raw(skb); 416 if (hash != 0 && net->real_num_tx_queues > 1) { 417 rndis_msg_size += NDIS_HASH_PPI_SIZE; 418 ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, 419 NBL_HASH_VALUE); 420 *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; 421 } 422 423 if (skb_vlan_tag_present(skb)) { 424 struct ndis_pkt_8021q_info *vlan; 425 426 rndis_msg_size += NDIS_VLAN_PPI_SIZE; 427 ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, 428 IEEE_8021Q_INFO); 429 vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + 430 ppi->ppi_offset); 431 vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; 432 vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> 433 VLAN_PRIO_SHIFT; 434 } 435 436 if (skb_is_gso(skb)) { 437 struct ndis_tcp_lso_info *lso_info; 438 439 rndis_msg_size += NDIS_LSO_PPI_SIZE; 440 ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, 441 TCP_LARGESEND_PKTINFO); 442 443 lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + 444 ppi->ppi_offset); 445 446 lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; 447 if (skb->protocol == htons(ETH_P_IP)) { 448 lso_info->lso_v2_transmit.ip_version = 449 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; 450 ip_hdr(skb)->tot_len = 0; 451 ip_hdr(skb)->check = 0; 452 tcp_hdr(skb)->check = 453 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 454 ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); 455 } else { 456 lso_info->lso_v2_transmit.ip_version = 457 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; 458 ipv6_hdr(skb)->payload_len = 0; 459 tcp_hdr(skb)->check = 460 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 461 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); 462 } 463 lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb); 464 lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; 465 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 466 if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) { 467 struct ndis_tcp_ip_checksum_info *csum_info; 468 469 rndis_msg_size += NDIS_CSUM_PPI_SIZE; 470 ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, 471 TCPIP_CHKSUM_PKTINFO); 472 473 csum_info = (struct 
ndis_tcp_ip_checksum_info *)((void *)ppi + 474 ppi->ppi_offset); 475 476 csum_info->transmit.tcp_header_offset = skb_transport_offset(skb); 477 478 if (skb->protocol == htons(ETH_P_IP)) { 479 csum_info->transmit.is_ipv4 = 1; 480 481 if (ip_hdr(skb)->protocol == IPPROTO_TCP) 482 csum_info->transmit.tcp_checksum = 1; 483 else 484 csum_info->transmit.udp_checksum = 1; 485 } else { 486 csum_info->transmit.is_ipv6 = 1; 487 488 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) 489 csum_info->transmit.tcp_checksum = 1; 490 else 491 csum_info->transmit.udp_checksum = 1; 492 } 493 } else { 494 /* Can't do offload of this type of checksum */ 495 if (skb_checksum_help(skb)) 496 goto drop; 497 } 498 } 499 500 /* Start filling in the page buffers with the rndis hdr */ 501 rndis_msg->msg_len += rndis_msg_size; 502 packet->total_data_buflen = rndis_msg->msg_len; 503 packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, 504 skb, packet, &pb); 505 506 /* timestamp packet in software */ 507 skb_tx_timestamp(skb); 508 ret = netvsc_send(net_device_ctx->device_ctx, packet, 509 rndis_msg, &pb, skb); 510 if (likely(ret == 0)) 511 return NETDEV_TX_OK; 512 513 if (ret == -EAGAIN) { 514 ++net_device_ctx->eth_stats.tx_busy; 515 return NETDEV_TX_BUSY; 516 } 517 518 if (ret == -ENOSPC) 519 ++net_device_ctx->eth_stats.tx_no_space; 520 521 drop: 522 dev_kfree_skb_any(skb); 523 net->stats.tx_dropped++; 524 525 return NETDEV_TX_OK; 526 527 no_memory: 528 ++net_device_ctx->eth_stats.tx_no_memory; 529 goto drop; 530 } 531 /* 532 * netvsc_linkstatus_callback - Link up/down notification 533 */ 534 void netvsc_linkstatus_callback(struct hv_device *device_obj, 535 struct rndis_message *resp) 536 { 537 struct rndis_indicate_status *indicate = &resp->msg.indicate_status; 538 struct net_device *net; 539 struct net_device_context *ndev_ctx; 540 struct netvsc_reconfig *event; 541 unsigned long flags; 542 543 net = hv_get_drvdata(device_obj); 544 545 if (!net) 546 return; 547 548 ndev_ctx = netdev_priv(net); 549 550 /* Update the physical link speed when changing to another vSwitch */ 551 if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { 552 u32 speed; 553 554 speed = *(u32 *)((void *)indicate + indicate-> 555 status_buf_offset) / 10000; 556 ndev_ctx->speed = speed; 557 return; 558 } 559 560 /* Handle these link change statuses below */ 561 if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE && 562 indicate->status != RNDIS_STATUS_MEDIA_CONNECT && 563 indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT) 564 return; 565 566 if (net->reg_state != NETREG_REGISTERED) 567 return; 568 569 event = kzalloc(sizeof(*event), GFP_ATOMIC); 570 if (!event) 571 return; 572 event->event = indicate->status; 573 574 spin_lock_irqsave(&ndev_ctx->lock, flags); 575 list_add_tail(&event->list, &ndev_ctx->reconfig_events); 576 spin_unlock_irqrestore(&ndev_ctx->lock, flags); 577 578 schedule_delayed_work(&ndev_ctx->dwork, 0); 579 } 580 581 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, 582 struct napi_struct *napi, 583 const struct ndis_tcp_ip_checksum_info *csum_info, 584 const struct ndis_pkt_8021q_info *vlan, 585 void *data, u32 buflen) 586 { 587 struct sk_buff *skb; 588 589 skb = napi_alloc_skb(napi, buflen); 590 if (!skb) 591 return skb; 592 593 /* 594 * Copy to skb. 
This copy is needed here since the memory pointed by
	 * hv_netvsc_packet cannot be deallocated
	 */
	skb_put_data(skb, data, buflen);

	skb->protocol = eth_type_trans(skb, net);

	/* skb is already created with CHECKSUM_NONE */
	skb_checksum_none_assert(skb);

	/*
	 * In Linux, the IP checksum is always checked.
	 * Do L4 checksum offload if enabled and present.
	 */
	if (csum_info && (net->features & NETIF_F_RXCSUM)) {
		if (csum_info->receive.tcp_checksum_succeeded ||
		    csum_info->receive.udp_checksum_succeeded)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (vlan) {
		u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);

		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       vlan_tci);
	}

	return skb;
}

/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct net_device *net,
			 struct vmbus_channel *channel,
			 void *data, u32 len,
			 const struct ndis_tcp_ip_checksum_info *csum_info,
			 const struct ndis_pkt_8021q_info *vlan)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct netvsc_device *net_device;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct netvsc_channel *nvchan;
	struct net_device *vf_netdev;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;

	if (net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	/*
	 * If necessary, inject this packet into the VF interface.
	 * On Hyper-V, multicast and broadcast packets are only delivered
	 * to the synthetic interface (after subjecting these to
	 * policy filters on the host). Deliver these via the VF
	 * interface in the guest.
	 */
	rcu_read_lock();
	net_device = rcu_dereference(net_device_ctx->nvdev);
	if (unlikely(!net_device))
		goto drop;

	nvchan = &net_device->chan_table[q_idx];
	vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
	if (vf_netdev && (vf_netdev->flags & IFF_UP))
		net = vf_netdev;

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
				    csum_info, vlan, data, len);
	if (unlikely(!skb)) {
drop:
		++net->stats.rx_dropped;
		rcu_read_unlock();
		return NVSP_STAT_FAIL;
	}

	if (net != vf_netdev)
		skb_record_rx_queue(skb, q_idx);

	/*
	 * Even if injecting the packet, record the statistics
	 * on the synthetic device because modifying the VF device
	 * statistics will not work correctly.
679 */ 680 rx_stats = &nvchan->rx_stats; 681 u64_stats_update_begin(&rx_stats->syncp); 682 rx_stats->packets++; 683 rx_stats->bytes += len; 684 685 if (skb->pkt_type == PACKET_BROADCAST) 686 ++rx_stats->broadcast; 687 else if (skb->pkt_type == PACKET_MULTICAST) 688 ++rx_stats->multicast; 689 u64_stats_update_end(&rx_stats->syncp); 690 691 napi_gro_receive(&nvchan->napi, skb); 692 rcu_read_unlock(); 693 694 return 0; 695 } 696 697 static void netvsc_get_drvinfo(struct net_device *net, 698 struct ethtool_drvinfo *info) 699 { 700 strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 701 strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); 702 } 703 704 static void netvsc_get_channels(struct net_device *net, 705 struct ethtool_channels *channel) 706 { 707 struct net_device_context *net_device_ctx = netdev_priv(net); 708 struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 709 710 if (nvdev) { 711 channel->max_combined = nvdev->max_chn; 712 channel->combined_count = nvdev->num_chn; 713 } 714 } 715 716 static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, 717 u32 num_chn) 718 { 719 struct netvsc_device_info device_info; 720 int ret; 721 722 memset(&device_info, 0, sizeof(device_info)); 723 device_info.num_chn = num_chn; 724 device_info.ring_size = ring_size; 725 device_info.max_num_vrss_chns = num_chn; 726 727 ret = rndis_filter_device_add(dev, &device_info); 728 if (ret) 729 return ret; 730 731 ret = netif_set_real_num_tx_queues(net, num_chn); 732 if (ret) 733 return ret; 734 735 ret = netif_set_real_num_rx_queues(net, num_chn); 736 737 return ret; 738 } 739 740 static int netvsc_set_channels(struct net_device *net, 741 struct ethtool_channels *channels) 742 { 743 struct net_device_context *net_device_ctx = netdev_priv(net); 744 struct hv_device *dev = net_device_ctx->device_ctx; 745 struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 746 unsigned int count = channels->combined_count; 747 bool was_running; 748 int ret; 749 750 /* We do not support separate count for rx, tx, or other */ 751 if (count == 0 || 752 channels->rx_count || channels->tx_count || channels->other_count) 753 return -EINVAL; 754 755 if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX) 756 return -EINVAL; 757 758 if (!nvdev || nvdev->destroy) 759 return -ENODEV; 760 761 if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) 762 return -EINVAL; 763 764 if (count > nvdev->max_chn) 765 return -EINVAL; 766 767 was_running = netif_running(net); 768 if (was_running) { 769 ret = netvsc_close(net); 770 if (ret) 771 return ret; 772 } 773 774 rndis_filter_device_remove(dev, nvdev); 775 776 ret = netvsc_set_queues(net, dev, count); 777 if (ret == 0) 778 nvdev->num_chn = count; 779 else 780 netvsc_set_queues(net, dev, nvdev->num_chn); 781 782 if (was_running) 783 ret = netvsc_open(net); 784 785 /* We may have missed link change notifications */ 786 schedule_delayed_work(&net_device_ctx->dwork, 0); 787 788 return ret; 789 } 790 791 static bool 792 netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd) 793 { 794 struct ethtool_link_ksettings diff1 = *cmd; 795 struct ethtool_link_ksettings diff2 = {}; 796 797 diff1.base.speed = 0; 798 diff1.base.duplex = 0; 799 /* advertising and cmd are usually set */ 800 ethtool_link_ksettings_zero_link_mode(&diff1, advertising); 801 diff1.base.cmd = 0; 802 /* We set port to PORT_OTHER */ 803 diff2.base.port = PORT_OTHER; 804 805 return !memcmp(&diff1, &diff2, sizeof(diff1)); 806 } 807 808 static void 
netvsc_init_settings(struct net_device *dev) 809 { 810 struct net_device_context *ndc = netdev_priv(dev); 811 812 ndc->speed = SPEED_UNKNOWN; 813 ndc->duplex = DUPLEX_FULL; 814 } 815 816 static int netvsc_get_link_ksettings(struct net_device *dev, 817 struct ethtool_link_ksettings *cmd) 818 { 819 struct net_device_context *ndc = netdev_priv(dev); 820 821 cmd->base.speed = ndc->speed; 822 cmd->base.duplex = ndc->duplex; 823 cmd->base.port = PORT_OTHER; 824 825 return 0; 826 } 827 828 static int netvsc_set_link_ksettings(struct net_device *dev, 829 const struct ethtool_link_ksettings *cmd) 830 { 831 struct net_device_context *ndc = netdev_priv(dev); 832 u32 speed; 833 834 speed = cmd->base.speed; 835 if (!ethtool_validate_speed(speed) || 836 !ethtool_validate_duplex(cmd->base.duplex) || 837 !netvsc_validate_ethtool_ss_cmd(cmd)) 838 return -EINVAL; 839 840 ndc->speed = speed; 841 ndc->duplex = cmd->base.duplex; 842 843 return 0; 844 } 845 846 static int netvsc_change_mtu(struct net_device *ndev, int mtu) 847 { 848 struct net_device_context *ndevctx = netdev_priv(ndev); 849 struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); 850 struct hv_device *hdev = ndevctx->device_ctx; 851 struct netvsc_device_info device_info; 852 bool was_running; 853 int ret = 0; 854 855 if (!nvdev || nvdev->destroy) 856 return -ENODEV; 857 858 was_running = netif_running(ndev); 859 if (was_running) { 860 ret = netvsc_close(ndev); 861 if (ret) 862 return ret; 863 } 864 865 memset(&device_info, 0, sizeof(device_info)); 866 device_info.ring_size = ring_size; 867 device_info.num_chn = nvdev->num_chn; 868 device_info.max_num_vrss_chns = nvdev->num_chn; 869 870 rndis_filter_device_remove(hdev, nvdev); 871 872 /* 'nvdev' has been freed in rndis_filter_device_remove() -> 873 * netvsc_device_remove () -> free_netvsc_device(). 874 * We mustn't access it before it's re-created in 875 * rndis_filter_device_add() -> netvsc_device_add(). 
876 */ 877 878 ndev->mtu = mtu; 879 880 rndis_filter_device_add(hdev, &device_info); 881 882 if (was_running) 883 ret = netvsc_open(ndev); 884 885 /* We may have missed link change notifications */ 886 schedule_delayed_work(&ndevctx->dwork, 0); 887 888 return ret; 889 } 890 891 static void netvsc_get_stats64(struct net_device *net, 892 struct rtnl_link_stats64 *t) 893 { 894 struct net_device_context *ndev_ctx = netdev_priv(net); 895 struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); 896 int i; 897 898 if (!nvdev) 899 return; 900 901 for (i = 0; i < nvdev->num_chn; i++) { 902 const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; 903 const struct netvsc_stats *stats; 904 u64 packets, bytes, multicast; 905 unsigned int start; 906 907 stats = &nvchan->tx_stats; 908 do { 909 start = u64_stats_fetch_begin_irq(&stats->syncp); 910 packets = stats->packets; 911 bytes = stats->bytes; 912 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 913 914 t->tx_bytes += bytes; 915 t->tx_packets += packets; 916 917 stats = &nvchan->rx_stats; 918 do { 919 start = u64_stats_fetch_begin_irq(&stats->syncp); 920 packets = stats->packets; 921 bytes = stats->bytes; 922 multicast = stats->multicast + stats->broadcast; 923 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 924 925 t->rx_bytes += bytes; 926 t->rx_packets += packets; 927 t->multicast += multicast; 928 } 929 930 t->tx_dropped = net->stats.tx_dropped; 931 t->tx_errors = net->stats.tx_errors; 932 933 t->rx_dropped = net->stats.rx_dropped; 934 t->rx_errors = net->stats.rx_errors; 935 } 936 937 static int netvsc_set_mac_addr(struct net_device *ndev, void *p) 938 { 939 struct sockaddr *addr = p; 940 char save_adr[ETH_ALEN]; 941 unsigned char save_aatype; 942 int err; 943 944 memcpy(save_adr, ndev->dev_addr, ETH_ALEN); 945 save_aatype = ndev->addr_assign_type; 946 947 err = eth_mac_addr(ndev, p); 948 if (err != 0) 949 return err; 950 951 err = rndis_filter_set_device_mac(ndev, addr->sa_data); 952 if (err != 0) { 953 /* roll back to saved MAC */ 954 memcpy(ndev->dev_addr, save_adr, ETH_ALEN); 955 ndev->addr_assign_type = save_aatype; 956 } 957 958 return err; 959 } 960 961 static const struct { 962 char name[ETH_GSTRING_LEN]; 963 u16 offset; 964 } netvsc_stats[] = { 965 { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) }, 966 { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) }, 967 { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) }, 968 { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) }, 969 { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, 970 }; 971 972 #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats) 973 974 /* 4 statistics per queue (rx/tx packets/bytes) */ 975 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) 976 977 static int netvsc_get_sset_count(struct net_device *dev, int string_set) 978 { 979 struct net_device_context *ndc = netdev_priv(dev); 980 struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); 981 982 if (!nvdev) 983 return -ENODEV; 984 985 switch (string_set) { 986 case ETH_SS_STATS: 987 return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev); 988 default: 989 return -EINVAL; 990 } 991 } 992 993 static void netvsc_get_ethtool_stats(struct net_device *dev, 994 struct ethtool_stats *stats, u64 *data) 995 { 996 struct net_device_context *ndc = netdev_priv(dev); 997 struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); 998 const void *nds = &ndc->eth_stats; 999 const struct netvsc_stats 
*qstats; 1000 unsigned int start; 1001 u64 packets, bytes; 1002 int i, j; 1003 1004 if (!nvdev) 1005 return; 1006 1007 for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++) 1008 data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); 1009 1010 for (j = 0; j < nvdev->num_chn; j++) { 1011 qstats = &nvdev->chan_table[j].tx_stats; 1012 1013 do { 1014 start = u64_stats_fetch_begin_irq(&qstats->syncp); 1015 packets = qstats->packets; 1016 bytes = qstats->bytes; 1017 } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); 1018 data[i++] = packets; 1019 data[i++] = bytes; 1020 1021 qstats = &nvdev->chan_table[j].rx_stats; 1022 do { 1023 start = u64_stats_fetch_begin_irq(&qstats->syncp); 1024 packets = qstats->packets; 1025 bytes = qstats->bytes; 1026 } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); 1027 data[i++] = packets; 1028 data[i++] = bytes; 1029 } 1030 } 1031 1032 static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) 1033 { 1034 struct net_device_context *ndc = netdev_priv(dev); 1035 struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); 1036 u8 *p = data; 1037 int i; 1038 1039 if (!nvdev) 1040 return; 1041 1042 switch (stringset) { 1043 case ETH_SS_STATS: 1044 for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) 1045 memcpy(p + i * ETH_GSTRING_LEN, 1046 netvsc_stats[i].name, ETH_GSTRING_LEN); 1047 1048 p += i * ETH_GSTRING_LEN; 1049 for (i = 0; i < nvdev->num_chn; i++) { 1050 sprintf(p, "tx_queue_%u_packets", i); 1051 p += ETH_GSTRING_LEN; 1052 sprintf(p, "tx_queue_%u_bytes", i); 1053 p += ETH_GSTRING_LEN; 1054 sprintf(p, "rx_queue_%u_packets", i); 1055 p += ETH_GSTRING_LEN; 1056 sprintf(p, "rx_queue_%u_bytes", i); 1057 p += ETH_GSTRING_LEN; 1058 } 1059 1060 break; 1061 } 1062 } 1063 1064 static int 1065 netvsc_get_rss_hash_opts(struct netvsc_device *nvdev, 1066 struct ethtool_rxnfc *info) 1067 { 1068 info->data = RXH_IP_SRC | RXH_IP_DST; 1069 1070 switch (info->flow_type) { 1071 case TCP_V4_FLOW: 1072 case TCP_V6_FLOW: 1073 info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; 1074 /* fallthrough */ 1075 case UDP_V4_FLOW: 1076 case UDP_V6_FLOW: 1077 case IPV4_FLOW: 1078 case IPV6_FLOW: 1079 break; 1080 default: 1081 info->data = 0; 1082 break; 1083 } 1084 1085 return 0; 1086 } 1087 1088 static int 1089 netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, 1090 u32 *rules) 1091 { 1092 struct net_device_context *ndc = netdev_priv(dev); 1093 struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); 1094 1095 if (!nvdev) 1096 return -ENODEV; 1097 1098 switch (info->cmd) { 1099 case ETHTOOL_GRXRINGS: 1100 info->data = nvdev->num_chn; 1101 return 0; 1102 1103 case ETHTOOL_GRXFH: 1104 return netvsc_get_rss_hash_opts(nvdev, info); 1105 } 1106 return -EOPNOTSUPP; 1107 } 1108 1109 #ifdef CONFIG_NET_POLL_CONTROLLER 1110 static void netvsc_poll_controller(struct net_device *dev) 1111 { 1112 struct net_device_context *ndc = netdev_priv(dev); 1113 struct netvsc_device *ndev; 1114 int i; 1115 1116 rcu_read_lock(); 1117 ndev = rcu_dereference(ndc->nvdev); 1118 if (ndev) { 1119 for (i = 0; i < ndev->num_chn; i++) { 1120 struct netvsc_channel *nvchan = &ndev->chan_table[i]; 1121 1122 napi_schedule(&nvchan->napi); 1123 } 1124 } 1125 rcu_read_unlock(); 1126 } 1127 #endif 1128 1129 static u32 netvsc_get_rxfh_key_size(struct net_device *dev) 1130 { 1131 return NETVSC_HASH_KEYLEN; 1132 } 1133 1134 static u32 netvsc_rss_indir_size(struct net_device *dev) 1135 { 1136 return ITAB_NUM; 1137 } 1138 1139 static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, 
1140 u8 *hfunc) 1141 { 1142 struct net_device_context *ndc = netdev_priv(dev); 1143 struct netvsc_device *ndev = rcu_dereference(ndc->nvdev); 1144 struct rndis_device *rndis_dev; 1145 int i; 1146 1147 if (!ndev) 1148 return -ENODEV; 1149 1150 if (hfunc) 1151 *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ 1152 1153 rndis_dev = ndev->extension; 1154 if (indir) { 1155 for (i = 0; i < ITAB_NUM; i++) 1156 indir[i] = rndis_dev->ind_table[i]; 1157 } 1158 1159 if (key) 1160 memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN); 1161 1162 return 0; 1163 } 1164 1165 static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, 1166 const u8 *key, const u8 hfunc) 1167 { 1168 struct net_device_context *ndc = netdev_priv(dev); 1169 struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev); 1170 struct rndis_device *rndis_dev; 1171 int i; 1172 1173 if (!ndev) 1174 return -ENODEV; 1175 1176 if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) 1177 return -EOPNOTSUPP; 1178 1179 rndis_dev = ndev->extension; 1180 if (indir) { 1181 for (i = 0; i < ITAB_NUM; i++) 1182 if (indir[i] >= VRSS_CHANNEL_MAX) 1183 return -EINVAL; 1184 1185 for (i = 0; i < ITAB_NUM; i++) 1186 rndis_dev->ind_table[i] = indir[i]; 1187 } 1188 1189 if (!key) { 1190 if (!indir) 1191 return 0; 1192 1193 key = rndis_dev->rss_key; 1194 } 1195 1196 return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn); 1197 } 1198 1199 static const struct ethtool_ops ethtool_ops = { 1200 .get_drvinfo = netvsc_get_drvinfo, 1201 .get_link = ethtool_op_get_link, 1202 .get_ethtool_stats = netvsc_get_ethtool_stats, 1203 .get_sset_count = netvsc_get_sset_count, 1204 .get_strings = netvsc_get_strings, 1205 .get_channels = netvsc_get_channels, 1206 .set_channels = netvsc_set_channels, 1207 .get_ts_info = ethtool_op_get_ts_info, 1208 .get_rxnfc = netvsc_get_rxnfc, 1209 .get_rxfh_key_size = netvsc_get_rxfh_key_size, 1210 .get_rxfh_indir_size = netvsc_rss_indir_size, 1211 .get_rxfh = netvsc_get_rxfh, 1212 .set_rxfh = netvsc_set_rxfh, 1213 .get_link_ksettings = netvsc_get_link_ksettings, 1214 .set_link_ksettings = netvsc_set_link_ksettings, 1215 }; 1216 1217 static const struct net_device_ops device_ops = { 1218 .ndo_open = netvsc_open, 1219 .ndo_stop = netvsc_close, 1220 .ndo_start_xmit = netvsc_start_xmit, 1221 .ndo_set_rx_mode = netvsc_set_multicast_list, 1222 .ndo_change_mtu = netvsc_change_mtu, 1223 .ndo_validate_addr = eth_validate_addr, 1224 .ndo_set_mac_address = netvsc_set_mac_addr, 1225 .ndo_select_queue = netvsc_select_queue, 1226 .ndo_get_stats64 = netvsc_get_stats64, 1227 #ifdef CONFIG_NET_POLL_CONTROLLER 1228 .ndo_poll_controller = netvsc_poll_controller, 1229 #endif 1230 }; 1231 1232 /* 1233 * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link 1234 * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is 1235 * present send GARP packet to network peers with netif_notify_peers(). 
1236 */ 1237 static void netvsc_link_change(struct work_struct *w) 1238 { 1239 struct net_device_context *ndev_ctx = 1240 container_of(w, struct net_device_context, dwork.work); 1241 struct hv_device *device_obj = ndev_ctx->device_ctx; 1242 struct net_device *net = hv_get_drvdata(device_obj); 1243 struct netvsc_device *net_device; 1244 struct rndis_device *rdev; 1245 struct netvsc_reconfig *event = NULL; 1246 bool notify = false, reschedule = false; 1247 unsigned long flags, next_reconfig, delay; 1248 1249 rtnl_lock(); 1250 net_device = rtnl_dereference(ndev_ctx->nvdev); 1251 if (!net_device) 1252 goto out_unlock; 1253 1254 rdev = net_device->extension; 1255 1256 next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT; 1257 if (time_is_after_jiffies(next_reconfig)) { 1258 /* link_watch only sends one notification with current state 1259 * per second, avoid doing reconfig more frequently. Handle 1260 * wrap around. 1261 */ 1262 delay = next_reconfig - jiffies; 1263 delay = delay < LINKCHANGE_INT ? delay : LINKCHANGE_INT; 1264 schedule_delayed_work(&ndev_ctx->dwork, delay); 1265 goto out_unlock; 1266 } 1267 ndev_ctx->last_reconfig = jiffies; 1268 1269 spin_lock_irqsave(&ndev_ctx->lock, flags); 1270 if (!list_empty(&ndev_ctx->reconfig_events)) { 1271 event = list_first_entry(&ndev_ctx->reconfig_events, 1272 struct netvsc_reconfig, list); 1273 list_del(&event->list); 1274 reschedule = !list_empty(&ndev_ctx->reconfig_events); 1275 } 1276 spin_unlock_irqrestore(&ndev_ctx->lock, flags); 1277 1278 if (!event) 1279 goto out_unlock; 1280 1281 switch (event->event) { 1282 /* Only the following events are possible due to the check in 1283 * netvsc_linkstatus_callback() 1284 */ 1285 case RNDIS_STATUS_MEDIA_CONNECT: 1286 if (rdev->link_state) { 1287 rdev->link_state = false; 1288 if (!ndev_ctx->datapath) 1289 netif_carrier_on(net); 1290 netif_tx_wake_all_queues(net); 1291 } else { 1292 notify = true; 1293 } 1294 kfree(event); 1295 break; 1296 case RNDIS_STATUS_MEDIA_DISCONNECT: 1297 if (!rdev->link_state) { 1298 rdev->link_state = true; 1299 netif_carrier_off(net); 1300 netif_tx_stop_all_queues(net); 1301 } 1302 kfree(event); 1303 break; 1304 case RNDIS_STATUS_NETWORK_CHANGE: 1305 /* Only makes sense if carrier is present */ 1306 if (!rdev->link_state) { 1307 rdev->link_state = true; 1308 netif_carrier_off(net); 1309 netif_tx_stop_all_queues(net); 1310 event->event = RNDIS_STATUS_MEDIA_CONNECT; 1311 spin_lock_irqsave(&ndev_ctx->lock, flags); 1312 list_add(&event->list, &ndev_ctx->reconfig_events); 1313 spin_unlock_irqrestore(&ndev_ctx->lock, flags); 1314 reschedule = true; 1315 } 1316 break; 1317 } 1318 1319 rtnl_unlock(); 1320 1321 if (notify) 1322 netdev_notify_peers(net); 1323 1324 /* link_watch only sends one notification with current state per 1325 * second, handle next reconfig event in 2 seconds. 
1326 */ 1327 if (reschedule) 1328 schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT); 1329 1330 return; 1331 1332 out_unlock: 1333 rtnl_unlock(); 1334 } 1335 1336 static struct net_device *get_netvsc_bymac(const u8 *mac) 1337 { 1338 struct net_device *dev; 1339 1340 ASSERT_RTNL(); 1341 1342 for_each_netdev(&init_net, dev) { 1343 if (dev->netdev_ops != &device_ops) 1344 continue; /* not a netvsc device */ 1345 1346 if (ether_addr_equal(mac, dev->perm_addr)) 1347 return dev; 1348 } 1349 1350 return NULL; 1351 } 1352 1353 static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) 1354 { 1355 struct net_device *dev; 1356 1357 ASSERT_RTNL(); 1358 1359 for_each_netdev(&init_net, dev) { 1360 struct net_device_context *net_device_ctx; 1361 1362 if (dev->netdev_ops != &device_ops) 1363 continue; /* not a netvsc device */ 1364 1365 net_device_ctx = netdev_priv(dev); 1366 if (net_device_ctx->nvdev == NULL) 1367 continue; /* device is removed */ 1368 1369 if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) 1370 return dev; /* a match */ 1371 } 1372 1373 return NULL; 1374 } 1375 1376 static int netvsc_register_vf(struct net_device *vf_netdev) 1377 { 1378 struct net_device *ndev; 1379 struct net_device_context *net_device_ctx; 1380 struct netvsc_device *netvsc_dev; 1381 1382 if (vf_netdev->addr_len != ETH_ALEN) 1383 return NOTIFY_DONE; 1384 1385 /* 1386 * We will use the MAC address to locate the synthetic interface to 1387 * associate with the VF interface. If we don't find a matching 1388 * synthetic interface, move on. 1389 */ 1390 ndev = get_netvsc_bymac(vf_netdev->perm_addr); 1391 if (!ndev) 1392 return NOTIFY_DONE; 1393 1394 net_device_ctx = netdev_priv(ndev); 1395 netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); 1396 if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) 1397 return NOTIFY_DONE; 1398 1399 netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); 1400 /* 1401 * Take a reference on the module. 1402 */ 1403 try_module_get(THIS_MODULE); 1404 1405 dev_hold(vf_netdev); 1406 rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); 1407 return NOTIFY_OK; 1408 } 1409 1410 static int netvsc_vf_up(struct net_device *vf_netdev) 1411 { 1412 struct net_device *ndev; 1413 struct netvsc_device *netvsc_dev; 1414 struct net_device_context *net_device_ctx; 1415 1416 ndev = get_netvsc_byref(vf_netdev); 1417 if (!ndev) 1418 return NOTIFY_DONE; 1419 1420 net_device_ctx = netdev_priv(ndev); 1421 netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); 1422 1423 netdev_info(ndev, "VF up: %s\n", vf_netdev->name); 1424 1425 /* 1426 * Open the device before switching data path. 1427 */ 1428 rndis_filter_open(netvsc_dev); 1429 1430 /* 1431 * notify the host to switch the data path. 1432 */ 1433 netvsc_switch_datapath(ndev, true); 1434 netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name); 1435 1436 netif_carrier_off(ndev); 1437 1438 /* Now notify peers through VF device. 
	 */
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);

	return NOTIFY_OK;
}

static int netvsc_vf_down(struct net_device *vf_netdev)
{
	struct net_device *ndev;
	struct netvsc_device *netvsc_dev;
	struct net_device_context *net_device_ctx;

	ndev = get_netvsc_byref(vf_netdev);
	if (!ndev)
		return NOTIFY_DONE;

	net_device_ctx = netdev_priv(ndev);
	netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);

	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
	netvsc_switch_datapath(ndev, false);
	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
	rndis_filter_close(netvsc_dev);
	netif_carrier_on(ndev);

	/* Now notify peers through netvsc device. */
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);

	return NOTIFY_OK;
}

static int netvsc_unregister_vf(struct net_device *vf_netdev)
{
	struct net_device *ndev;
	struct net_device_context *net_device_ctx;

	ndev = get_netvsc_byref(vf_netdev);
	if (!ndev)
		return NOTIFY_DONE;

	net_device_ctx = netdev_priv(ndev);

	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);

	RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
	dev_put(vf_netdev);
	module_put(THIS_MODULE);
	return NOTIFY_OK;
}

static int netvsc_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
{
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	struct netvsc_device *nvdev;
	int ret;

	net = alloc_etherdev_mq(sizeof(struct net_device_context),
				VRSS_CHANNEL_MAX);
	if (!net)
		return -ENOMEM;

	netif_carrier_off(net);

	netvsc_init_settings(net);

	net_device_ctx = netdev_priv(net);
	net_device_ctx->device_ctx = dev;
	net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
	if (netif_msg_probe(net_device_ctx))
		netdev_dbg(net, "netvsc msg_enable: %d\n",
			   net_device_ctx->msg_enable);

	hv_set_drvdata(dev, net);

	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);

	spin_lock_init(&net_device_ctx->lock);
	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);

	net->netdev_ops = &device_ops;
	net->ethtool_ops = &ethtool_ops;
	SET_NETDEV_DEV(net, &dev->device);

	/* We always need headroom for rndis header */
	net->needed_headroom = RNDIS_AND_PPI_SIZE;

	/* Notify the netvsc driver of the new device */
	memset(&device_info, 0, sizeof(device_info));
	device_info.ring_size = ring_size;
	device_info.num_chn = VRSS_CHANNEL_DEFAULT;
	ret = rndis_filter_device_add(dev, &device_info);
	if (ret != 0) {
		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
		free_netdev(net);
		hv_set_drvdata(dev, NULL);
		return ret;
	}
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

	/* hw_features computed in rndis_filter_device_add */
	net->features = net->hw_features |
		NETIF_F_HIGHDMA | NETIF_F_SG |
		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
	net->vlan_features = net->features;

	/* RCU not necessary here, device not registered */
	nvdev = net_device_ctx->nvdev;
	netif_set_real_num_tx_queues(net, nvdev->num_chn);
	netif_set_real_num_rx_queues(net, nvdev->num_chn);
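	/*
	 * Where the MTU bounds below come from (derived from the
	 * hyperv_net.h definitions used in this file): min_mtu is
	 * NETVSC_MTU_MIN (68); with NVSP >= 2 max_mtu is
	 * NETVSC_MTU - ETH_HLEN, i.e. 65535 - 14 = 65521, otherwise
	 * only the standard ETH_DATA_LEN (1500) is allowed.
	 */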
	/* MTU range: 68 - 1500 or 65521 */
	net->min_mtu = NETVSC_MTU_MIN;
	if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
		net->max_mtu = NETVSC_MTU - ETH_HLEN;
	else
		net->max_mtu = ETH_DATA_LEN;

	ret = register_netdev(net);
	if (ret != 0) {
		pr_err("Unable to register netdev.\n");
		rndis_filter_device_remove(dev, nvdev);
		free_netdev(net);
	}

	return ret;
}

static int netvsc_remove(struct hv_device *dev)
{
	struct net_device *net;
	struct net_device_context *ndev_ctx;

	net = hv_get_drvdata(dev);

	if (net == NULL) {
		dev_err(&dev->device, "No net device to remove\n");
		return 0;
	}

	ndev_ctx = netdev_priv(net);

	netif_device_detach(net);

	cancel_delayed_work_sync(&ndev_ctx->dwork);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed. Also blocks mtu and channel changes.
	 */
	rtnl_lock();
	rndis_filter_device_remove(dev, ndev_ctx->nvdev);
	rtnl_unlock();

	unregister_netdev(net);

	hv_set_drvdata(dev, NULL);

	free_netdev(net);
	return 0;
}

static const struct hv_vmbus_device_id id_table[] = {
	/* Network guid */
	{ HV_NIC_GUID, },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);
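/*
 * VMBus matches drivers to channel offers by class GUID: HV_NIC_GUID is
 * the class Hyper-V uses for synthetic network adapters, so netvsc_probe()
 * runs once for every synthetic NIC offered to the guest.
 */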
/* The one and only one */
static struct hv_driver netvsc_drv = {
	.name = KBUILD_MODNAME,
	.id_table = id_table,
	.probe = netvsc_probe,
	.remove = netvsc_remove,
};

/*
 * On Hyper-V, every VF interface is matched with a corresponding
 * synthetic interface. The synthetic interface is presented first
 * to the guest. When the corresponding VF instance is registered,
 * we will take care of switching the data path.
 */
static int netvsc_netdev_event(struct notifier_block *this,
			       unsigned long event, void *ptr)
{
	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);

	/* Skip our own events */
	if (event_dev->netdev_ops == &device_ops)
		return NOTIFY_DONE;

	/* Avoid non-Ethernet type devices */
	if (event_dev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	/* Avoid Vlan dev with same MAC registering as VF */
	if (is_vlan_dev(event_dev))
		return NOTIFY_DONE;

	/* Avoid Bonding master dev with same MAC registering as VF */
	if ((event_dev->priv_flags & IFF_BONDING) &&
	    (event_dev->flags & IFF_MASTER))
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
		return netvsc_register_vf(event_dev);
	case NETDEV_UNREGISTER:
		return netvsc_unregister_vf(event_dev);
	case NETDEV_UP:
		return netvsc_vf_up(event_dev);
	case NETDEV_DOWN:
		return netvsc_vf_down(event_dev);
	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block netvsc_netdev_notifier = {
	.notifier_call = netvsc_netdev_event,
};

static void __exit netvsc_drv_exit(void)
{
	unregister_netdevice_notifier(&netvsc_netdev_notifier);
	vmbus_driver_unregister(&netvsc_drv);
}

static int __init netvsc_drv_init(void)
{
	int ret;

	if (ring_size < RING_SIZE_MIN) {
		ring_size = RING_SIZE_MIN;
		pr_info("Increased ring_size to %d (min allowed)\n",
			ring_size);
	}
	ret = vmbus_driver_register(&netvsc_drv);

	if (ret)
		return ret;

	register_netdevice_notifier(&netvsc_netdev_notifier);
	return 0;
}

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");

module_init(netvsc_drv_init);
module_exit(netvsc_drv_exit);
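/*
 * Illustrative module-load example (a sketch only; assumes the module is
 * built under its usual hv_netvsc name, and the parameter values shown are
 * arbitrary):
 *
 *	modprobe hv_netvsc ring_size=256 debug=0x3f
 *
 * ring_size is given in pages per VMBus ring buffer and is raised to
 * RING_SIZE_MIN by netvsc_drv_init() above if a smaller value is passed;
 * debug seeds msg_enable through netif_msg_init() in netvsc_probe().
 */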