/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

#include "hyperv_net.h"


#define RING_SIZE_MIN 64
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");

static int max_num_vrss_chns = 8;

static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
				NETIF_MSG_LINK | NETIF_MSG_IFUP |
				NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
				NETIF_MSG_TX_ERR;

static int debug = -1;
module_param(debug, int, S_IRUGO);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

static void do_set_multicast(struct work_struct *w)
{
	struct net_device_context *ndevctx =
		container_of(w, struct net_device_context, work);
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;

	nvdev = hv_get_drvdata(ndevctx->device_ctx);
	if (nvdev == NULL || nvdev->ndev == NULL)
		return;

	rdev = nvdev->extension;
	if (rdev == NULL)
		return;

	if (nvdev->ndev->flags & IFF_PROMISC)
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_PROMISCUOUS);
	else
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_BROADCAST |
			NDIS_PACKET_TYPE_ALL_MULTICAST |
			NDIS_PACKET_TYPE_DIRECTED);
}

static void netvsc_set_multicast_list(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);

	schedule_work(&net_device_ctx->work);
}

static int netvsc_open(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *device_obj = net_device_ctx->device_ctx;
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;
	int ret = 0;

	netif_carrier_off(net);

	/* Open up the device */
	ret = rndis_filter_open(device_obj);
	if (ret != 0) {
		netdev_err(net, "unable to open device (ret %d).\n", ret);
		return ret;
	}

	netif_tx_start_all_queues(net);

	nvdev = hv_get_drvdata(device_obj);
	rdev = nvdev->extension;
	if (!rdev->link_state)
		netif_carrier_on(net);

	return ret;
}

static int netvsc_close(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *device_obj = net_device_ctx->device_ctx;
	int ret;

	netif_tx_disable(net);

	/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
	cancel_work_sync(&net_device_ctx->work);
	ret = rndis_filter_close(device_obj);
	if (ret != 0)
		netdev_err(net, "unable to close device (ret %d).\n", ret);

	return ret;
}

static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
			   int pkt_type)
{
	struct rndis_packet *rndis_pkt;
	struct rndis_per_packet_info *ppi;

	rndis_pkt = &msg->msg.pkt;
	rndis_pkt->data_offset += ppi_size;

	ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
		rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);

	ppi->size = ppi_size;
	ppi->type = pkt_type;
	ppi->ppi_offset = sizeof(struct rndis_per_packet_info);

	rndis_pkt->per_pkt_info_len += ppi_size;

	return ppi;
}

union sub_key {
	u64 k;
	struct {
		u8 pad[3];
		u8 kb;
		u32 ka;
	};
};

/* Toeplitz hash function
 * data: network byte order
 * return: host byte order
 */
static u32 comp_hash(u8 *key, int klen, void *data, int dlen)
{
	union sub_key subk;
	int k_next = 4;
	u8 dt;
	int i, j;
	u32 ret = 0;

	subk.k = 0;
	subk.ka = ntohl(*(u32 *)key);

	for (i = 0; i < dlen; i++) {
		subk.kb = key[k_next];
		k_next = (k_next + 1) % klen;
		dt = ((u8 *)data)[i];
		for (j = 0; j < 8; j++) {
			if (dt & 0x80)
				ret ^= subk.ka;
			dt <<= 1;
			subk.k <<= 1;
		}
	}

	return ret;
}

static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
{
	struct flow_keys flow;
	int data_len;

	if (!skb_flow_dissect_flow_keys(skb, &flow) ||
	    !(flow.basic.n_proto == htons(ETH_P_IP) ||
	      flow.basic.n_proto == htons(ETH_P_IPV6)))
		return false;

	if (flow.basic.ip_proto == IPPROTO_TCP)
		data_len = 12;
	else
		data_len = 8;

	*hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len);

	return true;
}

static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
			void *accel_priv, select_queue_fallback_t fallback)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct hv_device *hdev = net_device_ctx->device_ctx;
	struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
	u32 hash;
	u16 q_idx = 0;

	if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
		return 0;

	if (netvsc_set_hash(&hash, skb)) {
		q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
			ndev->real_num_tx_queues;
		skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
	}

	return q_idx;
}

void netvsc_xmit_completion(void *context)
{
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
	struct sk_buff *skb = (struct sk_buff *)
		(unsigned long)packet->send_completion_tid;

	if (skb)
		dev_kfree_skb_any(skb);
}

static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
			struct hv_page_buffer *pb)
{
	int j = 0;

	/* Deal with compound pages by ignoring the unused part
	 * of the page.
	 */
	page += (offset >> PAGE_SHIFT);
	offset &= ~PAGE_MASK;

	while (len > 0) {
		unsigned long bytes;

		bytes = PAGE_SIZE - offset;
		if (bytes > len)
			bytes = len;
		pb[j].pfn = page_to_pfn(page);
		pb[j].offset = offset;
		pb[j].len = bytes;

		offset += bytes;
		len -= bytes;

		if (offset == PAGE_SIZE && len) {
			page++;
			offset = 0;
			j++;
		}
	}

	return j + 1;
}

static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
			   struct hv_netvsc_packet *packet)
{
	struct hv_page_buffer *pb = packet->page_buf;
	u32 slots_used = 0;
	char *data = skb->data;
	int frags = skb_shinfo(skb)->nr_frags;
	int i;

	/* The packet is laid out thus:
	 * 1. hdr: RNDIS header and PPI
	 * 2. skb linear data
	 * 3. skb fragment data
	 */
	if (hdr != NULL)
		slots_used += fill_pg_buf(virt_to_page(hdr),
					offset_in_page(hdr),
					len, &pb[slots_used]);

	packet->rmsg_size = len;
	packet->rmsg_pgcnt = slots_used;

	slots_used += fill_pg_buf(virt_to_page(data),
				offset_in_page(data),
				skb_headlen(skb), &pb[slots_used]);

	for (i = 0; i < frags; i++) {
		skb_frag_t *frag = skb_shinfo(skb)->frags + i;

		slots_used += fill_pg_buf(skb_frag_page(frag),
					frag->page_offset,
					skb_frag_size(frag), &pb[slots_used]);
	}
	return slots_used;
}

static int count_skb_frag_slots(struct sk_buff *skb)
{
	int i, frags = skb_shinfo(skb)->nr_frags;
	int pages = 0;

	for (i = 0; i < frags; i++) {
		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
		unsigned long size = skb_frag_size(frag);
		unsigned long offset = frag->page_offset;

		/* Skip the unused space at the start of the page */
		offset &= ~PAGE_MASK;
		pages += PFN_UP(offset + size);
	}
	return pages;
}

static int netvsc_get_slots(struct sk_buff *skb)
{
	char *data = skb->data;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);
	int slots;
	int frag_slots;

	slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	frag_slots = count_skb_frag_slots(skb);
	return slots + frag_slots;
}

static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off)
{
	u32 ret_val = TRANSPORT_INFO_NOT_IP;

	if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) &&
	    (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) {
		goto not_ip;
	}

	*trans_off = skb_transport_offset(skb);

	if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) {
		struct iphdr *iphdr = ip_hdr(skb);

		if (iphdr->protocol == IPPROTO_TCP)
			ret_val = TRANSPORT_INFO_IPV4_TCP;
		else if (iphdr->protocol == IPPROTO_UDP)
			ret_val = TRANSPORT_INFO_IPV4_UDP;
	} else {
		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
			ret_val = TRANSPORT_INFO_IPV6_TCP;
		else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
			ret_val = TRANSPORT_INFO_IPV6_UDP;
	}

not_ip:
	return ret_val;
}

static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	bool isvlan;
	bool linear = false;
	struct rndis_per_packet_info *ppi;
	struct ndis_tcp_ip_checksum_info *csum_info;
	struct ndis_tcp_lso_info *lso_info;
	int hdr_offset;
	u32 net_trans_info;
	u32 hash;
	u32 skb_length;
	u32 pkt_sz;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

	/* We will need at most two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If the skb is scattered across
	 * more pages we try linearizing it.
	 */

check_size:
	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;
	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) {
		net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
				      num_data_pgs, skb->len);
		ret = -EFAULT;
		goto drop;
	} else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
		if (skb_linearize(skb)) {
			net_alert_ratelimited("failed to linearize skb\n");
			ret = -ENOMEM;
			goto drop;
		}
		linear = true;
		goto check_size;
	}

	pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE;

	ret = skb_cow_head(skb, pkt_sz);
	if (ret) {
		netdev_err(net, "unable to alloc hv_netvsc_packet\n");
		ret = -ENOMEM;
		goto drop;
	}
	/* Use the headroom for building up the packet */
	packet = (struct hv_netvsc_packet *)skb->head;

	packet->status = 0;
	packet->xmit_more = skb->xmit_more;

	packet->vlan_tci = skb->vlan_tci;
	packet->page_buf = page_buf;

	packet->q_idx = skb_get_queue_mapping(skb);

	packet->is_data_pkt = true;
	packet->total_data_buflen = skb->len;

	packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
				sizeof(struct hv_netvsc_packet));

	memset(packet->rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	/* Set the completion routine */
	packet->send_completion = netvsc_xmit_completion;
	packet->send_completion_ctx = packet;
	packet->send_completion_tid = (unsigned long)skb;

	isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;

	/* Add the rndis header */
	rndis_msg = packet->rndis_msg;
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

	hash = skb_get_hash_raw(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (isvlan) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
				    IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						      ppi->ppi_offset);
		vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	net_trans_info = get_net_transport_info(skb, &hdr_offset);
	if (net_trans_info == TRANSPORT_INFO_NOT_IP)
		goto do_send;

	/*
	 * Setup the sendside checksum offload only if this is not a
	 * GSO packet.
	 */
	if (skb_is_gso(skb))
		goto do_lso;

	if ((skb->ip_summed == CHECKSUM_NONE) ||
	    (skb->ip_summed == CHECKSUM_UNNECESSARY))
		goto do_send;

	rndis_msg_size += NDIS_CSUM_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
			    TCPIP_CHKSUM_PKTINFO);

	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
			ppi->ppi_offset);

	if (net_trans_info & (INFO_IPV4 << 16))
		csum_info->transmit.is_ipv4 = 1;
	else
		csum_info->transmit.is_ipv6 = 1;

	if (net_trans_info & INFO_TCP) {
		csum_info->transmit.tcp_checksum = 1;
		csum_info->transmit.tcp_header_offset = hdr_offset;
	} else if (net_trans_info & INFO_UDP) {
		/* UDP checksum offload is not supported on ws2008r2.
		 * Furthermore, on ws2012 and ws2012r2, there are some
		 * issues with udp checksum offload from Linux guests.
		 * (these are host issues).
		 * For now compute the checksum here.
		 */
		struct udphdr *uh;
		u16 udp_len;

		ret = skb_cow_head(skb, 0);
		if (ret)
			goto drop;

		uh = udp_hdr(skb);
		udp_len = ntohs(uh->len);
		uh->check = 0;
		uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      udp_len, IPPROTO_UDP,
					      csum_partial(uh, udp_len, 0));
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		csum_info->transmit.udp_checksum = 0;
	}
	goto do_send;

do_lso:
	rndis_msg_size += NDIS_LSO_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
			    TCP_LARGESEND_PKTINFO);

	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
			ppi->ppi_offset);

	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
	if (net_trans_info & (INFO_IPV4 << 16)) {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
		ip_hdr(skb)->tot_len = 0;
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check =
			~csum_tcpudp_magic(ip_hdr(skb)->saddr,
					   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check =
			~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	}
	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

do_send:
	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet);

	ret = netvsc_send(net_device_ctx->device_ctx, packet);

drop:
	if (ret == 0) {
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
	} else {
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
	}

	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}

/*
 * netvsc_linkstatus_callback - Link up/down notification
 */
void netvsc_linkstatus_callback(struct hv_device *device_obj,
				struct rndis_message *resp)
{
	struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
	struct net_device *net;
	struct net_device_context *ndev_ctx;
	struct netvsc_device *net_device;
	struct rndis_device *rdev;

	net_device = hv_get_drvdata(device_obj);
	rdev = net_device->extension;

	switch (indicate->status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
		rdev->link_state = false;
		break;
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		rdev->link_state = true;
		break;
	case RNDIS_STATUS_NETWORK_CHANGE:
		rdev->link_change = true;
		break;
	default:
		return;
	}

	net = net_device->ndev;

	if (!net || net->reg_state != NETREG_REGISTERED)
		return;

	ndev_ctx = netdev_priv(net);
	if (!rdev->link_state) {
		schedule_delayed_work(&ndev_ctx->dwork, 0);
		schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
	} else {
		schedule_delayed_work(&ndev_ctx->dwork, 0);
	}
}

/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
			 struct hv_netvsc_packet *packet,
			 struct ndis_tcp_ip_checksum_info *csum_info)
{
	struct net_device *net;
	struct net_device_context *net_device_ctx;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;

	net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
	if (!net || net->reg_state != NETREG_REGISTERED) {
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}
	net_device_ctx = netdev_priv(net);
	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}

	/*
	 * Copy to skb. This copy is needed here since the memory pointed to by
	 * hv_netvsc_packet cannot be deallocated.
	 */
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
	       packet->total_data_buflen);

	skb->protocol = eth_type_trans(skb, net);
	if (csum_info) {
		/* We only look at the IP checksum here.
		 * Should we be dropping the packet if checksum
		 * failed? How do we deal with other checksums - TCP/UDP?
		 */
		if (csum_info->receive.ip_checksum_succeeded)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		else
			skb->ip_summed = CHECKSUM_NONE;
	}

	if (packet->vlan_tci & VLAN_TAG_PRESENT)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       packet->vlan_tci);

	skb_record_rx_queue(skb, packet->channel->
			    offermsg.offer.sub_channel_index);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += packet->total_data_buflen;
	u64_stats_update_end(&rx_stats->syncp);

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}

static void netvsc_get_drvinfo(struct net_device *net,
			       struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
}

static void netvsc_get_channels(struct net_device *net,
				struct ethtool_channels *channel)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *dev = net_device_ctx->device_ctx;
	struct netvsc_device *nvdev = hv_get_drvdata(dev);

	if (nvdev) {
		channel->max_combined	= nvdev->max_chn;
		channel->combined_count = nvdev->num_chn;
	}
}

static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev = ndevctx->device_ctx;
	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
	struct netvsc_device_info device_info;
	int limit = ETH_DATA_LEN;

	if (nvdev == NULL || nvdev->destroy)
		return -ENODEV;

	if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
		limit = NETVSC_MTU - ETH_HLEN;

	/* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */
	if (mtu < ETH_DATA_LEN || mtu > limit)
		return -EINVAL;

	nvdev->start_remove = true;
	cancel_work_sync(&ndevctx->work);
	netif_tx_disable(ndev);
	rndis_filter_device_remove(hdev);

	ndev->mtu = mtu;

	ndevctx->device_ctx = hdev;
	hv_set_drvdata(hdev, ndev);
	device_info.ring_size = ring_size;
	device_info.max_num_vrss_chns = max_num_vrss_chns;
	rndis_filter_device_add(hdev, &device_info);
	netif_tx_wake_all_queues(ndev);

	return 0;
}

static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
						    struct rtnl_link_stats64 *t)
{
	struct net_device_context *ndev_ctx = netdev_priv(net);
	int cpu;

	for_each_possible_cpu(cpu) {
		struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
							    cpu);
		struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
							    cpu);
		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
			tx_packets = tx_stats->packets;
			tx_bytes = tx_stats->bytes;
		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));

		do {
			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
			rx_packets = rx_stats->packets;
			rx_bytes = rx_stats->bytes;
		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));

		t->tx_bytes	+= tx_bytes;
		t->tx_packets	+= tx_packets;
		t->rx_bytes	+= rx_bytes;
		t->rx_packets	+= rx_packets;
	}

	t->tx_dropped	= net->stats.tx_dropped;
	t->tx_errors	= net->stats.tx_dropped;

	t->rx_dropped	= net->stats.rx_dropped;
	t->rx_errors	= net->stats.rx_errors;

	return t;
}

static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev = ndevctx->device_ctx;
	struct sockaddr *addr = p;
	char save_adr[ETH_ALEN];
	unsigned char save_aatype;
	int err;

	memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
	save_aatype = ndev->addr_assign_type;

	err = eth_mac_addr(ndev, p);
	if (err != 0)
		return err;

	err = rndis_filter_set_device_mac(hdev, addr->sa_data);
	if (err != 0) {
		/* roll back to saved MAC */
		memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
		ndev->addr_assign_type = save_aatype;
	}

	return err;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void netvsc_poll_controller(struct net_device *net)
{
	/* As netvsc_start_xmit() works synchronously we don't have to
	 * trigger anything here.
	 */
}
#endif

static const struct ethtool_ops ethtool_ops = {
	.get_drvinfo	= netvsc_get_drvinfo,
	.get_link	= ethtool_op_get_link,
	.get_channels   = netvsc_get_channels,
};

static const struct net_device_ops device_ops = {
	.ndo_open =			netvsc_open,
	.ndo_stop =			netvsc_close,
	.ndo_start_xmit =		netvsc_start_xmit,
	.ndo_set_rx_mode =		netvsc_set_multicast_list,
	.ndo_change_mtu =		netvsc_change_mtu,
	.ndo_validate_addr =		eth_validate_addr,
	.ndo_set_mac_address =		netvsc_set_mac_addr,
	.ndo_select_queue =		netvsc_select_queue,
	.ndo_get_stats64 =		netvsc_get_stats64,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller =		netvsc_poll_controller,
#endif
};

/*
 * Send a GARP packet to network peers after migrations.
 * After Quick Migration, the network is not immediately operational in the
 * current context when receiving the RNDIS_STATUS_MEDIA_CONNECT event. So, add
 * another netif_notify_peers() into a delayed work, otherwise the GARP packet
 * will not be sent after quick migration, which causes network disconnection.
 * Also, we update the carrier status here.
 */
static void netvsc_link_change(struct work_struct *w)
{
	struct net_device_context *ndev_ctx;
	struct net_device *net;
	struct netvsc_device *net_device;
	struct rndis_device *rdev;
	bool notify, refresh = false;
	char *argv[] = { "/etc/init.d/network", "restart", NULL };
	char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

	rtnl_lock();

	ndev_ctx = container_of(w, struct net_device_context, dwork.work);
	net_device = hv_get_drvdata(ndev_ctx->device_ctx);
	rdev = net_device->extension;
	net = net_device->ndev;

	if (rdev->link_state) {
		netif_carrier_off(net);
		notify = false;
	} else {
		netif_carrier_on(net);
		notify = true;
		if (rdev->link_change) {
			rdev->link_change = false;
			refresh = true;
		}
	}

	rtnl_unlock();

	if (refresh)
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);

	if (notify)
		netdev_notify_peers(net);
}

static void netvsc_free_netdev(struct net_device *netdev)
{
	struct net_device_context *net_device_ctx = netdev_priv(netdev);

	free_percpu(net_device_ctx->tx_stats);
	free_percpu(net_device_ctx->rx_stats);
	free_netdev(netdev);
}

static int netvsc_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
{
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	struct netvsc_device *nvdev;
	int ret;
	u32 max_needed_headroom;

	net = alloc_etherdev_mq(sizeof(struct net_device_context),
				num_online_cpus());
	if (!net)
		return -ENOMEM;

	max_needed_headroom = sizeof(struct hv_netvsc_packet) +
			      RNDIS_AND_PPI_SIZE;

	netif_carrier_off(net);

	net_device_ctx = netdev_priv(net);
	net_device_ctx->device_ctx = dev;
	net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
	if (netif_msg_probe(net_device_ctx))
		netdev_dbg(net, "netvsc msg_enable: %d\n",
			   net_device_ctx->msg_enable);

	net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
	if (!net_device_ctx->tx_stats) {
		free_netdev(net);
		return -ENOMEM;
	}
	net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
	if (!net_device_ctx->rx_stats) {
		free_percpu(net_device_ctx->tx_stats);
		free_netdev(net);
		return -ENOMEM;
	}

	hv_set_drvdata(dev, net);
	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
	INIT_WORK(&net_device_ctx->work, do_set_multicast);

	net->netdev_ops = &device_ops;

	net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
				NETIF_F_TSO;
	net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
			NETIF_F_IP_CSUM | NETIF_F_TSO;

	net->ethtool_ops = &ethtool_ops;
	SET_NETDEV_DEV(net, &dev->device);

	/*
	 * Request additional head room in the skb.
	 * We will use this space to build the rndis
	 * header and other state we need to maintain.
	 */
	net->needed_headroom = max_needed_headroom;

	/* Notify the netvsc driver of the new device */
	device_info.ring_size = ring_size;
	device_info.max_num_vrss_chns = max_num_vrss_chns;
	ret = rndis_filter_device_add(dev, &device_info);
	if (ret != 0) {
		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
		netvsc_free_netdev(net);
		hv_set_drvdata(dev, NULL);
		return ret;
	}
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

	nvdev = hv_get_drvdata(dev);
	netif_set_real_num_tx_queues(net, nvdev->num_chn);
	netif_set_real_num_rx_queues(net, nvdev->num_chn);

	ret = register_netdev(net);
	if (ret != 0) {
		pr_err("Unable to register netdev.\n");
		rndis_filter_device_remove(dev);
		netvsc_free_netdev(net);
	} else {
		schedule_delayed_work(&net_device_ctx->dwork, 0);
	}

	return ret;
}

static int netvsc_remove(struct hv_device *dev)
{
	struct net_device *net;
	struct net_device_context *ndev_ctx;
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(dev);
	net = net_device->ndev;

	if (net == NULL) {
		dev_err(&dev->device, "No net device to remove\n");
		return 0;
	}

	net_device->start_remove = true;

	ndev_ctx = netdev_priv(net);
	cancel_delayed_work_sync(&ndev_ctx->dwork);
	cancel_work_sync(&ndev_ctx->work);

	/* Stop outbound asap */
	netif_tx_disable(net);

	unregister_netdev(net);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed
	 */
	rndis_filter_device_remove(dev);

	netvsc_free_netdev(net);
	return 0;
}

static const struct hv_vmbus_device_id id_table[] = {
	/* Network guid */
	{ HV_NIC_GUID, },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);

/* The one and only one */
static struct hv_driver netvsc_drv = {
	.name = KBUILD_MODNAME,
	.id_table = id_table,
	.probe = netvsc_probe,
	.remove = netvsc_remove,
};

static void __exit netvsc_drv_exit(void)
{
	vmbus_driver_unregister(&netvsc_drv);
}

static int __init netvsc_drv_init(void)
{
	if (ring_size < RING_SIZE_MIN) {
		ring_size = RING_SIZE_MIN;
		pr_info("Increased ring_size to %d (min allowed)\n",
			ring_size);
	}
	return vmbus_driver_register(&netvsc_drv);
}

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
network driver"); 1075 1076 module_init(netvsc_drv_init); 1077 module_exit(netvsc_drv_exit); 1078