/* sunvnet_common.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016-2017 Oracle. All rights reserved.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>
#define CREATE_TRACE_POINTS
#include <trace/events/sunvnet.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet_common.h"

/* Heuristic for the number of times to exponentially back off and
 * retry sending an LDC trigger when EAGAIN is encountered.
 */
#define VNET_MAX_RETRIES	10

MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network support library");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1");

static int __vnet_tx_trigger(struct vnet_port *port, u32 start);

static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port);

int sunvnet_send_attr_common(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_net_attr_info pkt;
	int framelen = ETH_FRAME_LEN;
	int i, err;

	err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
	if (err)
		return err;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	if (vio_version_before(vio, 1, 2))
		pkt.xfer_mode = VIO_DRING_MODE;
	else
		pkt.xfer_mode = VIO_NEW_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	if (vio_version_after(vio, 1, 3)) {
		if (port->rmtu) {
			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
			pkt.mtu = port->rmtu;
		} else {
			port->rmtu = VNET_MAXPACKET;
			pkt.mtu = port->rmtu;
		}
		if (vio_version_after_eq(vio, 1, 6))
			pkt.options = VIO_TX_DRING;
	} else if (vio_version_before(vio, 1, 3)) {
		pkt.mtu = framelen;
	} else { /* v1.3 */
		pkt.mtu = framelen + VLAN_HLEN;
	}

	pkt.cflags = 0;
	if (vio_version_after_eq(vio, 1, 7) && port->tso) {
		pkt.cflags |= VNET_LSO_IPV4_CAPAB;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		pkt.ipv4_lso_maxlen = port->tsolen;
	}

	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       "cflags[0x%04x] lso_max[%u]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long)pkt.addr,
	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
	       (unsigned long
long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen); 125 126 return vio_ldc_send(vio, &pkt, sizeof(pkt)); 127 } 128 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common); 129 130 static int handle_attr_info(struct vio_driver_state *vio, 131 struct vio_net_attr_info *pkt) 132 { 133 struct vnet_port *port = to_vnet_port(vio); 134 u64 localmtu; 135 u8 xfer_mode; 136 137 viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 138 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 139 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 140 pkt->xfer_mode, pkt->addr_type, 141 (unsigned long long)pkt->addr, 142 pkt->ack_freq, pkt->plnk_updt, pkt->options, 143 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 144 pkt->ipv4_lso_maxlen); 145 146 pkt->tag.sid = vio_send_sid(vio); 147 148 xfer_mode = pkt->xfer_mode; 149 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */ 150 if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE) 151 xfer_mode = VIO_NEW_DRING_MODE; 152 153 /* MTU negotiation: 154 * < v1.3 - ETH_FRAME_LEN exactly 155 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change 156 * pkt->mtu for ACK 157 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly 158 */ 159 if (vio_version_before(vio, 1, 3)) { 160 localmtu = ETH_FRAME_LEN; 161 } else if (vio_version_after(vio, 1, 3)) { 162 localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET; 163 localmtu = min(pkt->mtu, localmtu); 164 pkt->mtu = localmtu; 165 } else { /* v1.3 */ 166 localmtu = ETH_FRAME_LEN + VLAN_HLEN; 167 } 168 port->rmtu = localmtu; 169 170 /* LSO negotiation */ 171 if (vio_version_after_eq(vio, 1, 7)) 172 port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB); 173 else 174 port->tso = false; 175 if (port->tso) { 176 if (!port->tsolen) 177 port->tsolen = VNET_MAXTSO; 178 port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen); 179 if (port->tsolen < VNET_MINTSO) { 180 port->tso = false; 181 port->tsolen = 0; 182 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; 183 } 184 pkt->ipv4_lso_maxlen = port->tsolen; 185 } else { 186 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; 187 pkt->ipv4_lso_maxlen = 0; 188 port->tsolen = 0; 189 } 190 191 /* for version >= 1.6, ACK packet mode we support */ 192 if (vio_version_after_eq(vio, 1, 6)) { 193 pkt->xfer_mode = VIO_NEW_DRING_MODE; 194 pkt->options = VIO_TX_DRING; 195 } 196 197 if (!(xfer_mode | VIO_NEW_DRING_MODE) || 198 pkt->addr_type != VNET_ADDR_ETHERMAC || 199 pkt->mtu != localmtu) { 200 viodbg(HS, "SEND NET ATTR NACK\n"); 201 202 pkt->tag.stype = VIO_SUBTYPE_NACK; 203 204 (void)vio_ldc_send(vio, pkt, sizeof(*pkt)); 205 206 return -ECONNRESET; 207 } 208 209 viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] " 210 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] " 211 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 212 pkt->xfer_mode, pkt->addr_type, 213 (unsigned long long)pkt->addr, 214 pkt->ack_freq, pkt->plnk_updt, pkt->options, 215 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 216 pkt->ipv4_lso_maxlen); 217 218 pkt->tag.stype = VIO_SUBTYPE_ACK; 219 220 return vio_ldc_send(vio, pkt, sizeof(*pkt)); 221 } 222 223 static int handle_attr_ack(struct vio_driver_state *vio, 224 struct vio_net_attr_info *pkt) 225 { 226 viodbg(HS, "GOT NET ATTR ACK\n"); 227 228 return 0; 229 } 230 231 static int handle_attr_nack(struct vio_driver_state *vio, 232 struct vio_net_attr_info *pkt) 233 { 234 viodbg(HS, "GOT NET ATTR NACK\n"); 235 236 return -ECONNRESET; 237 } 238 239 int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg) 240 { 241 struct vio_net_attr_info *pkt = arg; 242 
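	/* Attribute exchange is one phase of the VIO handshake: an incoming
	 * INFO is answered with an ACK or NACK by handle_attr_info(), while
	 * a NACK from the peer (or an unknown subtype) resets the connection.
	 */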
243 switch (pkt->tag.stype) { 244 case VIO_SUBTYPE_INFO: 245 return handle_attr_info(vio, pkt); 246 247 case VIO_SUBTYPE_ACK: 248 return handle_attr_ack(vio, pkt); 249 250 case VIO_SUBTYPE_NACK: 251 return handle_attr_nack(vio, pkt); 252 253 default: 254 return -ECONNRESET; 255 } 256 } 257 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common); 258 259 void sunvnet_handshake_complete_common(struct vio_driver_state *vio) 260 { 261 struct vio_dring_state *dr; 262 263 dr = &vio->drings[VIO_DRIVER_RX_RING]; 264 dr->rcv_nxt = 1; 265 dr->snd_nxt = 1; 266 267 dr = &vio->drings[VIO_DRIVER_TX_RING]; 268 dr->rcv_nxt = 1; 269 dr->snd_nxt = 1; 270 } 271 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common); 272 273 /* The hypervisor interface that implements copying to/from imported 274 * memory from another domain requires that copies are done to 8-byte 275 * aligned buffers, and that the lengths of such copies are also 8-byte 276 * multiples. 277 * 278 * So we align skb->data to an 8-byte multiple and pad-out the data 279 * area so we can round the copy length up to the next multiple of 280 * 8 for the copy. 281 * 282 * The transmitter puts the actual start of the packet 6 bytes into 283 * the buffer it sends over, so that the IP headers after the ethernet 284 * header are aligned properly. These 6 bytes are not in the descriptor 285 * length, they are simply implied. This offset is represented using 286 * the VNET_PACKET_SKIP macro. 287 */ 288 static struct sk_buff *alloc_and_align_skb(struct net_device *dev, 289 unsigned int len) 290 { 291 struct sk_buff *skb; 292 unsigned long addr, off; 293 294 skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8); 295 if (unlikely(!skb)) 296 return NULL; 297 298 addr = (unsigned long)skb->data; 299 off = ((addr + 7UL) & ~7UL) - addr; 300 if (off) 301 skb_reserve(skb, off); 302 303 return skb; 304 } 305 306 static inline void vnet_fullcsum_ipv4(struct sk_buff *skb) 307 { 308 struct iphdr *iph = ip_hdr(skb); 309 int offset = skb_transport_offset(skb); 310 311 if (skb->protocol != htons(ETH_P_IP)) 312 return; 313 if (iph->protocol != IPPROTO_TCP && 314 iph->protocol != IPPROTO_UDP) 315 return; 316 skb->ip_summed = CHECKSUM_NONE; 317 skb->csum_level = 1; 318 skb->csum = 0; 319 if (iph->protocol == IPPROTO_TCP) { 320 struct tcphdr *ptcp = tcp_hdr(skb); 321 322 ptcp->check = 0; 323 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 324 ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 325 skb->len - offset, IPPROTO_TCP, 326 skb->csum); 327 } else if (iph->protocol == IPPROTO_UDP) { 328 struct udphdr *pudp = udp_hdr(skb); 329 330 pudp->check = 0; 331 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 332 pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 333 skb->len - offset, IPPROTO_UDP, 334 skb->csum); 335 } 336 } 337 338 #if IS_ENABLED(CONFIG_IPV6) 339 static inline void vnet_fullcsum_ipv6(struct sk_buff *skb) 340 { 341 struct ipv6hdr *ip6h = ipv6_hdr(skb); 342 int offset = skb_transport_offset(skb); 343 344 if (skb->protocol != htons(ETH_P_IPV6)) 345 return; 346 if (ip6h->nexthdr != IPPROTO_TCP && 347 ip6h->nexthdr != IPPROTO_UDP) 348 return; 349 skb->ip_summed = CHECKSUM_NONE; 350 skb->csum_level = 1; 351 skb->csum = 0; 352 if (ip6h->nexthdr == IPPROTO_TCP) { 353 struct tcphdr *ptcp = tcp_hdr(skb); 354 355 ptcp->check = 0; 356 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 357 ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 358 skb->len - offset, IPPROTO_TCP, 359 skb->csum); 360 } else if (ip6h->nexthdr == 
IPPROTO_UDP) { 361 struct udphdr *pudp = udp_hdr(skb); 362 363 pudp->check = 0; 364 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 365 pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 366 skb->len - offset, IPPROTO_UDP, 367 skb->csum); 368 } 369 } 370 #endif 371 372 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) 373 { 374 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 375 unsigned int len = desc->size; 376 unsigned int copy_len; 377 struct sk_buff *skb; 378 int maxlen; 379 int err; 380 381 err = -EMSGSIZE; 382 if (port->tso && port->tsolen > port->rmtu) 383 maxlen = port->tsolen; 384 else 385 maxlen = port->rmtu; 386 if (unlikely(len < ETH_ZLEN || len > maxlen)) { 387 dev->stats.rx_length_errors++; 388 goto out_dropped; 389 } 390 391 skb = alloc_and_align_skb(dev, len); 392 err = -ENOMEM; 393 if (unlikely(!skb)) { 394 dev->stats.rx_missed_errors++; 395 goto out_dropped; 396 } 397 398 copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U; 399 skb_put(skb, copy_len); 400 err = ldc_copy(port->vio.lp, LDC_COPY_IN, 401 skb->data, copy_len, 0, 402 desc->cookies, desc->ncookies); 403 if (unlikely(err < 0)) { 404 dev->stats.rx_frame_errors++; 405 goto out_free_skb; 406 } 407 408 skb_pull(skb, VNET_PACKET_SKIP); 409 skb_trim(skb, len); 410 skb->protocol = eth_type_trans(skb, dev); 411 412 if (vio_version_after_eq(&port->vio, 1, 8)) { 413 struct vio_net_dext *dext = vio_net_ext(desc); 414 415 skb_reset_network_header(skb); 416 417 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) { 418 if (skb->protocol == ETH_P_IP) { 419 struct iphdr *iph = ip_hdr(skb); 420 421 iph->check = 0; 422 ip_send_check(iph); 423 } 424 } 425 if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) && 426 skb->ip_summed == CHECKSUM_NONE) { 427 if (skb->protocol == htons(ETH_P_IP)) { 428 struct iphdr *iph = ip_hdr(skb); 429 int ihl = iph->ihl * 4; 430 431 skb_set_transport_header(skb, ihl); 432 vnet_fullcsum_ipv4(skb); 433 #if IS_ENABLED(CONFIG_IPV6) 434 } else if (skb->protocol == htons(ETH_P_IPV6)) { 435 skb_set_transport_header(skb, 436 sizeof(struct ipv6hdr)); 437 vnet_fullcsum_ipv6(skb); 438 #endif 439 } 440 } 441 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) { 442 skb->ip_summed = CHECKSUM_PARTIAL; 443 skb->csum_level = 0; 444 if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK) 445 skb->csum_level = 1; 446 } 447 } 448 449 skb->ip_summed = port->switch_port ? 
CHECKSUM_NONE : CHECKSUM_PARTIAL; 450 451 if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest))) 452 dev->stats.multicast++; 453 dev->stats.rx_packets++; 454 dev->stats.rx_bytes += len; 455 port->stats.rx_packets++; 456 port->stats.rx_bytes += len; 457 napi_gro_receive(&port->napi, skb); 458 return 0; 459 460 out_free_skb: 461 kfree_skb(skb); 462 463 out_dropped: 464 dev->stats.rx_dropped++; 465 return err; 466 } 467 468 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, 469 u32 start, u32 end, u8 vio_dring_state) 470 { 471 struct vio_dring_data hdr = { 472 .tag = { 473 .type = VIO_TYPE_DATA, 474 .stype = VIO_SUBTYPE_ACK, 475 .stype_env = VIO_DRING_DATA, 476 .sid = vio_send_sid(&port->vio), 477 }, 478 .dring_ident = dr->ident, 479 .start_idx = start, 480 .end_idx = end, 481 .state = vio_dring_state, 482 }; 483 int err, delay; 484 int retries = 0; 485 486 hdr.seq = dr->snd_nxt; 487 delay = 1; 488 do { 489 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 490 if (err > 0) { 491 dr->snd_nxt++; 492 break; 493 } 494 udelay(delay); 495 if ((delay <<= 1) > 128) 496 delay = 128; 497 if (retries++ > VNET_MAX_RETRIES) { 498 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n", 499 port->raddr[0], port->raddr[1], 500 port->raddr[2], port->raddr[3], 501 port->raddr[4], port->raddr[5]); 502 break; 503 } 504 } while (err == -EAGAIN); 505 506 if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) { 507 port->stop_rx_idx = end; 508 port->stop_rx = true; 509 } else { 510 port->stop_rx_idx = 0; 511 port->stop_rx = false; 512 } 513 514 return err; 515 } 516 517 static struct vio_net_desc *get_rx_desc(struct vnet_port *port, 518 struct vio_dring_state *dr, 519 u32 index) 520 { 521 struct vio_net_desc *desc = port->vio.desc_buf; 522 int err; 523 524 err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size, 525 (index * dr->entry_size), 526 dr->cookies, dr->ncookies); 527 if (err < 0) 528 return ERR_PTR(err); 529 530 return desc; 531 } 532 533 static int put_rx_desc(struct vnet_port *port, 534 struct vio_dring_state *dr, 535 struct vio_net_desc *desc, 536 u32 index) 537 { 538 int err; 539 540 err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size, 541 (index * dr->entry_size), 542 dr->cookies, dr->ncookies); 543 if (err < 0) 544 return err; 545 546 return 0; 547 } 548 549 static int vnet_walk_rx_one(struct vnet_port *port, 550 struct vio_dring_state *dr, 551 u32 index, int *needs_ack) 552 { 553 struct vio_net_desc *desc = get_rx_desc(port, dr, index); 554 struct vio_driver_state *vio = &port->vio; 555 int err; 556 557 BUG_ON(!desc); 558 if (IS_ERR(desc)) 559 return PTR_ERR(desc); 560 561 if (desc->hdr.state != VIO_DESC_READY) 562 return 1; 563 564 dma_rmb(); 565 566 viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n", 567 desc->hdr.state, desc->hdr.ack, 568 desc->size, desc->ncookies, 569 desc->cookies[0].cookie_addr, 570 desc->cookies[0].cookie_size); 571 572 err = vnet_rx_one(port, desc); 573 if (err == -ECONNRESET) 574 return err; 575 trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid, 576 index, desc->hdr.ack); 577 desc->hdr.state = VIO_DESC_DONE; 578 err = put_rx_desc(port, dr, desc, index); 579 if (err < 0) 580 return err; 581 *needs_ack = desc->hdr.ack; 582 return 0; 583 } 584 585 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr, 586 u32 start, u32 end, int *npkts, int budget) 587 { 588 struct vio_driver_state *vio = &port->vio; 589 int ack_start = -1, ack_end = -1; 590 bool send_ack = true; 591 592 end = (end == (u32)-1) ? 
vio_dring_prev(dr, start) 593 : vio_dring_next(dr, end); 594 595 viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end); 596 597 while (start != end) { 598 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack); 599 600 if (err == -ECONNRESET) 601 return err; 602 if (err != 0) 603 break; 604 (*npkts)++; 605 if (ack_start == -1) 606 ack_start = start; 607 ack_end = start; 608 start = vio_dring_next(dr, start); 609 if (ack && start != end) { 610 err = vnet_send_ack(port, dr, ack_start, ack_end, 611 VIO_DRING_ACTIVE); 612 if (err == -ECONNRESET) 613 return err; 614 ack_start = -1; 615 } 616 if ((*npkts) >= budget) { 617 send_ack = false; 618 break; 619 } 620 } 621 if (unlikely(ack_start == -1)) { 622 ack_end = vio_dring_prev(dr, start); 623 ack_start = ack_end; 624 } 625 if (send_ack) { 626 port->napi_resume = false; 627 trace_vnet_tx_send_stopped_ack(port->vio._local_sid, 628 port->vio._peer_sid, 629 ack_end, *npkts); 630 return vnet_send_ack(port, dr, ack_start, ack_end, 631 VIO_DRING_STOPPED); 632 } else { 633 trace_vnet_tx_defer_stopped_ack(port->vio._local_sid, 634 port->vio._peer_sid, 635 ack_end, *npkts); 636 port->napi_resume = true; 637 port->napi_stop_idx = ack_end; 638 return 1; 639 } 640 } 641 642 static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts, 643 int budget) 644 { 645 struct vio_dring_data *pkt = msgbuf; 646 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING]; 647 struct vio_driver_state *vio = &port->vio; 648 649 viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n", 650 pkt->tag.stype_env, pkt->seq, dr->rcv_nxt); 651 652 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 653 return 0; 654 if (unlikely(pkt->seq != dr->rcv_nxt)) { 655 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n", 656 pkt->seq, dr->rcv_nxt); 657 return 0; 658 } 659 660 if (!port->napi_resume) 661 dr->rcv_nxt++; 662 663 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */ 664 665 return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx, 666 npkts, budget); 667 } 668 669 static int idx_is_pending(struct vio_dring_state *dr, u32 end) 670 { 671 u32 idx = dr->cons; 672 int found = 0; 673 674 while (idx != dr->prod) { 675 if (idx == end) { 676 found = 1; 677 break; 678 } 679 idx = vio_dring_next(dr, idx); 680 } 681 return found; 682 } 683 684 static int vnet_ack(struct vnet_port *port, void *msgbuf) 685 { 686 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 687 struct vio_dring_data *pkt = msgbuf; 688 struct net_device *dev; 689 u32 end; 690 struct vio_net_desc *desc; 691 struct netdev_queue *txq; 692 693 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 694 return 0; 695 696 end = pkt->end_idx; 697 dev = VNET_PORT_TO_NET_DEVICE(port); 698 netif_tx_lock(dev); 699 if (unlikely(!idx_is_pending(dr, end))) { 700 netif_tx_unlock(dev); 701 return 0; 702 } 703 704 /* sync for race conditions with vnet_start_xmit() and tell xmit it 705 * is time to send a trigger. 706 */ 707 trace_vnet_rx_stopped_ack(port->vio._local_sid, 708 port->vio._peer_sid, end); 709 dr->cons = vio_dring_next(dr, end); 710 desc = vio_dring_entry(dr, dr->cons); 711 if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) { 712 /* vnet_start_xmit() just populated this dring but missed 713 * sending the "start" LDC message to the consumer. 714 * Send a "start" trigger on its behalf. 
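		 * (This mirrors the start_cons handling in
		 * sunvnet_start_xmit_common(), which skips the trigger while
		 * one is already outstanding.)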
		 */
		if (__vnet_tx_trigger(port, dr->cons) > 0)
			port->start_cons = false;
		else
			port->start_cons = true;
	} else {
		port->start_cons = true;
	}
	netif_tx_unlock(dev);

	txq = netdev_get_tx_queue(dev, port->q_index);
	if (unlikely(netif_tx_queue_stopped(txq) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
				  port->q_index);
	__netif_tx_lock(txq, smp_processor_id());
	if (likely(netif_tx_queue_stopped(txq)))
		netif_tx_wake_queue(txq);
	__netif_tx_unlock(txq);
}

bool sunvnet_port_is_up_common(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}
EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);

static int vnet_event_napi(struct vnet_port *port, int budget)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_driver_state *vio = &port->vio;
	int tx_wakeup, err;
	int npkts = 0;

	/* we don't expect any other bits */
	BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
				  LDC_EVENT_RESET |
				  LDC_EVENT_UP));

	/* RESET takes precedence over any other event */
	if (port->rx_event & LDC_EVENT_RESET) {
		/* a link went down */

		if (port->vsw == 1) {
			netif_tx_stop_all_queues(dev);
			netif_carrier_off(dev);
		}

		vio_link_state_change(vio, LDC_EVENT_RESET);
		vnet_port_reset(port);
		vio_port_up(vio);

		/* If the device is running but its tx queue was
		 * stopped (due to flow control), restart it.
		 * This is necessary since vnet_port_reset()
		 * clears the tx drings and thus we may never get
		 * back a VIO_TYPE_DATA ACK packet - which is
		 * the normal mechanism to restart the tx queue.
809 */ 810 if (netif_running(dev)) 811 maybe_tx_wakeup(port); 812 813 port->rx_event = 0; 814 port->stats.event_reset++; 815 return 0; 816 } 817 818 if (port->rx_event & LDC_EVENT_UP) { 819 /* a link came up */ 820 821 if (port->vsw == 1) { 822 netif_carrier_on(port->dev); 823 netif_tx_start_all_queues(port->dev); 824 } 825 826 vio_link_state_change(vio, LDC_EVENT_UP); 827 port->rx_event = 0; 828 port->stats.event_up++; 829 return 0; 830 } 831 832 err = 0; 833 tx_wakeup = 0; 834 while (1) { 835 union { 836 struct vio_msg_tag tag; 837 u64 raw[8]; 838 } msgbuf; 839 840 if (port->napi_resume) { 841 struct vio_dring_data *pkt = 842 (struct vio_dring_data *)&msgbuf; 843 struct vio_dring_state *dr = 844 &port->vio.drings[VIO_DRIVER_RX_RING]; 845 846 pkt->tag.type = VIO_TYPE_DATA; 847 pkt->tag.stype = VIO_SUBTYPE_INFO; 848 pkt->tag.stype_env = VIO_DRING_DATA; 849 pkt->seq = dr->rcv_nxt; 850 pkt->start_idx = vio_dring_next(dr, 851 port->napi_stop_idx); 852 pkt->end_idx = -1; 853 } else { 854 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf)); 855 if (unlikely(err < 0)) { 856 if (err == -ECONNRESET) 857 vio_conn_reset(vio); 858 break; 859 } 860 if (err == 0) 861 break; 862 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", 863 msgbuf.tag.type, 864 msgbuf.tag.stype, 865 msgbuf.tag.stype_env, 866 msgbuf.tag.sid); 867 err = vio_validate_sid(vio, &msgbuf.tag); 868 if (err < 0) 869 break; 870 } 871 872 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) { 873 if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) { 874 if (!sunvnet_port_is_up_common(port)) { 875 /* failures like handshake_failure() 876 * may have cleaned up dring, but 877 * NAPI polling may bring us here. 878 */ 879 err = -ECONNRESET; 880 break; 881 } 882 err = vnet_rx(port, &msgbuf, &npkts, budget); 883 if (npkts >= budget) 884 break; 885 if (npkts == 0) 886 break; 887 } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) { 888 err = vnet_ack(port, &msgbuf); 889 if (err > 0) 890 tx_wakeup |= err; 891 } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) { 892 err = vnet_nack(port, &msgbuf); 893 } 894 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) { 895 if (msgbuf.tag.stype_env == VNET_MCAST_INFO) 896 err = handle_mcast(port, &msgbuf); 897 else 898 err = vio_control_pkt_engine(vio, &msgbuf); 899 if (err) 900 break; 901 } else { 902 err = vnet_handle_unknown(port, &msgbuf); 903 } 904 if (err == -ECONNRESET) 905 break; 906 } 907 if (unlikely(tx_wakeup && err != -ECONNRESET)) 908 maybe_tx_wakeup(port); 909 return npkts; 910 } 911 912 int sunvnet_poll_common(struct napi_struct *napi, int budget) 913 { 914 struct vnet_port *port = container_of(napi, struct vnet_port, napi); 915 struct vio_driver_state *vio = &port->vio; 916 int processed = vnet_event_napi(port, budget); 917 918 if (processed < budget) { 919 napi_complete_done(napi, processed); 920 port->rx_event &= ~LDC_EVENT_DATA_READY; 921 vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED); 922 } 923 return processed; 924 } 925 EXPORT_SYMBOL_GPL(sunvnet_poll_common); 926 927 void sunvnet_event_common(void *arg, int event) 928 { 929 struct vnet_port *port = arg; 930 struct vio_driver_state *vio = &port->vio; 931 932 port->rx_event |= event; 933 vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED); 934 napi_schedule(&port->napi); 935 } 936 EXPORT_SYMBOL_GPL(sunvnet_event_common); 937 938 static int __vnet_tx_trigger(struct vnet_port *port, u32 start) 939 { 940 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 941 struct vio_dring_data hdr = { 942 .tag = { 943 .type = VIO_TYPE_DATA, 944 .stype = VIO_SUBTYPE_INFO, 945 
.stype_env = VIO_DRING_DATA, 946 .sid = vio_send_sid(&port->vio), 947 }, 948 .dring_ident = dr->ident, 949 .start_idx = start, 950 .end_idx = (u32)-1, 951 }; 952 int err, delay; 953 int retries = 0; 954 955 if (port->stop_rx) { 956 trace_vnet_tx_pending_stopped_ack(port->vio._local_sid, 957 port->vio._peer_sid, 958 port->stop_rx_idx, -1); 959 err = vnet_send_ack(port, 960 &port->vio.drings[VIO_DRIVER_RX_RING], 961 port->stop_rx_idx, -1, 962 VIO_DRING_STOPPED); 963 if (err <= 0) 964 return err; 965 } 966 967 hdr.seq = dr->snd_nxt; 968 delay = 1; 969 do { 970 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 971 if (err > 0) { 972 dr->snd_nxt++; 973 break; 974 } 975 udelay(delay); 976 if ((delay <<= 1) > 128) 977 delay = 128; 978 if (retries++ > VNET_MAX_RETRIES) 979 break; 980 } while (err == -EAGAIN); 981 trace_vnet_tx_trigger(port->vio._local_sid, 982 port->vio._peer_sid, start, err); 983 984 return err; 985 } 986 987 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port, 988 unsigned *pending) 989 { 990 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 991 struct sk_buff *skb = NULL; 992 int i, txi; 993 994 *pending = 0; 995 996 txi = dr->prod; 997 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 998 struct vio_net_desc *d; 999 1000 --txi; 1001 if (txi < 0) 1002 txi = VNET_TX_RING_SIZE - 1; 1003 1004 d = vio_dring_entry(dr, txi); 1005 1006 if (d->hdr.state == VIO_DESC_READY) { 1007 (*pending)++; 1008 continue; 1009 } 1010 if (port->tx_bufs[txi].skb) { 1011 if (d->hdr.state != VIO_DESC_DONE) 1012 pr_notice("invalid ring buffer state %d\n", 1013 d->hdr.state); 1014 BUG_ON(port->tx_bufs[txi].skb->next); 1015 1016 port->tx_bufs[txi].skb->next = skb; 1017 skb = port->tx_bufs[txi].skb; 1018 port->tx_bufs[txi].skb = NULL; 1019 1020 ldc_unmap(port->vio.lp, 1021 port->tx_bufs[txi].cookies, 1022 port->tx_bufs[txi].ncookies); 1023 } else if (d->hdr.state == VIO_DESC_FREE) { 1024 break; 1025 } 1026 d->hdr.state = VIO_DESC_FREE; 1027 } 1028 return skb; 1029 } 1030 1031 static inline void vnet_free_skbs(struct sk_buff *skb) 1032 { 1033 struct sk_buff *next; 1034 1035 while (skb) { 1036 next = skb->next; 1037 skb->next = NULL; 1038 dev_kfree_skb(skb); 1039 skb = next; 1040 } 1041 } 1042 1043 void sunvnet_clean_timer_expire_common(struct timer_list *t) 1044 { 1045 struct vnet_port *port = from_timer(port, t, clean_timer); 1046 struct sk_buff *freeskbs; 1047 unsigned pending; 1048 1049 netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port)); 1050 freeskbs = vnet_clean_tx_ring(port, &pending); 1051 netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port)); 1052 1053 vnet_free_skbs(freeskbs); 1054 1055 if (pending) 1056 (void)mod_timer(&port->clean_timer, 1057 jiffies + VNET_CLEAN_TIMEOUT); 1058 else 1059 del_timer(&port->clean_timer); 1060 } 1061 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common); 1062 1063 static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb, 1064 struct ldc_trans_cookie *cookies, int ncookies, 1065 unsigned int map_perm) 1066 { 1067 int i, nc, err, blen; 1068 1069 /* header */ 1070 blen = skb_headlen(skb); 1071 if (blen < ETH_ZLEN) 1072 blen = ETH_ZLEN; 1073 blen += VNET_PACKET_SKIP; 1074 blen += 8 - (blen & 7); 1075 1076 err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies, 1077 ncookies, map_perm); 1078 if (err < 0) 1079 return err; 1080 nc = err; 1081 1082 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1083 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 1084 u8 *vaddr; 1085 1086 if (nc < ncookies) { 1087 vaddr = kmap_atomic(skb_frag_page(f)); 
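			/* As with the head of the skb, round each fragment's
			 * copy length up to an 8-byte multiple to satisfy the
			 * LDC alignment rules described above
			 * alloc_and_align_skb().
			 */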
1088 blen = skb_frag_size(f); 1089 blen += 8 - (blen & 7); 1090 err = ldc_map_single(lp, vaddr + f->page_offset, 1091 blen, cookies + nc, ncookies - nc, 1092 map_perm); 1093 kunmap_atomic(vaddr); 1094 } else { 1095 err = -EMSGSIZE; 1096 } 1097 1098 if (err < 0) { 1099 ldc_unmap(lp, cookies, nc); 1100 return err; 1101 } 1102 nc += err; 1103 } 1104 return nc; 1105 } 1106 1107 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) 1108 { 1109 struct sk_buff *nskb; 1110 int i, len, pad, docopy; 1111 1112 len = skb->len; 1113 pad = 0; 1114 if (len < ETH_ZLEN) { 1115 pad += ETH_ZLEN - skb->len; 1116 len += pad; 1117 } 1118 len += VNET_PACKET_SKIP; 1119 pad += 8 - (len & 7); 1120 1121 /* make sure we have enough cookies and alignment in every frag */ 1122 docopy = skb_shinfo(skb)->nr_frags >= ncookies; 1123 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1124 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 1125 1126 docopy |= f->page_offset & 7; 1127 } 1128 if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP || 1129 skb_tailroom(skb) < pad || 1130 skb_headroom(skb) < VNET_PACKET_SKIP || docopy) { 1131 int start = 0, offset; 1132 __wsum csum; 1133 1134 len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN; 1135 nskb = alloc_and_align_skb(skb->dev, len); 1136 if (!nskb) { 1137 dev_kfree_skb(skb); 1138 return NULL; 1139 } 1140 skb_reserve(nskb, VNET_PACKET_SKIP); 1141 1142 nskb->protocol = skb->protocol; 1143 offset = skb_mac_header(skb) - skb->data; 1144 skb_set_mac_header(nskb, offset); 1145 offset = skb_network_header(skb) - skb->data; 1146 skb_set_network_header(nskb, offset); 1147 offset = skb_transport_header(skb) - skb->data; 1148 skb_set_transport_header(nskb, offset); 1149 1150 offset = 0; 1151 nskb->csum_offset = skb->csum_offset; 1152 nskb->ip_summed = skb->ip_summed; 1153 1154 if (skb->ip_summed == CHECKSUM_PARTIAL) 1155 start = skb_checksum_start_offset(skb); 1156 if (start) { 1157 int offset = start + nskb->csum_offset; 1158 1159 /* copy the headers, no csum here */ 1160 if (skb_copy_bits(skb, 0, nskb->data, start)) { 1161 dev_kfree_skb(nskb); 1162 dev_kfree_skb(skb); 1163 return NULL; 1164 } 1165 1166 /* copy the rest, with csum calculation */ 1167 *(__sum16 *)(skb->data + offset) = 0; 1168 csum = skb_copy_and_csum_bits(skb, start, 1169 nskb->data + start, 1170 skb->len - start, 0); 1171 1172 /* add in the header checksums */ 1173 if (skb->protocol == htons(ETH_P_IP)) { 1174 struct iphdr *iph = ip_hdr(nskb); 1175 1176 if (iph->protocol == IPPROTO_TCP || 1177 iph->protocol == IPPROTO_UDP) { 1178 csum = csum_tcpudp_magic(iph->saddr, 1179 iph->daddr, 1180 skb->len - start, 1181 iph->protocol, 1182 csum); 1183 } 1184 } else if (skb->protocol == htons(ETH_P_IPV6)) { 1185 struct ipv6hdr *ip6h = ipv6_hdr(nskb); 1186 1187 if (ip6h->nexthdr == IPPROTO_TCP || 1188 ip6h->nexthdr == IPPROTO_UDP) { 1189 csum = csum_ipv6_magic(&ip6h->saddr, 1190 &ip6h->daddr, 1191 skb->len - start, 1192 ip6h->nexthdr, 1193 csum); 1194 } 1195 } 1196 1197 /* save the final result */ 1198 *(__sum16 *)(nskb->data + offset) = csum; 1199 1200 nskb->ip_summed = CHECKSUM_NONE; 1201 } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) { 1202 dev_kfree_skb(nskb); 1203 dev_kfree_skb(skb); 1204 return NULL; 1205 } 1206 (void)skb_put(nskb, skb->len); 1207 if (skb_is_gso(skb)) { 1208 skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size; 1209 skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; 1210 } 1211 nskb->queue_mapping = skb->queue_mapping; 1212 dev_kfree_skb(skb); 1213 skb = nskb; 1214 } 1215 
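	/* On either path skb->data now begins VNET_PACKET_SKIP bytes past an
	 * 8-byte boundary (the original skb already did, or it was copied
	 * into a freshly aligned one above), which is what vnet_skb_map()
	 * relies on when it maps from skb->data - VNET_PACKET_SKIP.
	 */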
return skb; 1216 } 1217 1218 static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, 1219 struct vnet_port *(*vnet_tx_port) 1220 (struct sk_buff *, struct net_device *)) 1221 { 1222 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 1223 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1224 struct sk_buff *segs; 1225 int maclen, datalen; 1226 int status; 1227 int gso_size, gso_type, gso_segs; 1228 int hlen = skb_transport_header(skb) - skb_mac_header(skb); 1229 int proto = IPPROTO_IP; 1230 1231 if (skb->protocol == htons(ETH_P_IP)) 1232 proto = ip_hdr(skb)->protocol; 1233 else if (skb->protocol == htons(ETH_P_IPV6)) 1234 proto = ipv6_hdr(skb)->nexthdr; 1235 1236 if (proto == IPPROTO_TCP) { 1237 hlen += tcp_hdr(skb)->doff * 4; 1238 } else if (proto == IPPROTO_UDP) { 1239 hlen += sizeof(struct udphdr); 1240 } else { 1241 pr_err("vnet_handle_offloads GSO with unknown transport " 1242 "protocol %d tproto %d\n", skb->protocol, proto); 1243 hlen = 128; /* XXX */ 1244 } 1245 datalen = port->tsolen - hlen; 1246 1247 gso_size = skb_shinfo(skb)->gso_size; 1248 gso_type = skb_shinfo(skb)->gso_type; 1249 gso_segs = skb_shinfo(skb)->gso_segs; 1250 1251 if (port->tso && gso_size < datalen) 1252 gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen); 1253 1254 if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) { 1255 struct netdev_queue *txq; 1256 1257 txq = netdev_get_tx_queue(dev, port->q_index); 1258 netif_tx_stop_queue(txq); 1259 if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs) 1260 return NETDEV_TX_BUSY; 1261 netif_tx_wake_queue(txq); 1262 } 1263 1264 maclen = skb_network_header(skb) - skb_mac_header(skb); 1265 skb_pull(skb, maclen); 1266 1267 if (port->tso && gso_size < datalen) { 1268 if (skb_unclone(skb, GFP_ATOMIC)) 1269 goto out_dropped; 1270 1271 /* segment to TSO size */ 1272 skb_shinfo(skb)->gso_size = datalen; 1273 skb_shinfo(skb)->gso_segs = gso_segs; 1274 } 1275 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); 1276 if (IS_ERR(segs)) 1277 goto out_dropped; 1278 1279 skb_push(skb, maclen); 1280 skb_reset_mac_header(skb); 1281 1282 status = 0; 1283 while (segs) { 1284 struct sk_buff *curr = segs; 1285 1286 segs = segs->next; 1287 curr->next = NULL; 1288 if (port->tso && curr->len > dev->mtu) { 1289 skb_shinfo(curr)->gso_size = gso_size; 1290 skb_shinfo(curr)->gso_type = gso_type; 1291 skb_shinfo(curr)->gso_segs = 1292 DIV_ROUND_UP(curr->len - hlen, gso_size); 1293 } else { 1294 skb_shinfo(curr)->gso_size = 0; 1295 } 1296 1297 skb_push(curr, maclen); 1298 skb_reset_mac_header(curr); 1299 memcpy(skb_mac_header(curr), skb_mac_header(skb), 1300 maclen); 1301 curr->csum_start = skb_transport_header(curr) - curr->head; 1302 if (ip_hdr(curr)->protocol == IPPROTO_TCP) 1303 curr->csum_offset = offsetof(struct tcphdr, check); 1304 else if (ip_hdr(curr)->protocol == IPPROTO_UDP) 1305 curr->csum_offset = offsetof(struct udphdr, check); 1306 1307 if (!(status & NETDEV_TX_MASK)) 1308 status = sunvnet_start_xmit_common(curr, dev, 1309 vnet_tx_port); 1310 if (status & NETDEV_TX_MASK) 1311 dev_kfree_skb_any(curr); 1312 } 1313 1314 if (!(status & NETDEV_TX_MASK)) 1315 dev_kfree_skb_any(skb); 1316 return status; 1317 out_dropped: 1318 dev->stats.tx_dropped++; 1319 dev_kfree_skb_any(skb); 1320 return NETDEV_TX_OK; 1321 } 1322 1323 int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, 1324 struct vnet_port *(*vnet_tx_port) 1325 (struct sk_buff *, struct net_device *)) 1326 { 1327 struct vnet_port *port = NULL; 1328 struct 
vio_dring_state *dr; 1329 struct vio_net_desc *d; 1330 unsigned int len; 1331 struct sk_buff *freeskbs = NULL; 1332 int i, err, txi; 1333 unsigned pending = 0; 1334 struct netdev_queue *txq; 1335 1336 rcu_read_lock(); 1337 port = vnet_tx_port(skb, dev); 1338 if (unlikely(!port)) 1339 goto out_dropped; 1340 1341 if (skb_is_gso(skb) && skb->len > port->tsolen) { 1342 err = vnet_handle_offloads(port, skb, vnet_tx_port); 1343 rcu_read_unlock(); 1344 return err; 1345 } 1346 1347 if (!skb_is_gso(skb) && skb->len > port->rmtu) { 1348 unsigned long localmtu = port->rmtu - ETH_HLEN; 1349 1350 if (vio_version_after_eq(&port->vio, 1, 3)) 1351 localmtu -= VLAN_HLEN; 1352 1353 if (skb->protocol == htons(ETH_P_IP)) { 1354 struct flowi4 fl4; 1355 struct rtable *rt = NULL; 1356 1357 memset(&fl4, 0, sizeof(fl4)); 1358 fl4.flowi4_oif = dev->ifindex; 1359 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 1360 fl4.daddr = ip_hdr(skb)->daddr; 1361 fl4.saddr = ip_hdr(skb)->saddr; 1362 1363 rt = ip_route_output_key(dev_net(dev), &fl4); 1364 if (!IS_ERR(rt)) { 1365 skb_dst_set(skb, &rt->dst); 1366 icmp_send(skb, ICMP_DEST_UNREACH, 1367 ICMP_FRAG_NEEDED, 1368 htonl(localmtu)); 1369 } 1370 } 1371 #if IS_ENABLED(CONFIG_IPV6) 1372 else if (skb->protocol == htons(ETH_P_IPV6)) 1373 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu); 1374 #endif 1375 goto out_dropped; 1376 } 1377 1378 skb = vnet_skb_shape(skb, 2); 1379 1380 if (unlikely(!skb)) 1381 goto out_dropped; 1382 1383 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1384 if (skb->protocol == htons(ETH_P_IP)) 1385 vnet_fullcsum_ipv4(skb); 1386 #if IS_ENABLED(CONFIG_IPV6) 1387 else if (skb->protocol == htons(ETH_P_IPV6)) 1388 vnet_fullcsum_ipv6(skb); 1389 #endif 1390 } 1391 1392 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1393 i = skb_get_queue_mapping(skb); 1394 txq = netdev_get_tx_queue(dev, i); 1395 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1396 if (!netif_tx_queue_stopped(txq)) { 1397 netif_tx_stop_queue(txq); 1398 1399 /* This is a hard error, log it. */ 1400 netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); 1401 dev->stats.tx_errors++; 1402 } 1403 rcu_read_unlock(); 1404 return NETDEV_TX_BUSY; 1405 } 1406 1407 d = vio_dring_cur(dr); 1408 1409 txi = dr->prod; 1410 1411 freeskbs = vnet_clean_tx_ring(port, &pending); 1412 1413 BUG_ON(port->tx_bufs[txi].skb); 1414 1415 len = skb->len; 1416 if (len < ETH_ZLEN) 1417 len = ETH_ZLEN; 1418 1419 err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2, 1420 (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW)); 1421 if (err < 0) { 1422 netdev_info(dev, "tx buffer map error %d\n", err); 1423 goto out_dropped; 1424 } 1425 1426 port->tx_bufs[txi].skb = skb; 1427 skb = NULL; 1428 port->tx_bufs[txi].ncookies = err; 1429 1430 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(), 1431 * thus it is safe to not set VIO_ACK_ENABLE for each transmission: 1432 * the protocol itself does not require it as long as the peer 1433 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED. 1434 * 1435 * An ACK for every packet in the ring is expensive as the 1436 * sending of LDC messages is slow and affects performance. 
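	 * The skbs are instead reclaimed lazily by vnet_clean_tx_ring(),
	 * either from a later call to this function or from the port's
	 * clean_timer.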
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
						->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer. The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed. We send a "start" trigger here
	 * if, and only if, start_cons is true (and reset it afterward).
	 * Conversely, vnet_ack() should check whether the dring entry
	 * corresponding to cons is marked READY while start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the dma_wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler. The consumer will
	 * not proceed until the VIO_DESC_READY is visible, ensuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order.
The HV trap from the LDC start trigger is the 1483 * producer to consumer announcement that work is available to the 1484 * consumer 1485 */ 1486 if (!port->start_cons) { /* previous trigger suffices */ 1487 trace_vnet_skip_tx_trigger(port->vio._local_sid, 1488 port->vio._peer_sid, dr->cons); 1489 goto ldc_start_done; 1490 } 1491 1492 err = __vnet_tx_trigger(port, dr->cons); 1493 if (unlikely(err < 0)) { 1494 netdev_info(dev, "TX trigger error %d\n", err); 1495 d->hdr.state = VIO_DESC_FREE; 1496 skb = port->tx_bufs[txi].skb; 1497 port->tx_bufs[txi].skb = NULL; 1498 dev->stats.tx_carrier_errors++; 1499 goto out_dropped; 1500 } 1501 1502 ldc_start_done: 1503 port->start_cons = false; 1504 1505 dev->stats.tx_packets++; 1506 dev->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1507 port->stats.tx_packets++; 1508 port->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1509 1510 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); 1511 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1512 netif_tx_stop_queue(txq); 1513 smp_rmb(); 1514 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) 1515 netif_tx_wake_queue(txq); 1516 } 1517 1518 (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT); 1519 rcu_read_unlock(); 1520 1521 vnet_free_skbs(freeskbs); 1522 1523 return NETDEV_TX_OK; 1524 1525 out_dropped: 1526 if (pending) 1527 (void)mod_timer(&port->clean_timer, 1528 jiffies + VNET_CLEAN_TIMEOUT); 1529 else if (port) 1530 del_timer(&port->clean_timer); 1531 rcu_read_unlock(); 1532 if (skb) 1533 dev_kfree_skb(skb); 1534 vnet_free_skbs(freeskbs); 1535 dev->stats.tx_dropped++; 1536 return NETDEV_TX_OK; 1537 } 1538 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); 1539 1540 void sunvnet_tx_timeout_common(struct net_device *dev) 1541 { 1542 /* XXX Implement me XXX */ 1543 } 1544 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common); 1545 1546 int sunvnet_open_common(struct net_device *dev) 1547 { 1548 netif_carrier_on(dev); 1549 netif_tx_start_all_queues(dev); 1550 1551 return 0; 1552 } 1553 EXPORT_SYMBOL_GPL(sunvnet_open_common); 1554 1555 int sunvnet_close_common(struct net_device *dev) 1556 { 1557 netif_tx_stop_all_queues(dev); 1558 netif_carrier_off(dev); 1559 1560 return 0; 1561 } 1562 EXPORT_SYMBOL_GPL(sunvnet_close_common); 1563 1564 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr) 1565 { 1566 struct vnet_mcast_entry *m; 1567 1568 for (m = vp->mcast_list; m; m = m->next) { 1569 if (ether_addr_equal(m->addr, addr)) 1570 return m; 1571 } 1572 return NULL; 1573 } 1574 1575 static void __update_mc_list(struct vnet *vp, struct net_device *dev) 1576 { 1577 struct netdev_hw_addr *ha; 1578 1579 netdev_for_each_mc_addr(ha, dev) { 1580 struct vnet_mcast_entry *m; 1581 1582 m = __vnet_mc_find(vp, ha->addr); 1583 if (m) { 1584 m->hit = 1; 1585 continue; 1586 } 1587 1588 if (!m) { 1589 m = kzalloc(sizeof(*m), GFP_ATOMIC); 1590 if (!m) 1591 continue; 1592 memcpy(m->addr, ha->addr, ETH_ALEN); 1593 m->hit = 1; 1594 1595 m->next = vp->mcast_list; 1596 vp->mcast_list = m; 1597 } 1598 } 1599 } 1600 1601 static void __send_mc_list(struct vnet *vp, struct vnet_port *port) 1602 { 1603 struct vio_net_mcast_info info; 1604 struct vnet_mcast_entry *m, **pp; 1605 int n_addrs; 1606 1607 memset(&info, 0, sizeof(info)); 1608 1609 info.tag.type = VIO_TYPE_CTRL; 1610 info.tag.stype = VIO_SUBTYPE_INFO; 1611 info.tag.stype_env = VNET_MCAST_INFO; 1612 info.tag.sid = vio_send_sid(&port->vio); 1613 info.set = 1; 1614 1615 n_addrs = 0; 1616 for (m = vp->mcast_list; m; m = m->next) { 1617 if (m->sent) 1618 continue; 
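		/* Batch up to VNET_NUM_MCAST addresses per VNET_MCAST_INFO
		 * message, flushing whenever the buffer fills up.
		 */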
1619 m->sent = 1; 1620 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1621 m->addr, ETH_ALEN); 1622 if (++n_addrs == VNET_NUM_MCAST) { 1623 info.count = n_addrs; 1624 1625 (void)vio_ldc_send(&port->vio, &info, 1626 sizeof(info)); 1627 n_addrs = 0; 1628 } 1629 } 1630 if (n_addrs) { 1631 info.count = n_addrs; 1632 (void)vio_ldc_send(&port->vio, &info, sizeof(info)); 1633 } 1634 1635 info.set = 0; 1636 1637 n_addrs = 0; 1638 pp = &vp->mcast_list; 1639 while ((m = *pp) != NULL) { 1640 if (m->hit) { 1641 m->hit = 0; 1642 pp = &m->next; 1643 continue; 1644 } 1645 1646 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1647 m->addr, ETH_ALEN); 1648 if (++n_addrs == VNET_NUM_MCAST) { 1649 info.count = n_addrs; 1650 (void)vio_ldc_send(&port->vio, &info, 1651 sizeof(info)); 1652 n_addrs = 0; 1653 } 1654 1655 *pp = m->next; 1656 kfree(m); 1657 } 1658 if (n_addrs) { 1659 info.count = n_addrs; 1660 (void)vio_ldc_send(&port->vio, &info, sizeof(info)); 1661 } 1662 } 1663 1664 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp) 1665 { 1666 struct vnet_port *port; 1667 1668 rcu_read_lock(); 1669 list_for_each_entry_rcu(port, &vp->port_list, list) { 1670 if (port->switch_port) { 1671 __update_mc_list(vp, dev); 1672 __send_mc_list(vp, port); 1673 break; 1674 } 1675 } 1676 rcu_read_unlock(); 1677 } 1678 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common); 1679 1680 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p) 1681 { 1682 return -EINVAL; 1683 } 1684 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common); 1685 1686 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port) 1687 { 1688 struct vio_dring_state *dr; 1689 int i; 1690 1691 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1692 1693 if (!dr->base) 1694 return; 1695 1696 for (i = 0; i < VNET_TX_RING_SIZE; i++) { 1697 struct vio_net_desc *d; 1698 void *skb = port->tx_bufs[i].skb; 1699 1700 if (!skb) 1701 continue; 1702 1703 d = vio_dring_entry(dr, i); 1704 1705 ldc_unmap(port->vio.lp, 1706 port->tx_bufs[i].cookies, 1707 port->tx_bufs[i].ncookies); 1708 dev_kfree_skb(skb); 1709 port->tx_bufs[i].skb = NULL; 1710 d->hdr.state = VIO_DESC_FREE; 1711 } 1712 ldc_free_exp_dring(port->vio.lp, dr->base, 1713 (dr->entry_size * dr->num_entries), 1714 dr->cookies, dr->ncookies); 1715 dr->base = NULL; 1716 dr->entry_size = 0; 1717 dr->num_entries = 0; 1718 dr->pending = 0; 1719 dr->ncookies = 0; 1720 } 1721 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common); 1722 1723 void vnet_port_reset(struct vnet_port *port) 1724 { 1725 del_timer(&port->clean_timer); 1726 sunvnet_port_free_tx_bufs_common(port); 1727 port->rmtu = 0; 1728 port->tso = (port->vsw == 0); /* no tso in vsw, misbehaves in bridge */ 1729 port->tsolen = 0; 1730 } 1731 EXPORT_SYMBOL_GPL(vnet_port_reset); 1732 1733 static int vnet_port_alloc_tx_ring(struct vnet_port *port) 1734 { 1735 struct vio_dring_state *dr; 1736 unsigned long len, elen; 1737 int i, err, ncookies; 1738 void *dring; 1739 1740 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1741 1742 elen = sizeof(struct vio_net_desc) + 1743 sizeof(struct ldc_trans_cookie) * 2; 1744 if (vio_version_after_eq(&port->vio, 1, 7)) 1745 elen += sizeof(struct vio_net_dext); 1746 len = VNET_TX_RING_SIZE * elen; 1747 1748 ncookies = VIO_MAX_RING_COOKIES; 1749 dring = ldc_alloc_exp_dring(port->vio.lp, len, 1750 dr->cookies, &ncookies, 1751 (LDC_MAP_SHADOW | 1752 LDC_MAP_DIRECT | 1753 LDC_MAP_RW)); 1754 if (IS_ERR(dring)) { 1755 err = PTR_ERR(dring); 1756 goto err_out; 1757 } 1758 1759 dr->base = dring; 1760 dr->entry_size = elen; 1761 dr->num_entries = 
VNET_TX_RING_SIZE; 1762 dr->prod = 0; 1763 dr->cons = 0; 1764 port->start_cons = true; /* need an initial trigger */ 1765 dr->pending = VNET_TX_RING_SIZE; 1766 dr->ncookies = ncookies; 1767 1768 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 1769 struct vio_net_desc *d; 1770 1771 d = vio_dring_entry(dr, i); 1772 d->hdr.state = VIO_DESC_FREE; 1773 } 1774 return 0; 1775 1776 err_out: 1777 sunvnet_port_free_tx_bufs_common(port); 1778 1779 return err; 1780 } 1781 1782 #ifdef CONFIG_NET_POLL_CONTROLLER 1783 void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp) 1784 { 1785 struct vnet_port *port; 1786 unsigned long flags; 1787 1788 spin_lock_irqsave(&vp->lock, flags); 1789 if (!list_empty(&vp->port_list)) { 1790 port = list_entry(vp->port_list.next, struct vnet_port, list); 1791 napi_schedule(&port->napi); 1792 } 1793 spin_unlock_irqrestore(&vp->lock, flags); 1794 } 1795 EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common); 1796 #endif 1797 1798 void sunvnet_port_add_txq_common(struct vnet_port *port) 1799 { 1800 struct vnet *vp = port->vp; 1801 int smallest = 0; 1802 int i; 1803 1804 /* find the first least-used q 1805 * When there are more ldoms than q's, we start to 1806 * double up on ports per queue. 1807 */ 1808 for (i = 0; i < VNET_MAX_TXQS; i++) { 1809 if (vp->q_used[i] == 0) { 1810 smallest = i; 1811 break; 1812 } 1813 if (vp->q_used[i] < vp->q_used[smallest]) 1814 smallest = i; 1815 } 1816 1817 vp->nports++; 1818 vp->q_used[smallest]++; 1819 port->q_index = smallest; 1820 } 1821 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common); 1822 1823 void sunvnet_port_rm_txq_common(struct vnet_port *port) 1824 { 1825 port->vp->nports--; 1826 port->vp->q_used[port->q_index]--; 1827 port->q_index = 0; 1828 } 1829 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common); 1830
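/* Usage note (illustrative, not part of this library):
 *
 * The *_common() helpers above are intended to be wired up by a front-end
 * driver such as sunvnet.c or the vswitch port driver.  A minimal sketch,
 * using hypothetical wrapper and lookup names, might look like:
 *
 *	static netdev_tx_t vnet_start_xmit(struct sk_buff *skb,
 *					   struct net_device *dev)
 *	{
 *		return sunvnet_start_xmit_common(skb, dev, vnet_tx_port_find);
 *	}
 *
 *	static const struct net_device_ops vnet_ops = {
 *		.ndo_open		= sunvnet_open_common,
 *		.ndo_stop		= sunvnet_close_common,
 *		.ndo_start_xmit		= vnet_start_xmit,
 *		.ndo_tx_timeout		= sunvnet_tx_timeout_common,
 *	};
 *
 * where vnet_tx_port_find() stands in for the driver's own skb-to-port
 * lookup.  NAPI polling would be registered against sunvnet_poll_common(),
 * the LDC event callback against sunvnet_event_common(), and rx-mode /
 * multicast updates forwarded to sunvnet_set_rx_mode_common() with the
 * driver's struct vnet.
 */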