// SPDX-License-Identifier: GPL-2.0
/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016-2017 Oracle. All rights reserved.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>
#define CREATE_TRACE_POINTS
#include <trace/events/sunvnet.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/gso.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet_common.h"

/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
#define VNET_MAX_RETRIES	10

MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network support library");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1");

static int __vnet_tx_trigger(struct vnet_port *port, u32 start);

static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port);

int sunvnet_send_attr_common(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_net_attr_info pkt;
	int framelen = ETH_FRAME_LEN;
	int i, err;

	err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
	if (err)
		return err;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	if (vio_version_before(vio, 1, 2))
		pkt.xfer_mode = VIO_DRING_MODE;
	else
		pkt.xfer_mode = VIO_NEW_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	if (vio_version_after(vio, 1, 3)) {
		if (port->rmtu) {
			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
			pkt.mtu = port->rmtu;
		} else {
			port->rmtu = VNET_MAXPACKET;
			pkt.mtu = port->rmtu;
		}
		if (vio_version_after_eq(vio, 1, 6))
			pkt.options = VIO_TX_DRING;
	} else if (vio_version_before(vio, 1, 3)) {
		pkt.mtu = framelen;
	} else { /* v1.3 */
		pkt.mtu = framelen + VLAN_HLEN;
	}

	pkt.cflags = 0;
	if (vio_version_after_eq(vio, 1, 7) && port->tso) {
		pkt.cflags |= VNET_LSO_IPV4_CAPAB;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		pkt.ipv4_lso_maxlen = port->tsolen;
	}

	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       "cflags[0x%04x] lso_max[%u]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long)pkt.addr,
	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}
EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);

static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	struct vnet_port *port = to_vnet_port(vio);
	u64 localmtu;
	u8 xfer_mode;

	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.sid = vio_send_sid(vio);

	xfer_mode = pkt->xfer_mode;
	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
		xfer_mode = VIO_NEW_DRING_MODE;

	/* MTU negotiation:
	 *	< v1.3 - ETH_FRAME_LEN exactly
	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
	 *		 pkt->mtu for ACK
	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
	 */
	if (vio_version_before(vio, 1, 3)) {
		localmtu = ETH_FRAME_LEN;
	} else if (vio_version_after(vio, 1, 3)) {
		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
		localmtu = min(pkt->mtu, localmtu);
		pkt->mtu = localmtu;
	} else { /* v1.3 */
		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
	}
	port->rmtu = localmtu;

	/* LSO negotiation */
	if (vio_version_after_eq(vio, 1, 7))
		port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
	else
		port->tso = false;
	if (port->tso) {
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
		if (port->tsolen < VNET_MINTSO) {
			port->tso = false;
			port->tsolen = 0;
			pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		}
		pkt->ipv4_lso_maxlen = port->tsolen;
	} else {
		pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		pkt->ipv4_lso_maxlen = 0;
		port->tsolen = 0;
	}

	/* for version >= 1.6, ACK packet mode we support */
	if (vio_version_after_eq(vio, 1, 6)) {
		pkt->xfer_mode = VIO_NEW_DRING_MODE;
		pkt->options = VIO_TX_DRING;
	}

	if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != localmtu) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void)vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	}

	viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
	       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
	       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.stype = VIO_SUBTYPE_ACK;

	return vio_ldc_send(vio, pkt, sizeof(*pkt));
}

static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}

int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}
EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);

void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->rcv_nxt = 1;
	dr->snd_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->rcv_nxt = 1;
	dr->snd_nxt = 1;
}
EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb;
	unsigned long addr, off;

	skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long)skb->data;
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}

static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int offset = skb_transport_offset(skb);

	if (skb->protocol != htons(ETH_P_IP))
		return;
	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return;
	skb->ip_summed = CHECKSUM_NONE;
	skb->csum_level = 1;
	skb->csum = 0;
	if (iph->protocol == IPPROTO_TCP) {
		struct tcphdr *ptcp = tcp_hdr(skb);

		ptcp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_TCP,
						skb->csum);
	} else if (iph->protocol == IPPROTO_UDP) {
		struct udphdr *pudp = udp_hdr(skb);

		pudp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_UDP,
						skb->csum);
	}
}

#if IS_ENABLED(CONFIG_IPV6)
static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
{
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	int offset = skb_transport_offset(skb);

	if (skb->protocol != htons(ETH_P_IPV6))
		return;
	if (ip6h->nexthdr != IPPROTO_TCP &&
	    ip6h->nexthdr != IPPROTO_UDP)
		return;
	skb->ip_summed = CHECKSUM_NONE;
	skb->csum_level = 1;
	skb->csum = 0;
	if (ip6h->nexthdr == IPPROTO_TCP) {
		struct tcphdr *ptcp = tcp_hdr(skb);

		ptcp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
					      skb->len - offset, IPPROTO_TCP,
					      skb->csum);
	} else if (ip6h->nexthdr == IPPROTO_UDP) {
		struct udphdr *pudp = udp_hdr(skb);

		pudp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
					      skb->len - offset, IPPROTO_UDP,
					      skb->csum);
	}
}
#endif

static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	unsigned int len = desc->size;
	unsigned int copy_len;
	struct sk_buff *skb;
	int maxlen;
	int err;

	err = -EMSGSIZE;
	if (port->tso && port->tsolen > port->rmtu)
		maxlen = port->tsolen;
	else
		maxlen = port->rmtu;
	if (unlikely(len < ETH_ZLEN || len > maxlen)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       desc->cookies, desc->ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (vio_version_after_eq(&port->vio, 1, 8)) {
		struct vio_net_dext *dext = vio_net_ext(desc);

		skb_reset_network_header(skb);

		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
			if (skb->protocol == ETH_P_IP) {
				struct iphdr *iph = ip_hdr(skb);

				iph->check = 0;
				ip_send_check(iph);
			}
		}
		if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
		    skb->ip_summed == CHECKSUM_NONE) {
			if (skb->protocol == htons(ETH_P_IP)) {
				struct iphdr *iph = ip_hdr(skb);
				int ihl = iph->ihl * 4;

				skb_set_transport_header(skb, ihl);
				vnet_fullcsum_ipv4(skb);
#if IS_ENABLED(CONFIG_IPV6)
			} else if (skb->protocol == htons(ETH_P_IPV6)) {
				skb_set_transport_header(skb,
							 sizeof(struct ipv6hdr));
				vnet_fullcsum_ipv6(skb);
#endif
			}
		}
		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_level = 0;
			if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
				skb->csum_level = 1;
		}
	}
	if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
		dev->stats.multicast++;
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
	port->stats.rx_packets++;
	port->stats.rx_bytes += len;
	napi_gro_receive(&port->napi, skb);
	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}

static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type = VIO_TYPE_DATA,
			.stype = VIO_SUBTYPE_ACK,
			.stype_env = VIO_DRING_DATA,
			.sid = vio_send_sid(&port->vio),
		},
		.dring_ident = dr->ident,
		.start_idx = start,
		.end_idx = end,
		.state = vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
				port->raddr[0], port->raddr[1],
				port->raddr[2], port->raddr[3],
				port->raddr[4], port->raddr[5]);
			break;
		}
	} while (err == -EAGAIN);

	if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
		port->stop_rx_idx = end;
		port->stop_rx = true;
	} else {
		port->stop_rx_idx = 0;
		port->stop_rx = false;
	}

	return err;
}

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}

static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}

static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	BUG_ON(!desc);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;

	dma_rmb();

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	err = vnet_rx_one(port, desc);
	if (err == -ECONNRESET)
		return err;
	trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
			  index, desc->hdr.ack);
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}

static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end, int *npkts, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;
	bool send_ack = true;

	end = (end == (u32)-1) ? vio_dring_prev(dr, start)
			       : vio_dring_next(dr, end);
	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);

		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		(*npkts)++;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = vio_dring_next(dr, start);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
		if ((*npkts) >= budget) {
			send_ack = false;
			break;
		}
	}
	if (unlikely(ack_start == -1)) {
		ack_end = vio_dring_prev(dr, start);
		ack_start = ack_end;
	}
	if (send_ack) {
		port->napi_resume = false;
		trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
					       port->vio._peer_sid,
					       ack_end, *npkts);
		return vnet_send_ack(port, dr, ack_start, ack_end,
				     VIO_DRING_STOPPED);
	} else {
		trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
						port->vio._peer_sid,
						ack_end, *npkts);
		port->napi_resume = true;
		port->napi_stop_idx = ack_end;
		return 1;
	}
}

static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
		   int budget)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	if (!port->napi_resume)
		dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
			    npkts, budget);
}

static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = vio_dring_next(dr, idx);
	}
	return found;
}

static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	u32 end;
	struct vio_net_desc *desc;
	struct netdev_queue *txq;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	dev = VNET_PORT_TO_NET_DEVICE(port);
	netif_tx_lock(dev);
	if (unlikely(!idx_is_pending(dr, end))) {
		netif_tx_unlock(dev);
		return 0;
	}

	/* sync for race conditions with vnet_start_xmit() and tell xmit it
	 * is time to send a trigger.
	 */
	trace_vnet_rx_stopped_ack(port->vio._local_sid,
				  port->vio._peer_sid, end);
	dr->cons = vio_dring_next(dr, end);
	desc = vio_dring_entry(dr, dr->cons);
	if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
		/* vnet_start_xmit() just populated this dring but missed
		 * sending the "start" LDC message to the consumer.
		 * Send a "start" trigger on its behalf.
		 */
		if (__vnet_tx_trigger(port, dr->cons) > 0)
			port->start_cons = false;
		else
			port->start_cons = true;
	} else {
		port->start_cons = true;
	}
	netif_tx_unlock(dev);

	txq = netdev_get_tx_queue(dev, port->q_index);
	if (unlikely(netif_tx_queue_stopped(txq) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
				  port->q_index);
	__netif_tx_lock(txq, smp_processor_id());
	if (likely(netif_tx_queue_stopped(txq)))
		netif_tx_wake_queue(txq);
	__netif_tx_unlock(txq);
}

bool sunvnet_port_is_up_common(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}
EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);

static int vnet_event_napi(struct vnet_port *port, int budget)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_driver_state *vio = &port->vio;
	int tx_wakeup, err;
	int npkts = 0;

	/* we don't expect any other bits */
	BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
				  LDC_EVENT_RESET |
				  LDC_EVENT_UP));

	/* RESET takes precedence over any other event */
	if (port->rx_event & LDC_EVENT_RESET) {
		/* a link went down */

		if (port->vsw == 1) {
			netif_tx_stop_all_queues(dev);
			netif_carrier_off(dev);
		}

		vio_link_state_change(vio, LDC_EVENT_RESET);
		vnet_port_reset(port);
		vio_port_up(vio);

		/* If the device is running but its tx queue was
		 * stopped (due to flow control), restart it.
		 * This is necessary since vnet_port_reset()
		 * clears the tx drings and thus we may never get
		 * back a VIO_TYPE_DATA ACK packet - which is
		 * the normal mechanism to restart the tx queue.
		 */
		if (netif_running(dev))
			maybe_tx_wakeup(port);

		port->rx_event = 0;
		port->stats.event_reset++;
		return 0;
	}

	if (port->rx_event & LDC_EVENT_UP) {
		/* a link came up */

		if (port->vsw == 1) {
			netif_carrier_on(port->dev);
			netif_tx_start_all_queues(port->dev);
		}

		vio_link_state_change(vio, LDC_EVENT_UP);
		port->rx_event = 0;
		port->stats.event_up++;
		return 0;
	}

	err = 0;
	tx_wakeup = 0;
	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		if (port->napi_resume) {
			struct vio_dring_data *pkt =
				(struct vio_dring_data *)&msgbuf;
			struct vio_dring_state *dr =
				&port->vio.drings[VIO_DRIVER_RX_RING];

			pkt->tag.type = VIO_TYPE_DATA;
			pkt->tag.stype = VIO_SUBTYPE_INFO;
			pkt->tag.stype_env = VIO_DRING_DATA;
			pkt->seq = dr->rcv_nxt;
			pkt->start_idx = vio_dring_next(dr,
							port->napi_stop_idx);
			pkt->end_idx = -1;
		} else {
			err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
			if (unlikely(err < 0)) {
				if (err == -ECONNRESET)
					vio_conn_reset(vio);
				break;
			}
			if (err == 0)
				break;
			viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
			       msgbuf.tag.type,
			       msgbuf.tag.stype,
			       msgbuf.tag.stype_env,
			       msgbuf.tag.sid);
			err = vio_validate_sid(vio, &msgbuf.tag);
			if (err < 0)
				break;
		}

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				if (!sunvnet_port_is_up_common(port)) {
					/* failures like handshake_failure()
					 * may have cleaned up dring, but
					 * NAPI polling may bring us here.
					 */
					err = -ECONNRESET;
					break;
				}
				err = vnet_rx(port, &msgbuf, &npkts, budget);
				if (npkts >= budget)
					break;
				if (npkts == 0)
					break;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		maybe_tx_wakeup(port);
	return npkts;
}

int sunvnet_poll_common(struct napi_struct *napi, int budget)
{
	struct vnet_port *port = container_of(napi, struct vnet_port, napi);
	struct vio_driver_state *vio = &port->vio;
	int processed = vnet_event_napi(port, budget);

	if (processed < budget) {
		napi_complete_done(napi, processed);
		port->rx_event &= ~LDC_EVENT_DATA_READY;
		vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
	}
	return processed;
}
EXPORT_SYMBOL_GPL(sunvnet_poll_common);

void sunvnet_event_common(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;

	port->rx_event |= event;
	vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
	napi_schedule(&port->napi);
}
EXPORT_SYMBOL_GPL(sunvnet_event_common);

static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type = VIO_TYPE_DATA,
			.stype = VIO_SUBTYPE_INFO,
			.stype_env = VIO_DRING_DATA,
			.sid = vio_send_sid(&port->vio),
		},
		.dring_ident = dr->ident,
		.start_idx = start,
		.end_idx = (u32)-1,
	};
	int err, delay;
	int retries = 0;

	if (port->stop_rx) {
		trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
						  port->vio._peer_sid,
						  port->stop_rx_idx, -1);
		err = vnet_send_ack(port,
				    &port->vio.drings[VIO_DRIVER_RX_RING],
				    port->stop_rx_idx, -1,
				    VIO_DRING_STOPPED);
		if (err <= 0)
			return err;
	}

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);
	trace_vnet_tx_trigger(port->vio._local_sid,
			      port->vio._peer_sid, start, err);

	return err;
}

static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
					  unsigned *pending)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *skb = NULL;
	int i, txi;

	*pending = 0;

	txi = dr->prod;
	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		--txi;
		if (txi < 0)
			txi = VNET_TX_RING_SIZE - 1;

		d = vio_dring_entry(dr, txi);

		if (d->hdr.state == VIO_DESC_READY) {
			(*pending)++;
			continue;
		}
		if (port->tx_bufs[txi].skb) {
			if (d->hdr.state != VIO_DESC_DONE)
				pr_notice("invalid ring buffer state %d\n",
					  d->hdr.state);
			BUG_ON(port->tx_bufs[txi].skb->next);

			port->tx_bufs[txi].skb->next = skb;
			skb = port->tx_bufs[txi].skb;
			port->tx_bufs[txi].skb = NULL;

			ldc_unmap(port->vio.lp,
				  port->tx_bufs[txi].cookies,
				  port->tx_bufs[txi].ncookies);
		} else if (d->hdr.state == VIO_DESC_FREE) {
			break;
		}
		d->hdr.state = VIO_DESC_FREE;
	}
	return skb;
}

static inline void vnet_free_skbs(struct sk_buff *skb)
{
	struct sk_buff *next;

	while (skb) {
		next = skb->next;
		skb->next = NULL;
		dev_kfree_skb(skb);
		skb = next;
	}
}

void sunvnet_clean_timer_expire_common(struct timer_list *t)
{
	struct vnet_port *port = from_timer(port, t, clean_timer);
	struct sk_buff *freeskbs;
	unsigned pending;

	netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
	freeskbs = vnet_clean_tx_ring(port, &pending);
	netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));

	vnet_free_skbs(freeskbs);

	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else
		del_timer(&port->clean_timer);
}
EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);

static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
			       struct ldc_trans_cookie *cookies, int ncookies,
			       unsigned int map_perm)
{
	int i, nc, err, blen;

	/* header */
	blen = skb_headlen(skb);
	if (blen < ETH_ZLEN)
		blen = ETH_ZLEN;
	blen += VNET_PACKET_SKIP;
	blen += 8 - (blen & 7);

	err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
			     ncookies, map_perm);
	if (err < 0)
		return err;
	nc = err;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u8 *vaddr;

		if (nc < ncookies) {
			vaddr = kmap_local_page(skb_frag_page(f));
			blen = skb_frag_size(f);
			blen += 8 - (blen & 7);
			err = ldc_map_single(lp, vaddr + skb_frag_off(f),
					     blen, cookies + nc, ncookies - nc,
					     map_perm);
			kunmap_local(vaddr);
		} else {
			err = -EMSGSIZE;
		}

		if (err < 0) {
			ldc_unmap(lp, cookies, nc);
			return err;
		}
		nc += err;
	}
	return nc;
}

static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
{
	struct sk_buff *nskb;
	int i, len, pad, docopy;

	len = skb->len;
	pad = 0;
	if (len < ETH_ZLEN) {
		pad += ETH_ZLEN - skb->len;
		len += pad;
	}
	len += VNET_PACKET_SKIP;
	pad += 8 - (len & 7);

	/* make sure we have enough cookies and alignment in every frag */
	docopy = skb_shinfo(skb)->nr_frags >= ncookies;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		docopy |= skb_frag_off(f) & 7;
	}
	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
	    skb_tailroom(skb) < pad ||
	    skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
		int start = 0, offset;
		__wsum csum;

		len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
		nskb = alloc_and_align_skb(skb->dev, len);
		if (!nskb) {
			dev_kfree_skb(skb);
			return NULL;
		}
		skb_reserve(nskb, VNET_PACKET_SKIP);

		nskb->protocol = skb->protocol;
		offset = skb_mac_header(skb) - skb->data;
		skb_set_mac_header(nskb, offset);
		offset = skb_network_header(skb) - skb->data;
		skb_set_network_header(nskb, offset);
		offset = skb_transport_header(skb) - skb->data;
		skb_set_transport_header(nskb, offset);

		offset = 0;
		nskb->csum_offset = skb->csum_offset;
		nskb->ip_summed = skb->ip_summed;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			start = skb_checksum_start_offset(skb);
		if (start) {
			int offset = start + nskb->csum_offset;

			/* copy the headers, no csum here */
			if (skb_copy_bits(skb, 0, nskb->data, start)) {
				dev_kfree_skb(nskb);
				dev_kfree_skb(skb);
				return NULL;
			}

			/* copy the rest, with csum calculation */
			*(__sum16 *)(skb->data + offset) = 0;
			csum = skb_copy_and_csum_bits(skb, start,
						      nskb->data + start,
						      skb->len - start);

			/* add in the header checksums */
			if (skb->protocol == htons(ETH_P_IP)) {
				struct iphdr *iph = ip_hdr(nskb);

				if (iph->protocol == IPPROTO_TCP ||
				    iph->protocol == IPPROTO_UDP) {
					csum = csum_tcpudp_magic(iph->saddr,
								 iph->daddr,
								 skb->len - start,
								 iph->protocol,
								 csum);
				}
			} else if (skb->protocol == htons(ETH_P_IPV6)) {
				struct ipv6hdr *ip6h = ipv6_hdr(nskb);

				if (ip6h->nexthdr == IPPROTO_TCP ||
				    ip6h->nexthdr == IPPROTO_UDP) {
					csum = csum_ipv6_magic(&ip6h->saddr,
							       &ip6h->daddr,
							       skb->len - start,
							       ip6h->nexthdr,
							       csum);
				}
			}

			/* save the final result */
			*(__sum16 *)(nskb->data + offset) = csum;

			nskb->ip_summed = CHECKSUM_NONE;
		} else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
			dev_kfree_skb(nskb);
			dev_kfree_skb(skb);
			return NULL;
		}
		(void)skb_put(nskb, skb->len);
		if (skb_is_gso(skb)) {
			skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
			skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
		}
		nskb->queue_mapping = skb->queue_mapping;
		dev_kfree_skb(skb);
		skb = nskb;
	}
	return skb;
}

static netdev_tx_t
vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
		     struct vnet_port *(*vnet_tx_port)
		     (struct sk_buff *, struct net_device *))
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *segs, *curr, *next;
	int maclen, datalen;
	int status;
	int gso_size, gso_type, gso_segs;
	int hlen = skb_transport_header(skb) - skb_mac_header(skb);
	int proto = IPPROTO_IP;

	if (skb->protocol == htons(ETH_P_IP))
		proto = ip_hdr(skb)->protocol;
	else if (skb->protocol == htons(ETH_P_IPV6))
		proto = ipv6_hdr(skb)->nexthdr;

	if (proto == IPPROTO_TCP) {
		hlen += tcp_hdr(skb)->doff * 4;
	} else if (proto == IPPROTO_UDP) {
		hlen += sizeof(struct udphdr);
	} else {
		pr_err("vnet_handle_offloads GSO with unknown transport "
		       "protocol %d tproto %d\n", skb->protocol, proto);
		hlen = 128; /* XXX */
	}
	datalen = port->tsolen - hlen;

	gso_size = skb_shinfo(skb)->gso_size;
	gso_type = skb_shinfo(skb)->gso_type;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (port->tso && gso_size < datalen)
		gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);

	if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(dev, port->q_index);
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
			return NETDEV_TX_BUSY;
		netif_tx_wake_queue(txq);
	}

	maclen = skb_network_header(skb) - skb_mac_header(skb);
	skb_pull(skb, maclen);

	if (port->tso && gso_size < datalen) {
		if (skb_unclone(skb, GFP_ATOMIC))
			goto out_dropped;

		/* segment to TSO size */
		skb_shinfo(skb)->gso_size = datalen;
		skb_shinfo(skb)->gso_segs = gso_segs;
	}
	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
	if (IS_ERR(segs))
		goto out_dropped;

	skb_push(skb, maclen);
	skb_reset_mac_header(skb);

	status = 0;
	skb_list_walk_safe(segs, curr, next) {
		skb_mark_not_on_list(curr);
		if (port->tso && curr->len > dev->mtu) {
			skb_shinfo(curr)->gso_size = gso_size;
			skb_shinfo(curr)->gso_type = gso_type;
			skb_shinfo(curr)->gso_segs =
				DIV_ROUND_UP(curr->len - hlen, gso_size);
		} else {
			skb_shinfo(curr)->gso_size = 0;
		}

		skb_push(curr, maclen);
		skb_reset_mac_header(curr);
		memcpy(skb_mac_header(curr), skb_mac_header(skb),
		       maclen);
		curr->csum_start = skb_transport_header(curr) - curr->head;
		if (ip_hdr(curr)->protocol == IPPROTO_TCP)
			curr->csum_offset = offsetof(struct tcphdr, check);
		else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
			curr->csum_offset = offsetof(struct udphdr, check);

		if (!(status & NETDEV_TX_MASK))
			status = sunvnet_start_xmit_common(curr, dev,
							   vnet_tx_port);
		if (status & NETDEV_TX_MASK)
			dev_kfree_skb_any(curr);
	}

	if (!(status & NETDEV_TX_MASK))
		dev_kfree_skb_any(skb);
	return status;
out_dropped:
	dev->stats.tx_dropped++;
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

netdev_tx_t
sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
			  struct vnet_port *(*vnet_tx_port)
			  (struct sk_buff *, struct net_device *))
{
	struct vnet_port *port = NULL;
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned int len;
	struct sk_buff *freeskbs = NULL;
	int i, err, txi;
	unsigned pending = 0;
	struct netdev_queue *txq;

	rcu_read_lock();
	port = vnet_tx_port(skb, dev);
	if (unlikely(!port))
		goto out_dropped;

	if (skb_is_gso(skb) && skb->len > port->tsolen) {
		err = vnet_handle_offloads(port, skb, vnet_tx_port);
		rcu_read_unlock();
		return err;
	}

	if (!skb_is_gso(skb) && skb->len > port->rmtu) {
		unsigned long localmtu = port->rmtu - ETH_HLEN;

		if (vio_version_after_eq(&port->vio, 1, 3))
			localmtu -= VLAN_HLEN;

		if (skb->protocol == htons(ETH_P_IP))
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				      htonl(localmtu));
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
#endif
		goto out_dropped;
	}

	skb = vnet_skb_shape(skb, 2);

	if (unlikely(!skb))
		goto out_dropped;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (skb->protocol == htons(ETH_P_IP))
			vnet_fullcsum_ipv4(skb);
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			vnet_fullcsum_ipv6(skb);
#endif
	}

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	i = skb_get_queue_mapping(skb);
	txq = netdev_get_tx_queue(dev, i);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		if (!netif_tx_queue_stopped(txq)) {
			netif_tx_stop_queue(txq);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		rcu_read_unlock();
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	txi = dr->prod;

	freeskbs = vnet_clean_tx_ring(port, &pending);

	BUG_ON(port->tx_bufs[txi].skb);

	len = skb->len;
	if (len < ETH_ZLEN)
		len = ETH_ZLEN;

	err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
			   (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
	if (err < 0) {
		netdev_info(dev, "tx buffer map error %d\n", err);
		goto out_dropped;
	}

	port->tx_bufs[txi].skb = skb;
	skb = NULL;
	port->tx_bufs[txi].ncookies = err;

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
						->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer.  The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed.  We send a "start" trigger here
	 * if, and only if, start_cons is true (reset it afterward).  Conversely,
	 * vnet_ack() should check if the dring corresponding to cons
	 * is marked READY, but start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the dma_wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler.  The consumer will
	 * not proceed until the VIO_DESC_READY is visible assuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order.
	 * The HV trap from the LDC start trigger is the
	 * producer to consumer announcement that work is available to the
	 * consumer
	 */
	if (!port->start_cons) { /* previous trigger suffices */
		trace_vnet_skip_tx_trigger(port->vio._local_sid,
					   port->vio._peer_sid, dr->cons);
		goto ldc_start_done;
	}

	err = __vnet_tx_trigger(port, dr->cons);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		skb = port->tx_bufs[txi].skb;
		port->tx_bufs[txi].skb = NULL;
		dev->stats.tx_carrier_errors++;
		goto out_dropped;
	}

ldc_start_done:
	port->start_cons = false;

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
	port->stats.tx_packets++;
	port->stats.tx_bytes += port->tx_bufs[txi].skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		netif_tx_stop_queue(txq);
		smp_rmb();
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_tx_wake_queue(txq);
	}

	(void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
	rcu_read_unlock();

	vnet_free_skbs(freeskbs);

	return NETDEV_TX_OK;

out_dropped:
	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else if (port)
		del_timer(&port->clean_timer);
	rcu_read_unlock();
	dev_kfree_skb(skb);
	vnet_free_skbs(freeskbs);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);

void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue)
{
	/* XXX Implement me XXX */
}
EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);

int sunvnet_open_common(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_tx_start_all_queues(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_open_common);

int sunvnet_close_common(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_close_common);

static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}

static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		if (!m) {
			m = kzalloc(sizeof(*m), GFP_ATOMIC);
			if (!m)
				continue;
			memcpy(m->addr, ha->addr, ETH_ALEN);
			m->hit = 1;

			m->next = vp->mcast_list;
			vp->mcast_list = m;
		}
	}
}

static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void)vio_ldc_send(&port->vio, &info,
					   sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void)vio_ldc_send(&port->vio, &info, sizeof(info));
	}

	info.set = 0;

	n_addrs = 0;
	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void)vio_ldc_send(&port->vio, &info,
					   sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void)vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}

void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;

	rcu_read_lock();
	list_for_each_entry_rcu(port, &vp->port_list, list) {
		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
			break;
		}
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);

int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);

void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	if (!dr->base)
		return;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		struct vio_net_desc *d;
		void *skb = port->tx_bufs[i].skb;

		if (!skb)
			continue;

		d = vio_dring_entry(dr, i);

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);
		dev_kfree_skb(skb);
		port->tx_bufs[i].skb = NULL;
		d->hdr.state = VIO_DESC_FREE;
	}
	ldc_free_exp_dring(port->vio.lp, dr->base,
			   (dr->entry_size * dr->num_entries),
			   dr->cookies, dr->ncookies);
	dr->base = NULL;
	dr->entry_size = 0;
	dr->num_entries = 0;
	dr->pending = 0;
	dr->ncookies = 0;
}
EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);

void vnet_port_reset(struct vnet_port *port)
{
	del_timer(&port->clean_timer);
	sunvnet_port_free_tx_bufs_common(port);
	port->rmtu = 0;
	port->tso = (port->vsw == 0);  /* no tso in vsw, misbehaves in bridge */
	port->tsolen = 0;
}
EXPORT_SYMBOL_GPL(vnet_port_reset);

static int vnet_port_alloc_tx_ring(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len, elen;
	int i, err, ncookies;
	void *dring;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	elen = sizeof(struct vio_net_desc) +
	       sizeof(struct ldc_trans_cookie) * 2;
	if (vio_version_after_eq(&port->vio, 1, 7))
		elen += sizeof(struct vio_net_dext);
	len = VNET_TX_RING_SIZE * elen;

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = elen;
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = 0;
	dr->cons = 0;
	port->start_cons = true; /* need an initial trigger */
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, i);
		d->hdr.state = VIO_DESC_FREE;
	}
	return 0;

err_out:
	sunvnet_port_free_tx_bufs_common(port);

	return err;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
#endif

void sunvnet_port_add_txq_common(struct vnet_port *port)
{
	struct vnet *vp = port->vp;
	int smallest = 0;
	int i;

	/* find the first least-used q
	 * When there are more ldoms than q's, we start to
	 * double up on ports per queue.
	 */
	for (i = 0; i < VNET_MAX_TXQS; i++) {
		if (vp->q_used[i] == 0) {
			smallest = i;
			break;
		}
		if (vp->q_used[i] < vp->q_used[smallest])
			smallest = i;
	}

	vp->nports++;
	vp->q_used[smallest]++;
	port->q_index = smallest;
}
EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);

void sunvnet_port_rm_txq_common(struct vnet_port *port)
{
	port->vp->nports--;
	port->vp->q_used[port->q_index]--;
	port->q_index = 0;
}
EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);
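
/* Usage sketch (not compiled): the exported *_common helpers above are meant
 * to be wired into a front-end driver's net_device_ops, the way the sunvnet
 * and ldmvsw front ends do.  The snippet below is illustrative only; the
 * names example_start_xmit, example_tx_port_find and example_vnet_ops are
 * hypothetical and not part of this file, and real front ends also hook up
 * rx-mode, queue selection, NAPI (sunvnet_poll_common) and the LDC event
 * callback (sunvnet_event_common).
 */
#if 0
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	/* example_tx_port_find() stands in for the front end's own
	 * skb -> vnet_port lookup (e.g. by destination MAC address).
	 */
	return sunvnet_start_xmit_common(skb, dev, example_tx_port_find);
}

static const struct net_device_ops example_vnet_ops = {
	.ndo_open		= sunvnet_open_common,
	.ndo_stop		= sunvnet_close_common,
	.ndo_set_mac_address	= sunvnet_set_mac_addr_common,
	.ndo_tx_timeout		= sunvnet_tx_timeout_common,
	.ndo_start_xmit		= example_start_xmit,
};
#endif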