1 // SPDX-License-Identifier: GPL-2.0 2 /* sunvnet.c: Sun LDOM Virtual Network Driver. 3 * 4 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> 5 * Copyright (C) 2016-2017 Oracle. All rights reserved. 6 */ 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/types.h> 11 #include <linux/slab.h> 12 #include <linux/delay.h> 13 #include <linux/init.h> 14 #include <linux/netdevice.h> 15 #include <linux/ethtool.h> 16 #include <linux/etherdevice.h> 17 #include <linux/mutex.h> 18 #include <linux/highmem.h> 19 #include <linux/if_vlan.h> 20 #define CREATE_TRACE_POINTS 21 #include <trace/events/sunvnet.h> 22 23 #if IS_ENABLED(CONFIG_IPV6) 24 #include <linux/icmpv6.h> 25 #endif 26 27 #include <net/ip.h> 28 #include <net/icmp.h> 29 #include <net/route.h> 30 31 #include <asm/vio.h> 32 #include <asm/ldc.h> 33 34 #include "sunvnet_common.h" 35 36 /* Heuristic for the number of times to exponentially backoff and 37 * retry sending an LDC trigger when EAGAIN is encountered 38 */ 39 #define VNET_MAX_RETRIES 10 40 41 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); 42 MODULE_DESCRIPTION("Sun LDOM virtual network support library"); 43 MODULE_LICENSE("GPL"); 44 MODULE_VERSION("1.1"); 45 46 static int __vnet_tx_trigger(struct vnet_port *port, u32 start); 47 48 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr) 49 { 50 return vio_dring_avail(dr, VNET_TX_RING_SIZE); 51 } 52 53 static int vnet_handle_unknown(struct vnet_port *port, void *arg) 54 { 55 struct vio_msg_tag *pkt = arg; 56 57 pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n", 58 pkt->type, pkt->stype, pkt->stype_env, pkt->sid); 59 pr_err("Resetting connection\n"); 60 61 ldc_disconnect(port->vio.lp); 62 63 return -ECONNRESET; 64 } 65 66 static int vnet_port_alloc_tx_ring(struct vnet_port *port); 67 68 int sunvnet_send_attr_common(struct vio_driver_state *vio) 69 { 70 struct vnet_port *port = to_vnet_port(vio); 71 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 72 struct vio_net_attr_info pkt; 73 int framelen = ETH_FRAME_LEN; 74 int i, err; 75 76 err = vnet_port_alloc_tx_ring(to_vnet_port(vio)); 77 if (err) 78 return err; 79 80 memset(&pkt, 0, sizeof(pkt)); 81 pkt.tag.type = VIO_TYPE_CTRL; 82 pkt.tag.stype = VIO_SUBTYPE_INFO; 83 pkt.tag.stype_env = VIO_ATTR_INFO; 84 pkt.tag.sid = vio_send_sid(vio); 85 if (vio_version_before(vio, 1, 2)) 86 pkt.xfer_mode = VIO_DRING_MODE; 87 else 88 pkt.xfer_mode = VIO_NEW_DRING_MODE; 89 pkt.addr_type = VNET_ADDR_ETHERMAC; 90 pkt.ack_freq = 0; 91 for (i = 0; i < 6; i++) 92 pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8); 93 if (vio_version_after(vio, 1, 3)) { 94 if (port->rmtu) { 95 port->rmtu = min(VNET_MAXPACKET, port->rmtu); 96 pkt.mtu = port->rmtu; 97 } else { 98 port->rmtu = VNET_MAXPACKET; 99 pkt.mtu = port->rmtu; 100 } 101 if (vio_version_after_eq(vio, 1, 6)) 102 pkt.options = VIO_TX_DRING; 103 } else if (vio_version_before(vio, 1, 3)) { 104 pkt.mtu = framelen; 105 } else { /* v1.3 */ 106 pkt.mtu = framelen + VLAN_HLEN; 107 } 108 109 pkt.cflags = 0; 110 if (vio_version_after_eq(vio, 1, 7) && port->tso) { 111 pkt.cflags |= VNET_LSO_IPV4_CAPAB; 112 if (!port->tsolen) 113 port->tsolen = VNET_MAXTSO; 114 pkt.ipv4_lso_maxlen = port->tsolen; 115 } 116 117 pkt.plnk_updt = PHYSLINK_UPDATE_NONE; 118 119 viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 120 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 121 "cflags[0x%04x] lso_max[%u]\n", 122 pkt.xfer_mode, pkt.addr_type, 123 (unsigned long long)pkt.addr, 124 pkt.ack_freq, pkt.plnk_updt, pkt.options, 125 (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen); 126 127 return vio_ldc_send(vio, &pkt, sizeof(pkt)); 128 } 129 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common); 130 131 static int handle_attr_info(struct vio_driver_state *vio, 132 struct vio_net_attr_info *pkt) 133 { 134 struct vnet_port *port = to_vnet_port(vio); 135 u64 localmtu; 136 u8 xfer_mode; 137 138 viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 139 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 140 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 141 pkt->xfer_mode, pkt->addr_type, 142 (unsigned long long)pkt->addr, 143 pkt->ack_freq, pkt->plnk_updt, pkt->options, 144 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 145 pkt->ipv4_lso_maxlen); 146 147 pkt->tag.sid = vio_send_sid(vio); 148 149 xfer_mode = pkt->xfer_mode; 150 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */ 151 if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE) 152 xfer_mode = VIO_NEW_DRING_MODE; 153 154 /* MTU negotiation: 155 * < v1.3 - ETH_FRAME_LEN exactly 156 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change 157 * pkt->mtu for ACK 158 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly 159 */ 160 if (vio_version_before(vio, 1, 3)) { 161 localmtu = ETH_FRAME_LEN; 162 } else if (vio_version_after(vio, 1, 3)) { 163 localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET; 164 localmtu = min(pkt->mtu, localmtu); 165 pkt->mtu = localmtu; 166 } else { /* v1.3 */ 167 localmtu = ETH_FRAME_LEN + VLAN_HLEN; 168 } 169 port->rmtu = localmtu; 170 171 /* LSO negotiation */ 172 if (vio_version_after_eq(vio, 1, 7)) 173 port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB); 174 else 175 port->tso = false; 176 if (port->tso) { 177 if (!port->tsolen) 178 port->tsolen = VNET_MAXTSO; 179 port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen); 180 if (port->tsolen < VNET_MINTSO) { 181 port->tso = false; 182 port->tsolen = 0; 183 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; 184 } 185 pkt->ipv4_lso_maxlen = port->tsolen; 186 } else { 187 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; 188 pkt->ipv4_lso_maxlen = 0; 189 port->tsolen = 0; 190 } 191 192 /* for version >= 1.6, ACK packet mode we support */ 193 if (vio_version_after_eq(vio, 1, 6)) { 194 pkt->xfer_mode = VIO_NEW_DRING_MODE; 195 pkt->options = VIO_TX_DRING; 196 } 197 198 if (!(xfer_mode | VIO_NEW_DRING_MODE) || 199 pkt->addr_type != VNET_ADDR_ETHERMAC || 200 pkt->mtu != localmtu) { 201 viodbg(HS, "SEND NET ATTR NACK\n"); 202 203 pkt->tag.stype = VIO_SUBTYPE_NACK; 204 205 (void)vio_ldc_send(vio, pkt, sizeof(*pkt)); 206 207 return -ECONNRESET; 208 } 209 210 viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] " 211 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] " 212 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 213 pkt->xfer_mode, pkt->addr_type, 214 (unsigned long long)pkt->addr, 215 pkt->ack_freq, pkt->plnk_updt, pkt->options, 216 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 217 pkt->ipv4_lso_maxlen); 218 219 pkt->tag.stype = VIO_SUBTYPE_ACK; 220 221 return vio_ldc_send(vio, pkt, sizeof(*pkt)); 222 } 223 224 static int handle_attr_ack(struct vio_driver_state *vio, 225 struct vio_net_attr_info *pkt) 226 { 227 viodbg(HS, "GOT NET ATTR ACK\n"); 228 229 return 0; 230 } 231 232 static int handle_attr_nack(struct vio_driver_state *vio, 233 struct vio_net_attr_info *pkt) 234 { 235 viodbg(HS, "GOT NET ATTR NACK\n"); 236 237 return -ECONNRESET; 238 } 239 240 int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg) 241 { 242 struct vio_net_attr_info *pkt = arg; 243 244 switch (pkt->tag.stype) { 245 case VIO_SUBTYPE_INFO: 246 return handle_attr_info(vio, pkt); 247 248 case VIO_SUBTYPE_ACK: 249 return handle_attr_ack(vio, pkt); 250 251 case VIO_SUBTYPE_NACK: 252 return handle_attr_nack(vio, pkt); 253 254 default: 255 return -ECONNRESET; 256 } 257 } 258 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common); 259 260 void sunvnet_handshake_complete_common(struct vio_driver_state *vio) 261 { 262 struct vio_dring_state *dr; 263 264 dr = &vio->drings[VIO_DRIVER_RX_RING]; 265 dr->rcv_nxt = 1; 266 dr->snd_nxt = 1; 267 268 dr = &vio->drings[VIO_DRIVER_TX_RING]; 269 dr->rcv_nxt = 1; 270 dr->snd_nxt = 1; 271 } 272 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common); 273 274 /* The hypervisor interface that implements copying to/from imported 275 * memory from another domain requires that copies are done to 8-byte 276 * aligned buffers, and that the lengths of such copies are also 8-byte 277 * multiples. 278 * 279 * So we align skb->data to an 8-byte multiple and pad-out the data 280 * area so we can round the copy length up to the next multiple of 281 * 8 for the copy. 282 * 283 * The transmitter puts the actual start of the packet 6 bytes into 284 * the buffer it sends over, so that the IP headers after the ethernet 285 * header are aligned properly. These 6 bytes are not in the descriptor 286 * length, they are simply implied. This offset is represented using 287 * the VNET_PACKET_SKIP macro. 288 */ 289 static struct sk_buff *alloc_and_align_skb(struct net_device *dev, 290 unsigned int len) 291 { 292 struct sk_buff *skb; 293 unsigned long addr, off; 294 295 skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8); 296 if (unlikely(!skb)) 297 return NULL; 298 299 addr = (unsigned long)skb->data; 300 off = ((addr + 7UL) & ~7UL) - addr; 301 if (off) 302 skb_reserve(skb, off); 303 304 return skb; 305 } 306 307 static inline void vnet_fullcsum_ipv4(struct sk_buff *skb) 308 { 309 struct iphdr *iph = ip_hdr(skb); 310 int offset = skb_transport_offset(skb); 311 312 if (skb->protocol != htons(ETH_P_IP)) 313 return; 314 if (iph->protocol != IPPROTO_TCP && 315 iph->protocol != IPPROTO_UDP) 316 return; 317 skb->ip_summed = CHECKSUM_NONE; 318 skb->csum_level = 1; 319 skb->csum = 0; 320 if (iph->protocol == IPPROTO_TCP) { 321 struct tcphdr *ptcp = tcp_hdr(skb); 322 323 ptcp->check = 0; 324 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 325 ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 326 skb->len - offset, IPPROTO_TCP, 327 skb->csum); 328 } else if (iph->protocol == IPPROTO_UDP) { 329 struct udphdr *pudp = udp_hdr(skb); 330 331 pudp->check = 0; 332 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 333 pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 334 skb->len - offset, IPPROTO_UDP, 335 skb->csum); 336 } 337 } 338 339 #if IS_ENABLED(CONFIG_IPV6) 340 static inline void vnet_fullcsum_ipv6(struct sk_buff *skb) 341 { 342 struct ipv6hdr *ip6h = ipv6_hdr(skb); 343 int offset = skb_transport_offset(skb); 344 345 if (skb->protocol != htons(ETH_P_IPV6)) 346 return; 347 if (ip6h->nexthdr != IPPROTO_TCP && 348 ip6h->nexthdr != IPPROTO_UDP) 349 return; 350 skb->ip_summed = CHECKSUM_NONE; 351 skb->csum_level = 1; 352 skb->csum = 0; 353 if (ip6h->nexthdr == IPPROTO_TCP) { 354 struct tcphdr *ptcp = tcp_hdr(skb); 355 356 ptcp->check = 0; 357 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 358 ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 359 skb->len - offset, IPPROTO_TCP, 360 skb->csum); 361 } else if (ip6h->nexthdr == IPPROTO_UDP) { 362 struct udphdr *pudp = udp_hdr(skb); 363 364 pudp->check = 0; 365 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 366 pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 367 skb->len - offset, IPPROTO_UDP, 368 skb->csum); 369 } 370 } 371 #endif 372 373 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) 374 { 375 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 376 unsigned int len = desc->size; 377 unsigned int copy_len; 378 struct sk_buff *skb; 379 int maxlen; 380 int err; 381 382 err = -EMSGSIZE; 383 if (port->tso && port->tsolen > port->rmtu) 384 maxlen = port->tsolen; 385 else 386 maxlen = port->rmtu; 387 if (unlikely(len < ETH_ZLEN || len > maxlen)) { 388 dev->stats.rx_length_errors++; 389 goto out_dropped; 390 } 391 392 skb = alloc_and_align_skb(dev, len); 393 err = -ENOMEM; 394 if (unlikely(!skb)) { 395 dev->stats.rx_missed_errors++; 396 goto out_dropped; 397 } 398 399 copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U; 400 skb_put(skb, copy_len); 401 err = ldc_copy(port->vio.lp, LDC_COPY_IN, 402 skb->data, copy_len, 0, 403 desc->cookies, desc->ncookies); 404 if (unlikely(err < 0)) { 405 dev->stats.rx_frame_errors++; 406 goto out_free_skb; 407 } 408 409 skb_pull(skb, VNET_PACKET_SKIP); 410 skb_trim(skb, len); 411 skb->protocol = eth_type_trans(skb, dev); 412 413 if (vio_version_after_eq(&port->vio, 1, 8)) { 414 struct vio_net_dext *dext = vio_net_ext(desc); 415 416 skb_reset_network_header(skb); 417 418 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) { 419 if (skb->protocol == ETH_P_IP) { 420 struct iphdr *iph = ip_hdr(skb); 421 422 iph->check = 0; 423 ip_send_check(iph); 424 } 425 } 426 if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) && 427 skb->ip_summed == CHECKSUM_NONE) { 428 if (skb->protocol == htons(ETH_P_IP)) { 429 struct iphdr *iph = ip_hdr(skb); 430 int ihl = iph->ihl * 4; 431 432 skb_set_transport_header(skb, ihl); 433 vnet_fullcsum_ipv4(skb); 434 #if IS_ENABLED(CONFIG_IPV6) 435 } else if (skb->protocol == htons(ETH_P_IPV6)) { 436 skb_set_transport_header(skb, 437 sizeof(struct ipv6hdr)); 438 vnet_fullcsum_ipv6(skb); 439 #endif 440 } 441 } 442 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) { 443 skb->ip_summed = CHECKSUM_PARTIAL; 444 skb->csum_level = 0; 445 if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK) 446 skb->csum_level = 1; 447 } 448 } 449 450 skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL; 451 452 if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest))) 453 dev->stats.multicast++; 454 dev->stats.rx_packets++; 455 dev->stats.rx_bytes += len; 456 port->stats.rx_packets++; 457 port->stats.rx_bytes += len; 458 napi_gro_receive(&port->napi, skb); 459 return 0; 460 461 out_free_skb: 462 kfree_skb(skb); 463 464 out_dropped: 465 dev->stats.rx_dropped++; 466 return err; 467 } 468 469 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, 470 u32 start, u32 end, u8 vio_dring_state) 471 { 472 struct vio_dring_data hdr = { 473 .tag = { 474 .type = VIO_TYPE_DATA, 475 .stype = VIO_SUBTYPE_ACK, 476 .stype_env = VIO_DRING_DATA, 477 .sid = vio_send_sid(&port->vio), 478 }, 479 .dring_ident = dr->ident, 480 .start_idx = start, 481 .end_idx = end, 482 .state = vio_dring_state, 483 }; 484 int err, delay; 485 int retries = 0; 486 487 hdr.seq = dr->snd_nxt; 488 delay = 1; 489 do { 490 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 491 if (err > 0) { 492 dr->snd_nxt++; 493 break; 494 } 495 udelay(delay); 496 if ((delay <<= 1) > 128) 497 delay = 128; 498 if (retries++ > VNET_MAX_RETRIES) { 499 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n", 500 port->raddr[0], port->raddr[1], 501 port->raddr[2], port->raddr[3], 502 port->raddr[4], port->raddr[5]); 503 break; 504 } 505 } while (err == -EAGAIN); 506 507 if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) { 508 port->stop_rx_idx = end; 509 port->stop_rx = true; 510 } else { 511 port->stop_rx_idx = 0; 512 port->stop_rx = false; 513 } 514 515 return err; 516 } 517 518 static struct vio_net_desc *get_rx_desc(struct vnet_port *port, 519 struct vio_dring_state *dr, 520 u32 index) 521 { 522 struct vio_net_desc *desc = port->vio.desc_buf; 523 int err; 524 525 err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size, 526 (index * dr->entry_size), 527 dr->cookies, dr->ncookies); 528 if (err < 0) 529 return ERR_PTR(err); 530 531 return desc; 532 } 533 534 static int put_rx_desc(struct vnet_port *port, 535 struct vio_dring_state *dr, 536 struct vio_net_desc *desc, 537 u32 index) 538 { 539 int err; 540 541 err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size, 542 (index * dr->entry_size), 543 dr->cookies, dr->ncookies); 544 if (err < 0) 545 return err; 546 547 return 0; 548 } 549 550 static int vnet_walk_rx_one(struct vnet_port *port, 551 struct vio_dring_state *dr, 552 u32 index, int *needs_ack) 553 { 554 struct vio_net_desc *desc = get_rx_desc(port, dr, index); 555 struct vio_driver_state *vio = &port->vio; 556 int err; 557 558 BUG_ON(!desc); 559 if (IS_ERR(desc)) 560 return PTR_ERR(desc); 561 562 if (desc->hdr.state != VIO_DESC_READY) 563 return 1; 564 565 dma_rmb(); 566 567 viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n", 568 desc->hdr.state, desc->hdr.ack, 569 desc->size, desc->ncookies, 570 desc->cookies[0].cookie_addr, 571 desc->cookies[0].cookie_size); 572 573 err = vnet_rx_one(port, desc); 574 if (err == -ECONNRESET) 575 return err; 576 trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid, 577 index, desc->hdr.ack); 578 desc->hdr.state = VIO_DESC_DONE; 579 err = put_rx_desc(port, dr, desc, index); 580 if (err < 0) 581 return err; 582 *needs_ack = desc->hdr.ack; 583 return 0; 584 } 585 586 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr, 587 u32 start, u32 end, int *npkts, int budget) 588 { 589 struct vio_driver_state *vio = &port->vio; 590 int ack_start = -1, ack_end = -1; 591 bool send_ack = true; 592 593 end = (end == (u32)-1) ? vio_dring_prev(dr, start) 594 : vio_dring_next(dr, end); 595 596 viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end); 597 598 while (start != end) { 599 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack); 600 601 if (err == -ECONNRESET) 602 return err; 603 if (err != 0) 604 break; 605 (*npkts)++; 606 if (ack_start == -1) 607 ack_start = start; 608 ack_end = start; 609 start = vio_dring_next(dr, start); 610 if (ack && start != end) { 611 err = vnet_send_ack(port, dr, ack_start, ack_end, 612 VIO_DRING_ACTIVE); 613 if (err == -ECONNRESET) 614 return err; 615 ack_start = -1; 616 } 617 if ((*npkts) >= budget) { 618 send_ack = false; 619 break; 620 } 621 } 622 if (unlikely(ack_start == -1)) { 623 ack_end = vio_dring_prev(dr, start); 624 ack_start = ack_end; 625 } 626 if (send_ack) { 627 port->napi_resume = false; 628 trace_vnet_tx_send_stopped_ack(port->vio._local_sid, 629 port->vio._peer_sid, 630 ack_end, *npkts); 631 return vnet_send_ack(port, dr, ack_start, ack_end, 632 VIO_DRING_STOPPED); 633 } else { 634 trace_vnet_tx_defer_stopped_ack(port->vio._local_sid, 635 port->vio._peer_sid, 636 ack_end, *npkts); 637 port->napi_resume = true; 638 port->napi_stop_idx = ack_end; 639 return 1; 640 } 641 } 642 643 static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts, 644 int budget) 645 { 646 struct vio_dring_data *pkt = msgbuf; 647 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING]; 648 struct vio_driver_state *vio = &port->vio; 649 650 viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n", 651 pkt->tag.stype_env, pkt->seq, dr->rcv_nxt); 652 653 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 654 return 0; 655 if (unlikely(pkt->seq != dr->rcv_nxt)) { 656 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n", 657 pkt->seq, dr->rcv_nxt); 658 return 0; 659 } 660 661 if (!port->napi_resume) 662 dr->rcv_nxt++; 663 664 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */ 665 666 return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx, 667 npkts, budget); 668 } 669 670 static int idx_is_pending(struct vio_dring_state *dr, u32 end) 671 { 672 u32 idx = dr->cons; 673 int found = 0; 674 675 while (idx != dr->prod) { 676 if (idx == end) { 677 found = 1; 678 break; 679 } 680 idx = vio_dring_next(dr, idx); 681 } 682 return found; 683 } 684 685 static int vnet_ack(struct vnet_port *port, void *msgbuf) 686 { 687 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 688 struct vio_dring_data *pkt = msgbuf; 689 struct net_device *dev; 690 u32 end; 691 struct vio_net_desc *desc; 692 struct netdev_queue *txq; 693 694 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 695 return 0; 696 697 end = pkt->end_idx; 698 dev = VNET_PORT_TO_NET_DEVICE(port); 699 netif_tx_lock(dev); 700 if (unlikely(!idx_is_pending(dr, end))) { 701 netif_tx_unlock(dev); 702 return 0; 703 } 704 705 /* sync for race conditions with vnet_start_xmit() and tell xmit it 706 * is time to send a trigger. 707 */ 708 trace_vnet_rx_stopped_ack(port->vio._local_sid, 709 port->vio._peer_sid, end); 710 dr->cons = vio_dring_next(dr, end); 711 desc = vio_dring_entry(dr, dr->cons); 712 if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) { 713 /* vnet_start_xmit() just populated this dring but missed 714 * sending the "start" LDC message to the consumer. 715 * Send a "start" trigger on its behalf. 716 */ 717 if (__vnet_tx_trigger(port, dr->cons) > 0) 718 port->start_cons = false; 719 else 720 port->start_cons = true; 721 } else { 722 port->start_cons = true; 723 } 724 netif_tx_unlock(dev); 725 726 txq = netdev_get_tx_queue(dev, port->q_index); 727 if (unlikely(netif_tx_queue_stopped(txq) && 728 vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr))) 729 return 1; 730 731 return 0; 732 } 733 734 static int vnet_nack(struct vnet_port *port, void *msgbuf) 735 { 736 /* XXX just reset or similar XXX */ 737 return 0; 738 } 739 740 static int handle_mcast(struct vnet_port *port, void *msgbuf) 741 { 742 struct vio_net_mcast_info *pkt = msgbuf; 743 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 744 745 if (pkt->tag.stype != VIO_SUBTYPE_ACK) 746 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n", 747 dev->name, 748 pkt->tag.type, 749 pkt->tag.stype, 750 pkt->tag.stype_env, 751 pkt->tag.sid); 752 753 return 0; 754 } 755 756 /* If the queue is stopped, wake it up so that we'll 757 * send out another START message at the next TX. 758 */ 759 static void maybe_tx_wakeup(struct vnet_port *port) 760 { 761 struct netdev_queue *txq; 762 763 txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), 764 port->q_index); 765 __netif_tx_lock(txq, smp_processor_id()); 766 if (likely(netif_tx_queue_stopped(txq))) 767 netif_tx_wake_queue(txq); 768 __netif_tx_unlock(txq); 769 } 770 771 bool sunvnet_port_is_up_common(struct vnet_port *vnet) 772 { 773 struct vio_driver_state *vio = &vnet->vio; 774 775 return !!(vio->hs_state & VIO_HS_COMPLETE); 776 } 777 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common); 778 779 static int vnet_event_napi(struct vnet_port *port, int budget) 780 { 781 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 782 struct vio_driver_state *vio = &port->vio; 783 int tx_wakeup, err; 784 int npkts = 0; 785 786 /* we don't expect any other bits */ 787 BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY | 788 LDC_EVENT_RESET | 789 LDC_EVENT_UP)); 790 791 /* RESET takes precedent over any other event */ 792 if (port->rx_event & LDC_EVENT_RESET) { 793 /* a link went down */ 794 795 if (port->vsw == 1) { 796 netif_tx_stop_all_queues(dev); 797 netif_carrier_off(dev); 798 } 799 800 vio_link_state_change(vio, LDC_EVENT_RESET); 801 vnet_port_reset(port); 802 vio_port_up(vio); 803 804 /* If the device is running but its tx queue was 805 * stopped (due to flow control), restart it. 806 * This is necessary since vnet_port_reset() 807 * clears the tx drings and thus we may never get 808 * back a VIO_TYPE_DATA ACK packet - which is 809 * the normal mechanism to restart the tx queue. 810 */ 811 if (netif_running(dev)) 812 maybe_tx_wakeup(port); 813 814 port->rx_event = 0; 815 port->stats.event_reset++; 816 return 0; 817 } 818 819 if (port->rx_event & LDC_EVENT_UP) { 820 /* a link came up */ 821 822 if (port->vsw == 1) { 823 netif_carrier_on(port->dev); 824 netif_tx_start_all_queues(port->dev); 825 } 826 827 vio_link_state_change(vio, LDC_EVENT_UP); 828 port->rx_event = 0; 829 port->stats.event_up++; 830 return 0; 831 } 832 833 err = 0; 834 tx_wakeup = 0; 835 while (1) { 836 union { 837 struct vio_msg_tag tag; 838 u64 raw[8]; 839 } msgbuf; 840 841 if (port->napi_resume) { 842 struct vio_dring_data *pkt = 843 (struct vio_dring_data *)&msgbuf; 844 struct vio_dring_state *dr = 845 &port->vio.drings[VIO_DRIVER_RX_RING]; 846 847 pkt->tag.type = VIO_TYPE_DATA; 848 pkt->tag.stype = VIO_SUBTYPE_INFO; 849 pkt->tag.stype_env = VIO_DRING_DATA; 850 pkt->seq = dr->rcv_nxt; 851 pkt->start_idx = vio_dring_next(dr, 852 port->napi_stop_idx); 853 pkt->end_idx = -1; 854 } else { 855 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf)); 856 if (unlikely(err < 0)) { 857 if (err == -ECONNRESET) 858 vio_conn_reset(vio); 859 break; 860 } 861 if (err == 0) 862 break; 863 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", 864 msgbuf.tag.type, 865 msgbuf.tag.stype, 866 msgbuf.tag.stype_env, 867 msgbuf.tag.sid); 868 err = vio_validate_sid(vio, &msgbuf.tag); 869 if (err < 0) 870 break; 871 } 872 873 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) { 874 if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) { 875 if (!sunvnet_port_is_up_common(port)) { 876 /* failures like handshake_failure() 877 * may have cleaned up dring, but 878 * NAPI polling may bring us here. 879 */ 880 err = -ECONNRESET; 881 break; 882 } 883 err = vnet_rx(port, &msgbuf, &npkts, budget); 884 if (npkts >= budget) 885 break; 886 if (npkts == 0) 887 break; 888 } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) { 889 err = vnet_ack(port, &msgbuf); 890 if (err > 0) 891 tx_wakeup |= err; 892 } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) { 893 err = vnet_nack(port, &msgbuf); 894 } 895 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) { 896 if (msgbuf.tag.stype_env == VNET_MCAST_INFO) 897 err = handle_mcast(port, &msgbuf); 898 else 899 err = vio_control_pkt_engine(vio, &msgbuf); 900 if (err) 901 break; 902 } else { 903 err = vnet_handle_unknown(port, &msgbuf); 904 } 905 if (err == -ECONNRESET) 906 break; 907 } 908 if (unlikely(tx_wakeup && err != -ECONNRESET)) 909 maybe_tx_wakeup(port); 910 return npkts; 911 } 912 913 int sunvnet_poll_common(struct napi_struct *napi, int budget) 914 { 915 struct vnet_port *port = container_of(napi, struct vnet_port, napi); 916 struct vio_driver_state *vio = &port->vio; 917 int processed = vnet_event_napi(port, budget); 918 919 if (processed < budget) { 920 napi_complete_done(napi, processed); 921 port->rx_event &= ~LDC_EVENT_DATA_READY; 922 vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED); 923 } 924 return processed; 925 } 926 EXPORT_SYMBOL_GPL(sunvnet_poll_common); 927 928 void sunvnet_event_common(void *arg, int event) 929 { 930 struct vnet_port *port = arg; 931 struct vio_driver_state *vio = &port->vio; 932 933 port->rx_event |= event; 934 vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED); 935 napi_schedule(&port->napi); 936 } 937 EXPORT_SYMBOL_GPL(sunvnet_event_common); 938 939 static int __vnet_tx_trigger(struct vnet_port *port, u32 start) 940 { 941 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 942 struct vio_dring_data hdr = { 943 .tag = { 944 .type = VIO_TYPE_DATA, 945 .stype = VIO_SUBTYPE_INFO, 946 .stype_env = VIO_DRING_DATA, 947 .sid = vio_send_sid(&port->vio), 948 }, 949 .dring_ident = dr->ident, 950 .start_idx = start, 951 .end_idx = (u32)-1, 952 }; 953 int err, delay; 954 int retries = 0; 955 956 if (port->stop_rx) { 957 trace_vnet_tx_pending_stopped_ack(port->vio._local_sid, 958 port->vio._peer_sid, 959 port->stop_rx_idx, -1); 960 err = vnet_send_ack(port, 961 &port->vio.drings[VIO_DRIVER_RX_RING], 962 port->stop_rx_idx, -1, 963 VIO_DRING_STOPPED); 964 if (err <= 0) 965 return err; 966 } 967 968 hdr.seq = dr->snd_nxt; 969 delay = 1; 970 do { 971 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 972 if (err > 0) { 973 dr->snd_nxt++; 974 break; 975 } 976 udelay(delay); 977 if ((delay <<= 1) > 128) 978 delay = 128; 979 if (retries++ > VNET_MAX_RETRIES) 980 break; 981 } while (err == -EAGAIN); 982 trace_vnet_tx_trigger(port->vio._local_sid, 983 port->vio._peer_sid, start, err); 984 985 return err; 986 } 987 988 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port, 989 unsigned *pending) 990 { 991 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 992 struct sk_buff *skb = NULL; 993 int i, txi; 994 995 *pending = 0; 996 997 txi = dr->prod; 998 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 999 struct vio_net_desc *d; 1000 1001 --txi; 1002 if (txi < 0) 1003 txi = VNET_TX_RING_SIZE - 1; 1004 1005 d = vio_dring_entry(dr, txi); 1006 1007 if (d->hdr.state == VIO_DESC_READY) { 1008 (*pending)++; 1009 continue; 1010 } 1011 if (port->tx_bufs[txi].skb) { 1012 if (d->hdr.state != VIO_DESC_DONE) 1013 pr_notice("invalid ring buffer state %d\n", 1014 d->hdr.state); 1015 BUG_ON(port->tx_bufs[txi].skb->next); 1016 1017 port->tx_bufs[txi].skb->next = skb; 1018 skb = port->tx_bufs[txi].skb; 1019 port->tx_bufs[txi].skb = NULL; 1020 1021 ldc_unmap(port->vio.lp, 1022 port->tx_bufs[txi].cookies, 1023 port->tx_bufs[txi].ncookies); 1024 } else if (d->hdr.state == VIO_DESC_FREE) { 1025 break; 1026 } 1027 d->hdr.state = VIO_DESC_FREE; 1028 } 1029 return skb; 1030 } 1031 1032 static inline void vnet_free_skbs(struct sk_buff *skb) 1033 { 1034 struct sk_buff *next; 1035 1036 while (skb) { 1037 next = skb->next; 1038 skb->next = NULL; 1039 dev_kfree_skb(skb); 1040 skb = next; 1041 } 1042 } 1043 1044 void sunvnet_clean_timer_expire_common(struct timer_list *t) 1045 { 1046 struct vnet_port *port = from_timer(port, t, clean_timer); 1047 struct sk_buff *freeskbs; 1048 unsigned pending; 1049 1050 netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port)); 1051 freeskbs = vnet_clean_tx_ring(port, &pending); 1052 netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port)); 1053 1054 vnet_free_skbs(freeskbs); 1055 1056 if (pending) 1057 (void)mod_timer(&port->clean_timer, 1058 jiffies + VNET_CLEAN_TIMEOUT); 1059 else 1060 del_timer(&port->clean_timer); 1061 } 1062 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common); 1063 1064 static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb, 1065 struct ldc_trans_cookie *cookies, int ncookies, 1066 unsigned int map_perm) 1067 { 1068 int i, nc, err, blen; 1069 1070 /* header */ 1071 blen = skb_headlen(skb); 1072 if (blen < ETH_ZLEN) 1073 blen = ETH_ZLEN; 1074 blen += VNET_PACKET_SKIP; 1075 blen += 8 - (blen & 7); 1076 1077 err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies, 1078 ncookies, map_perm); 1079 if (err < 0) 1080 return err; 1081 nc = err; 1082 1083 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1084 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 1085 u8 *vaddr; 1086 1087 if (nc < ncookies) { 1088 vaddr = kmap_atomic(skb_frag_page(f)); 1089 blen = skb_frag_size(f); 1090 blen += 8 - (blen & 7); 1091 err = ldc_map_single(lp, vaddr + f->page_offset, 1092 blen, cookies + nc, ncookies - nc, 1093 map_perm); 1094 kunmap_atomic(vaddr); 1095 } else { 1096 err = -EMSGSIZE; 1097 } 1098 1099 if (err < 0) { 1100 ldc_unmap(lp, cookies, nc); 1101 return err; 1102 } 1103 nc += err; 1104 } 1105 return nc; 1106 } 1107 1108 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) 1109 { 1110 struct sk_buff *nskb; 1111 int i, len, pad, docopy; 1112 1113 len = skb->len; 1114 pad = 0; 1115 if (len < ETH_ZLEN) { 1116 pad += ETH_ZLEN - skb->len; 1117 len += pad; 1118 } 1119 len += VNET_PACKET_SKIP; 1120 pad += 8 - (len & 7); 1121 1122 /* make sure we have enough cookies and alignment in every frag */ 1123 docopy = skb_shinfo(skb)->nr_frags >= ncookies; 1124 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1125 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 1126 1127 docopy |= f->page_offset & 7; 1128 } 1129 if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP || 1130 skb_tailroom(skb) < pad || 1131 skb_headroom(skb) < VNET_PACKET_SKIP || docopy) { 1132 int start = 0, offset; 1133 __wsum csum; 1134 1135 len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN; 1136 nskb = alloc_and_align_skb(skb->dev, len); 1137 if (!nskb) { 1138 dev_kfree_skb(skb); 1139 return NULL; 1140 } 1141 skb_reserve(nskb, VNET_PACKET_SKIP); 1142 1143 nskb->protocol = skb->protocol; 1144 offset = skb_mac_header(skb) - skb->data; 1145 skb_set_mac_header(nskb, offset); 1146 offset = skb_network_header(skb) - skb->data; 1147 skb_set_network_header(nskb, offset); 1148 offset = skb_transport_header(skb) - skb->data; 1149 skb_set_transport_header(nskb, offset); 1150 1151 offset = 0; 1152 nskb->csum_offset = skb->csum_offset; 1153 nskb->ip_summed = skb->ip_summed; 1154 1155 if (skb->ip_summed == CHECKSUM_PARTIAL) 1156 start = skb_checksum_start_offset(skb); 1157 if (start) { 1158 int offset = start + nskb->csum_offset; 1159 1160 /* copy the headers, no csum here */ 1161 if (skb_copy_bits(skb, 0, nskb->data, start)) { 1162 dev_kfree_skb(nskb); 1163 dev_kfree_skb(skb); 1164 return NULL; 1165 } 1166 1167 /* copy the rest, with csum calculation */ 1168 *(__sum16 *)(skb->data + offset) = 0; 1169 csum = skb_copy_and_csum_bits(skb, start, 1170 nskb->data + start, 1171 skb->len - start, 0); 1172 1173 /* add in the header checksums */ 1174 if (skb->protocol == htons(ETH_P_IP)) { 1175 struct iphdr *iph = ip_hdr(nskb); 1176 1177 if (iph->protocol == IPPROTO_TCP || 1178 iph->protocol == IPPROTO_UDP) { 1179 csum = csum_tcpudp_magic(iph->saddr, 1180 iph->daddr, 1181 skb->len - start, 1182 iph->protocol, 1183 csum); 1184 } 1185 } else if (skb->protocol == htons(ETH_P_IPV6)) { 1186 struct ipv6hdr *ip6h = ipv6_hdr(nskb); 1187 1188 if (ip6h->nexthdr == IPPROTO_TCP || 1189 ip6h->nexthdr == IPPROTO_UDP) { 1190 csum = csum_ipv6_magic(&ip6h->saddr, 1191 &ip6h->daddr, 1192 skb->len - start, 1193 ip6h->nexthdr, 1194 csum); 1195 } 1196 } 1197 1198 /* save the final result */ 1199 *(__sum16 *)(nskb->data + offset) = csum; 1200 1201 nskb->ip_summed = CHECKSUM_NONE; 1202 } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) { 1203 dev_kfree_skb(nskb); 1204 dev_kfree_skb(skb); 1205 return NULL; 1206 } 1207 (void)skb_put(nskb, skb->len); 1208 if (skb_is_gso(skb)) { 1209 skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size; 1210 skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; 1211 } 1212 nskb->queue_mapping = skb->queue_mapping; 1213 dev_kfree_skb(skb); 1214 skb = nskb; 1215 } 1216 return skb; 1217 } 1218 1219 static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, 1220 struct vnet_port *(*vnet_tx_port) 1221 (struct sk_buff *, struct net_device *)) 1222 { 1223 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 1224 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1225 struct sk_buff *segs; 1226 int maclen, datalen; 1227 int status; 1228 int gso_size, gso_type, gso_segs; 1229 int hlen = skb_transport_header(skb) - skb_mac_header(skb); 1230 int proto = IPPROTO_IP; 1231 1232 if (skb->protocol == htons(ETH_P_IP)) 1233 proto = ip_hdr(skb)->protocol; 1234 else if (skb->protocol == htons(ETH_P_IPV6)) 1235 proto = ipv6_hdr(skb)->nexthdr; 1236 1237 if (proto == IPPROTO_TCP) { 1238 hlen += tcp_hdr(skb)->doff * 4; 1239 } else if (proto == IPPROTO_UDP) { 1240 hlen += sizeof(struct udphdr); 1241 } else { 1242 pr_err("vnet_handle_offloads GSO with unknown transport " 1243 "protocol %d tproto %d\n", skb->protocol, proto); 1244 hlen = 128; /* XXX */ 1245 } 1246 datalen = port->tsolen - hlen; 1247 1248 gso_size = skb_shinfo(skb)->gso_size; 1249 gso_type = skb_shinfo(skb)->gso_type; 1250 gso_segs = skb_shinfo(skb)->gso_segs; 1251 1252 if (port->tso && gso_size < datalen) 1253 gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen); 1254 1255 if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) { 1256 struct netdev_queue *txq; 1257 1258 txq = netdev_get_tx_queue(dev, port->q_index); 1259 netif_tx_stop_queue(txq); 1260 if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs) 1261 return NETDEV_TX_BUSY; 1262 netif_tx_wake_queue(txq); 1263 } 1264 1265 maclen = skb_network_header(skb) - skb_mac_header(skb); 1266 skb_pull(skb, maclen); 1267 1268 if (port->tso && gso_size < datalen) { 1269 if (skb_unclone(skb, GFP_ATOMIC)) 1270 goto out_dropped; 1271 1272 /* segment to TSO size */ 1273 skb_shinfo(skb)->gso_size = datalen; 1274 skb_shinfo(skb)->gso_segs = gso_segs; 1275 } 1276 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); 1277 if (IS_ERR(segs)) 1278 goto out_dropped; 1279 1280 skb_push(skb, maclen); 1281 skb_reset_mac_header(skb); 1282 1283 status = 0; 1284 while (segs) { 1285 struct sk_buff *curr = segs; 1286 1287 segs = segs->next; 1288 curr->next = NULL; 1289 if (port->tso && curr->len > dev->mtu) { 1290 skb_shinfo(curr)->gso_size = gso_size; 1291 skb_shinfo(curr)->gso_type = gso_type; 1292 skb_shinfo(curr)->gso_segs = 1293 DIV_ROUND_UP(curr->len - hlen, gso_size); 1294 } else { 1295 skb_shinfo(curr)->gso_size = 0; 1296 } 1297 1298 skb_push(curr, maclen); 1299 skb_reset_mac_header(curr); 1300 memcpy(skb_mac_header(curr), skb_mac_header(skb), 1301 maclen); 1302 curr->csum_start = skb_transport_header(curr) - curr->head; 1303 if (ip_hdr(curr)->protocol == IPPROTO_TCP) 1304 curr->csum_offset = offsetof(struct tcphdr, check); 1305 else if (ip_hdr(curr)->protocol == IPPROTO_UDP) 1306 curr->csum_offset = offsetof(struct udphdr, check); 1307 1308 if (!(status & NETDEV_TX_MASK)) 1309 status = sunvnet_start_xmit_common(curr, dev, 1310 vnet_tx_port); 1311 if (status & NETDEV_TX_MASK) 1312 dev_kfree_skb_any(curr); 1313 } 1314 1315 if (!(status & NETDEV_TX_MASK)) 1316 dev_kfree_skb_any(skb); 1317 return status; 1318 out_dropped: 1319 dev->stats.tx_dropped++; 1320 dev_kfree_skb_any(skb); 1321 return NETDEV_TX_OK; 1322 } 1323 1324 int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, 1325 struct vnet_port *(*vnet_tx_port) 1326 (struct sk_buff *, struct net_device *)) 1327 { 1328 struct vnet_port *port = NULL; 1329 struct vio_dring_state *dr; 1330 struct vio_net_desc *d; 1331 unsigned int len; 1332 struct sk_buff *freeskbs = NULL; 1333 int i, err, txi; 1334 unsigned pending = 0; 1335 struct netdev_queue *txq; 1336 1337 rcu_read_lock(); 1338 port = vnet_tx_port(skb, dev); 1339 if (unlikely(!port)) 1340 goto out_dropped; 1341 1342 if (skb_is_gso(skb) && skb->len > port->tsolen) { 1343 err = vnet_handle_offloads(port, skb, vnet_tx_port); 1344 rcu_read_unlock(); 1345 return err; 1346 } 1347 1348 if (!skb_is_gso(skb) && skb->len > port->rmtu) { 1349 unsigned long localmtu = port->rmtu - ETH_HLEN; 1350 1351 if (vio_version_after_eq(&port->vio, 1, 3)) 1352 localmtu -= VLAN_HLEN; 1353 1354 if (skb->protocol == htons(ETH_P_IP)) { 1355 struct flowi4 fl4; 1356 struct rtable *rt = NULL; 1357 1358 memset(&fl4, 0, sizeof(fl4)); 1359 fl4.flowi4_oif = dev->ifindex; 1360 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 1361 fl4.daddr = ip_hdr(skb)->daddr; 1362 fl4.saddr = ip_hdr(skb)->saddr; 1363 1364 rt = ip_route_output_key(dev_net(dev), &fl4); 1365 if (!IS_ERR(rt)) { 1366 skb_dst_set(skb, &rt->dst); 1367 icmp_send(skb, ICMP_DEST_UNREACH, 1368 ICMP_FRAG_NEEDED, 1369 htonl(localmtu)); 1370 } 1371 } 1372 #if IS_ENABLED(CONFIG_IPV6) 1373 else if (skb->protocol == htons(ETH_P_IPV6)) 1374 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu); 1375 #endif 1376 goto out_dropped; 1377 } 1378 1379 skb = vnet_skb_shape(skb, 2); 1380 1381 if (unlikely(!skb)) 1382 goto out_dropped; 1383 1384 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1385 if (skb->protocol == htons(ETH_P_IP)) 1386 vnet_fullcsum_ipv4(skb); 1387 #if IS_ENABLED(CONFIG_IPV6) 1388 else if (skb->protocol == htons(ETH_P_IPV6)) 1389 vnet_fullcsum_ipv6(skb); 1390 #endif 1391 } 1392 1393 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1394 i = skb_get_queue_mapping(skb); 1395 txq = netdev_get_tx_queue(dev, i); 1396 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1397 if (!netif_tx_queue_stopped(txq)) { 1398 netif_tx_stop_queue(txq); 1399 1400 /* This is a hard error, log it. */ 1401 netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); 1402 dev->stats.tx_errors++; 1403 } 1404 rcu_read_unlock(); 1405 return NETDEV_TX_BUSY; 1406 } 1407 1408 d = vio_dring_cur(dr); 1409 1410 txi = dr->prod; 1411 1412 freeskbs = vnet_clean_tx_ring(port, &pending); 1413 1414 BUG_ON(port->tx_bufs[txi].skb); 1415 1416 len = skb->len; 1417 if (len < ETH_ZLEN) 1418 len = ETH_ZLEN; 1419 1420 err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2, 1421 (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW)); 1422 if (err < 0) { 1423 netdev_info(dev, "tx buffer map error %d\n", err); 1424 goto out_dropped; 1425 } 1426 1427 port->tx_bufs[txi].skb = skb; 1428 skb = NULL; 1429 port->tx_bufs[txi].ncookies = err; 1430 1431 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(), 1432 * thus it is safe to not set VIO_ACK_ENABLE for each transmission: 1433 * the protocol itself does not require it as long as the peer 1434 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED. 1435 * 1436 * An ACK for every packet in the ring is expensive as the 1437 * sending of LDC messages is slow and affects performance. 1438 */ 1439 d->hdr.ack = VIO_ACK_DISABLE; 1440 d->size = len; 1441 d->ncookies = port->tx_bufs[txi].ncookies; 1442 for (i = 0; i < d->ncookies; i++) 1443 d->cookies[i] = port->tx_bufs[txi].cookies[i]; 1444 if (vio_version_after_eq(&port->vio, 1, 7)) { 1445 struct vio_net_dext *dext = vio_net_ext(d); 1446 1447 memset(dext, 0, sizeof(*dext)); 1448 if (skb_is_gso(port->tx_bufs[txi].skb)) { 1449 dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb) 1450 ->gso_size; 1451 dext->flags |= VNET_PKT_IPV4_LSO; 1452 } 1453 if (vio_version_after_eq(&port->vio, 1, 8) && 1454 !port->switch_port) { 1455 dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK; 1456 dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK; 1457 } 1458 } 1459 1460 /* This has to be a non-SMP write barrier because we are writing 1461 * to memory which is shared with the peer LDOM. 1462 */ 1463 dma_wmb(); 1464 1465 d->hdr.state = VIO_DESC_READY; 1466 1467 /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent 1468 * to notify the consumer that some descriptors are READY. 1469 * After that "start" trigger, no additional triggers are needed until 1470 * a DRING_STOPPED is received from the consumer. The dr->cons field 1471 * (set up by vnet_ack()) has the value of the next dring index 1472 * that has not yet been ack-ed. We send a "start" trigger here 1473 * if, and only if, start_cons is true (reset it afterward). Conversely, 1474 * vnet_ack() should check if the dring corresponding to cons 1475 * is marked READY, but start_cons was false. 1476 * If so, vnet_ack() should send out the missed "start" trigger. 1477 * 1478 * Note that the dma_wmb() above makes sure the cookies et al. are 1479 * not globally visible before the VIO_DESC_READY, and that the 1480 * stores are ordered correctly by the compiler. The consumer will 1481 * not proceed until the VIO_DESC_READY is visible assuring that 1482 * the consumer does not observe anything related to descriptors 1483 * out of order. The HV trap from the LDC start trigger is the 1484 * producer to consumer announcement that work is available to the 1485 * consumer 1486 */ 1487 if (!port->start_cons) { /* previous trigger suffices */ 1488 trace_vnet_skip_tx_trigger(port->vio._local_sid, 1489 port->vio._peer_sid, dr->cons); 1490 goto ldc_start_done; 1491 } 1492 1493 err = __vnet_tx_trigger(port, dr->cons); 1494 if (unlikely(err < 0)) { 1495 netdev_info(dev, "TX trigger error %d\n", err); 1496 d->hdr.state = VIO_DESC_FREE; 1497 skb = port->tx_bufs[txi].skb; 1498 port->tx_bufs[txi].skb = NULL; 1499 dev->stats.tx_carrier_errors++; 1500 goto out_dropped; 1501 } 1502 1503 ldc_start_done: 1504 port->start_cons = false; 1505 1506 dev->stats.tx_packets++; 1507 dev->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1508 port->stats.tx_packets++; 1509 port->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1510 1511 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); 1512 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1513 netif_tx_stop_queue(txq); 1514 smp_rmb(); 1515 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) 1516 netif_tx_wake_queue(txq); 1517 } 1518 1519 (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT); 1520 rcu_read_unlock(); 1521 1522 vnet_free_skbs(freeskbs); 1523 1524 return NETDEV_TX_OK; 1525 1526 out_dropped: 1527 if (pending) 1528 (void)mod_timer(&port->clean_timer, 1529 jiffies + VNET_CLEAN_TIMEOUT); 1530 else if (port) 1531 del_timer(&port->clean_timer); 1532 rcu_read_unlock(); 1533 if (skb) 1534 dev_kfree_skb(skb); 1535 vnet_free_skbs(freeskbs); 1536 dev->stats.tx_dropped++; 1537 return NETDEV_TX_OK; 1538 } 1539 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); 1540 1541 void sunvnet_tx_timeout_common(struct net_device *dev) 1542 { 1543 /* XXX Implement me XXX */ 1544 } 1545 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common); 1546 1547 int sunvnet_open_common(struct net_device *dev) 1548 { 1549 netif_carrier_on(dev); 1550 netif_tx_start_all_queues(dev); 1551 1552 return 0; 1553 } 1554 EXPORT_SYMBOL_GPL(sunvnet_open_common); 1555 1556 int sunvnet_close_common(struct net_device *dev) 1557 { 1558 netif_tx_stop_all_queues(dev); 1559 netif_carrier_off(dev); 1560 1561 return 0; 1562 } 1563 EXPORT_SYMBOL_GPL(sunvnet_close_common); 1564 1565 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr) 1566 { 1567 struct vnet_mcast_entry *m; 1568 1569 for (m = vp->mcast_list; m; m = m->next) { 1570 if (ether_addr_equal(m->addr, addr)) 1571 return m; 1572 } 1573 return NULL; 1574 } 1575 1576 static void __update_mc_list(struct vnet *vp, struct net_device *dev) 1577 { 1578 struct netdev_hw_addr *ha; 1579 1580 netdev_for_each_mc_addr(ha, dev) { 1581 struct vnet_mcast_entry *m; 1582 1583 m = __vnet_mc_find(vp, ha->addr); 1584 if (m) { 1585 m->hit = 1; 1586 continue; 1587 } 1588 1589 if (!m) { 1590 m = kzalloc(sizeof(*m), GFP_ATOMIC); 1591 if (!m) 1592 continue; 1593 memcpy(m->addr, ha->addr, ETH_ALEN); 1594 m->hit = 1; 1595 1596 m->next = vp->mcast_list; 1597 vp->mcast_list = m; 1598 } 1599 } 1600 } 1601 1602 static void __send_mc_list(struct vnet *vp, struct vnet_port *port) 1603 { 1604 struct vio_net_mcast_info info; 1605 struct vnet_mcast_entry *m, **pp; 1606 int n_addrs; 1607 1608 memset(&info, 0, sizeof(info)); 1609 1610 info.tag.type = VIO_TYPE_CTRL; 1611 info.tag.stype = VIO_SUBTYPE_INFO; 1612 info.tag.stype_env = VNET_MCAST_INFO; 1613 info.tag.sid = vio_send_sid(&port->vio); 1614 info.set = 1; 1615 1616 n_addrs = 0; 1617 for (m = vp->mcast_list; m; m = m->next) { 1618 if (m->sent) 1619 continue; 1620 m->sent = 1; 1621 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1622 m->addr, ETH_ALEN); 1623 if (++n_addrs == VNET_NUM_MCAST) { 1624 info.count = n_addrs; 1625 1626 (void)vio_ldc_send(&port->vio, &info, 1627 sizeof(info)); 1628 n_addrs = 0; 1629 } 1630 } 1631 if (n_addrs) { 1632 info.count = n_addrs; 1633 (void)vio_ldc_send(&port->vio, &info, sizeof(info)); 1634 } 1635 1636 info.set = 0; 1637 1638 n_addrs = 0; 1639 pp = &vp->mcast_list; 1640 while ((m = *pp) != NULL) { 1641 if (m->hit) { 1642 m->hit = 0; 1643 pp = &m->next; 1644 continue; 1645 } 1646 1647 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1648 m->addr, ETH_ALEN); 1649 if (++n_addrs == VNET_NUM_MCAST) { 1650 info.count = n_addrs; 1651 (void)vio_ldc_send(&port->vio, &info, 1652 sizeof(info)); 1653 n_addrs = 0; 1654 } 1655 1656 *pp = m->next; 1657 kfree(m); 1658 } 1659 if (n_addrs) { 1660 info.count = n_addrs; 1661 (void)vio_ldc_send(&port->vio, &info, sizeof(info)); 1662 } 1663 } 1664 1665 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp) 1666 { 1667 struct vnet_port *port; 1668 1669 rcu_read_lock(); 1670 list_for_each_entry_rcu(port, &vp->port_list, list) { 1671 if (port->switch_port) { 1672 __update_mc_list(vp, dev); 1673 __send_mc_list(vp, port); 1674 break; 1675 } 1676 } 1677 rcu_read_unlock(); 1678 } 1679 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common); 1680 1681 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p) 1682 { 1683 return -EINVAL; 1684 } 1685 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common); 1686 1687 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port) 1688 { 1689 struct vio_dring_state *dr; 1690 int i; 1691 1692 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1693 1694 if (!dr->base) 1695 return; 1696 1697 for (i = 0; i < VNET_TX_RING_SIZE; i++) { 1698 struct vio_net_desc *d; 1699 void *skb = port->tx_bufs[i].skb; 1700 1701 if (!skb) 1702 continue; 1703 1704 d = vio_dring_entry(dr, i); 1705 1706 ldc_unmap(port->vio.lp, 1707 port->tx_bufs[i].cookies, 1708 port->tx_bufs[i].ncookies); 1709 dev_kfree_skb(skb); 1710 port->tx_bufs[i].skb = NULL; 1711 d->hdr.state = VIO_DESC_FREE; 1712 } 1713 ldc_free_exp_dring(port->vio.lp, dr->base, 1714 (dr->entry_size * dr->num_entries), 1715 dr->cookies, dr->ncookies); 1716 dr->base = NULL; 1717 dr->entry_size = 0; 1718 dr->num_entries = 0; 1719 dr->pending = 0; 1720 dr->ncookies = 0; 1721 } 1722 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common); 1723 1724 void vnet_port_reset(struct vnet_port *port) 1725 { 1726 del_timer(&port->clean_timer); 1727 sunvnet_port_free_tx_bufs_common(port); 1728 port->rmtu = 0; 1729 port->tso = (port->vsw == 0); /* no tso in vsw, misbehaves in bridge */ 1730 port->tsolen = 0; 1731 } 1732 EXPORT_SYMBOL_GPL(vnet_port_reset); 1733 1734 static int vnet_port_alloc_tx_ring(struct vnet_port *port) 1735 { 1736 struct vio_dring_state *dr; 1737 unsigned long len, elen; 1738 int i, err, ncookies; 1739 void *dring; 1740 1741 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1742 1743 elen = sizeof(struct vio_net_desc) + 1744 sizeof(struct ldc_trans_cookie) * 2; 1745 if (vio_version_after_eq(&port->vio, 1, 7)) 1746 elen += sizeof(struct vio_net_dext); 1747 len = VNET_TX_RING_SIZE * elen; 1748 1749 ncookies = VIO_MAX_RING_COOKIES; 1750 dring = ldc_alloc_exp_dring(port->vio.lp, len, 1751 dr->cookies, &ncookies, 1752 (LDC_MAP_SHADOW | 1753 LDC_MAP_DIRECT | 1754 LDC_MAP_RW)); 1755 if (IS_ERR(dring)) { 1756 err = PTR_ERR(dring); 1757 goto err_out; 1758 } 1759 1760 dr->base = dring; 1761 dr->entry_size = elen; 1762 dr->num_entries = VNET_TX_RING_SIZE; 1763 dr->prod = 0; 1764 dr->cons = 0; 1765 port->start_cons = true; /* need an initial trigger */ 1766 dr->pending = VNET_TX_RING_SIZE; 1767 dr->ncookies = ncookies; 1768 1769 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 1770 struct vio_net_desc *d; 1771 1772 d = vio_dring_entry(dr, i); 1773 d->hdr.state = VIO_DESC_FREE; 1774 } 1775 return 0; 1776 1777 err_out: 1778 sunvnet_port_free_tx_bufs_common(port); 1779 1780 return err; 1781 } 1782 1783 #ifdef CONFIG_NET_POLL_CONTROLLER 1784 void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp) 1785 { 1786 struct vnet_port *port; 1787 unsigned long flags; 1788 1789 spin_lock_irqsave(&vp->lock, flags); 1790 if (!list_empty(&vp->port_list)) { 1791 port = list_entry(vp->port_list.next, struct vnet_port, list); 1792 napi_schedule(&port->napi); 1793 } 1794 spin_unlock_irqrestore(&vp->lock, flags); 1795 } 1796 EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common); 1797 #endif 1798 1799 void sunvnet_port_add_txq_common(struct vnet_port *port) 1800 { 1801 struct vnet *vp = port->vp; 1802 int smallest = 0; 1803 int i; 1804 1805 /* find the first least-used q 1806 * When there are more ldoms than q's, we start to 1807 * double up on ports per queue. 1808 */ 1809 for (i = 0; i < VNET_MAX_TXQS; i++) { 1810 if (vp->q_used[i] == 0) { 1811 smallest = i; 1812 break; 1813 } 1814 if (vp->q_used[i] < vp->q_used[smallest]) 1815 smallest = i; 1816 } 1817 1818 vp->nports++; 1819 vp->q_used[smallest]++; 1820 port->q_index = smallest; 1821 } 1822 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common); 1823 1824 void sunvnet_port_rm_txq_common(struct vnet_port *port) 1825 { 1826 port->vp->nports--; 1827 port->vp->q_used[port->q_index]--; 1828 port->q_index = 0; 1829 } 1830 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common); 1831