1 /* sunvnet.c: Sun LDOM Virtual Network Driver. 2 * 3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> 4 * Copyright (C) 2016-2017 Oracle. All rights reserved. 5 */ 6 7 #include <linux/module.h> 8 #include <linux/kernel.h> 9 #include <linux/types.h> 10 #include <linux/slab.h> 11 #include <linux/delay.h> 12 #include <linux/init.h> 13 #include <linux/netdevice.h> 14 #include <linux/ethtool.h> 15 #include <linux/etherdevice.h> 16 #include <linux/mutex.h> 17 #include <linux/highmem.h> 18 #include <linux/if_vlan.h> 19 #define CREATE_TRACE_POINTS 20 #include <trace/events/sunvnet.h> 21 22 #if IS_ENABLED(CONFIG_IPV6) 23 #include <linux/icmpv6.h> 24 #endif 25 26 #include <net/ip.h> 27 #include <net/icmp.h> 28 #include <net/route.h> 29 30 #include <asm/vio.h> 31 #include <asm/ldc.h> 32 33 #include "sunvnet_common.h" 34 35 /* Heuristic for the number of times to exponentially backoff and 36 * retry sending an LDC trigger when EAGAIN is encountered 37 */ 38 #define VNET_MAX_RETRIES 10 39 40 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); 41 MODULE_DESCRIPTION("Sun LDOM virtual network support library"); 42 MODULE_LICENSE("GPL"); 43 MODULE_VERSION("1.1"); 44 45 static int __vnet_tx_trigger(struct vnet_port *port, u32 start); 46 47 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr) 48 { 49 return vio_dring_avail(dr, VNET_TX_RING_SIZE); 50 } 51 52 static int vnet_handle_unknown(struct vnet_port *port, void *arg) 53 { 54 struct vio_msg_tag *pkt = arg; 55 56 pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n", 57 pkt->type, pkt->stype, pkt->stype_env, pkt->sid); 58 pr_err("Resetting connection\n"); 59 60 ldc_disconnect(port->vio.lp); 61 62 return -ECONNRESET; 63 } 64 65 static int vnet_port_alloc_tx_ring(struct vnet_port *port); 66 67 int sunvnet_send_attr_common(struct vio_driver_state *vio) 68 { 69 struct vnet_port *port = to_vnet_port(vio); 70 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 71 struct vio_net_attr_info pkt; 72 int framelen = ETH_FRAME_LEN; 73 int i, err; 74 75 err = vnet_port_alloc_tx_ring(to_vnet_port(vio)); 76 if (err) 77 return err; 78 79 memset(&pkt, 0, sizeof(pkt)); 80 pkt.tag.type = VIO_TYPE_CTRL; 81 pkt.tag.stype = VIO_SUBTYPE_INFO; 82 pkt.tag.stype_env = VIO_ATTR_INFO; 83 pkt.tag.sid = vio_send_sid(vio); 84 if (vio_version_before(vio, 1, 2)) 85 pkt.xfer_mode = VIO_DRING_MODE; 86 else 87 pkt.xfer_mode = VIO_NEW_DRING_MODE; 88 pkt.addr_type = VNET_ADDR_ETHERMAC; 89 pkt.ack_freq = 0; 90 for (i = 0; i < 6; i++) 91 pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8); 92 if (vio_version_after(vio, 1, 3)) { 93 if (port->rmtu) { 94 port->rmtu = min(VNET_MAXPACKET, port->rmtu); 95 pkt.mtu = port->rmtu; 96 } else { 97 port->rmtu = VNET_MAXPACKET; 98 pkt.mtu = port->rmtu; 99 } 100 if (vio_version_after_eq(vio, 1, 6)) 101 pkt.options = VIO_TX_DRING; 102 } else if (vio_version_before(vio, 1, 3)) { 103 pkt.mtu = framelen; 104 } else { /* v1.3 */ 105 pkt.mtu = framelen + VLAN_HLEN; 106 } 107 108 pkt.cflags = 0; 109 if (vio_version_after_eq(vio, 1, 7) && port->tso) { 110 pkt.cflags |= VNET_LSO_IPV4_CAPAB; 111 if (!port->tsolen) 112 port->tsolen = VNET_MAXTSO; 113 pkt.ipv4_lso_maxlen = port->tsolen; 114 } 115 116 pkt.plnk_updt = PHYSLINK_UPDATE_NONE; 117 118 viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 119 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 120 "cflags[0x%04x] lso_max[%u]\n", 121 pkt.xfer_mode, pkt.addr_type, 122 (unsigned long long)pkt.addr, 123 pkt.ack_freq, pkt.plnk_updt, pkt.options, 124 (unsigned long 
long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen); 125 126 return vio_ldc_send(vio, &pkt, sizeof(pkt)); 127 } 128 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common); 129 130 static int handle_attr_info(struct vio_driver_state *vio, 131 struct vio_net_attr_info *pkt) 132 { 133 struct vnet_port *port = to_vnet_port(vio); 134 u64 localmtu; 135 u8 xfer_mode; 136 137 viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 138 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 139 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 140 pkt->xfer_mode, pkt->addr_type, 141 (unsigned long long)pkt->addr, 142 pkt->ack_freq, pkt->plnk_updt, pkt->options, 143 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 144 pkt->ipv4_lso_maxlen); 145 146 pkt->tag.sid = vio_send_sid(vio); 147 148 xfer_mode = pkt->xfer_mode; 149 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */ 150 if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE) 151 xfer_mode = VIO_NEW_DRING_MODE; 152 153 /* MTU negotiation: 154 * < v1.3 - ETH_FRAME_LEN exactly 155 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change 156 * pkt->mtu for ACK 157 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly 158 */ 159 if (vio_version_before(vio, 1, 3)) { 160 localmtu = ETH_FRAME_LEN; 161 } else if (vio_version_after(vio, 1, 3)) { 162 localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET; 163 localmtu = min(pkt->mtu, localmtu); 164 pkt->mtu = localmtu; 165 } else { /* v1.3 */ 166 localmtu = ETH_FRAME_LEN + VLAN_HLEN; 167 } 168 port->rmtu = localmtu; 169 170 /* LSO negotiation */ 171 if (vio_version_after_eq(vio, 1, 7)) 172 port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB); 173 else 174 port->tso = false; 175 if (port->tso) { 176 if (!port->tsolen) 177 port->tsolen = VNET_MAXTSO; 178 port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen); 179 if (port->tsolen < VNET_MINTSO) { 180 port->tso = false; 181 port->tsolen = 0; 182 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; 183 } 184 pkt->ipv4_lso_maxlen = port->tsolen; 185 } else { 186 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; 187 pkt->ipv4_lso_maxlen = 0; 188 port->tsolen = 0; 189 } 190 191 /* for version >= 1.6, ACK packet mode we support */ 192 if (vio_version_after_eq(vio, 1, 6)) { 193 pkt->xfer_mode = VIO_NEW_DRING_MODE; 194 pkt->options = VIO_TX_DRING; 195 } 196 197 if (!(xfer_mode | VIO_NEW_DRING_MODE) || 198 pkt->addr_type != VNET_ADDR_ETHERMAC || 199 pkt->mtu != localmtu) { 200 viodbg(HS, "SEND NET ATTR NACK\n"); 201 202 pkt->tag.stype = VIO_SUBTYPE_NACK; 203 204 (void)vio_ldc_send(vio, pkt, sizeof(*pkt)); 205 206 return -ECONNRESET; 207 } 208 209 viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] " 210 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] " 211 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 212 pkt->xfer_mode, pkt->addr_type, 213 (unsigned long long)pkt->addr, 214 pkt->ack_freq, pkt->plnk_updt, pkt->options, 215 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 216 pkt->ipv4_lso_maxlen); 217 218 pkt->tag.stype = VIO_SUBTYPE_ACK; 219 220 return vio_ldc_send(vio, pkt, sizeof(*pkt)); 221 } 222 223 static int handle_attr_ack(struct vio_driver_state *vio, 224 struct vio_net_attr_info *pkt) 225 { 226 viodbg(HS, "GOT NET ATTR ACK\n"); 227 228 return 0; 229 } 230 231 static int handle_attr_nack(struct vio_driver_state *vio, 232 struct vio_net_attr_info *pkt) 233 { 234 viodbg(HS, "GOT NET ATTR NACK\n"); 235 236 return -ECONNRESET; 237 } 238 239 int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg) 240 { 241 struct vio_net_attr_info *pkt = arg; 242 
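	/* Dispatch on the attribute message subtype: an INFO from the peer
	 * runs the negotiation in handle_attr_info() and is answered with an
	 * ACK or NACK, an ACK completes our side of the exchange, and a NACK
	 * (or an unrecognized subtype) forces a connection reset.
	 */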
243 switch (pkt->tag.stype) { 244 case VIO_SUBTYPE_INFO: 245 return handle_attr_info(vio, pkt); 246 247 case VIO_SUBTYPE_ACK: 248 return handle_attr_ack(vio, pkt); 249 250 case VIO_SUBTYPE_NACK: 251 return handle_attr_nack(vio, pkt); 252 253 default: 254 return -ECONNRESET; 255 } 256 } 257 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common); 258 259 void sunvnet_handshake_complete_common(struct vio_driver_state *vio) 260 { 261 struct vio_dring_state *dr; 262 263 dr = &vio->drings[VIO_DRIVER_RX_RING]; 264 dr->rcv_nxt = 1; 265 dr->snd_nxt = 1; 266 267 dr = &vio->drings[VIO_DRIVER_TX_RING]; 268 dr->rcv_nxt = 1; 269 dr->snd_nxt = 1; 270 } 271 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common); 272 273 /* The hypervisor interface that implements copying to/from imported 274 * memory from another domain requires that copies are done to 8-byte 275 * aligned buffers, and that the lengths of such copies are also 8-byte 276 * multiples. 277 * 278 * So we align skb->data to an 8-byte multiple and pad-out the data 279 * area so we can round the copy length up to the next multiple of 280 * 8 for the copy. 281 * 282 * The transmitter puts the actual start of the packet 6 bytes into 283 * the buffer it sends over, so that the IP headers after the ethernet 284 * header are aligned properly. These 6 bytes are not in the descriptor 285 * length, they are simply implied. This offset is represented using 286 * the VNET_PACKET_SKIP macro. 287 */ 288 static struct sk_buff *alloc_and_align_skb(struct net_device *dev, 289 unsigned int len) 290 { 291 struct sk_buff *skb; 292 unsigned long addr, off; 293 294 skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8); 295 if (unlikely(!skb)) 296 return NULL; 297 298 addr = (unsigned long)skb->data; 299 off = ((addr + 7UL) & ~7UL) - addr; 300 if (off) 301 skb_reserve(skb, off); 302 303 return skb; 304 } 305 306 static inline void vnet_fullcsum(struct sk_buff *skb) 307 { 308 struct iphdr *iph = ip_hdr(skb); 309 int offset = skb_transport_offset(skb); 310 311 if (skb->protocol != htons(ETH_P_IP)) 312 return; 313 if (iph->protocol != IPPROTO_TCP && 314 iph->protocol != IPPROTO_UDP) 315 return; 316 skb->ip_summed = CHECKSUM_NONE; 317 skb->csum_level = 1; 318 skb->csum = 0; 319 if (iph->protocol == IPPROTO_TCP) { 320 struct tcphdr *ptcp = tcp_hdr(skb); 321 322 ptcp->check = 0; 323 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 324 ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 325 skb->len - offset, IPPROTO_TCP, 326 skb->csum); 327 } else if (iph->protocol == IPPROTO_UDP) { 328 struct udphdr *pudp = udp_hdr(skb); 329 330 pudp->check = 0; 331 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 332 pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 333 skb->len - offset, IPPROTO_UDP, 334 skb->csum); 335 } 336 } 337 338 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) 339 { 340 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 341 unsigned int len = desc->size; 342 unsigned int copy_len; 343 struct sk_buff *skb; 344 int maxlen; 345 int err; 346 347 err = -EMSGSIZE; 348 if (port->tso && port->tsolen > port->rmtu) 349 maxlen = port->tsolen; 350 else 351 maxlen = port->rmtu; 352 if (unlikely(len < ETH_ZLEN || len > maxlen)) { 353 dev->stats.rx_length_errors++; 354 goto out_dropped; 355 } 356 357 skb = alloc_and_align_skb(dev, len); 358 err = -ENOMEM; 359 if (unlikely(!skb)) { 360 dev->stats.rx_missed_errors++; 361 goto out_dropped; 362 } 363 364 copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U; 365 skb_put(skb, 
copy_len); 366 err = ldc_copy(port->vio.lp, LDC_COPY_IN, 367 skb->data, copy_len, 0, 368 desc->cookies, desc->ncookies); 369 if (unlikely(err < 0)) { 370 dev->stats.rx_frame_errors++; 371 goto out_free_skb; 372 } 373 374 skb_pull(skb, VNET_PACKET_SKIP); 375 skb_trim(skb, len); 376 skb->protocol = eth_type_trans(skb, dev); 377 378 if (vio_version_after_eq(&port->vio, 1, 8)) { 379 struct vio_net_dext *dext = vio_net_ext(desc); 380 381 skb_reset_network_header(skb); 382 383 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) { 384 if (skb->protocol == ETH_P_IP) { 385 struct iphdr *iph = ip_hdr(skb); 386 387 iph->check = 0; 388 ip_send_check(iph); 389 } 390 } 391 if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) && 392 skb->ip_summed == CHECKSUM_NONE) { 393 if (skb->protocol == htons(ETH_P_IP)) { 394 struct iphdr *iph = ip_hdr(skb); 395 int ihl = iph->ihl * 4; 396 397 skb_reset_transport_header(skb); 398 skb_set_transport_header(skb, ihl); 399 vnet_fullcsum(skb); 400 } 401 } 402 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) { 403 skb->ip_summed = CHECKSUM_PARTIAL; 404 skb->csum_level = 0; 405 if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK) 406 skb->csum_level = 1; 407 } 408 } 409 410 skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL; 411 412 if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest))) 413 dev->stats.multicast++; 414 dev->stats.rx_packets++; 415 dev->stats.rx_bytes += len; 416 port->stats.rx_packets++; 417 port->stats.rx_bytes += len; 418 napi_gro_receive(&port->napi, skb); 419 return 0; 420 421 out_free_skb: 422 kfree_skb(skb); 423 424 out_dropped: 425 dev->stats.rx_dropped++; 426 return err; 427 } 428 429 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, 430 u32 start, u32 end, u8 vio_dring_state) 431 { 432 struct vio_dring_data hdr = { 433 .tag = { 434 .type = VIO_TYPE_DATA, 435 .stype = VIO_SUBTYPE_ACK, 436 .stype_env = VIO_DRING_DATA, 437 .sid = vio_send_sid(&port->vio), 438 }, 439 .dring_ident = dr->ident, 440 .start_idx = start, 441 .end_idx = end, 442 .state = vio_dring_state, 443 }; 444 int err, delay; 445 int retries = 0; 446 447 hdr.seq = dr->snd_nxt; 448 delay = 1; 449 do { 450 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 451 if (err > 0) { 452 dr->snd_nxt++; 453 break; 454 } 455 udelay(delay); 456 if ((delay <<= 1) > 128) 457 delay = 128; 458 if (retries++ > VNET_MAX_RETRIES) { 459 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n", 460 port->raddr[0], port->raddr[1], 461 port->raddr[2], port->raddr[3], 462 port->raddr[4], port->raddr[5]); 463 break; 464 } 465 } while (err == -EAGAIN); 466 467 if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) { 468 port->stop_rx_idx = end; 469 port->stop_rx = true; 470 } else { 471 port->stop_rx_idx = 0; 472 port->stop_rx = false; 473 } 474 475 return err; 476 } 477 478 static struct vio_net_desc *get_rx_desc(struct vnet_port *port, 479 struct vio_dring_state *dr, 480 u32 index) 481 { 482 struct vio_net_desc *desc = port->vio.desc_buf; 483 int err; 484 485 err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size, 486 (index * dr->entry_size), 487 dr->cookies, dr->ncookies); 488 if (err < 0) 489 return ERR_PTR(err); 490 491 return desc; 492 } 493 494 static int put_rx_desc(struct vnet_port *port, 495 struct vio_dring_state *dr, 496 struct vio_net_desc *desc, 497 u32 index) 498 { 499 int err; 500 501 err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size, 502 (index * dr->entry_size), 503 dr->cookies, dr->ncookies); 504 if (err < 0) 505 return err; 506 507 return 0; 508 } 509 510 static int 
vnet_walk_rx_one(struct vnet_port *port, 511 struct vio_dring_state *dr, 512 u32 index, int *needs_ack) 513 { 514 struct vio_net_desc *desc = get_rx_desc(port, dr, index); 515 struct vio_driver_state *vio = &port->vio; 516 int err; 517 518 BUG_ON(!desc); 519 if (IS_ERR(desc)) 520 return PTR_ERR(desc); 521 522 if (desc->hdr.state != VIO_DESC_READY) 523 return 1; 524 525 dma_rmb(); 526 527 viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n", 528 desc->hdr.state, desc->hdr.ack, 529 desc->size, desc->ncookies, 530 desc->cookies[0].cookie_addr, 531 desc->cookies[0].cookie_size); 532 533 err = vnet_rx_one(port, desc); 534 if (err == -ECONNRESET) 535 return err; 536 trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid, 537 index, desc->hdr.ack); 538 desc->hdr.state = VIO_DESC_DONE; 539 err = put_rx_desc(port, dr, desc, index); 540 if (err < 0) 541 return err; 542 *needs_ack = desc->hdr.ack; 543 return 0; 544 } 545 546 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr, 547 u32 start, u32 end, int *npkts, int budget) 548 { 549 struct vio_driver_state *vio = &port->vio; 550 int ack_start = -1, ack_end = -1; 551 bool send_ack = true; 552 553 end = (end == (u32)-1) ? vio_dring_prev(dr, start) 554 : vio_dring_next(dr, end); 555 556 viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end); 557 558 while (start != end) { 559 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack); 560 561 if (err == -ECONNRESET) 562 return err; 563 if (err != 0) 564 break; 565 (*npkts)++; 566 if (ack_start == -1) 567 ack_start = start; 568 ack_end = start; 569 start = vio_dring_next(dr, start); 570 if (ack && start != end) { 571 err = vnet_send_ack(port, dr, ack_start, ack_end, 572 VIO_DRING_ACTIVE); 573 if (err == -ECONNRESET) 574 return err; 575 ack_start = -1; 576 } 577 if ((*npkts) >= budget) { 578 send_ack = false; 579 break; 580 } 581 } 582 if (unlikely(ack_start == -1)) { 583 ack_end = vio_dring_prev(dr, start); 584 ack_start = ack_end; 585 } 586 if (send_ack) { 587 port->napi_resume = false; 588 trace_vnet_tx_send_stopped_ack(port->vio._local_sid, 589 port->vio._peer_sid, 590 ack_end, *npkts); 591 return vnet_send_ack(port, dr, ack_start, ack_end, 592 VIO_DRING_STOPPED); 593 } else { 594 trace_vnet_tx_defer_stopped_ack(port->vio._local_sid, 595 port->vio._peer_sid, 596 ack_end, *npkts); 597 port->napi_resume = true; 598 port->napi_stop_idx = ack_end; 599 return 1; 600 } 601 } 602 603 static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts, 604 int budget) 605 { 606 struct vio_dring_data *pkt = msgbuf; 607 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING]; 608 struct vio_driver_state *vio = &port->vio; 609 610 viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n", 611 pkt->tag.stype_env, pkt->seq, dr->rcv_nxt); 612 613 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 614 return 0; 615 if (unlikely(pkt->seq != dr->rcv_nxt)) { 616 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n", 617 pkt->seq, dr->rcv_nxt); 618 return 0; 619 } 620 621 if (!port->napi_resume) 622 dr->rcv_nxt++; 623 624 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */ 625 626 return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx, 627 npkts, budget); 628 } 629 630 static int idx_is_pending(struct vio_dring_state *dr, u32 end) 631 { 632 u32 idx = dr->cons; 633 int found = 0; 634 635 while (idx != dr->prod) { 636 if (idx == end) { 637 found = 1; 638 break; 639 } 640 idx = vio_dring_next(dr, idx); 641 } 642 return found; 643 } 
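/* Handle a dring ACK from the peer for our TX ring: if the acked index is
 * still pending, advance dr->cons past it and, if the descriptor at the new
 * dr->cons is already READY but vnet_start_xmit() raced and skipped its
 * "start" trigger, send that trigger on its behalf.  Returns 1 when the
 * stopped TX queue has enough free descriptors to be woken up again.
 */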
644 645 static int vnet_ack(struct vnet_port *port, void *msgbuf) 646 { 647 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 648 struct vio_dring_data *pkt = msgbuf; 649 struct net_device *dev; 650 u32 end; 651 struct vio_net_desc *desc; 652 struct netdev_queue *txq; 653 654 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 655 return 0; 656 657 end = pkt->end_idx; 658 dev = VNET_PORT_TO_NET_DEVICE(port); 659 netif_tx_lock(dev); 660 if (unlikely(!idx_is_pending(dr, end))) { 661 netif_tx_unlock(dev); 662 return 0; 663 } 664 665 /* sync for race conditions with vnet_start_xmit() and tell xmit it 666 * is time to send a trigger. 667 */ 668 trace_vnet_rx_stopped_ack(port->vio._local_sid, 669 port->vio._peer_sid, end); 670 dr->cons = vio_dring_next(dr, end); 671 desc = vio_dring_entry(dr, dr->cons); 672 if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) { 673 /* vnet_start_xmit() just populated this dring but missed 674 * sending the "start" LDC message to the consumer. 675 * Send a "start" trigger on its behalf. 676 */ 677 if (__vnet_tx_trigger(port, dr->cons) > 0) 678 port->start_cons = false; 679 else 680 port->start_cons = true; 681 } else { 682 port->start_cons = true; 683 } 684 netif_tx_unlock(dev); 685 686 txq = netdev_get_tx_queue(dev, port->q_index); 687 if (unlikely(netif_tx_queue_stopped(txq) && 688 vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr))) 689 return 1; 690 691 return 0; 692 } 693 694 static int vnet_nack(struct vnet_port *port, void *msgbuf) 695 { 696 /* XXX just reset or similar XXX */ 697 return 0; 698 } 699 700 static int handle_mcast(struct vnet_port *port, void *msgbuf) 701 { 702 struct vio_net_mcast_info *pkt = msgbuf; 703 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 704 705 if (pkt->tag.stype != VIO_SUBTYPE_ACK) 706 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n", 707 dev->name, 708 pkt->tag.type, 709 pkt->tag.stype, 710 pkt->tag.stype_env, 711 pkt->tag.sid); 712 713 return 0; 714 } 715 716 /* If the queue is stopped, wake it up so that we'll 717 * send out another START message at the next TX. 718 */ 719 static void maybe_tx_wakeup(struct vnet_port *port) 720 { 721 struct netdev_queue *txq; 722 723 txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), 724 port->q_index); 725 __netif_tx_lock(txq, smp_processor_id()); 726 if (likely(netif_tx_queue_stopped(txq))) 727 netif_tx_wake_queue(txq); 728 __netif_tx_unlock(txq); 729 } 730 731 bool sunvnet_port_is_up_common(struct vnet_port *vnet) 732 { 733 struct vio_driver_state *vio = &vnet->vio; 734 735 return !!(vio->hs_state & VIO_HS_COMPLETE); 736 } 737 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common); 738 739 static int vnet_event_napi(struct vnet_port *port, int budget) 740 { 741 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 742 struct vio_driver_state *vio = &port->vio; 743 int tx_wakeup, err; 744 int npkts = 0; 745 746 /* we don't expect any other bits */ 747 BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY | 748 LDC_EVENT_RESET | 749 LDC_EVENT_UP)); 750 751 /* RESET takes precedent over any other event */ 752 if (port->rx_event & LDC_EVENT_RESET) { 753 /* a link went down */ 754 755 if (port->vsw == 1) { 756 netif_tx_stop_all_queues(dev); 757 netif_carrier_off(dev); 758 } 759 760 vio_link_state_change(vio, LDC_EVENT_RESET); 761 vnet_port_reset(port); 762 vio_port_up(vio); 763 764 /* If the device is running but its tx queue was 765 * stopped (due to flow control), restart it. 
		 * This is necessary since vnet_port_reset()
		 * clears the tx drings and thus we may never get
		 * back a VIO_TYPE_DATA ACK packet - which is
		 * the normal mechanism to restart the tx queue.
		 */
		if (netif_running(dev))
			maybe_tx_wakeup(port);

		port->rx_event = 0;
		port->stats.event_reset++;
		return 0;
	}

	if (port->rx_event & LDC_EVENT_UP) {
		/* a link came up */

		if (port->vsw == 1) {
			netif_carrier_on(port->dev);
			netif_tx_start_all_queues(port->dev);
		}

		vio_link_state_change(vio, LDC_EVENT_UP);
		port->rx_event = 0;
		port->stats.event_up++;
		return 0;
	}

	err = 0;
	tx_wakeup = 0;
	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		if (port->napi_resume) {
			struct vio_dring_data *pkt =
				(struct vio_dring_data *)&msgbuf;
			struct vio_dring_state *dr =
				&port->vio.drings[VIO_DRIVER_RX_RING];

			pkt->tag.type = VIO_TYPE_DATA;
			pkt->tag.stype = VIO_SUBTYPE_INFO;
			pkt->tag.stype_env = VIO_DRING_DATA;
			pkt->seq = dr->rcv_nxt;
			pkt->start_idx = vio_dring_next(dr,
							port->napi_stop_idx);
			pkt->end_idx = -1;
		} else {
			err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
			if (unlikely(err < 0)) {
				if (err == -ECONNRESET)
					vio_conn_reset(vio);
				break;
			}
			if (err == 0)
				break;
			viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
			       msgbuf.tag.type,
			       msgbuf.tag.stype,
			       msgbuf.tag.stype_env,
			       msgbuf.tag.sid);
			err = vio_validate_sid(vio, &msgbuf.tag);
			if (err < 0)
				break;
		}

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				if (!sunvnet_port_is_up_common(port)) {
					/* failures like handshake_failure()
					 * may have cleaned up dring, but
					 * NAPI polling may bring us here.
839 */ 840 err = -ECONNRESET; 841 break; 842 } 843 err = vnet_rx(port, &msgbuf, &npkts, budget); 844 if (npkts >= budget) 845 break; 846 if (npkts == 0) 847 break; 848 } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) { 849 err = vnet_ack(port, &msgbuf); 850 if (err > 0) 851 tx_wakeup |= err; 852 } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) { 853 err = vnet_nack(port, &msgbuf); 854 } 855 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) { 856 if (msgbuf.tag.stype_env == VNET_MCAST_INFO) 857 err = handle_mcast(port, &msgbuf); 858 else 859 err = vio_control_pkt_engine(vio, &msgbuf); 860 if (err) 861 break; 862 } else { 863 err = vnet_handle_unknown(port, &msgbuf); 864 } 865 if (err == -ECONNRESET) 866 break; 867 } 868 if (unlikely(tx_wakeup && err != -ECONNRESET)) 869 maybe_tx_wakeup(port); 870 return npkts; 871 } 872 873 int sunvnet_poll_common(struct napi_struct *napi, int budget) 874 { 875 struct vnet_port *port = container_of(napi, struct vnet_port, napi); 876 struct vio_driver_state *vio = &port->vio; 877 int processed = vnet_event_napi(port, budget); 878 879 if (processed < budget) { 880 napi_complete_done(napi, processed); 881 port->rx_event &= ~LDC_EVENT_DATA_READY; 882 vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED); 883 } 884 return processed; 885 } 886 EXPORT_SYMBOL_GPL(sunvnet_poll_common); 887 888 void sunvnet_event_common(void *arg, int event) 889 { 890 struct vnet_port *port = arg; 891 struct vio_driver_state *vio = &port->vio; 892 893 port->rx_event |= event; 894 vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED); 895 napi_schedule(&port->napi); 896 } 897 EXPORT_SYMBOL_GPL(sunvnet_event_common); 898 899 static int __vnet_tx_trigger(struct vnet_port *port, u32 start) 900 { 901 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 902 struct vio_dring_data hdr = { 903 .tag = { 904 .type = VIO_TYPE_DATA, 905 .stype = VIO_SUBTYPE_INFO, 906 .stype_env = VIO_DRING_DATA, 907 .sid = vio_send_sid(&port->vio), 908 }, 909 .dring_ident = dr->ident, 910 .start_idx = start, 911 .end_idx = (u32)-1, 912 }; 913 int err, delay; 914 int retries = 0; 915 916 if (port->stop_rx) { 917 trace_vnet_tx_pending_stopped_ack(port->vio._local_sid, 918 port->vio._peer_sid, 919 port->stop_rx_idx, -1); 920 err = vnet_send_ack(port, 921 &port->vio.drings[VIO_DRIVER_RX_RING], 922 port->stop_rx_idx, -1, 923 VIO_DRING_STOPPED); 924 if (err <= 0) 925 return err; 926 } 927 928 hdr.seq = dr->snd_nxt; 929 delay = 1; 930 do { 931 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 932 if (err > 0) { 933 dr->snd_nxt++; 934 break; 935 } 936 udelay(delay); 937 if ((delay <<= 1) > 128) 938 delay = 128; 939 if (retries++ > VNET_MAX_RETRIES) 940 break; 941 } while (err == -EAGAIN); 942 trace_vnet_tx_trigger(port->vio._local_sid, 943 port->vio._peer_sid, start, err); 944 945 return err; 946 } 947 948 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port, 949 unsigned *pending) 950 { 951 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 952 struct sk_buff *skb = NULL; 953 int i, txi; 954 955 *pending = 0; 956 957 txi = dr->prod; 958 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 959 struct vio_net_desc *d; 960 961 --txi; 962 if (txi < 0) 963 txi = VNET_TX_RING_SIZE - 1; 964 965 d = vio_dring_entry(dr, txi); 966 967 if (d->hdr.state == VIO_DESC_READY) { 968 (*pending)++; 969 continue; 970 } 971 if (port->tx_bufs[txi].skb) { 972 if (d->hdr.state != VIO_DESC_DONE) 973 pr_notice("invalid ring buffer state %d\n", 974 d->hdr.state); 975 BUG_ON(port->tx_bufs[txi].skb->next); 976 977 
port->tx_bufs[txi].skb->next = skb; 978 skb = port->tx_bufs[txi].skb; 979 port->tx_bufs[txi].skb = NULL; 980 981 ldc_unmap(port->vio.lp, 982 port->tx_bufs[txi].cookies, 983 port->tx_bufs[txi].ncookies); 984 } else if (d->hdr.state == VIO_DESC_FREE) { 985 break; 986 } 987 d->hdr.state = VIO_DESC_FREE; 988 } 989 return skb; 990 } 991 992 static inline void vnet_free_skbs(struct sk_buff *skb) 993 { 994 struct sk_buff *next; 995 996 while (skb) { 997 next = skb->next; 998 skb->next = NULL; 999 dev_kfree_skb(skb); 1000 skb = next; 1001 } 1002 } 1003 1004 void sunvnet_clean_timer_expire_common(unsigned long port0) 1005 { 1006 struct vnet_port *port = (struct vnet_port *)port0; 1007 struct sk_buff *freeskbs; 1008 unsigned pending; 1009 1010 netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port)); 1011 freeskbs = vnet_clean_tx_ring(port, &pending); 1012 netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port)); 1013 1014 vnet_free_skbs(freeskbs); 1015 1016 if (pending) 1017 (void)mod_timer(&port->clean_timer, 1018 jiffies + VNET_CLEAN_TIMEOUT); 1019 else 1020 del_timer(&port->clean_timer); 1021 } 1022 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common); 1023 1024 static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb, 1025 struct ldc_trans_cookie *cookies, int ncookies, 1026 unsigned int map_perm) 1027 { 1028 int i, nc, err, blen; 1029 1030 /* header */ 1031 blen = skb_headlen(skb); 1032 if (blen < ETH_ZLEN) 1033 blen = ETH_ZLEN; 1034 blen += VNET_PACKET_SKIP; 1035 blen += 8 - (blen & 7); 1036 1037 err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies, 1038 ncookies, map_perm); 1039 if (err < 0) 1040 return err; 1041 nc = err; 1042 1043 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1044 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 1045 u8 *vaddr; 1046 1047 if (nc < ncookies) { 1048 vaddr = kmap_atomic(skb_frag_page(f)); 1049 blen = skb_frag_size(f); 1050 blen += 8 - (blen & 7); 1051 err = ldc_map_single(lp, vaddr + f->page_offset, 1052 blen, cookies + nc, ncookies - nc, 1053 map_perm); 1054 kunmap_atomic(vaddr); 1055 } else { 1056 err = -EMSGSIZE; 1057 } 1058 1059 if (err < 0) { 1060 ldc_unmap(lp, cookies, nc); 1061 return err; 1062 } 1063 nc += err; 1064 } 1065 return nc; 1066 } 1067 1068 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) 1069 { 1070 struct sk_buff *nskb; 1071 int i, len, pad, docopy; 1072 1073 len = skb->len; 1074 pad = 0; 1075 if (len < ETH_ZLEN) { 1076 pad += ETH_ZLEN - skb->len; 1077 len += pad; 1078 } 1079 len += VNET_PACKET_SKIP; 1080 pad += 8 - (len & 7); 1081 1082 /* make sure we have enough cookies and alignment in every frag */ 1083 docopy = skb_shinfo(skb)->nr_frags >= ncookies; 1084 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1085 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 1086 1087 docopy |= f->page_offset & 7; 1088 } 1089 if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP || 1090 skb_tailroom(skb) < pad || 1091 skb_headroom(skb) < VNET_PACKET_SKIP || docopy) { 1092 int start = 0, offset; 1093 __wsum csum; 1094 1095 len = skb->len > ETH_ZLEN ? 
skb->len : ETH_ZLEN; 1096 nskb = alloc_and_align_skb(skb->dev, len); 1097 if (!nskb) { 1098 dev_kfree_skb(skb); 1099 return NULL; 1100 } 1101 skb_reserve(nskb, VNET_PACKET_SKIP); 1102 1103 nskb->protocol = skb->protocol; 1104 offset = skb_mac_header(skb) - skb->data; 1105 skb_set_mac_header(nskb, offset); 1106 offset = skb_network_header(skb) - skb->data; 1107 skb_set_network_header(nskb, offset); 1108 offset = skb_transport_header(skb) - skb->data; 1109 skb_set_transport_header(nskb, offset); 1110 1111 offset = 0; 1112 nskb->csum_offset = skb->csum_offset; 1113 nskb->ip_summed = skb->ip_summed; 1114 1115 if (skb->ip_summed == CHECKSUM_PARTIAL) 1116 start = skb_checksum_start_offset(skb); 1117 if (start) { 1118 struct iphdr *iph = ip_hdr(nskb); 1119 int offset = start + nskb->csum_offset; 1120 1121 if (skb_copy_bits(skb, 0, nskb->data, start)) { 1122 dev_kfree_skb(nskb); 1123 dev_kfree_skb(skb); 1124 return NULL; 1125 } 1126 *(__sum16 *)(skb->data + offset) = 0; 1127 csum = skb_copy_and_csum_bits(skb, start, 1128 nskb->data + start, 1129 skb->len - start, 0); 1130 if (iph->protocol == IPPROTO_TCP || 1131 iph->protocol == IPPROTO_UDP) { 1132 csum = csum_tcpudp_magic(iph->saddr, iph->daddr, 1133 skb->len - start, 1134 iph->protocol, csum); 1135 } 1136 *(__sum16 *)(nskb->data + offset) = csum; 1137 1138 nskb->ip_summed = CHECKSUM_NONE; 1139 } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) { 1140 dev_kfree_skb(nskb); 1141 dev_kfree_skb(skb); 1142 return NULL; 1143 } 1144 (void)skb_put(nskb, skb->len); 1145 if (skb_is_gso(skb)) { 1146 skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size; 1147 skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; 1148 } 1149 nskb->queue_mapping = skb->queue_mapping; 1150 dev_kfree_skb(skb); 1151 skb = nskb; 1152 } 1153 return skb; 1154 } 1155 1156 static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, 1157 struct vnet_port *(*vnet_tx_port) 1158 (struct sk_buff *, struct net_device *)) 1159 { 1160 struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); 1161 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1162 struct sk_buff *segs; 1163 int maclen, datalen; 1164 int status; 1165 int gso_size, gso_type, gso_segs; 1166 int hlen = skb_transport_header(skb) - skb_mac_header(skb); 1167 int proto = IPPROTO_IP; 1168 1169 if (skb->protocol == htons(ETH_P_IP)) 1170 proto = ip_hdr(skb)->protocol; 1171 else if (skb->protocol == htons(ETH_P_IPV6)) 1172 proto = ipv6_hdr(skb)->nexthdr; 1173 1174 if (proto == IPPROTO_TCP) { 1175 hlen += tcp_hdr(skb)->doff * 4; 1176 } else if (proto == IPPROTO_UDP) { 1177 hlen += sizeof(struct udphdr); 1178 } else { 1179 pr_err("vnet_handle_offloads GSO with unknown transport " 1180 "protocol %d tproto %d\n", skb->protocol, proto); 1181 hlen = 128; /* XXX */ 1182 } 1183 datalen = port->tsolen - hlen; 1184 1185 gso_size = skb_shinfo(skb)->gso_size; 1186 gso_type = skb_shinfo(skb)->gso_type; 1187 gso_segs = skb_shinfo(skb)->gso_segs; 1188 1189 if (port->tso && gso_size < datalen) 1190 gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen); 1191 1192 if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) { 1193 struct netdev_queue *txq; 1194 1195 txq = netdev_get_tx_queue(dev, port->q_index); 1196 netif_tx_stop_queue(txq); 1197 if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs) 1198 return NETDEV_TX_BUSY; 1199 netif_tx_wake_queue(txq); 1200 } 1201 1202 maclen = skb_network_header(skb) - skb_mac_header(skb); 1203 skb_pull(skb, maclen); 1204 1205 if (port->tso && gso_size < datalen) { 1206 if 
(skb_unclone(skb, GFP_ATOMIC)) 1207 goto out_dropped; 1208 1209 /* segment to TSO size */ 1210 skb_shinfo(skb)->gso_size = datalen; 1211 skb_shinfo(skb)->gso_segs = gso_segs; 1212 } 1213 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); 1214 if (IS_ERR(segs)) 1215 goto out_dropped; 1216 1217 skb_push(skb, maclen); 1218 skb_reset_mac_header(skb); 1219 1220 status = 0; 1221 while (segs) { 1222 struct sk_buff *curr = segs; 1223 1224 segs = segs->next; 1225 curr->next = NULL; 1226 if (port->tso && curr->len > dev->mtu) { 1227 skb_shinfo(curr)->gso_size = gso_size; 1228 skb_shinfo(curr)->gso_type = gso_type; 1229 skb_shinfo(curr)->gso_segs = 1230 DIV_ROUND_UP(curr->len - hlen, gso_size); 1231 } else { 1232 skb_shinfo(curr)->gso_size = 0; 1233 } 1234 1235 skb_push(curr, maclen); 1236 skb_reset_mac_header(curr); 1237 memcpy(skb_mac_header(curr), skb_mac_header(skb), 1238 maclen); 1239 curr->csum_start = skb_transport_header(curr) - curr->head; 1240 if (ip_hdr(curr)->protocol == IPPROTO_TCP) 1241 curr->csum_offset = offsetof(struct tcphdr, check); 1242 else if (ip_hdr(curr)->protocol == IPPROTO_UDP) 1243 curr->csum_offset = offsetof(struct udphdr, check); 1244 1245 if (!(status & NETDEV_TX_MASK)) 1246 status = sunvnet_start_xmit_common(curr, dev, 1247 vnet_tx_port); 1248 if (status & NETDEV_TX_MASK) 1249 dev_kfree_skb_any(curr); 1250 } 1251 1252 if (!(status & NETDEV_TX_MASK)) 1253 dev_kfree_skb_any(skb); 1254 return status; 1255 out_dropped: 1256 dev->stats.tx_dropped++; 1257 dev_kfree_skb_any(skb); 1258 return NETDEV_TX_OK; 1259 } 1260 1261 int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, 1262 struct vnet_port *(*vnet_tx_port) 1263 (struct sk_buff *, struct net_device *)) 1264 { 1265 struct vnet_port *port = NULL; 1266 struct vio_dring_state *dr; 1267 struct vio_net_desc *d; 1268 unsigned int len; 1269 struct sk_buff *freeskbs = NULL; 1270 int i, err, txi; 1271 unsigned pending = 0; 1272 struct netdev_queue *txq; 1273 1274 rcu_read_lock(); 1275 port = vnet_tx_port(skb, dev); 1276 if (unlikely(!port)) 1277 goto out_dropped; 1278 1279 if (skb_is_gso(skb) && skb->len > port->tsolen) { 1280 err = vnet_handle_offloads(port, skb, vnet_tx_port); 1281 rcu_read_unlock(); 1282 return err; 1283 } 1284 1285 if (!skb_is_gso(skb) && skb->len > port->rmtu) { 1286 unsigned long localmtu = port->rmtu - ETH_HLEN; 1287 1288 if (vio_version_after_eq(&port->vio, 1, 3)) 1289 localmtu -= VLAN_HLEN; 1290 1291 if (skb->protocol == htons(ETH_P_IP)) { 1292 struct flowi4 fl4; 1293 struct rtable *rt = NULL; 1294 1295 memset(&fl4, 0, sizeof(fl4)); 1296 fl4.flowi4_oif = dev->ifindex; 1297 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 1298 fl4.daddr = ip_hdr(skb)->daddr; 1299 fl4.saddr = ip_hdr(skb)->saddr; 1300 1301 rt = ip_route_output_key(dev_net(dev), &fl4); 1302 if (!IS_ERR(rt)) { 1303 skb_dst_set(skb, &rt->dst); 1304 icmp_send(skb, ICMP_DEST_UNREACH, 1305 ICMP_FRAG_NEEDED, 1306 htonl(localmtu)); 1307 } 1308 } 1309 #if IS_ENABLED(CONFIG_IPV6) 1310 else if (skb->protocol == htons(ETH_P_IPV6)) 1311 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu); 1312 #endif 1313 goto out_dropped; 1314 } 1315 1316 skb = vnet_skb_shape(skb, 2); 1317 1318 if (unlikely(!skb)) 1319 goto out_dropped; 1320 1321 if (skb->ip_summed == CHECKSUM_PARTIAL) 1322 vnet_fullcsum(skb); 1323 1324 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1325 i = skb_get_queue_mapping(skb); 1326 txq = netdev_get_tx_queue(dev, i); 1327 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1328 if (!netif_tx_queue_stopped(txq)) { 1329 
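			/* No descriptors left: stop this queue; it is woken
			 * again from vnet_ack()/maybe_tx_wakeup() once the
			 * peer consumes entries and ring space frees up.
			 */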
			netif_tx_stop_queue(txq);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		rcu_read_unlock();
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	txi = dr->prod;

	freeskbs = vnet_clean_tx_ring(port, &pending);

	BUG_ON(port->tx_bufs[txi].skb);

	len = skb->len;
	if (len < ETH_ZLEN)
		len = ETH_ZLEN;

	err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
			   (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
	if (err < 0) {
		netdev_info(dev, "tx buffer map error %d\n", err);
		goto out_dropped;
	}

	port->tx_bufs[txi].skb = skb;
	skb = NULL;
	port->tx_bufs[txi].ncookies = err;

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
						->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer. The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed. We send a "start" trigger here
	 * if, and only if, start_cons is true (reset it afterward). Conversely,
	 * vnet_ack() should check if the dring corresponding to cons
	 * is marked READY, but start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the dma_wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler. The consumer will
	 * not proceed until the VIO_DESC_READY is visible assuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order.
The HV trap from the LDC start trigger is the 1415 * producer to consumer announcement that work is available to the 1416 * consumer 1417 */ 1418 if (!port->start_cons) { /* previous trigger suffices */ 1419 trace_vnet_skip_tx_trigger(port->vio._local_sid, 1420 port->vio._peer_sid, dr->cons); 1421 goto ldc_start_done; 1422 } 1423 1424 err = __vnet_tx_trigger(port, dr->cons); 1425 if (unlikely(err < 0)) { 1426 netdev_info(dev, "TX trigger error %d\n", err); 1427 d->hdr.state = VIO_DESC_FREE; 1428 skb = port->tx_bufs[txi].skb; 1429 port->tx_bufs[txi].skb = NULL; 1430 dev->stats.tx_carrier_errors++; 1431 goto out_dropped; 1432 } 1433 1434 ldc_start_done: 1435 port->start_cons = false; 1436 1437 dev->stats.tx_packets++; 1438 dev->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1439 port->stats.tx_packets++; 1440 port->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1441 1442 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); 1443 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1444 netif_tx_stop_queue(txq); 1445 smp_rmb(); 1446 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) 1447 netif_tx_wake_queue(txq); 1448 } 1449 1450 (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT); 1451 rcu_read_unlock(); 1452 1453 vnet_free_skbs(freeskbs); 1454 1455 return NETDEV_TX_OK; 1456 1457 out_dropped: 1458 if (pending) 1459 (void)mod_timer(&port->clean_timer, 1460 jiffies + VNET_CLEAN_TIMEOUT); 1461 else if (port) 1462 del_timer(&port->clean_timer); 1463 rcu_read_unlock(); 1464 if (skb) 1465 dev_kfree_skb(skb); 1466 vnet_free_skbs(freeskbs); 1467 dev->stats.tx_dropped++; 1468 return NETDEV_TX_OK; 1469 } 1470 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); 1471 1472 void sunvnet_tx_timeout_common(struct net_device *dev) 1473 { 1474 /* XXX Implement me XXX */ 1475 } 1476 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common); 1477 1478 int sunvnet_open_common(struct net_device *dev) 1479 { 1480 netif_carrier_on(dev); 1481 netif_tx_start_all_queues(dev); 1482 1483 return 0; 1484 } 1485 EXPORT_SYMBOL_GPL(sunvnet_open_common); 1486 1487 int sunvnet_close_common(struct net_device *dev) 1488 { 1489 netif_tx_stop_all_queues(dev); 1490 netif_carrier_off(dev); 1491 1492 return 0; 1493 } 1494 EXPORT_SYMBOL_GPL(sunvnet_close_common); 1495 1496 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr) 1497 { 1498 struct vnet_mcast_entry *m; 1499 1500 for (m = vp->mcast_list; m; m = m->next) { 1501 if (ether_addr_equal(m->addr, addr)) 1502 return m; 1503 } 1504 return NULL; 1505 } 1506 1507 static void __update_mc_list(struct vnet *vp, struct net_device *dev) 1508 { 1509 struct netdev_hw_addr *ha; 1510 1511 netdev_for_each_mc_addr(ha, dev) { 1512 struct vnet_mcast_entry *m; 1513 1514 m = __vnet_mc_find(vp, ha->addr); 1515 if (m) { 1516 m->hit = 1; 1517 continue; 1518 } 1519 1520 if (!m) { 1521 m = kzalloc(sizeof(*m), GFP_ATOMIC); 1522 if (!m) 1523 continue; 1524 memcpy(m->addr, ha->addr, ETH_ALEN); 1525 m->hit = 1; 1526 1527 m->next = vp->mcast_list; 1528 vp->mcast_list = m; 1529 } 1530 } 1531 } 1532 1533 static void __send_mc_list(struct vnet *vp, struct vnet_port *port) 1534 { 1535 struct vio_net_mcast_info info; 1536 struct vnet_mcast_entry *m, **pp; 1537 int n_addrs; 1538 1539 memset(&info, 0, sizeof(info)); 1540 1541 info.tag.type = VIO_TYPE_CTRL; 1542 info.tag.stype = VIO_SUBTYPE_INFO; 1543 info.tag.stype_env = VNET_MCAST_INFO; 1544 info.tag.sid = vio_send_sid(&port->vio); 1545 info.set = 1; 1546 1547 n_addrs = 0; 1548 for (m = vp->mcast_list; m; m = m->next) { 1549 if (m->sent) 1550 continue; 
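		/* Batch up to VNET_NUM_MCAST addresses per control message;
		 * the buffer is flushed whenever it fills and once more after
		 * the loop for any remainder.
		 */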
1551 m->sent = 1; 1552 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1553 m->addr, ETH_ALEN); 1554 if (++n_addrs == VNET_NUM_MCAST) { 1555 info.count = n_addrs; 1556 1557 (void)vio_ldc_send(&port->vio, &info, 1558 sizeof(info)); 1559 n_addrs = 0; 1560 } 1561 } 1562 if (n_addrs) { 1563 info.count = n_addrs; 1564 (void)vio_ldc_send(&port->vio, &info, sizeof(info)); 1565 } 1566 1567 info.set = 0; 1568 1569 n_addrs = 0; 1570 pp = &vp->mcast_list; 1571 while ((m = *pp) != NULL) { 1572 if (m->hit) { 1573 m->hit = 0; 1574 pp = &m->next; 1575 continue; 1576 } 1577 1578 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1579 m->addr, ETH_ALEN); 1580 if (++n_addrs == VNET_NUM_MCAST) { 1581 info.count = n_addrs; 1582 (void)vio_ldc_send(&port->vio, &info, 1583 sizeof(info)); 1584 n_addrs = 0; 1585 } 1586 1587 *pp = m->next; 1588 kfree(m); 1589 } 1590 if (n_addrs) { 1591 info.count = n_addrs; 1592 (void)vio_ldc_send(&port->vio, &info, sizeof(info)); 1593 } 1594 } 1595 1596 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp) 1597 { 1598 struct vnet_port *port; 1599 1600 rcu_read_lock(); 1601 list_for_each_entry_rcu(port, &vp->port_list, list) { 1602 if (port->switch_port) { 1603 __update_mc_list(vp, dev); 1604 __send_mc_list(vp, port); 1605 break; 1606 } 1607 } 1608 rcu_read_unlock(); 1609 } 1610 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common); 1611 1612 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p) 1613 { 1614 return -EINVAL; 1615 } 1616 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common); 1617 1618 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port) 1619 { 1620 struct vio_dring_state *dr; 1621 int i; 1622 1623 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1624 1625 if (!dr->base) 1626 return; 1627 1628 for (i = 0; i < VNET_TX_RING_SIZE; i++) { 1629 struct vio_net_desc *d; 1630 void *skb = port->tx_bufs[i].skb; 1631 1632 if (!skb) 1633 continue; 1634 1635 d = vio_dring_entry(dr, i); 1636 1637 ldc_unmap(port->vio.lp, 1638 port->tx_bufs[i].cookies, 1639 port->tx_bufs[i].ncookies); 1640 dev_kfree_skb(skb); 1641 port->tx_bufs[i].skb = NULL; 1642 d->hdr.state = VIO_DESC_FREE; 1643 } 1644 ldc_free_exp_dring(port->vio.lp, dr->base, 1645 (dr->entry_size * dr->num_entries), 1646 dr->cookies, dr->ncookies); 1647 dr->base = NULL; 1648 dr->entry_size = 0; 1649 dr->num_entries = 0; 1650 dr->pending = 0; 1651 dr->ncookies = 0; 1652 } 1653 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common); 1654 1655 void vnet_port_reset(struct vnet_port *port) 1656 { 1657 del_timer(&port->clean_timer); 1658 sunvnet_port_free_tx_bufs_common(port); 1659 port->rmtu = 0; 1660 port->tso = (port->vsw == 0); /* no tso in vsw, misbehaves in bridge */ 1661 port->tsolen = 0; 1662 } 1663 EXPORT_SYMBOL_GPL(vnet_port_reset); 1664 1665 static int vnet_port_alloc_tx_ring(struct vnet_port *port) 1666 { 1667 struct vio_dring_state *dr; 1668 unsigned long len, elen; 1669 int i, err, ncookies; 1670 void *dring; 1671 1672 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1673 1674 elen = sizeof(struct vio_net_desc) + 1675 sizeof(struct ldc_trans_cookie) * 2; 1676 if (vio_version_after_eq(&port->vio, 1, 7)) 1677 elen += sizeof(struct vio_net_dext); 1678 len = VNET_TX_RING_SIZE * elen; 1679 1680 ncookies = VIO_MAX_RING_COOKIES; 1681 dring = ldc_alloc_exp_dring(port->vio.lp, len, 1682 dr->cookies, &ncookies, 1683 (LDC_MAP_SHADOW | 1684 LDC_MAP_DIRECT | 1685 LDC_MAP_RW)); 1686 if (IS_ERR(dring)) { 1687 err = PTR_ERR(dring); 1688 goto err_out; 1689 } 1690 1691 dr->base = dring; 1692 dr->entry_size = elen; 1693 dr->num_entries = 
	VNET_TX_RING_SIZE;
	dr->prod = 0;
	dr->cons = 0;
	port->start_cons = true; /* need an initial trigger */
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, i);
		d->hdr.state = VIO_DESC_FREE;
	}
	return 0;

err_out:
	sunvnet_port_free_tx_bufs_common(port);

	return err;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
#endif

void sunvnet_port_add_txq_common(struct vnet_port *port)
{
	struct vnet *vp = port->vp;
	int smallest = 0;
	int i;

	/* find the first least-used q
	 * When there are more ldoms than q's, we start to
	 * double up on ports per queue.
	 */
	for (i = 0; i < VNET_MAX_TXQS; i++) {
		if (vp->q_used[i] == 0) {
			smallest = i;
			break;
		}
		if (vp->q_used[i] < vp->q_used[smallest])
			smallest = i;
	}

	vp->nports++;
	vp->q_used[smallest]++;
	port->q_index = smallest;
}
EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);

void sunvnet_port_rm_txq_common(struct vnet_port *port)
{
	port->vp->nports--;
	port->vp->q_used[port->q_index]--;
	port->q_index = 0;
}
EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);
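/* Usage sketch (illustrative only, not part of this file): the *_common()
 * helpers exported above are meant to be wrapped by a consumer driver's own
 * net_device_ops.  The names below (my_*) are hypothetical, and the port
 * lookup is assumed to live in the callback handed to
 * sunvnet_start_xmit_common(); see sunvnet.c/ldmvsw.c for the real glue.
 *
 *	static struct vnet_port *my_tx_port_find(struct sk_buff *skb,
 *						 struct net_device *dev)
 *	{
 *		// hypothetical: pick the vnet_port that owns skb's
 *		// destination MAC; rcu_read_lock() is held by the caller
 *		...
 *	}
 *
 *	static netdev_tx_t my_start_xmit(struct sk_buff *skb,
 *					 struct net_device *dev)
 *	{
 *		return sunvnet_start_xmit_common(skb, dev, my_tx_port_find);
 *	}
 *
 *	static const struct net_device_ops my_netdev_ops = {
 *		.ndo_open		= sunvnet_open_common,
 *		.ndo_stop		= sunvnet_close_common,
 *		.ndo_start_xmit		= my_start_xmit,
 *		.ndo_tx_timeout		= sunvnet_tx_timeout_common,
 *		.ndo_set_mac_address	= sunvnet_set_mac_addr_common,
 *	};
 */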