/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016 Oracle. All rights reserved.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>
#define CREATE_TRACE_POINTS
#include <trace/events/sunvnet.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet_common.h"

/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
#define	VNET_MAX_RETRIES	10

MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network support library");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1");

static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
static void vnet_port_reset(struct vnet_port *port);

static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port);

int sunvnet_send_attr_common(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_net_attr_info pkt;
	int framelen = ETH_FRAME_LEN;
	int i, err;

	err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
	if (err)
		return err;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	if (vio_version_before(vio, 1, 2))
		pkt.xfer_mode = VIO_DRING_MODE;
	else
		pkt.xfer_mode = VIO_NEW_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	if (vio_version_after(vio, 1, 3)) {
		if (port->rmtu) {
			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
			pkt.mtu = port->rmtu;
		} else {
			port->rmtu = VNET_MAXPACKET;
			pkt.mtu = port->rmtu;
		}
		if (vio_version_after_eq(vio, 1, 6))
			pkt.options = VIO_TX_DRING;
	} else if (vio_version_before(vio, 1, 3)) {
		pkt.mtu = framelen;
	} else { /* v1.3 */
		pkt.mtu = framelen + VLAN_HLEN;
	}

	pkt.cflags = 0;
	if (vio_version_after_eq(vio, 1, 7) && port->tso) {
		pkt.cflags |= VNET_LSO_IPV4_CAPAB;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		pkt.ipv4_lso_maxlen = port->tsolen;
	}

	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       "cflags[0x%04x] lso_max[%u]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long)pkt.addr,
	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}
EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);

static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	struct vnet_port *port = to_vnet_port(vio);
	u64 localmtu;
	u8 xfer_mode;

	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.sid = vio_send_sid(vio);

	xfer_mode = pkt->xfer_mode;
	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
		xfer_mode = VIO_NEW_DRING_MODE;

	/* MTU negotiation:
	 *	< v1.3 - ETH_FRAME_LEN exactly
	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
	 *		 pkt->mtu for ACK
	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
	 */
	if (vio_version_before(vio, 1, 3)) {
		localmtu = ETH_FRAME_LEN;
	} else if (vio_version_after(vio, 1, 3)) {
		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
		localmtu = min(pkt->mtu, localmtu);
		pkt->mtu = localmtu;
	} else { /* v1.3 */
		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
	}
	port->rmtu = localmtu;

	/* LSO negotiation */
	if (vio_version_after_eq(vio, 1, 7))
		port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
	else
		port->tso = false;
	if (port->tso) {
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
		if (port->tsolen < VNET_MINTSO) {
			port->tso = false;
			port->tsolen = 0;
			pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		}
		pkt->ipv4_lso_maxlen = port->tsolen;
	} else {
		pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		pkt->ipv4_lso_maxlen = 0;
		port->tsolen = 0;
	}

	/* for version >= 1.6, ACK packet mode we support */
	if (vio_version_after_eq(vio, 1, 6)) {
		pkt->xfer_mode = VIO_NEW_DRING_MODE;
		pkt->options = VIO_TX_DRING;
	}

	if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != localmtu) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void)vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	}

	viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
	       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
	       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.stype = VIO_SUBTYPE_ACK;

	return vio_ldc_send(vio, pkt, sizeof(*pkt));
}

static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}

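/* Dispatch an incoming ATTR message by subtype: an INFO from the peer is
 * negotiated in handle_attr_info(), while ACK/NACK are the peer's replies
 * to the attr-info we sent in sunvnet_send_attr_common().
 */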
int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}
EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);

void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->rcv_nxt = 1;
	dr->snd_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->rcv_nxt = 1;
	dr->snd_nxt = 1;
}
EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb;
	unsigned long addr, off;

	skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long)skb->data;
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}

static inline void vnet_fullcsum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int offset = skb_transport_offset(skb);

	if (skb->protocol != htons(ETH_P_IP))
		return;
	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return;
	skb->ip_summed = CHECKSUM_NONE;
	skb->csum_level = 1;
	skb->csum = 0;
	if (iph->protocol == IPPROTO_TCP) {
		struct tcphdr *ptcp = tcp_hdr(skb);

		ptcp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_TCP,
						skb->csum);
	} else if (iph->protocol == IPPROTO_UDP) {
		struct udphdr *pudp = udp_hdr(skb);

		pudp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_UDP,
						skb->csum);
	}
}

static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	unsigned int len = desc->size;
	unsigned int copy_len;
	struct sk_buff *skb;
	int maxlen;
	int err;

	err = -EMSGSIZE;
	if (port->tso && port->tsolen > port->rmtu)
		maxlen = port->tsolen;
	else
		maxlen = port->rmtu;
	if (unlikely(len < ETH_ZLEN || len > maxlen)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

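	/* Round the LDC copy up to the next multiple of 8, including the
	 * implied VNET_PACKET_SKIP bytes, e.g. a 60-byte frame becomes a
	 * 66-byte transfer rounded up to 72 bytes.
	 */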
	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       desc->cookies, desc->ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (vio_version_after_eq(&port->vio, 1, 8)) {
		struct vio_net_dext *dext = vio_net_ext(desc);

		skb_reset_network_header(skb);

		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
			if (skb->protocol == ETH_P_IP) {
				struct iphdr *iph = ip_hdr(skb);

				iph->check = 0;
				ip_send_check(iph);
			}
		}
		if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
		    skb->ip_summed == CHECKSUM_NONE) {
			if (skb->protocol == htons(ETH_P_IP)) {
				struct iphdr *iph = ip_hdr(skb);
				int ihl = iph->ihl * 4;

				skb_reset_transport_header(skb);
				skb_set_transport_header(skb, ihl);
				vnet_fullcsum(skb);
			}
		}
		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_level = 0;
			if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
				skb->csum_level = 1;
		}
	}

	skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
	napi_gro_receive(&port->napi, skb);
	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}

static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_ACK,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= start,
		.end_idx		= end,
		.state			= vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
				port->raddr[0], port->raddr[1],
				port->raddr[2], port->raddr[3],
				port->raddr[4], port->raddr[5]);
			break;
		}
	} while (err == -EAGAIN);

	if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
		port->stop_rx_idx = end;
		port->stop_rx = true;
	} else {
		port->stop_rx_idx = 0;
		port->stop_rx = false;
	}

	return err;
}

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}

static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}

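/* Import one descriptor from the peer's dring, hand its packet to
 * vnet_rx_one(), then mark the descriptor DONE and write it back.
 * *needs_ack reports whether the peer asked to be ACKed for this entry.
 * Returns 1 if the descriptor was not READY yet, 0 on success.
 */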
static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	BUG_ON(!desc);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;

	dma_rmb();

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	err = vnet_rx_one(port, desc);
	if (err == -ECONNRESET)
		return err;
	trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
			  index, desc->hdr.ack);
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}

static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end, int *npkts, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;
	bool send_ack = true;

	end = (end == (u32)-1) ? vio_dring_prev(dr, start)
			       : vio_dring_next(dr, end);

	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);

		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		(*npkts)++;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = vio_dring_next(dr, start);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
		if ((*npkts) >= budget) {
			send_ack = false;
			break;
		}
	}
	if (unlikely(ack_start == -1)) {
		ack_end = vio_dring_prev(dr, start);
		ack_start = ack_end;
	}
	if (send_ack) {
		port->napi_resume = false;
		trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
					       port->vio._peer_sid,
					       ack_end, *npkts);
		return vnet_send_ack(port, dr, ack_start, ack_end,
				     VIO_DRING_STOPPED);
	} else {
		trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
						port->vio._peer_sid,
						ack_end, *npkts);
		port->napi_resume = true;
		port->napi_stop_idx = ack_end;
		return 1;
	}
}

static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
		   int budget)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	if (!port->napi_resume)
		dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
			    npkts, budget);
}

static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = vio_dring_next(dr, idx);
	}
	return found;
}

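/* Handle a dring ACK from the peer for our TX ring: advance dr->cons past
 * the acknowledged index and, if the descriptor at the new cons is already
 * READY but its "start" trigger was missed, send that trigger on the
 * transmitter's behalf.  Returns 1 when a stopped TX queue may be woken.
 */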
static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	u32 end;
	struct vio_net_desc *desc;
	struct netdev_queue *txq;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	dev = VNET_PORT_TO_NET_DEVICE(port);
	netif_tx_lock(dev);
	if (unlikely(!idx_is_pending(dr, end))) {
		netif_tx_unlock(dev);
		return 0;
	}

	/* sync for race conditions with vnet_start_xmit() and tell xmit it
	 * is time to send a trigger.
	 */
	trace_vnet_rx_stopped_ack(port->vio._local_sid,
				  port->vio._peer_sid, end);
	dr->cons = vio_dring_next(dr, end);
	desc = vio_dring_entry(dr, dr->cons);
	if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
		/* vnet_start_xmit() just populated this dring but missed
		 * sending the "start" LDC message to the consumer.
		 * Send a "start" trigger on its behalf.
		 */
		if (__vnet_tx_trigger(port, dr->cons) > 0)
			port->start_cons = false;
		else
			port->start_cons = true;
	} else {
		port->start_cons = true;
	}
	netif_tx_unlock(dev);

	txq = netdev_get_tx_queue(dev, port->q_index);
	if (unlikely(netif_tx_queue_stopped(txq) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
				  port->q_index);
	__netif_tx_lock(txq, smp_processor_id());
	if (likely(netif_tx_queue_stopped(txq)))
		netif_tx_wake_queue(txq);
	__netif_tx_unlock(txq);
}

bool sunvnet_port_is_up_common(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}
EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);

static int vnet_event_napi(struct vnet_port *port, int budget)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_driver_state *vio = &port->vio;
	int tx_wakeup, err;
	int npkts = 0;

	/* we don't expect any other bits */
	BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
				  LDC_EVENT_RESET |
				  LDC_EVENT_UP));

	/* RESET takes precedent over any other event */
	if (port->rx_event & LDC_EVENT_RESET) {
		vio_link_state_change(vio, LDC_EVENT_RESET);
		vnet_port_reset(port);
		vio_port_up(vio);

		/* If the device is running but its tx queue was
		 * stopped (due to flow control), restart it.
		 * This is necessary since vnet_port_reset()
		 * clears the tx drings and thus we may never get
		 * back a VIO_TYPE_DATA ACK packet - which is
		 * the normal mechanism to restart the tx queue.
		 */
		if (netif_running(dev))
			maybe_tx_wakeup(port);

		port->rx_event = 0;
		return 0;
	}

	if (port->rx_event & LDC_EVENT_UP) {
		vio_link_state_change(vio, LDC_EVENT_UP);
		port->rx_event = 0;
		return 0;
	}

	err = 0;
	tx_wakeup = 0;
	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		if (port->napi_resume) {
			struct vio_dring_data *pkt =
				(struct vio_dring_data *)&msgbuf;
			struct vio_dring_state *dr =
				&port->vio.drings[VIO_DRIVER_RX_RING];

			pkt->tag.type = VIO_TYPE_DATA;
			pkt->tag.stype = VIO_SUBTYPE_INFO;
			pkt->tag.stype_env = VIO_DRING_DATA;
			pkt->seq = dr->rcv_nxt;
			pkt->start_idx = vio_dring_next(dr,
							port->napi_stop_idx);
			pkt->end_idx = -1;
		} else {
			err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
			if (unlikely(err < 0)) {
				if (err == -ECONNRESET)
					vio_conn_reset(vio);
				break;
			}
			if (err == 0)
				break;
			viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
			       msgbuf.tag.type,
			       msgbuf.tag.stype,
			       msgbuf.tag.stype_env,
			       msgbuf.tag.sid);
			err = vio_validate_sid(vio, &msgbuf.tag);
			if (err < 0)
				break;
		}

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				if (!sunvnet_port_is_up_common(port)) {
					/* failures like handshake_failure()
					 * may have cleaned up dring, but
					 * NAPI polling may bring us here.
					 */
					err = -ECONNRESET;
					break;
				}
				err = vnet_rx(port, &msgbuf, &npkts, budget);
				if (npkts >= budget)
					break;
				if (npkts == 0)
					break;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		maybe_tx_wakeup(port);
	return npkts;
}

int sunvnet_poll_common(struct napi_struct *napi, int budget)
{
	struct vnet_port *port = container_of(napi, struct vnet_port, napi);
	struct vio_driver_state *vio = &port->vio;
	int processed = vnet_event_napi(port, budget);

	if (processed < budget) {
		napi_complete_done(napi, processed);
		port->rx_event &= ~LDC_EVENT_DATA_READY;
		vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
	}
	return processed;
}
EXPORT_SYMBOL_GPL(sunvnet_poll_common);

void sunvnet_event_common(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;

	port->rx_event |= event;
	vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
	napi_schedule(&port->napi);
}
EXPORT_SYMBOL_GPL(sunvnet_event_common);

static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_INFO,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= start,
		.end_idx		= (u32)-1,
	};
	int err, delay;
	int retries = 0;
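
	/* Before announcing new TX descriptors, flush any STOPPED ack we
	 * still owe the peer for its ring (a previously failed
	 * vnet_send_ack() leaves stop_rx set).
	 */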
	if (port->stop_rx) {
		trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
						  port->vio._peer_sid,
						  port->stop_rx_idx, -1);
		err = vnet_send_ack(port,
				    &port->vio.drings[VIO_DRIVER_RX_RING],
				    port->stop_rx_idx, -1,
				    VIO_DRING_STOPPED);
		if (err <= 0)
			return err;
	}

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);
	trace_vnet_tx_trigger(port->vio._local_sid,
			      port->vio._peer_sid, start, err);

	return err;
}

static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
					  unsigned *pending)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *skb = NULL;
	int i, txi;

	*pending = 0;

	txi = dr->prod;
	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		--txi;
		if (txi < 0)
			txi = VNET_TX_RING_SIZE - 1;

		d = vio_dring_entry(dr, txi);

		if (d->hdr.state == VIO_DESC_READY) {
			(*pending)++;
			continue;
		}
		if (port->tx_bufs[txi].skb) {
			if (d->hdr.state != VIO_DESC_DONE)
				pr_notice("invalid ring buffer state %d\n",
					  d->hdr.state);
			BUG_ON(port->tx_bufs[txi].skb->next);

			port->tx_bufs[txi].skb->next = skb;
			skb = port->tx_bufs[txi].skb;
			port->tx_bufs[txi].skb = NULL;

			ldc_unmap(port->vio.lp,
				  port->tx_bufs[txi].cookies,
				  port->tx_bufs[txi].ncookies);
		} else if (d->hdr.state == VIO_DESC_FREE) {
			break;
		}
		d->hdr.state = VIO_DESC_FREE;
	}
	return skb;
}

static inline void vnet_free_skbs(struct sk_buff *skb)
{
	struct sk_buff *next;

	while (skb) {
		next = skb->next;
		skb->next = NULL;
		dev_kfree_skb(skb);
		skb = next;
	}
}

void sunvnet_clean_timer_expire_common(unsigned long port0)
{
	struct vnet_port *port = (struct vnet_port *)port0;
	struct sk_buff *freeskbs;
	unsigned pending;

	netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
	freeskbs = vnet_clean_tx_ring(port, &pending);
	netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));

	vnet_free_skbs(freeskbs);

	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else
		del_timer(&port->clean_timer);
}
EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);

static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
			       struct ldc_trans_cookie *cookies, int ncookies,
			       unsigned int map_perm)
{
	int i, nc, err, blen;

	/* header */
	blen = skb_headlen(skb);
	if (blen < ETH_ZLEN)
		blen = ETH_ZLEN;
	blen += VNET_PACKET_SKIP;
	blen += 8 - (blen & 7);

	err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
			     ncookies, map_perm);
	if (err < 0)
		return err;
	nc = err;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u8 *vaddr;

		if (nc < ncookies) {
			vaddr = kmap_atomic(skb_frag_page(f));
			blen = skb_frag_size(f);
			blen += 8 - (blen & 7);
			err = ldc_map_single(lp, vaddr + f->page_offset,
					     blen, cookies + nc, ncookies - nc,
					     map_perm);
			kunmap_atomic(vaddr);
		} else {
			err = -EMSGSIZE;
		}

		if (err < 0) {
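			/* undo the cookies already mapped for this skb
			 * (header plus any earlier frags) before bailing
			 */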
			ldc_unmap(lp, cookies, nc);
			return err;
		}
		nc += err;
	}
	return nc;
}

static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
{
	struct sk_buff *nskb;
	int i, len, pad, docopy;

	len = skb->len;
	pad = 0;
	if (len < ETH_ZLEN) {
		pad += ETH_ZLEN - skb->len;
		len += pad;
	}
	len += VNET_PACKET_SKIP;
	pad += 8 - (len & 7);

	/* make sure we have enough cookies and alignment in every frag */
	docopy = skb_shinfo(skb)->nr_frags >= ncookies;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		docopy |= f->page_offset & 7;
	}
	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
	    skb_tailroom(skb) < pad ||
	    skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
		int start = 0, offset;
		__wsum csum;

		len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
		nskb = alloc_and_align_skb(skb->dev, len);
		if (!nskb) {
			dev_kfree_skb(skb);
			return NULL;
		}
		skb_reserve(nskb, VNET_PACKET_SKIP);

		nskb->protocol = skb->protocol;
		offset = skb_mac_header(skb) - skb->data;
		skb_set_mac_header(nskb, offset);
		offset = skb_network_header(skb) - skb->data;
		skb_set_network_header(nskb, offset);
		offset = skb_transport_header(skb) - skb->data;
		skb_set_transport_header(nskb, offset);

		offset = 0;
		nskb->csum_offset = skb->csum_offset;
		nskb->ip_summed = skb->ip_summed;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			start = skb_checksum_start_offset(skb);
		if (start) {
			struct iphdr *iph = ip_hdr(nskb);
			int offset = start + nskb->csum_offset;

			if (skb_copy_bits(skb, 0, nskb->data, start)) {
				dev_kfree_skb(nskb);
				dev_kfree_skb(skb);
				return NULL;
			}
			*(__sum16 *)(skb->data + offset) = 0;
			csum = skb_copy_and_csum_bits(skb, start,
						      nskb->data + start,
						      skb->len - start, 0);
			if (iph->protocol == IPPROTO_TCP ||
			    iph->protocol == IPPROTO_UDP) {
				csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - start,
							 iph->protocol, csum);
			}
			*(__sum16 *)(nskb->data + offset) = csum;

			nskb->ip_summed = CHECKSUM_NONE;
		} else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
			dev_kfree_skb(nskb);
			dev_kfree_skb(skb);
			return NULL;
		}
		(void)skb_put(nskb, skb->len);
		if (skb_is_gso(skb)) {
			skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
			skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
		}
		nskb->queue_mapping = skb->queue_mapping;
		dev_kfree_skb(skb);
		skb = nskb;
	}
	return skb;
}

static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
				struct vnet_port *(*vnet_tx_port)
				(struct sk_buff *, struct net_device *))
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *segs;
	int maclen, datalen;
	int status;
	int gso_size, gso_type, gso_segs;
	int hlen = skb_transport_header(skb) - skb_mac_header(skb);
	int proto = IPPROTO_IP;

	if (skb->protocol == htons(ETH_P_IP))
		proto = ip_hdr(skb)->protocol;
	else if (skb->protocol == htons(ETH_P_IPV6))
		proto = ipv6_hdr(skb)->nexthdr;

	if (proto == IPPROTO_TCP) {
		hlen += tcp_hdr(skb)->doff * 4;
	} else if (proto == IPPROTO_UDP) {
		hlen += sizeof(struct udphdr);
	} else {
		pr_err("vnet_handle_offloads GSO with unknown transport "
		       "protocol %d tproto %d\n", skb->protocol, proto);
		hlen = 128; /* XXX */
	}
	datalen = port->tsolen - hlen;

	gso_size = skb_shinfo(skb)->gso_size;
	gso_type = skb_shinfo(skb)->gso_type;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (port->tso && gso_size < datalen)
		gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);

	if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(dev, port->q_index);
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
			return NETDEV_TX_BUSY;
		netif_tx_wake_queue(txq);
	}

	maclen = skb_network_header(skb) - skb_mac_header(skb);
	skb_pull(skb, maclen);

	if (port->tso && gso_size < datalen) {
		if (skb_unclone(skb, GFP_ATOMIC))
			goto out_dropped;

		/* segment to TSO size */
		skb_shinfo(skb)->gso_size = datalen;
		skb_shinfo(skb)->gso_segs = gso_segs;
	}
	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
	if (IS_ERR(segs))
		goto out_dropped;

	skb_push(skb, maclen);
	skb_reset_mac_header(skb);

	status = 0;
	while (segs) {
		struct sk_buff *curr = segs;

		segs = segs->next;
		curr->next = NULL;
		if (port->tso && curr->len > dev->mtu) {
			skb_shinfo(curr)->gso_size = gso_size;
			skb_shinfo(curr)->gso_type = gso_type;
			skb_shinfo(curr)->gso_segs =
				DIV_ROUND_UP(curr->len - hlen, gso_size);
		} else {
			skb_shinfo(curr)->gso_size = 0;
		}

		skb_push(curr, maclen);
		skb_reset_mac_header(curr);
		memcpy(skb_mac_header(curr), skb_mac_header(skb),
		       maclen);
		curr->csum_start = skb_transport_header(curr) - curr->head;
		if (ip_hdr(curr)->protocol == IPPROTO_TCP)
			curr->csum_offset = offsetof(struct tcphdr, check);
		else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
			curr->csum_offset = offsetof(struct udphdr, check);

		if (!(status & NETDEV_TX_MASK))
			status = sunvnet_start_xmit_common(curr, dev,
							   vnet_tx_port);
		if (status & NETDEV_TX_MASK)
			dev_kfree_skb_any(curr);
	}

	if (!(status & NETDEV_TX_MASK))
		dev_kfree_skb_any(skb);
	return status;
out_dropped:
	dev->stats.tx_dropped++;
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
			      struct vnet_port *(*vnet_tx_port)
			      (struct sk_buff *, struct net_device *))
{
	struct vnet_port *port = NULL;
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned int len;
	struct sk_buff *freeskbs = NULL;
	int i, err, txi;
	unsigned pending = 0;
	struct netdev_queue *txq;

	rcu_read_lock();
	port = vnet_tx_port(skb, dev);
	if (unlikely(!port))
		goto out_dropped;

	if (skb_is_gso(skb) && skb->len > port->tsolen) {
		err = vnet_handle_offloads(port, skb, vnet_tx_port);
		rcu_read_unlock();
		return err;
	}

	if (!skb_is_gso(skb) && skb->len > port->rmtu) {
		unsigned long localmtu = port->rmtu - ETH_HLEN;

		if (vio_version_after_eq(&port->vio, 1, 3))
			localmtu -= VLAN_HLEN;

		if (skb->protocol == htons(ETH_P_IP)) {
			struct flowi4 fl4;
			struct rtable *rt = NULL;

			memset(&fl4, 0, sizeof(fl4));
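			/* Look up a route back to the sender so that the
			 * ICMP FRAG_NEEDED below reports the MTU this
			 * port can actually handle.
			 */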
			fl4.flowi4_oif = dev->ifindex;
			fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
			fl4.daddr = ip_hdr(skb)->daddr;
			fl4.saddr = ip_hdr(skb)->saddr;

			rt = ip_route_output_key(dev_net(dev), &fl4);
			if (!IS_ERR(rt)) {
				skb_dst_set(skb, &rt->dst);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_FRAG_NEEDED,
					  htonl(localmtu));
			}
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
#endif
		goto out_dropped;
	}

	skb = vnet_skb_shape(skb, 2);

	if (unlikely(!skb))
		goto out_dropped;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		vnet_fullcsum(skb);

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	i = skb_get_queue_mapping(skb);
	txq = netdev_get_tx_queue(dev, i);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		if (!netif_tx_queue_stopped(txq)) {
			netif_tx_stop_queue(txq);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		rcu_read_unlock();
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	txi = dr->prod;

	freeskbs = vnet_clean_tx_ring(port, &pending);

	BUG_ON(port->tx_bufs[txi].skb);

	len = skb->len;
	if (len < ETH_ZLEN)
		len = ETH_ZLEN;

	err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
			   (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
	if (err < 0) {
		netdev_info(dev, "tx buffer map error %d\n", err);
		goto out_dropped;
	}

	port->tx_bufs[txi].skb = skb;
	skb = NULL;
	port->tx_bufs[txi].ncookies = err;

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
						->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer.  The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed.  We send a "start" trigger here
	 * if, and only if, start_cons is true (reset it afterward).
	 * Conversely, vnet_ack() should check if the dring corresponding
	 * to cons is marked READY, but start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the dma_wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler.  The consumer will
	 * not proceed until the VIO_DESC_READY is visible assuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order.  The HV trap from the LDC start trigger is the
	 * producer to consumer announcement that work is available to the
	 * consumer
	 */
	if (!port->start_cons) { /* previous trigger suffices */
		trace_vnet_skip_tx_trigger(port->vio._local_sid,
					   port->vio._peer_sid, dr->cons);
		goto ldc_start_done;
	}

	err = __vnet_tx_trigger(port, dr->cons);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		skb = port->tx_bufs[txi].skb;
		port->tx_bufs[txi].skb = NULL;
		dev->stats.tx_carrier_errors++;
		goto out_dropped;
	}

ldc_start_done:
	port->start_cons = false;

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		netif_tx_stop_queue(txq);
		smp_rmb();
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_tx_wake_queue(txq);
	}

	(void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
	rcu_read_unlock();

	vnet_free_skbs(freeskbs);

	return NETDEV_TX_OK;

out_dropped:
	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else if (port)
		del_timer(&port->clean_timer);
	rcu_read_unlock();
	if (skb)
		dev_kfree_skb(skb);
	vnet_free_skbs(freeskbs);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);

void sunvnet_tx_timeout_common(struct net_device *dev)
{
	/* XXX Implement me XXX */
}
EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);

int sunvnet_open_common(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_tx_start_all_queues(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_open_common);

int sunvnet_close_common(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_close_common);

static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}

static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		if (!m) {
			m = kzalloc(sizeof(*m), GFP_ATOMIC);
			if (!m)
				continue;
			memcpy(m->addr, ha->addr, ETH_ALEN);
			m->hit = 1;

			m->next = vp->mcast_list;
			vp->mcast_list = m;
		}
	}
}

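/* Push the accumulated multicast state to the switch port: advertise
 * (set = 1) entries not yet sent, then withdraw (set = 0) and free entries
 * that were not hit in the last __update_mc_list() pass.  Addresses are
 * batched VNET_NUM_MCAST at a time per LDC message.
 */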
static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void)vio_ldc_send(&port->vio, &info,
					   sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void)vio_ldc_send(&port->vio, &info, sizeof(info));
	}

	info.set = 0;

	n_addrs = 0;
	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void)vio_ldc_send(&port->vio, &info,
					   sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void)vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}

void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;

	rcu_read_lock();
	list_for_each_entry_rcu(port, &vp->port_list, list) {
		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
			break;
		}
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);

int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);

void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	if (!dr->base)
		return;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		struct vio_net_desc *d;
		void *skb = port->tx_bufs[i].skb;

		if (!skb)
			continue;

		d = vio_dring_entry(dr, i);

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);
		dev_kfree_skb(skb);
		port->tx_bufs[i].skb = NULL;
		d->hdr.state = VIO_DESC_FREE;
	}
	ldc_free_exp_dring(port->vio.lp, dr->base,
			   (dr->entry_size * dr->num_entries),
			   dr->cookies, dr->ncookies);
	dr->base = NULL;
	dr->entry_size = 0;
	dr->num_entries = 0;
	dr->pending = 0;
	dr->ncookies = 0;
}
EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);

static void vnet_port_reset(struct vnet_port *port)
{
	del_timer(&port->clean_timer);
	sunvnet_port_free_tx_bufs_common(port);
	port->rmtu = 0;
	port->tso = (port->vsw == 0);  /* no tso in vsw, misbehaves in bridge */
	port->tsolen = 0;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len, elen;
	int i, err, ncookies;
	void *dring;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	elen = sizeof(struct vio_net_desc) +
	       sizeof(struct ldc_trans_cookie) * 2;
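	/* each descriptor carries up to two inline cookies; v1.7+ appends
	 * the extension area used for the LSO and checksum flags
	 */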
	if (vio_version_after_eq(&port->vio, 1, 7))
		elen += sizeof(struct vio_net_dext);
	len = VNET_TX_RING_SIZE * elen;

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = elen;
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = 0;
	dr->cons = 0;
	port->start_cons = true; /* need an initial trigger */
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, i);
		d->hdr.state = VIO_DESC_FREE;
	}
	return 0;

err_out:
	sunvnet_port_free_tx_bufs_common(port);

	return err;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
#endif

void sunvnet_port_add_txq_common(struct vnet_port *port)
{
	struct vnet *vp = port->vp;
	int n;

	n = vp->nports++;
	n = n & (VNET_MAX_TXQS - 1);
	port->q_index = n;
	netif_tx_wake_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
						port->q_index));
}
EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);

void sunvnet_port_rm_txq_common(struct vnet_port *port)
{
	port->vp->nports--;
	netif_tx_stop_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
						port->q_index));
}
EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);