1 /* sunvnet.c: Sun LDOM Virtual Network Driver. 2 * 3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> 4 */ 5 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/types.h> 11 #include <linux/slab.h> 12 #include <linux/delay.h> 13 #include <linux/init.h> 14 #include <linux/netdevice.h> 15 #include <linux/ethtool.h> 16 #include <linux/etherdevice.h> 17 #include <linux/mutex.h> 18 #include <linux/if_vlan.h> 19 20 #if IS_ENABLED(CONFIG_IPV6) 21 #include <linux/icmpv6.h> 22 #endif 23 24 #include <net/icmp.h> 25 #include <net/route.h> 26 27 #include <asm/vio.h> 28 #include <asm/ldc.h> 29 30 #include "sunvnet.h" 31 32 #define DRV_MODULE_NAME "sunvnet" 33 #define DRV_MODULE_VERSION "1.0" 34 #define DRV_MODULE_RELDATE "June 25, 2007" 35 36 static char version[] = 37 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; 38 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); 39 MODULE_DESCRIPTION("Sun LDOM virtual network driver"); 40 MODULE_LICENSE("GPL"); 41 MODULE_VERSION(DRV_MODULE_VERSION); 42 43 /* Heuristic for the number of times to exponentially backoff and 44 * retry sending an LDC trigger when EAGAIN is encountered 45 */ 46 #define VNET_MAX_RETRIES 10 47 48 static int __vnet_tx_trigger(struct vnet_port *port, u32 start); 49 50 /* Ordered from largest major to lowest */ 51 static struct vio_version vnet_versions[] = { 52 { .major = 1, .minor = 6 }, 53 { .major = 1, .minor = 0 }, 54 }; 55 56 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr) 57 { 58 return vio_dring_avail(dr, VNET_TX_RING_SIZE); 59 } 60 61 static int vnet_handle_unknown(struct vnet_port *port, void *arg) 62 { 63 struct vio_msg_tag *pkt = arg; 64 65 pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n", 66 pkt->type, pkt->stype, pkt->stype_env, pkt->sid); 67 pr_err("Resetting connection\n"); 68 69 ldc_disconnect(port->vio.lp); 70 71 return -ECONNRESET; 72 } 73 74 static int vnet_send_attr(struct vio_driver_state *vio) 75 { 76 struct vnet_port *port = to_vnet_port(vio); 77 struct net_device *dev = port->vp->dev; 78 struct vio_net_attr_info pkt; 79 int framelen = ETH_FRAME_LEN; 80 int i; 81 82 memset(&pkt, 0, sizeof(pkt)); 83 pkt.tag.type = VIO_TYPE_CTRL; 84 pkt.tag.stype = VIO_SUBTYPE_INFO; 85 pkt.tag.stype_env = VIO_ATTR_INFO; 86 pkt.tag.sid = vio_send_sid(vio); 87 if (vio_version_before(vio, 1, 2)) 88 pkt.xfer_mode = VIO_DRING_MODE; 89 else 90 pkt.xfer_mode = VIO_NEW_DRING_MODE; 91 pkt.addr_type = VNET_ADDR_ETHERMAC; 92 pkt.ack_freq = 0; 93 for (i = 0; i < 6; i++) 94 pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8); 95 if (vio_version_after(vio, 1, 3)) { 96 if (port->rmtu) { 97 port->rmtu = min(VNET_MAXPACKET, port->rmtu); 98 pkt.mtu = port->rmtu; 99 } else { 100 port->rmtu = VNET_MAXPACKET; 101 pkt.mtu = port->rmtu; 102 } 103 if (vio_version_after_eq(vio, 1, 6)) 104 pkt.options = VIO_TX_DRING; 105 } else if (vio_version_before(vio, 1, 3)) { 106 pkt.mtu = framelen; 107 } else { /* v1.3 */ 108 pkt.mtu = framelen + VLAN_HLEN; 109 } 110 111 pkt.plnk_updt = PHYSLINK_UPDATE_NONE; 112 pkt.cflags = 0; 113 114 viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 115 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 116 "cflags[0x%04x] lso_max[%u]\n", 117 pkt.xfer_mode, pkt.addr_type, 118 (unsigned long long)pkt.addr, 119 pkt.ack_freq, pkt.plnk_updt, pkt.options, 120 (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen); 121 122 123 return vio_ldc_send(vio, &pkt, sizeof(pkt)); 124 } 125 126 static int handle_attr_info(struct vio_driver_state *vio, 127 struct vio_net_attr_info *pkt) 128 { 129 struct vnet_port *port = to_vnet_port(vio); 130 u64 localmtu; 131 u8 xfer_mode; 132 133 viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " 134 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " 135 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 136 pkt->xfer_mode, pkt->addr_type, 137 (unsigned long long)pkt->addr, 138 pkt->ack_freq, pkt->plnk_updt, pkt->options, 139 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 140 pkt->ipv4_lso_maxlen); 141 142 pkt->tag.sid = vio_send_sid(vio); 143 144 xfer_mode = pkt->xfer_mode; 145 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */ 146 if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE) 147 xfer_mode = VIO_NEW_DRING_MODE; 148 149 /* MTU negotiation: 150 * < v1.3 - ETH_FRAME_LEN exactly 151 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change 152 * pkt->mtu for ACK 153 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly 154 */ 155 if (vio_version_before(vio, 1, 3)) { 156 localmtu = ETH_FRAME_LEN; 157 } else if (vio_version_after(vio, 1, 3)) { 158 localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET; 159 localmtu = min(pkt->mtu, localmtu); 160 pkt->mtu = localmtu; 161 } else { /* v1.3 */ 162 localmtu = ETH_FRAME_LEN + VLAN_HLEN; 163 } 164 port->rmtu = localmtu; 165 166 /* for version >= 1.6, ACK packet mode we support */ 167 if (vio_version_after_eq(vio, 1, 6)) { 168 pkt->xfer_mode = VIO_NEW_DRING_MODE; 169 pkt->options = VIO_TX_DRING; 170 } 171 172 if (!(xfer_mode | VIO_NEW_DRING_MODE) || 173 pkt->addr_type != VNET_ADDR_ETHERMAC || 174 pkt->mtu != localmtu) { 175 viodbg(HS, "SEND NET ATTR NACK\n"); 176 177 pkt->tag.stype = VIO_SUBTYPE_NACK; 178 179 (void) vio_ldc_send(vio, pkt, sizeof(*pkt)); 180 181 return -ECONNRESET; 182 } else { 183 viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] " 184 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] " 185 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", 186 pkt->xfer_mode, pkt->addr_type, 187 (unsigned long long)pkt->addr, 188 pkt->ack_freq, pkt->plnk_updt, pkt->options, 189 (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, 190 pkt->ipv4_lso_maxlen); 191 192 pkt->tag.stype = VIO_SUBTYPE_ACK; 193 194 return vio_ldc_send(vio, pkt, sizeof(*pkt)); 195 } 196 197 } 198 199 static int handle_attr_ack(struct vio_driver_state *vio, 200 struct vio_net_attr_info *pkt) 201 { 202 viodbg(HS, "GOT NET ATTR ACK\n"); 203 204 return 0; 205 } 206 207 static int handle_attr_nack(struct vio_driver_state *vio, 208 struct vio_net_attr_info *pkt) 209 { 210 viodbg(HS, "GOT NET ATTR NACK\n"); 211 212 return -ECONNRESET; 213 } 214 215 static int vnet_handle_attr(struct vio_driver_state *vio, void *arg) 216 { 217 struct vio_net_attr_info *pkt = arg; 218 219 switch (pkt->tag.stype) { 220 case VIO_SUBTYPE_INFO: 221 return handle_attr_info(vio, pkt); 222 223 case VIO_SUBTYPE_ACK: 224 return handle_attr_ack(vio, pkt); 225 226 case VIO_SUBTYPE_NACK: 227 return handle_attr_nack(vio, pkt); 228 229 default: 230 return -ECONNRESET; 231 } 232 } 233 234 static void vnet_handshake_complete(struct vio_driver_state *vio) 235 { 236 struct vio_dring_state *dr; 237 238 dr = &vio->drings[VIO_DRIVER_RX_RING]; 239 dr->snd_nxt = dr->rcv_nxt = 1; 240 241 dr = &vio->drings[VIO_DRIVER_TX_RING]; 242 dr->snd_nxt = dr->rcv_nxt = 1; 243 } 244 245 /* The hypervisor interface that implements copying to/from imported 246 * memory from another domain requires that copies are done to 8-byte 247 * aligned buffers, and that the lengths of such copies are also 8-byte 248 * multiples. 249 * 250 * So we align skb->data to an 8-byte multiple and pad-out the data 251 * area so we can round the copy length up to the next multiple of 252 * 8 for the copy. 253 * 254 * The transmitter puts the actual start of the packet 6 bytes into 255 * the buffer it sends over, so that the IP headers after the ethernet 256 * header are aligned properly. These 6 bytes are not in the descriptor 257 * length, they are simply implied. This offset is represented using 258 * the VNET_PACKET_SKIP macro. 259 */ 260 static struct sk_buff *alloc_and_align_skb(struct net_device *dev, 261 unsigned int len) 262 { 263 struct sk_buff *skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8); 264 unsigned long addr, off; 265 266 if (unlikely(!skb)) 267 return NULL; 268 269 addr = (unsigned long) skb->data; 270 off = ((addr + 7UL) & ~7UL) - addr; 271 if (off) 272 skb_reserve(skb, off); 273 274 return skb; 275 } 276 277 static int vnet_rx_one(struct vnet_port *port, unsigned int len, 278 struct ldc_trans_cookie *cookies, int ncookies) 279 { 280 struct net_device *dev = port->vp->dev; 281 unsigned int copy_len; 282 struct sk_buff *skb; 283 int err; 284 285 err = -EMSGSIZE; 286 if (unlikely(len < ETH_ZLEN || len > port->rmtu)) { 287 dev->stats.rx_length_errors++; 288 goto out_dropped; 289 } 290 291 skb = alloc_and_align_skb(dev, len); 292 err = -ENOMEM; 293 if (unlikely(!skb)) { 294 dev->stats.rx_missed_errors++; 295 goto out_dropped; 296 } 297 298 copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U; 299 skb_put(skb, copy_len); 300 err = ldc_copy(port->vio.lp, LDC_COPY_IN, 301 skb->data, copy_len, 0, 302 cookies, ncookies); 303 if (unlikely(err < 0)) { 304 dev->stats.rx_frame_errors++; 305 goto out_free_skb; 306 } 307 308 skb_pull(skb, VNET_PACKET_SKIP); 309 skb_trim(skb, len); 310 skb->protocol = eth_type_trans(skb, dev); 311 312 dev->stats.rx_packets++; 313 dev->stats.rx_bytes += len; 314 315 netif_rx(skb); 316 317 return 0; 318 319 out_free_skb: 320 kfree_skb(skb); 321 322 out_dropped: 323 dev->stats.rx_dropped++; 324 return err; 325 } 326 327 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, 328 u32 start, u32 end, u8 vio_dring_state) 329 { 330 struct vio_dring_data hdr = { 331 .tag = { 332 .type = VIO_TYPE_DATA, 333 .stype = VIO_SUBTYPE_ACK, 334 .stype_env = VIO_DRING_DATA, 335 .sid = vio_send_sid(&port->vio), 336 }, 337 .dring_ident = dr->ident, 338 .start_idx = start, 339 .end_idx = end, 340 .state = vio_dring_state, 341 }; 342 int err, delay; 343 int retries = 0; 344 345 hdr.seq = dr->snd_nxt; 346 delay = 1; 347 do { 348 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 349 if (err > 0) { 350 dr->snd_nxt++; 351 break; 352 } 353 udelay(delay); 354 if ((delay <<= 1) > 128) 355 delay = 128; 356 if (retries++ > VNET_MAX_RETRIES) { 357 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n", 358 port->raddr[0], port->raddr[1], 359 port->raddr[2], port->raddr[3], 360 port->raddr[4], port->raddr[5]); 361 break; 362 } 363 } while (err == -EAGAIN); 364 365 if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) { 366 port->stop_rx_idx = end; 367 port->stop_rx = true; 368 } else { 369 port->stop_rx_idx = 0; 370 port->stop_rx = false; 371 } 372 373 return err; 374 } 375 376 static u32 next_idx(u32 idx, struct vio_dring_state *dr) 377 { 378 if (++idx == dr->num_entries) 379 idx = 0; 380 return idx; 381 } 382 383 static u32 prev_idx(u32 idx, struct vio_dring_state *dr) 384 { 385 if (idx == 0) 386 idx = dr->num_entries - 1; 387 else 388 idx--; 389 390 return idx; 391 } 392 393 static struct vio_net_desc *get_rx_desc(struct vnet_port *port, 394 struct vio_dring_state *dr, 395 u32 index) 396 { 397 struct vio_net_desc *desc = port->vio.desc_buf; 398 int err; 399 400 err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size, 401 (index * dr->entry_size), 402 dr->cookies, dr->ncookies); 403 if (err < 0) 404 return ERR_PTR(err); 405 406 return desc; 407 } 408 409 static int put_rx_desc(struct vnet_port *port, 410 struct vio_dring_state *dr, 411 struct vio_net_desc *desc, 412 u32 index) 413 { 414 int err; 415 416 err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size, 417 (index * dr->entry_size), 418 dr->cookies, dr->ncookies); 419 if (err < 0) 420 return err; 421 422 return 0; 423 } 424 425 static int vnet_walk_rx_one(struct vnet_port *port, 426 struct vio_dring_state *dr, 427 u32 index, int *needs_ack) 428 { 429 struct vio_net_desc *desc = get_rx_desc(port, dr, index); 430 struct vio_driver_state *vio = &port->vio; 431 int err; 432 433 if (IS_ERR(desc)) 434 return PTR_ERR(desc); 435 436 if (desc->hdr.state != VIO_DESC_READY) 437 return 1; 438 439 rmb(); 440 441 viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n", 442 desc->hdr.state, desc->hdr.ack, 443 desc->size, desc->ncookies, 444 desc->cookies[0].cookie_addr, 445 desc->cookies[0].cookie_size); 446 447 err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies); 448 if (err == -ECONNRESET) 449 return err; 450 desc->hdr.state = VIO_DESC_DONE; 451 err = put_rx_desc(port, dr, desc, index); 452 if (err < 0) 453 return err; 454 *needs_ack = desc->hdr.ack; 455 return 0; 456 } 457 458 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr, 459 u32 start, u32 end) 460 { 461 struct vio_driver_state *vio = &port->vio; 462 int ack_start = -1, ack_end = -1; 463 464 end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr); 465 466 viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end); 467 468 while (start != end) { 469 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack); 470 if (err == -ECONNRESET) 471 return err; 472 if (err != 0) 473 break; 474 if (ack_start == -1) 475 ack_start = start; 476 ack_end = start; 477 start = next_idx(start, dr); 478 if (ack && start != end) { 479 err = vnet_send_ack(port, dr, ack_start, ack_end, 480 VIO_DRING_ACTIVE); 481 if (err == -ECONNRESET) 482 return err; 483 ack_start = -1; 484 } 485 } 486 if (unlikely(ack_start == -1)) 487 ack_start = ack_end = prev_idx(start, dr); 488 return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED); 489 } 490 491 static int vnet_rx(struct vnet_port *port, void *msgbuf) 492 { 493 struct vio_dring_data *pkt = msgbuf; 494 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING]; 495 struct vio_driver_state *vio = &port->vio; 496 497 viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n", 498 pkt->tag.stype_env, pkt->seq, dr->rcv_nxt); 499 500 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 501 return 0; 502 if (unlikely(pkt->seq != dr->rcv_nxt)) { 503 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n", 504 pkt->seq, dr->rcv_nxt); 505 return 0; 506 } 507 508 dr->rcv_nxt++; 509 510 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */ 511 512 return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx); 513 } 514 515 static int idx_is_pending(struct vio_dring_state *dr, u32 end) 516 { 517 u32 idx = dr->cons; 518 int found = 0; 519 520 while (idx != dr->prod) { 521 if (idx == end) { 522 found = 1; 523 break; 524 } 525 idx = next_idx(idx, dr); 526 } 527 return found; 528 } 529 530 static int vnet_ack(struct vnet_port *port, void *msgbuf) 531 { 532 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 533 struct vio_dring_data *pkt = msgbuf; 534 struct net_device *dev; 535 struct vnet *vp; 536 u32 end; 537 struct vio_net_desc *desc; 538 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA)) 539 return 0; 540 541 end = pkt->end_idx; 542 if (unlikely(!idx_is_pending(dr, end))) 543 return 0; 544 545 /* sync for race conditions with vnet_start_xmit() and tell xmit it 546 * is time to send a trigger. 547 */ 548 dr->cons = next_idx(end, dr); 549 desc = vio_dring_entry(dr, dr->cons); 550 if (desc->hdr.state == VIO_DESC_READY && port->start_cons) { 551 /* vnet_start_xmit() just populated this dring but missed 552 * sending the "start" LDC message to the consumer. 553 * Send a "start" trigger on its behalf. 554 */ 555 if (__vnet_tx_trigger(port, dr->cons) > 0) 556 port->start_cons = false; 557 else 558 port->start_cons = true; 559 } else { 560 port->start_cons = true; 561 } 562 563 564 vp = port->vp; 565 dev = vp->dev; 566 if (unlikely(netif_queue_stopped(dev) && 567 vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr))) 568 return 1; 569 570 return 0; 571 } 572 573 static int vnet_nack(struct vnet_port *port, void *msgbuf) 574 { 575 /* XXX just reset or similar XXX */ 576 return 0; 577 } 578 579 static int handle_mcast(struct vnet_port *port, void *msgbuf) 580 { 581 struct vio_net_mcast_info *pkt = msgbuf; 582 583 if (pkt->tag.stype != VIO_SUBTYPE_ACK) 584 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n", 585 port->vp->dev->name, 586 pkt->tag.type, 587 pkt->tag.stype, 588 pkt->tag.stype_env, 589 pkt->tag.sid); 590 591 return 0; 592 } 593 594 static void maybe_tx_wakeup(unsigned long param) 595 { 596 struct vnet *vp = (struct vnet *)param; 597 struct net_device *dev = vp->dev; 598 599 netif_tx_lock(dev); 600 if (likely(netif_queue_stopped(dev))) { 601 struct vnet_port *port; 602 int wake = 1; 603 604 list_for_each_entry(port, &vp->port_list, list) { 605 struct vio_dring_state *dr; 606 607 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 608 if (vnet_tx_dring_avail(dr) < 609 VNET_TX_WAKEUP_THRESH(dr)) { 610 wake = 0; 611 break; 612 } 613 } 614 if (wake) 615 netif_wake_queue(dev); 616 } 617 netif_tx_unlock(dev); 618 } 619 620 static void vnet_event(void *arg, int event) 621 { 622 struct vnet_port *port = arg; 623 struct vio_driver_state *vio = &port->vio; 624 unsigned long flags; 625 int tx_wakeup, err; 626 627 spin_lock_irqsave(&vio->lock, flags); 628 629 if (unlikely(event == LDC_EVENT_RESET || 630 event == LDC_EVENT_UP)) { 631 vio_link_state_change(vio, event); 632 spin_unlock_irqrestore(&vio->lock, flags); 633 634 if (event == LDC_EVENT_RESET) { 635 port->rmtu = 0; 636 vio_port_up(vio); 637 } 638 return; 639 } 640 641 if (unlikely(event != LDC_EVENT_DATA_READY)) { 642 pr_warn("Unexpected LDC event %d\n", event); 643 spin_unlock_irqrestore(&vio->lock, flags); 644 return; 645 } 646 647 tx_wakeup = err = 0; 648 while (1) { 649 union { 650 struct vio_msg_tag tag; 651 u64 raw[8]; 652 } msgbuf; 653 654 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf)); 655 if (unlikely(err < 0)) { 656 if (err == -ECONNRESET) 657 vio_conn_reset(vio); 658 break; 659 } 660 if (err == 0) 661 break; 662 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", 663 msgbuf.tag.type, 664 msgbuf.tag.stype, 665 msgbuf.tag.stype_env, 666 msgbuf.tag.sid); 667 err = vio_validate_sid(vio, &msgbuf.tag); 668 if (err < 0) 669 break; 670 671 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) { 672 if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) { 673 err = vnet_rx(port, &msgbuf); 674 } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) { 675 err = vnet_ack(port, &msgbuf); 676 if (err > 0) 677 tx_wakeup |= err; 678 } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) { 679 err = vnet_nack(port, &msgbuf); 680 } 681 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) { 682 if (msgbuf.tag.stype_env == VNET_MCAST_INFO) 683 err = handle_mcast(port, &msgbuf); 684 else 685 err = vio_control_pkt_engine(vio, &msgbuf); 686 if (err) 687 break; 688 } else { 689 err = vnet_handle_unknown(port, &msgbuf); 690 } 691 if (err == -ECONNRESET) 692 break; 693 } 694 spin_unlock(&vio->lock); 695 /* Kick off a tasklet to wake the queue. We cannot call 696 * maybe_tx_wakeup directly here because we could deadlock on 697 * netif_tx_lock() with dev_watchdog() 698 */ 699 if (unlikely(tx_wakeup && err != -ECONNRESET)) 700 tasklet_schedule(&port->vp->vnet_tx_wakeup); 701 702 local_irq_restore(flags); 703 } 704 705 static int __vnet_tx_trigger(struct vnet_port *port, u32 start) 706 { 707 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 708 struct vio_dring_data hdr = { 709 .tag = { 710 .type = VIO_TYPE_DATA, 711 .stype = VIO_SUBTYPE_INFO, 712 .stype_env = VIO_DRING_DATA, 713 .sid = vio_send_sid(&port->vio), 714 }, 715 .dring_ident = dr->ident, 716 .start_idx = start, 717 .end_idx = (u32) -1, 718 }; 719 int err, delay; 720 int retries = 0; 721 722 if (port->stop_rx) { 723 err = vnet_send_ack(port, 724 &port->vio.drings[VIO_DRIVER_RX_RING], 725 port->stop_rx_idx, -1, 726 VIO_DRING_STOPPED); 727 if (err <= 0) 728 return err; 729 } 730 731 hdr.seq = dr->snd_nxt; 732 delay = 1; 733 do { 734 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr)); 735 if (err > 0) { 736 dr->snd_nxt++; 737 break; 738 } 739 udelay(delay); 740 if ((delay <<= 1) > 128) 741 delay = 128; 742 if (retries++ > VNET_MAX_RETRIES) 743 break; 744 } while (err == -EAGAIN); 745 746 return err; 747 } 748 749 static inline bool port_is_up(struct vnet_port *vnet) 750 { 751 struct vio_driver_state *vio = &vnet->vio; 752 753 return !!(vio->hs_state & VIO_HS_COMPLETE); 754 } 755 756 struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb) 757 { 758 unsigned int hash = vnet_hashfn(skb->data); 759 struct hlist_head *hp = &vp->port_hash[hash]; 760 struct vnet_port *port; 761 762 hlist_for_each_entry(port, hp, hash) { 763 if (!port_is_up(port)) 764 continue; 765 if (ether_addr_equal(port->raddr, skb->data)) 766 return port; 767 } 768 list_for_each_entry(port, &vp->port_list, list) { 769 if (!port->switch_port) 770 continue; 771 if (!port_is_up(port)) 772 continue; 773 return port; 774 } 775 return NULL; 776 } 777 778 struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb) 779 { 780 struct vnet_port *ret; 781 unsigned long flags; 782 783 spin_lock_irqsave(&vp->lock, flags); 784 ret = __tx_port_find(vp, skb); 785 spin_unlock_irqrestore(&vp->lock, flags); 786 787 return ret; 788 } 789 790 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port, 791 unsigned *pending) 792 { 793 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 794 struct sk_buff *skb = NULL; 795 int i, txi; 796 797 *pending = 0; 798 799 txi = dr->prod-1; 800 if (txi < 0) 801 txi = VNET_TX_RING_SIZE-1; 802 803 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 804 struct vio_net_desc *d; 805 806 d = vio_dring_entry(dr, txi); 807 808 if (d->hdr.state == VIO_DESC_DONE) { 809 if (port->tx_bufs[txi].skb) { 810 BUG_ON(port->tx_bufs[txi].skb->next); 811 812 port->tx_bufs[txi].skb->next = skb; 813 skb = port->tx_bufs[txi].skb; 814 port->tx_bufs[txi].skb = NULL; 815 816 ldc_unmap(port->vio.lp, 817 port->tx_bufs[txi].cookies, 818 port->tx_bufs[txi].ncookies); 819 } 820 d->hdr.state = VIO_DESC_FREE; 821 } else if (d->hdr.state == VIO_DESC_READY) { 822 (*pending)++; 823 } else if (d->hdr.state == VIO_DESC_FREE) { 824 break; 825 } 826 --txi; 827 if (txi < 0) 828 txi = VNET_TX_RING_SIZE-1; 829 } 830 return skb; 831 } 832 833 static inline void vnet_free_skbs(struct sk_buff *skb) 834 { 835 struct sk_buff *next; 836 837 while (skb) { 838 next = skb->next; 839 skb->next = NULL; 840 dev_kfree_skb(skb); 841 skb = next; 842 } 843 } 844 845 static void vnet_clean_timer_expire(unsigned long port0) 846 { 847 struct vnet_port *port = (struct vnet_port *)port0; 848 struct sk_buff *freeskbs; 849 unsigned pending; 850 unsigned long flags; 851 852 spin_lock_irqsave(&port->vio.lock, flags); 853 freeskbs = vnet_clean_tx_ring(port, &pending); 854 spin_unlock_irqrestore(&port->vio.lock, flags); 855 856 vnet_free_skbs(freeskbs); 857 858 if (pending) 859 (void)mod_timer(&port->clean_timer, 860 jiffies + VNET_CLEAN_TIMEOUT); 861 else 862 del_timer(&port->clean_timer); 863 } 864 865 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, void **pstart, 866 int *plen) 867 { 868 struct sk_buff *nskb; 869 int len, pad; 870 871 len = skb->len; 872 pad = 0; 873 if (len < ETH_ZLEN) { 874 pad += ETH_ZLEN - skb->len; 875 len += pad; 876 } 877 len += VNET_PACKET_SKIP; 878 pad += 8 - (len & 7); 879 len += 8 - (len & 7); 880 881 if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP || 882 skb_tailroom(skb) < pad || 883 skb_headroom(skb) < VNET_PACKET_SKIP) { 884 nskb = alloc_and_align_skb(skb->dev, skb->len); 885 skb_reserve(nskb, VNET_PACKET_SKIP); 886 if (skb_copy_bits(skb, 0, nskb->data, skb->len)) { 887 dev_kfree_skb(nskb); 888 dev_kfree_skb(skb); 889 return NULL; 890 } 891 (void)skb_put(nskb, skb->len); 892 dev_kfree_skb(skb); 893 skb = nskb; 894 } 895 896 *pstart = skb->data - VNET_PACKET_SKIP; 897 *plen = len; 898 return skb; 899 } 900 901 static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) 902 { 903 struct vnet *vp = netdev_priv(dev); 904 struct vnet_port *port = tx_port_find(vp, skb); 905 struct vio_dring_state *dr; 906 struct vio_net_desc *d; 907 unsigned long flags; 908 unsigned int len; 909 struct sk_buff *freeskbs = NULL; 910 int i, err, txi; 911 void *start = NULL; 912 int nlen = 0; 913 unsigned pending = 0; 914 915 if (unlikely(!port)) 916 goto out_dropped; 917 918 skb = vnet_skb_shape(skb, &start, &nlen); 919 920 if (unlikely(!skb)) 921 goto out_dropped; 922 923 if (skb->len > port->rmtu) { 924 unsigned long localmtu = port->rmtu - ETH_HLEN; 925 926 if (vio_version_after_eq(&port->vio, 1, 3)) 927 localmtu -= VLAN_HLEN; 928 929 if (skb->protocol == htons(ETH_P_IP)) { 930 struct flowi4 fl4; 931 struct rtable *rt = NULL; 932 933 memset(&fl4, 0, sizeof(fl4)); 934 fl4.flowi4_oif = dev->ifindex; 935 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 936 fl4.daddr = ip_hdr(skb)->daddr; 937 fl4.saddr = ip_hdr(skb)->saddr; 938 939 rt = ip_route_output_key(dev_net(dev), &fl4); 940 if (!IS_ERR(rt)) { 941 skb_dst_set(skb, &rt->dst); 942 icmp_send(skb, ICMP_DEST_UNREACH, 943 ICMP_FRAG_NEEDED, 944 htonl(localmtu)); 945 } 946 } 947 #if IS_ENABLED(CONFIG_IPV6) 948 else if (skb->protocol == htons(ETH_P_IPV6)) 949 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu); 950 #endif 951 goto out_dropped; 952 } 953 954 spin_lock_irqsave(&port->vio.lock, flags); 955 956 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 957 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 958 if (!netif_queue_stopped(dev)) { 959 netif_stop_queue(dev); 960 961 /* This is a hard error, log it. */ 962 netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); 963 dev->stats.tx_errors++; 964 } 965 spin_unlock_irqrestore(&port->vio.lock, flags); 966 return NETDEV_TX_BUSY; 967 } 968 969 d = vio_dring_cur(dr); 970 971 txi = dr->prod; 972 973 freeskbs = vnet_clean_tx_ring(port, &pending); 974 975 BUG_ON(port->tx_bufs[txi].skb); 976 977 len = skb->len; 978 if (len < ETH_ZLEN) 979 len = ETH_ZLEN; 980 981 port->tx_bufs[txi].skb = skb; 982 skb = NULL; 983 984 err = ldc_map_single(port->vio.lp, start, nlen, 985 port->tx_bufs[txi].cookies, VNET_MAXCOOKIES, 986 (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW)); 987 if (err < 0) { 988 netdev_info(dev, "tx buffer map error %d\n", err); 989 goto out_dropped_unlock; 990 } 991 port->tx_bufs[txi].ncookies = err; 992 993 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(), 994 * thus it is safe to not set VIO_ACK_ENABLE for each transmission: 995 * the protocol itself does not require it as long as the peer 996 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED. 997 * 998 * An ACK for every packet in the ring is expensive as the 999 * sending of LDC messages is slow and affects performance. 1000 */ 1001 d->hdr.ack = VIO_ACK_DISABLE; 1002 d->size = len; 1003 d->ncookies = port->tx_bufs[txi].ncookies; 1004 for (i = 0; i < d->ncookies; i++) 1005 d->cookies[i] = port->tx_bufs[txi].cookies[i]; 1006 1007 /* This has to be a non-SMP write barrier because we are writing 1008 * to memory which is shared with the peer LDOM. 1009 */ 1010 wmb(); 1011 1012 d->hdr.state = VIO_DESC_READY; 1013 1014 /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent 1015 * to notify the consumer that some descriptors are READY. 1016 * After that "start" trigger, no additional triggers are needed until 1017 * a DRING_STOPPED is received from the consumer. The dr->cons field 1018 * (set up by vnet_ack()) has the value of the next dring index 1019 * that has not yet been ack-ed. We send a "start" trigger here 1020 * if, and only if, start_cons is true (reset it afterward). Conversely, 1021 * vnet_ack() should check if the dring corresponding to cons 1022 * is marked READY, but start_cons was false. 1023 * If so, vnet_ack() should send out the missed "start" trigger. 1024 * 1025 * Note that the wmb() above makes sure the cookies et al. are 1026 * not globally visible before the VIO_DESC_READY, and that the 1027 * stores are ordered correctly by the compiler. The consumer will 1028 * not proceed until the VIO_DESC_READY is visible assuring that 1029 * the consumer does not observe anything related to descriptors 1030 * out of order. The HV trap from the LDC start trigger is the 1031 * producer to consumer announcement that work is available to the 1032 * consumer 1033 */ 1034 if (!port->start_cons) 1035 goto ldc_start_done; /* previous trigger suffices */ 1036 1037 err = __vnet_tx_trigger(port, dr->cons); 1038 if (unlikely(err < 0)) { 1039 netdev_info(dev, "TX trigger error %d\n", err); 1040 d->hdr.state = VIO_DESC_FREE; 1041 dev->stats.tx_carrier_errors++; 1042 goto out_dropped_unlock; 1043 } 1044 1045 ldc_start_done: 1046 port->start_cons = false; 1047 1048 dev->stats.tx_packets++; 1049 dev->stats.tx_bytes += port->tx_bufs[txi].skb->len; 1050 1051 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); 1052 if (unlikely(vnet_tx_dring_avail(dr) < 1)) { 1053 netif_stop_queue(dev); 1054 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) 1055 netif_wake_queue(dev); 1056 } 1057 1058 spin_unlock_irqrestore(&port->vio.lock, flags); 1059 1060 vnet_free_skbs(freeskbs); 1061 1062 (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT); 1063 1064 return NETDEV_TX_OK; 1065 1066 out_dropped_unlock: 1067 spin_unlock_irqrestore(&port->vio.lock, flags); 1068 1069 out_dropped: 1070 if (skb) 1071 dev_kfree_skb(skb); 1072 vnet_free_skbs(freeskbs); 1073 if (pending) 1074 (void)mod_timer(&port->clean_timer, 1075 jiffies + VNET_CLEAN_TIMEOUT); 1076 else if (port) 1077 del_timer(&port->clean_timer); 1078 dev->stats.tx_dropped++; 1079 return NETDEV_TX_OK; 1080 } 1081 1082 static void vnet_tx_timeout(struct net_device *dev) 1083 { 1084 /* XXX Implement me XXX */ 1085 } 1086 1087 static int vnet_open(struct net_device *dev) 1088 { 1089 netif_carrier_on(dev); 1090 netif_start_queue(dev); 1091 1092 return 0; 1093 } 1094 1095 static int vnet_close(struct net_device *dev) 1096 { 1097 netif_stop_queue(dev); 1098 netif_carrier_off(dev); 1099 1100 return 0; 1101 } 1102 1103 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr) 1104 { 1105 struct vnet_mcast_entry *m; 1106 1107 for (m = vp->mcast_list; m; m = m->next) { 1108 if (ether_addr_equal(m->addr, addr)) 1109 return m; 1110 } 1111 return NULL; 1112 } 1113 1114 static void __update_mc_list(struct vnet *vp, struct net_device *dev) 1115 { 1116 struct netdev_hw_addr *ha; 1117 1118 netdev_for_each_mc_addr(ha, dev) { 1119 struct vnet_mcast_entry *m; 1120 1121 m = __vnet_mc_find(vp, ha->addr); 1122 if (m) { 1123 m->hit = 1; 1124 continue; 1125 } 1126 1127 if (!m) { 1128 m = kzalloc(sizeof(*m), GFP_ATOMIC); 1129 if (!m) 1130 continue; 1131 memcpy(m->addr, ha->addr, ETH_ALEN); 1132 m->hit = 1; 1133 1134 m->next = vp->mcast_list; 1135 vp->mcast_list = m; 1136 } 1137 } 1138 } 1139 1140 static void __send_mc_list(struct vnet *vp, struct vnet_port *port) 1141 { 1142 struct vio_net_mcast_info info; 1143 struct vnet_mcast_entry *m, **pp; 1144 int n_addrs; 1145 1146 memset(&info, 0, sizeof(info)); 1147 1148 info.tag.type = VIO_TYPE_CTRL; 1149 info.tag.stype = VIO_SUBTYPE_INFO; 1150 info.tag.stype_env = VNET_MCAST_INFO; 1151 info.tag.sid = vio_send_sid(&port->vio); 1152 info.set = 1; 1153 1154 n_addrs = 0; 1155 for (m = vp->mcast_list; m; m = m->next) { 1156 if (m->sent) 1157 continue; 1158 m->sent = 1; 1159 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1160 m->addr, ETH_ALEN); 1161 if (++n_addrs == VNET_NUM_MCAST) { 1162 info.count = n_addrs; 1163 1164 (void) vio_ldc_send(&port->vio, &info, 1165 sizeof(info)); 1166 n_addrs = 0; 1167 } 1168 } 1169 if (n_addrs) { 1170 info.count = n_addrs; 1171 (void) vio_ldc_send(&port->vio, &info, sizeof(info)); 1172 } 1173 1174 info.set = 0; 1175 1176 n_addrs = 0; 1177 pp = &vp->mcast_list; 1178 while ((m = *pp) != NULL) { 1179 if (m->hit) { 1180 m->hit = 0; 1181 pp = &m->next; 1182 continue; 1183 } 1184 1185 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN], 1186 m->addr, ETH_ALEN); 1187 if (++n_addrs == VNET_NUM_MCAST) { 1188 info.count = n_addrs; 1189 (void) vio_ldc_send(&port->vio, &info, 1190 sizeof(info)); 1191 n_addrs = 0; 1192 } 1193 1194 *pp = m->next; 1195 kfree(m); 1196 } 1197 if (n_addrs) { 1198 info.count = n_addrs; 1199 (void) vio_ldc_send(&port->vio, &info, sizeof(info)); 1200 } 1201 } 1202 1203 static void vnet_set_rx_mode(struct net_device *dev) 1204 { 1205 struct vnet *vp = netdev_priv(dev); 1206 struct vnet_port *port; 1207 unsigned long flags; 1208 1209 spin_lock_irqsave(&vp->lock, flags); 1210 if (!list_empty(&vp->port_list)) { 1211 port = list_entry(vp->port_list.next, struct vnet_port, list); 1212 1213 if (port->switch_port) { 1214 __update_mc_list(vp, dev); 1215 __send_mc_list(vp, port); 1216 } 1217 } 1218 spin_unlock_irqrestore(&vp->lock, flags); 1219 } 1220 1221 static int vnet_change_mtu(struct net_device *dev, int new_mtu) 1222 { 1223 if (new_mtu < 68 || new_mtu > 65535) 1224 return -EINVAL; 1225 1226 dev->mtu = new_mtu; 1227 return 0; 1228 } 1229 1230 static int vnet_set_mac_addr(struct net_device *dev, void *p) 1231 { 1232 return -EINVAL; 1233 } 1234 1235 static void vnet_get_drvinfo(struct net_device *dev, 1236 struct ethtool_drvinfo *info) 1237 { 1238 strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); 1239 strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); 1240 } 1241 1242 static u32 vnet_get_msglevel(struct net_device *dev) 1243 { 1244 struct vnet *vp = netdev_priv(dev); 1245 return vp->msg_enable; 1246 } 1247 1248 static void vnet_set_msglevel(struct net_device *dev, u32 value) 1249 { 1250 struct vnet *vp = netdev_priv(dev); 1251 vp->msg_enable = value; 1252 } 1253 1254 static const struct ethtool_ops vnet_ethtool_ops = { 1255 .get_drvinfo = vnet_get_drvinfo, 1256 .get_msglevel = vnet_get_msglevel, 1257 .set_msglevel = vnet_set_msglevel, 1258 .get_link = ethtool_op_get_link, 1259 }; 1260 1261 static void vnet_port_free_tx_bufs(struct vnet_port *port) 1262 { 1263 struct vio_dring_state *dr; 1264 int i; 1265 1266 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1267 if (dr->base) { 1268 ldc_free_exp_dring(port->vio.lp, dr->base, 1269 (dr->entry_size * dr->num_entries), 1270 dr->cookies, dr->ncookies); 1271 dr->base = NULL; 1272 dr->entry_size = 0; 1273 dr->num_entries = 0; 1274 dr->pending = 0; 1275 dr->ncookies = 0; 1276 } 1277 1278 for (i = 0; i < VNET_TX_RING_SIZE; i++) { 1279 struct vio_net_desc *d; 1280 void *skb = port->tx_bufs[i].skb; 1281 1282 if (!skb) 1283 continue; 1284 1285 d = vio_dring_entry(dr, i); 1286 if (d->hdr.state == VIO_DESC_READY) 1287 pr_warn("active transmit buffers freed\n"); 1288 1289 ldc_unmap(port->vio.lp, 1290 port->tx_bufs[i].cookies, 1291 port->tx_bufs[i].ncookies); 1292 dev_kfree_skb(skb); 1293 port->tx_bufs[i].skb = NULL; 1294 d->hdr.state = VIO_DESC_FREE; 1295 } 1296 } 1297 1298 static int vnet_port_alloc_tx_bufs(struct vnet_port *port) 1299 { 1300 struct vio_dring_state *dr; 1301 unsigned long len; 1302 int i, err, ncookies; 1303 void *dring; 1304 1305 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1306 1307 len = (VNET_TX_RING_SIZE * 1308 (sizeof(struct vio_net_desc) + 1309 (sizeof(struct ldc_trans_cookie) * 2))); 1310 1311 ncookies = VIO_MAX_RING_COOKIES; 1312 dring = ldc_alloc_exp_dring(port->vio.lp, len, 1313 dr->cookies, &ncookies, 1314 (LDC_MAP_SHADOW | 1315 LDC_MAP_DIRECT | 1316 LDC_MAP_RW)); 1317 if (IS_ERR(dring)) { 1318 err = PTR_ERR(dring); 1319 goto err_out; 1320 } 1321 1322 dr->base = dring; 1323 dr->entry_size = (sizeof(struct vio_net_desc) + 1324 (sizeof(struct ldc_trans_cookie) * 2)); 1325 dr->num_entries = VNET_TX_RING_SIZE; 1326 dr->prod = dr->cons = 0; 1327 port->start_cons = true; /* need an initial trigger */ 1328 dr->pending = VNET_TX_RING_SIZE; 1329 dr->ncookies = ncookies; 1330 1331 for (i = 0; i < VNET_TX_RING_SIZE; ++i) { 1332 struct vio_net_desc *d; 1333 1334 d = vio_dring_entry(dr, i); 1335 d->hdr.state = VIO_DESC_FREE; 1336 } 1337 return 0; 1338 1339 err_out: 1340 vnet_port_free_tx_bufs(port); 1341 1342 return err; 1343 } 1344 1345 static LIST_HEAD(vnet_list); 1346 static DEFINE_MUTEX(vnet_list_mutex); 1347 1348 static const struct net_device_ops vnet_ops = { 1349 .ndo_open = vnet_open, 1350 .ndo_stop = vnet_close, 1351 .ndo_set_rx_mode = vnet_set_rx_mode, 1352 .ndo_set_mac_address = vnet_set_mac_addr, 1353 .ndo_validate_addr = eth_validate_addr, 1354 .ndo_tx_timeout = vnet_tx_timeout, 1355 .ndo_change_mtu = vnet_change_mtu, 1356 .ndo_start_xmit = vnet_start_xmit, 1357 }; 1358 1359 static struct vnet *vnet_new(const u64 *local_mac) 1360 { 1361 struct net_device *dev; 1362 struct vnet *vp; 1363 int err, i; 1364 1365 dev = alloc_etherdev(sizeof(*vp)); 1366 if (!dev) 1367 return ERR_PTR(-ENOMEM); 1368 dev->needed_headroom = VNET_PACKET_SKIP + 8; 1369 dev->needed_tailroom = 8; 1370 1371 for (i = 0; i < ETH_ALEN; i++) 1372 dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff; 1373 1374 vp = netdev_priv(dev); 1375 1376 spin_lock_init(&vp->lock); 1377 tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp); 1378 vp->dev = dev; 1379 1380 INIT_LIST_HEAD(&vp->port_list); 1381 for (i = 0; i < VNET_PORT_HASH_SIZE; i++) 1382 INIT_HLIST_HEAD(&vp->port_hash[i]); 1383 INIT_LIST_HEAD(&vp->list); 1384 vp->local_mac = *local_mac; 1385 1386 dev->netdev_ops = &vnet_ops; 1387 dev->ethtool_ops = &vnet_ethtool_ops; 1388 dev->watchdog_timeo = VNET_TX_TIMEOUT; 1389 1390 err = register_netdev(dev); 1391 if (err) { 1392 pr_err("Cannot register net device, aborting\n"); 1393 goto err_out_free_dev; 1394 } 1395 1396 netdev_info(dev, "Sun LDOM vnet %pM\n", dev->dev_addr); 1397 1398 list_add(&vp->list, &vnet_list); 1399 1400 return vp; 1401 1402 err_out_free_dev: 1403 free_netdev(dev); 1404 1405 return ERR_PTR(err); 1406 } 1407 1408 static struct vnet *vnet_find_or_create(const u64 *local_mac) 1409 { 1410 struct vnet *iter, *vp; 1411 1412 mutex_lock(&vnet_list_mutex); 1413 vp = NULL; 1414 list_for_each_entry(iter, &vnet_list, list) { 1415 if (iter->local_mac == *local_mac) { 1416 vp = iter; 1417 break; 1418 } 1419 } 1420 if (!vp) 1421 vp = vnet_new(local_mac); 1422 mutex_unlock(&vnet_list_mutex); 1423 1424 return vp; 1425 } 1426 1427 static void vnet_cleanup(void) 1428 { 1429 struct vnet *vp; 1430 struct net_device *dev; 1431 1432 mutex_lock(&vnet_list_mutex); 1433 while (!list_empty(&vnet_list)) { 1434 vp = list_first_entry(&vnet_list, struct vnet, list); 1435 list_del(&vp->list); 1436 dev = vp->dev; 1437 tasklet_kill(&vp->vnet_tx_wakeup); 1438 /* vio_unregister_driver() should have cleaned up port_list */ 1439 BUG_ON(!list_empty(&vp->port_list)); 1440 unregister_netdev(dev); 1441 free_netdev(dev); 1442 } 1443 mutex_unlock(&vnet_list_mutex); 1444 } 1445 1446 static const char *local_mac_prop = "local-mac-address"; 1447 1448 static struct vnet *vnet_find_parent(struct mdesc_handle *hp, 1449 u64 port_node) 1450 { 1451 const u64 *local_mac = NULL; 1452 u64 a; 1453 1454 mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) { 1455 u64 target = mdesc_arc_target(hp, a); 1456 const char *name; 1457 1458 name = mdesc_get_property(hp, target, "name", NULL); 1459 if (!name || strcmp(name, "network")) 1460 continue; 1461 1462 local_mac = mdesc_get_property(hp, target, 1463 local_mac_prop, NULL); 1464 if (local_mac) 1465 break; 1466 } 1467 if (!local_mac) 1468 return ERR_PTR(-ENODEV); 1469 1470 return vnet_find_or_create(local_mac); 1471 } 1472 1473 static struct ldc_channel_config vnet_ldc_cfg = { 1474 .event = vnet_event, 1475 .mtu = 64, 1476 .mode = LDC_MODE_UNRELIABLE, 1477 }; 1478 1479 static struct vio_driver_ops vnet_vio_ops = { 1480 .send_attr = vnet_send_attr, 1481 .handle_attr = vnet_handle_attr, 1482 .handshake_complete = vnet_handshake_complete, 1483 }; 1484 1485 static void print_version(void) 1486 { 1487 printk_once(KERN_INFO "%s", version); 1488 } 1489 1490 const char *remote_macaddr_prop = "remote-mac-address"; 1491 1492 static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) 1493 { 1494 struct mdesc_handle *hp; 1495 struct vnet_port *port; 1496 unsigned long flags; 1497 struct vnet *vp; 1498 const u64 *rmac; 1499 int len, i, err, switch_port; 1500 1501 print_version(); 1502 1503 hp = mdesc_grab(); 1504 1505 vp = vnet_find_parent(hp, vdev->mp); 1506 if (IS_ERR(vp)) { 1507 pr_err("Cannot find port parent vnet\n"); 1508 err = PTR_ERR(vp); 1509 goto err_out_put_mdesc; 1510 } 1511 1512 rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); 1513 err = -ENODEV; 1514 if (!rmac) { 1515 pr_err("Port lacks %s property\n", remote_macaddr_prop); 1516 goto err_out_put_mdesc; 1517 } 1518 1519 port = kzalloc(sizeof(*port), GFP_KERNEL); 1520 err = -ENOMEM; 1521 if (!port) 1522 goto err_out_put_mdesc; 1523 1524 for (i = 0; i < ETH_ALEN; i++) 1525 port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff; 1526 1527 port->vp = vp; 1528 1529 err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK, 1530 vnet_versions, ARRAY_SIZE(vnet_versions), 1531 &vnet_vio_ops, vp->dev->name); 1532 if (err) 1533 goto err_out_free_port; 1534 1535 err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port); 1536 if (err) 1537 goto err_out_free_port; 1538 1539 err = vnet_port_alloc_tx_bufs(port); 1540 if (err) 1541 goto err_out_free_ldc; 1542 1543 INIT_HLIST_NODE(&port->hash); 1544 INIT_LIST_HEAD(&port->list); 1545 1546 switch_port = 0; 1547 if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL) 1548 switch_port = 1; 1549 port->switch_port = switch_port; 1550 1551 spin_lock_irqsave(&vp->lock, flags); 1552 if (switch_port) 1553 list_add(&port->list, &vp->port_list); 1554 else 1555 list_add_tail(&port->list, &vp->port_list); 1556 hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]); 1557 spin_unlock_irqrestore(&vp->lock, flags); 1558 1559 dev_set_drvdata(&vdev->dev, port); 1560 1561 pr_info("%s: PORT ( remote-mac %pM%s )\n", 1562 vp->dev->name, port->raddr, switch_port ? " switch-port" : ""); 1563 1564 setup_timer(&port->clean_timer, vnet_clean_timer_expire, 1565 (unsigned long)port); 1566 1567 vio_port_up(&port->vio); 1568 1569 mdesc_release(hp); 1570 1571 return 0; 1572 1573 err_out_free_ldc: 1574 vio_ldc_free(&port->vio); 1575 1576 err_out_free_port: 1577 kfree(port); 1578 1579 err_out_put_mdesc: 1580 mdesc_release(hp); 1581 return err; 1582 } 1583 1584 static int vnet_port_remove(struct vio_dev *vdev) 1585 { 1586 struct vnet_port *port = dev_get_drvdata(&vdev->dev); 1587 1588 if (port) { 1589 struct vnet *vp = port->vp; 1590 unsigned long flags; 1591 1592 del_timer_sync(&port->vio.timer); 1593 del_timer_sync(&port->clean_timer); 1594 1595 spin_lock_irqsave(&vp->lock, flags); 1596 list_del(&port->list); 1597 hlist_del(&port->hash); 1598 spin_unlock_irqrestore(&vp->lock, flags); 1599 1600 vnet_port_free_tx_bufs(port); 1601 vio_ldc_free(&port->vio); 1602 1603 dev_set_drvdata(&vdev->dev, NULL); 1604 1605 kfree(port); 1606 1607 } 1608 return 0; 1609 } 1610 1611 static const struct vio_device_id vnet_port_match[] = { 1612 { 1613 .type = "vnet-port", 1614 }, 1615 {}, 1616 }; 1617 MODULE_DEVICE_TABLE(vio, vnet_port_match); 1618 1619 static struct vio_driver vnet_port_driver = { 1620 .id_table = vnet_port_match, 1621 .probe = vnet_port_probe, 1622 .remove = vnet_port_remove, 1623 .name = "vnet_port", 1624 }; 1625 1626 static int __init vnet_init(void) 1627 { 1628 return vio_register_driver(&vnet_port_driver); 1629 } 1630 1631 static void __exit vnet_exit(void) 1632 { 1633 vio_unregister_driver(&vnet_port_driver); 1634 vnet_cleanup(); 1635 } 1636 1637 module_init(vnet_init); 1638 module_exit(vnet_exit); 1639