/*************************************************************************
 * myri10ge.c: Myricom Myri-10G Ethernet driver.
 *
 * Copyright (C) 2005 - 2011 Myricom, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Myricom, Inc. nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * If the eeprom on your board is not recent enough, you will need to get a
 * newer firmware image at:
 *   http://www.myri.com/scs/download-Myri10GE.html
 *
 * Contact Information:
 *   <help@myri.com>
 *   Myricom, Inc., 325N Santa Anita Avenue, Arcadia, CA 91006
 *************************************************************************/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/tcp.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/dca.h>
#include <linux/ip.h>
#include <linux/inet.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <linux/firmware.h>
#include <linux/delay.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/crc32.h>
#include <linux/moduleparam.h>
#include <linux/io.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <asm/byteorder.h>
#include <asm/processor.h>
#include <net/busy_poll.h>

#include "myri10ge_mcp.h"
#include "myri10ge_mcp_gen_header.h"

#define MYRI10GE_VERSION_STR "1.5.3-1.534"

MODULE_DESCRIPTION("Myricom 10G driver (10GbE)");
MODULE_AUTHOR("Maintainer: help@myri.com");
MODULE_VERSION(MYRI10GE_VERSION_STR);
MODULE_LICENSE("Dual BSD/GPL");

#define MYRI10GE_MAX_ETHER_MTU 9014

#define MYRI10GE_ETH_STOPPED 0
#define MYRI10GE_ETH_STOPPING 1
#define MYRI10GE_ETH_STARTING 2
#define MYRI10GE_ETH_RUNNING 3
#define MYRI10GE_ETH_OPEN_FAILED 4
#define MYRI10GE_EEPROM_STRINGS_SIZE 256
#define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2)

#define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff)
#define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff

#define MYRI10GE_ALLOC_ORDER 0
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)

#define MYRI10GE_MAX_SLICES 32

struct myri10ge_rx_buffer_state {
	struct page *page;
	int page_offset;
	DEFINE_DMA_UNMAP_ADDR(bus);
	DEFINE_DMA_UNMAP_LEN(len);
};

struct myri10ge_tx_buffer_state {
	struct sk_buff *skb;
	int last;
	DEFINE_DMA_UNMAP_ADDR(bus);
	DEFINE_DMA_UNMAP_LEN(len);
};

struct myri10ge_cmd {
	u32 data0;
	u32 data1;
	u32 data2;
};

struct myri10ge_rx_buf {
	struct mcp_kreq_ether_recv __iomem *lanai;	/* lanai ptr for recv ring */
	struct mcp_kreq_ether_recv *shadow;	/* host shadow of recv ring */
	struct myri10ge_rx_buffer_state *info;
	struct page *page;
	dma_addr_t bus;
	int page_offset;
	int cnt;
	int fill_cnt;
	int alloc_fail;
	int mask;		/* number of rx slots -1 */
	int watchdog_needed;
};

struct myri10ge_tx_buf {
	struct mcp_kreq_ether_send __iomem *lanai;	/* lanai ptr for sendq */
	__be32 __iomem *send_go;	/* "go" doorbell ptr */
	__be32 __iomem *send_stop;	/* "stop" doorbell ptr */
	struct mcp_kreq_ether_send *req_list;	/* host shadow of sendq */
	char *req_bytes;
	struct myri10ge_tx_buffer_state *info;
	int mask;		/* number of transmit slots -1 */
	int req ____cacheline_aligned;	/* transmit slots submitted */
	int pkt_start;		/* packets started */
	int stop_queue;
	int linearized;
	int done ____cacheline_aligned;	/* transmit slots completed */
	int pkt_done;		/* packets completed */
	int wake_queue;
	int queue_active;
};
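/*
 * The two ____cacheline_aligned annotations above split myri10ge_tx_buf
 * into a producer group (req, pkt_start, stop_queue, linearized),
 * written from the transmit path, and a consumer group (done, pkt_done,
 * wake_queue), written from the completion path; keeping the groups on
 * separate cachelines presumably avoids false sharing between the
 * submitting CPU and the reaping CPU.
 */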
struct myri10ge_rx_done {
	struct mcp_slot *entry;
	dma_addr_t bus;
	int cnt;
	int idx;
};

struct myri10ge_slice_netstats {
	unsigned long rx_packets;
	unsigned long tx_packets;
	unsigned long rx_bytes;
	unsigned long tx_bytes;
	unsigned long rx_dropped;
	unsigned long tx_dropped;
};

struct myri10ge_slice_state {
	struct myri10ge_tx_buf tx;	/* transmit ring */
	struct myri10ge_rx_buf rx_small;
	struct myri10ge_rx_buf rx_big;
	struct myri10ge_rx_done rx_done;
	struct net_device *dev;
	struct napi_struct napi;
	struct myri10ge_priv *mgp;
	struct myri10ge_slice_netstats stats;
	__be32 __iomem *irq_claim;
	struct mcp_irq_data *fw_stats;
	dma_addr_t fw_stats_bus;
	int watchdog_tx_done;
	int watchdog_tx_req;
	int watchdog_rx_done;
	int stuck;
#ifdef CONFIG_MYRI10GE_DCA
	int cached_dca_tag;
	int cpu;
	__be32 __iomem *dca_tag;
#endif
	char irq_desc[32];
};

struct myri10ge_priv {
	struct myri10ge_slice_state *ss;
	int tx_boundary;	/* boundary transmits cannot cross */
	int num_slices;
	int running;		/* running? */
	int small_bytes;
	int big_bytes;
	int max_intr_slots;
	struct net_device *dev;
	u8 __iomem *sram;
	int sram_size;
	unsigned long board_span;
	unsigned long iomem_base;
	__be32 __iomem *irq_deassert;
	char *mac_addr_string;
	struct mcp_cmd_response *cmd;
	dma_addr_t cmd_bus;
	struct pci_dev *pdev;
	int msi_enabled;
	int msix_enabled;
	struct msix_entry *msix_vectors;
#ifdef CONFIG_MYRI10GE_DCA
	int dca_enabled;
	int relaxed_order;
#endif
	u32 link_state;
	unsigned int rdma_tags_available;
	int intr_coal_delay;
	__be32 __iomem *intr_coal_delay_ptr;
	int wc_cookie;
	int down_cnt;
	wait_queue_head_t down_wq;
	struct work_struct watchdog_work;
	struct timer_list watchdog_timer;
	int watchdog_resets;
	int watchdog_pause;
	int pause;
	bool fw_name_allocated;
	char *fw_name;
	char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
	char *product_code_string;
	char fw_version[128];
	int fw_ver_major;
	int fw_ver_minor;
	int fw_ver_tiny;
	int adopted_rx_filter_bug;
	u8 mac_addr[ETH_ALEN];	/* eeprom mac address */
	unsigned long serial_number;
	int vendor_specific_offset;
	int fw_multicast_support;
	u32 features;
	u32 max_tso6;
	u32 read_dma;
	u32 write_dma;
	u32 read_write_dma;
	u32 link_changes;
	u32 msg_enable;
	unsigned int board_number;
	int rebooted;
};

static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat";
static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat";
static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat";
static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat";
MODULE_FIRMWARE("myri10ge_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");

/* Careful: must be accessed under kernel_param_lock() */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, 0644);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");

#define MYRI10GE_MAX_BOARDS 8
static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] =
    {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL };
module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL,
			 0444);
MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board");

static int myri10ge_ecrc_enable = 1;
module_param(myri10ge_ecrc_enable, int, 0444);
MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");

static int myri10ge_small_bytes = -1;	/* -1 == auto */
module_param(myri10ge_small_bytes, int, 0644);
MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");

static int myri10ge_msi = 1;	/* enable msi by default */
module_param(myri10ge_msi, int, 0644);
MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");

static int myri10ge_intr_coal_delay = 75;
module_param(myri10ge_intr_coal_delay, int, 0444);
MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay");

static int myri10ge_flow_control = 1;
module_param(myri10ge_flow_control, int, 0444);
MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter");

static int myri10ge_deassert_wait = 1;
module_param(myri10ge_deassert_wait, int, 0644);
MODULE_PARM_DESC(myri10ge_deassert_wait,
		 "Wait when deasserting legacy interrupts");

static int myri10ge_force_firmware = 0;
module_param(myri10ge_force_firmware, int, 0444);
MODULE_PARM_DESC(myri10ge_force_firmware,
		 "Force firmware to assume aligned completions");

static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
module_param(myri10ge_initial_mtu, int, 0444);
MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU");

static int myri10ge_napi_weight = 64;
module_param(myri10ge_napi_weight, int, 0444);
MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight");

static int myri10ge_watchdog_timeout = 1;
module_param(myri10ge_watchdog_timeout, int, 0444);
MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout");

static int myri10ge_max_irq_loops = 1048576;
module_param(myri10ge_max_irq_loops, int, 0444);
MODULE_PARM_DESC(myri10ge_max_irq_loops,
		 "Set stuck legacy IRQ detection threshold");

#define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK

static int myri10ge_debug = -1;	/* defaults above */
module_param(myri10ge_debug, int, 0);
MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)");

static int myri10ge_fill_thresh = 256;
module_param(myri10ge_fill_thresh, int, 0644);
MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed");

static int myri10ge_reset_recover = 1;

static int myri10ge_max_slices = 1;
module_param(myri10ge_max_slices, int, 0444);
MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues");

static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
module_param(myri10ge_rss_hash, int, 0444);
MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do");

static int myri10ge_dca = 1;
module_param(myri10ge_dca, int, 0444);
MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible");

#define MYRI10GE_FW_OFFSET 1024*1024
#define MYRI10GE_HIGHPART_TO_U32(X) \
(sizeof (X) == 8) ? ((u32)((u64)(X) >> 32)) : (0)
#define MYRI10GE_LOWPART_TO_U32(X) ((u32)(X))

#define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8)
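/*
 * For instance, a 64-bit dma_addr_t of 0x0000000123456780 splits into
 * MYRI10GE_HIGHPART_TO_U32() == 0x00000001 and
 * MYRI10GE_LOWPART_TO_U32() == 0x23456780; when dma_addr_t is only 32
 * bits wide the high part is simply 0.  Callers pair these macros with
 * htonl(), since the firmware takes DMA addresses as two big-endian
 * 32-bit halves.
 */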
static void myri10ge_set_multicast_list(struct net_device *dev);
static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
				   struct net_device *dev);

static inline void put_be32(__be32 val, __be32 __iomem * p)
{
	__raw_writel((__force __u32) val, (__force void __iomem *)p);
}

static void myri10ge_get_stats(struct net_device *dev,
			       struct rtnl_link_stats64 *stats);

static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated)
{
	if (mgp->fw_name_allocated)
		kfree(mgp->fw_name);
	mgp->fw_name = name;
	mgp->fw_name_allocated = allocated;
}

static int
myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
		  struct myri10ge_cmd *data, int atomic)
{
	struct mcp_cmd *buf;
	char buf_bytes[sizeof(*buf) + 8];
	struct mcp_cmd_response *response = mgp->cmd;
	char __iomem *cmd_addr = mgp->sram + MXGEFW_ETH_CMD;
	u32 dma_low, dma_high, result, value;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (struct mcp_cmd *)ALIGN((unsigned long)buf_bytes, 8);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf->response_addr.low = htonl(dma_low);
	buf->response_addr.high = htonl(dma_high);
	response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT);
	mb();
	myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf));

	/* wait up to 15ms. Longest command is the DMA benchmark,
	 * which is capped at 5ms, but runs from a timeout handler
	 * that runs every 7.8ms. So a 15ms timeout leaves us with
	 * a 2.2ms margin
	 */
	if (atomic) {
		/* if atomic is set, do not sleep,
		 * and try to get the completion quickly
		 * (1ms will be enough for those commands) */
		for (sleep_total = 0;
		     sleep_total < 1000 &&
		     response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
		     sleep_total += 10) {
			udelay(10);
			mb();
		}
	} else {
		/* use msleep for most commands */
		for (sleep_total = 0;
		     sleep_total < 15 &&
		     response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
		     sleep_total++)
			msleep(1);
	}

	result = ntohl(response->result);
	value = ntohl(response->data);
	if (result != MYRI10GE_NO_RESPONSE_RESULT) {
		if (result == 0) {
			data->data0 = value;
			return 0;
		} else if (result == MXGEFW_CMD_UNKNOWN) {
			return -ENOSYS;
		} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
			return -E2BIG;
		} else if (result == MXGEFW_CMD_ERROR_RANGE &&
			   cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
			   (data->
			    data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
			   0) {
			return -ERANGE;
		} else {
			dev_err(&mgp->pdev->dev,
				"command %d failed, result = %d\n",
				cmd, result);
			return -ENXIO;
		}
	}

	dev_err(&mgp->pdev->dev, "command %d timed out, result = %d\n",
		cmd, result);
	return -EAGAIN;
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
	char *ptr, *limit;
	int i;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;
				mgp->mac_addr[i] =
				    simple_strtoul(ptr, &ptr, 16);
				ptr += 1;
			}
		}
		if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->product_code_string = ptr;
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->serial_number = simple_strtoul(ptr, &ptr, 10);
		}
		while (ptr < limit && *ptr++) ;
	}

	return 0;

abort:
	dev_err(&mgp->pdev->dev, "failed to parse eeprom_strings\n");
	return -ENXIO;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char __iomem *submit;
	__be32 buf[16] __attribute__ ((__aligned__(8)));
	u32 dma_low, dma_high;
	int i;

	/* clear confirmation addr */
	mgp->cmd->data = 0;
	mb();

	/* send a rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf[0] = htonl(dma_high);	/* confirm addr MSW */
	buf[1] = htonl(dma_low);	/* confirm addr LSW */
	buf[2] = MYRI10GE_NO_CONFIRM_DATA;	/* confirm data */
	buf[3] = htonl(dma_high);	/* dummy addr MSW */
	buf[4] = htonl(dma_low);	/* dummy addr LSW */
	buf[5] = htonl(enable);	/* enable? */

	submit = mgp->sram + MXGEFW_BOOT_DUMMY_RDMA;

	myri10ge_pio_copy(submit, &buf, sizeof(buf));
	for (i = 0; mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20; i++)
		msleep(1);
	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA)
		dev_err(&mgp->pdev->dev, "dummy rdma %s failed\n",
			(enable ? "enable" : "disable"));
}
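/*
 * Both the dummy-RDMA request above and the firmware handoff below use
 * the same simple boot mailbox convention: the host clears a
 * confirmation word in host memory, PIO-copies a small parameter block
 * into a well-known SRAM offset (one of the MXGEFW_BOOT_* submit
 * addresses), and then polls until the firmware acknowledges by
 * DMA-ing MYRI10GE_NO_CONFIRM_DATA (all-ones) back to the
 * confirmation address.
 */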
static int
myri10ge_validate_firmware(struct myri10ge_priv *mgp,
			   struct mcp_gen_header *hdr)
{
	struct device *dev = &mgp->pdev->dev;

	/* check firmware type */
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		dev_err(dev, "Bad firmware type: 0x%x\n", ntohl(hdr->mcp_type));
		return -EINVAL;
	}

	/* save firmware version for ethtool */
	strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version));
	mgp->fw_version[sizeof(mgp->fw_version) - 1] = '\0';

	sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major,
	       &mgp->fw_ver_minor, &mgp->fw_ver_tiny);

	if (!(mgp->fw_ver_major == MXGEFW_VERSION_MAJOR &&
	      mgp->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		dev_err(dev, "Found firmware version %s\n", mgp->fw_version);
		dev_err(dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR,
			MXGEFW_VERSION_MINOR);
		return -EINVAL;
	}
	return 0;
}

static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size)
{
	unsigned crc, reread_crc;
	const struct firmware *fw;
	struct device *dev = &mgp->pdev->dev;
	unsigned char *fw_readback;
	struct mcp_gen_header *hdr;
	size_t hdr_offset;
	int status;
	unsigned i;

	if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) {
		dev_err(dev, "Unable to load %s firmware image via hotplug\n",
			mgp->fw_name);
		status = -EINVAL;
		goto abort_with_nothing;
	}

	/* check size */

	if (fw->size >= mgp->sram_size - MYRI10GE_FW_OFFSET ||
	    fw->size < MCP_HEADER_PTR_OFFSET + 4) {
		dev_err(dev, "Firmware size invalid:%d\n", (int)fw->size);
		status = -EINVAL;
		goto abort_with_fw;
	}

	/* check id */
	hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) {
		dev_err(dev, "Bad firmware file\n");
		status = -EINVAL;
		goto abort_with_fw;
	}
	hdr = (void *)(fw->data + hdr_offset);

	status = myri10ge_validate_firmware(mgp, hdr);
	if (status != 0)
		goto abort_with_fw;

	crc = crc32(~0, fw->data, fw->size);
	for (i = 0; i < fw->size; i += 256) {
		myri10ge_pio_copy(mgp->sram + MYRI10GE_FW_OFFSET + i,
				  fw->data + i,
				  min(256U, (unsigned)(fw->size - i)));
		mb();
		readb(mgp->sram);
	}
	fw_readback = vmalloc(fw->size);
	if (!fw_readback) {
		status = -ENOMEM;
		goto abort_with_fw;
	}
	/* corruption checking is good for parity recovery and buggy chipset */
	memcpy_fromio(fw_readback, mgp->sram + MYRI10GE_FW_OFFSET, fw->size);
	reread_crc = crc32(~0, fw_readback, fw->size);
	vfree(fw_readback);
	if (crc != reread_crc) {
		dev_err(dev, "CRC failed(fw-len=%u), got 0x%x (expect 0x%x)\n",
			(unsigned)fw->size, reread_crc, crc);
		status = -EIO;
		goto abort_with_fw;
	}
	*size = (u32) fw->size;

abort_with_fw:
	release_firmware(fw);

abort_with_nothing:
	return status;
}
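/*
 * If a hotplug firmware image cannot be loaded, the driver can instead
 * "adopt" whatever firmware is already running on the NIC (typically
 * the image loaded at boot from the board's EEPROM): it reads the
 * running image's generic header out of SRAM, validates the version,
 * and carries on without a handoff.  myri10ge_load_firmware() then
 * falls back to the conservative 2048-byte tx boundary for adopted
 * firmware.
 */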
static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp)
{
	struct mcp_gen_header *hdr;
	struct device *dev = &mgp->pdev->dev;
	const size_t bytes = sizeof(struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) {
		dev_err(dev, "Running firmware has bad header offset (%d)\n",
			(int)hdr_offset);
		return -EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, GFP_KERNEL);
	if (hdr == NULL)
		return -ENOMEM;

	memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes);
	status = myri10ge_validate_firmware(mgp, hdr);
	kfree(hdr);

	/* check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode */
	if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 &&
	    mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) {
		mgp->adopted_rx_filter_bug = 1;
		dev_warn(dev, "Adopting fw %d.%d.%d: "
			 "working around rx filter bug\n",
			 mgp->fw_ver_major, mgp->fw_ver_minor,
			 mgp->fw_ver_tiny);
	}
	return status;
}

static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp)
{
	struct myri10ge_cmd cmd;
	int status;

	/* probe for IPv6 TSO support */
	mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
				   &cmd, 0);
	if (status == 0) {
		mgp->max_tso6 = cmd.data0;
		mgp->features |= NETIF_F_TSO6;
	}

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev,
			"failed MXGEFW_CMD_GET_RX_RING_SIZE\n");
		return -ENXIO;
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr));

	return 0;
}

static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt)
{
	char __iomem *submit;
	__be32 buf[16] __attribute__ ((__aligned__(8)));
	u32 dma_low, dma_high, size;
	int status, i;

	size = 0;
	status = myri10ge_load_hotplug_firmware(mgp, &size);
	if (status) {
		if (!adopt)
			return status;
		dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n");

		/* Do not attempt to adopt firmware if there
		 * was a bad crc */
		if (status == -EIO)
			return status;

		status = myri10ge_adopt_running_firmware(mgp);
		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to adopt running firmware\n");
			return status;
		}
		dev_info(&mgp->pdev->dev,
			 "Successfully adopted running firmware\n");
		if (mgp->tx_boundary == 4096) {
			dev_warn(&mgp->pdev->dev,
				 "Using firmware currently running on NIC"
				 ". For optimal\n");
			dev_warn(&mgp->pdev->dev,
				 "performance consider loading optimized "
				 "firmware\n");
			dev_warn(&mgp->pdev->dev, "via hotplug\n");
		}

		set_fw_name(mgp, "adopted", false);
		mgp->tx_boundary = 2048;
		myri10ge_dummy_rdma(mgp, 1);
		status = myri10ge_get_firmware_capabilities(mgp);
		return status;
	}

	/* clear confirmation addr */
	mgp->cmd->data = 0;
	mb();

	/* send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf[0] = htonl(dma_high);	/* confirm addr MSW */
	buf[1] = htonl(dma_low);	/* confirm addr LSW */
	buf[2] = MYRI10GE_NO_CONFIRM_DATA;	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8);	/* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);	/* where to copy to */
	buf[6] = htonl(0);	/* where to jump to */

	submit = mgp->sram + MXGEFW_BOOT_HANDOFF;

	myri10ge_pio_copy(submit, &buf, sizeof(buf));
	mb();
	msleep(1);
	mb();
	i = 0;
	while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) {
		msleep(1 << i);
		i++;
	}
	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) {
		dev_err(&mgp->pdev->dev, "handoff failed\n");
		return -ENXIO;
	}
	myri10ge_dummy_rdma(mgp, 1);
	status = myri10ge_get_firmware_capabilities(mgp);

	return status;
}

static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr)
{
	struct myri10ge_cmd cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd, 0);
	return status;
}

static int myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	struct myri10ge_cmd cmd;
	int status, ctl;

	ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL;
	status = myri10ge_send_cmd(mgp, ctl, &cmd, 0);

	if (status) {
		netdev_err(mgp->dev, "Failed to set flow control mode\n");
		return status;
	}
	mgp->pause = pause;
	return 0;
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc, int atomic)
{
	struct myri10ge_cmd cmd;
	int status, ctl;

	ctl = promisc ? MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC;
	status = myri10ge_send_cmd(mgp, ctl, &cmd, atomic);
	if (status)
		netdev_err(mgp->dev, "Failed to set promisc mode\n");
}

static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	struct myri10ge_cmd cmd;
	int status;
	u32 len;
	struct page *dmatest_page;
	dma_addr_t dmatest_bus;
	char *test = " ";

	dmatest_page = alloc_page(GFP_KERNEL);
	if (!dmatest_page)
		return -ENOMEM;
	dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
				   DMA_BIDIRECTIONAL);
	if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
		__free_page(dmatest_page);
		return -ENOMEM;
	}

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
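	/* Worked example of the formula used below (illustrative numbers):
	 * if cmd.data0 reports 1000 transfers (upper 16 bits) of
	 * len = 2048 bytes completing in 2000 ticks of 0.5us each (lower
	 * 16 bits), then (1000 * 2048 * 2) / 2000 = 2048 bytes/us, i.e.
	 * roughly 2048 MB/s; the "* 2" converts 0.5us ticks to us.
	 */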
	len = mgp->tx_boundary;

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);
	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
	put_page(dmatest_page);

	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		dev_warn(&mgp->pdev->dev, "DMA %s benchmark failed: %d\n",
			 test, status);

	return status;
}

static int myri10ge_reset(struct myri10ge_priv *mgp)
{
	struct myri10ge_cmd cmd;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;
#ifdef CONFIG_MYRI10GE_DCA
	unsigned long dca_tag_off;
#endif

	/* try to send a reset command to the card to see if it
	 * is alive */
	memset(&cmd, 0, sizeof(cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev, "failed reset\n");
		return -ENXIO;
	}

	(void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
	/*
	 * Use non-ndis mcp_slot (e.g., 4 bytes total, no Toeplitz hash
	 * value returned).  Older firmware will not understand this
	 * command, but will use the correct sized mcp_slot, so we
	 * ignore error returns.
	 */
	cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN;
	(void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0);

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry);
	cmd.data0 = (u32) bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
957 */ 958 959 if (mgp->num_slices > 1) { 960 961 /* ask the maximum number of slices it supports */ 962 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 963 &cmd, 0); 964 if (status != 0) { 965 dev_err(&mgp->pdev->dev, 966 "failed to get number of slices\n"); 967 } 968 969 /* 970 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 971 * to setting up the interrupt queue DMA 972 */ 973 974 cmd.data0 = mgp->num_slices; 975 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 976 if (mgp->dev->real_num_tx_queues > 1) 977 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 978 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 979 &cmd, 0); 980 981 /* Firmware older than 1.4.32 only supports multiple 982 * RX queues, so if we get an error, first retry using a 983 * single TX queue before giving up */ 984 if (status != 0 && mgp->dev->real_num_tx_queues > 1) { 985 netif_set_real_num_tx_queues(mgp->dev, 1); 986 cmd.data0 = mgp->num_slices; 987 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 988 status = myri10ge_send_cmd(mgp, 989 MXGEFW_CMD_ENABLE_RSS_QUEUES, 990 &cmd, 0); 991 } 992 993 if (status != 0) { 994 dev_err(&mgp->pdev->dev, 995 "failed to set number of slices\n"); 996 997 return status; 998 } 999 } 1000 for (i = 0; i < mgp->num_slices; i++) { 1001 ss = &mgp->ss[i]; 1002 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus); 1003 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus); 1004 cmd.data2 = i; 1005 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, 1006 &cmd, 0); 1007 } 1008 1009 status |= 1010 myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); 1011 for (i = 0; i < mgp->num_slices; i++) { 1012 ss = &mgp->ss[i]; 1013 ss->irq_claim = 1014 (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i); 1015 } 1016 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1017 &cmd, 0); 1018 mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0); 1019 1020 status |= myri10ge_send_cmd 1021 (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0); 1022 mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0); 1023 if (status != 0) { 1024 dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n"); 1025 return status; 1026 } 1027 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1028 1029 #ifdef CONFIG_MYRI10GE_DCA 1030 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0); 1031 dca_tag_off = cmd.data0; 1032 for (i = 0; i < mgp->num_slices; i++) { 1033 ss = &mgp->ss[i]; 1034 if (status == 0) { 1035 ss->dca_tag = (__iomem __be32 *) 1036 (mgp->sram + dca_tag_off + 4 * i); 1037 } else { 1038 ss->dca_tag = NULL; 1039 } 1040 } 1041 #endif /* CONFIG_MYRI10GE_DCA */ 1042 1043 /* reset mcp/driver shared state back to 0 */ 1044 1045 mgp->link_changes = 0; 1046 for (i = 0; i < mgp->num_slices; i++) { 1047 ss = &mgp->ss[i]; 1048 1049 memset(ss->rx_done.entry, 0, bytes); 1050 ss->tx.req = 0; 1051 ss->tx.done = 0; 1052 ss->tx.pkt_start = 0; 1053 ss->tx.pkt_done = 0; 1054 ss->rx_big.cnt = 0; 1055 ss->rx_small.cnt = 0; 1056 ss->rx_done.idx = 0; 1057 ss->rx_done.cnt = 0; 1058 ss->tx.wake_queue = 0; 1059 ss->tx.stop_queue = 0; 1060 } 1061 1062 status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr); 1063 myri10ge_change_pause(mgp, mgp->pause); 1064 myri10ge_set_multicast_list(mgp->dev); 1065 return status; 1066 } 1067 1068 #ifdef CONFIG_MYRI10GE_DCA 1069 static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on) 1070 { 1071 int ret; 1072 u16 ctl; 1073 1074 pcie_capability_read_word(pdev, 
#ifdef CONFIG_MYRI10GE_DCA
static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on)
{
	int ret;
	u16 ctl;

	pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &ctl);

	ret = (ctl & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
	if (ret != on) {
		ctl &= ~PCI_EXP_DEVCTL_RELAX_EN;
		ctl |= (on << 4);
		pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, ctl);
	}
	return ret;
}

static void
myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag)
{
	ss->cached_dca_tag = tag;
	put_be32(htonl(tag), ss->dca_tag);
}

static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss)
{
	int cpu = get_cpu();
	int tag;

	if (cpu != ss->cpu) {
		tag = dca3_get_tag(&ss->mgp->pdev->dev, cpu);
		if (ss->cached_dca_tag != tag)
			myri10ge_write_dca(ss, cpu, tag);
		ss->cpu = cpu;
	}
	put_cpu();
}

static void myri10ge_setup_dca(struct myri10ge_priv *mgp)
{
	int err, i;
	struct pci_dev *pdev = mgp->pdev;

	if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled)
		return;
	if (!myri10ge_dca) {
		dev_err(&pdev->dev, "dca disabled by administrator\n");
		return;
	}
	err = dca_add_requester(&pdev->dev);
	if (err) {
		if (err != -ENODEV)
			dev_err(&pdev->dev,
				"dca_add_requester() failed, err=%d\n", err);
		return;
	}
	mgp->relaxed_order = myri10ge_toggle_relaxed(pdev, 0);
	mgp->dca_enabled = 1;
	for (i = 0; i < mgp->num_slices; i++) {
		mgp->ss[i].cpu = -1;
		mgp->ss[i].cached_dca_tag = -1;
		myri10ge_update_dca(&mgp->ss[i]);
	}
}

static void myri10ge_teardown_dca(struct myri10ge_priv *mgp)
{
	struct pci_dev *pdev = mgp->pdev;

	if (!mgp->dca_enabled)
		return;
	mgp->dca_enabled = 0;
	if (mgp->relaxed_order)
		myri10ge_toggle_relaxed(pdev, 1);
	dca_remove_requester(&pdev->dev);
}

static int myri10ge_notify_dca_device(struct device *dev, void *data)
{
	struct myri10ge_priv *mgp;
	unsigned long event;

	mgp = dev_get_drvdata(dev);
	event = *(unsigned long *)data;

	if (event == DCA_PROVIDER_ADD)
		myri10ge_setup_dca(mgp);
	else if (event == DCA_PROVIDER_REMOVE)
		myri10ge_teardown_dca(mgp);
	return 0;
}
#endif				/* CONFIG_MYRI10GE_DCA */

static inline void
myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst,
		    struct mcp_kreq_ether_recv *src)
{
	__be32 low;

	low = src->addr_low;
	src->addr_low = htonl(DMA_BIT_MASK(32));
	myri10ge_pio_copy(dst, src, 4 * sizeof(*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src));
	mb();
	src->addr_low = low;
	put_be32(low, &dst->addr_low);
	mb();
}

static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum)
{
	struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data);

	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
		skb->csum = hw_csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
	}
}
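/*
 * The receive rings are refilled from (possibly multi-order) pages that
 * are carved into fixed-size chunks: each new buffer either takes an
 * extra reference on the current page (get_page) and advances
 * page_offset, or maps a fresh page when the current one is exhausted.
 * If an allocation fails while fewer than 16 filled buffers remain,
 * watchdog_needed is set so the watchdog can retry the refill later.
 */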
static void
myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
			int bytes, int watchdog)
{
	struct page *page;
	dma_addr_t bus;
	int idx;
#if MYRI10GE_ALLOC_SIZE > 4096
	int end_offset;
#endif

	if (unlikely(rx->watchdog_needed && !watchdog))
		return;

	/* try to refill entire ring */
	while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) {
		idx = rx->fill_cnt & rx->mask;
		if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) {
			/* we can use part of previous page */
			get_page(rx->page);
		} else {
			/* we need a new page */
			page =
			    alloc_pages(GFP_ATOMIC | __GFP_COMP,
					MYRI10GE_ALLOC_ORDER);
			if (unlikely(page == NULL)) {
				if (rx->fill_cnt - rx->cnt < 16)
					rx->watchdog_needed = 1;
				return;
			}

			bus = pci_map_page(mgp->pdev, page, 0,
					   MYRI10GE_ALLOC_SIZE,
					   PCI_DMA_FROMDEVICE);
			if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
				__free_pages(page, MYRI10GE_ALLOC_ORDER);
				if (rx->fill_cnt - rx->cnt < 16)
					rx->watchdog_needed = 1;
				return;
			}

			rx->page = page;
			rx->page_offset = 0;
			rx->bus = bus;

		}
		rx->info[idx].page = rx->page;
		rx->info[idx].page_offset = rx->page_offset;
		/* note that this is the address of the start of the
		 * page */
		dma_unmap_addr_set(&rx->info[idx], bus, rx->bus);
		rx->shadow[idx].addr_low =
		    htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset);
		rx->shadow[idx].addr_high =
		    htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus));

		/* start next packet on a cacheline boundary */
		rx->page_offset += SKB_DATA_ALIGN(bytes);

#if MYRI10GE_ALLOC_SIZE > 4096
		/* don't cross a 4KB boundary */
		end_offset = rx->page_offset + bytes - 1;
		if ((unsigned)(rx->page_offset ^ end_offset) > 4095)
			rx->page_offset = end_offset & ~4095;
#endif
		rx->fill_cnt++;

		/* copy 8 descriptors to the firmware at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
					    &rx->shadow[idx - 7]);
		}
	}
}

static inline void
myri10ge_unmap_rx_page(struct pci_dev *pdev,
		       struct myri10ge_rx_buffer_state *info, int bytes)
{
	/* unmap the recvd page if we're the only or last user of it */
	if (bytes >= MYRI10GE_ALLOC_SIZE / 2 ||
	    (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) {
		pci_unmap_page(pdev, (dma_unmap_addr(info, bus)
				      & ~(MYRI10GE_ALLOC_SIZE - 1)),
			       MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
	}
}

/*
 * GRO does not support acceleration of tagged vlan frames, and
 * this NIC does not support vlan tag offload, so we must pop
 * the tag ourselves to be able to achieve GRO performance that
 * is comparable to LRO.
 */
static inline void
myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb)
{
	u8 *va;
	struct vlan_ethhdr *veh;
	struct skb_frag_struct *frag;
	__wsum vsum;

	va = addr;
	va += MXGEFW_PAD;
	veh = (struct vlan_ethhdr *)va;
	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) ==
	    NETIF_F_HW_VLAN_CTAG_RX &&
	    veh->h_vlan_proto == htons(ETH_P_8021Q)) {
		/* fixup csum if needed */
		if (skb->ip_summed == CHECKSUM_COMPLETE) {
			vsum = csum_partial(va + ETH_HLEN, VLAN_HLEN, 0);
			skb->csum = csum_sub(skb->csum, vsum);
		}
		/* pop tag */
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       ntohs(veh->h_vlan_TCI));
		memmove(va + VLAN_HLEN, va, 2 * ETH_ALEN);
		skb->len -= VLAN_HLEN;
		skb->data_len -= VLAN_HLEN;
		frag = skb_shinfo(skb)->frags;
		frag->page_offset += VLAN_HLEN;
		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
	}
}

#define MYRI10GE_HLEN 64	/* Bytes to copy from page to skb linear memory */
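/*
 * Receive completion: the payload is never copied; instead the page
 * chunks filled by the NIC are attached to a frag-only skb obtained
 * from napi_get_frags() and handed to napi_gro_frags().  If no skb is
 * available, the buffers for this frame are unmapped, their page
 * references dropped, and the frame is counted as rx_dropped.
 */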
static inline int
myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct sk_buff *skb;
	struct skb_frag_struct *rx_frags;
	struct myri10ge_rx_buf *rx;
	int i, idx, remainder, bytes;
	struct pci_dev *pdev = mgp->pdev;
	struct net_device *dev = mgp->dev;
	u8 *va;

	if (len <= mgp->small_bytes) {
		rx = &ss->rx_small;
		bytes = mgp->small_bytes;
	} else {
		rx = &ss->rx_big;
		bytes = mgp->big_bytes;
	}

	len += MXGEFW_PAD;
	idx = rx->cnt & rx->mask;
	va = page_address(rx->info[idx].page) + rx->info[idx].page_offset;
	prefetch(va);

	skb = napi_get_frags(&ss->napi);
	if (unlikely(skb == NULL)) {
		ss->stats.rx_dropped++;
		for (i = 0, remainder = len; remainder > 0; i++) {
			myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
			put_page(rx->info[idx].page);
			rx->cnt++;
			idx = rx->cnt & rx->mask;
			remainder -= MYRI10GE_ALLOC_SIZE;
		}
		return 0;
	}
	rx_frags = skb_shinfo(skb)->frags;
	/* Fill skb_frag_struct(s) with data from our receive */
	for (i = 0, remainder = len; remainder > 0; i++) {
		myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
		skb_fill_page_desc(skb, i, rx->info[idx].page,
				   rx->info[idx].page_offset,
				   remainder < MYRI10GE_ALLOC_SIZE ?
				   remainder : MYRI10GE_ALLOC_SIZE);
		rx->cnt++;
		idx = rx->cnt & rx->mask;
		remainder -= MYRI10GE_ALLOC_SIZE;
	}

	/* remove padding */
	rx_frags[0].page_offset += MXGEFW_PAD;
	rx_frags[0].size -= MXGEFW_PAD;
	len -= MXGEFW_PAD;

	skb->len = len;
	skb->data_len = len;
	skb->truesize += len;
	if (dev->features & NETIF_F_RXCSUM) {
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = csum;
	}
	myri10ge_vlan_rx(mgp->dev, va, skb);
	skb_record_rx_queue(skb, ss - &mgp->ss[0]);

	napi_gro_frags(&ss->napi);

	return 1;
}

static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
{
	struct pci_dev *pdev = ss->mgp->pdev;
	struct myri10ge_tx_buf *tx = &ss->tx;
	struct netdev_queue *dev_queue;
	struct sk_buff *skb;
	int idx, len;

	while (tx->pkt_done != mcp_index) {
		idx = tx->done & tx->mask;
		skb = tx->info[idx].skb;

		/* Mark as free */
		tx->info[idx].skb = NULL;
		if (tx->info[idx].last) {
			tx->pkt_done++;
			tx->info[idx].last = 0;
		}
		tx->done++;
		len = dma_unmap_len(&tx->info[idx], len);
		dma_unmap_len_set(&tx->info[idx], len, 0);
		if (skb) {
			ss->stats.tx_bytes += skb->len;
			ss->stats.tx_packets++;
			dev_kfree_skb_irq(skb);
			if (len)
				pci_unmap_single(pdev,
						 dma_unmap_addr(&tx->info[idx],
								bus), len,
						 PCI_DMA_TODEVICE);
		} else {
			if (len)
				pci_unmap_page(pdev,
					       dma_unmap_addr(&tx->info[idx],
							      bus), len,
					       PCI_DMA_TODEVICE);
		}
	}

	dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
	/*
	 * Make a minimal effort to prevent the NIC from polling an
	 * idle tx queue.  If we can't get the lock we leave the queue
	 * active. In this case, either a thread was about to start
	 * using the queue anyway, or we lost a race and the NIC will
	 * waste some of its resources polling an inactive queue for a
	 * while.
	 */
	if ((ss->mgp->dev->real_num_tx_queues > 1) &&
	    __netif_tx_trylock(dev_queue)) {
		if (tx->req == tx->done) {
			tx->queue_active = 0;
			put_be32(htonl(1), tx->send_stop);
			mb();
			mmiowb();
		}
		__netif_tx_unlock(dev_queue);
	}

	/* start the queue if we've stopped it */
	if (netif_tx_queue_stopped(dev_queue) &&
	    tx->req - tx->done < (tx->mask >> 1) &&
	    ss->mgp->running == MYRI10GE_ETH_RUNNING) {
		tx->wake_queue++;
		netif_tx_wake_queue(dev_queue);
	}
}

static inline int
myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
{
	struct myri10ge_rx_done *rx_done = &ss->rx_done;
	struct myri10ge_priv *mgp = ss->mgp;
	unsigned long rx_bytes = 0;
	unsigned long rx_packets = 0;
	unsigned long rx_ok;
	int idx = rx_done->idx;
	int cnt = rx_done->cnt;
	int work_done = 0;
	u16 length;
	__wsum checksum;

	while (rx_done->entry[idx].length != 0 && work_done < budget) {
		length = ntohs(rx_done->entry[idx].length);
		rx_done->entry[idx].length = 0;
		checksum = csum_unfold(rx_done->entry[idx].checksum);
		rx_ok = myri10ge_rx_done(ss, length, checksum);
		rx_packets += rx_ok;
		rx_bytes += rx_ok * (unsigned long)length;
		cnt++;
		idx = cnt & (mgp->max_intr_slots - 1);
		work_done++;
	}
	rx_done->idx = idx;
	rx_done->cnt = cnt;
	ss->stats.rx_packets += rx_packets;
	ss->stats.rx_bytes += rx_bytes;

	/* restock receive rings if needed */
	if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh)
		myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
					mgp->small_bytes + MXGEFW_PAD, 0);
	if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh)
		myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);

	return work_done;
}
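/*
 * The firmware DMAs a small mcp_irq_data block (ss->fw_stats) into
 * host memory and flags stats_updated when it has written fresh
 * contents; the check below is how link transitions and RDMA-tag
 * exhaustion are noticed without any extra register reads.
 */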
static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
{
	struct mcp_irq_data *stats = mgp->ss[0].fw_stats;

	if (unlikely(stats->stats_updated)) {
		unsigned link_up = ntohl(stats->link_up);
		if (mgp->link_state != link_up) {
			mgp->link_state = link_up;

			if (mgp->link_state == MXGEFW_LINK_UP) {
				netif_info(mgp, link, mgp->dev, "link up\n");
				netif_carrier_on(mgp->dev);
				mgp->link_changes++;
			} else {
				netif_info(mgp, link, mgp->dev, "link %s\n",
					   (link_up == MXGEFW_LINK_MYRINET ?
					    "mismatch (Myrinet detected)" :
					    "down"));
				netif_carrier_off(mgp->dev);
				mgp->link_changes++;
			}
		}
		if (mgp->rdma_tags_available !=
		    ntohl(stats->rdma_tags_available)) {
			mgp->rdma_tags_available =
			    ntohl(stats->rdma_tags_available);
			netdev_warn(mgp->dev, "RDMA timed out! %d tags left\n",
				    mgp->rdma_tags_available);
		}
		mgp->down_cnt += stats->link_down;
		if (stats->link_down)
			wake_up(&mgp->down_wq);
	}
}

static int myri10ge_poll(struct napi_struct *napi, int budget)
{
	struct myri10ge_slice_state *ss =
	    container_of(napi, struct myri10ge_slice_state, napi);
	int work_done;

#ifdef CONFIG_MYRI10GE_DCA
	if (ss->mgp->dca_enabled)
		myri10ge_update_dca(ss);
#endif
	/* process as many rx events as NAPI will allow */
	work_done = myri10ge_clean_rx_done(ss, budget);

	if (work_done < budget) {
		napi_complete_done(napi, work_done);
		put_be32(htonl(3), ss->irq_claim);
	}
	return work_done;
}

static irqreturn_t myri10ge_intr(int irq, void *arg)
{
	struct myri10ge_slice_state *ss = arg;
	struct myri10ge_priv *mgp = ss->mgp;
	struct mcp_irq_data *stats = ss->fw_stats;
	struct myri10ge_tx_buf *tx = &ss->tx;
	u32 send_done_count;
	int i;

	/* an interrupt on a non-zero receive-only slice is implicitly
	 * valid since MSI-X irqs are not shared */
	if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
		napi_schedule(&ss->napi);
		return IRQ_HANDLED;
	}

	/* make sure it is our IRQ, and that the DMA has finished */
	if (unlikely(!stats->valid))
		return IRQ_NONE;

	/* low bit indicates receives are present, so schedule
	 * napi poll handler */
	if (stats->valid & 1)
		napi_schedule(&ss->napi);

	if (!mgp->msi_enabled && !mgp->msix_enabled) {
		put_be32(0, mgp->irq_deassert);
		if (!myri10ge_deassert_wait)
			stats->valid = 0;
		mb();
	} else
		stats->valid = 0;

	/* Wait for IRQ line to go low, if using INTx */
	i = 0;
	while (1) {
		i++;
		/* check for transmit completes and receives */
		send_done_count = ntohl(stats->send_done_count);
		if (send_done_count != tx->pkt_done)
			myri10ge_tx_done(ss, (int)send_done_count);
		if (unlikely(i > myri10ge_max_irq_loops)) {
			netdev_warn(mgp->dev, "irq stuck?\n");
			stats->valid = 0;
			schedule_work(&mgp->watchdog_work);
		}
		if (likely(stats->valid == 0))
			break;
		cpu_relax();
		barrier();
	}

	/* Only slice 0 updates stats */
	if (ss == mgp->ss)
		myri10ge_check_statblock(mgp);

	put_be32(htonl(3), ss->irq_claim + 1);
	return IRQ_HANDLED;
}

static int
myri10ge_get_link_ksettings(struct net_device *netdev,
			    struct ethtool_link_ksettings *cmd)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	char *ptr;
	int i;

	cmd->base.autoneg = AUTONEG_DISABLE;
	cmd->base.speed = SPEED_10000;
	cmd->base.duplex = DUPLEX_FULL;

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
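	/*
	 * For instance, a product code along the lines of 10G-PCIE-8A-R
	 * (an illustrative Myricom part number) has an 'R' after the
	 * third dash and would therefore be reported as PORT_FIBRE.
	 */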
	ptr = mgp->product_code_string;
	if (ptr == NULL) {
		netdev_err(netdev, "Missing product code\n");
		return 0;
	}
	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			netdev_err(netdev, "Invalid product code %s\n",
				   mgp->product_code_string);
			return 0;
		}
	}
	if (*ptr == '2')
		ptr++;
	if (*ptr == 'R' || *ptr == 'Q' || *ptr == 'S') {
		/* We've found either an XFP, quad ribbon fiber, or SFP+ */
		cmd->base.port = PORT_FIBRE;
		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
		ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
	} else {
		cmd->base.port = PORT_OTHER;
	}

	return 0;
}

static void
myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	strlcpy(info->driver, "myri10ge", sizeof(info->driver));
	strlcpy(info->version, MYRI10GE_VERSION_STR, sizeof(info->version));
	strlcpy(info->fw_version, mgp->fw_version, sizeof(info->fw_version));
	strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info));
}

static int
myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	coal->rx_coalesce_usecs = mgp->intr_coal_delay;
	return 0;
}

static int
myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	mgp->intr_coal_delay = coal->rx_coalesce_usecs;
	put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);
	return 0;
}

static void
myri10ge_get_pauseparam(struct net_device *netdev,
			struct ethtool_pauseparam *pause)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	pause->autoneg = 0;
	pause->rx_pause = mgp->pause;
	pause->tx_pause = mgp->pause;
}

static int
myri10ge_set_pauseparam(struct net_device *netdev,
			struct ethtool_pauseparam *pause)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	if (pause->tx_pause != mgp->pause)
		return myri10ge_change_pause(mgp, pause->tx_pause);
	if (pause->rx_pause != mgp->pause)
		return myri10ge_change_pause(mgp, pause->rx_pause);
	if (pause->autoneg != 0)
		return -EINVAL;
	return 0;
}

static void
myri10ge_get_ringparam(struct net_device *netdev,
		       struct ethtool_ringparam *ring)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1;
	ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1;
	ring->rx_jumbo_max_pending = 0;
	ring->tx_max_pending = mgp->ss[0].tx.mask + 1;
	ring->rx_mini_pending = ring->rx_mini_max_pending;
	ring->rx_pending = ring->rx_max_pending;
	ring->rx_jumbo_pending = ring->rx_jumbo_max_pending;
	ring->tx_pending = ring->tx_max_pending;
}

static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = {
	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
	"rx_length_errors", "rx_over_errors", "rx_crc_errors",
	"rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
	"tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
	"tx_heartbeat_errors", "tx_window_errors",
	/* device-specific stats */
	"tx_boundary", "irq", "MSI", "MSIX",
	"read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs",
	"serial_number", "watchdog_resets",
#ifdef CONFIG_MYRI10GE_DCA
	"dca_capable_firmware", "dca_device_present",
#endif
	"link_changes", "link_up", "dropped_link_overflow",
	"dropped_link_error_or_filtered",
	"dropped_pause", "dropped_bad_phy", "dropped_bad_crc32",
	"dropped_unicast_filtered", "dropped_multicast_filtered",
	"dropped_runt", "dropped_overrun", "dropped_no_small_buffer",
	"dropped_no_big_buffer"
};
"tx_window_errors", 1736 /* device-specific stats */ 1737 "tx_boundary", "irq", "MSI", "MSIX", 1738 "read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs", 1739 "serial_number", "watchdog_resets", 1740 #ifdef CONFIG_MYRI10GE_DCA 1741 "dca_capable_firmware", "dca_device_present", 1742 #endif 1743 "link_changes", "link_up", "dropped_link_overflow", 1744 "dropped_link_error_or_filtered", 1745 "dropped_pause", "dropped_bad_phy", "dropped_bad_crc32", 1746 "dropped_unicast_filtered", "dropped_multicast_filtered", 1747 "dropped_runt", "dropped_overrun", "dropped_no_small_buffer", 1748 "dropped_no_big_buffer" 1749 }; 1750 1751 static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = { 1752 "----------- slice ---------", 1753 "tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done", 1754 "rx_small_cnt", "rx_big_cnt", 1755 "wake_queue", "stop_queue", "tx_linearized", 1756 }; 1757 1758 #define MYRI10GE_NET_STATS_LEN 21 1759 #define MYRI10GE_MAIN_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_main_stats) 1760 #define MYRI10GE_SLICE_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_slice_stats) 1761 1762 static void 1763 myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data) 1764 { 1765 struct myri10ge_priv *mgp = netdev_priv(netdev); 1766 int i; 1767 1768 switch (stringset) { 1769 case ETH_SS_STATS: 1770 memcpy(data, *myri10ge_gstrings_main_stats, 1771 sizeof(myri10ge_gstrings_main_stats)); 1772 data += sizeof(myri10ge_gstrings_main_stats); 1773 for (i = 0; i < mgp->num_slices; i++) { 1774 memcpy(data, *myri10ge_gstrings_slice_stats, 1775 sizeof(myri10ge_gstrings_slice_stats)); 1776 data += sizeof(myri10ge_gstrings_slice_stats); 1777 } 1778 break; 1779 } 1780 } 1781 1782 static int myri10ge_get_sset_count(struct net_device *netdev, int sset) 1783 { 1784 struct myri10ge_priv *mgp = netdev_priv(netdev); 1785 1786 switch (sset) { 1787 case ETH_SS_STATS: 1788 return MYRI10GE_MAIN_STATS_LEN + 1789 mgp->num_slices * MYRI10GE_SLICE_STATS_LEN; 1790 default: 1791 return -EOPNOTSUPP; 1792 } 1793 } 1794 1795 static void 1796 myri10ge_get_ethtool_stats(struct net_device *netdev, 1797 struct ethtool_stats *stats, u64 * data) 1798 { 1799 struct myri10ge_priv *mgp = netdev_priv(netdev); 1800 struct myri10ge_slice_state *ss; 1801 struct rtnl_link_stats64 link_stats; 1802 int slice; 1803 int i; 1804 1805 /* force stats update */ 1806 memset(&link_stats, 0, sizeof(link_stats)); 1807 (void)myri10ge_get_stats(netdev, &link_stats); 1808 for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++) 1809 data[i] = ((u64 *)&link_stats)[i]; 1810 1811 data[i++] = (unsigned int)mgp->tx_boundary; 1812 data[i++] = (unsigned int)mgp->pdev->irq; 1813 data[i++] = (unsigned int)mgp->msi_enabled; 1814 data[i++] = (unsigned int)mgp->msix_enabled; 1815 data[i++] = (unsigned int)mgp->read_dma; 1816 data[i++] = (unsigned int)mgp->write_dma; 1817 data[i++] = (unsigned int)mgp->read_write_dma; 1818 data[i++] = (unsigned int)mgp->serial_number; 1819 data[i++] = (unsigned int)mgp->watchdog_resets; 1820 #ifdef CONFIG_MYRI10GE_DCA 1821 data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL); 1822 data[i++] = (unsigned int)(mgp->dca_enabled); 1823 #endif 1824 data[i++] = (unsigned int)mgp->link_changes; 1825 1826 /* firmware stats are useful only in the first slice */ 1827 ss = &mgp->ss[0]; 1828 data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up); 1829 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow); 1830 data[i++] = 1831 (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered); 1832 data[i++] = (unsigned 
int)ntohl(ss->fw_stats->dropped_pause); 1833 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy); 1834 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32); 1835 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered); 1836 data[i++] = 1837 (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered); 1838 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt); 1839 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun); 1840 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer); 1841 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer); 1842 1843 for (slice = 0; slice < mgp->num_slices; slice++) { 1844 ss = &mgp->ss[slice]; 1845 data[i++] = slice; 1846 data[i++] = (unsigned int)ss->tx.pkt_start; 1847 data[i++] = (unsigned int)ss->tx.pkt_done; 1848 data[i++] = (unsigned int)ss->tx.req; 1849 data[i++] = (unsigned int)ss->tx.done; 1850 data[i++] = (unsigned int)ss->rx_small.cnt; 1851 data[i++] = (unsigned int)ss->rx_big.cnt; 1852 data[i++] = (unsigned int)ss->tx.wake_queue; 1853 data[i++] = (unsigned int)ss->tx.stop_queue; 1854 data[i++] = (unsigned int)ss->tx.linearized; 1855 } 1856 } 1857 1858 static void myri10ge_set_msglevel(struct net_device *netdev, u32 value) 1859 { 1860 struct myri10ge_priv *mgp = netdev_priv(netdev); 1861 mgp->msg_enable = value; 1862 } 1863 1864 static u32 myri10ge_get_msglevel(struct net_device *netdev) 1865 { 1866 struct myri10ge_priv *mgp = netdev_priv(netdev); 1867 return mgp->msg_enable; 1868 } 1869 1870 /* 1871 * Use a low-level command to change the LED behavior. Rather than 1872 * blinking (which is the normal case), when identify is used, the 1873 * yellow LED turns solid. 1874 */ 1875 static int myri10ge_led(struct myri10ge_priv *mgp, int on) 1876 { 1877 struct mcp_gen_header *hdr; 1878 struct device *dev = &mgp->pdev->dev; 1879 size_t hdr_off, pattern_off, hdr_len; 1880 u32 pattern = 0xfffffffe; 1881 1882 /* find running firmware header */ 1883 hdr_off = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 1884 if ((hdr_off & 3) || hdr_off + sizeof(*hdr) > mgp->sram_size) { 1885 dev_err(dev, "Running firmware has bad header offset (%d)\n", 1886 (int)hdr_off); 1887 return -EIO; 1888 } 1889 hdr_len = swab32(readl(mgp->sram + hdr_off + 1890 offsetof(struct mcp_gen_header, header_length))); 1891 pattern_off = hdr_off + offsetof(struct mcp_gen_header, led_pattern); 1892 if (pattern_off >= (hdr_len + hdr_off)) { 1893 dev_info(dev, "Firmware does not support LED identification\n"); 1894 return -EINVAL; 1895 } 1896 if (!on) 1897 pattern = swab32(readl(mgp->sram + pattern_off + 4)); 1898 writel(swab32(pattern), mgp->sram + pattern_off); 1899 return 0; 1900 } 1901 1902 static int 1903 myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) 1904 { 1905 struct myri10ge_priv *mgp = netdev_priv(netdev); 1906 int rc; 1907 1908 switch (state) { 1909 case ETHTOOL_ID_ACTIVE: 1910 rc = myri10ge_led(mgp, 1); 1911 break; 1912 1913 case ETHTOOL_ID_INACTIVE: 1914 rc = myri10ge_led(mgp, 0); 1915 break; 1916 1917 default: 1918 rc = -EINVAL; 1919 } 1920 1921 return rc; 1922 } 1923 1924 static const struct ethtool_ops myri10ge_ethtool_ops = { 1925 .get_drvinfo = myri10ge_get_drvinfo, 1926 .get_coalesce = myri10ge_get_coalesce, 1927 .set_coalesce = myri10ge_set_coalesce, 1928 .get_pauseparam = myri10ge_get_pauseparam, 1929 .set_pauseparam = myri10ge_set_pauseparam, 1930 .get_ringparam = myri10ge_get_ringparam, 1931 .get_link = ethtool_op_get_link, 1932 .get_strings = 
myri10ge_get_strings, 1933 .get_sset_count = myri10ge_get_sset_count, 1934 .get_ethtool_stats = myri10ge_get_ethtool_stats, 1935 .set_msglevel = myri10ge_set_msglevel, 1936 .get_msglevel = myri10ge_get_msglevel, 1937 .set_phys_id = myri10ge_phys_id, 1938 .get_link_ksettings = myri10ge_get_link_ksettings, 1939 }; 1940 1941 static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss) 1942 { 1943 struct myri10ge_priv *mgp = ss->mgp; 1944 struct myri10ge_cmd cmd; 1945 struct net_device *dev = mgp->dev; 1946 int tx_ring_size, rx_ring_size; 1947 int tx_ring_entries, rx_ring_entries; 1948 int i, slice, status; 1949 size_t bytes; 1950 1951 /* get ring sizes */ 1952 slice = ss - mgp->ss; 1953 cmd.data0 = slice; 1954 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0); 1955 tx_ring_size = cmd.data0; 1956 cmd.data0 = slice; 1957 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); 1958 if (status != 0) 1959 return status; 1960 rx_ring_size = cmd.data0; 1961 1962 tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send); 1963 rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr); 1964 ss->tx.mask = tx_ring_entries - 1; 1965 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 1966 1967 status = -ENOMEM; 1968 1969 /* allocate the host shadow rings */ 1970 1971 bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4) 1972 * sizeof(*ss->tx.req_list); 1973 ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL); 1974 if (ss->tx.req_bytes == NULL) 1975 goto abort_with_nothing; 1976 1977 /* ensure req_list entries are aligned to 8 bytes */ 1978 ss->tx.req_list = (struct mcp_kreq_ether_send *) 1979 ALIGN((unsigned long)ss->tx.req_bytes, 8); 1980 ss->tx.queue_active = 0; 1981 1982 bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow); 1983 ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL); 1984 if (ss->rx_small.shadow == NULL) 1985 goto abort_with_tx_req_bytes; 1986 1987 bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow); 1988 ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL); 1989 if (ss->rx_big.shadow == NULL) 1990 goto abort_with_rx_small_shadow; 1991 1992 /* allocate the host info rings */ 1993 1994 bytes = tx_ring_entries * sizeof(*ss->tx.info); 1995 ss->tx.info = kzalloc(bytes, GFP_KERNEL); 1996 if (ss->tx.info == NULL) 1997 goto abort_with_rx_big_shadow; 1998 1999 bytes = rx_ring_entries * sizeof(*ss->rx_small.info); 2000 ss->rx_small.info = kzalloc(bytes, GFP_KERNEL); 2001 if (ss->rx_small.info == NULL) 2002 goto abort_with_tx_info; 2003 2004 bytes = rx_ring_entries * sizeof(*ss->rx_big.info); 2005 ss->rx_big.info = kzalloc(bytes, GFP_KERNEL); 2006 if (ss->rx_big.info == NULL) 2007 goto abort_with_rx_small_info; 2008 2009 /* Fill the receive rings */ 2010 ss->rx_big.cnt = 0; 2011 ss->rx_small.cnt = 0; 2012 ss->rx_big.fill_cnt = 0; 2013 ss->rx_small.fill_cnt = 0; 2014 ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE; 2015 ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE; 2016 ss->rx_small.watchdog_needed = 0; 2017 ss->rx_big.watchdog_needed = 0; 2018 if (mgp->small_bytes == 0) { 2019 ss->rx_small.fill_cnt = ss->rx_small.mask + 1; 2020 } else { 2021 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 2022 mgp->small_bytes + MXGEFW_PAD, 0); 2023 } 2024 2025 if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) { 2026 netdev_err(dev, "slice-%d: alloced only %d small bufs\n", 2027 slice, ss->rx_small.fill_cnt); 2028 goto abort_with_rx_small_ring; 2029 } 2030 2031 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); 2032 if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) { 
2033 netdev_err(dev, "slice-%d: alloced only %d big bufs\n", 2034 slice, ss->rx_big.fill_cnt); 2035 goto abort_with_rx_big_ring; 2036 } 2037 2038 return 0; 2039 2040 abort_with_rx_big_ring: 2041 for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { 2042 int idx = i & ss->rx_big.mask; 2043 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx], 2044 mgp->big_bytes); 2045 put_page(ss->rx_big.info[idx].page); 2046 } 2047 2048 abort_with_rx_small_ring: 2049 if (mgp->small_bytes == 0) 2050 ss->rx_small.fill_cnt = ss->rx_small.cnt; 2051 for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) { 2052 int idx = i & ss->rx_small.mask; 2053 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx], 2054 mgp->small_bytes + MXGEFW_PAD); 2055 put_page(ss->rx_small.info[idx].page); 2056 } 2057 2058 kfree(ss->rx_big.info); 2059 2060 abort_with_rx_small_info: 2061 kfree(ss->rx_small.info); 2062 2063 abort_with_tx_info: 2064 kfree(ss->tx.info); 2065 2066 abort_with_rx_big_shadow: 2067 kfree(ss->rx_big.shadow); 2068 2069 abort_with_rx_small_shadow: 2070 kfree(ss->rx_small.shadow); 2071 2072 abort_with_tx_req_bytes: 2073 kfree(ss->tx.req_bytes); 2074 ss->tx.req_bytes = NULL; 2075 ss->tx.req_list = NULL; 2076 2077 abort_with_nothing: 2078 return status; 2079 } 2080 2081 static void myri10ge_free_rings(struct myri10ge_slice_state *ss) 2082 { 2083 struct myri10ge_priv *mgp = ss->mgp; 2084 struct sk_buff *skb; 2085 struct myri10ge_tx_buf *tx; 2086 int i, len, idx; 2087 2088 /* If not allocated, skip it */ 2089 if (ss->tx.req_list == NULL) 2090 return; 2091 2092 for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { 2093 idx = i & ss->rx_big.mask; 2094 if (i == ss->rx_big.fill_cnt - 1) 2095 ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE; 2096 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx], 2097 mgp->big_bytes); 2098 put_page(ss->rx_big.info[idx].page); 2099 } 2100 2101 if (mgp->small_bytes == 0) 2102 ss->rx_small.fill_cnt = ss->rx_small.cnt; 2103 for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) { 2104 idx = i & ss->rx_small.mask; 2105 if (i == ss->rx_small.fill_cnt - 1) 2106 ss->rx_small.info[idx].page_offset = 2107 MYRI10GE_ALLOC_SIZE; 2108 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx], 2109 mgp->small_bytes + MXGEFW_PAD); 2110 put_page(ss->rx_small.info[idx].page); 2111 } 2112 tx = &ss->tx; 2113 while (tx->done != tx->req) { 2114 idx = tx->done & tx->mask; 2115 skb = tx->info[idx].skb; 2116 2117 /* Mark as free */ 2118 tx->info[idx].skb = NULL; 2119 tx->done++; 2120 len = dma_unmap_len(&tx->info[idx], len); 2121 dma_unmap_len_set(&tx->info[idx], len, 0); 2122 if (skb) { 2123 ss->stats.tx_dropped++; 2124 dev_kfree_skb_any(skb); 2125 if (len) 2126 pci_unmap_single(mgp->pdev, 2127 dma_unmap_addr(&tx->info[idx], 2128 bus), len, 2129 PCI_DMA_TODEVICE); 2130 } else { 2131 if (len) 2132 pci_unmap_page(mgp->pdev, 2133 dma_unmap_addr(&tx->info[idx], 2134 bus), len, 2135 PCI_DMA_TODEVICE); 2136 } 2137 } 2138 kfree(ss->rx_big.info); 2139 2140 kfree(ss->rx_small.info); 2141 2142 kfree(ss->tx.info); 2143 2144 kfree(ss->rx_big.shadow); 2145 2146 kfree(ss->rx_small.shadow); 2147 2148 kfree(ss->tx.req_bytes); 2149 ss->tx.req_bytes = NULL; 2150 ss->tx.req_list = NULL; 2151 } 2152 2153 static int myri10ge_request_irq(struct myri10ge_priv *mgp) 2154 { 2155 struct pci_dev *pdev = mgp->pdev; 2156 struct myri10ge_slice_state *ss; 2157 struct net_device *netdev = mgp->dev; 2158 int i; 2159 int status; 2160 2161 mgp->msi_enabled = 0; 2162 mgp->msix_enabled = 0; 2163 status = 0; 2164 if 
(myri10ge_msi) { 2165 if (mgp->num_slices > 1) { 2166 status = pci_enable_msix_range(pdev, mgp->msix_vectors, 2167 mgp->num_slices, mgp->num_slices); 2168 if (status < 0) { 2169 dev_err(&pdev->dev, 2170 "Error %d setting up MSI-X\n", status); 2171 return status; 2172 } 2173 mgp->msix_enabled = 1; 2174 } 2175 if (mgp->msix_enabled == 0) { 2176 status = pci_enable_msi(pdev); 2177 if (status != 0) { 2178 dev_err(&pdev->dev, 2179 "Error %d setting up MSI; falling back to xPIC\n", 2180 status); 2181 } else { 2182 mgp->msi_enabled = 1; 2183 } 2184 } 2185 } 2186 if (mgp->msix_enabled) { 2187 for (i = 0; i < mgp->num_slices; i++) { 2188 ss = &mgp->ss[i]; 2189 snprintf(ss->irq_desc, sizeof(ss->irq_desc), 2190 "%s:slice-%d", netdev->name, i); 2191 status = request_irq(mgp->msix_vectors[i].vector, 2192 myri10ge_intr, 0, ss->irq_desc, 2193 ss); 2194 if (status != 0) { 2195 dev_err(&pdev->dev, 2196 "slice %d failed to allocate IRQ\n", i); 2197 i--; 2198 while (i >= 0) { 2199 free_irq(mgp->msix_vectors[i].vector, 2200 &mgp->ss[i]); 2201 i--; 2202 } 2203 pci_disable_msix(pdev); 2204 return status; 2205 } 2206 } 2207 } else { 2208 status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED, 2209 mgp->dev->name, &mgp->ss[0]); 2210 if (status != 0) { 2211 dev_err(&pdev->dev, "failed to allocate IRQ\n"); 2212 if (mgp->msi_enabled) 2213 pci_disable_msi(pdev); 2214 } 2215 } 2216 return status; 2217 } 2218 2219 static void myri10ge_free_irq(struct myri10ge_priv *mgp) 2220 { 2221 struct pci_dev *pdev = mgp->pdev; 2222 int i; 2223 2224 if (mgp->msix_enabled) { 2225 for (i = 0; i < mgp->num_slices; i++) 2226 free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]); 2227 } else { 2228 free_irq(pdev->irq, &mgp->ss[0]); 2229 } 2230 if (mgp->msi_enabled) 2231 pci_disable_msi(pdev); 2232 if (mgp->msix_enabled) 2233 pci_disable_msix(pdev); 2234 } 2235 2236 static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice) 2237 { 2238 struct myri10ge_cmd cmd; 2239 struct myri10ge_slice_state *ss; 2240 int status; 2241 2242 ss = &mgp->ss[slice]; 2243 status = 0; 2244 if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) { 2245 cmd.data0 = slice; 2246 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, 2247 &cmd, 0); 2248 ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *) 2249 (mgp->sram + cmd.data0); 2250 } 2251 cmd.data0 = slice; 2252 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, 2253 &cmd, 0); 2254 ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *) 2255 (mgp->sram + cmd.data0); 2256 2257 cmd.data0 = slice; 2258 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0); 2259 ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *) 2260 (mgp->sram + cmd.data0); 2261 2262 ss->tx.send_go = (__iomem __be32 *) 2263 (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 2264 ss->tx.send_stop = (__iomem __be32 *) 2265 (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 2266 return status; 2267 2268 } 2269 2270 static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice) 2271 { 2272 struct myri10ge_cmd cmd; 2273 struct myri10ge_slice_state *ss; 2274 int status; 2275 2276 ss = &mgp->ss[slice]; 2277 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus); 2278 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus); 2279 cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16); 2280 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); 2281 if (status == -ENOSYS) { 2282 dma_addr_t bus = ss->fw_stats_bus; 2283 if (slice != 0) 2284 return -EINVAL; 2285 bus += offsetof(struct 
mcp_irq_data, send_done_count); 2286 cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus); 2287 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus); 2288 status = myri10ge_send_cmd(mgp, 2289 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2290 &cmd, 0); 2291 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2292 mgp->fw_multicast_support = 0; 2293 } else { 2294 mgp->fw_multicast_support = 1; 2295 } 2296 return 0; 2297 } 2298 2299 static int myri10ge_open(struct net_device *dev) 2300 { 2301 struct myri10ge_slice_state *ss; 2302 struct myri10ge_priv *mgp = netdev_priv(dev); 2303 struct myri10ge_cmd cmd; 2304 int i, status, big_pow2, slice; 2305 u8 __iomem *itable; 2306 2307 if (mgp->running != MYRI10GE_ETH_STOPPED) 2308 return -EBUSY; 2309 2310 mgp->running = MYRI10GE_ETH_STARTING; 2311 status = myri10ge_reset(mgp); 2312 if (status != 0) { 2313 netdev_err(dev, "failed reset\n"); 2314 goto abort_with_nothing; 2315 } 2316 2317 if (mgp->num_slices > 1) { 2318 cmd.data0 = mgp->num_slices; 2319 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 2320 if (mgp->dev->real_num_tx_queues > 1) 2321 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 2322 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2323 &cmd, 0); 2324 if (status != 0) { 2325 netdev_err(dev, "failed to set number of slices\n"); 2326 goto abort_with_nothing; 2327 } 2328 /* setup the indirection table */ 2329 cmd.data0 = mgp->num_slices; 2330 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2331 &cmd, 0); 2332 2333 status |= myri10ge_send_cmd(mgp, 2334 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 2335 &cmd, 0); 2336 if (status != 0) { 2337 netdev_err(dev, "failed to setup rss tables\n"); 2338 goto abort_with_nothing; 2339 } 2340 2341 /* just enable an identity mapping */ 2342 itable = mgp->sram + cmd.data0; 2343 for (i = 0; i < mgp->num_slices; i++) 2344 __raw_writeb(i, &itable[i]); 2345 2346 cmd.data0 = 1; 2347 cmd.data1 = myri10ge_rss_hash; 2348 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2349 &cmd, 0); 2350 if (status != 0) { 2351 netdev_err(dev, "failed to enable slices\n"); 2352 goto abort_with_nothing; 2353 } 2354 } 2355 2356 status = myri10ge_request_irq(mgp); 2357 if (status != 0) 2358 goto abort_with_nothing; 2359 2360 /* decide what small buffer size to use. For good TCP rx 2361 * performance, it is important to not receive 1514 byte 2362 * frames into jumbo buffers, as it confuses the socket buffer 2363 * accounting code, leading to drops and erratic performance. 2364 */ 2365 2366 if (dev->mtu <= ETH_DATA_LEN) 2367 /* enough for a TCP header */ 2368 mgp->small_bytes = (128 > SMP_CACHE_BYTES) 2369 ? (128 - MXGEFW_PAD) 2370 : (SMP_CACHE_BYTES - MXGEFW_PAD); 2371 else 2372 /* enough for a vlan encapsulated ETH_DATA_LEN frame */ 2373 mgp->small_bytes = VLAN_ETH_FRAME_LEN; 2374 2375 /* Override the small buffer size? */ 2376 if (myri10ge_small_bytes >= 0) 2377 mgp->small_bytes = myri10ge_small_bytes; 2378 2379 /* Firmware needs the big buff size as a power of 2. Lie and 2380 * tell him the buffer is larger, because we only use 1 2381 * buffer/pkt, and the mtu will prevent overruns. 
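 *
 * Worked example (illustrative arithmetic only, using the usual
 * constants ETH_HLEN = 14, VLAN_HLEN = 4 and MXGEFW_PAD = 2): a
 * standard 1500-byte MTU gives 1500 + 14 + 4 + 2 = 1520 bytes, which
 * the loop below rounds up to the next power of two, 2048. The
 * firmware is told 2048 via MXGEFW_CMD_SET_BIG_BUFFER_SIZE, while
 * big_bytes stays at 1520.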
2382 */ 2383 big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD; 2384 if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) { 2385 while (!is_power_of_2(big_pow2)) 2386 big_pow2++; 2387 mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD; 2388 } else { 2389 big_pow2 = MYRI10GE_ALLOC_SIZE; 2390 mgp->big_bytes = big_pow2; 2391 } 2392 2393 /* setup the per-slice data structures */ 2394 for (slice = 0; slice < mgp->num_slices; slice++) { 2395 ss = &mgp->ss[slice]; 2396 2397 status = myri10ge_get_txrx(mgp, slice); 2398 if (status != 0) { 2399 netdev_err(dev, "failed to get ring sizes or locations\n"); 2400 goto abort_with_rings; 2401 } 2402 status = myri10ge_allocate_rings(ss); 2403 if (status != 0) 2404 goto abort_with_rings; 2405 2406 /* only firmware which supports multiple TX queues 2407 * supports setting up the tx stats on non-zero 2408 * slices */ 2409 if (slice == 0 || mgp->dev->real_num_tx_queues > 1) 2410 status = myri10ge_set_stats(mgp, slice); 2411 if (status) { 2412 netdev_err(dev, "Couldn't set stats DMA\n"); 2413 goto abort_with_rings; 2414 } 2415 2416 /* must happen prior to any irq */ 2417 napi_enable(&(ss)->napi); 2418 } 2419 2420 /* now give firmware buffers sizes, and MTU */ 2421 cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN; 2422 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0); 2423 cmd.data0 = mgp->small_bytes; 2424 status |= 2425 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0); 2426 cmd.data0 = big_pow2; 2427 status |= 2428 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0); 2429 if (status) { 2430 netdev_err(dev, "Couldn't set buffer sizes\n"); 2431 goto abort_with_rings; 2432 } 2433 2434 /* 2435 * Set Linux style TSO mode; this is needed only on newer 2436 * firmware versions. Older versions default to Linux 2437 * style TSO 2438 */ 2439 cmd.data0 = 0; 2440 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0); 2441 if (status && status != -ENOSYS) { 2442 netdev_err(dev, "Couldn't set TSO mode\n"); 2443 goto abort_with_rings; 2444 } 2445 2446 mgp->link_state = ~0U; 2447 mgp->rdma_tags_available = 15; 2448 2449 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0); 2450 if (status) { 2451 netdev_err(dev, "Couldn't bring up link\n"); 2452 goto abort_with_rings; 2453 } 2454 2455 mgp->running = MYRI10GE_ETH_RUNNING; 2456 mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ; 2457 add_timer(&mgp->watchdog_timer); 2458 netif_tx_wake_all_queues(dev); 2459 2460 return 0; 2461 2462 abort_with_rings: 2463 while (slice) { 2464 slice--; 2465 napi_disable(&mgp->ss[slice].napi); 2466 } 2467 for (i = 0; i < mgp->num_slices; i++) 2468 myri10ge_free_rings(&mgp->ss[i]); 2469 2470 myri10ge_free_irq(mgp); 2471 2472 abort_with_nothing: 2473 mgp->running = MYRI10GE_ETH_STOPPED; 2474 return -ENOMEM; 2475 } 2476 2477 static int myri10ge_close(struct net_device *dev) 2478 { 2479 struct myri10ge_priv *mgp = netdev_priv(dev); 2480 struct myri10ge_cmd cmd; 2481 int status, old_down_cnt; 2482 int i; 2483 2484 if (mgp->running != MYRI10GE_ETH_RUNNING) 2485 return 0; 2486 2487 if (mgp->ss[0].tx.req_bytes == NULL) 2488 return 0; 2489 2490 del_timer_sync(&mgp->watchdog_timer); 2491 mgp->running = MYRI10GE_ETH_STOPPING; 2492 for (i = 0; i < mgp->num_slices; i++) 2493 napi_disable(&mgp->ss[i].napi); 2494 2495 netif_carrier_off(dev); 2496 2497 netif_tx_stop_all_queues(dev); 2498 if (mgp->rebooted == 0) { 2499 old_down_cnt = mgp->down_cnt; 2500 mb(); 2501 status = 2502 myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 
0); 2503 if (status) 2504 netdev_err(dev, "Couldn't bring down link\n"); 2505 2506 wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt, 2507 HZ); 2508 if (old_down_cnt == mgp->down_cnt) 2509 netdev_err(dev, "never got down irq\n"); 2510 } 2511 netif_tx_disable(dev); 2512 myri10ge_free_irq(mgp); 2513 for (i = 0; i < mgp->num_slices; i++) 2514 myri10ge_free_rings(&mgp->ss[i]); 2515 2516 mgp->running = MYRI10GE_ETH_STOPPED; 2517 return 0; 2518 } 2519 2520 /* copy an array of struct mcp_kreq_ether_send's to the mcp. Copy 2521 * backwards one at a time and handle ring wraps */ 2522 2523 static inline void 2524 myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx, 2525 struct mcp_kreq_ether_send *src, int cnt) 2526 { 2527 int idx, starting_slot; 2528 starting_slot = tx->req; 2529 while (cnt > 1) { 2530 cnt--; 2531 idx = (starting_slot + cnt) & tx->mask; 2532 myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 2533 mb(); 2534 } 2535 } 2536 2537 /* 2538 * copy an array of struct mcp_kreq_ether_send's to the mcp. Copy 2539 * at most 32 bytes at a time, so as to avoid involving the software 2540 * pio handler in the nic. We re-write the first segment's flags 2541 * to mark them valid only after writing the entire chain. 2542 */ 2543 2544 static inline void 2545 myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, 2546 int cnt) 2547 { 2548 int idx, i; 2549 struct mcp_kreq_ether_send __iomem *dstp, *dst; 2550 struct mcp_kreq_ether_send *srcp; 2551 u8 last_flags; 2552 2553 idx = tx->req & tx->mask; 2554 2555 last_flags = src->flags; 2556 src->flags = 0; 2557 mb(); 2558 dst = dstp = &tx->lanai[idx]; 2559 srcp = src; 2560 2561 if ((idx + cnt) < tx->mask) { 2562 for (i = 0; i < (cnt - 1); i += 2) { 2563 myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 2564 mb(); /* force write every 32 bytes */ 2565 srcp += 2; 2566 dstp += 2; 2567 } 2568 } else { 2569 /* submit all but the first request, and ensure 2570 * that it is submitted below */ 2571 myri10ge_submit_req_backwards(tx, src, cnt); 2572 i = 0; 2573 } 2574 if (i < cnt) { 2575 /* submit the first request */ 2576 myri10ge_pio_copy(dstp, srcp, sizeof(*src)); 2577 mb(); /* barrier before setting valid flag */ 2578 } 2579 2580 /* re-write the last 32-bits with the valid flags */ 2581 src->flags = last_flags; 2582 put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3); 2583 tx->req += cnt; 2584 mb(); 2585 } 2586 2587 static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp, 2588 struct myri10ge_tx_buf *tx, int idx) 2589 { 2590 unsigned int len; 2591 int last_idx; 2592 2593 /* Free any DMA resources we've alloced and clear out the skb slot */ 2594 last_idx = (idx + 1) & tx->mask; 2595 idx = tx->req & tx->mask; 2596 do { 2597 len = dma_unmap_len(&tx->info[idx], len); 2598 if (len) { 2599 if (tx->info[idx].skb != NULL) 2600 pci_unmap_single(mgp->pdev, 2601 dma_unmap_addr(&tx->info[idx], 2602 bus), len, 2603 PCI_DMA_TODEVICE); 2604 else 2605 pci_unmap_page(mgp->pdev, 2606 dma_unmap_addr(&tx->info[idx], 2607 bus), len, 2608 PCI_DMA_TODEVICE); 2609 dma_unmap_len_set(&tx->info[idx], len, 0); 2610 tx->info[idx].skb = NULL; 2611 } 2612 idx = (idx + 1) & tx->mask; 2613 } while (idx != last_idx); 2614 } 2615 2616 /* 2617 * Transmit a packet. We need to split the packet so that a single 2618 * segment does not cross myri10ge->tx_boundary, so this makes segment 2619 * counting tricky. 
So rather than try to count segments up front, we 2620 * just give up if too few send descriptors are currently available 2621 * to hold a reasonably fragmented packet. If we run 2622 * out of descriptors while preparing a packet for DMA, we just linearize 2623 * it and try again. 2624 */ 2625 2626 static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, 2627 struct net_device *dev) 2628 { 2629 struct myri10ge_priv *mgp = netdev_priv(dev); 2630 struct myri10ge_slice_state *ss; 2631 struct mcp_kreq_ether_send *req; 2632 struct myri10ge_tx_buf *tx; 2633 struct skb_frag_struct *frag; 2634 struct netdev_queue *netdev_queue; 2635 dma_addr_t bus; 2636 u32 low; 2637 __be32 high_swapped; 2638 unsigned int len; 2639 int idx, avail, frag_cnt, frag_idx, count, mss, max_segments; 2640 u16 pseudo_hdr_offset, cksum_offset, queue; 2641 int cum_len, seglen, boundary, rdma_count; 2642 u8 flags, odd_flag; 2643 2644 queue = skb_get_queue_mapping(skb); 2645 ss = &mgp->ss[queue]; 2646 netdev_queue = netdev_get_tx_queue(mgp->dev, queue); 2647 tx = &ss->tx; 2648 2649 again: 2650 req = tx->req_list; 2651 avail = tx->mask - 1 - (tx->req - tx->done); 2652 2653 mss = 0; 2654 max_segments = MXGEFW_MAX_SEND_DESC; 2655 2656 if (skb_is_gso(skb)) { 2657 mss = skb_shinfo(skb)->gso_size; 2658 max_segments = MYRI10GE_MAX_SEND_DESC_TSO; 2659 } 2660 2661 if (unlikely(avail < max_segments)) { 2662 /* we are out of transmit resources */ 2663 tx->stop_queue++; 2664 netif_tx_stop_queue(netdev_queue); 2665 return NETDEV_TX_BUSY; 2666 } 2667 2668 /* Setup checksum offloading, if needed */ 2669 cksum_offset = 0; 2670 pseudo_hdr_offset = 0; 2671 odd_flag = 0; 2672 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 2673 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 2674 cksum_offset = skb_checksum_start_offset(skb); 2675 pseudo_hdr_offset = cksum_offset + skb->csum_offset; 2676 /* If the headers are excessively large, then we must 2677 * fall back to a software checksum */ 2678 if (unlikely(!mss && (cksum_offset > 255 || 2679 pseudo_hdr_offset > 127))) { 2680 if (skb_checksum_help(skb)) 2681 goto drop; 2682 cksum_offset = 0; 2683 pseudo_hdr_offset = 0; 2684 } else { 2685 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2686 flags |= MXGEFW_FLAGS_CKSUM; 2687 } 2688 } 2689 2690 cum_len = 0; 2691 2692 if (mss) { /* TSO */ 2693 /* this removes any CKSUM flag from before */ 2694 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 2695 2696 /* negative cum_len signifies to the 2697 * send loop that we are still in the 2698 * header portion of the TSO packet. 2699 * TSO header can be at most 1KB long */ 2700 cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); 2701 2702 /* for IPv6 TSO, the checksum offset stores the 2703 * TCP header length, to save the firmware from 2704 * the need to parse the headers */ 2705 if (skb_is_gso_v6(skb)) { 2706 cksum_offset = tcp_hdrlen(skb); 2707 /* Can only handle headers <= max_tso6 long */ 2708 if (unlikely(-cum_len > mgp->max_tso6)) 2709 return myri10ge_sw_tso(skb, dev); 2710 } 2711 /* for TSO, pseudo_hdr_offset holds mss. 2712 * The firmware figures out where to put 2713 * the checksum by parsing the header.
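 *
 * (Illustration, assuming a plain TCP/IPv4 TSO frame with no VLAN
 * tag and no TCP options: skb_transport_offset() is 14 + 20 = 34 and
 * tcp_hdrlen() is 20, so cum_len starts at -54. The send loop below
 * treats bytes as header while cum_len is negative and as payload
 * once it reaches zero, cutting a new segment after every mss bytes
 * of payload.)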
*/ 2714 pseudo_hdr_offset = mss; 2715 } else 2716 /* Mark small packets, and pad out tiny packets */ 2717 if (skb->len <= MXGEFW_SEND_SMALL_SIZE) { 2718 flags |= MXGEFW_FLAGS_SMALL; 2719 2720 /* pad frames to at least ETH_ZLEN bytes */ 2721 if (eth_skb_pad(skb)) { 2722 /* The packet is gone, so we must 2723 * return 0 */ 2724 ss->stats.tx_dropped += 1; 2725 return NETDEV_TX_OK; 2726 } 2727 } 2728 2729 /* map the skb for DMA */ 2730 len = skb_headlen(skb); 2731 bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); 2732 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) 2733 goto drop; 2734 2735 idx = tx->req & tx->mask; 2736 tx->info[idx].skb = skb; 2737 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2738 dma_unmap_len_set(&tx->info[idx], len, len); 2739 2740 frag_cnt = skb_shinfo(skb)->nr_frags; 2741 frag_idx = 0; 2742 count = 0; 2743 rdma_count = 0; 2744 2745 /* "rdma_count" is the number of RDMAs belonging to the 2746 * current packet BEFORE the current send request. For 2747 * non-TSO packets, this is equal to "count". 2748 * For TSO packets, rdma_count needs to be reset 2749 * to 0 after a segment cut. 2750 * 2751 * The rdma_count field of the send request is 2752 * the number of RDMAs of the packet starting at 2753 * that request. For TSO send requests with one or more cuts 2754 * in the middle, this is the number of RDMAs starting 2755 * after the last cut in the request. All previous 2756 * segments before the last cut implicitly have 1 RDMA. 2757 * 2758 * Since the number of RDMAs is not known beforehand, 2759 * it must be filled in retroactively - after each 2760 * segmentation cut or at the end of the entire packet. 2761 */ 2762 2763 while (1) { 2764 /* Break the SKB or Fragment up into pieces which 2765 * do not cross mgp->tx_boundary */ 2766 low = MYRI10GE_LOWPART_TO_U32(bus); 2767 high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus)); 2768 while (len) { 2769 u8 flags_next; 2770 int cum_len_next; 2771 2772 if (unlikely(count == max_segments)) 2773 goto abort_linearize; 2774 2775 boundary = 2776 (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1); 2777 seglen = boundary - low; 2778 if (seglen > len) 2779 seglen = len; 2780 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 2781 cum_len_next = cum_len + seglen; 2782 if (mss) { /* TSO */ 2783 (req - rdma_count)->rdma_count = rdma_count + 1; 2784 2785 if (likely(cum_len >= 0)) { /* payload */ 2786 int next_is_first, chop; 2787 2788 chop = (cum_len_next > mss); 2789 cum_len_next = cum_len_next % mss; 2790 next_is_first = (cum_len_next == 0); 2791 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 2792 flags_next |= next_is_first * 2793 MXGEFW_FLAGS_FIRST; 2794 rdma_count |= -(chop | next_is_first); 2795 rdma_count += chop & ~next_is_first; 2796 } else if (likely(cum_len_next >= 0)) { /* header ends */ 2797 int small; 2798 2799 rdma_count = -1; 2800 cum_len_next = 0; 2801 seglen = -cum_len; 2802 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 2803 flags_next = MXGEFW_FLAGS_TSO_PLD | 2804 MXGEFW_FLAGS_FIRST | 2805 (small * MXGEFW_FLAGS_SMALL); 2806 } 2807 } 2808 req->addr_high = high_swapped; 2809 req->addr_low = htonl(low); 2810 req->pseudo_hdr_offset = htons(pseudo_hdr_offset); 2811 req->pad = 0; /* complete solid 16-byte block; does this matter?
*/ 2812 req->rdma_count = 1; 2813 req->length = htons(seglen); 2814 req->cksum_offset = cksum_offset; 2815 req->flags = flags | ((cum_len & 1) * odd_flag); 2816 2817 low += seglen; 2818 len -= seglen; 2819 cum_len = cum_len_next; 2820 flags = flags_next; 2821 req++; 2822 count++; 2823 rdma_count++; 2824 if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) { 2825 if (unlikely(cksum_offset > seglen)) 2826 cksum_offset -= seglen; 2827 else 2828 cksum_offset = 0; 2829 } 2830 } 2831 if (frag_idx == frag_cnt) 2832 break; 2833 2834 /* map next fragment for DMA */ 2835 frag = &skb_shinfo(skb)->frags[frag_idx]; 2836 frag_idx++; 2837 len = skb_frag_size(frag); 2838 bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, 2839 DMA_TO_DEVICE); 2840 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { 2841 myri10ge_unmap_tx_dma(mgp, tx, idx); 2842 goto drop; 2843 } 2844 idx = (count + tx->req) & tx->mask; 2845 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2846 dma_unmap_len_set(&tx->info[idx], len, len); 2847 } 2848 2849 (req - rdma_count)->rdma_count = rdma_count; 2850 if (mss) 2851 do { 2852 req--; 2853 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2854 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 2855 MXGEFW_FLAGS_FIRST))); 2856 idx = ((count - 1) + tx->req) & tx->mask; 2857 tx->info[idx].last = 1; 2858 myri10ge_submit_req(tx, tx->req_list, count); 2859 /* if using multiple tx queues, make sure NIC polls the 2860 * current slice */ 2861 if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) { 2862 tx->queue_active = 1; 2863 put_be32(htonl(1), tx->send_go); 2864 mb(); 2865 mmiowb(); 2866 } 2867 tx->pkt_start++; 2868 if ((avail - count) < MXGEFW_MAX_SEND_DESC) { 2869 tx->stop_queue++; 2870 netif_tx_stop_queue(netdev_queue); 2871 } 2872 return NETDEV_TX_OK; 2873 2874 abort_linearize: 2875 myri10ge_unmap_tx_dma(mgp, tx, idx); 2876 2877 if (skb_is_gso(skb)) { 2878 netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); 2879 goto drop; 2880 } 2881 2882 if (skb_linearize(skb)) 2883 goto drop; 2884 2885 tx->linearized++; 2886 goto again; 2887 2888 drop: 2889 dev_kfree_skb_any(skb); 2890 ss->stats.tx_dropped += 1; 2891 return NETDEV_TX_OK; 2892 2893 } 2894 2895 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 2896 struct net_device *dev) 2897 { 2898 struct sk_buff *segs, *curr; 2899 struct myri10ge_priv *mgp = netdev_priv(dev); 2900 struct myri10ge_slice_state *ss; 2901 netdev_tx_t status; 2902 2903 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); 2904 if (IS_ERR(segs)) 2905 goto drop; 2906 2907 while (segs) { 2908 curr = segs; 2909 segs = segs->next; 2910 curr->next = NULL; 2911 status = myri10ge_xmit(curr, dev); 2912 if (status != 0) { 2913 dev_kfree_skb_any(curr); 2914 while (segs != NULL) { /* free every remaining segment */ 2915 curr = segs; 2916 segs = segs->next; 2917 curr->next = NULL; 2918 dev_kfree_skb_any(curr); 2919 } 2920 goto drop; 2921 } 2922 } 2923 dev_kfree_skb_any(skb); 2924 return NETDEV_TX_OK; 2925 2926 drop: 2927 ss = &mgp->ss[skb_get_queue_mapping(skb)]; 2928 dev_kfree_skb_any(skb); 2929 ss->stats.tx_dropped += 1; 2930 return NETDEV_TX_OK; 2931 } 2932 2933 static void myri10ge_get_stats(struct net_device *dev, 2934 struct rtnl_link_stats64 *stats) 2935 { 2936 const struct myri10ge_priv *mgp = netdev_priv(dev); 2937 const struct myri10ge_slice_netstats *slice_stats; 2938 int i; 2939 2940 for (i = 0; i < mgp->num_slices; i++) { 2941 slice_stats = &mgp->ss[i].stats; 2942 stats->rx_packets += slice_stats->rx_packets; 2943 stats->tx_packets += slice_stats->tx_packets; 2944 stats->rx_bytes +=
slice_stats->rx_bytes; 2945 stats->tx_bytes += slice_stats->tx_bytes; 2946 stats->rx_dropped += slice_stats->rx_dropped; 2947 stats->tx_dropped += slice_stats->tx_dropped; 2948 } 2949 } 2950 2951 static void myri10ge_set_multicast_list(struct net_device *dev) 2952 { 2953 struct myri10ge_priv *mgp = netdev_priv(dev); 2954 struct myri10ge_cmd cmd; 2955 struct netdev_hw_addr *ha; 2956 __be32 data[2] = { 0, 0 }; 2957 int err; 2958 2959 /* can be called from atomic contexts, 2960 * pass 1 to force atomicity in myri10ge_send_cmd() */ 2961 myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1); 2962 2963 /* This firmware is known to not support multicast */ 2964 if (!mgp->fw_multicast_support) 2965 return; 2966 2967 /* Disable multicast filtering */ 2968 2969 err = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_ALLMULTI, &cmd, 1); 2970 if (err != 0) { 2971 netdev_err(dev, "Failed MXGEFW_ENABLE_ALLMULTI, error status: %d\n", 2972 err); 2973 goto abort; 2974 } 2975 2976 if ((dev->flags & IFF_ALLMULTI) || mgp->adopted_rx_filter_bug) { 2977 /* request to disable multicast filtering, so quit here */ 2978 return; 2979 } 2980 2981 /* Flush the filters */ 2982 2983 err = myri10ge_send_cmd(mgp, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, 2984 &cmd, 1); 2985 if (err != 0) { 2986 netdev_err(dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, error status: %d\n", 2987 err); 2988 goto abort; 2989 } 2990 2991 /* Walk the multicast list, and add each address */ 2992 netdev_for_each_mc_addr(ha, dev) { 2993 memcpy(data, &ha->addr, ETH_ALEN); 2994 cmd.data0 = ntohl(data[0]); 2995 cmd.data1 = ntohl(data[1]); 2996 err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP, 2997 &cmd, 1); 2998 2999 if (err != 0) { 3000 netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n", 3001 err, ha->addr); 3002 goto abort; 3003 } 3004 } 3005 /* Enable multicast filtering */ 3006 err = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_ALLMULTI, &cmd, 1); 3007 if (err != 0) { 3008 netdev_err(dev, "Failed MXGEFW_DISABLE_ALLMULTI, error status: %d\n", 3009 err); 3010 goto abort; 3011 } 3012 3013 return; 3014 3015 abort: 3016 return; 3017 } 3018 3019 static int myri10ge_set_mac_address(struct net_device *dev, void *addr) 3020 { 3021 struct sockaddr *sa = addr; 3022 struct myri10ge_priv *mgp = netdev_priv(dev); 3023 int status; 3024 3025 if (!is_valid_ether_addr(sa->sa_data)) 3026 return -EADDRNOTAVAIL; 3027 3028 status = myri10ge_update_mac_address(mgp, sa->sa_data); 3029 if (status != 0) { 3030 netdev_err(dev, "changing mac address failed with %d\n", 3031 status); 3032 return status; 3033 } 3034 3035 /* change the dev structure */ 3036 memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); 3037 return 0; 3038 } 3039 3040 static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) 3041 { 3042 struct myri10ge_priv *mgp = netdev_priv(dev); 3043 int error = 0; 3044 3045 netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); 3046 if (mgp->running) { 3047 /* if we change the mtu on an active device, we must 3048 * reset the device so the firmware sees the change */ 3049 myri10ge_close(dev); 3050 dev->mtu = new_mtu; 3051 myri10ge_open(dev); 3052 } else 3053 dev->mtu = new_mtu; 3054 3055 return error; 3056 } 3057 3058 /* 3059 * Enable ECRC to align PCI-E Completion packets on an 8-byte boundary. 3060 * Only do it if the bridge is a root port since we don't want to disturb 3061 * any other device, except if forced with myri10ge_ecrc_enable > 1. 
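 *
 * Usage note: the behavior is governed by the myri10ge_ecrc_enable
 * module parameter; loading with e.g. "modprobe myri10ge
 * myri10ge_ecrc_enable=2" forces the walk up the bus hierarchy to
 * the root port even when the immediate upstream bridge is not one.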
3062 */ 3063 3064 static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) 3065 { 3066 struct pci_dev *bridge = mgp->pdev->bus->self; 3067 struct device *dev = &mgp->pdev->dev; 3068 int cap; 3069 unsigned err_cap; 3070 int ret; 3071 3072 if (!myri10ge_ecrc_enable || !bridge) 3073 return; 3074 3075 /* check that the bridge is a root port */ 3076 if (pci_pcie_type(bridge) != PCI_EXP_TYPE_ROOT_PORT) { 3077 if (myri10ge_ecrc_enable > 1) { 3078 struct pci_dev *prev_bridge, *old_bridge = bridge; 3079 3080 /* Walk the hierarchy up to the root port 3081 * where ECRC has to be enabled */ 3082 do { 3083 prev_bridge = bridge; 3084 bridge = bridge->bus->self; 3085 if (!bridge || prev_bridge == bridge) { 3086 dev_err(dev, 3087 "Failed to find root port" 3088 " to force ECRC\n"); 3089 return; 3090 } 3091 } while (pci_pcie_type(bridge) != 3092 PCI_EXP_TYPE_ROOT_PORT); 3093 3094 dev_info(dev, 3095 "Forcing ECRC on non-root port %s" 3096 " (enabling on root port %s)\n", 3097 pci_name(old_bridge), pci_name(bridge)); 3098 } else { 3099 dev_err(dev, 3100 "Not enabling ECRC on non-root port %s\n", 3101 pci_name(bridge)); 3102 return; 3103 } 3104 } 3105 3106 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3107 if (!cap) 3108 return; 3109 3110 ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap); 3111 if (ret) { 3112 dev_err(dev, "failed reading ext-conf-space of %s\n", 3113 pci_name(bridge)); 3114 dev_err(dev, "\t pci=nommconf in use? " 3115 "or buggy/incomplete/absent ACPI MCFG attr?\n"); 3116 return; 3117 } 3118 if (!(err_cap & PCI_ERR_CAP_ECRC_GENC)) 3119 return; 3120 3121 err_cap |= PCI_ERR_CAP_ECRC_GENE; 3122 pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap); 3123 dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge)); 3124 } 3125 3126 /* 3127 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 3128 * when the PCI-E Completion packets are aligned on an 8-byte 3129 * boundary. Some PCI-E chip sets always align Completion packets; on 3130 * the ones that do not, the alignment can be enforced by enabling 3131 * ECRC generation (if supported). 3132 * 3133 * When PCI-E Completion packets are not aligned, it is actually more 3134 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 3135 * 3136 * If the driver can neither enable ECRC nor verify that it has 3137 * already been enabled, then it must use a firmware image which works 3138 * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it 3139 * should also ensure that it never gives the device a Read-DMA which is 3140 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 3141 * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat) 3142 * firmware image, and set tx_boundary to 4KB. 3143 */ 3144 3145 static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) 3146 { 3147 struct pci_dev *pdev = mgp->pdev; 3148 struct device *dev = &pdev->dev; 3149 int status; 3150 3151 mgp->tx_boundary = 4096; 3152 /* 3153 * Verify the max read request size was set to 4KB 3154 * before trying the test with 4KB. 3155 */ 3156 status = pcie_get_readrq(pdev); 3157 if (status < 0) { 3158 dev_err(dev, "Couldn't read max read req size: %d\n", status); 3159 goto abort; 3160 } 3161 if (status != 4096) { 3162 dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status); 3163 mgp->tx_boundary = 2048; 3164 } 3165 /* 3166 * load the optimized firmware (which assumes aligned PCIe 3167 * completions) in order to see if it works on this host. 
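 * If the DMA test further down detects an unaligned completion
 * (-E2BIG), or fails for any other reason, the abort path falls back
 * to the unaligned firmware image with a 2KB tx_boundary.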
3168 */ 3169 set_fw_name(mgp, myri10ge_fw_aligned, false); 3170 status = myri10ge_load_firmware(mgp, 1); 3171 if (status != 0) { 3172 goto abort; 3173 } 3174 3175 /* 3176 * Enable ECRC if possible 3177 */ 3178 myri10ge_enable_ecrc(mgp); 3179 3180 /* 3181 * Run a DMA test which watches for unaligned completions and 3182 * aborts on the first one seen. 3183 */ 3184 3185 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 3186 if (status == 0) 3187 return; /* keep the aligned firmware */ 3188 3189 if (status != -E2BIG) 3190 dev_warn(dev, "DMA test failed: %d\n", status); 3191 if (status == -ENOSYS) 3192 dev_warn(dev, "Falling back to ethp! " 3193 "Please install up to date fw\n"); 3194 abort: 3195 /* fall back to using the unaligned firmware */ 3196 mgp->tx_boundary = 2048; 3197 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3198 } 3199 3200 static void myri10ge_select_firmware(struct myri10ge_priv *mgp) 3201 { 3202 int overridden = 0; 3203 3204 if (myri10ge_force_firmware == 0) { 3205 int link_width; 3206 u16 lnk; 3207 3208 pcie_capability_read_word(mgp->pdev, PCI_EXP_LNKSTA, &lnk); 3209 link_width = (lnk >> 4) & 0x3f; 3210 3211 /* Check to see if Link is less than 8 or if the 3212 * upstream bridge is known to provide aligned 3213 * completions */ 3214 if (link_width < 8) { 3215 dev_info(&mgp->pdev->dev, "PCIE x%d Link\n", 3216 link_width); 3217 mgp->tx_boundary = 4096; 3218 set_fw_name(mgp, myri10ge_fw_aligned, false); 3219 } else { 3220 myri10ge_firmware_probe(mgp); 3221 } 3222 } else { 3223 if (myri10ge_force_firmware == 1) { 3224 dev_info(&mgp->pdev->dev, 3225 "Assuming aligned completions (forced)\n"); 3226 mgp->tx_boundary = 4096; 3227 set_fw_name(mgp, myri10ge_fw_aligned, false); 3228 } else { 3229 dev_info(&mgp->pdev->dev, 3230 "Assuming unaligned completions (forced)\n"); 3231 mgp->tx_boundary = 2048; 3232 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3233 } 3234 } 3235 3236 kernel_param_lock(THIS_MODULE); 3237 if (myri10ge_fw_name != NULL) { 3238 char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); 3239 if (fw_name) { 3240 overridden = 1; 3241 set_fw_name(mgp, fw_name, true); 3242 } 3243 } 3244 kernel_param_unlock(THIS_MODULE); 3245 3246 if (mgp->board_number < MYRI10GE_MAX_BOARDS && 3247 myri10ge_fw_names[mgp->board_number] != NULL && 3248 strlen(myri10ge_fw_names[mgp->board_number])) { 3249 set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false); 3250 overridden = 1; 3251 } 3252 if (overridden) 3253 dev_info(&mgp->pdev->dev, "overriding firmware to %s\n", 3254 mgp->fw_name); 3255 } 3256 3257 static void myri10ge_mask_surprise_down(struct pci_dev *pdev) 3258 { 3259 struct pci_dev *bridge = pdev->bus->self; 3260 int cap; 3261 u32 mask; 3262 3263 if (bridge == NULL) 3264 return; 3265 3266 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3267 if (cap) { 3268 /* a sram parity error can cause a surprise link 3269 * down; since we expect and can recover from sram 3270 * parity errors, mask surprise link down events */ 3271 pci_read_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, &mask); 3272 mask |= 0x20; 3273 pci_write_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, mask); 3274 } 3275 } 3276 3277 #ifdef CONFIG_PM 3278 static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state) 3279 { 3280 struct myri10ge_priv *mgp; 3281 struct net_device *netdev; 3282 3283 mgp = pci_get_drvdata(pdev); 3284 if (mgp == NULL) 3285 return -EINVAL; 3286 netdev = mgp->dev; 3287 3288 netif_device_detach(netdev); 3289 if (netif_running(netdev)) { 3290 netdev_info(netdev, 
"closing\n"); 3291 rtnl_lock(); 3292 myri10ge_close(netdev); 3293 rtnl_unlock(); 3294 } 3295 myri10ge_dummy_rdma(mgp, 0); 3296 pci_save_state(pdev); 3297 pci_disable_device(pdev); 3298 3299 return pci_set_power_state(pdev, pci_choose_state(pdev, state)); 3300 } 3301 3302 static int myri10ge_resume(struct pci_dev *pdev) 3303 { 3304 struct myri10ge_priv *mgp; 3305 struct net_device *netdev; 3306 int status; 3307 u16 vendor; 3308 3309 mgp = pci_get_drvdata(pdev); 3310 if (mgp == NULL) 3311 return -EINVAL; 3312 netdev = mgp->dev; 3313 pci_set_power_state(pdev, PCI_D0); /* zeros conf space as a side effect */ 3314 msleep(5); /* give card time to respond */ 3315 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3316 if (vendor == 0xffff) { 3317 netdev_err(mgp->dev, "device disappeared!\n"); 3318 return -EIO; 3319 } 3320 3321 pci_restore_state(pdev); 3322 3323 status = pci_enable_device(pdev); 3324 if (status) { 3325 dev_err(&pdev->dev, "failed to enable device\n"); 3326 return status; 3327 } 3328 3329 pci_set_master(pdev); 3330 3331 myri10ge_reset(mgp); 3332 myri10ge_dummy_rdma(mgp, 1); 3333 3334 /* Save configuration space to be restored if the 3335 * nic resets due to a parity error */ 3336 pci_save_state(pdev); 3337 3338 if (netif_running(netdev)) { 3339 rtnl_lock(); 3340 status = myri10ge_open(netdev); 3341 rtnl_unlock(); 3342 if (status != 0) 3343 goto abort_with_enabled; 3344 3345 } 3346 netif_device_attach(netdev); 3347 3348 return 0; 3349 3350 abort_with_enabled: 3351 pci_disable_device(pdev); 3352 return -EIO; 3353 3354 } 3355 #endif /* CONFIG_PM */ 3356 3357 static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp) 3358 { 3359 struct pci_dev *pdev = mgp->pdev; 3360 int vs = mgp->vendor_specific_offset; 3361 u32 reboot; 3362 3363 /*enter read32 mode */ 3364 pci_write_config_byte(pdev, vs + 0x10, 0x3); 3365 3366 /*read REBOOT_STATUS (0xfffffff0) */ 3367 pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0); 3368 pci_read_config_dword(pdev, vs + 0x14, &reboot); 3369 return reboot; 3370 } 3371 3372 static void 3373 myri10ge_check_slice(struct myri10ge_slice_state *ss, int *reset_needed, 3374 int *busy_slice_cnt, u32 rx_pause_cnt) 3375 { 3376 struct myri10ge_priv *mgp = ss->mgp; 3377 int slice = ss - mgp->ss; 3378 3379 if (ss->tx.req != ss->tx.done && 3380 ss->tx.done == ss->watchdog_tx_done && 3381 ss->watchdog_tx_req != ss->watchdog_tx_done) { 3382 /* nic seems like it might be stuck.. */ 3383 if (rx_pause_cnt != mgp->watchdog_pause) { 3384 if (net_ratelimit()) 3385 netdev_warn(mgp->dev, "slice %d: TX paused, " 3386 "check link partner\n", slice); 3387 } else { 3388 netdev_warn(mgp->dev, 3389 "slice %d: TX stuck %d %d %d %d %d %d\n", 3390 slice, ss->tx.queue_active, ss->tx.req, 3391 ss->tx.done, ss->tx.pkt_start, 3392 ss->tx.pkt_done, 3393 (int)ntohl(mgp->ss[slice].fw_stats-> 3394 send_done_count)); 3395 *reset_needed = 1; 3396 ss->stuck = 1; 3397 } 3398 } 3399 if (ss->watchdog_tx_done != ss->tx.done || 3400 ss->watchdog_rx_done != ss->rx_done.cnt) { 3401 *busy_slice_cnt += 1; 3402 } 3403 ss->watchdog_tx_done = ss->tx.done; 3404 ss->watchdog_tx_req = ss->tx.req; 3405 ss->watchdog_rx_done = ss->rx_done.cnt; 3406 } 3407 3408 /* 3409 * This watchdog is used to check whether the board has suffered 3410 * from a parity error and needs to be recovered. 
3411 */ 3412 static void myri10ge_watchdog(struct work_struct *work) 3413 { 3414 struct myri10ge_priv *mgp = 3415 container_of(work, struct myri10ge_priv, watchdog_work); 3416 struct myri10ge_slice_state *ss; 3417 u32 reboot, rx_pause_cnt; 3418 int status, rebooted; 3419 int i; 3420 int reset_needed = 0; 3421 int busy_slice_cnt = 0; 3422 u16 cmd, vendor; 3423 3424 mgp->watchdog_resets++; 3425 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3426 rebooted = 0; 3427 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3428 /* Bus master DMA disabled? Check to see 3429 * if the card rebooted due to a parity error. 3430 * For now, just report it */ 3431 reboot = myri10ge_read_reboot(mgp); 3432 netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n", 3433 reboot, myri10ge_reset_recover ? "" : " not"); 3434 if (myri10ge_reset_recover == 0) 3435 return; 3436 rtnl_lock(); 3437 mgp->rebooted = 1; 3438 rebooted = 1; 3439 myri10ge_close(mgp->dev); 3440 myri10ge_reset_recover--; 3441 mgp->rebooted = 0; 3442 /* 3443 * A rebooted nic will come back with config space as 3444 * it was after power was applied to PCIe bus. 3445 * Attempt to restore config space which was saved 3446 * when the driver was loaded, or the last time the 3447 * nic was resumed from power saving mode. 3448 */ 3449 pci_restore_state(mgp->pdev); 3450 3451 /* save state again for accounting reasons */ 3452 pci_save_state(mgp->pdev); 3453 3454 } else { 3455 /* if we get back -1's from our slot, perhaps somebody 3456 * powered off our card. Don't try to reset it in 3457 * this case */ 3458 if (cmd == 0xffff) { 3459 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3460 if (vendor == 0xffff) { 3461 netdev_err(mgp->dev, "device disappeared!\n"); 3462 return; 3463 } 3464 } 3465 /* Perhaps it is a software error. See if stuck slice 3466 * has recovered, reset if not */ 3467 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3468 for (i = 0; i < mgp->num_slices; i++) { 3469 ss = &mgp->ss[i]; 3470 if (ss->stuck) { 3471 myri10ge_check_slice(ss, &reset_needed, 3472 &busy_slice_cnt, 3473 rx_pause_cnt); 3474 ss->stuck = 0; 3475 } 3476 } 3477 if (!reset_needed) { 3478 netdev_dbg(mgp->dev, "not resetting\n"); 3479 return; 3480 } 3481 3482 netdev_err(mgp->dev, "device timeout, resetting\n"); 3483 } 3484 3485 if (!rebooted) { 3486 rtnl_lock(); 3487 myri10ge_close(mgp->dev); 3488 } 3489 status = myri10ge_load_firmware(mgp, 1); 3490 if (status != 0) 3491 netdev_err(mgp->dev, "failed to load firmware\n"); 3492 else 3493 myri10ge_open(mgp->dev); 3494 rtnl_unlock(); 3495 } 3496 3497 /* 3498 * We use our own timer routine rather than relying upon 3499 * netdev->tx_timeout because we have a very large hardware transmit 3500 * queue. Due to the large queue, the netdev->tx_timeout function 3501 * cannot detect a NIC with a parity error in a timely fashion if the 3502 * NIC is lightly loaded.
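 * Besides detecting a dead NIC, the timer routine also replenishes
 * any receive rings whose page allocations failed earlier
 * (watchdog_needed), and polls PCI_COMMAND when no slice has moved
 * traffic, so a rebooted NIC is noticed even on an idle link.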
3503 */ 3504 static void myri10ge_watchdog_timer(struct timer_list *t) 3505 { 3506 struct myri10ge_priv *mgp; 3507 struct myri10ge_slice_state *ss; 3508 int i, reset_needed, busy_slice_cnt; 3509 u32 rx_pause_cnt; 3510 u16 cmd; 3511 3512 mgp = from_timer(mgp, t, watchdog_timer); 3513 3514 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3515 busy_slice_cnt = 0; 3516 for (i = 0, reset_needed = 0; 3517 i < mgp->num_slices && reset_needed == 0; ++i) { 3518 3519 ss = &mgp->ss[i]; 3520 if (ss->rx_small.watchdog_needed) { 3521 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 3522 mgp->small_bytes + MXGEFW_PAD, 3523 1); 3524 if (ss->rx_small.fill_cnt - ss->rx_small.cnt >= 3525 myri10ge_fill_thresh) 3526 ss->rx_small.watchdog_needed = 0; 3527 } 3528 if (ss->rx_big.watchdog_needed) { 3529 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, 3530 mgp->big_bytes, 1); 3531 if (ss->rx_big.fill_cnt - ss->rx_big.cnt >= 3532 myri10ge_fill_thresh) 3533 ss->rx_big.watchdog_needed = 0; 3534 } 3535 myri10ge_check_slice(ss, &reset_needed, &busy_slice_cnt, 3536 rx_pause_cnt); 3537 } 3538 /* if we've sent or received no traffic, poll the NIC to 3539 * ensure it is still there. Otherwise, we risk not noticing 3540 * an error in a timely fashion */ 3541 if (busy_slice_cnt == 0) { 3542 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3543 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3544 reset_needed = 1; 3545 } 3546 } 3547 mgp->watchdog_pause = rx_pause_cnt; 3548 3549 if (reset_needed) { 3550 schedule_work(&mgp->watchdog_work); 3551 } else { 3552 /* rearm timer */ 3553 mod_timer(&mgp->watchdog_timer, 3554 jiffies + myri10ge_watchdog_timeout * HZ); 3555 } 3556 } 3557 3558 static void myri10ge_free_slices(struct myri10ge_priv *mgp) 3559 { 3560 struct myri10ge_slice_state *ss; 3561 struct pci_dev *pdev = mgp->pdev; 3562 size_t bytes; 3563 int i; 3564 3565 if (mgp->ss == NULL) 3566 return; 3567 3568 for (i = 0; i < mgp->num_slices; i++) { 3569 ss = &mgp->ss[i]; 3570 if (ss->rx_done.entry != NULL) { 3571 bytes = mgp->max_intr_slots * 3572 sizeof(*ss->rx_done.entry); 3573 dma_free_coherent(&pdev->dev, bytes, 3574 ss->rx_done.entry, ss->rx_done.bus); 3575 ss->rx_done.entry = NULL; 3576 } 3577 if (ss->fw_stats != NULL) { 3578 bytes = sizeof(*ss->fw_stats); 3579 dma_free_coherent(&pdev->dev, bytes, 3580 ss->fw_stats, ss->fw_stats_bus); 3581 ss->fw_stats = NULL; 3582 } 3583 napi_hash_del(&ss->napi); 3584 netif_napi_del(&ss->napi); 3585 } 3586 /* Wait till napi structs are no longer used, and then free ss. 
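 * (The napi instances were just removed from the busy-poll hash by
 * napi_hash_del() above; the synchronize_rcu() below ensures that
 * any busy-pollers still holding RCU references are done before the
 * kfree().)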
*/ 3587 synchronize_rcu(); 3588 kfree(mgp->ss); 3589 mgp->ss = NULL; 3590 } 3591 3592 static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) 3593 { 3594 struct myri10ge_slice_state *ss; 3595 struct pci_dev *pdev = mgp->pdev; 3596 size_t bytes; 3597 int i; 3598 3599 bytes = sizeof(*mgp->ss) * mgp->num_slices; 3600 mgp->ss = kzalloc(bytes, GFP_KERNEL); 3601 if (mgp->ss == NULL) { 3602 return -ENOMEM; 3603 } 3604 3605 for (i = 0; i < mgp->num_slices; i++) { 3606 ss = &mgp->ss[i]; 3607 bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); 3608 ss->rx_done.entry = dma_zalloc_coherent(&pdev->dev, bytes, 3609 &ss->rx_done.bus, 3610 GFP_KERNEL); 3611 if (ss->rx_done.entry == NULL) 3612 goto abort; 3613 bytes = sizeof(*ss->fw_stats); 3614 ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes, 3615 &ss->fw_stats_bus, 3616 GFP_KERNEL); 3617 if (ss->fw_stats == NULL) 3618 goto abort; 3619 ss->mgp = mgp; 3620 ss->dev = mgp->dev; 3621 netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, 3622 myri10ge_napi_weight); 3623 } 3624 return 0; 3625 abort: 3626 myri10ge_free_slices(mgp); 3627 return -ENOMEM; 3628 } 3629 3630 /* 3631 * This function determines the number of slices supported. 3632 * The number of slices is the minimum of the number of CPUs, 3633 * the number of MSI-X irqs supported, and the number of slices 3634 * supported by the firmware. 3635 */ 3636 static void myri10ge_probe_slices(struct myri10ge_priv *mgp) 3637 { 3638 struct myri10ge_cmd cmd; 3639 struct pci_dev *pdev = mgp->pdev; 3640 char *old_fw; 3641 bool old_allocated; 3642 int i, status, ncpus; 3643 3644 mgp->num_slices = 1; 3645 ncpus = netif_get_num_default_rss_queues(); 3646 3647 if (myri10ge_max_slices == 1 || !pdev->msix_cap || 3648 (myri10ge_max_slices == -1 && ncpus < 2)) 3649 return; 3650 3651 /* try to load the slice-aware RSS firmware */ 3652 old_fw = mgp->fw_name; 3653 old_allocated = mgp->fw_name_allocated; 3654 /* don't free old_fw if we override it. 
*/ 3655 mgp->fw_name_allocated = false; 3656 3657 if (myri10ge_fw_name != NULL) { 3658 dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n", 3659 myri10ge_fw_name); 3660 set_fw_name(mgp, myri10ge_fw_name, false); 3661 } else if (old_fw == myri10ge_fw_aligned) 3662 set_fw_name(mgp, myri10ge_fw_rss_aligned, false); 3663 else 3664 set_fw_name(mgp, myri10ge_fw_rss_unaligned, false); 3665 status = myri10ge_load_firmware(mgp, 0); 3666 if (status != 0) { 3667 dev_info(&pdev->dev, "Rss firmware not found\n"); 3668 if (old_allocated) 3669 kfree(old_fw); 3670 return; 3671 } 3672 3673 /* hit the board with a reset to ensure it is alive */ 3674 memset(&cmd, 0, sizeof(cmd)); 3675 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 3676 if (status != 0) { 3677 dev_err(&mgp->pdev->dev, "failed reset\n"); 3678 goto abort_with_fw; 3679 } 3680 3681 mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot); 3682 3683 /* tell it the size of the interrupt queues */ 3684 cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot); 3685 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 3686 if (status != 0) { 3687 dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3688 goto abort_with_fw; 3689 } 3690 3691 /* ask the maximum number of slices it supports */ 3692 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0); 3693 if (status != 0) 3694 goto abort_with_fw; 3695 else 3696 mgp->num_slices = cmd.data0; 3697 3698 /* Only allow multiple slices if MSI-X is usable */ 3699 if (!myri10ge_msi) { 3700 goto abort_with_fw; 3701 } 3702 3703 /* if the admin did not specify a limit to how many 3704 * slices we should use, cap it automatically to the 3705 * number of CPUs currently online */ 3706 if (myri10ge_max_slices == -1) 3707 myri10ge_max_slices = ncpus; 3708 3709 if (mgp->num_slices > myri10ge_max_slices) 3710 mgp->num_slices = myri10ge_max_slices; 3711 3712 /* Now try to allocate as many MSI-X vectors as we have 3713 * slices. We give up on MSI-X if we can only get a single 3714 * vector. 
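 * Each retry first rounds the slice count down to a power of two
 * via rounddown_pow_of_two() (presumably what the RSS firmware
 * expects), then asks pci_enable_msix_range() for exactly that many
 * vectors before giving them back with pci_disable_msix(); the
 * vectors are re-requested in myri10ge_request_irq() when the
 * netdev is opened.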

	mgp->msix_vectors = kcalloc(mgp->num_slices, sizeof(*mgp->msix_vectors),
				    GFP_KERNEL);
	if (mgp->msix_vectors == NULL)
		goto no_msix;
	for (i = 0; i < mgp->num_slices; i++)
		mgp->msix_vectors[i].entry = i;

	while (mgp->num_slices > 1) {
		mgp->num_slices = rounddown_pow_of_two(mgp->num_slices);
		if (mgp->num_slices == 1)
			goto no_msix;
		status = pci_enable_msix_range(pdev,
					       mgp->msix_vectors,
					       mgp->num_slices,
					       mgp->num_slices);
		if (status < 0)
			goto no_msix;

		pci_disable_msix(pdev);

		if (status == mgp->num_slices) {
			if (old_allocated)
				kfree(old_fw);
			return;
		}
		mgp->num_slices = status;
	}

no_msix:
	kfree(mgp->msix_vectors);
	mgp->msix_vectors = NULL;

abort_with_fw:
	mgp->num_slices = 1;
	set_fw_name(mgp, old_fw, old_allocated);
	myri10ge_load_firmware(mgp, 0);
}

static const struct net_device_ops myri10ge_netdev_ops = {
	.ndo_open = myri10ge_open,
	.ndo_stop = myri10ge_close,
	.ndo_start_xmit = myri10ge_xmit,
	.ndo_get_stats64 = myri10ge_get_stats,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = myri10ge_change_mtu,
	.ndo_set_rx_mode = myri10ge_set_multicast_list,
	.ndo_set_mac_address = myri10ge_set_mac_address,
};

static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct myri10ge_priv *mgp;
	struct device *dev = &pdev->dev;
	int i;
	int status = -ENXIO;
	int dac_enabled;
	unsigned int hdr_offset, ss_offset;
	static int board_number;

	netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
	if (netdev == NULL)
		return -ENOMEM;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	mgp = netdev_priv(netdev);
	mgp->dev = netdev;
	mgp->pdev = pdev;
	mgp->pause = myri10ge_flow_control;
	mgp->intr_coal_delay = myri10ge_intr_coal_delay;
	mgp->msg_enable = netif_msg_init(myri10ge_debug, MYRI10GE_MSG_DEFAULT);
	mgp->board_number = board_number;
	init_waitqueue_head(&mgp->down_wq);

	if (pci_enable_device(pdev)) {
		dev_err(&pdev->dev, "pci_enable_device call failed\n");
		status = -ENODEV;
		goto abort_with_netdev;
	}

	/* Find the vendor-specific cap so we can check
	 * the reboot register later on */
	mgp->vendor_specific_offset
		= pci_find_capability(pdev, PCI_CAP_ID_VNDR);

	/* Set our max read request to 4KB */
	status = pcie_set_readrq(pdev, 4096);
	if (status != 0) {
		dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n",
			status);
		goto abort_with_enabled;
	}

	myri10ge_mask_surprise_down(pdev);
	pci_set_master(pdev);
	dac_enabled = 1;
	status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (status != 0) {
		dac_enabled = 0;
		dev_err(&pdev->dev,
			"64-bit pci address mask was refused, trying 32-bit\n");
		status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
	}
	if (status != 0) {
		dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
		goto abort_with_enabled;
	}
	(void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd),
				      &mgp->cmd_bus, GFP_KERNEL);
	if (!mgp->cmd) {
		status = -ENOMEM;
		goto abort_with_enabled;
	}
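
	/*
	 * Note: the mask setup just above is the standard "prefer 64-bit
	 * DMA, fall back to 32-bit" idiom.  On kernels that provide the
	 * combined helper, a roughly equivalent sketch would be:
	 *
	 *	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
	 *		dac_enabled = 0;
	 *		status = dma_set_mask_and_coherent(&pdev->dev,
	 *						   DMA_BIT_MASK(32));
	 *	}
	 *
	 * (sketch only: unlike the helper, the code above deliberately
	 * leaves the consistent mask at 64 bits even when the streaming
	 * mask is 32.)
	 */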

	mgp->board_span = pci_resource_len(pdev, 0);
	mgp->iomem_base = pci_resource_start(pdev, 0);
	mgp->wc_cookie = arch_phys_wc_add(mgp->iomem_base, mgp->board_span);
	mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span);
	if (mgp->sram == NULL) {
		dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n",
			mgp->board_span, mgp->iomem_base);
		status = -ENXIO;
		goto abort_with_mtrr;
	}
	hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)) & 0xffffc;
	ss_offset = hdr_offset + offsetof(struct mcp_gen_header, string_specs);
	mgp->sram_size = swab32(readl(mgp->sram + ss_offset));
	if (mgp->sram_size > mgp->board_span ||
	    mgp->sram_size <= MYRI10GE_FW_OFFSET) {
		dev_err(&pdev->dev,
			"invalid sram_size %dB or board span %ldB\n",
			mgp->sram_size, mgp->board_span);
		goto abort_with_ioremap;
	}
	memcpy_fromio(mgp->eeprom_strings,
		      mgp->sram + mgp->sram_size, MYRI10GE_EEPROM_STRINGS_SIZE);
	memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
	status = myri10ge_read_mac_addr(mgp);
	if (status)
		goto abort_with_ioremap;

	for (i = 0; i < ETH_ALEN; i++)
		netdev->dev_addr[i] = mgp->mac_addr[i];

	myri10ge_select_firmware(mgp);

	status = myri10ge_load_firmware(mgp, 1);
	if (status != 0) {
		dev_err(&pdev->dev, "failed to load firmware\n");
		goto abort_with_ioremap;
	}
	myri10ge_probe_slices(mgp);
	status = myri10ge_alloc_slices(mgp);
	if (status != 0) {
		dev_err(&pdev->dev, "failed to alloc slice state\n");
		goto abort_with_firmware;
	}
	netif_set_real_num_tx_queues(netdev, mgp->num_slices);
	netif_set_real_num_rx_queues(netdev, mgp->num_slices);
	status = myri10ge_reset(mgp);
	if (status != 0) {
		dev_err(&pdev->dev, "failed reset\n");
		goto abort_with_slices;
	}
#ifdef CONFIG_MYRI10GE_DCA
	myri10ge_setup_dca(mgp);
#endif
	pci_set_drvdata(pdev, mgp);

	/* MTU range: 68 - 9000 */
	netdev->min_mtu = ETH_MIN_MTU;
	netdev->max_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;

	if (myri10ge_initial_mtu > netdev->max_mtu)
		myri10ge_initial_mtu = netdev->max_mtu;
	if (myri10ge_initial_mtu < netdev->min_mtu)
		myri10ge_initial_mtu = netdev->min_mtu;

	netdev->mtu = myri10ge_initial_mtu;
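
	/*
	 * Note: the bounds checks above clamp the module parameter into
	 * the advertised MTU range; a sketch of the same assignment with
	 * the kernel's clamp_t() helper (identical result for netdev->mtu,
	 * though it would leave myri10ge_initial_mtu itself unmodified):
	 *
	 *	netdev->mtu = clamp_t(int, myri10ge_initial_mtu,
	 *			      netdev->min_mtu, netdev->max_mtu);
	 */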

	netdev->netdev_ops = &myri10ge_netdev_ops;
	netdev->hw_features = mgp->features | NETIF_F_RXCSUM;

	/* fake NETIF_F_HW_VLAN_CTAG_RX for good GRO performance */
	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;

	netdev->features = netdev->hw_features;

	if (dac_enabled)
		netdev->features |= NETIF_F_HIGHDMA;

	netdev->vlan_features |= mgp->features;
	if (mgp->fw_ver_tiny < 37)
		netdev->vlan_features &= ~NETIF_F_TSO6;
	if (mgp->fw_ver_tiny < 32)
		netdev->vlan_features &= ~NETIF_F_TSO;

	/* make sure we can get an irq, and that MSI can be
	 * set up (if available). */
	status = myri10ge_request_irq(mgp);
	if (status != 0)
		goto abort_with_firmware;
	myri10ge_free_irq(mgp);

	/* Save configuration space to be restored if the
	 * nic resets due to a parity error */
	pci_save_state(pdev);

	/* Setup the watchdog timer */
	timer_setup(&mgp->watchdog_timer, myri10ge_watchdog_timer, 0);

	netdev->ethtool_ops = &myri10ge_ethtool_ops;
	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
	status = register_netdev(netdev);
	if (status != 0) {
		dev_err(&pdev->dev, "register_netdev failed: %d\n", status);
		goto abort_with_state;
	}
	if (mgp->msix_enabled)
		dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, MTRR %s, WC Enabled\n",
			 mgp->num_slices, mgp->tx_boundary, mgp->fw_name,
			 (mgp->wc_cookie > 0 ? "Enabled" : "Disabled"));
	else
		dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, MTRR %s, WC Enabled\n",
			 mgp->msi_enabled ? "MSI" : "xPIC",
			 pdev->irq, mgp->tx_boundary, mgp->fw_name,
			 (mgp->wc_cookie > 0 ? "Enabled" : "Disabled"));

	board_number++;
	return 0;

abort_with_state:
	pci_restore_state(pdev);

abort_with_slices:
	myri10ge_free_slices(mgp);

abort_with_firmware:
	myri10ge_dummy_rdma(mgp, 0);

abort_with_ioremap:
	if (mgp->mac_addr_string != NULL)
		dev_err(&pdev->dev,
			"myri10ge_probe() failed: MAC=%s, SN=%ld\n",
			mgp->mac_addr_string, mgp->serial_number);
	iounmap(mgp->sram);

abort_with_mtrr:
	arch_phys_wc_del(mgp->wc_cookie);
	dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
			  mgp->cmd, mgp->cmd_bus);

abort_with_enabled:
	pci_disable_device(pdev);

abort_with_netdev:
	set_fw_name(mgp, NULL, false);
	free_netdev(netdev);
	return status;
}
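
/*
 * Note: register_netdev() is deliberately one of the last steps in
 * myri10ge_probe(): once it returns, the stack may invoke ndo_open() at
 * any time, so everything the open path depends on must already be
 * initialized.  The abort_with_* labels unwind in the reverse order of
 * acquisition; each failure site simply jumps to the label matching the
 * last resource it obtained and falls through the remaining teardown
 * steps.
 */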

/*
 * myri10ge_remove
 *
 * Does what is necessary to shut down one Myrinet device. Called
 * once for each Myrinet card by the kernel when the module is
 * unloaded.
 */
static void myri10ge_remove(struct pci_dev *pdev)
{
	struct myri10ge_priv *mgp;
	struct net_device *netdev;

	mgp = pci_get_drvdata(pdev);
	if (mgp == NULL)
		return;

	cancel_work_sync(&mgp->watchdog_work);
	netdev = mgp->dev;
	unregister_netdev(netdev);

#ifdef CONFIG_MYRI10GE_DCA
	myri10ge_teardown_dca(mgp);
#endif
	myri10ge_dummy_rdma(mgp, 0);

	/* avoid a memory leak */
	pci_restore_state(pdev);

	iounmap(mgp->sram);
	arch_phys_wc_del(mgp->wc_cookie);
	myri10ge_free_slices(mgp);
	kfree(mgp->msix_vectors);
	dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
			  mgp->cmd, mgp->cmd_bus);

	set_fw_name(mgp, NULL, false);
	free_netdev(netdev);
	pci_disable_device(pdev);
}

#define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E	0x0008
#define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9	0x0009

static const struct pci_device_id myri10ge_pci_tbl[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)},
	{PCI_DEVICE
	 (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)},
	{0},
};

MODULE_DEVICE_TABLE(pci, myri10ge_pci_tbl);

static struct pci_driver myri10ge_driver = {
	.name = "myri10ge",
	.probe = myri10ge_probe,
	.remove = myri10ge_remove,
	.id_table = myri10ge_pci_tbl,
#ifdef CONFIG_PM
	.suspend = myri10ge_suspend,
	.resume = myri10ge_resume,
#endif
};

#ifdef CONFIG_MYRI10GE_DCA
static int
myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p)
{
	int err = driver_for_each_device(&myri10ge_driver.driver,
					 NULL, &event,
					 myri10ge_notify_dca_device);

	if (err)
		return NOTIFY_BAD;
	return NOTIFY_DONE;
}

static struct notifier_block myri10ge_dca_notifier = {
	.notifier_call = myri10ge_notify_dca,
	.next = NULL,
	.priority = 0,
};
#endif /* CONFIG_MYRI10GE_DCA */

static __init int myri10ge_init_module(void)
{
	pr_info("Version %s\n", MYRI10GE_VERSION_STR);

	if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
		pr_err("Illegal RSS hash type %d, defaulting to source port\n",
		       myri10ge_rss_hash);
		myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
	}
#ifdef CONFIG_MYRI10GE_DCA
	dca_register_notify(&myri10ge_dca_notifier);
#endif
	if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
		myri10ge_max_slices = MYRI10GE_MAX_SLICES;

	return pci_register_driver(&myri10ge_driver);
}

module_init(myri10ge_init_module);

static __exit void myri10ge_cleanup_module(void)
{
#ifdef CONFIG_MYRI10GE_DCA
	dca_unregister_notify(&myri10ge_dca_notifier);
#endif
	pci_unregister_driver(&myri10ge_driver);
}

module_exit(myri10ge_cleanup_module);
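
/*
 * Usage sketch (hypothetical values): the module parameters consulted
 * above can be set at load time, e.g.:
 *
 *	modprobe myri10ge myri10ge_max_slices=4
 *
 * myri10ge_max_slices caps the slice count negotiated in
 * myri10ge_probe_slices(); its default of -1 sizes the limit from the
 * number of online CPUs.
 */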