1 /************************************************************************* 2 * myri10ge.c: Myricom Myri-10G Ethernet driver. 3 * 4 * Copyright (C) 2005 - 2011 Myricom, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of Myricom, Inc. nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 * 31 * 32 * If the eeprom on your board is not recent enough, you will need to get a 33 * newer firmware image at: 34 * http://www.myri.com/scs/download-Myri10GE.html 35 * 36 * Contact Information: 37 * <help@myri.com> 38 * Myricom, Inc., 325N Santa Anita Avenue, Arcadia, CA 91006 39 *************************************************************************/ 40 41 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 42 43 #include <linux/tcp.h> 44 #include <linux/netdevice.h> 45 #include <linux/skbuff.h> 46 #include <linux/string.h> 47 #include <linux/module.h> 48 #include <linux/pci.h> 49 #include <linux/dma-mapping.h> 50 #include <linux/etherdevice.h> 51 #include <linux/if_ether.h> 52 #include <linux/if_vlan.h> 53 #include <linux/inet_lro.h> 54 #include <linux/dca.h> 55 #include <linux/ip.h> 56 #include <linux/inet.h> 57 #include <linux/in.h> 58 #include <linux/ethtool.h> 59 #include <linux/firmware.h> 60 #include <linux/delay.h> 61 #include <linux/timer.h> 62 #include <linux/vmalloc.h> 63 #include <linux/crc32.h> 64 #include <linux/moduleparam.h> 65 #include <linux/io.h> 66 #include <linux/log2.h> 67 #include <linux/slab.h> 68 #include <linux/prefetch.h> 69 #include <net/checksum.h> 70 #include <net/ip.h> 71 #include <net/tcp.h> 72 #include <asm/byteorder.h> 73 #include <asm/io.h> 74 #include <asm/processor.h> 75 #ifdef CONFIG_MTRR 76 #include <asm/mtrr.h> 77 #endif 78 79 #include "myri10ge_mcp.h" 80 #include "myri10ge_mcp_gen_header.h" 81 82 #define MYRI10GE_VERSION_STR "1.5.3-1.534" 83 84 MODULE_DESCRIPTION("Myricom 10G driver (10GbE)"); 85 MODULE_AUTHOR("Maintainer: help@myri.com"); 86 MODULE_VERSION(MYRI10GE_VERSION_STR); 87 MODULE_LICENSE("Dual BSD/GPL"); 88 89 #define MYRI10GE_MAX_ETHER_MTU 9014 90 91 #define MYRI10GE_ETH_STOPPED 0 92 #define MYRI10GE_ETH_STOPPING 1 93 #define MYRI10GE_ETH_STARTING 2 94 #define MYRI10GE_ETH_RUNNING 3 95 #define MYRI10GE_ETH_OPEN_FAILED 4 96 97 #define MYRI10GE_EEPROM_STRINGS_SIZE 256 98 #define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2) 99 #define MYRI10GE_MAX_LRO_DESCRIPTORS 8 100 #define MYRI10GE_LRO_MAX_PKTS 64 101 102 #define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff) 103 #define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff 104 105 #define MYRI10GE_ALLOC_ORDER 0 106 #define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE) 107 #define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1) 108 109 #define MYRI10GE_MAX_SLICES 32 110 111 struct myri10ge_rx_buffer_state { 112 struct page *page; 113 int page_offset; 114 DEFINE_DMA_UNMAP_ADDR(bus); 115 DEFINE_DMA_UNMAP_LEN(len); 116 }; 117 118 struct myri10ge_tx_buffer_state { 119 struct sk_buff *skb; 120 int last; 121 DEFINE_DMA_UNMAP_ADDR(bus); 122 DEFINE_DMA_UNMAP_LEN(len); 123 }; 124 125 struct myri10ge_cmd { 126 u32 data0; 127 u32 data1; 128 u32 data2; 129 }; 130 131 struct myri10ge_rx_buf { 132 struct mcp_kreq_ether_recv __iomem *lanai; /* lanai ptr for recv ring */ 133 struct mcp_kreq_ether_recv *shadow; /* host shadow of recv ring */ 134 struct myri10ge_rx_buffer_state *info; 135 struct page *page; 136 dma_addr_t bus; 137 int page_offset; 138 int cnt; 139 int fill_cnt; 140 int alloc_fail; 141 int mask; /* number of rx slots -1 */ 142 int watchdog_needed; 143 }; 144 145 struct myri10ge_tx_buf { 146 struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */ 147 __be32 __iomem *send_go; /* "go" doorbell ptr */ 148 __be32 __iomem *send_stop; /* "stop" doorbell ptr */ 149 struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */ 150 char *req_bytes; 151 struct myri10ge_tx_buffer_state *info; 152 int mask; /* number of transmit slots -1 */ 153 int req ____cacheline_aligned; /* transmit slots submitted */ 154 int pkt_start; /* packets started */ 155 int stop_queue; 156 int linearized; 157 int done ____cacheline_aligned; /* transmit slots completed */ 158 int pkt_done; /* packets completed */ 159 int wake_queue; 160 int queue_active; 161 }; 162 163 struct myri10ge_rx_done { 164 struct mcp_slot *entry; 165 dma_addr_t bus; 166 int cnt; 167 int idx; 168 struct net_lro_mgr lro_mgr; 169 struct net_lro_desc lro_desc[MYRI10GE_MAX_LRO_DESCRIPTORS]; 170 }; 171 172 struct myri10ge_slice_netstats { 173 unsigned long rx_packets; 174 unsigned long tx_packets; 175 unsigned long rx_bytes; 176 unsigned long tx_bytes; 177 unsigned long rx_dropped; 178 unsigned long tx_dropped; 179 }; 180 181 struct myri10ge_slice_state { 182 struct myri10ge_tx_buf tx; /* transmit ring */ 183 struct myri10ge_rx_buf rx_small; 184 struct myri10ge_rx_buf rx_big; 185 struct myri10ge_rx_done rx_done; 186 struct net_device *dev; 187 struct napi_struct napi; 188 struct myri10ge_priv *mgp; 189 struct myri10ge_slice_netstats stats; 190 __be32 __iomem *irq_claim; 191 struct mcp_irq_data *fw_stats; 192 dma_addr_t fw_stats_bus; 193 int watchdog_tx_done; 194 int watchdog_tx_req; 195 int watchdog_rx_done; 196 int stuck; 197 #ifdef CONFIG_MYRI10GE_DCA 198 int cached_dca_tag; 199 int cpu; 200 __be32 __iomem *dca_tag; 201 #endif 202 char irq_desc[32]; 203 }; 204 205 struct myri10ge_priv { 206 struct myri10ge_slice_state *ss; 207 int tx_boundary; /* boundary transmits cannot cross */ 208 int num_slices; 209 int running; /* running? */ 210 int small_bytes; 211 int big_bytes; 212 int max_intr_slots; 213 struct net_device *dev; 214 u8 __iomem *sram; 215 int sram_size; 216 unsigned long board_span; 217 unsigned long iomem_base; 218 __be32 __iomem *irq_deassert; 219 char *mac_addr_string; 220 struct mcp_cmd_response *cmd; 221 dma_addr_t cmd_bus; 222 struct pci_dev *pdev; 223 int msi_enabled; 224 int msix_enabled; 225 struct msix_entry *msix_vectors; 226 #ifdef CONFIG_MYRI10GE_DCA 227 int dca_enabled; 228 int relaxed_order; 229 #endif 230 u32 link_state; 231 unsigned int rdma_tags_available; 232 int intr_coal_delay; 233 __be32 __iomem *intr_coal_delay_ptr; 234 int mtrr; 235 int wc_enabled; 236 int down_cnt; 237 wait_queue_head_t down_wq; 238 struct work_struct watchdog_work; 239 struct timer_list watchdog_timer; 240 int watchdog_resets; 241 int watchdog_pause; 242 int pause; 243 bool fw_name_allocated; 244 char *fw_name; 245 char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE]; 246 char *product_code_string; 247 char fw_version[128]; 248 int fw_ver_major; 249 int fw_ver_minor; 250 int fw_ver_tiny; 251 int adopted_rx_filter_bug; 252 u8 mac_addr[6]; /* eeprom mac address */ 253 unsigned long serial_number; 254 int vendor_specific_offset; 255 int fw_multicast_support; 256 u32 features; 257 u32 max_tso6; 258 u32 read_dma; 259 u32 write_dma; 260 u32 read_write_dma; 261 u32 link_changes; 262 u32 msg_enable; 263 unsigned int board_number; 264 int rebooted; 265 }; 266 267 static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat"; 268 static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat"; 269 static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat"; 270 static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat"; 271 MODULE_FIRMWARE("myri10ge_ethp_z8e.dat"); 272 MODULE_FIRMWARE("myri10ge_eth_z8e.dat"); 273 MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); 274 MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); 275 276 /* Careful: must be accessed under kparam_block_sysfs_write */ 277 static char *myri10ge_fw_name = NULL; 278 module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); 279 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); 280 281 #define MYRI10GE_MAX_BOARDS 8 282 static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] = 283 {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL }; 284 module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL, 285 0444); 286 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image names per board"); 287 288 static int myri10ge_ecrc_enable = 1; 289 module_param(myri10ge_ecrc_enable, int, S_IRUGO); 290 MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E"); 291 292 static int myri10ge_small_bytes = -1; /* -1 == auto */ 293 module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR); 294 MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets"); 295 296 static int myri10ge_msi = 1; /* enable msi by default */ 297 module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR); 298 MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts"); 299 300 static int myri10ge_intr_coal_delay = 75; 301 module_param(myri10ge_intr_coal_delay, int, S_IRUGO); 302 MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay"); 303 304 static int myri10ge_flow_control = 1; 305 module_param(myri10ge_flow_control, int, S_IRUGO); 306 MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter"); 307 308 static int myri10ge_deassert_wait = 1; 309 module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR); 310 MODULE_PARM_DESC(myri10ge_deassert_wait, 311 "Wait when deasserting legacy interrupts"); 312 313 static int myri10ge_force_firmware = 0; 314 module_param(myri10ge_force_firmware, int, S_IRUGO); 315 MODULE_PARM_DESC(myri10ge_force_firmware, 316 "Force firmware to assume aligned completions"); 317 318 static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; 319 module_param(myri10ge_initial_mtu, int, S_IRUGO); 320 MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU"); 321 322 static int myri10ge_napi_weight = 64; 323 module_param(myri10ge_napi_weight, int, S_IRUGO); 324 MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight"); 325 326 static int myri10ge_watchdog_timeout = 1; 327 module_param(myri10ge_watchdog_timeout, int, S_IRUGO); 328 MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout"); 329 330 static int myri10ge_max_irq_loops = 1048576; 331 module_param(myri10ge_max_irq_loops, int, S_IRUGO); 332 MODULE_PARM_DESC(myri10ge_max_irq_loops, 333 "Set stuck legacy IRQ detection threshold"); 334 335 #define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK 336 337 static int myri10ge_debug = -1; /* defaults above */ 338 module_param(myri10ge_debug, int, 0); 339 MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)"); 340 341 static int myri10ge_lro_max_pkts = MYRI10GE_LRO_MAX_PKTS; 342 module_param(myri10ge_lro_max_pkts, int, S_IRUGO); 343 MODULE_PARM_DESC(myri10ge_lro_max_pkts, 344 "Number of LRO packets to be aggregated"); 345 346 static int myri10ge_fill_thresh = 256; 347 module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR); 348 MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed"); 349 350 static int myri10ge_reset_recover = 1; 351 352 static int myri10ge_max_slices = 1; 353 module_param(myri10ge_max_slices, int, S_IRUGO); 354 MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues"); 355 356 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 357 module_param(myri10ge_rss_hash, int, S_IRUGO); 358 MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do"); 359 360 static int myri10ge_dca = 1; 361 module_param(myri10ge_dca, int, S_IRUGO); 362 MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible"); 363 364 #define MYRI10GE_FW_OFFSET 1024*1024 365 #define MYRI10GE_HIGHPART_TO_U32(X) \ 366 (sizeof (X) == 8) ? ((u32)((u64)(X) >> 32)) : (0) 367 #define MYRI10GE_LOWPART_TO_U32(X) ((u32)(X)) 368 369 #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) 370 371 static void myri10ge_set_multicast_list(struct net_device *dev); 372 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 373 struct net_device *dev); 374 375 static inline void put_be32(__be32 val, __be32 __iomem * p) 376 { 377 __raw_writel((__force __u32) val, (__force void __iomem *)p); 378 } 379 380 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, 381 struct rtnl_link_stats64 *stats); 382 383 static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated) 384 { 385 if (mgp->fw_name_allocated) 386 kfree(mgp->fw_name); 387 mgp->fw_name = name; 388 mgp->fw_name_allocated = allocated; 389 } 390 391 static int 392 myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, 393 struct myri10ge_cmd *data, int atomic) 394 { 395 struct mcp_cmd *buf; 396 char buf_bytes[sizeof(*buf) + 8]; 397 struct mcp_cmd_response *response = mgp->cmd; 398 char __iomem *cmd_addr = mgp->sram + MXGEFW_ETH_CMD; 399 u32 dma_low, dma_high, result, value; 400 int sleep_total = 0; 401 402 /* ensure buf is aligned to 8 bytes */ 403 buf = (struct mcp_cmd *)ALIGN((unsigned long)buf_bytes, 8); 404 405 buf->data0 = htonl(data->data0); 406 buf->data1 = htonl(data->data1); 407 buf->data2 = htonl(data->data2); 408 buf->cmd = htonl(cmd); 409 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 410 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 411 412 buf->response_addr.low = htonl(dma_low); 413 buf->response_addr.high = htonl(dma_high); 414 response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT); 415 mb(); 416 myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf)); 417 418 /* wait up to 15ms. Longest command is the DMA benchmark, 419 * which is capped at 5ms, but runs from a timeout handler 420 * that runs every 7.8ms. So a 15ms timeout leaves us with 421 * a 2.2ms margin 422 */ 423 if (atomic) { 424 /* if atomic is set, do not sleep, 425 * and try to get the completion quickly 426 * (1ms will be enough for those commands) */ 427 for (sleep_total = 0; 428 sleep_total < 1000 && 429 response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); 430 sleep_total += 10) { 431 udelay(10); 432 mb(); 433 } 434 } else { 435 /* use msleep for most command */ 436 for (sleep_total = 0; 437 sleep_total < 15 && 438 response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); 439 sleep_total++) 440 msleep(1); 441 } 442 443 result = ntohl(response->result); 444 value = ntohl(response->data); 445 if (result != MYRI10GE_NO_RESPONSE_RESULT) { 446 if (result == 0) { 447 data->data0 = value; 448 return 0; 449 } else if (result == MXGEFW_CMD_UNKNOWN) { 450 return -ENOSYS; 451 } else if (result == MXGEFW_CMD_ERROR_UNALIGNED) { 452 return -E2BIG; 453 } else if (result == MXGEFW_CMD_ERROR_RANGE && 454 cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES && 455 (data-> 456 data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) != 457 0) { 458 return -ERANGE; 459 } else { 460 dev_err(&mgp->pdev->dev, 461 "command %d failed, result = %d\n", 462 cmd, result); 463 return -ENXIO; 464 } 465 } 466 467 dev_err(&mgp->pdev->dev, "command %d timed out, result = %d\n", 468 cmd, result); 469 return -EAGAIN; 470 } 471 472 /* 473 * The eeprom strings on the lanaiX have the format 474 * SN=x\0 475 * MAC=x:x:x:x:x:x\0 476 * PT:ddd mmm xx xx:xx:xx xx\0 477 * PV:ddd mmm xx xx:xx:xx xx\0 478 */ 479 static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp) 480 { 481 char *ptr, *limit; 482 int i; 483 484 ptr = mgp->eeprom_strings; 485 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE; 486 487 while (*ptr != '\0' && ptr < limit) { 488 if (memcmp(ptr, "MAC=", 4) == 0) { 489 ptr += 4; 490 mgp->mac_addr_string = ptr; 491 for (i = 0; i < 6; i++) { 492 if ((ptr + 2) > limit) 493 goto abort; 494 mgp->mac_addr[i] = 495 simple_strtoul(ptr, &ptr, 16); 496 ptr += 1; 497 } 498 } 499 if (memcmp(ptr, "PC=", 3) == 0) { 500 ptr += 3; 501 mgp->product_code_string = ptr; 502 } 503 if (memcmp((const void *)ptr, "SN=", 3) == 0) { 504 ptr += 3; 505 mgp->serial_number = simple_strtoul(ptr, &ptr, 10); 506 } 507 while (ptr < limit && *ptr++) ; 508 } 509 510 return 0; 511 512 abort: 513 dev_err(&mgp->pdev->dev, "failed to parse eeprom_strings\n"); 514 return -ENXIO; 515 } 516 517 /* 518 * Enable or disable periodic RDMAs from the host to make certain 519 * chipsets resend dropped PCIe messages 520 */ 521 522 static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) 523 { 524 char __iomem *submit; 525 __be32 buf[16] __attribute__ ((__aligned__(8))); 526 u32 dma_low, dma_high; 527 int i; 528 529 /* clear confirmation addr */ 530 mgp->cmd->data = 0; 531 mb(); 532 533 /* send a rdma command to the PCIe engine, and wait for the 534 * response in the confirmation address. The firmware should 535 * write a -1 there to indicate it is alive and well 536 */ 537 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 538 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 539 540 buf[0] = htonl(dma_high); /* confirm addr MSW */ 541 buf[1] = htonl(dma_low); /* confirm addr LSW */ 542 buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ 543 buf[3] = htonl(dma_high); /* dummy addr MSW */ 544 buf[4] = htonl(dma_low); /* dummy addr LSW */ 545 buf[5] = htonl(enable); /* enable? */ 546 547 submit = mgp->sram + MXGEFW_BOOT_DUMMY_RDMA; 548 549 myri10ge_pio_copy(submit, &buf, sizeof(buf)); 550 for (i = 0; mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20; i++) 551 msleep(1); 552 if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) 553 dev_err(&mgp->pdev->dev, "dummy rdma %s failed\n", 554 (enable ? "enable" : "disable")); 555 } 556 557 static int 558 myri10ge_validate_firmware(struct myri10ge_priv *mgp, 559 struct mcp_gen_header *hdr) 560 { 561 struct device *dev = &mgp->pdev->dev; 562 563 /* check firmware type */ 564 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) { 565 dev_err(dev, "Bad firmware type: 0x%x\n", ntohl(hdr->mcp_type)); 566 return -EINVAL; 567 } 568 569 /* save firmware version for ethtool */ 570 strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version)); 571 572 sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major, 573 &mgp->fw_ver_minor, &mgp->fw_ver_tiny); 574 575 if (!(mgp->fw_ver_major == MXGEFW_VERSION_MAJOR && 576 mgp->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 577 dev_err(dev, "Found firmware version %s\n", mgp->fw_version); 578 dev_err(dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR, 579 MXGEFW_VERSION_MINOR); 580 return -EINVAL; 581 } 582 return 0; 583 } 584 585 static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size) 586 { 587 unsigned crc, reread_crc; 588 const struct firmware *fw; 589 struct device *dev = &mgp->pdev->dev; 590 unsigned char *fw_readback; 591 struct mcp_gen_header *hdr; 592 size_t hdr_offset; 593 int status; 594 unsigned i; 595 596 if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) { 597 dev_err(dev, "Unable to load %s firmware image via hotplug\n", 598 mgp->fw_name); 599 status = -EINVAL; 600 goto abort_with_nothing; 601 } 602 603 /* check size */ 604 605 if (fw->size >= mgp->sram_size - MYRI10GE_FW_OFFSET || 606 fw->size < MCP_HEADER_PTR_OFFSET + 4) { 607 dev_err(dev, "Firmware size invalid:%d\n", (int)fw->size); 608 status = -EINVAL; 609 goto abort_with_fw; 610 } 611 612 /* check id */ 613 hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET)); 614 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) { 615 dev_err(dev, "Bad firmware file\n"); 616 status = -EINVAL; 617 goto abort_with_fw; 618 } 619 hdr = (void *)(fw->data + hdr_offset); 620 621 status = myri10ge_validate_firmware(mgp, hdr); 622 if (status != 0) 623 goto abort_with_fw; 624 625 crc = crc32(~0, fw->data, fw->size); 626 for (i = 0; i < fw->size; i += 256) { 627 myri10ge_pio_copy(mgp->sram + MYRI10GE_FW_OFFSET + i, 628 fw->data + i, 629 min(256U, (unsigned)(fw->size - i))); 630 mb(); 631 readb(mgp->sram); 632 } 633 fw_readback = vmalloc(fw->size); 634 if (!fw_readback) { 635 status = -ENOMEM; 636 goto abort_with_fw; 637 } 638 /* corruption checking is good for parity recovery and buggy chipset */ 639 memcpy_fromio(fw_readback, mgp->sram + MYRI10GE_FW_OFFSET, fw->size); 640 reread_crc = crc32(~0, fw_readback, fw->size); 641 vfree(fw_readback); 642 if (crc != reread_crc) { 643 dev_err(dev, "CRC failed(fw-len=%u), got 0x%x (expect 0x%x)\n", 644 (unsigned)fw->size, reread_crc, crc); 645 status = -EIO; 646 goto abort_with_fw; 647 } 648 *size = (u32) fw->size; 649 650 abort_with_fw: 651 release_firmware(fw); 652 653 abort_with_nothing: 654 return status; 655 } 656 657 static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp) 658 { 659 struct mcp_gen_header *hdr; 660 struct device *dev = &mgp->pdev->dev; 661 const size_t bytes = sizeof(struct mcp_gen_header); 662 size_t hdr_offset; 663 int status; 664 665 /* find running firmware header */ 666 hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 667 668 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) { 669 dev_err(dev, "Running firmware has bad header offset (%d)\n", 670 (int)hdr_offset); 671 return -EIO; 672 } 673 674 /* copy header of running firmware from SRAM to host memory to 675 * validate firmware */ 676 hdr = kmalloc(bytes, GFP_KERNEL); 677 if (hdr == NULL) { 678 dev_err(dev, "could not malloc firmware hdr\n"); 679 return -ENOMEM; 680 } 681 memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes); 682 status = myri10ge_validate_firmware(mgp, hdr); 683 kfree(hdr); 684 685 /* check to see if adopted firmware has bug where adopting 686 * it will cause broadcasts to be filtered unless the NIC 687 * is kept in ALLMULTI mode */ 688 if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 && 689 mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) { 690 mgp->adopted_rx_filter_bug = 1; 691 dev_warn(dev, "Adopting fw %d.%d.%d: " 692 "working around rx filter bug\n", 693 mgp->fw_ver_major, mgp->fw_ver_minor, 694 mgp->fw_ver_tiny); 695 } 696 return status; 697 } 698 699 static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp) 700 { 701 struct myri10ge_cmd cmd; 702 int status; 703 704 /* probe for IPv6 TSO support */ 705 mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; 706 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, 707 &cmd, 0); 708 if (status == 0) { 709 mgp->max_tso6 = cmd.data0; 710 mgp->features |= NETIF_F_TSO6; 711 } 712 713 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); 714 if (status != 0) { 715 dev_err(&mgp->pdev->dev, 716 "failed MXGEFW_CMD_GET_RX_RING_SIZE\n"); 717 return -ENXIO; 718 } 719 720 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr)); 721 722 return 0; 723 } 724 725 static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) 726 { 727 char __iomem *submit; 728 __be32 buf[16] __attribute__ ((__aligned__(8))); 729 u32 dma_low, dma_high, size; 730 int status, i; 731 732 size = 0; 733 status = myri10ge_load_hotplug_firmware(mgp, &size); 734 if (status) { 735 if (!adopt) 736 return status; 737 dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n"); 738 739 /* Do not attempt to adopt firmware if there 740 * was a bad crc */ 741 if (status == -EIO) 742 return status; 743 744 status = myri10ge_adopt_running_firmware(mgp); 745 if (status != 0) { 746 dev_err(&mgp->pdev->dev, 747 "failed to adopt running firmware\n"); 748 return status; 749 } 750 dev_info(&mgp->pdev->dev, 751 "Successfully adopted running firmware\n"); 752 if (mgp->tx_boundary == 4096) { 753 dev_warn(&mgp->pdev->dev, 754 "Using firmware currently running on NIC" 755 ". For optimal\n"); 756 dev_warn(&mgp->pdev->dev, 757 "performance consider loading optimized " 758 "firmware\n"); 759 dev_warn(&mgp->pdev->dev, "via hotplug\n"); 760 } 761 762 set_fw_name(mgp, "adopted", false); 763 mgp->tx_boundary = 2048; 764 myri10ge_dummy_rdma(mgp, 1); 765 status = myri10ge_get_firmware_capabilities(mgp); 766 return status; 767 } 768 769 /* clear confirmation addr */ 770 mgp->cmd->data = 0; 771 mb(); 772 773 /* send a reload command to the bootstrap MCP, and wait for the 774 * response in the confirmation address. The firmware should 775 * write a -1 there to indicate it is alive and well 776 */ 777 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 778 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 779 780 buf[0] = htonl(dma_high); /* confirm addr MSW */ 781 buf[1] = htonl(dma_low); /* confirm addr LSW */ 782 buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ 783 784 /* FIX: All newest firmware should un-protect the bottom of 785 * the sram before handoff. However, the very first interfaces 786 * do not. Therefore the handoff copy must skip the first 8 bytes 787 */ 788 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */ 789 buf[4] = htonl(size - 8); /* length of code */ 790 buf[5] = htonl(8); /* where to copy to */ 791 buf[6] = htonl(0); /* where to jump to */ 792 793 submit = mgp->sram + MXGEFW_BOOT_HANDOFF; 794 795 myri10ge_pio_copy(submit, &buf, sizeof(buf)); 796 mb(); 797 msleep(1); 798 mb(); 799 i = 0; 800 while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) { 801 msleep(1 << i); 802 i++; 803 } 804 if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) { 805 dev_err(&mgp->pdev->dev, "handoff failed\n"); 806 return -ENXIO; 807 } 808 myri10ge_dummy_rdma(mgp, 1); 809 status = myri10ge_get_firmware_capabilities(mgp); 810 811 return status; 812 } 813 814 static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr) 815 { 816 struct myri10ge_cmd cmd; 817 int status; 818 819 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 820 | (addr[2] << 8) | addr[3]); 821 822 cmd.data1 = ((addr[4] << 8) | (addr[5])); 823 824 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd, 0); 825 return status; 826 } 827 828 static int myri10ge_change_pause(struct myri10ge_priv *mgp, int pause) 829 { 830 struct myri10ge_cmd cmd; 831 int status, ctl; 832 833 ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL; 834 status = myri10ge_send_cmd(mgp, ctl, &cmd, 0); 835 836 if (status) { 837 netdev_err(mgp->dev, "Failed to set flow control mode\n"); 838 return status; 839 } 840 mgp->pause = pause; 841 return 0; 842 } 843 844 static void 845 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc, int atomic) 846 { 847 struct myri10ge_cmd cmd; 848 int status, ctl; 849 850 ctl = promisc ? MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC; 851 status = myri10ge_send_cmd(mgp, ctl, &cmd, atomic); 852 if (status) 853 netdev_err(mgp->dev, "Failed to set promisc mode\n"); 854 } 855 856 static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) 857 { 858 struct myri10ge_cmd cmd; 859 int status; 860 u32 len; 861 struct page *dmatest_page; 862 dma_addr_t dmatest_bus; 863 char *test = " "; 864 865 dmatest_page = alloc_page(GFP_KERNEL); 866 if (!dmatest_page) 867 return -ENOMEM; 868 dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, 869 DMA_BIDIRECTIONAL); 870 871 /* Run a small DMA test. 872 * The magic multipliers to the length tell the firmware 873 * to do DMA read, write, or read+write tests. The 874 * results are returned in cmd.data0. The upper 16 875 * bits or the return is the number of transfers completed. 876 * The lower 16 bits is the time in 0.5us ticks that the 877 * transfers took to complete. 878 */ 879 880 len = mgp->tx_boundary; 881 882 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 883 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 884 cmd.data2 = len * 0x10000; 885 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 886 if (status != 0) { 887 test = "read"; 888 goto abort; 889 } 890 mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff); 891 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 892 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 893 cmd.data2 = len * 0x1; 894 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 895 if (status != 0) { 896 test = "write"; 897 goto abort; 898 } 899 mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff); 900 901 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 902 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 903 cmd.data2 = len * 0x10001; 904 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 905 if (status != 0) { 906 test = "read/write"; 907 goto abort; 908 } 909 mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) / 910 (cmd.data0 & 0xffff); 911 912 abort: 913 pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL); 914 put_page(dmatest_page); 915 916 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 917 dev_warn(&mgp->pdev->dev, "DMA %s benchmark failed: %d\n", 918 test, status); 919 920 return status; 921 } 922 923 static int myri10ge_reset(struct myri10ge_priv *mgp) 924 { 925 struct myri10ge_cmd cmd; 926 struct myri10ge_slice_state *ss; 927 int i, status; 928 size_t bytes; 929 #ifdef CONFIG_MYRI10GE_DCA 930 unsigned long dca_tag_off; 931 #endif 932 933 /* try to send a reset command to the card to see if it 934 * is alive */ 935 memset(&cmd, 0, sizeof(cmd)); 936 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 937 if (status != 0) { 938 dev_err(&mgp->pdev->dev, "failed reset\n"); 939 return -ENXIO; 940 } 941 942 (void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); 943 /* 944 * Use non-ndis mcp_slot (eg, 4 bytes total, 945 * no toeplitz hash value returned. Older firmware will 946 * not understand this command, but will use the correct 947 * sized mcp_slot, so we ignore error returns 948 */ 949 cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN; 950 (void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0); 951 952 /* Now exchange information about interrupts */ 953 954 bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry); 955 cmd.data0 = (u32) bytes; 956 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 957 958 /* 959 * Even though we already know how many slices are supported 960 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES 961 * has magic side effects, and must be called after a reset. 962 * It must be called prior to calling any RSS related cmds, 963 * including assigning an interrupt queue for anything but 964 * slice 0. It must also be called *after* 965 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 966 * the firmware to compute offsets. 967 */ 968 969 if (mgp->num_slices > 1) { 970 971 /* ask the maximum number of slices it supports */ 972 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 973 &cmd, 0); 974 if (status != 0) { 975 dev_err(&mgp->pdev->dev, 976 "failed to get number of slices\n"); 977 } 978 979 /* 980 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 981 * to setting up the interrupt queue DMA 982 */ 983 984 cmd.data0 = mgp->num_slices; 985 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 986 if (mgp->dev->real_num_tx_queues > 1) 987 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 988 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 989 &cmd, 0); 990 991 /* Firmware older than 1.4.32 only supports multiple 992 * RX queues, so if we get an error, first retry using a 993 * single TX queue before giving up */ 994 if (status != 0 && mgp->dev->real_num_tx_queues > 1) { 995 netif_set_real_num_tx_queues(mgp->dev, 1); 996 cmd.data0 = mgp->num_slices; 997 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 998 status = myri10ge_send_cmd(mgp, 999 MXGEFW_CMD_ENABLE_RSS_QUEUES, 1000 &cmd, 0); 1001 } 1002 1003 if (status != 0) { 1004 dev_err(&mgp->pdev->dev, 1005 "failed to set number of slices\n"); 1006 1007 return status; 1008 } 1009 } 1010 for (i = 0; i < mgp->num_slices; i++) { 1011 ss = &mgp->ss[i]; 1012 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus); 1013 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus); 1014 cmd.data2 = i; 1015 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, 1016 &cmd, 0); 1017 } 1018 1019 status |= 1020 myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); 1021 for (i = 0; i < mgp->num_slices; i++) { 1022 ss = &mgp->ss[i]; 1023 ss->irq_claim = 1024 (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i); 1025 } 1026 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1027 &cmd, 0); 1028 mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0); 1029 1030 status |= myri10ge_send_cmd 1031 (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0); 1032 mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0); 1033 if (status != 0) { 1034 dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n"); 1035 return status; 1036 } 1037 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1038 1039 #ifdef CONFIG_MYRI10GE_DCA 1040 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0); 1041 dca_tag_off = cmd.data0; 1042 for (i = 0; i < mgp->num_slices; i++) { 1043 ss = &mgp->ss[i]; 1044 if (status == 0) { 1045 ss->dca_tag = (__iomem __be32 *) 1046 (mgp->sram + dca_tag_off + 4 * i); 1047 } else { 1048 ss->dca_tag = NULL; 1049 } 1050 } 1051 #endif /* CONFIG_MYRI10GE_DCA */ 1052 1053 /* reset mcp/driver shared state back to 0 */ 1054 1055 mgp->link_changes = 0; 1056 for (i = 0; i < mgp->num_slices; i++) { 1057 ss = &mgp->ss[i]; 1058 1059 memset(ss->rx_done.entry, 0, bytes); 1060 ss->tx.req = 0; 1061 ss->tx.done = 0; 1062 ss->tx.pkt_start = 0; 1063 ss->tx.pkt_done = 0; 1064 ss->rx_big.cnt = 0; 1065 ss->rx_small.cnt = 0; 1066 ss->rx_done.idx = 0; 1067 ss->rx_done.cnt = 0; 1068 ss->tx.wake_queue = 0; 1069 ss->tx.stop_queue = 0; 1070 } 1071 1072 status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr); 1073 myri10ge_change_pause(mgp, mgp->pause); 1074 myri10ge_set_multicast_list(mgp->dev); 1075 return status; 1076 } 1077 1078 #ifdef CONFIG_MYRI10GE_DCA 1079 static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on) 1080 { 1081 int ret, cap, err; 1082 u16 ctl; 1083 1084 cap = pci_pcie_cap(pdev); 1085 if (!cap) 1086 return 0; 1087 1088 err = pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &ctl); 1089 if (err) 1090 return 0; 1091 1092 ret = (ctl & PCI_EXP_DEVCTL_RELAX_EN) >> 4; 1093 if (ret != on) { 1094 ctl &= ~PCI_EXP_DEVCTL_RELAX_EN; 1095 ctl |= (on << 4); 1096 pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, ctl); 1097 } 1098 return ret; 1099 } 1100 1101 static void 1102 myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag) 1103 { 1104 ss->cached_dca_tag = tag; 1105 put_be32(htonl(tag), ss->dca_tag); 1106 } 1107 1108 static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss) 1109 { 1110 int cpu = get_cpu(); 1111 int tag; 1112 1113 if (cpu != ss->cpu) { 1114 tag = dca3_get_tag(&ss->mgp->pdev->dev, cpu); 1115 if (ss->cached_dca_tag != tag) 1116 myri10ge_write_dca(ss, cpu, tag); 1117 ss->cpu = cpu; 1118 } 1119 put_cpu(); 1120 } 1121 1122 static void myri10ge_setup_dca(struct myri10ge_priv *mgp) 1123 { 1124 int err, i; 1125 struct pci_dev *pdev = mgp->pdev; 1126 1127 if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled) 1128 return; 1129 if (!myri10ge_dca) { 1130 dev_err(&pdev->dev, "dca disabled by administrator\n"); 1131 return; 1132 } 1133 err = dca_add_requester(&pdev->dev); 1134 if (err) { 1135 if (err != -ENODEV) 1136 dev_err(&pdev->dev, 1137 "dca_add_requester() failed, err=%d\n", err); 1138 return; 1139 } 1140 mgp->relaxed_order = myri10ge_toggle_relaxed(pdev, 0); 1141 mgp->dca_enabled = 1; 1142 for (i = 0; i < mgp->num_slices; i++) { 1143 mgp->ss[i].cpu = -1; 1144 mgp->ss[i].cached_dca_tag = -1; 1145 myri10ge_update_dca(&mgp->ss[i]); 1146 } 1147 } 1148 1149 static void myri10ge_teardown_dca(struct myri10ge_priv *mgp) 1150 { 1151 struct pci_dev *pdev = mgp->pdev; 1152 1153 if (!mgp->dca_enabled) 1154 return; 1155 mgp->dca_enabled = 0; 1156 if (mgp->relaxed_order) 1157 myri10ge_toggle_relaxed(pdev, 1); 1158 dca_remove_requester(&pdev->dev); 1159 } 1160 1161 static int myri10ge_notify_dca_device(struct device *dev, void *data) 1162 { 1163 struct myri10ge_priv *mgp; 1164 unsigned long event; 1165 1166 mgp = dev_get_drvdata(dev); 1167 event = *(unsigned long *)data; 1168 1169 if (event == DCA_PROVIDER_ADD) 1170 myri10ge_setup_dca(mgp); 1171 else if (event == DCA_PROVIDER_REMOVE) 1172 myri10ge_teardown_dca(mgp); 1173 return 0; 1174 } 1175 #endif /* CONFIG_MYRI10GE_DCA */ 1176 1177 static inline void 1178 myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst, 1179 struct mcp_kreq_ether_recv *src) 1180 { 1181 __be32 low; 1182 1183 low = src->addr_low; 1184 src->addr_low = htonl(DMA_BIT_MASK(32)); 1185 myri10ge_pio_copy(dst, src, 4 * sizeof(*src)); 1186 mb(); 1187 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src)); 1188 mb(); 1189 src->addr_low = low; 1190 put_be32(low, &dst->addr_low); 1191 mb(); 1192 } 1193 1194 static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum) 1195 { 1196 struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data); 1197 1198 if ((skb->protocol == htons(ETH_P_8021Q)) && 1199 (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || 1200 vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { 1201 skb->csum = hw_csum; 1202 skb->ip_summed = CHECKSUM_COMPLETE; 1203 } 1204 } 1205 1206 static inline void 1207 myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va, 1208 struct skb_frag_struct *rx_frags, int len, int hlen) 1209 { 1210 struct skb_frag_struct *skb_frags; 1211 1212 skb->len = skb->data_len = len; 1213 /* attach the page(s) */ 1214 1215 skb_frags = skb_shinfo(skb)->frags; 1216 while (len > 0) { 1217 memcpy(skb_frags, rx_frags, sizeof(*skb_frags)); 1218 len -= skb_frag_size(rx_frags); 1219 skb_frags++; 1220 rx_frags++; 1221 skb_shinfo(skb)->nr_frags++; 1222 } 1223 1224 /* pskb_may_pull is not available in irq context, but 1225 * skb_pull() (for ether_pad and eth_type_trans()) requires 1226 * the beginning of the packet in skb_headlen(), move it 1227 * manually */ 1228 skb_copy_to_linear_data(skb, va, hlen); 1229 skb_shinfo(skb)->frags[0].page_offset += hlen; 1230 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hlen); 1231 skb->data_len -= hlen; 1232 skb->tail += hlen; 1233 skb_pull(skb, MXGEFW_PAD); 1234 } 1235 1236 static void 1237 myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, 1238 int bytes, int watchdog) 1239 { 1240 struct page *page; 1241 int idx; 1242 #if MYRI10GE_ALLOC_SIZE > 4096 1243 int end_offset; 1244 #endif 1245 1246 if (unlikely(rx->watchdog_needed && !watchdog)) 1247 return; 1248 1249 /* try to refill entire ring */ 1250 while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) { 1251 idx = rx->fill_cnt & rx->mask; 1252 if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) { 1253 /* we can use part of previous page */ 1254 get_page(rx->page); 1255 } else { 1256 /* we need a new page */ 1257 page = 1258 alloc_pages(GFP_ATOMIC | __GFP_COMP, 1259 MYRI10GE_ALLOC_ORDER); 1260 if (unlikely(page == NULL)) { 1261 if (rx->fill_cnt - rx->cnt < 16) 1262 rx->watchdog_needed = 1; 1263 return; 1264 } 1265 rx->page = page; 1266 rx->page_offset = 0; 1267 rx->bus = pci_map_page(mgp->pdev, page, 0, 1268 MYRI10GE_ALLOC_SIZE, 1269 PCI_DMA_FROMDEVICE); 1270 } 1271 rx->info[idx].page = rx->page; 1272 rx->info[idx].page_offset = rx->page_offset; 1273 /* note that this is the address of the start of the 1274 * page */ 1275 dma_unmap_addr_set(&rx->info[idx], bus, rx->bus); 1276 rx->shadow[idx].addr_low = 1277 htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset); 1278 rx->shadow[idx].addr_high = 1279 htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus)); 1280 1281 /* start next packet on a cacheline boundary */ 1282 rx->page_offset += SKB_DATA_ALIGN(bytes); 1283 1284 #if MYRI10GE_ALLOC_SIZE > 4096 1285 /* don't cross a 4KB boundary */ 1286 end_offset = rx->page_offset + bytes - 1; 1287 if ((unsigned)(rx->page_offset ^ end_offset) > 4095) 1288 rx->page_offset = end_offset & ~4095; 1289 #endif 1290 rx->fill_cnt++; 1291 1292 /* copy 8 descriptors to the firmware at a time */ 1293 if ((idx & 7) == 7) { 1294 myri10ge_submit_8rx(&rx->lanai[idx - 7], 1295 &rx->shadow[idx - 7]); 1296 } 1297 } 1298 } 1299 1300 static inline void 1301 myri10ge_unmap_rx_page(struct pci_dev *pdev, 1302 struct myri10ge_rx_buffer_state *info, int bytes) 1303 { 1304 /* unmap the recvd page if we're the only or last user of it */ 1305 if (bytes >= MYRI10GE_ALLOC_SIZE / 2 || 1306 (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) { 1307 pci_unmap_page(pdev, (dma_unmap_addr(info, bus) 1308 & ~(MYRI10GE_ALLOC_SIZE - 1)), 1309 MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE); 1310 } 1311 } 1312 1313 #define MYRI10GE_HLEN 64 /* The number of bytes to copy from a 1314 * page into an skb */ 1315 1316 static inline int 1317 myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum, 1318 bool lro_enabled) 1319 { 1320 struct myri10ge_priv *mgp = ss->mgp; 1321 struct sk_buff *skb; 1322 struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME]; 1323 struct myri10ge_rx_buf *rx; 1324 int i, idx, hlen, remainder, bytes; 1325 struct pci_dev *pdev = mgp->pdev; 1326 struct net_device *dev = mgp->dev; 1327 u8 *va; 1328 1329 if (len <= mgp->small_bytes) { 1330 rx = &ss->rx_small; 1331 bytes = mgp->small_bytes; 1332 } else { 1333 rx = &ss->rx_big; 1334 bytes = mgp->big_bytes; 1335 } 1336 1337 len += MXGEFW_PAD; 1338 idx = rx->cnt & rx->mask; 1339 va = page_address(rx->info[idx].page) + rx->info[idx].page_offset; 1340 prefetch(va); 1341 /* Fill skb_frag_struct(s) with data from our receive */ 1342 for (i = 0, remainder = len; remainder > 0; i++) { 1343 myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); 1344 __skb_frag_set_page(&rx_frags[i], rx->info[idx].page); 1345 rx_frags[i].page_offset = rx->info[idx].page_offset; 1346 if (remainder < MYRI10GE_ALLOC_SIZE) 1347 skb_frag_size_set(&rx_frags[i], remainder); 1348 else 1349 skb_frag_size_set(&rx_frags[i], MYRI10GE_ALLOC_SIZE); 1350 rx->cnt++; 1351 idx = rx->cnt & rx->mask; 1352 remainder -= MYRI10GE_ALLOC_SIZE; 1353 } 1354 1355 if (lro_enabled) { 1356 rx_frags[0].page_offset += MXGEFW_PAD; 1357 skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD); 1358 len -= MXGEFW_PAD; 1359 lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags, 1360 /* opaque, will come back in get_frag_header */ 1361 len, len, 1362 (void *)(__force unsigned long)csum, csum); 1363 1364 return 1; 1365 } 1366 1367 hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN; 1368 1369 /* allocate an skb to attach the page(s) to. This is done 1370 * after trying LRO, so as to avoid skb allocation overheads */ 1371 1372 skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16); 1373 if (unlikely(skb == NULL)) { 1374 ss->stats.rx_dropped++; 1375 do { 1376 i--; 1377 __skb_frag_unref(&rx_frags[i]); 1378 } while (i != 0); 1379 return 0; 1380 } 1381 1382 /* Attach the pages to the skb, and trim off any padding */ 1383 myri10ge_rx_skb_build(skb, va, rx_frags, len, hlen); 1384 if (skb_frag_size(&skb_shinfo(skb)->frags[0]) <= 0) { 1385 skb_frag_unref(skb, 0); 1386 skb_shinfo(skb)->nr_frags = 0; 1387 } else { 1388 skb->truesize += bytes * skb_shinfo(skb)->nr_frags; 1389 } 1390 skb->protocol = eth_type_trans(skb, dev); 1391 skb_record_rx_queue(skb, ss - &mgp->ss[0]); 1392 1393 if (dev->features & NETIF_F_RXCSUM) { 1394 if ((skb->protocol == htons(ETH_P_IP)) || 1395 (skb->protocol == htons(ETH_P_IPV6))) { 1396 skb->csum = csum; 1397 skb->ip_summed = CHECKSUM_COMPLETE; 1398 } else 1399 myri10ge_vlan_ip_csum(skb, csum); 1400 } 1401 netif_receive_skb(skb); 1402 return 1; 1403 } 1404 1405 static inline void 1406 myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index) 1407 { 1408 struct pci_dev *pdev = ss->mgp->pdev; 1409 struct myri10ge_tx_buf *tx = &ss->tx; 1410 struct netdev_queue *dev_queue; 1411 struct sk_buff *skb; 1412 int idx, len; 1413 1414 while (tx->pkt_done != mcp_index) { 1415 idx = tx->done & tx->mask; 1416 skb = tx->info[idx].skb; 1417 1418 /* Mark as free */ 1419 tx->info[idx].skb = NULL; 1420 if (tx->info[idx].last) { 1421 tx->pkt_done++; 1422 tx->info[idx].last = 0; 1423 } 1424 tx->done++; 1425 len = dma_unmap_len(&tx->info[idx], len); 1426 dma_unmap_len_set(&tx->info[idx], len, 0); 1427 if (skb) { 1428 ss->stats.tx_bytes += skb->len; 1429 ss->stats.tx_packets++; 1430 dev_kfree_skb_irq(skb); 1431 if (len) 1432 pci_unmap_single(pdev, 1433 dma_unmap_addr(&tx->info[idx], 1434 bus), len, 1435 PCI_DMA_TODEVICE); 1436 } else { 1437 if (len) 1438 pci_unmap_page(pdev, 1439 dma_unmap_addr(&tx->info[idx], 1440 bus), len, 1441 PCI_DMA_TODEVICE); 1442 } 1443 } 1444 1445 dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss); 1446 /* 1447 * Make a minimal effort to prevent the NIC from polling an 1448 * idle tx queue. If we can't get the lock we leave the queue 1449 * active. In this case, either a thread was about to start 1450 * using the queue anyway, or we lost a race and the NIC will 1451 * waste some of its resources polling an inactive queue for a 1452 * while. 1453 */ 1454 1455 if ((ss->mgp->dev->real_num_tx_queues > 1) && 1456 __netif_tx_trylock(dev_queue)) { 1457 if (tx->req == tx->done) { 1458 tx->queue_active = 0; 1459 put_be32(htonl(1), tx->send_stop); 1460 mb(); 1461 mmiowb(); 1462 } 1463 __netif_tx_unlock(dev_queue); 1464 } 1465 1466 /* start the queue if we've stopped it */ 1467 if (netif_tx_queue_stopped(dev_queue) && 1468 tx->req - tx->done < (tx->mask >> 1) && 1469 ss->mgp->running == MYRI10GE_ETH_RUNNING) { 1470 tx->wake_queue++; 1471 netif_tx_wake_queue(dev_queue); 1472 } 1473 } 1474 1475 static inline int 1476 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) 1477 { 1478 struct myri10ge_rx_done *rx_done = &ss->rx_done; 1479 struct myri10ge_priv *mgp = ss->mgp; 1480 unsigned long rx_bytes = 0; 1481 unsigned long rx_packets = 0; 1482 unsigned long rx_ok; 1483 int idx = rx_done->idx; 1484 int cnt = rx_done->cnt; 1485 int work_done = 0; 1486 u16 length; 1487 __wsum checksum; 1488 1489 /* 1490 * Prevent compiler from generating more than one ->features memory 1491 * access to avoid theoretical race condition with functions that 1492 * change NETIF_F_LRO flag at runtime. 1493 */ 1494 bool lro_enabled = !!(ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO); 1495 1496 while (rx_done->entry[idx].length != 0 && work_done < budget) { 1497 length = ntohs(rx_done->entry[idx].length); 1498 rx_done->entry[idx].length = 0; 1499 checksum = csum_unfold(rx_done->entry[idx].checksum); 1500 rx_ok = myri10ge_rx_done(ss, length, checksum, lro_enabled); 1501 rx_packets += rx_ok; 1502 rx_bytes += rx_ok * (unsigned long)length; 1503 cnt++; 1504 idx = cnt & (mgp->max_intr_slots - 1); 1505 work_done++; 1506 } 1507 rx_done->idx = idx; 1508 rx_done->cnt = cnt; 1509 ss->stats.rx_packets += rx_packets; 1510 ss->stats.rx_bytes += rx_bytes; 1511 1512 if (lro_enabled) 1513 lro_flush_all(&rx_done->lro_mgr); 1514 1515 /* restock receive rings if needed */ 1516 if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh) 1517 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 1518 mgp->small_bytes + MXGEFW_PAD, 0); 1519 if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh) 1520 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); 1521 1522 return work_done; 1523 } 1524 1525 static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) 1526 { 1527 struct mcp_irq_data *stats = mgp->ss[0].fw_stats; 1528 1529 if (unlikely(stats->stats_updated)) { 1530 unsigned link_up = ntohl(stats->link_up); 1531 if (mgp->link_state != link_up) { 1532 mgp->link_state = link_up; 1533 1534 if (mgp->link_state == MXGEFW_LINK_UP) { 1535 netif_info(mgp, link, mgp->dev, "link up\n"); 1536 netif_carrier_on(mgp->dev); 1537 mgp->link_changes++; 1538 } else { 1539 netif_info(mgp, link, mgp->dev, "link %s\n", 1540 (link_up == MXGEFW_LINK_MYRINET ? 1541 "mismatch (Myrinet detected)" : 1542 "down")); 1543 netif_carrier_off(mgp->dev); 1544 mgp->link_changes++; 1545 } 1546 } 1547 if (mgp->rdma_tags_available != 1548 ntohl(stats->rdma_tags_available)) { 1549 mgp->rdma_tags_available = 1550 ntohl(stats->rdma_tags_available); 1551 netdev_warn(mgp->dev, "RDMA timed out! %d tags left\n", 1552 mgp->rdma_tags_available); 1553 } 1554 mgp->down_cnt += stats->link_down; 1555 if (stats->link_down) 1556 wake_up(&mgp->down_wq); 1557 } 1558 } 1559 1560 static int myri10ge_poll(struct napi_struct *napi, int budget) 1561 { 1562 struct myri10ge_slice_state *ss = 1563 container_of(napi, struct myri10ge_slice_state, napi); 1564 int work_done; 1565 1566 #ifdef CONFIG_MYRI10GE_DCA 1567 if (ss->mgp->dca_enabled) 1568 myri10ge_update_dca(ss); 1569 #endif 1570 1571 /* process as many rx events as NAPI will allow */ 1572 work_done = myri10ge_clean_rx_done(ss, budget); 1573 1574 if (work_done < budget) { 1575 napi_complete(napi); 1576 put_be32(htonl(3), ss->irq_claim); 1577 } 1578 return work_done; 1579 } 1580 1581 static irqreturn_t myri10ge_intr(int irq, void *arg) 1582 { 1583 struct myri10ge_slice_state *ss = arg; 1584 struct myri10ge_priv *mgp = ss->mgp; 1585 struct mcp_irq_data *stats = ss->fw_stats; 1586 struct myri10ge_tx_buf *tx = &ss->tx; 1587 u32 send_done_count; 1588 int i; 1589 1590 /* an interrupt on a non-zero receive-only slice is implicitly 1591 * valid since MSI-X irqs are not shared */ 1592 if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) { 1593 napi_schedule(&ss->napi); 1594 return IRQ_HANDLED; 1595 } 1596 1597 /* make sure it is our IRQ, and that the DMA has finished */ 1598 if (unlikely(!stats->valid)) 1599 return IRQ_NONE; 1600 1601 /* low bit indicates receives are present, so schedule 1602 * napi poll handler */ 1603 if (stats->valid & 1) 1604 napi_schedule(&ss->napi); 1605 1606 if (!mgp->msi_enabled && !mgp->msix_enabled) { 1607 put_be32(0, mgp->irq_deassert); 1608 if (!myri10ge_deassert_wait) 1609 stats->valid = 0; 1610 mb(); 1611 } else 1612 stats->valid = 0; 1613 1614 /* Wait for IRQ line to go low, if using INTx */ 1615 i = 0; 1616 while (1) { 1617 i++; 1618 /* check for transmit completes and receives */ 1619 send_done_count = ntohl(stats->send_done_count); 1620 if (send_done_count != tx->pkt_done) 1621 myri10ge_tx_done(ss, (int)send_done_count); 1622 if (unlikely(i > myri10ge_max_irq_loops)) { 1623 netdev_warn(mgp->dev, "irq stuck?\n"); 1624 stats->valid = 0; 1625 schedule_work(&mgp->watchdog_work); 1626 } 1627 if (likely(stats->valid == 0)) 1628 break; 1629 cpu_relax(); 1630 barrier(); 1631 } 1632 1633 /* Only slice 0 updates stats */ 1634 if (ss == mgp->ss) 1635 myri10ge_check_statblock(mgp); 1636 1637 put_be32(htonl(3), ss->irq_claim + 1); 1638 return IRQ_HANDLED; 1639 } 1640 1641 static int 1642 myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) 1643 { 1644 struct myri10ge_priv *mgp = netdev_priv(netdev); 1645 char *ptr; 1646 int i; 1647 1648 cmd->autoneg = AUTONEG_DISABLE; 1649 ethtool_cmd_speed_set(cmd, SPEED_10000); 1650 cmd->duplex = DUPLEX_FULL; 1651 1652 /* 1653 * parse the product code to deterimine the interface type 1654 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 1655 * after the 3rd dash in the driver's cached copy of the 1656 * EEPROM's product code string. 1657 */ 1658 ptr = mgp->product_code_string; 1659 if (ptr == NULL) { 1660 netdev_err(netdev, "Missing product code\n"); 1661 return 0; 1662 } 1663 for (i = 0; i < 3; i++, ptr++) { 1664 ptr = strchr(ptr, '-'); 1665 if (ptr == NULL) { 1666 netdev_err(netdev, "Invalid product code %s\n", 1667 mgp->product_code_string); 1668 return 0; 1669 } 1670 } 1671 if (*ptr == '2') 1672 ptr++; 1673 if (*ptr == 'R' || *ptr == 'Q' || *ptr == 'S') { 1674 /* We've found either an XFP, quad ribbon fiber, or SFP+ */ 1675 cmd->port = PORT_FIBRE; 1676 cmd->supported |= SUPPORTED_FIBRE; 1677 cmd->advertising |= ADVERTISED_FIBRE; 1678 } else { 1679 cmd->port = PORT_OTHER; 1680 } 1681 if (*ptr == 'R' || *ptr == 'S') 1682 cmd->transceiver = XCVR_EXTERNAL; 1683 else 1684 cmd->transceiver = XCVR_INTERNAL; 1685 1686 return 0; 1687 } 1688 1689 static void 1690 myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) 1691 { 1692 struct myri10ge_priv *mgp = netdev_priv(netdev); 1693 1694 strlcpy(info->driver, "myri10ge", sizeof(info->driver)); 1695 strlcpy(info->version, MYRI10GE_VERSION_STR, sizeof(info->version)); 1696 strlcpy(info->fw_version, mgp->fw_version, sizeof(info->fw_version)); 1697 strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info)); 1698 } 1699 1700 static int 1701 myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) 1702 { 1703 struct myri10ge_priv *mgp = netdev_priv(netdev); 1704 1705 coal->rx_coalesce_usecs = mgp->intr_coal_delay; 1706 return 0; 1707 } 1708 1709 static int 1710 myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) 1711 { 1712 struct myri10ge_priv *mgp = netdev_priv(netdev); 1713 1714 mgp->intr_coal_delay = coal->rx_coalesce_usecs; 1715 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1716 return 0; 1717 } 1718 1719 static void 1720 myri10ge_get_pauseparam(struct net_device *netdev, 1721 struct ethtool_pauseparam *pause) 1722 { 1723 struct myri10ge_priv *mgp = netdev_priv(netdev); 1724 1725 pause->autoneg = 0; 1726 pause->rx_pause = mgp->pause; 1727 pause->tx_pause = mgp->pause; 1728 } 1729 1730 static int 1731 myri10ge_set_pauseparam(struct net_device *netdev, 1732 struct ethtool_pauseparam *pause) 1733 { 1734 struct myri10ge_priv *mgp = netdev_priv(netdev); 1735 1736 if (pause->tx_pause != mgp->pause) 1737 return myri10ge_change_pause(mgp, pause->tx_pause); 1738 if (pause->rx_pause != mgp->pause) 1739 return myri10ge_change_pause(mgp, pause->rx_pause); 1740 if (pause->autoneg != 0) 1741 return -EINVAL; 1742 return 0; 1743 } 1744 1745 static void 1746 myri10ge_get_ringparam(struct net_device *netdev, 1747 struct ethtool_ringparam *ring) 1748 { 1749 struct myri10ge_priv *mgp = netdev_priv(netdev); 1750 1751 ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1; 1752 ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1; 1753 ring->rx_jumbo_max_pending = 0; 1754 ring->tx_max_pending = mgp->ss[0].tx.mask + 1; 1755 ring->rx_mini_pending = ring->rx_mini_max_pending; 1756 ring->rx_pending = ring->rx_max_pending; 1757 ring->rx_jumbo_pending = ring->rx_jumbo_max_pending; 1758 ring->tx_pending = ring->tx_max_pending; 1759 } 1760 1761 static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = { 1762 "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", 1763 "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", 1764 "rx_length_errors", "rx_over_errors", "rx_crc_errors", 1765 "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", 1766 "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", 1767 "tx_heartbeat_errors", "tx_window_errors", 1768 /* device-specific stats */ 1769 "tx_boundary", "WC", "irq", "MSI", "MSIX", 1770 "read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs", 1771 "serial_number", "watchdog_resets", 1772 #ifdef CONFIG_MYRI10GE_DCA 1773 "dca_capable_firmware", "dca_device_present", 1774 #endif 1775 "link_changes", "link_up", "dropped_link_overflow", 1776 "dropped_link_error_or_filtered", 1777 "dropped_pause", "dropped_bad_phy", "dropped_bad_crc32", 1778 "dropped_unicast_filtered", "dropped_multicast_filtered", 1779 "dropped_runt", "dropped_overrun", "dropped_no_small_buffer", 1780 "dropped_no_big_buffer" 1781 }; 1782 1783 static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = { 1784 "----------- slice ---------", 1785 "tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done", 1786 "rx_small_cnt", "rx_big_cnt", 1787 "wake_queue", "stop_queue", "tx_linearized", 1788 "LRO aggregated", "LRO flushed", "LRO avg aggr", "LRO no_desc", 1789 }; 1790 1791 #define MYRI10GE_NET_STATS_LEN 21 1792 #define MYRI10GE_MAIN_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_main_stats) 1793 #define MYRI10GE_SLICE_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_slice_stats) 1794 1795 static void 1796 myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data) 1797 { 1798 struct myri10ge_priv *mgp = netdev_priv(netdev); 1799 int i; 1800 1801 switch (stringset) { 1802 case ETH_SS_STATS: 1803 memcpy(data, *myri10ge_gstrings_main_stats, 1804 sizeof(myri10ge_gstrings_main_stats)); 1805 data += sizeof(myri10ge_gstrings_main_stats); 1806 for (i = 0; i < mgp->num_slices; i++) { 1807 memcpy(data, *myri10ge_gstrings_slice_stats, 1808 sizeof(myri10ge_gstrings_slice_stats)); 1809 data += sizeof(myri10ge_gstrings_slice_stats); 1810 } 1811 break; 1812 } 1813 } 1814 1815 static int myri10ge_get_sset_count(struct net_device *netdev, int sset) 1816 { 1817 struct myri10ge_priv *mgp = netdev_priv(netdev); 1818 1819 switch (sset) { 1820 case ETH_SS_STATS: 1821 return MYRI10GE_MAIN_STATS_LEN + 1822 mgp->num_slices * MYRI10GE_SLICE_STATS_LEN; 1823 default: 1824 return -EOPNOTSUPP; 1825 } 1826 } 1827 1828 static void 1829 myri10ge_get_ethtool_stats(struct net_device *netdev, 1830 struct ethtool_stats *stats, u64 * data) 1831 { 1832 struct myri10ge_priv *mgp = netdev_priv(netdev); 1833 struct myri10ge_slice_state *ss; 1834 struct rtnl_link_stats64 link_stats; 1835 int slice; 1836 int i; 1837 1838 /* force stats update */ 1839 memset(&link_stats, 0, sizeof(link_stats)); 1840 (void)myri10ge_get_stats(netdev, &link_stats); 1841 for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++) 1842 data[i] = ((u64 *)&link_stats)[i]; 1843 1844 data[i++] = (unsigned int)mgp->tx_boundary; 1845 data[i++] = (unsigned int)mgp->wc_enabled; 1846 data[i++] = (unsigned int)mgp->pdev->irq; 1847 data[i++] = (unsigned int)mgp->msi_enabled; 1848 data[i++] = (unsigned int)mgp->msix_enabled; 1849 data[i++] = (unsigned int)mgp->read_dma; 1850 data[i++] = (unsigned int)mgp->write_dma; 1851 data[i++] = (unsigned int)mgp->read_write_dma; 1852 data[i++] = (unsigned int)mgp->serial_number; 1853 data[i++] = (unsigned int)mgp->watchdog_resets; 1854 #ifdef CONFIG_MYRI10GE_DCA 1855 data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL); 1856 data[i++] = (unsigned int)(mgp->dca_enabled); 1857 #endif 1858 data[i++] = (unsigned int)mgp->link_changes; 1859 1860 /* firmware stats are useful only in the first slice */ 1861 ss = &mgp->ss[0]; 1862 data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up); 1863 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow); 1864 data[i++] = 1865 (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered); 1866 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_pause); 1867 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy); 1868 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32); 1869 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered); 1870 data[i++] = 1871 (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered); 1872 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt); 1873 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun); 1874 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer); 1875 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer); 1876 1877 for (slice = 0; slice < mgp->num_slices; slice++) { 1878 ss = &mgp->ss[slice]; 1879 data[i++] = slice; 1880 data[i++] = (unsigned int)ss->tx.pkt_start; 1881 data[i++] = (unsigned int)ss->tx.pkt_done; 1882 data[i++] = (unsigned int)ss->tx.req; 1883 data[i++] = (unsigned int)ss->tx.done; 1884 data[i++] = (unsigned int)ss->rx_small.cnt; 1885 data[i++] = (unsigned int)ss->rx_big.cnt; 1886 data[i++] = (unsigned int)ss->tx.wake_queue; 1887 data[i++] = (unsigned int)ss->tx.stop_queue; 1888 data[i++] = (unsigned int)ss->tx.linearized; 1889 data[i++] = ss->rx_done.lro_mgr.stats.aggregated; 1890 data[i++] = ss->rx_done.lro_mgr.stats.flushed; 1891 if (ss->rx_done.lro_mgr.stats.flushed) 1892 data[i++] = ss->rx_done.lro_mgr.stats.aggregated / 1893 ss->rx_done.lro_mgr.stats.flushed; 1894 else 1895 data[i++] = 0; 1896 data[i++] = ss->rx_done.lro_mgr.stats.no_desc; 1897 } 1898 } 1899 1900 static void myri10ge_set_msglevel(struct net_device *netdev, u32 value) 1901 { 1902 struct myri10ge_priv *mgp = netdev_priv(netdev); 1903 mgp->msg_enable = value; 1904 } 1905 1906 static u32 myri10ge_get_msglevel(struct net_device *netdev) 1907 { 1908 struct myri10ge_priv *mgp = netdev_priv(netdev); 1909 return mgp->msg_enable; 1910 } 1911 1912 /* 1913 * Use a low-level command to change the LED behavior. Rather than 1914 * blinking (which is the normal case), when identify is used, the 1915 * yellow LED turns solid. 1916 */ 1917 static int myri10ge_led(struct myri10ge_priv *mgp, int on) 1918 { 1919 struct mcp_gen_header *hdr; 1920 struct device *dev = &mgp->pdev->dev; 1921 size_t hdr_off, pattern_off, hdr_len; 1922 u32 pattern = 0xfffffffe; 1923 1924 /* find running firmware header */ 1925 hdr_off = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 1926 if ((hdr_off & 3) || hdr_off + sizeof(*hdr) > mgp->sram_size) { 1927 dev_err(dev, "Running firmware has bad header offset (%d)\n", 1928 (int)hdr_off); 1929 return -EIO; 1930 } 1931 hdr_len = swab32(readl(mgp->sram + hdr_off + 1932 offsetof(struct mcp_gen_header, header_length))); 1933 pattern_off = hdr_off + offsetof(struct mcp_gen_header, led_pattern); 1934 if (pattern_off >= (hdr_len + hdr_off)) { 1935 dev_info(dev, "Firmware does not support LED identification\n"); 1936 return -EINVAL; 1937 } 1938 if (!on) 1939 pattern = swab32(readl(mgp->sram + pattern_off + 4)); 1940 writel(htonl(pattern), mgp->sram + pattern_off); 1941 return 0; 1942 } 1943 1944 static int 1945 myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) 1946 { 1947 struct myri10ge_priv *mgp = netdev_priv(netdev); 1948 int rc; 1949 1950 switch (state) { 1951 case ETHTOOL_ID_ACTIVE: 1952 rc = myri10ge_led(mgp, 1); 1953 break; 1954 1955 case ETHTOOL_ID_INACTIVE: 1956 rc = myri10ge_led(mgp, 0); 1957 break; 1958 1959 default: 1960 rc = -EINVAL; 1961 } 1962 1963 return rc; 1964 } 1965 1966 static const struct ethtool_ops myri10ge_ethtool_ops = { 1967 .get_settings = myri10ge_get_settings, 1968 .get_drvinfo = myri10ge_get_drvinfo, 1969 .get_coalesce = myri10ge_get_coalesce, 1970 .set_coalesce = myri10ge_set_coalesce, 1971 .get_pauseparam = myri10ge_get_pauseparam, 1972 .set_pauseparam = myri10ge_set_pauseparam, 1973 .get_ringparam = myri10ge_get_ringparam, 1974 .get_link = ethtool_op_get_link, 1975 .get_strings = myri10ge_get_strings, 1976 .get_sset_count = myri10ge_get_sset_count, 1977 .get_ethtool_stats = myri10ge_get_ethtool_stats, 1978 .set_msglevel = myri10ge_set_msglevel, 1979 .get_msglevel = myri10ge_get_msglevel, 1980 .set_phys_id = myri10ge_phys_id, 1981 }; 1982 1983 static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss) 1984 { 1985 struct myri10ge_priv *mgp = ss->mgp; 1986 struct myri10ge_cmd cmd; 1987 struct net_device *dev = mgp->dev; 1988 int tx_ring_size, rx_ring_size; 1989 int tx_ring_entries, rx_ring_entries; 1990 int i, slice, status; 1991 size_t bytes; 1992 1993 /* get ring sizes */ 1994 slice = ss - mgp->ss; 1995 cmd.data0 = slice; 1996 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0); 1997 tx_ring_size = cmd.data0; 1998 cmd.data0 = slice; 1999 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); 2000 if (status != 0) 2001 return status; 2002 rx_ring_size = cmd.data0; 2003 2004 tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send); 2005 rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr); 2006 ss->tx.mask = tx_ring_entries - 1; 2007 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 2008 2009 status = -ENOMEM; 2010 2011 /* allocate the host shadow rings */ 2012 2013 bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4) 2014 * sizeof(*ss->tx.req_list); 2015 ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL); 2016 if (ss->tx.req_bytes == NULL) 2017 goto abort_with_nothing; 2018 2019 /* ensure req_list entries are aligned to 8 bytes */ 2020 ss->tx.req_list = (struct mcp_kreq_ether_send *) 2021 ALIGN((unsigned long)ss->tx.req_bytes, 8); 2022 ss->tx.queue_active = 0; 2023 2024 bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow); 2025 ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL); 2026 if (ss->rx_small.shadow == NULL) 2027 goto abort_with_tx_req_bytes; 2028 2029 bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow); 2030 ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL); 2031 if (ss->rx_big.shadow == NULL) 2032 goto abort_with_rx_small_shadow; 2033 2034 /* allocate the host info rings */ 2035 2036 bytes = tx_ring_entries * sizeof(*ss->tx.info); 2037 ss->tx.info = kzalloc(bytes, GFP_KERNEL); 2038 if (ss->tx.info == NULL) 2039 goto abort_with_rx_big_shadow; 2040 2041 bytes = rx_ring_entries * sizeof(*ss->rx_small.info); 2042 ss->rx_small.info = kzalloc(bytes, GFP_KERNEL); 2043 if (ss->rx_small.info == NULL) 2044 goto abort_with_tx_info; 2045 2046 bytes = rx_ring_entries * sizeof(*ss->rx_big.info); 2047 ss->rx_big.info = kzalloc(bytes, GFP_KERNEL); 2048 if (ss->rx_big.info == NULL) 2049 goto abort_with_rx_small_info; 2050 2051 /* Fill the receive rings */ 2052 ss->rx_big.cnt = 0; 2053 ss->rx_small.cnt = 0; 2054 ss->rx_big.fill_cnt = 0; 2055 ss->rx_small.fill_cnt = 0; 2056 ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE; 2057 ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE; 2058 ss->rx_small.watchdog_needed = 0; 2059 ss->rx_big.watchdog_needed = 0; 2060 if (mgp->small_bytes == 0) { 2061 ss->rx_small.fill_cnt = ss->rx_small.mask + 1; 2062 } else { 2063 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 2064 mgp->small_bytes + MXGEFW_PAD, 0); 2065 } 2066 2067 if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) { 2068 netdev_err(dev, "slice-%d: alloced only %d small bufs\n", 2069 slice, ss->rx_small.fill_cnt); 2070 goto abort_with_rx_small_ring; 2071 } 2072 2073 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); 2074 if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) { 2075 netdev_err(dev, "slice-%d: alloced only %d big bufs\n", 2076 slice, ss->rx_big.fill_cnt); 2077 goto abort_with_rx_big_ring; 2078 } 2079 2080 return 0; 2081 2082 abort_with_rx_big_ring: 2083 for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { 2084 int idx = i & ss->rx_big.mask; 2085 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx], 2086 mgp->big_bytes); 2087 put_page(ss->rx_big.info[idx].page); 2088 } 2089 2090 abort_with_rx_small_ring: 2091 if (mgp->small_bytes == 0) 2092 ss->rx_small.fill_cnt = ss->rx_small.cnt; 2093 for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) { 2094 int idx = i & ss->rx_small.mask; 2095 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx], 2096 mgp->small_bytes + MXGEFW_PAD); 2097 put_page(ss->rx_small.info[idx].page); 2098 } 2099 2100 kfree(ss->rx_big.info); 2101 2102 abort_with_rx_small_info: 2103 kfree(ss->rx_small.info); 2104 2105 abort_with_tx_info: 2106 kfree(ss->tx.info); 2107 2108 abort_with_rx_big_shadow: 2109 kfree(ss->rx_big.shadow); 2110 2111 abort_with_rx_small_shadow: 2112 kfree(ss->rx_small.shadow); 2113 2114 abort_with_tx_req_bytes: 2115 kfree(ss->tx.req_bytes); 2116 ss->tx.req_bytes = NULL; 2117 ss->tx.req_list = NULL; 2118 2119 abort_with_nothing: 2120 return status; 2121 } 2122 2123 static void myri10ge_free_rings(struct myri10ge_slice_state *ss) 2124 { 2125 struct myri10ge_priv *mgp = ss->mgp; 2126 struct sk_buff *skb; 2127 struct myri10ge_tx_buf *tx; 2128 int i, len, idx; 2129 2130 /* If not allocated, skip it */ 2131 if (ss->tx.req_list == NULL) 2132 return; 2133 2134 for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { 2135 idx = i & ss->rx_big.mask; 2136 if (i == ss->rx_big.fill_cnt - 1) 2137 ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE; 2138 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx], 2139 mgp->big_bytes); 2140 put_page(ss->rx_big.info[idx].page); 2141 } 2142 2143 if (mgp->small_bytes == 0) 2144 ss->rx_small.fill_cnt = ss->rx_small.cnt; 2145 for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) { 2146 idx = i & ss->rx_small.mask; 2147 if (i == ss->rx_small.fill_cnt - 1) 2148 ss->rx_small.info[idx].page_offset = 2149 MYRI10GE_ALLOC_SIZE; 2150 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx], 2151 mgp->small_bytes + MXGEFW_PAD); 2152 put_page(ss->rx_small.info[idx].page); 2153 } 2154 tx = &ss->tx; 2155 while (tx->done != tx->req) { 2156 idx = tx->done & tx->mask; 2157 skb = tx->info[idx].skb; 2158 2159 /* Mark as free */ 2160 tx->info[idx].skb = NULL; 2161 tx->done++; 2162 len = dma_unmap_len(&tx->info[idx], len); 2163 dma_unmap_len_set(&tx->info[idx], len, 0); 2164 if (skb) { 2165 ss->stats.tx_dropped++; 2166 dev_kfree_skb_any(skb); 2167 if (len) 2168 pci_unmap_single(mgp->pdev, 2169 dma_unmap_addr(&tx->info[idx], 2170 bus), len, 2171 PCI_DMA_TODEVICE); 2172 } else { 2173 if (len) 2174 pci_unmap_page(mgp->pdev, 2175 dma_unmap_addr(&tx->info[idx], 2176 bus), len, 2177 PCI_DMA_TODEVICE); 2178 } 2179 } 2180 kfree(ss->rx_big.info); 2181 2182 kfree(ss->rx_small.info); 2183 2184 kfree(ss->tx.info); 2185 2186 kfree(ss->rx_big.shadow); 2187 2188 kfree(ss->rx_small.shadow); 2189 2190 kfree(ss->tx.req_bytes); 2191 ss->tx.req_bytes = NULL; 2192 ss->tx.req_list = NULL; 2193 } 2194 2195 static int myri10ge_request_irq(struct myri10ge_priv *mgp) 2196 { 2197 struct pci_dev *pdev = mgp->pdev; 2198 struct myri10ge_slice_state *ss; 2199 struct net_device *netdev = mgp->dev; 2200 int i; 2201 int status; 2202 2203 mgp->msi_enabled = 0; 2204 mgp->msix_enabled = 0; 2205 status = 0; 2206 if (myri10ge_msi) { 2207 if (mgp->num_slices > 1) { 2208 status = 2209 pci_enable_msix(pdev, mgp->msix_vectors, 2210 mgp->num_slices); 2211 if (status == 0) { 2212 mgp->msix_enabled = 1; 2213 } else { 2214 dev_err(&pdev->dev, 2215 "Error %d setting up MSI-X\n", status); 2216 return status; 2217 } 2218 } 2219 if (mgp->msix_enabled == 0) { 2220 status = pci_enable_msi(pdev); 2221 if (status != 0) { 2222 dev_err(&pdev->dev, 2223 "Error %d setting up MSI; falling back to xPIC\n", 2224 status); 2225 } else { 2226 mgp->msi_enabled = 1; 2227 } 2228 } 2229 } 2230 if (mgp->msix_enabled) { 2231 for (i = 0; i < mgp->num_slices; i++) { 2232 ss = &mgp->ss[i]; 2233 snprintf(ss->irq_desc, sizeof(ss->irq_desc), 2234 "%s:slice-%d", netdev->name, i); 2235 status = request_irq(mgp->msix_vectors[i].vector, 2236 myri10ge_intr, 0, ss->irq_desc, 2237 ss); 2238 if (status != 0) { 2239 dev_err(&pdev->dev, 2240 "slice %d failed to allocate IRQ\n", i); 2241 i--; 2242 while (i >= 0) { 2243 free_irq(mgp->msix_vectors[i].vector, 2244 &mgp->ss[i]); 2245 i--; 2246 } 2247 pci_disable_msix(pdev); 2248 return status; 2249 } 2250 } 2251 } else { 2252 status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED, 2253 mgp->dev->name, &mgp->ss[0]); 2254 if (status != 0) { 2255 dev_err(&pdev->dev, "failed to allocate IRQ\n"); 2256 if (mgp->msi_enabled) 2257 pci_disable_msi(pdev); 2258 } 2259 } 2260 return status; 2261 } 2262 2263 static void myri10ge_free_irq(struct myri10ge_priv *mgp) 2264 { 2265 struct pci_dev *pdev = mgp->pdev; 2266 int i; 2267 2268 if (mgp->msix_enabled) { 2269 for (i = 0; i < mgp->num_slices; i++) 2270 free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]); 2271 } else { 2272 free_irq(pdev->irq, &mgp->ss[0]); 2273 } 2274 if (mgp->msi_enabled) 2275 pci_disable_msi(pdev); 2276 if (mgp->msix_enabled) 2277 pci_disable_msix(pdev); 2278 } 2279 2280 static int 2281 myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr, 2282 void **ip_hdr, void **tcpudp_hdr, 2283 u64 * hdr_flags, void *priv) 2284 { 2285 struct ethhdr *eh; 2286 struct vlan_ethhdr *veh; 2287 struct iphdr *iph; 2288 u8 *va = skb_frag_address(frag); 2289 unsigned long ll_hlen; 2290 /* passed opaque through lro_receive_frags() */ 2291 __wsum csum = (__force __wsum) (unsigned long)priv; 2292 2293 /* find the mac header, aborting if not IPv4 */ 2294 2295 eh = (struct ethhdr *)va; 2296 *mac_hdr = eh; 2297 ll_hlen = ETH_HLEN; 2298 if (eh->h_proto != htons(ETH_P_IP)) { 2299 if (eh->h_proto == htons(ETH_P_8021Q)) { 2300 veh = (struct vlan_ethhdr *)va; 2301 if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) 2302 return -1; 2303 2304 ll_hlen += VLAN_HLEN; 2305 2306 /* 2307 * HW checksum starts ETH_HLEN bytes into 2308 * frame, so we must subtract off the VLAN 2309 * header's checksum before csum can be used 2310 */ 2311 csum = csum_sub(csum, csum_partial(va + ETH_HLEN, 2312 VLAN_HLEN, 0)); 2313 } else { 2314 return -1; 2315 } 2316 } 2317 *hdr_flags = LRO_IPV4; 2318 2319 iph = (struct iphdr *)(va + ll_hlen); 2320 *ip_hdr = iph; 2321 if (iph->protocol != IPPROTO_TCP) 2322 return -1; 2323 if (ip_is_fragment(iph)) 2324 return -1; 2325 *hdr_flags |= LRO_TCP; 2326 *tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2); 2327 2328 /* verify the IP checksum */ 2329 if (unlikely(ip_fast_csum((u8 *) iph, iph->ihl))) 2330 return -1; 2331 2332 /* verify the checksum */ 2333 if (unlikely(csum_tcpudp_magic(iph->saddr, iph->daddr, 2334 ntohs(iph->tot_len) - (iph->ihl << 2), 2335 IPPROTO_TCP, csum))) 2336 return -1; 2337 2338 return 0; 2339 } 2340 2341 static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice) 2342 { 2343 struct myri10ge_cmd cmd; 2344 struct myri10ge_slice_state *ss; 2345 int status; 2346 2347 ss = &mgp->ss[slice]; 2348 status = 0; 2349 if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) { 2350 cmd.data0 = slice; 2351 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, 2352 &cmd, 0); 2353 ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *) 2354 (mgp->sram + cmd.data0); 2355 } 2356 cmd.data0 = slice; 2357 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, 2358 &cmd, 0); 2359 ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *) 2360 (mgp->sram + cmd.data0); 2361 2362 cmd.data0 = slice; 2363 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0); 2364 ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *) 2365 (mgp->sram + cmd.data0); 2366 2367 ss->tx.send_go = (__iomem __be32 *) 2368 (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 2369 ss->tx.send_stop = (__iomem __be32 *) 2370 (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 2371 return status; 2372 2373 } 2374 2375 static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice) 2376 { 2377 struct myri10ge_cmd cmd; 2378 struct myri10ge_slice_state *ss; 2379 int status; 2380 2381 ss = &mgp->ss[slice]; 2382 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus); 2383 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus); 2384 cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16); 2385 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); 2386 if (status == -ENOSYS) { 2387 dma_addr_t bus = ss->fw_stats_bus; 2388 if (slice != 0) 2389 return -EINVAL; 2390 bus += offsetof(struct mcp_irq_data, send_done_count); 2391 cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus); 2392 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus); 2393 status = myri10ge_send_cmd(mgp, 2394 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2395 &cmd, 0); 2396 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2397 mgp->fw_multicast_support = 0; 2398 } else { 2399 mgp->fw_multicast_support = 1; 2400 } 2401 return 0; 2402 } 2403 2404 static int myri10ge_open(struct net_device *dev) 2405 { 2406 struct myri10ge_slice_state *ss; 2407 struct myri10ge_priv *mgp = netdev_priv(dev); 2408 struct myri10ge_cmd cmd; 2409 int i, status, big_pow2, slice; 2410 u8 *itable; 2411 struct net_lro_mgr *lro_mgr; 2412 2413 if (mgp->running != MYRI10GE_ETH_STOPPED) 2414 return -EBUSY; 2415 2416 mgp->running = MYRI10GE_ETH_STARTING; 2417 status = myri10ge_reset(mgp); 2418 if (status != 0) { 2419 netdev_err(dev, "failed reset\n"); 2420 goto abort_with_nothing; 2421 } 2422 2423 if (mgp->num_slices > 1) { 2424 cmd.data0 = mgp->num_slices; 2425 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 2426 if (mgp->dev->real_num_tx_queues > 1) 2427 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 2428 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2429 &cmd, 0); 2430 if (status != 0) { 2431 netdev_err(dev, "failed to set number of slices\n"); 2432 goto abort_with_nothing; 2433 } 2434 /* setup the indirection table */ 2435 cmd.data0 = mgp->num_slices; 2436 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2437 &cmd, 0); 2438 2439 status |= myri10ge_send_cmd(mgp, 2440 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 2441 &cmd, 0); 2442 if (status != 0) { 2443 netdev_err(dev, "failed to setup rss tables\n"); 2444 goto abort_with_nothing; 2445 } 2446 2447 /* just enable an identity mapping */ 2448 itable = mgp->sram + cmd.data0; 2449 for (i = 0; i < mgp->num_slices; i++) 2450 __raw_writeb(i, &itable[i]); 2451 2452 cmd.data0 = 1; 2453 cmd.data1 = myri10ge_rss_hash; 2454 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2455 &cmd, 0); 2456 if (status != 0) { 2457 netdev_err(dev, "failed to enable slices\n"); 2458 goto abort_with_nothing; 2459 } 2460 } 2461 2462 status = myri10ge_request_irq(mgp); 2463 if (status != 0) 2464 goto abort_with_nothing; 2465 2466 /* decide what small buffer size to use. For good TCP rx 2467 * performance, it is important to not receive 1514 byte 2468 * frames into jumbo buffers, as it confuses the socket buffer 2469 * accounting code, leading to drops and erratic performance. 2470 */ 2471 2472 if (dev->mtu <= ETH_DATA_LEN) 2473 /* enough for a TCP header */ 2474 mgp->small_bytes = (128 > SMP_CACHE_BYTES) 2475 ? (128 - MXGEFW_PAD) 2476 : (SMP_CACHE_BYTES - MXGEFW_PAD); 2477 else 2478 /* enough for a vlan encapsulated ETH_DATA_LEN frame */ 2479 mgp->small_bytes = VLAN_ETH_FRAME_LEN; 2480 2481 /* Override the small buffer size? */ 2482 if (myri10ge_small_bytes >= 0) 2483 mgp->small_bytes = myri10ge_small_bytes; 2484 2485 /* Firmware needs the big buff size as a power of 2. Lie and 2486 * tell him the buffer is larger, because we only use 1 2487 * buffer/pkt, and the mtu will prevent overruns. 2488 */ 2489 big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD; 2490 if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) { 2491 while (!is_power_of_2(big_pow2)) 2492 big_pow2++; 2493 mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD; 2494 } else { 2495 big_pow2 = MYRI10GE_ALLOC_SIZE; 2496 mgp->big_bytes = big_pow2; 2497 } 2498 2499 /* setup the per-slice data structures */ 2500 for (slice = 0; slice < mgp->num_slices; slice++) { 2501 ss = &mgp->ss[slice]; 2502 2503 status = myri10ge_get_txrx(mgp, slice); 2504 if (status != 0) { 2505 netdev_err(dev, "failed to get ring sizes or locations\n"); 2506 goto abort_with_rings; 2507 } 2508 status = myri10ge_allocate_rings(ss); 2509 if (status != 0) 2510 goto abort_with_rings; 2511 2512 /* only firmware which supports multiple TX queues 2513 * supports setting up the tx stats on non-zero 2514 * slices */ 2515 if (slice == 0 || mgp->dev->real_num_tx_queues > 1) 2516 status = myri10ge_set_stats(mgp, slice); 2517 if (status) { 2518 netdev_err(dev, "Couldn't set stats DMA\n"); 2519 goto abort_with_rings; 2520 } 2521 2522 lro_mgr = &ss->rx_done.lro_mgr; 2523 lro_mgr->dev = dev; 2524 lro_mgr->features = LRO_F_NAPI; 2525 lro_mgr->ip_summed = CHECKSUM_COMPLETE; 2526 lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; 2527 lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS; 2528 lro_mgr->lro_arr = ss->rx_done.lro_desc; 2529 lro_mgr->get_frag_header = myri10ge_get_frag_header; 2530 lro_mgr->max_aggr = myri10ge_lro_max_pkts; 2531 lro_mgr->frag_align_pad = 2; 2532 if (lro_mgr->max_aggr > MAX_SKB_FRAGS) 2533 lro_mgr->max_aggr = MAX_SKB_FRAGS; 2534 2535 /* must happen prior to any irq */ 2536 napi_enable(&(ss)->napi); 2537 } 2538 2539 /* now give firmware buffers sizes, and MTU */ 2540 cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN; 2541 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0); 2542 cmd.data0 = mgp->small_bytes; 2543 status |= 2544 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0); 2545 cmd.data0 = big_pow2; 2546 status |= 2547 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0); 2548 if (status) { 2549 netdev_err(dev, "Couldn't set buffer sizes\n"); 2550 goto abort_with_rings; 2551 } 2552 2553 /* 2554 * Set Linux style TSO mode; this is needed only on newer 2555 * firmware versions. Older versions default to Linux 2556 * style TSO 2557 */ 2558 cmd.data0 = 0; 2559 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0); 2560 if (status && status != -ENOSYS) { 2561 netdev_err(dev, "Couldn't set TSO mode\n"); 2562 goto abort_with_rings; 2563 } 2564 2565 mgp->link_state = ~0U; 2566 mgp->rdma_tags_available = 15; 2567 2568 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0); 2569 if (status) { 2570 netdev_err(dev, "Couldn't bring up link\n"); 2571 goto abort_with_rings; 2572 } 2573 2574 mgp->running = MYRI10GE_ETH_RUNNING; 2575 mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ; 2576 add_timer(&mgp->watchdog_timer); 2577 netif_tx_wake_all_queues(dev); 2578 2579 return 0; 2580 2581 abort_with_rings: 2582 while (slice) { 2583 slice--; 2584 napi_disable(&mgp->ss[slice].napi); 2585 } 2586 for (i = 0; i < mgp->num_slices; i++) 2587 myri10ge_free_rings(&mgp->ss[i]); 2588 2589 myri10ge_free_irq(mgp); 2590 2591 abort_with_nothing: 2592 mgp->running = MYRI10GE_ETH_STOPPED; 2593 return -ENOMEM; 2594 } 2595 2596 static int myri10ge_close(struct net_device *dev) 2597 { 2598 struct myri10ge_priv *mgp = netdev_priv(dev); 2599 struct myri10ge_cmd cmd; 2600 int status, old_down_cnt; 2601 int i; 2602 2603 if (mgp->running != MYRI10GE_ETH_RUNNING) 2604 return 0; 2605 2606 if (mgp->ss[0].tx.req_bytes == NULL) 2607 return 0; 2608 2609 del_timer_sync(&mgp->watchdog_timer); 2610 mgp->running = MYRI10GE_ETH_STOPPING; 2611 for (i = 0; i < mgp->num_slices; i++) { 2612 napi_disable(&mgp->ss[i].napi); 2613 } 2614 netif_carrier_off(dev); 2615 2616 netif_tx_stop_all_queues(dev); 2617 if (mgp->rebooted == 0) { 2618 old_down_cnt = mgp->down_cnt; 2619 mb(); 2620 status = 2621 myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0); 2622 if (status) 2623 netdev_err(dev, "Couldn't bring down link\n"); 2624 2625 wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt, 2626 HZ); 2627 if (old_down_cnt == mgp->down_cnt) 2628 netdev_err(dev, "never got down irq\n"); 2629 } 2630 netif_tx_disable(dev); 2631 myri10ge_free_irq(mgp); 2632 for (i = 0; i < mgp->num_slices; i++) 2633 myri10ge_free_rings(&mgp->ss[i]); 2634 2635 mgp->running = MYRI10GE_ETH_STOPPED; 2636 return 0; 2637 } 2638 2639 /* copy an array of struct mcp_kreq_ether_send's to the mcp. Copy 2640 * backwards one at a time and handle ring wraps */ 2641 2642 static inline void 2643 myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx, 2644 struct mcp_kreq_ether_send *src, int cnt) 2645 { 2646 int idx, starting_slot; 2647 starting_slot = tx->req; 2648 while (cnt > 1) { 2649 cnt--; 2650 idx = (starting_slot + cnt) & tx->mask; 2651 myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 2652 mb(); 2653 } 2654 } 2655 2656 /* 2657 * copy an array of struct mcp_kreq_ether_send's to the mcp. Copy 2658 * at most 32 bytes at a time, so as to avoid involving the software 2659 * pio handler in the nic. We re-write the first segment's flags 2660 * to mark them valid only after writing the entire chain. 2661 */ 2662 2663 static inline void 2664 myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, 2665 int cnt) 2666 { 2667 int idx, i; 2668 struct mcp_kreq_ether_send __iomem *dstp, *dst; 2669 struct mcp_kreq_ether_send *srcp; 2670 u8 last_flags; 2671 2672 idx = tx->req & tx->mask; 2673 2674 last_flags = src->flags; 2675 src->flags = 0; 2676 mb(); 2677 dst = dstp = &tx->lanai[idx]; 2678 srcp = src; 2679 2680 if ((idx + cnt) < tx->mask) { 2681 for (i = 0; i < (cnt - 1); i += 2) { 2682 myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 2683 mb(); /* force write every 32 bytes */ 2684 srcp += 2; 2685 dstp += 2; 2686 } 2687 } else { 2688 /* submit all but the first request, and ensure 2689 * that it is submitted below */ 2690 myri10ge_submit_req_backwards(tx, src, cnt); 2691 i = 0; 2692 } 2693 if (i < cnt) { 2694 /* submit the first request */ 2695 myri10ge_pio_copy(dstp, srcp, sizeof(*src)); 2696 mb(); /* barrier before setting valid flag */ 2697 } 2698 2699 /* re-write the last 32-bits with the valid flags */ 2700 src->flags = last_flags; 2701 put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3); 2702 tx->req += cnt; 2703 mb(); 2704 } 2705 2706 /* 2707 * Transmit a packet. We need to split the packet so that a single 2708 * segment does not cross myri10ge->tx_boundary, so this makes segment 2709 * counting tricky. So rather than try to count segments up front, we 2710 * just give up if there are too few segments to hold a reasonably 2711 * fragmented packet currently available. If we run 2712 * out of segments while preparing a packet for DMA, we just linearize 2713 * it and try again. 2714 */ 2715 2716 static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, 2717 struct net_device *dev) 2718 { 2719 struct myri10ge_priv *mgp = netdev_priv(dev); 2720 struct myri10ge_slice_state *ss; 2721 struct mcp_kreq_ether_send *req; 2722 struct myri10ge_tx_buf *tx; 2723 struct skb_frag_struct *frag; 2724 struct netdev_queue *netdev_queue; 2725 dma_addr_t bus; 2726 u32 low; 2727 __be32 high_swapped; 2728 unsigned int len; 2729 int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; 2730 u16 pseudo_hdr_offset, cksum_offset, queue; 2731 int cum_len, seglen, boundary, rdma_count; 2732 u8 flags, odd_flag; 2733 2734 queue = skb_get_queue_mapping(skb); 2735 ss = &mgp->ss[queue]; 2736 netdev_queue = netdev_get_tx_queue(mgp->dev, queue); 2737 tx = &ss->tx; 2738 2739 again: 2740 req = tx->req_list; 2741 avail = tx->mask - 1 - (tx->req - tx->done); 2742 2743 mss = 0; 2744 max_segments = MXGEFW_MAX_SEND_DESC; 2745 2746 if (skb_is_gso(skb)) { 2747 mss = skb_shinfo(skb)->gso_size; 2748 max_segments = MYRI10GE_MAX_SEND_DESC_TSO; 2749 } 2750 2751 if ((unlikely(avail < max_segments))) { 2752 /* we are out of transmit resources */ 2753 tx->stop_queue++; 2754 netif_tx_stop_queue(netdev_queue); 2755 return NETDEV_TX_BUSY; 2756 } 2757 2758 /* Setup checksum offloading, if needed */ 2759 cksum_offset = 0; 2760 pseudo_hdr_offset = 0; 2761 odd_flag = 0; 2762 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 2763 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 2764 cksum_offset = skb_checksum_start_offset(skb); 2765 pseudo_hdr_offset = cksum_offset + skb->csum_offset; 2766 /* If the headers are excessively large, then we must 2767 * fall back to a software checksum */ 2768 if (unlikely(!mss && (cksum_offset > 255 || 2769 pseudo_hdr_offset > 127))) { 2770 if (skb_checksum_help(skb)) 2771 goto drop; 2772 cksum_offset = 0; 2773 pseudo_hdr_offset = 0; 2774 } else { 2775 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2776 flags |= MXGEFW_FLAGS_CKSUM; 2777 } 2778 } 2779 2780 cum_len = 0; 2781 2782 if (mss) { /* TSO */ 2783 /* this removes any CKSUM flag from before */ 2784 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 2785 2786 /* negative cum_len signifies to the 2787 * send loop that we are still in the 2788 * header portion of the TSO packet. 2789 * TSO header can be at most 1KB long */ 2790 cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); 2791 2792 /* for IPv6 TSO, the checksum offset stores the 2793 * TCP header length, to save the firmware from 2794 * the need to parse the headers */ 2795 if (skb_is_gso_v6(skb)) { 2796 cksum_offset = tcp_hdrlen(skb); 2797 /* Can only handle headers <= max_tso6 long */ 2798 if (unlikely(-cum_len > mgp->max_tso6)) 2799 return myri10ge_sw_tso(skb, dev); 2800 } 2801 /* for TSO, pseudo_hdr_offset holds mss. 2802 * The firmware figures out where to put 2803 * the checksum by parsing the header. */ 2804 pseudo_hdr_offset = mss; 2805 } else 2806 /* Mark small packets, and pad out tiny packets */ 2807 if (skb->len <= MXGEFW_SEND_SMALL_SIZE) { 2808 flags |= MXGEFW_FLAGS_SMALL; 2809 2810 /* pad frames to at least ETH_ZLEN bytes */ 2811 if (unlikely(skb->len < ETH_ZLEN)) { 2812 if (skb_padto(skb, ETH_ZLEN)) { 2813 /* The packet is gone, so we must 2814 * return 0 */ 2815 ss->stats.tx_dropped += 1; 2816 return NETDEV_TX_OK; 2817 } 2818 /* adjust the len to account for the zero pad 2819 * so that the nic can know how long it is */ 2820 skb->len = ETH_ZLEN; 2821 } 2822 } 2823 2824 /* map the skb for DMA */ 2825 len = skb_headlen(skb); 2826 idx = tx->req & tx->mask; 2827 tx->info[idx].skb = skb; 2828 bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); 2829 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2830 dma_unmap_len_set(&tx->info[idx], len, len); 2831 2832 frag_cnt = skb_shinfo(skb)->nr_frags; 2833 frag_idx = 0; 2834 count = 0; 2835 rdma_count = 0; 2836 2837 /* "rdma_count" is the number of RDMAs belonging to the 2838 * current packet BEFORE the current send request. For 2839 * non-TSO packets, this is equal to "count". 2840 * For TSO packets, rdma_count needs to be reset 2841 * to 0 after a segment cut. 2842 * 2843 * The rdma_count field of the send request is 2844 * the number of RDMAs of the packet starting at 2845 * that request. For TSO send requests with one ore more cuts 2846 * in the middle, this is the number of RDMAs starting 2847 * after the last cut in the request. All previous 2848 * segments before the last cut implicitly have 1 RDMA. 2849 * 2850 * Since the number of RDMAs is not known beforehand, 2851 * it must be filled-in retroactively - after each 2852 * segmentation cut or at the end of the entire packet. 2853 */ 2854 2855 while (1) { 2856 /* Break the SKB or Fragment up into pieces which 2857 * do not cross mgp->tx_boundary */ 2858 low = MYRI10GE_LOWPART_TO_U32(bus); 2859 high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus)); 2860 while (len) { 2861 u8 flags_next; 2862 int cum_len_next; 2863 2864 if (unlikely(count == max_segments)) 2865 goto abort_linearize; 2866 2867 boundary = 2868 (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1); 2869 seglen = boundary - low; 2870 if (seglen > len) 2871 seglen = len; 2872 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 2873 cum_len_next = cum_len + seglen; 2874 if (mss) { /* TSO */ 2875 (req - rdma_count)->rdma_count = rdma_count + 1; 2876 2877 if (likely(cum_len >= 0)) { /* payload */ 2878 int next_is_first, chop; 2879 2880 chop = (cum_len_next > mss); 2881 cum_len_next = cum_len_next % mss; 2882 next_is_first = (cum_len_next == 0); 2883 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 2884 flags_next |= next_is_first * 2885 MXGEFW_FLAGS_FIRST; 2886 rdma_count |= -(chop | next_is_first); 2887 rdma_count += chop & !next_is_first; 2888 } else if (likely(cum_len_next >= 0)) { /* header ends */ 2889 int small; 2890 2891 rdma_count = -1; 2892 cum_len_next = 0; 2893 seglen = -cum_len; 2894 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 2895 flags_next = MXGEFW_FLAGS_TSO_PLD | 2896 MXGEFW_FLAGS_FIRST | 2897 (small * MXGEFW_FLAGS_SMALL); 2898 } 2899 } 2900 req->addr_high = high_swapped; 2901 req->addr_low = htonl(low); 2902 req->pseudo_hdr_offset = htons(pseudo_hdr_offset); 2903 req->pad = 0; /* complete solid 16-byte block; does this matter? */ 2904 req->rdma_count = 1; 2905 req->length = htons(seglen); 2906 req->cksum_offset = cksum_offset; 2907 req->flags = flags | ((cum_len & 1) * odd_flag); 2908 2909 low += seglen; 2910 len -= seglen; 2911 cum_len = cum_len_next; 2912 flags = flags_next; 2913 req++; 2914 count++; 2915 rdma_count++; 2916 if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) { 2917 if (unlikely(cksum_offset > seglen)) 2918 cksum_offset -= seglen; 2919 else 2920 cksum_offset = 0; 2921 } 2922 } 2923 if (frag_idx == frag_cnt) 2924 break; 2925 2926 /* map next fragment for DMA */ 2927 idx = (count + tx->req) & tx->mask; 2928 frag = &skb_shinfo(skb)->frags[frag_idx]; 2929 frag_idx++; 2930 len = skb_frag_size(frag); 2931 bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, 2932 DMA_TO_DEVICE); 2933 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2934 dma_unmap_len_set(&tx->info[idx], len, len); 2935 } 2936 2937 (req - rdma_count)->rdma_count = rdma_count; 2938 if (mss) 2939 do { 2940 req--; 2941 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2942 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 2943 MXGEFW_FLAGS_FIRST))); 2944 idx = ((count - 1) + tx->req) & tx->mask; 2945 tx->info[idx].last = 1; 2946 myri10ge_submit_req(tx, tx->req_list, count); 2947 /* if using multiple tx queues, make sure NIC polls the 2948 * current slice */ 2949 if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) { 2950 tx->queue_active = 1; 2951 put_be32(htonl(1), tx->send_go); 2952 mb(); 2953 mmiowb(); 2954 } 2955 tx->pkt_start++; 2956 if ((avail - count) < MXGEFW_MAX_SEND_DESC) { 2957 tx->stop_queue++; 2958 netif_tx_stop_queue(netdev_queue); 2959 } 2960 return NETDEV_TX_OK; 2961 2962 abort_linearize: 2963 /* Free any DMA resources we've alloced and clear out the skb 2964 * slot so as to not trip up assertions, and to avoid a 2965 * double-free if linearizing fails */ 2966 2967 last_idx = (idx + 1) & tx->mask; 2968 idx = tx->req & tx->mask; 2969 tx->info[idx].skb = NULL; 2970 do { 2971 len = dma_unmap_len(&tx->info[idx], len); 2972 if (len) { 2973 if (tx->info[idx].skb != NULL) 2974 pci_unmap_single(mgp->pdev, 2975 dma_unmap_addr(&tx->info[idx], 2976 bus), len, 2977 PCI_DMA_TODEVICE); 2978 else 2979 pci_unmap_page(mgp->pdev, 2980 dma_unmap_addr(&tx->info[idx], 2981 bus), len, 2982 PCI_DMA_TODEVICE); 2983 dma_unmap_len_set(&tx->info[idx], len, 0); 2984 tx->info[idx].skb = NULL; 2985 } 2986 idx = (idx + 1) & tx->mask; 2987 } while (idx != last_idx); 2988 if (skb_is_gso(skb)) { 2989 netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); 2990 goto drop; 2991 } 2992 2993 if (skb_linearize(skb)) 2994 goto drop; 2995 2996 tx->linearized++; 2997 goto again; 2998 2999 drop: 3000 dev_kfree_skb_any(skb); 3001 ss->stats.tx_dropped += 1; 3002 return NETDEV_TX_OK; 3003 3004 } 3005 3006 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 3007 struct net_device *dev) 3008 { 3009 struct sk_buff *segs, *curr; 3010 struct myri10ge_priv *mgp = netdev_priv(dev); 3011 struct myri10ge_slice_state *ss; 3012 netdev_tx_t status; 3013 3014 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); 3015 if (IS_ERR(segs)) 3016 goto drop; 3017 3018 while (segs) { 3019 curr = segs; 3020 segs = segs->next; 3021 curr->next = NULL; 3022 status = myri10ge_xmit(curr, dev); 3023 if (status != 0) { 3024 dev_kfree_skb_any(curr); 3025 if (segs != NULL) { 3026 curr = segs; 3027 segs = segs->next; 3028 curr->next = NULL; 3029 dev_kfree_skb_any(segs); 3030 } 3031 goto drop; 3032 } 3033 } 3034 dev_kfree_skb_any(skb); 3035 return NETDEV_TX_OK; 3036 3037 drop: 3038 ss = &mgp->ss[skb_get_queue_mapping(skb)]; 3039 dev_kfree_skb_any(skb); 3040 ss->stats.tx_dropped += 1; 3041 return NETDEV_TX_OK; 3042 } 3043 3044 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, 3045 struct rtnl_link_stats64 *stats) 3046 { 3047 const struct myri10ge_priv *mgp = netdev_priv(dev); 3048 const struct myri10ge_slice_netstats *slice_stats; 3049 int i; 3050 3051 for (i = 0; i < mgp->num_slices; i++) { 3052 slice_stats = &mgp->ss[i].stats; 3053 stats->rx_packets += slice_stats->rx_packets; 3054 stats->tx_packets += slice_stats->tx_packets; 3055 stats->rx_bytes += slice_stats->rx_bytes; 3056 stats->tx_bytes += slice_stats->tx_bytes; 3057 stats->rx_dropped += slice_stats->rx_dropped; 3058 stats->tx_dropped += slice_stats->tx_dropped; 3059 } 3060 return stats; 3061 } 3062 3063 static void myri10ge_set_multicast_list(struct net_device *dev) 3064 { 3065 struct myri10ge_priv *mgp = netdev_priv(dev); 3066 struct myri10ge_cmd cmd; 3067 struct netdev_hw_addr *ha; 3068 __be32 data[2] = { 0, 0 }; 3069 int err; 3070 3071 /* can be called from atomic contexts, 3072 * pass 1 to force atomicity in myri10ge_send_cmd() */ 3073 myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1); 3074 3075 /* This firmware is known to not support multicast */ 3076 if (!mgp->fw_multicast_support) 3077 return; 3078 3079 /* Disable multicast filtering */ 3080 3081 err = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_ALLMULTI, &cmd, 1); 3082 if (err != 0) { 3083 netdev_err(dev, "Failed MXGEFW_ENABLE_ALLMULTI, error status: %d\n", 3084 err); 3085 goto abort; 3086 } 3087 3088 if ((dev->flags & IFF_ALLMULTI) || mgp->adopted_rx_filter_bug) { 3089 /* request to disable multicast filtering, so quit here */ 3090 return; 3091 } 3092 3093 /* Flush the filters */ 3094 3095 err = myri10ge_send_cmd(mgp, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, 3096 &cmd, 1); 3097 if (err != 0) { 3098 netdev_err(dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, error status: %d\n", 3099 err); 3100 goto abort; 3101 } 3102 3103 /* Walk the multicast list, and add each address */ 3104 netdev_for_each_mc_addr(ha, dev) { 3105 memcpy(data, &ha->addr, 6); 3106 cmd.data0 = ntohl(data[0]); 3107 cmd.data1 = ntohl(data[1]); 3108 err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP, 3109 &cmd, 1); 3110 3111 if (err != 0) { 3112 netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n", 3113 err, ha->addr); 3114 goto abort; 3115 } 3116 } 3117 /* Enable multicast filtering */ 3118 err = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_ALLMULTI, &cmd, 1); 3119 if (err != 0) { 3120 netdev_err(dev, "Failed MXGEFW_DISABLE_ALLMULTI, error status: %d\n", 3121 err); 3122 goto abort; 3123 } 3124 3125 return; 3126 3127 abort: 3128 return; 3129 } 3130 3131 static int myri10ge_set_mac_address(struct net_device *dev, void *addr) 3132 { 3133 struct sockaddr *sa = addr; 3134 struct myri10ge_priv *mgp = netdev_priv(dev); 3135 int status; 3136 3137 if (!is_valid_ether_addr(sa->sa_data)) 3138 return -EADDRNOTAVAIL; 3139 3140 status = myri10ge_update_mac_address(mgp, sa->sa_data); 3141 if (status != 0) { 3142 netdev_err(dev, "changing mac address failed with %d\n", 3143 status); 3144 return status; 3145 } 3146 3147 /* change the dev structure */ 3148 memcpy(dev->dev_addr, sa->sa_data, 6); 3149 return 0; 3150 } 3151 3152 static netdev_features_t myri10ge_fix_features(struct net_device *dev, 3153 netdev_features_t features) 3154 { 3155 if (!(features & NETIF_F_RXCSUM)) 3156 features &= ~NETIF_F_LRO; 3157 3158 return features; 3159 } 3160 3161 static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) 3162 { 3163 struct myri10ge_priv *mgp = netdev_priv(dev); 3164 int error = 0; 3165 3166 if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) { 3167 netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu); 3168 return -EINVAL; 3169 } 3170 netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); 3171 if (mgp->running) { 3172 /* if we change the mtu on an active device, we must 3173 * reset the device so the firmware sees the change */ 3174 myri10ge_close(dev); 3175 dev->mtu = new_mtu; 3176 myri10ge_open(dev); 3177 } else 3178 dev->mtu = new_mtu; 3179 3180 return error; 3181 } 3182 3183 /* 3184 * Enable ECRC to align PCI-E Completion packets on an 8-byte boundary. 3185 * Only do it if the bridge is a root port since we don't want to disturb 3186 * any other device, except if forced with myri10ge_ecrc_enable > 1. 3187 */ 3188 3189 static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) 3190 { 3191 struct pci_dev *bridge = mgp->pdev->bus->self; 3192 struct device *dev = &mgp->pdev->dev; 3193 int cap; 3194 unsigned err_cap; 3195 u16 val; 3196 u8 ext_type; 3197 int ret; 3198 3199 if (!myri10ge_ecrc_enable || !bridge) 3200 return; 3201 3202 /* check that the bridge is a root port */ 3203 cap = pci_pcie_cap(bridge); 3204 pci_read_config_word(bridge, cap + PCI_CAP_FLAGS, &val); 3205 ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4; 3206 if (ext_type != PCI_EXP_TYPE_ROOT_PORT) { 3207 if (myri10ge_ecrc_enable > 1) { 3208 struct pci_dev *prev_bridge, *old_bridge = bridge; 3209 3210 /* Walk the hierarchy up to the root port 3211 * where ECRC has to be enabled */ 3212 do { 3213 prev_bridge = bridge; 3214 bridge = bridge->bus->self; 3215 if (!bridge || prev_bridge == bridge) { 3216 dev_err(dev, 3217 "Failed to find root port" 3218 " to force ECRC\n"); 3219 return; 3220 } 3221 cap = pci_pcie_cap(bridge); 3222 pci_read_config_word(bridge, 3223 cap + PCI_CAP_FLAGS, &val); 3224 ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4; 3225 } while (ext_type != PCI_EXP_TYPE_ROOT_PORT); 3226 3227 dev_info(dev, 3228 "Forcing ECRC on non-root port %s" 3229 " (enabling on root port %s)\n", 3230 pci_name(old_bridge), pci_name(bridge)); 3231 } else { 3232 dev_err(dev, 3233 "Not enabling ECRC on non-root port %s\n", 3234 pci_name(bridge)); 3235 return; 3236 } 3237 } 3238 3239 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3240 if (!cap) 3241 return; 3242 3243 ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap); 3244 if (ret) { 3245 dev_err(dev, "failed reading ext-conf-space of %s\n", 3246 pci_name(bridge)); 3247 dev_err(dev, "\t pci=nommconf in use? " 3248 "or buggy/incomplete/absent ACPI MCFG attr?\n"); 3249 return; 3250 } 3251 if (!(err_cap & PCI_ERR_CAP_ECRC_GENC)) 3252 return; 3253 3254 err_cap |= PCI_ERR_CAP_ECRC_GENE; 3255 pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap); 3256 dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge)); 3257 } 3258 3259 /* 3260 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 3261 * when the PCI-E Completion packets are aligned on an 8-byte 3262 * boundary. Some PCI-E chip sets always align Completion packets; on 3263 * the ones that do not, the alignment can be enforced by enabling 3264 * ECRC generation (if supported). 3265 * 3266 * When PCI-E Completion packets are not aligned, it is actually more 3267 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 3268 * 3269 * If the driver can neither enable ECRC nor verify that it has 3270 * already been enabled, then it must use a firmware image which works 3271 * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it 3272 * should also ensure that it never gives the device a Read-DMA which is 3273 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 3274 * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat) 3275 * firmware image, and set tx_boundary to 4KB. 3276 */ 3277 3278 static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) 3279 { 3280 struct pci_dev *pdev = mgp->pdev; 3281 struct device *dev = &pdev->dev; 3282 int status; 3283 3284 mgp->tx_boundary = 4096; 3285 /* 3286 * Verify the max read request size was set to 4KB 3287 * before trying the test with 4KB. 3288 */ 3289 status = pcie_get_readrq(pdev); 3290 if (status < 0) { 3291 dev_err(dev, "Couldn't read max read req size: %d\n", status); 3292 goto abort; 3293 } 3294 if (status != 4096) { 3295 dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status); 3296 mgp->tx_boundary = 2048; 3297 } 3298 /* 3299 * load the optimized firmware (which assumes aligned PCIe 3300 * completions) in order to see if it works on this host. 3301 */ 3302 set_fw_name(mgp, myri10ge_fw_aligned, false); 3303 status = myri10ge_load_firmware(mgp, 1); 3304 if (status != 0) { 3305 goto abort; 3306 } 3307 3308 /* 3309 * Enable ECRC if possible 3310 */ 3311 myri10ge_enable_ecrc(mgp); 3312 3313 /* 3314 * Run a DMA test which watches for unaligned completions and 3315 * aborts on the first one seen. 3316 */ 3317 3318 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 3319 if (status == 0) 3320 return; /* keep the aligned firmware */ 3321 3322 if (status != -E2BIG) 3323 dev_warn(dev, "DMA test failed: %d\n", status); 3324 if (status == -ENOSYS) 3325 dev_warn(dev, "Falling back to ethp! " 3326 "Please install up to date fw\n"); 3327 abort: 3328 /* fall back to using the unaligned firmware */ 3329 mgp->tx_boundary = 2048; 3330 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3331 } 3332 3333 static void myri10ge_select_firmware(struct myri10ge_priv *mgp) 3334 { 3335 int overridden = 0; 3336 3337 if (myri10ge_force_firmware == 0) { 3338 int link_width, exp_cap; 3339 u16 lnk; 3340 3341 exp_cap = pci_pcie_cap(mgp->pdev); 3342 pci_read_config_word(mgp->pdev, exp_cap + PCI_EXP_LNKSTA, &lnk); 3343 link_width = (lnk >> 4) & 0x3f; 3344 3345 /* Check to see if Link is less than 8 or if the 3346 * upstream bridge is known to provide aligned 3347 * completions */ 3348 if (link_width < 8) { 3349 dev_info(&mgp->pdev->dev, "PCIE x%d Link\n", 3350 link_width); 3351 mgp->tx_boundary = 4096; 3352 set_fw_name(mgp, myri10ge_fw_aligned, false); 3353 } else { 3354 myri10ge_firmware_probe(mgp); 3355 } 3356 } else { 3357 if (myri10ge_force_firmware == 1) { 3358 dev_info(&mgp->pdev->dev, 3359 "Assuming aligned completions (forced)\n"); 3360 mgp->tx_boundary = 4096; 3361 set_fw_name(mgp, myri10ge_fw_aligned, false); 3362 } else { 3363 dev_info(&mgp->pdev->dev, 3364 "Assuming unaligned completions (forced)\n"); 3365 mgp->tx_boundary = 2048; 3366 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3367 } 3368 } 3369 3370 kparam_block_sysfs_write(myri10ge_fw_name); 3371 if (myri10ge_fw_name != NULL) { 3372 char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); 3373 if (fw_name) { 3374 overridden = 1; 3375 set_fw_name(mgp, fw_name, true); 3376 } 3377 } 3378 kparam_unblock_sysfs_write(myri10ge_fw_name); 3379 3380 if (mgp->board_number < MYRI10GE_MAX_BOARDS && 3381 myri10ge_fw_names[mgp->board_number] != NULL && 3382 strlen(myri10ge_fw_names[mgp->board_number])) { 3383 set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false); 3384 overridden = 1; 3385 } 3386 if (overridden) 3387 dev_info(&mgp->pdev->dev, "overriding firmware to %s\n", 3388 mgp->fw_name); 3389 } 3390 3391 static void myri10ge_mask_surprise_down(struct pci_dev *pdev) 3392 { 3393 struct pci_dev *bridge = pdev->bus->self; 3394 int cap; 3395 u32 mask; 3396 3397 if (bridge == NULL) 3398 return; 3399 3400 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3401 if (cap) { 3402 /* a sram parity error can cause a surprise link 3403 * down; since we expect and can recover from sram 3404 * parity errors, mask surprise link down events */ 3405 pci_read_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, &mask); 3406 mask |= 0x20; 3407 pci_write_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, mask); 3408 } 3409 } 3410 3411 #ifdef CONFIG_PM 3412 static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state) 3413 { 3414 struct myri10ge_priv *mgp; 3415 struct net_device *netdev; 3416 3417 mgp = pci_get_drvdata(pdev); 3418 if (mgp == NULL) 3419 return -EINVAL; 3420 netdev = mgp->dev; 3421 3422 netif_device_detach(netdev); 3423 if (netif_running(netdev)) { 3424 netdev_info(netdev, "closing\n"); 3425 rtnl_lock(); 3426 myri10ge_close(netdev); 3427 rtnl_unlock(); 3428 } 3429 myri10ge_dummy_rdma(mgp, 0); 3430 pci_save_state(pdev); 3431 pci_disable_device(pdev); 3432 3433 return pci_set_power_state(pdev, pci_choose_state(pdev, state)); 3434 } 3435 3436 static int myri10ge_resume(struct pci_dev *pdev) 3437 { 3438 struct myri10ge_priv *mgp; 3439 struct net_device *netdev; 3440 int status; 3441 u16 vendor; 3442 3443 mgp = pci_get_drvdata(pdev); 3444 if (mgp == NULL) 3445 return -EINVAL; 3446 netdev = mgp->dev; 3447 pci_set_power_state(pdev, 0); /* zeros conf space as a side effect */ 3448 msleep(5); /* give card time to respond */ 3449 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3450 if (vendor == 0xffff) { 3451 netdev_err(mgp->dev, "device disappeared!\n"); 3452 return -EIO; 3453 } 3454 3455 pci_restore_state(pdev); 3456 3457 status = pci_enable_device(pdev); 3458 if (status) { 3459 dev_err(&pdev->dev, "failed to enable device\n"); 3460 return status; 3461 } 3462 3463 pci_set_master(pdev); 3464 3465 myri10ge_reset(mgp); 3466 myri10ge_dummy_rdma(mgp, 1); 3467 3468 /* Save configuration space to be restored if the 3469 * nic resets due to a parity error */ 3470 pci_save_state(pdev); 3471 3472 if (netif_running(netdev)) { 3473 rtnl_lock(); 3474 status = myri10ge_open(netdev); 3475 rtnl_unlock(); 3476 if (status != 0) 3477 goto abort_with_enabled; 3478 3479 } 3480 netif_device_attach(netdev); 3481 3482 return 0; 3483 3484 abort_with_enabled: 3485 pci_disable_device(pdev); 3486 return -EIO; 3487 3488 } 3489 #endif /* CONFIG_PM */ 3490 3491 static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp) 3492 { 3493 struct pci_dev *pdev = mgp->pdev; 3494 int vs = mgp->vendor_specific_offset; 3495 u32 reboot; 3496 3497 /*enter read32 mode */ 3498 pci_write_config_byte(pdev, vs + 0x10, 0x3); 3499 3500 /*read REBOOT_STATUS (0xfffffff0) */ 3501 pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0); 3502 pci_read_config_dword(pdev, vs + 0x14, &reboot); 3503 return reboot; 3504 } 3505 3506 static void 3507 myri10ge_check_slice(struct myri10ge_slice_state *ss, int *reset_needed, 3508 int *busy_slice_cnt, u32 rx_pause_cnt) 3509 { 3510 struct myri10ge_priv *mgp = ss->mgp; 3511 int slice = ss - mgp->ss; 3512 3513 if (ss->tx.req != ss->tx.done && 3514 ss->tx.done == ss->watchdog_tx_done && 3515 ss->watchdog_tx_req != ss->watchdog_tx_done) { 3516 /* nic seems like it might be stuck.. */ 3517 if (rx_pause_cnt != mgp->watchdog_pause) { 3518 if (net_ratelimit()) 3519 netdev_warn(mgp->dev, "slice %d: TX paused, " 3520 "check link partner\n", slice); 3521 } else { 3522 netdev_warn(mgp->dev, 3523 "slice %d: TX stuck %d %d %d %d %d %d\n", 3524 slice, ss->tx.queue_active, ss->tx.req, 3525 ss->tx.done, ss->tx.pkt_start, 3526 ss->tx.pkt_done, 3527 (int)ntohl(mgp->ss[slice].fw_stats-> 3528 send_done_count)); 3529 *reset_needed = 1; 3530 ss->stuck = 1; 3531 } 3532 } 3533 if (ss->watchdog_tx_done != ss->tx.done || 3534 ss->watchdog_rx_done != ss->rx_done.cnt) { 3535 *busy_slice_cnt += 1; 3536 } 3537 ss->watchdog_tx_done = ss->tx.done; 3538 ss->watchdog_tx_req = ss->tx.req; 3539 ss->watchdog_rx_done = ss->rx_done.cnt; 3540 } 3541 3542 /* 3543 * This watchdog is used to check whether the board has suffered 3544 * from a parity error and needs to be recovered. 3545 */ 3546 static void myri10ge_watchdog(struct work_struct *work) 3547 { 3548 struct myri10ge_priv *mgp = 3549 container_of(work, struct myri10ge_priv, watchdog_work); 3550 struct myri10ge_slice_state *ss; 3551 u32 reboot, rx_pause_cnt; 3552 int status, rebooted; 3553 int i; 3554 int reset_needed = 0; 3555 int busy_slice_cnt = 0; 3556 u16 cmd, vendor; 3557 3558 mgp->watchdog_resets++; 3559 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3560 rebooted = 0; 3561 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3562 /* Bus master DMA disabled? Check to see 3563 * if the card rebooted due to a parity error 3564 * For now, just report it */ 3565 reboot = myri10ge_read_reboot(mgp); 3566 netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n", 3567 reboot, myri10ge_reset_recover ? "" : " not"); 3568 if (myri10ge_reset_recover == 0) 3569 return; 3570 rtnl_lock(); 3571 mgp->rebooted = 1; 3572 rebooted = 1; 3573 myri10ge_close(mgp->dev); 3574 myri10ge_reset_recover--; 3575 mgp->rebooted = 0; 3576 /* 3577 * A rebooted nic will come back with config space as 3578 * it was after power was applied to PCIe bus. 3579 * Attempt to restore config space which was saved 3580 * when the driver was loaded, or the last time the 3581 * nic was resumed from power saving mode. 3582 */ 3583 pci_restore_state(mgp->pdev); 3584 3585 /* save state again for accounting reasons */ 3586 pci_save_state(mgp->pdev); 3587 3588 } else { 3589 /* if we get back -1's from our slot, perhaps somebody 3590 * powered off our card. Don't try to reset it in 3591 * this case */ 3592 if (cmd == 0xffff) { 3593 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3594 if (vendor == 0xffff) { 3595 netdev_err(mgp->dev, "device disappeared!\n"); 3596 return; 3597 } 3598 } 3599 /* Perhaps it is a software error. See if stuck slice 3600 * has recovered, reset if not */ 3601 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3602 for (i = 0; i < mgp->num_slices; i++) { 3603 ss = mgp->ss; 3604 if (ss->stuck) { 3605 myri10ge_check_slice(ss, &reset_needed, 3606 &busy_slice_cnt, 3607 rx_pause_cnt); 3608 ss->stuck = 0; 3609 } 3610 } 3611 if (!reset_needed) { 3612 netdev_dbg(mgp->dev, "not resetting\n"); 3613 return; 3614 } 3615 3616 netdev_err(mgp->dev, "device timeout, resetting\n"); 3617 } 3618 3619 if (!rebooted) { 3620 rtnl_lock(); 3621 myri10ge_close(mgp->dev); 3622 } 3623 status = myri10ge_load_firmware(mgp, 1); 3624 if (status != 0) 3625 netdev_err(mgp->dev, "failed to load firmware\n"); 3626 else 3627 myri10ge_open(mgp->dev); 3628 rtnl_unlock(); 3629 } 3630 3631 /* 3632 * We use our own timer routine rather than relying upon 3633 * netdev->tx_timeout because we have a very large hardware transmit 3634 * queue. Due to the large queue, the netdev->tx_timeout function 3635 * cannot detect a NIC with a parity error in a timely fashion if the 3636 * NIC is lightly loaded. 3637 */ 3638 static void myri10ge_watchdog_timer(unsigned long arg) 3639 { 3640 struct myri10ge_priv *mgp; 3641 struct myri10ge_slice_state *ss; 3642 int i, reset_needed, busy_slice_cnt; 3643 u32 rx_pause_cnt; 3644 u16 cmd; 3645 3646 mgp = (struct myri10ge_priv *)arg; 3647 3648 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3649 busy_slice_cnt = 0; 3650 for (i = 0, reset_needed = 0; 3651 i < mgp->num_slices && reset_needed == 0; ++i) { 3652 3653 ss = &mgp->ss[i]; 3654 if (ss->rx_small.watchdog_needed) { 3655 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 3656 mgp->small_bytes + MXGEFW_PAD, 3657 1); 3658 if (ss->rx_small.fill_cnt - ss->rx_small.cnt >= 3659 myri10ge_fill_thresh) 3660 ss->rx_small.watchdog_needed = 0; 3661 } 3662 if (ss->rx_big.watchdog_needed) { 3663 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, 3664 mgp->big_bytes, 1); 3665 if (ss->rx_big.fill_cnt - ss->rx_big.cnt >= 3666 myri10ge_fill_thresh) 3667 ss->rx_big.watchdog_needed = 0; 3668 } 3669 myri10ge_check_slice(ss, &reset_needed, &busy_slice_cnt, 3670 rx_pause_cnt); 3671 } 3672 /* if we've sent or received no traffic, poll the NIC to 3673 * ensure it is still there. Otherwise, we risk not noticing 3674 * an error in a timely fashion */ 3675 if (busy_slice_cnt == 0) { 3676 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3677 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3678 reset_needed = 1; 3679 } 3680 } 3681 mgp->watchdog_pause = rx_pause_cnt; 3682 3683 if (reset_needed) { 3684 schedule_work(&mgp->watchdog_work); 3685 } else { 3686 /* rearm timer */ 3687 mod_timer(&mgp->watchdog_timer, 3688 jiffies + myri10ge_watchdog_timeout * HZ); 3689 } 3690 } 3691 3692 static void myri10ge_free_slices(struct myri10ge_priv *mgp) 3693 { 3694 struct myri10ge_slice_state *ss; 3695 struct pci_dev *pdev = mgp->pdev; 3696 size_t bytes; 3697 int i; 3698 3699 if (mgp->ss == NULL) 3700 return; 3701 3702 for (i = 0; i < mgp->num_slices; i++) { 3703 ss = &mgp->ss[i]; 3704 if (ss->rx_done.entry != NULL) { 3705 bytes = mgp->max_intr_slots * 3706 sizeof(*ss->rx_done.entry); 3707 dma_free_coherent(&pdev->dev, bytes, 3708 ss->rx_done.entry, ss->rx_done.bus); 3709 ss->rx_done.entry = NULL; 3710 } 3711 if (ss->fw_stats != NULL) { 3712 bytes = sizeof(*ss->fw_stats); 3713 dma_free_coherent(&pdev->dev, bytes, 3714 ss->fw_stats, ss->fw_stats_bus); 3715 ss->fw_stats = NULL; 3716 } 3717 netif_napi_del(&ss->napi); 3718 } 3719 kfree(mgp->ss); 3720 mgp->ss = NULL; 3721 } 3722 3723 static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) 3724 { 3725 struct myri10ge_slice_state *ss; 3726 struct pci_dev *pdev = mgp->pdev; 3727 size_t bytes; 3728 int i; 3729 3730 bytes = sizeof(*mgp->ss) * mgp->num_slices; 3731 mgp->ss = kzalloc(bytes, GFP_KERNEL); 3732 if (mgp->ss == NULL) { 3733 return -ENOMEM; 3734 } 3735 3736 for (i = 0; i < mgp->num_slices; i++) { 3737 ss = &mgp->ss[i]; 3738 bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); 3739 ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes, 3740 &ss->rx_done.bus, 3741 GFP_KERNEL); 3742 if (ss->rx_done.entry == NULL) 3743 goto abort; 3744 memset(ss->rx_done.entry, 0, bytes); 3745 bytes = sizeof(*ss->fw_stats); 3746 ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes, 3747 &ss->fw_stats_bus, 3748 GFP_KERNEL); 3749 if (ss->fw_stats == NULL) 3750 goto abort; 3751 ss->mgp = mgp; 3752 ss->dev = mgp->dev; 3753 netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, 3754 myri10ge_napi_weight); 3755 } 3756 return 0; 3757 abort: 3758 myri10ge_free_slices(mgp); 3759 return -ENOMEM; 3760 } 3761 3762 /* 3763 * This function determines the number of slices supported. 3764 * The number slices is the minimum of the number of CPUS, 3765 * the number of MSI-X irqs supported, the number of slices 3766 * supported by the firmware 3767 */ 3768 static void myri10ge_probe_slices(struct myri10ge_priv *mgp) 3769 { 3770 struct myri10ge_cmd cmd; 3771 struct pci_dev *pdev = mgp->pdev; 3772 char *old_fw; 3773 bool old_allocated; 3774 int i, status, ncpus, msix_cap; 3775 3776 mgp->num_slices = 1; 3777 msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX); 3778 ncpus = num_online_cpus(); 3779 3780 if (myri10ge_max_slices == 1 || msix_cap == 0 || 3781 (myri10ge_max_slices == -1 && ncpus < 2)) 3782 return; 3783 3784 /* try to load the slice aware rss firmware */ 3785 old_fw = mgp->fw_name; 3786 old_allocated = mgp->fw_name_allocated; 3787 /* don't free old_fw if we override it. */ 3788 mgp->fw_name_allocated = false; 3789 3790 if (myri10ge_fw_name != NULL) { 3791 dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n", 3792 myri10ge_fw_name); 3793 set_fw_name(mgp, myri10ge_fw_name, false); 3794 } else if (old_fw == myri10ge_fw_aligned) 3795 set_fw_name(mgp, myri10ge_fw_rss_aligned, false); 3796 else 3797 set_fw_name(mgp, myri10ge_fw_rss_unaligned, false); 3798 status = myri10ge_load_firmware(mgp, 0); 3799 if (status != 0) { 3800 dev_info(&pdev->dev, "Rss firmware not found\n"); 3801 if (old_allocated) 3802 kfree(old_fw); 3803 return; 3804 } 3805 3806 /* hit the board with a reset to ensure it is alive */ 3807 memset(&cmd, 0, sizeof(cmd)); 3808 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 3809 if (status != 0) { 3810 dev_err(&mgp->pdev->dev, "failed reset\n"); 3811 goto abort_with_fw; 3812 } 3813 3814 mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot); 3815 3816 /* tell it the size of the interrupt queues */ 3817 cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot); 3818 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 3819 if (status != 0) { 3820 dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3821 goto abort_with_fw; 3822 } 3823 3824 /* ask the maximum number of slices it supports */ 3825 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0); 3826 if (status != 0) 3827 goto abort_with_fw; 3828 else 3829 mgp->num_slices = cmd.data0; 3830 3831 /* Only allow multiple slices if MSI-X is usable */ 3832 if (!myri10ge_msi) { 3833 goto abort_with_fw; 3834 } 3835 3836 /* if the admin did not specify a limit to how many 3837 * slices we should use, cap it automatically to the 3838 * number of CPUs currently online */ 3839 if (myri10ge_max_slices == -1) 3840 myri10ge_max_slices = ncpus; 3841 3842 if (mgp->num_slices > myri10ge_max_slices) 3843 mgp->num_slices = myri10ge_max_slices; 3844 3845 /* Now try to allocate as many MSI-X vectors as we have 3846 * slices. We give up on MSI-X if we can only get a single 3847 * vector. */ 3848 3849 mgp->msix_vectors = kcalloc(mgp->num_slices, sizeof(*mgp->msix_vectors), 3850 GFP_KERNEL); 3851 if (mgp->msix_vectors == NULL) 3852 goto disable_msix; 3853 for (i = 0; i < mgp->num_slices; i++) { 3854 mgp->msix_vectors[i].entry = i; 3855 } 3856 3857 while (mgp->num_slices > 1) { 3858 /* make sure it is a power of two */ 3859 while (!is_power_of_2(mgp->num_slices)) 3860 mgp->num_slices--; 3861 if (mgp->num_slices == 1) 3862 goto disable_msix; 3863 status = pci_enable_msix(pdev, mgp->msix_vectors, 3864 mgp->num_slices); 3865 if (status == 0) { 3866 pci_disable_msix(pdev); 3867 if (old_allocated) 3868 kfree(old_fw); 3869 return; 3870 } 3871 if (status > 0) 3872 mgp->num_slices = status; 3873 else 3874 goto disable_msix; 3875 } 3876 3877 disable_msix: 3878 if (mgp->msix_vectors != NULL) { 3879 kfree(mgp->msix_vectors); 3880 mgp->msix_vectors = NULL; 3881 } 3882 3883 abort_with_fw: 3884 mgp->num_slices = 1; 3885 set_fw_name(mgp, old_fw, old_allocated); 3886 myri10ge_load_firmware(mgp, 0); 3887 } 3888 3889 static const struct net_device_ops myri10ge_netdev_ops = { 3890 .ndo_open = myri10ge_open, 3891 .ndo_stop = myri10ge_close, 3892 .ndo_start_xmit = myri10ge_xmit, 3893 .ndo_get_stats64 = myri10ge_get_stats, 3894 .ndo_validate_addr = eth_validate_addr, 3895 .ndo_change_mtu = myri10ge_change_mtu, 3896 .ndo_fix_features = myri10ge_fix_features, 3897 .ndo_set_rx_mode = myri10ge_set_multicast_list, 3898 .ndo_set_mac_address = myri10ge_set_mac_address, 3899 }; 3900 3901 static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 3902 { 3903 struct net_device *netdev; 3904 struct myri10ge_priv *mgp; 3905 struct device *dev = &pdev->dev; 3906 int i; 3907 int status = -ENXIO; 3908 int dac_enabled; 3909 unsigned hdr_offset, ss_offset; 3910 static int board_number; 3911 3912 netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES); 3913 if (netdev == NULL) 3914 return -ENOMEM; 3915 3916 SET_NETDEV_DEV(netdev, &pdev->dev); 3917 3918 mgp = netdev_priv(netdev); 3919 mgp->dev = netdev; 3920 mgp->pdev = pdev; 3921 mgp->pause = myri10ge_flow_control; 3922 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 3923 mgp->msg_enable = netif_msg_init(myri10ge_debug, MYRI10GE_MSG_DEFAULT); 3924 mgp->board_number = board_number; 3925 init_waitqueue_head(&mgp->down_wq); 3926 3927 if (pci_enable_device(pdev)) { 3928 dev_err(&pdev->dev, "pci_enable_device call failed\n"); 3929 status = -ENODEV; 3930 goto abort_with_netdev; 3931 } 3932 3933 /* Find the vendor-specific cap so we can check 3934 * the reboot register later on */ 3935 mgp->vendor_specific_offset 3936 = pci_find_capability(pdev, PCI_CAP_ID_VNDR); 3937 3938 /* Set our max read request to 4KB */ 3939 status = pcie_set_readrq(pdev, 4096); 3940 if (status != 0) { 3941 dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n", 3942 status); 3943 goto abort_with_enabled; 3944 } 3945 3946 myri10ge_mask_surprise_down(pdev); 3947 pci_set_master(pdev); 3948 dac_enabled = 1; 3949 status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 3950 if (status != 0) { 3951 dac_enabled = 0; 3952 dev_err(&pdev->dev, 3953 "64-bit pci address mask was refused, " 3954 "trying 32-bit\n"); 3955 status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 3956 } 3957 if (status != 0) { 3958 dev_err(&pdev->dev, "Error %d setting DMA mask\n", status); 3959 goto abort_with_enabled; 3960 } 3961 (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 3962 mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd), 3963 &mgp->cmd_bus, GFP_KERNEL); 3964 if (mgp->cmd == NULL) 3965 goto abort_with_enabled; 3966 3967 mgp->board_span = pci_resource_len(pdev, 0); 3968 mgp->iomem_base = pci_resource_start(pdev, 0); 3969 mgp->mtrr = -1; 3970 mgp->wc_enabled = 0; 3971 #ifdef CONFIG_MTRR 3972 mgp->mtrr = mtrr_add(mgp->iomem_base, mgp->board_span, 3973 MTRR_TYPE_WRCOMB, 1); 3974 if (mgp->mtrr >= 0) 3975 mgp->wc_enabled = 1; 3976 #endif 3977 mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span); 3978 if (mgp->sram == NULL) { 3979 dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n", 3980 mgp->board_span, mgp->iomem_base); 3981 status = -ENXIO; 3982 goto abort_with_mtrr; 3983 } 3984 hdr_offset = 3985 ntohl(__raw_readl(mgp->sram + MCP_HEADER_PTR_OFFSET)) & 0xffffc; 3986 ss_offset = hdr_offset + offsetof(struct mcp_gen_header, string_specs); 3987 mgp->sram_size = ntohl(__raw_readl(mgp->sram + ss_offset)); 3988 if (mgp->sram_size > mgp->board_span || 3989 mgp->sram_size <= MYRI10GE_FW_OFFSET) { 3990 dev_err(&pdev->dev, 3991 "invalid sram_size %dB or board span %ldB\n", 3992 mgp->sram_size, mgp->board_span); 3993 goto abort_with_ioremap; 3994 } 3995 memcpy_fromio(mgp->eeprom_strings, 3996 mgp->sram + mgp->sram_size, MYRI10GE_EEPROM_STRINGS_SIZE); 3997 memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 3998 status = myri10ge_read_mac_addr(mgp); 3999 if (status) 4000 goto abort_with_ioremap; 4001 4002 for (i = 0; i < ETH_ALEN; i++) 4003 netdev->dev_addr[i] = mgp->mac_addr[i]; 4004 4005 myri10ge_select_firmware(mgp); 4006 4007 status = myri10ge_load_firmware(mgp, 1); 4008 if (status != 0) { 4009 dev_err(&pdev->dev, "failed to load firmware\n"); 4010 goto abort_with_ioremap; 4011 } 4012 myri10ge_probe_slices(mgp); 4013 status = myri10ge_alloc_slices(mgp); 4014 if (status != 0) { 4015 dev_err(&pdev->dev, "failed to alloc slice state\n"); 4016 goto abort_with_firmware; 4017 } 4018 netif_set_real_num_tx_queues(netdev, mgp->num_slices); 4019 netif_set_real_num_rx_queues(netdev, mgp->num_slices); 4020 status = myri10ge_reset(mgp); 4021 if (status != 0) { 4022 dev_err(&pdev->dev, "failed reset\n"); 4023 goto abort_with_slices; 4024 } 4025 #ifdef CONFIG_MYRI10GE_DCA 4026 myri10ge_setup_dca(mgp); 4027 #endif 4028 pci_set_drvdata(pdev, mgp); 4029 if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU) 4030 myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; 4031 if ((myri10ge_initial_mtu + ETH_HLEN) < 68) 4032 myri10ge_initial_mtu = 68; 4033 4034 netdev->netdev_ops = &myri10ge_netdev_ops; 4035 netdev->mtu = myri10ge_initial_mtu; 4036 netdev->base_addr = mgp->iomem_base; 4037 netdev->hw_features = mgp->features | NETIF_F_LRO | NETIF_F_RXCSUM; 4038 netdev->features = netdev->hw_features; 4039 4040 if (dac_enabled) 4041 netdev->features |= NETIF_F_HIGHDMA; 4042 4043 netdev->vlan_features |= mgp->features; 4044 if (mgp->fw_ver_tiny < 37) 4045 netdev->vlan_features &= ~NETIF_F_TSO6; 4046 if (mgp->fw_ver_tiny < 32) 4047 netdev->vlan_features &= ~NETIF_F_TSO; 4048 4049 /* make sure we can get an irq, and that MSI can be 4050 * setup (if available). Also ensure netdev->irq 4051 * is set to correct value if MSI is enabled */ 4052 status = myri10ge_request_irq(mgp); 4053 if (status != 0) 4054 goto abort_with_firmware; 4055 netdev->irq = pdev->irq; 4056 myri10ge_free_irq(mgp); 4057 4058 /* Save configuration space to be restored if the 4059 * nic resets due to a parity error */ 4060 pci_save_state(pdev); 4061 4062 /* Setup the watchdog timer */ 4063 setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer, 4064 (unsigned long)mgp); 4065 4066 SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops); 4067 INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog); 4068 status = register_netdev(netdev); 4069 if (status != 0) { 4070 dev_err(&pdev->dev, "register_netdev failed: %d\n", status); 4071 goto abort_with_state; 4072 } 4073 if (mgp->msix_enabled) 4074 dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, WC %s\n", 4075 mgp->num_slices, mgp->tx_boundary, mgp->fw_name, 4076 (mgp->wc_enabled ? "Enabled" : "Disabled")); 4077 else 4078 dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n", 4079 mgp->msi_enabled ? "MSI" : "xPIC", 4080 netdev->irq, mgp->tx_boundary, mgp->fw_name, 4081 (mgp->wc_enabled ? "Enabled" : "Disabled")); 4082 4083 board_number++; 4084 return 0; 4085 4086 abort_with_state: 4087 pci_restore_state(pdev); 4088 4089 abort_with_slices: 4090 myri10ge_free_slices(mgp); 4091 4092 abort_with_firmware: 4093 myri10ge_dummy_rdma(mgp, 0); 4094 4095 abort_with_ioremap: 4096 if (mgp->mac_addr_string != NULL) 4097 dev_err(&pdev->dev, 4098 "myri10ge_probe() failed: MAC=%s, SN=%ld\n", 4099 mgp->mac_addr_string, mgp->serial_number); 4100 iounmap(mgp->sram); 4101 4102 abort_with_mtrr: 4103 #ifdef CONFIG_MTRR 4104 if (mgp->mtrr >= 0) 4105 mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); 4106 #endif 4107 dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), 4108 mgp->cmd, mgp->cmd_bus); 4109 4110 abort_with_enabled: 4111 pci_disable_device(pdev); 4112 4113 abort_with_netdev: 4114 set_fw_name(mgp, NULL, false); 4115 free_netdev(netdev); 4116 return status; 4117 } 4118 4119 /* 4120 * myri10ge_remove 4121 * 4122 * Does what is necessary to shutdown one Myrinet device. Called 4123 * once for each Myrinet card by the kernel when a module is 4124 * unloaded. 4125 */ 4126 static void myri10ge_remove(struct pci_dev *pdev) 4127 { 4128 struct myri10ge_priv *mgp; 4129 struct net_device *netdev; 4130 4131 mgp = pci_get_drvdata(pdev); 4132 if (mgp == NULL) 4133 return; 4134 4135 cancel_work_sync(&mgp->watchdog_work); 4136 netdev = mgp->dev; 4137 unregister_netdev(netdev); 4138 4139 #ifdef CONFIG_MYRI10GE_DCA 4140 myri10ge_teardown_dca(mgp); 4141 #endif 4142 myri10ge_dummy_rdma(mgp, 0); 4143 4144 /* avoid a memory leak */ 4145 pci_restore_state(pdev); 4146 4147 iounmap(mgp->sram); 4148 4149 #ifdef CONFIG_MTRR 4150 if (mgp->mtrr >= 0) 4151 mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); 4152 #endif 4153 myri10ge_free_slices(mgp); 4154 if (mgp->msix_vectors != NULL) 4155 kfree(mgp->msix_vectors); 4156 dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), 4157 mgp->cmd, mgp->cmd_bus); 4158 4159 set_fw_name(mgp, NULL, false); 4160 free_netdev(netdev); 4161 pci_disable_device(pdev); 4162 pci_set_drvdata(pdev, NULL); 4163 } 4164 4165 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E 0x0008 4166 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9 0x0009 4167 4168 static DEFINE_PCI_DEVICE_TABLE(myri10ge_pci_tbl) = { 4169 {PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)}, 4170 {PCI_DEVICE 4171 (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)}, 4172 {0}, 4173 }; 4174 4175 MODULE_DEVICE_TABLE(pci, myri10ge_pci_tbl); 4176 4177 static struct pci_driver myri10ge_driver = { 4178 .name = "myri10ge", 4179 .probe = myri10ge_probe, 4180 .remove = myri10ge_remove, 4181 .id_table = myri10ge_pci_tbl, 4182 #ifdef CONFIG_PM 4183 .suspend = myri10ge_suspend, 4184 .resume = myri10ge_resume, 4185 #endif 4186 }; 4187 4188 #ifdef CONFIG_MYRI10GE_DCA 4189 static int 4190 myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p) 4191 { 4192 int err = driver_for_each_device(&myri10ge_driver.driver, 4193 NULL, &event, 4194 myri10ge_notify_dca_device); 4195 4196 if (err) 4197 return NOTIFY_BAD; 4198 return NOTIFY_DONE; 4199 } 4200 4201 static struct notifier_block myri10ge_dca_notifier = { 4202 .notifier_call = myri10ge_notify_dca, 4203 .next = NULL, 4204 .priority = 0, 4205 }; 4206 #endif /* CONFIG_MYRI10GE_DCA */ 4207 4208 static __init int myri10ge_init_module(void) 4209 { 4210 pr_info("Version %s\n", MYRI10GE_VERSION_STR); 4211 4212 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) { 4213 pr_err("Illegal rssh hash type %d, defaulting to source port\n", 4214 myri10ge_rss_hash); 4215 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4216 } 4217 #ifdef CONFIG_MYRI10GE_DCA 4218 dca_register_notify(&myri10ge_dca_notifier); 4219 #endif 4220 if (myri10ge_max_slices > MYRI10GE_MAX_SLICES) 4221 myri10ge_max_slices = MYRI10GE_MAX_SLICES; 4222 4223 return pci_register_driver(&myri10ge_driver); 4224 } 4225 4226 module_init(myri10ge_init_module); 4227 4228 static __exit void myri10ge_cleanup_module(void) 4229 { 4230 #ifdef CONFIG_MYRI10GE_DCA 4231 dca_unregister_notify(&myri10ge_dca_notifier); 4232 #endif 4233 pci_unregister_driver(&myri10ge_driver); 4234 } 4235 4236 module_exit(myri10ge_cleanup_module); 4237