/*
 * Copyright 2015 Amazon.com, Inc. or its affiliates.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif /* CONFIG_RFS_ACCEL */
#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/numa.h>
#include <linux/pci.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <net/ip.h>

#include "ena_netdev.h"
#include <linux/bpf_trace.h>
#include "ena_pci_id_tbl.h"

static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";

MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
MODULE_DESCRIPTION(DEVICE_NAME);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);

/* Time in jiffies before concluding the transmitter is hung.
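 * When a Tx queue stays stopped for longer than this, the networking
 * core's watchdog calls .ndo_tx_timeout (ena_tx_timeout() below), which
 * marks the adapter for a full device reset rather than trying to
 * recover the individual queue.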
 */
#define TX_TIMEOUT  (5 * HZ)

#define ENA_NAPI_BUDGET 64

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
                NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

static struct ena_aenq_handlers aenq_handlers;

static struct workqueue_struct *ena_wq;

MODULE_DEVICE_TABLE(pci, ena_pci_tbl);

static int ena_rss_init_default(struct ena_adapter *adapter);
static void check_for_admin_com_state(struct ena_adapter *adapter);
static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
static int ena_restore_device(struct ena_adapter *adapter);

static void ena_init_io_rings(struct ena_adapter *adapter,
                              int first_index, int count);
static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
                                   int count);
static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
                                  int count);
static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
                                           int first_index,
                                           int count);
static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
static void ena_napi_disable_in_range(struct ena_adapter *adapter,
                                      int first_index, int count);
static void ena_napi_enable_in_range(struct ena_adapter *adapter,
                                     int first_index, int count);
static int ena_up(struct ena_adapter *adapter);
static void ena_down(struct ena_adapter *adapter);
static void ena_unmask_interrupt(struct ena_ring *tx_ring,
                                 struct ena_ring *rx_ring);
static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
                                      struct ena_ring *rx_ring);
static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
                              struct ena_tx_buffer *tx_info);
static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
                                            int first_index, int count);

static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
        struct ena_adapter *adapter = netdev_priv(dev);

        /* Change the state of the device to trigger a reset.
         * Bail out if a reset has already been triggered.
         */

        if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
                return;

        adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
        u64_stats_update_begin(&adapter->syncp);
        adapter->dev_stats.tx_timeout++;
        u64_stats_update_end(&adapter->syncp);

        netif_err(adapter, tx_err, dev, "Transmit timeout\n");
}

static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
{
        int i;

        for (i = 0; i < adapter->num_io_queues; i++)
                adapter->rx_ring[i].mtu = mtu;
}

static int ena_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ena_adapter *adapter = netdev_priv(dev);
        int ret;

        ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
        if (!ret) {
                netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
                update_rx_ring_mtu(adapter, new_mtu);
                dev->mtu = new_mtu;
        } else {
                netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
                          new_mtu);
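                /* The device rejected the new MTU; dev->mtu and the
                 * per-ring MTU are left unchanged and the error is
                 * returned to the stack.
                 */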
        }

        return ret;
}

static int ena_xmit_common(struct net_device *dev,
                           struct ena_ring *ring,
                           struct ena_tx_buffer *tx_info,
                           struct ena_com_tx_ctx *ena_tx_ctx,
                           u16 next_to_use,
                           u32 bytes)
{
        struct ena_adapter *adapter = netdev_priv(dev);
        int rc, nb_hw_desc;

        if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
                                                ena_tx_ctx))) {
                netif_dbg(adapter, tx_queued, dev,
                          "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
                          ring->qid);
                ena_com_write_sq_doorbell(ring->ena_com_io_sq);
        }

        /* prepare the packet's descriptors for the DMA engine */
        rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
                                &nb_hw_desc);

        /* In case there isn't enough space in the queue for the packet,
         * we simply drop it. All other failure reasons of
         * ena_com_prepare_tx() are fatal and therefore require a device reset.
         */
        if (unlikely(rc)) {
                netif_err(adapter, tx_queued, dev,
                          "failed to prepare tx bufs\n");
                u64_stats_update_begin(&ring->syncp);
                ring->tx_stats.prepare_ctx_err++;
                u64_stats_update_end(&ring->syncp);
                if (rc != -ENOMEM) {
                        adapter->reset_reason =
                                ENA_REGS_RESET_DRIVER_INVALID_STATE;
                        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
                }
                return rc;
        }

        u64_stats_update_begin(&ring->syncp);
        ring->tx_stats.cnt++;
        ring->tx_stats.bytes += bytes;
        u64_stats_update_end(&ring->syncp);

        tx_info->tx_descs = nb_hw_desc;
        tx_info->last_jiffies = jiffies;
        tx_info->print_once = 0;

        ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
                                                 ring->ring_size);
        return 0;
}

/* This is the XDP napi callback. XDP queues use a separate napi callback
 * from the one used by Rx/Tx queues.
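 * XDP Tx rings reuse the regular tx_ring array and are placed right after
 * the num_io_queues regular rings (see ena_init_all_xdp_queues()), so this
 * poll routine only reaps Tx completions and never touches an Rx ring.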
212 */ 213 static int ena_xdp_io_poll(struct napi_struct *napi, int budget) 214 { 215 struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); 216 u32 xdp_work_done, xdp_budget; 217 struct ena_ring *xdp_ring; 218 int napi_comp_call = 0; 219 int ret; 220 221 xdp_ring = ena_napi->xdp_ring; 222 xdp_ring->first_interrupt = ena_napi->first_interrupt; 223 224 xdp_budget = budget; 225 226 if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || 227 test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { 228 napi_complete_done(napi, 0); 229 return 0; 230 } 231 232 xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); 233 234 /* If the device is about to reset or down, avoid unmask 235 * the interrupt and return 0 so NAPI won't reschedule 236 */ 237 if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { 238 napi_complete_done(napi, 0); 239 ret = 0; 240 } else if (xdp_budget > xdp_work_done) { 241 napi_comp_call = 1; 242 if (napi_complete_done(napi, xdp_work_done)) 243 ena_unmask_interrupt(xdp_ring, NULL); 244 ena_update_ring_numa_node(xdp_ring, NULL); 245 ret = xdp_work_done; 246 } else { 247 ret = xdp_budget; 248 } 249 250 u64_stats_update_begin(&xdp_ring->syncp); 251 xdp_ring->tx_stats.napi_comp += napi_comp_call; 252 xdp_ring->tx_stats.tx_poll++; 253 u64_stats_update_end(&xdp_ring->syncp); 254 255 return ret; 256 } 257 258 static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring, 259 struct ena_tx_buffer *tx_info, 260 struct xdp_buff *xdp, 261 void **push_hdr, 262 u32 *push_len) 263 { 264 struct ena_adapter *adapter = xdp_ring->adapter; 265 struct ena_com_buf *ena_buf; 266 dma_addr_t dma = 0; 267 u32 size; 268 269 tx_info->xdpf = convert_to_xdp_frame(xdp); 270 size = tx_info->xdpf->len; 271 ena_buf = tx_info->bufs; 272 273 /* llq push buffer */ 274 *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); 275 *push_hdr = tx_info->xdpf->data; 276 277 if (size - *push_len > 0) { 278 dma = dma_map_single(xdp_ring->dev, 279 *push_hdr + *push_len, 280 size - *push_len, 281 DMA_TO_DEVICE); 282 if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) 283 goto error_report_dma_error; 284 285 tx_info->map_linear_data = 1; 286 tx_info->num_of_bufs = 1; 287 } 288 289 ena_buf->paddr = dma; 290 ena_buf->len = size; 291 292 return 0; 293 294 error_report_dma_error: 295 u64_stats_update_begin(&xdp_ring->syncp); 296 xdp_ring->tx_stats.dma_mapping_err++; 297 u64_stats_update_end(&xdp_ring->syncp); 298 netdev_warn(adapter->netdev, "failed to map xdp buff\n"); 299 300 xdp_return_frame_rx_napi(tx_info->xdpf); 301 tx_info->xdpf = NULL; 302 tx_info->num_of_bufs = 0; 303 304 return -EINVAL; 305 } 306 307 static int ena_xdp_xmit_buff(struct net_device *dev, 308 struct xdp_buff *xdp, 309 int qid, 310 struct ena_rx_buffer *rx_info) 311 { 312 struct ena_adapter *adapter = netdev_priv(dev); 313 struct ena_com_tx_ctx ena_tx_ctx = {0}; 314 struct ena_tx_buffer *tx_info; 315 struct ena_ring *xdp_ring; 316 struct ena_ring *rx_ring; 317 u16 next_to_use, req_id; 318 int rc; 319 void *push_hdr; 320 u32 push_len; 321 322 xdp_ring = &adapter->tx_ring[qid]; 323 next_to_use = xdp_ring->next_to_use; 324 req_id = xdp_ring->free_ids[next_to_use]; 325 tx_info = &xdp_ring->tx_buffer_info[req_id]; 326 tx_info->num_of_bufs = 0; 327 rx_ring = &xdp_ring->adapter->rx_ring[qid - 328 xdp_ring->adapter->xdp_first_ring]; 329 page_ref_inc(rx_info->page); 330 tx_info->xdp_rx_page = rx_info->page; 331 332 rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len); 333 if (unlikely(rc)) 334 goto 
error_drop_packet; 335 336 ena_tx_ctx.ena_bufs = tx_info->bufs; 337 ena_tx_ctx.push_header = push_hdr; 338 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 339 ena_tx_ctx.req_id = req_id; 340 ena_tx_ctx.header_len = push_len; 341 342 rc = ena_xmit_common(dev, 343 xdp_ring, 344 tx_info, 345 &ena_tx_ctx, 346 next_to_use, 347 xdp->data_end - xdp->data); 348 if (rc) 349 goto error_unmap_dma; 350 /* trigger the dma engine. ena_com_write_sq_doorbell() 351 * has a mb 352 */ 353 ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); 354 u64_stats_update_begin(&xdp_ring->syncp); 355 xdp_ring->tx_stats.doorbells++; 356 u64_stats_update_end(&xdp_ring->syncp); 357 358 return NETDEV_TX_OK; 359 360 error_unmap_dma: 361 ena_unmap_tx_buff(xdp_ring, tx_info); 362 tx_info->xdpf = NULL; 363 error_drop_packet: 364 365 return NETDEV_TX_OK; 366 } 367 368 static int ena_xdp_execute(struct ena_ring *rx_ring, 369 struct xdp_buff *xdp, 370 struct ena_rx_buffer *rx_info) 371 { 372 struct bpf_prog *xdp_prog; 373 u32 verdict = XDP_PASS; 374 375 rcu_read_lock(); 376 xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); 377 378 if (!xdp_prog) 379 goto out; 380 381 verdict = bpf_prog_run_xdp(xdp_prog, xdp); 382 383 if (verdict == XDP_TX) 384 ena_xdp_xmit_buff(rx_ring->netdev, 385 xdp, 386 rx_ring->qid + rx_ring->adapter->num_io_queues, 387 rx_info); 388 else if (unlikely(verdict == XDP_ABORTED)) 389 trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); 390 else if (unlikely(verdict > XDP_TX)) 391 bpf_warn_invalid_xdp_action(verdict); 392 out: 393 rcu_read_unlock(); 394 return verdict; 395 } 396 397 static void ena_init_all_xdp_queues(struct ena_adapter *adapter) 398 { 399 adapter->xdp_first_ring = adapter->num_io_queues; 400 adapter->xdp_num_queues = adapter->num_io_queues; 401 402 ena_init_io_rings(adapter, 403 adapter->xdp_first_ring, 404 adapter->xdp_num_queues); 405 } 406 407 static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) 408 { 409 int rc = 0; 410 411 rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, 412 adapter->xdp_num_queues); 413 if (rc) 414 goto setup_err; 415 416 rc = ena_create_io_tx_queues_in_range(adapter, 417 adapter->xdp_first_ring, 418 adapter->xdp_num_queues); 419 if (rc) 420 goto create_err; 421 422 return 0; 423 424 create_err: 425 ena_free_all_io_tx_resources(adapter); 426 setup_err: 427 return rc; 428 } 429 430 /* Provides a way for both kernel and bpf-prog to know 431 * more about the RX-queue a given XDP frame arrived on. 432 */ 433 static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) 434 { 435 int rc; 436 437 rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid); 438 439 if (rc) { 440 netif_err(rx_ring->adapter, ifup, rx_ring->netdev, 441 "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", 442 rx_ring->qid, rc); 443 goto err; 444 } 445 446 rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, 447 NULL); 448 449 if (rc) { 450 netif_err(rx_ring->adapter, ifup, rx_ring->netdev, 451 "Failed to register xdp rx queue info memory model. 
RX queue num %d rc: %d\n", 452 rx_ring->qid, rc); 453 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 454 } 455 456 err: 457 return rc; 458 } 459 460 static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) 461 { 462 xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); 463 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 464 } 465 466 void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, 467 struct bpf_prog *prog, 468 int first, 469 int count) 470 { 471 struct ena_ring *rx_ring; 472 int i = 0; 473 474 for (i = first; i < count; i++) { 475 rx_ring = &adapter->rx_ring[i]; 476 xchg(&rx_ring->xdp_bpf_prog, prog); 477 if (prog) { 478 ena_xdp_register_rxq_info(rx_ring); 479 rx_ring->rx_headroom = XDP_PACKET_HEADROOM; 480 } else { 481 ena_xdp_unregister_rxq_info(rx_ring); 482 rx_ring->rx_headroom = 0; 483 } 484 } 485 } 486 487 void ena_xdp_exchange_program(struct ena_adapter *adapter, 488 struct bpf_prog *prog) 489 { 490 struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); 491 492 ena_xdp_exchange_program_rx_in_range(adapter, 493 prog, 494 0, 495 adapter->num_io_queues); 496 497 if (old_bpf_prog) 498 bpf_prog_put(old_bpf_prog); 499 } 500 501 static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) 502 { 503 bool was_up; 504 int rc; 505 506 was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 507 508 if (was_up) 509 ena_down(adapter); 510 511 adapter->xdp_first_ring = 0; 512 adapter->xdp_num_queues = 0; 513 ena_xdp_exchange_program(adapter, NULL); 514 if (was_up) { 515 rc = ena_up(adapter); 516 if (rc) 517 return rc; 518 } 519 return 0; 520 } 521 522 static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) 523 { 524 struct ena_adapter *adapter = netdev_priv(netdev); 525 struct bpf_prog *prog = bpf->prog; 526 struct bpf_prog *old_bpf_prog; 527 int rc, prev_mtu; 528 bool is_up; 529 530 is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 531 rc = ena_xdp_allowed(adapter); 532 if (rc == ENA_XDP_ALLOWED) { 533 old_bpf_prog = adapter->xdp_bpf_prog; 534 if (prog) { 535 if (!is_up) { 536 ena_init_all_xdp_queues(adapter); 537 } else if (!old_bpf_prog) { 538 ena_down(adapter); 539 ena_init_all_xdp_queues(adapter); 540 } 541 ena_xdp_exchange_program(adapter, prog); 542 543 if (is_up && !old_bpf_prog) { 544 rc = ena_up(adapter); 545 if (rc) 546 return rc; 547 } 548 } else if (old_bpf_prog) { 549 rc = ena_destroy_and_free_all_xdp_queues(adapter); 550 if (rc) 551 return rc; 552 } 553 554 prev_mtu = netdev->max_mtu; 555 netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; 556 557 if (!old_bpf_prog) 558 netif_info(adapter, drv, adapter->netdev, 559 "xdp program set, changing the max_mtu from %d to %d", 560 prev_mtu, netdev->max_mtu); 561 562 } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { 563 netif_err(adapter, drv, adapter->netdev, 564 "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", 565 netdev->mtu, ENA_XDP_MAX_MTU); 566 NL_SET_ERR_MSG_MOD(bpf->extack, 567 "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); 568 return -EINVAL; 569 } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { 570 netif_err(adapter, drv, adapter->netdev, 571 "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. 
The current queue count (%d), the maximal queue count (%d)\n", 572 adapter->num_io_queues, adapter->max_num_io_queues); 573 NL_SET_ERR_MSG_MOD(bpf->extack, 574 "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); 575 return -EINVAL; 576 } 577 578 return 0; 579 } 580 581 /* This is the main xdp callback, it's used by the kernel to set/unset the xdp 582 * program as well as to query the current xdp program id. 583 */ 584 static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) 585 { 586 struct ena_adapter *adapter = netdev_priv(netdev); 587 588 switch (bpf->command) { 589 case XDP_SETUP_PROG: 590 return ena_xdp_set(netdev, bpf); 591 case XDP_QUERY_PROG: 592 bpf->prog_id = adapter->xdp_bpf_prog ? 593 adapter->xdp_bpf_prog->aux->id : 0; 594 break; 595 default: 596 return -EINVAL; 597 } 598 return 0; 599 } 600 601 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) 602 { 603 #ifdef CONFIG_RFS_ACCEL 604 u32 i; 605 int rc; 606 607 adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues); 608 if (!adapter->netdev->rx_cpu_rmap) 609 return -ENOMEM; 610 for (i = 0; i < adapter->num_io_queues; i++) { 611 int irq_idx = ENA_IO_IRQ_IDX(i); 612 613 rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, 614 pci_irq_vector(adapter->pdev, irq_idx)); 615 if (rc) { 616 free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); 617 adapter->netdev->rx_cpu_rmap = NULL; 618 return rc; 619 } 620 } 621 #endif /* CONFIG_RFS_ACCEL */ 622 return 0; 623 } 624 625 static void ena_init_io_rings_common(struct ena_adapter *adapter, 626 struct ena_ring *ring, u16 qid) 627 { 628 ring->qid = qid; 629 ring->pdev = adapter->pdev; 630 ring->dev = &adapter->pdev->dev; 631 ring->netdev = adapter->netdev; 632 ring->napi = &adapter->ena_napi[qid].napi; 633 ring->adapter = adapter; 634 ring->ena_dev = adapter->ena_dev; 635 ring->per_napi_packets = 0; 636 ring->cpu = 0; 637 ring->first_interrupt = false; 638 ring->no_interrupt_event_cnt = 0; 639 u64_stats_init(&ring->syncp); 640 } 641 642 static void ena_init_io_rings(struct ena_adapter *adapter, 643 int first_index, int count) 644 { 645 struct ena_com_dev *ena_dev; 646 struct ena_ring *txr, *rxr; 647 int i; 648 649 ena_dev = adapter->ena_dev; 650 651 for (i = first_index; i < first_index + count; i++) { 652 txr = &adapter->tx_ring[i]; 653 rxr = &adapter->rx_ring[i]; 654 655 /* TX common ring state */ 656 ena_init_io_rings_common(adapter, txr, i); 657 658 /* TX specific ring state */ 659 txr->ring_size = adapter->requested_tx_ring_size; 660 txr->tx_max_header_size = ena_dev->tx_max_header_size; 661 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; 662 txr->sgl_size = adapter->max_tx_sgl_size; 663 txr->smoothed_interval = 664 ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); 665 666 /* Don't init RX queues for xdp queues */ 667 if (!ENA_IS_XDP_INDEX(adapter, i)) { 668 /* RX common ring state */ 669 ena_init_io_rings_common(adapter, rxr, i); 670 671 /* RX specific ring state */ 672 rxr->ring_size = adapter->requested_rx_ring_size; 673 rxr->rx_copybreak = adapter->rx_copybreak; 674 rxr->sgl_size = adapter->max_rx_sgl_size; 675 rxr->smoothed_interval = 676 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); 677 rxr->empty_rx_queue = 0; 678 adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 679 } 680 } 681 } 682 683 /* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors) 684 * @adapter: network interface device structure 685 * @qid: queue index 686 * 687 * Return 
0 on success, negative on failure 688 */ 689 static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) 690 { 691 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 692 struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; 693 int size, i, node; 694 695 if (tx_ring->tx_buffer_info) { 696 netif_err(adapter, ifup, 697 adapter->netdev, "tx_buffer_info info is not NULL"); 698 return -EEXIST; 699 } 700 701 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; 702 node = cpu_to_node(ena_irq->cpu); 703 704 tx_ring->tx_buffer_info = vzalloc_node(size, node); 705 if (!tx_ring->tx_buffer_info) { 706 tx_ring->tx_buffer_info = vzalloc(size); 707 if (!tx_ring->tx_buffer_info) 708 goto err_tx_buffer_info; 709 } 710 711 size = sizeof(u16) * tx_ring->ring_size; 712 tx_ring->free_ids = vzalloc_node(size, node); 713 if (!tx_ring->free_ids) { 714 tx_ring->free_ids = vzalloc(size); 715 if (!tx_ring->free_ids) 716 goto err_tx_free_ids; 717 } 718 719 size = tx_ring->tx_max_header_size; 720 tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); 721 if (!tx_ring->push_buf_intermediate_buf) { 722 tx_ring->push_buf_intermediate_buf = vzalloc(size); 723 if (!tx_ring->push_buf_intermediate_buf) 724 goto err_push_buf_intermediate_buf; 725 } 726 727 /* Req id ring for TX out of order completions */ 728 for (i = 0; i < tx_ring->ring_size; i++) 729 tx_ring->free_ids[i] = i; 730 731 /* Reset tx statistics */ 732 memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); 733 734 tx_ring->next_to_use = 0; 735 tx_ring->next_to_clean = 0; 736 tx_ring->cpu = ena_irq->cpu; 737 return 0; 738 739 err_push_buf_intermediate_buf: 740 vfree(tx_ring->free_ids); 741 tx_ring->free_ids = NULL; 742 err_tx_free_ids: 743 vfree(tx_ring->tx_buffer_info); 744 tx_ring->tx_buffer_info = NULL; 745 err_tx_buffer_info: 746 return -ENOMEM; 747 } 748 749 /* ena_free_tx_resources - Free I/O Tx Resources per Queue 750 * @adapter: network interface device structure 751 * @qid: queue index 752 * 753 * Free all transmit software resources 754 */ 755 static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) 756 { 757 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 758 759 vfree(tx_ring->tx_buffer_info); 760 tx_ring->tx_buffer_info = NULL; 761 762 vfree(tx_ring->free_ids); 763 tx_ring->free_ids = NULL; 764 765 vfree(tx_ring->push_buf_intermediate_buf); 766 tx_ring->push_buf_intermediate_buf = NULL; 767 } 768 769 static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, 770 int first_index, 771 int count) 772 { 773 int i, rc = 0; 774 775 for (i = first_index; i < first_index + count; i++) { 776 rc = ena_setup_tx_resources(adapter, i); 777 if (rc) 778 goto err_setup_tx; 779 } 780 781 return 0; 782 783 err_setup_tx: 784 785 netif_err(adapter, ifup, adapter->netdev, 786 "Tx queue %d: allocation failed\n", i); 787 788 /* rewind the index freeing the rings as we go */ 789 while (first_index < i--) 790 ena_free_tx_resources(adapter, i); 791 return rc; 792 } 793 794 static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, 795 int first_index, int count) 796 { 797 int i; 798 799 for (i = first_index; i < first_index + count; i++) 800 ena_free_tx_resources(adapter, i); 801 } 802 803 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues 804 * @adapter: board private structure 805 * 806 * Free all transmit software resources 807 */ 808 static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) 809 { 810 ena_free_all_io_tx_resources_in_range(adapter, 
811 0, 812 adapter->xdp_num_queues + 813 adapter->num_io_queues); 814 } 815 816 static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) 817 { 818 if (likely(req_id < rx_ring->ring_size)) 819 return 0; 820 821 netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, 822 "Invalid rx req_id: %hu\n", req_id); 823 824 u64_stats_update_begin(&rx_ring->syncp); 825 rx_ring->rx_stats.bad_req_id++; 826 u64_stats_update_end(&rx_ring->syncp); 827 828 /* Trigger device reset */ 829 rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 830 set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags); 831 return -EFAULT; 832 } 833 834 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) 835 * @adapter: network interface device structure 836 * @qid: queue index 837 * 838 * Returns 0 on success, negative on failure 839 */ 840 static int ena_setup_rx_resources(struct ena_adapter *adapter, 841 u32 qid) 842 { 843 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 844 struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; 845 int size, node, i; 846 847 if (rx_ring->rx_buffer_info) { 848 netif_err(adapter, ifup, adapter->netdev, 849 "rx_buffer_info is not NULL"); 850 return -EEXIST; 851 } 852 853 /* alloc extra element so in rx path 854 * we can always prefetch rx_info + 1 855 */ 856 size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1); 857 node = cpu_to_node(ena_irq->cpu); 858 859 rx_ring->rx_buffer_info = vzalloc_node(size, node); 860 if (!rx_ring->rx_buffer_info) { 861 rx_ring->rx_buffer_info = vzalloc(size); 862 if (!rx_ring->rx_buffer_info) 863 return -ENOMEM; 864 } 865 866 size = sizeof(u16) * rx_ring->ring_size; 867 rx_ring->free_ids = vzalloc_node(size, node); 868 if (!rx_ring->free_ids) { 869 rx_ring->free_ids = vzalloc(size); 870 if (!rx_ring->free_ids) { 871 vfree(rx_ring->rx_buffer_info); 872 rx_ring->rx_buffer_info = NULL; 873 return -ENOMEM; 874 } 875 } 876 877 /* Req id ring for receiving RX pkts out of order */ 878 for (i = 0; i < rx_ring->ring_size; i++) 879 rx_ring->free_ids[i] = i; 880 881 /* Reset rx statistics */ 882 memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); 883 884 rx_ring->next_to_clean = 0; 885 rx_ring->next_to_use = 0; 886 rx_ring->cpu = ena_irq->cpu; 887 888 return 0; 889 } 890 891 /* ena_free_rx_resources - Free I/O Rx Resources 892 * @adapter: network interface device structure 893 * @qid: queue index 894 * 895 * Free all receive software resources 896 */ 897 static void ena_free_rx_resources(struct ena_adapter *adapter, 898 u32 qid) 899 { 900 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 901 902 vfree(rx_ring->rx_buffer_info); 903 rx_ring->rx_buffer_info = NULL; 904 905 vfree(rx_ring->free_ids); 906 rx_ring->free_ids = NULL; 907 } 908 909 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues 910 * @adapter: board private structure 911 * 912 * Return 0 on success, negative on failure 913 */ 914 static int ena_setup_all_rx_resources(struct ena_adapter *adapter) 915 { 916 int i, rc = 0; 917 918 for (i = 0; i < adapter->num_io_queues; i++) { 919 rc = ena_setup_rx_resources(adapter, i); 920 if (rc) 921 goto err_setup_rx; 922 } 923 924 return 0; 925 926 err_setup_rx: 927 928 netif_err(adapter, ifup, adapter->netdev, 929 "Rx queue %d: allocation failed\n", i); 930 931 /* rewind the index freeing the rings as we go */ 932 while (i--) 933 ena_free_rx_resources(adapter, i); 934 return rc; 935 } 936 937 /* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues 938 * @adapter: board 
private structure 939 * 940 * Free all receive software resources 941 */ 942 static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) 943 { 944 int i; 945 946 for (i = 0; i < adapter->num_io_queues; i++) 947 ena_free_rx_resources(adapter, i); 948 } 949 950 static int ena_alloc_rx_page(struct ena_ring *rx_ring, 951 struct ena_rx_buffer *rx_info, gfp_t gfp) 952 { 953 struct ena_com_buf *ena_buf; 954 struct page *page; 955 dma_addr_t dma; 956 957 /* if previous allocated page is not used */ 958 if (unlikely(rx_info->page)) 959 return 0; 960 961 page = alloc_page(gfp); 962 if (unlikely(!page)) { 963 u64_stats_update_begin(&rx_ring->syncp); 964 rx_ring->rx_stats.page_alloc_fail++; 965 u64_stats_update_end(&rx_ring->syncp); 966 return -ENOMEM; 967 } 968 969 dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, 970 DMA_FROM_DEVICE); 971 if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { 972 u64_stats_update_begin(&rx_ring->syncp); 973 rx_ring->rx_stats.dma_mapping_err++; 974 u64_stats_update_end(&rx_ring->syncp); 975 976 __free_page(page); 977 return -EIO; 978 } 979 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 980 "alloc page %p, rx_info %p\n", page, rx_info); 981 982 rx_info->page = page; 983 rx_info->page_offset = 0; 984 ena_buf = &rx_info->ena_buf; 985 ena_buf->paddr = dma + rx_ring->rx_headroom; 986 ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom; 987 988 return 0; 989 } 990 991 static void ena_free_rx_page(struct ena_ring *rx_ring, 992 struct ena_rx_buffer *rx_info) 993 { 994 struct page *page = rx_info->page; 995 struct ena_com_buf *ena_buf = &rx_info->ena_buf; 996 997 if (unlikely(!page)) { 998 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, 999 "Trying to free unallocated buffer\n"); 1000 return; 1001 } 1002 1003 dma_unmap_page(rx_ring->dev, 1004 ena_buf->paddr - rx_ring->rx_headroom, 1005 ENA_PAGE_SIZE, 1006 DMA_FROM_DEVICE); 1007 1008 __free_page(page); 1009 rx_info->page = NULL; 1010 } 1011 1012 static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) 1013 { 1014 u16 next_to_use, req_id; 1015 u32 i; 1016 int rc; 1017 1018 next_to_use = rx_ring->next_to_use; 1019 1020 for (i = 0; i < num; i++) { 1021 struct ena_rx_buffer *rx_info; 1022 1023 req_id = rx_ring->free_ids[next_to_use]; 1024 rc = validate_rx_req_id(rx_ring, req_id); 1025 if (unlikely(rc < 0)) 1026 break; 1027 1028 rx_info = &rx_ring->rx_buffer_info[req_id]; 1029 1030 1031 rc = ena_alloc_rx_page(rx_ring, rx_info, 1032 GFP_ATOMIC | __GFP_COMP); 1033 if (unlikely(rc < 0)) { 1034 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, 1035 "failed to alloc buffer for rx queue %d\n", 1036 rx_ring->qid); 1037 break; 1038 } 1039 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, 1040 &rx_info->ena_buf, 1041 req_id); 1042 if (unlikely(rc)) { 1043 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, 1044 "failed to add buffer for rx queue %d\n", 1045 rx_ring->qid); 1046 break; 1047 } 1048 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, 1049 rx_ring->ring_size); 1050 } 1051 1052 if (unlikely(i < num)) { 1053 u64_stats_update_begin(&rx_ring->syncp); 1054 rx_ring->rx_stats.refil_partial++; 1055 u64_stats_update_end(&rx_ring->syncp); 1056 netdev_warn(rx_ring->netdev, 1057 "refilled rx qid %d with only %d buffers (from %d)\n", 1058 rx_ring->qid, i, num); 1059 } 1060 1061 /* ena_com_write_sq_doorbell issues a wmb() */ 1062 if (likely(i)) 1063 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); 1064 1065 rx_ring->next_to_use = next_to_use; 1066 1067 return i; 1068 } 1069 1070 static void 
ena_free_rx_bufs(struct ena_adapter *adapter, 1071 u32 qid) 1072 { 1073 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 1074 u32 i; 1075 1076 for (i = 0; i < rx_ring->ring_size; i++) { 1077 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; 1078 1079 if (rx_info->page) 1080 ena_free_rx_page(rx_ring, rx_info); 1081 } 1082 } 1083 1084 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers 1085 * @adapter: board private structure 1086 */ 1087 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) 1088 { 1089 struct ena_ring *rx_ring; 1090 int i, rc, bufs_num; 1091 1092 for (i = 0; i < adapter->num_io_queues; i++) { 1093 rx_ring = &adapter->rx_ring[i]; 1094 bufs_num = rx_ring->ring_size - 1; 1095 rc = ena_refill_rx_bufs(rx_ring, bufs_num); 1096 1097 if (unlikely(rc != bufs_num)) 1098 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, 1099 "refilling Queue %d failed. allocated %d buffers from: %d\n", 1100 i, rc, bufs_num); 1101 } 1102 } 1103 1104 static void ena_free_all_rx_bufs(struct ena_adapter *adapter) 1105 { 1106 int i; 1107 1108 for (i = 0; i < adapter->num_io_queues; i++) 1109 ena_free_rx_bufs(adapter, i); 1110 } 1111 1112 static void ena_unmap_tx_buff(struct ena_ring *tx_ring, 1113 struct ena_tx_buffer *tx_info) 1114 { 1115 struct ena_com_buf *ena_buf; 1116 u32 cnt; 1117 int i; 1118 1119 ena_buf = tx_info->bufs; 1120 cnt = tx_info->num_of_bufs; 1121 1122 if (unlikely(!cnt)) 1123 return; 1124 1125 if (tx_info->map_linear_data) { 1126 dma_unmap_single(tx_ring->dev, 1127 dma_unmap_addr(ena_buf, paddr), 1128 dma_unmap_len(ena_buf, len), 1129 DMA_TO_DEVICE); 1130 ena_buf++; 1131 cnt--; 1132 } 1133 1134 /* unmap remaining mapped pages */ 1135 for (i = 0; i < cnt; i++) { 1136 dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), 1137 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); 1138 ena_buf++; 1139 } 1140 } 1141 1142 /* ena_free_tx_bufs - Free Tx Buffers per Queue 1143 * @tx_ring: TX ring for which buffers be freed 1144 */ 1145 static void ena_free_tx_bufs(struct ena_ring *tx_ring) 1146 { 1147 bool print_once = true; 1148 u32 i; 1149 1150 for (i = 0; i < tx_ring->ring_size; i++) { 1151 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; 1152 1153 if (!tx_info->skb) 1154 continue; 1155 1156 if (print_once) { 1157 netdev_notice(tx_ring->netdev, 1158 "free uncompleted tx skb qid %d idx 0x%x\n", 1159 tx_ring->qid, i); 1160 print_once = false; 1161 } else { 1162 netdev_dbg(tx_ring->netdev, 1163 "free uncompleted tx skb qid %d idx 0x%x\n", 1164 tx_ring->qid, i); 1165 } 1166 1167 ena_unmap_tx_buff(tx_ring, tx_info); 1168 1169 dev_kfree_skb_any(tx_info->skb); 1170 } 1171 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, 1172 tx_ring->qid)); 1173 } 1174 1175 static void ena_free_all_tx_bufs(struct ena_adapter *adapter) 1176 { 1177 struct ena_ring *tx_ring; 1178 int i; 1179 1180 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { 1181 tx_ring = &adapter->tx_ring[i]; 1182 ena_free_tx_bufs(tx_ring); 1183 } 1184 } 1185 1186 static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) 1187 { 1188 u16 ena_qid; 1189 int i; 1190 1191 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { 1192 ena_qid = ENA_IO_TXQ_IDX(i); 1193 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1194 } 1195 } 1196 1197 static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) 1198 { 1199 u16 ena_qid; 1200 int i; 1201 1202 for (i = 0; i < adapter->num_io_queues; i++) { 1203 ena_qid = ENA_IO_RXQ_IDX(i); 1204 
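                /* Make sure any in-flight DIM (adaptive interrupt
                 * moderation) work item for this queue has finished
                 * before the queue is destroyed.
                 */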
cancel_work_sync(&adapter->ena_napi[i].dim.work); 1205 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1206 } 1207 } 1208 1209 static void ena_destroy_all_io_queues(struct ena_adapter *adapter) 1210 { 1211 ena_destroy_all_tx_queues(adapter); 1212 ena_destroy_all_rx_queues(adapter); 1213 } 1214 1215 static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, 1216 struct ena_tx_buffer *tx_info, bool is_xdp) 1217 { 1218 if (tx_info) 1219 netif_err(ring->adapter, 1220 tx_done, 1221 ring->netdev, 1222 "tx_info doesn't have valid %s", 1223 is_xdp ? "xdp frame" : "skb"); 1224 else 1225 netif_err(ring->adapter, 1226 tx_done, 1227 ring->netdev, 1228 "Invalid req_id: %hu\n", 1229 req_id); 1230 1231 u64_stats_update_begin(&ring->syncp); 1232 ring->tx_stats.bad_req_id++; 1233 u64_stats_update_end(&ring->syncp); 1234 1235 /* Trigger device reset */ 1236 ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; 1237 set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); 1238 return -EFAULT; 1239 } 1240 1241 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 1242 { 1243 struct ena_tx_buffer *tx_info = NULL; 1244 1245 if (likely(req_id < tx_ring->ring_size)) { 1246 tx_info = &tx_ring->tx_buffer_info[req_id]; 1247 if (likely(tx_info->skb)) 1248 return 0; 1249 } 1250 1251 return handle_invalid_req_id(tx_ring, req_id, tx_info, false); 1252 } 1253 1254 static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) 1255 { 1256 struct ena_tx_buffer *tx_info = NULL; 1257 1258 if (likely(req_id < xdp_ring->ring_size)) { 1259 tx_info = &xdp_ring->tx_buffer_info[req_id]; 1260 if (likely(tx_info->xdpf)) 1261 return 0; 1262 } 1263 1264 return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); 1265 } 1266 1267 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) 1268 { 1269 struct netdev_queue *txq; 1270 bool above_thresh; 1271 u32 tx_bytes = 0; 1272 u32 total_done = 0; 1273 u16 next_to_clean; 1274 u16 req_id; 1275 int tx_pkts = 0; 1276 int rc; 1277 1278 next_to_clean = tx_ring->next_to_clean; 1279 txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid); 1280 1281 while (tx_pkts < budget) { 1282 struct ena_tx_buffer *tx_info; 1283 struct sk_buff *skb; 1284 1285 rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, 1286 &req_id); 1287 if (rc) 1288 break; 1289 1290 rc = validate_tx_req_id(tx_ring, req_id); 1291 if (rc) 1292 break; 1293 1294 tx_info = &tx_ring->tx_buffer_info[req_id]; 1295 skb = tx_info->skb; 1296 1297 /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */ 1298 prefetch(&skb->end); 1299 1300 tx_info->skb = NULL; 1301 tx_info->last_jiffies = 0; 1302 1303 ena_unmap_tx_buff(tx_ring, tx_info); 1304 1305 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, 1306 "tx_poll: q %d skb %p completed\n", tx_ring->qid, 1307 skb); 1308 1309 tx_bytes += skb->len; 1310 dev_kfree_skb(skb); 1311 tx_pkts++; 1312 total_done += tx_info->tx_descs; 1313 1314 tx_ring->free_ids[next_to_clean] = req_id; 1315 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 1316 tx_ring->ring_size); 1317 } 1318 1319 tx_ring->next_to_clean = next_to_clean; 1320 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); 1321 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 1322 1323 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); 1324 1325 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, 1326 "tx_poll: q %d done. 
total pkts: %d\n", 1327 tx_ring->qid, tx_pkts); 1328 1329 /* need to make the rings circular update visible to 1330 * ena_start_xmit() before checking for netif_queue_stopped(). 1331 */ 1332 smp_mb(); 1333 1334 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1335 ENA_TX_WAKEUP_THRESH); 1336 if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { 1337 __netif_tx_lock(txq, smp_processor_id()); 1338 above_thresh = 1339 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1340 ENA_TX_WAKEUP_THRESH); 1341 if (netif_tx_queue_stopped(txq) && above_thresh && 1342 test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { 1343 netif_tx_wake_queue(txq); 1344 u64_stats_update_begin(&tx_ring->syncp); 1345 tx_ring->tx_stats.queue_wakeup++; 1346 u64_stats_update_end(&tx_ring->syncp); 1347 } 1348 __netif_tx_unlock(txq); 1349 } 1350 1351 return tx_pkts; 1352 } 1353 1354 static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags) 1355 { 1356 struct sk_buff *skb; 1357 1358 if (frags) 1359 skb = napi_get_frags(rx_ring->napi); 1360 else 1361 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 1362 rx_ring->rx_copybreak); 1363 1364 if (unlikely(!skb)) { 1365 u64_stats_update_begin(&rx_ring->syncp); 1366 rx_ring->rx_stats.skb_alloc_fail++; 1367 u64_stats_update_end(&rx_ring->syncp); 1368 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, 1369 "Failed to allocate skb. frags: %d\n", frags); 1370 return NULL; 1371 } 1372 1373 return skb; 1374 } 1375 1376 static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, 1377 struct ena_com_rx_buf_info *ena_bufs, 1378 u32 descs, 1379 u16 *next_to_clean) 1380 { 1381 struct sk_buff *skb; 1382 struct ena_rx_buffer *rx_info; 1383 u16 len, req_id, buf = 0; 1384 void *va; 1385 1386 len = ena_bufs[buf].len; 1387 req_id = ena_bufs[buf].req_id; 1388 rx_info = &rx_ring->rx_buffer_info[req_id]; 1389 1390 if (unlikely(!rx_info->page)) { 1391 netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, 1392 "Page is NULL\n"); 1393 return NULL; 1394 } 1395 1396 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1397 "rx_info %p page %p\n", 1398 rx_info, rx_info->page); 1399 1400 /* save virt address of first buffer */ 1401 va = page_address(rx_info->page) + rx_info->page_offset; 1402 prefetch(va + NET_IP_ALIGN); 1403 1404 if (len <= rx_ring->rx_copybreak) { 1405 skb = ena_alloc_skb(rx_ring, false); 1406 if (unlikely(!skb)) 1407 return NULL; 1408 1409 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1410 "rx allocated small packet. len %d. 
data_len %d\n", 1411 skb->len, skb->data_len); 1412 1413 /* sync this buffer for CPU use */ 1414 dma_sync_single_for_cpu(rx_ring->dev, 1415 dma_unmap_addr(&rx_info->ena_buf, paddr), 1416 len, 1417 DMA_FROM_DEVICE); 1418 skb_copy_to_linear_data(skb, va, len); 1419 dma_sync_single_for_device(rx_ring->dev, 1420 dma_unmap_addr(&rx_info->ena_buf, paddr), 1421 len, 1422 DMA_FROM_DEVICE); 1423 1424 skb_put(skb, len); 1425 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1426 rx_ring->free_ids[*next_to_clean] = req_id; 1427 *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, 1428 rx_ring->ring_size); 1429 return skb; 1430 } 1431 1432 skb = ena_alloc_skb(rx_ring, true); 1433 if (unlikely(!skb)) 1434 return NULL; 1435 1436 do { 1437 dma_unmap_page(rx_ring->dev, 1438 dma_unmap_addr(&rx_info->ena_buf, paddr), 1439 ENA_PAGE_SIZE, DMA_FROM_DEVICE); 1440 1441 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, 1442 rx_info->page_offset, len, ENA_PAGE_SIZE); 1443 1444 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1445 "rx skb updated. len %d. data_len %d\n", 1446 skb->len, skb->data_len); 1447 1448 rx_info->page = NULL; 1449 1450 rx_ring->free_ids[*next_to_clean] = req_id; 1451 *next_to_clean = 1452 ENA_RX_RING_IDX_NEXT(*next_to_clean, 1453 rx_ring->ring_size); 1454 if (likely(--descs == 0)) 1455 break; 1456 1457 buf++; 1458 len = ena_bufs[buf].len; 1459 req_id = ena_bufs[buf].req_id; 1460 rx_info = &rx_ring->rx_buffer_info[req_id]; 1461 } while (1); 1462 1463 return skb; 1464 } 1465 1466 /* ena_rx_checksum - indicate in skb if hw indicated a good cksum 1467 * @adapter: structure containing adapter specific data 1468 * @ena_rx_ctx: received packet context/metadata 1469 * @skb: skb currently being received and modified 1470 */ 1471 static void ena_rx_checksum(struct ena_ring *rx_ring, 1472 struct ena_com_rx_ctx *ena_rx_ctx, 1473 struct sk_buff *skb) 1474 { 1475 /* Rx csum disabled */ 1476 if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) { 1477 skb->ip_summed = CHECKSUM_NONE; 1478 return; 1479 } 1480 1481 /* For fragmented packets the checksum isn't valid */ 1482 if (ena_rx_ctx->frag) { 1483 skb->ip_summed = CHECKSUM_NONE; 1484 return; 1485 } 1486 1487 /* if IP and error */ 1488 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && 1489 (ena_rx_ctx->l3_csum_err))) { 1490 /* ipv4 checksum error */ 1491 skb->ip_summed = CHECKSUM_NONE; 1492 u64_stats_update_begin(&rx_ring->syncp); 1493 rx_ring->rx_stats.bad_csum++; 1494 u64_stats_update_end(&rx_ring->syncp); 1495 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, 1496 "RX IPv4 header checksum error\n"); 1497 return; 1498 } 1499 1500 /* if TCP/UDP */ 1501 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 1502 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) { 1503 if (unlikely(ena_rx_ctx->l4_csum_err)) { 1504 /* TCP/UDP checksum error */ 1505 u64_stats_update_begin(&rx_ring->syncp); 1506 rx_ring->rx_stats.bad_csum++; 1507 u64_stats_update_end(&rx_ring->syncp); 1508 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, 1509 "RX L4 checksum error\n"); 1510 skb->ip_summed = CHECKSUM_NONE; 1511 return; 1512 } 1513 1514 if (likely(ena_rx_ctx->l4_csum_checked)) { 1515 skb->ip_summed = CHECKSUM_UNNECESSARY; 1516 u64_stats_update_begin(&rx_ring->syncp); 1517 rx_ring->rx_stats.csum_good++; 1518 u64_stats_update_end(&rx_ring->syncp); 1519 } else { 1520 u64_stats_update_begin(&rx_ring->syncp); 1521 rx_ring->rx_stats.csum_unchecked++; 1522 u64_stats_update_end(&rx_ring->syncp); 1523 skb->ip_summed 
= CHECKSUM_NONE; 1524 } 1525 } else { 1526 skb->ip_summed = CHECKSUM_NONE; 1527 return; 1528 } 1529 1530 } 1531 1532 static void ena_set_rx_hash(struct ena_ring *rx_ring, 1533 struct ena_com_rx_ctx *ena_rx_ctx, 1534 struct sk_buff *skb) 1535 { 1536 enum pkt_hash_types hash_type; 1537 1538 if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) { 1539 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 1540 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) 1541 1542 hash_type = PKT_HASH_TYPE_L4; 1543 else 1544 hash_type = PKT_HASH_TYPE_NONE; 1545 1546 /* Override hash type if the packet is fragmented */ 1547 if (ena_rx_ctx->frag) 1548 hash_type = PKT_HASH_TYPE_NONE; 1549 1550 skb_set_hash(skb, ena_rx_ctx->hash, hash_type); 1551 } 1552 } 1553 1554 int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) 1555 { 1556 struct ena_rx_buffer *rx_info; 1557 int ret; 1558 1559 rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; 1560 xdp->data = page_address(rx_info->page) + 1561 rx_info->page_offset + rx_ring->rx_headroom; 1562 xdp_set_data_meta_invalid(xdp); 1563 xdp->data_hard_start = page_address(rx_info->page); 1564 xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; 1565 /* If for some reason we received a bigger packet than 1566 * we expect, then we simply drop it 1567 */ 1568 if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) 1569 return XDP_DROP; 1570 1571 ret = ena_xdp_execute(rx_ring, xdp, rx_info); 1572 1573 /* The xdp program might expand the headers */ 1574 if (ret == XDP_PASS) { 1575 rx_info->page_offset = xdp->data - xdp->data_hard_start; 1576 rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; 1577 } 1578 1579 return ret; 1580 } 1581 /* ena_clean_rx_irq - Cleanup RX irq 1582 * @rx_ring: RX ring to clean 1583 * @napi: napi handler 1584 * @budget: how many packets driver is allowed to clean 1585 * 1586 * Returns the number of cleaned buffers. 1587 */ 1588 static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, 1589 u32 budget) 1590 { 1591 u16 next_to_clean = rx_ring->next_to_clean; 1592 struct ena_com_rx_ctx ena_rx_ctx; 1593 struct ena_adapter *adapter; 1594 u32 res_budget, work_done; 1595 int rx_copybreak_pkt = 0; 1596 int refill_threshold; 1597 struct sk_buff *skb; 1598 int refill_required; 1599 struct xdp_buff xdp; 1600 int total_len = 0; 1601 int xdp_verdict; 1602 int rc = 0; 1603 int i; 1604 1605 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1606 "%s qid %d\n", __func__, rx_ring->qid); 1607 res_budget = budget; 1608 xdp.rxq = &rx_ring->xdp_rxq; 1609 1610 do { 1611 xdp_verdict = XDP_PASS; 1612 skb = NULL; 1613 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 1614 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 1615 ena_rx_ctx.descs = 0; 1616 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 1617 rx_ring->ena_com_io_sq, 1618 &ena_rx_ctx); 1619 if (unlikely(rc)) 1620 goto error; 1621 1622 if (unlikely(ena_rx_ctx.descs == 0)) 1623 break; 1624 1625 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1626 "rx_poll: q %d got packet from ena. 
descs #: %d l3 proto %d l4 proto %d hash: %x\n", 1627 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, 1628 ena_rx_ctx.l4_proto, ena_rx_ctx.hash); 1629 1630 if (ena_xdp_present_ring(rx_ring)) 1631 xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); 1632 1633 /* allocate skb and fill it */ 1634 if (xdp_verdict == XDP_PASS) 1635 skb = ena_rx_skb(rx_ring, 1636 rx_ring->ena_bufs, 1637 ena_rx_ctx.descs, 1638 &next_to_clean); 1639 1640 if (unlikely(!skb)) { 1641 if (xdp_verdict == XDP_TX) { 1642 ena_free_rx_page(rx_ring, 1643 &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); 1644 res_budget--; 1645 } 1646 for (i = 0; i < ena_rx_ctx.descs; i++) { 1647 rx_ring->free_ids[next_to_clean] = 1648 rx_ring->ena_bufs[i].req_id; 1649 next_to_clean = 1650 ENA_RX_RING_IDX_NEXT(next_to_clean, 1651 rx_ring->ring_size); 1652 } 1653 if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP) 1654 continue; 1655 break; 1656 } 1657 1658 ena_rx_checksum(rx_ring, &ena_rx_ctx, skb); 1659 1660 ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb); 1661 1662 skb_record_rx_queue(skb, rx_ring->qid); 1663 1664 if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { 1665 total_len += rx_ring->ena_bufs[0].len; 1666 rx_copybreak_pkt++; 1667 napi_gro_receive(napi, skb); 1668 } else { 1669 total_len += skb->len; 1670 napi_gro_frags(napi); 1671 } 1672 1673 res_budget--; 1674 } while (likely(res_budget)); 1675 1676 work_done = budget - res_budget; 1677 rx_ring->per_napi_packets += work_done; 1678 u64_stats_update_begin(&rx_ring->syncp); 1679 rx_ring->rx_stats.bytes += total_len; 1680 rx_ring->rx_stats.cnt += work_done; 1681 rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt; 1682 u64_stats_update_end(&rx_ring->syncp); 1683 1684 rx_ring->next_to_clean = next_to_clean; 1685 1686 refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); 1687 refill_threshold = 1688 min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, 1689 ENA_RX_REFILL_THRESH_PACKET); 1690 1691 /* Optimization, try to batch new rx buffers */ 1692 if (refill_required > refill_threshold) { 1693 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 1694 ena_refill_rx_bufs(rx_ring, refill_required); 1695 } 1696 1697 return work_done; 1698 1699 error: 1700 adapter = netdev_priv(rx_ring->netdev); 1701 1702 u64_stats_update_begin(&rx_ring->syncp); 1703 rx_ring->rx_stats.bad_desc_num++; 1704 u64_stats_update_end(&rx_ring->syncp); 1705 1706 /* Too many desc from the device. 
Trigger reset */
        adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);

        return 0;
}

static void ena_dim_work(struct work_struct *w)
{
        struct dim *dim = container_of(w, struct dim, work);
        struct dim_cq_moder cur_moder =
                net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
        struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);

        ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
        dim->state = DIM_START_MEASURE;
}

static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
{
        struct dim_sample dim_sample;
        struct ena_ring *rx_ring = ena_napi->rx_ring;

        if (!rx_ring->per_napi_packets)
                return;

        rx_ring->non_empty_napi_events++;

        dim_update_sample(rx_ring->non_empty_napi_events,
                          rx_ring->rx_stats.cnt,
                          rx_ring->rx_stats.bytes,
                          &dim_sample);

        net_dim(&ena_napi->dim, dim_sample);

        rx_ring->per_napi_packets = 0;
}

static void ena_unmask_interrupt(struct ena_ring *tx_ring,
                                 struct ena_ring *rx_ring)
{
        struct ena_eth_io_intr_reg intr_reg;
        u32 rx_interval = 0;

        /* Rx ring can be NULL for XDP Tx queues, which don't have an
         * accompanying rx_ring pair.
         */
        if (rx_ring)
                rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
                        rx_ring->smoothed_interval :
                        ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);

        /* Update intr register: rx intr delay,
         * tx intr delay and interrupt unmask
         */
        ena_com_update_intr_reg(&intr_reg,
                                rx_interval,
                                tx_ring->smoothed_interval,
                                true);

        /* It is a shared MSI-X.
         * Tx and Rx CQ have pointer to it.
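         * (Both CQs map to the same MSI-X vector, so either handle reaches
         * the same interrupt register.)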
1767 * So we use one of them to reach the intr reg 1768 * The Tx ring is used because the rx_ring is NULL for XDP queues 1769 */ 1770 ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); 1771 } 1772 1773 static void ena_update_ring_numa_node(struct ena_ring *tx_ring, 1774 struct ena_ring *rx_ring) 1775 { 1776 int cpu = get_cpu(); 1777 int numa_node; 1778 1779 /* Check only one ring since the 2 rings are running on the same cpu */ 1780 if (likely(tx_ring->cpu == cpu)) 1781 goto out; 1782 1783 numa_node = cpu_to_node(cpu); 1784 put_cpu(); 1785 1786 if (numa_node != NUMA_NO_NODE) { 1787 ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); 1788 if (rx_ring) 1789 ena_com_update_numa_node(rx_ring->ena_com_io_cq, 1790 numa_node); 1791 } 1792 1793 tx_ring->cpu = cpu; 1794 if (rx_ring) 1795 rx_ring->cpu = cpu; 1796 1797 return; 1798 out: 1799 put_cpu(); 1800 } 1801 1802 static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) 1803 { 1804 u32 total_done = 0; 1805 u16 next_to_clean; 1806 u32 tx_bytes = 0; 1807 int tx_pkts = 0; 1808 u16 req_id; 1809 int rc; 1810 1811 if (unlikely(!xdp_ring)) 1812 return 0; 1813 next_to_clean = xdp_ring->next_to_clean; 1814 1815 while (tx_pkts < budget) { 1816 struct ena_tx_buffer *tx_info; 1817 struct xdp_frame *xdpf; 1818 1819 rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, 1820 &req_id); 1821 if (rc) 1822 break; 1823 1824 rc = validate_xdp_req_id(xdp_ring, req_id); 1825 if (rc) 1826 break; 1827 1828 tx_info = &xdp_ring->tx_buffer_info[req_id]; 1829 xdpf = tx_info->xdpf; 1830 1831 tx_info->xdpf = NULL; 1832 tx_info->last_jiffies = 0; 1833 ena_unmap_tx_buff(xdp_ring, tx_info); 1834 1835 netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, 1836 "tx_poll: q %d skb %p completed\n", xdp_ring->qid, 1837 xdpf); 1838 1839 tx_bytes += xdpf->len; 1840 tx_pkts++; 1841 total_done += tx_info->tx_descs; 1842 1843 __free_page(tx_info->xdp_rx_page); 1844 xdp_ring->free_ids[next_to_clean] = req_id; 1845 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 1846 xdp_ring->ring_size); 1847 } 1848 1849 xdp_ring->next_to_clean = next_to_clean; 1850 ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); 1851 ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); 1852 1853 netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, 1854 "tx_poll: q %d done. 
total pkts: %d\n", 1855 xdp_ring->qid, tx_pkts); 1856 1857 return tx_pkts; 1858 } 1859 1860 static int ena_io_poll(struct napi_struct *napi, int budget) 1861 { 1862 struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); 1863 struct ena_ring *tx_ring, *rx_ring; 1864 u32 tx_work_done; 1865 u32 rx_work_done; 1866 int tx_budget; 1867 int napi_comp_call = 0; 1868 int ret; 1869 1870 tx_ring = ena_napi->tx_ring; 1871 rx_ring = ena_napi->rx_ring; 1872 1873 tx_ring->first_interrupt = ena_napi->first_interrupt; 1874 rx_ring->first_interrupt = ena_napi->first_interrupt; 1875 1876 tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; 1877 1878 if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || 1879 test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) { 1880 napi_complete_done(napi, 0); 1881 return 0; 1882 } 1883 1884 tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); 1885 rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); 1886 1887 /* If the device is about to reset or down, avoid unmask 1888 * the interrupt and return 0 so NAPI won't reschedule 1889 */ 1890 if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || 1891 test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) { 1892 napi_complete_done(napi, 0); 1893 ret = 0; 1894 1895 } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { 1896 napi_comp_call = 1; 1897 1898 /* Update numa and unmask the interrupt only when schedule 1899 * from the interrupt context (vs from sk_busy_loop) 1900 */ 1901 if (napi_complete_done(napi, rx_work_done)) { 1902 /* We apply adaptive moderation on Rx path only. 1903 * Tx uses static interrupt moderation. 1904 */ 1905 if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) 1906 ena_adjust_adaptive_rx_intr_moderation(ena_napi); 1907 1908 ena_unmask_interrupt(tx_ring, rx_ring); 1909 } 1910 1911 ena_update_ring_numa_node(tx_ring, rx_ring); 1912 1913 ret = rx_work_done; 1914 } else { 1915 ret = budget; 1916 } 1917 1918 u64_stats_update_begin(&tx_ring->syncp); 1919 tx_ring->tx_stats.napi_comp += napi_comp_call; 1920 tx_ring->tx_stats.tx_poll++; 1921 u64_stats_update_end(&tx_ring->syncp); 1922 1923 return ret; 1924 } 1925 1926 static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data) 1927 { 1928 struct ena_adapter *adapter = (struct ena_adapter *)data; 1929 1930 ena_com_admin_q_comp_intr_handler(adapter->ena_dev); 1931 1932 /* Don't call the aenq handler before probe is done */ 1933 if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))) 1934 ena_com_aenq_intr_handler(adapter->ena_dev, data); 1935 1936 return IRQ_HANDLED; 1937 } 1938 1939 /* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx 1940 * @irq: interrupt number 1941 * @data: pointer to a network interface private napi device structure 1942 */ 1943 static irqreturn_t ena_intr_msix_io(int irq, void *data) 1944 { 1945 struct ena_napi *ena_napi = data; 1946 1947 ena_napi->first_interrupt = true; 1948 1949 napi_schedule_irqoff(&ena_napi->napi); 1950 1951 return IRQ_HANDLED; 1952 } 1953 1954 /* Reserve a single MSI-X vector for management (admin + aenq). 1955 * plus reserve one vector for each potential io queue. 1956 * the number of potential io queues is the minimum of what the device 1957 * supports and the number of vCPUs. 
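 * For example, with 8 vCPUs and a device that exposes 32 queue pairs, 9
 * vectors are requested: 8 for the I/O queues plus 1 for management
 * (assuming ENA_MAX_MSIX_VEC(n) expands to n + 1).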
1958 */ 1959 static int ena_enable_msix(struct ena_adapter *adapter) 1960 { 1961 int msix_vecs, irq_cnt; 1962 1963 if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { 1964 netif_err(adapter, probe, adapter->netdev, 1965 "Error, MSI-X is already enabled\n"); 1966 return -EPERM; 1967 } 1968 1969 /* Reserved the max msix vectors we might need */ 1970 msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues); 1971 netif_dbg(adapter, probe, adapter->netdev, 1972 "trying to enable MSI-X, vectors %d\n", msix_vecs); 1973 1974 irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC, 1975 msix_vecs, PCI_IRQ_MSIX); 1976 1977 if (irq_cnt < 0) { 1978 netif_err(adapter, probe, adapter->netdev, 1979 "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt); 1980 return -ENOSPC; 1981 } 1982 1983 if (irq_cnt != msix_vecs) { 1984 netif_notice(adapter, probe, adapter->netdev, 1985 "enable only %d MSI-X (out of %d), reduce the number of queues\n", 1986 irq_cnt, msix_vecs); 1987 adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; 1988 } 1989 1990 if (ena_init_rx_cpu_rmap(adapter)) 1991 netif_warn(adapter, probe, adapter->netdev, 1992 "Failed to map IRQs to CPUs\n"); 1993 1994 adapter->msix_vecs = irq_cnt; 1995 set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); 1996 1997 return 0; 1998 } 1999 2000 static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) 2001 { 2002 u32 cpu; 2003 2004 snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, 2005 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", 2006 pci_name(adapter->pdev)); 2007 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = 2008 ena_intr_msix_mgmnt; 2009 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; 2010 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = 2011 pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX); 2012 cpu = cpumask_first(cpu_online_mask); 2013 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu; 2014 cpumask_set_cpu(cpu, 2015 &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask); 2016 } 2017 2018 static void ena_setup_io_intr(struct ena_adapter *adapter) 2019 { 2020 struct net_device *netdev; 2021 int irq_idx, i, cpu; 2022 int io_queue_count; 2023 2024 netdev = adapter->netdev; 2025 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2026 2027 for (i = 0; i < io_queue_count; i++) { 2028 irq_idx = ENA_IO_IRQ_IDX(i); 2029 cpu = i % num_online_cpus(); 2030 2031 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, 2032 "%s-Tx-Rx-%d", netdev->name, i); 2033 adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io; 2034 adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i]; 2035 adapter->irq_tbl[irq_idx].vector = 2036 pci_irq_vector(adapter->pdev, irq_idx); 2037 adapter->irq_tbl[irq_idx].cpu = cpu; 2038 2039 cpumask_set_cpu(cpu, 2040 &adapter->irq_tbl[irq_idx].affinity_hint_mask); 2041 } 2042 } 2043 2044 static int ena_request_mgmnt_irq(struct ena_adapter *adapter) 2045 { 2046 unsigned long flags = 0; 2047 struct ena_irq *irq; 2048 int rc; 2049 2050 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 2051 rc = request_irq(irq->vector, irq->handler, flags, irq->name, 2052 irq->data); 2053 if (rc) { 2054 netif_err(adapter, probe, adapter->netdev, 2055 "failed to request admin irq\n"); 2056 return rc; 2057 } 2058 2059 netif_dbg(adapter, probe, adapter->netdev, 2060 "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", 2061 irq->affinity_hint_mask.bits[0], irq->vector); 2062 2063 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); 2064 2065 return rc; 2066 } 2067 2068 static int ena_request_io_irq(struct ena_adapter *adapter) 2069 { 2070 unsigned 
long flags = 0; 2071 struct ena_irq *irq; 2072 int rc = 0, i, k; 2073 2074 if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { 2075 netif_err(adapter, ifup, adapter->netdev, 2076 "Failed to request I/O IRQ: MSI-X is not enabled\n"); 2077 return -EINVAL; 2078 } 2079 2080 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 2081 irq = &adapter->irq_tbl[i]; 2082 rc = request_irq(irq->vector, irq->handler, flags, irq->name, 2083 irq->data); 2084 if (rc) { 2085 netif_err(adapter, ifup, adapter->netdev, 2086 "Failed to request I/O IRQ. index %d rc %d\n", 2087 i, rc); 2088 goto err; 2089 } 2090 2091 netif_dbg(adapter, ifup, adapter->netdev, 2092 "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", 2093 i, irq->affinity_hint_mask.bits[0], irq->vector); 2094 2095 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); 2096 } 2097 2098 return rc; 2099 2100 err: 2101 for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) { 2102 irq = &adapter->irq_tbl[k]; 2103 free_irq(irq->vector, irq->data); 2104 } 2105 2106 return rc; 2107 } 2108 2109 static void ena_free_mgmnt_irq(struct ena_adapter *adapter) 2110 { 2111 struct ena_irq *irq; 2112 2113 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 2114 synchronize_irq(irq->vector); 2115 irq_set_affinity_hint(irq->vector, NULL); 2116 free_irq(irq->vector, irq->data); 2117 } 2118 2119 static void ena_free_io_irq(struct ena_adapter *adapter) 2120 { 2121 struct ena_irq *irq; 2122 int i; 2123 2124 #ifdef CONFIG_RFS_ACCEL 2125 if (adapter->msix_vecs >= 1) { 2126 free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); 2127 adapter->netdev->rx_cpu_rmap = NULL; 2128 } 2129 #endif /* CONFIG_RFS_ACCEL */ 2130 2131 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 2132 irq = &adapter->irq_tbl[i]; 2133 irq_set_affinity_hint(irq->vector, NULL); 2134 free_irq(irq->vector, irq->data); 2135 } 2136 } 2137 2138 static void ena_disable_msix(struct ena_adapter *adapter) 2139 { 2140 if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) 2141 pci_free_irq_vectors(adapter->pdev); 2142 } 2143 2144 static void ena_disable_io_intr_sync(struct ena_adapter *adapter) 2145 { 2146 int i; 2147 2148 if (!netif_running(adapter->netdev)) 2149 return; 2150 2151 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) 2152 synchronize_irq(adapter->irq_tbl[i].vector); 2153 } 2154 2155 static void ena_del_napi_in_range(struct ena_adapter *adapter, 2156 int first_index, 2157 int count) 2158 { 2159 int i; 2160 2161 for (i = first_index; i < first_index + count; i++) { 2162 /* Check if napi was initialized before */ 2163 if (!ENA_IS_XDP_INDEX(adapter, i) || 2164 adapter->ena_napi[i].xdp_ring) 2165 netif_napi_del(&adapter->ena_napi[i].napi); 2166 else 2167 WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && 2168 adapter->ena_napi[i].xdp_ring); 2169 } 2170 } 2171 2172 static void ena_init_napi_in_range(struct ena_adapter *adapter, 2173 int first_index, int count) 2174 { 2175 struct ena_napi *napi = {0}; 2176 int i; 2177 2178 for (i = first_index; i < first_index + count; i++) { 2179 napi = &adapter->ena_napi[i]; 2180 2181 netif_napi_add(adapter->netdev, 2182 &adapter->ena_napi[i].napi, 2183 ENA_IS_XDP_INDEX(adapter, i) ? 
ena_xdp_io_poll : ena_io_poll, 2184 ENA_NAPI_BUDGET); 2185 2186 if (!ENA_IS_XDP_INDEX(adapter, i)) { 2187 napi->rx_ring = &adapter->rx_ring[i]; 2188 napi->tx_ring = &adapter->tx_ring[i]; 2189 } else { 2190 napi->xdp_ring = &adapter->tx_ring[i]; 2191 } 2192 napi->qid = i; 2193 } 2194 } 2195 2196 static void ena_napi_disable_in_range(struct ena_adapter *adapter, 2197 int first_index, 2198 int count) 2199 { 2200 int i; 2201 2202 for (i = first_index; i < first_index + count; i++) 2203 napi_disable(&adapter->ena_napi[i].napi); 2204 } 2205 2206 static void ena_napi_enable_in_range(struct ena_adapter *adapter, 2207 int first_index, 2208 int count) 2209 { 2210 int i; 2211 2212 for (i = first_index; i < first_index + count; i++) 2213 napi_enable(&adapter->ena_napi[i].napi); 2214 } 2215 2216 /* Configure the Rx forwarding */ 2217 static int ena_rss_configure(struct ena_adapter *adapter) 2218 { 2219 struct ena_com_dev *ena_dev = adapter->ena_dev; 2220 int rc; 2221 2222 /* In case the RSS table wasn't initialized by probe */ 2223 if (!ena_dev->rss.tbl_log_size) { 2224 rc = ena_rss_init_default(adapter); 2225 if (rc && (rc != -EOPNOTSUPP)) { 2226 netif_err(adapter, ifup, adapter->netdev, 2227 "Failed to init RSS rc: %d\n", rc); 2228 return rc; 2229 } 2230 } 2231 2232 /* Set indirect table */ 2233 rc = ena_com_indirect_table_set(ena_dev); 2234 if (unlikely(rc && rc != -EOPNOTSUPP)) 2235 return rc; 2236 2237 /* Configure hash function (if supported) */ 2238 rc = ena_com_set_hash_function(ena_dev); 2239 if (unlikely(rc && (rc != -EOPNOTSUPP))) 2240 return rc; 2241 2242 /* Configure hash inputs (if supported) */ 2243 rc = ena_com_set_hash_ctrl(ena_dev); 2244 if (unlikely(rc && (rc != -EOPNOTSUPP))) 2245 return rc; 2246 2247 return 0; 2248 } 2249 2250 static int ena_up_complete(struct ena_adapter *adapter) 2251 { 2252 int rc; 2253 2254 rc = ena_rss_configure(adapter); 2255 if (rc) 2256 return rc; 2257 2258 ena_change_mtu(adapter->netdev, adapter->netdev->mtu); 2259 2260 ena_refill_all_rx_bufs(adapter); 2261 2262 /* enable transmits */ 2263 netif_tx_start_all_queues(adapter->netdev); 2264 2265 ena_napi_enable_in_range(adapter, 2266 0, 2267 adapter->xdp_num_queues + adapter->num_io_queues); 2268 2269 return 0; 2270 } 2271 2272 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) 2273 { 2274 struct ena_com_create_io_ctx ctx; 2275 struct ena_com_dev *ena_dev; 2276 struct ena_ring *tx_ring; 2277 u32 msix_vector; 2278 u16 ena_qid; 2279 int rc; 2280 2281 ena_dev = adapter->ena_dev; 2282 2283 tx_ring = &adapter->tx_ring[qid]; 2284 msix_vector = ENA_IO_IRQ_IDX(qid); 2285 ena_qid = ENA_IO_TXQ_IDX(qid); 2286 2287 memset(&ctx, 0x0, sizeof(ctx)); 2288 2289 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 2290 ctx.qid = ena_qid; 2291 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 2292 ctx.msix_vector = msix_vector; 2293 ctx.queue_size = tx_ring->ring_size; 2294 ctx.numa_node = cpu_to_node(tx_ring->cpu); 2295 2296 rc = ena_com_create_io_queue(ena_dev, &ctx); 2297 if (rc) { 2298 netif_err(adapter, ifup, adapter->netdev, 2299 "Failed to create I/O TX queue num %d rc: %d\n", 2300 qid, rc); 2301 return rc; 2302 } 2303 2304 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 2305 &tx_ring->ena_com_io_sq, 2306 &tx_ring->ena_com_io_cq); 2307 if (rc) { 2308 netif_err(adapter, ifup, adapter->netdev, 2309 "Failed to get TX queue handlers. 
TX queue num %d rc: %d\n", 2310 qid, rc); 2311 ena_com_destroy_io_queue(ena_dev, ena_qid); 2312 return rc; 2313 } 2314 2315 ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); 2316 return rc; 2317 } 2318 2319 static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, 2320 int first_index, int count) 2321 { 2322 struct ena_com_dev *ena_dev = adapter->ena_dev; 2323 int rc, i; 2324 2325 for (i = first_index; i < first_index + count; i++) { 2326 rc = ena_create_io_tx_queue(adapter, i); 2327 if (rc) 2328 goto create_err; 2329 } 2330 2331 return 0; 2332 2333 create_err: 2334 while (i-- > first_index) 2335 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); 2336 2337 return rc; 2338 } 2339 2340 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) 2341 { 2342 struct ena_com_dev *ena_dev; 2343 struct ena_com_create_io_ctx ctx; 2344 struct ena_ring *rx_ring; 2345 u32 msix_vector; 2346 u16 ena_qid; 2347 int rc; 2348 2349 ena_dev = adapter->ena_dev; 2350 2351 rx_ring = &adapter->rx_ring[qid]; 2352 msix_vector = ENA_IO_IRQ_IDX(qid); 2353 ena_qid = ENA_IO_RXQ_IDX(qid); 2354 2355 memset(&ctx, 0x0, sizeof(ctx)); 2356 2357 ctx.qid = ena_qid; 2358 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 2359 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2360 ctx.msix_vector = msix_vector; 2361 ctx.queue_size = rx_ring->ring_size; 2362 ctx.numa_node = cpu_to_node(rx_ring->cpu); 2363 2364 rc = ena_com_create_io_queue(ena_dev, &ctx); 2365 if (rc) { 2366 netif_err(adapter, ifup, adapter->netdev, 2367 "Failed to create I/O RX queue num %d rc: %d\n", 2368 qid, rc); 2369 return rc; 2370 } 2371 2372 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 2373 &rx_ring->ena_com_io_sq, 2374 &rx_ring->ena_com_io_cq); 2375 if (rc) { 2376 netif_err(adapter, ifup, adapter->netdev, 2377 "Failed to get RX queue handlers. RX queue num %d rc: %d\n", 2378 qid, rc); 2379 goto err; 2380 } 2381 2382 ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); 2383 2384 return rc; 2385 err: 2386 ena_com_destroy_io_queue(ena_dev, ena_qid); 2387 return rc; 2388 } 2389 2390 static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) 2391 { 2392 struct ena_com_dev *ena_dev = adapter->ena_dev; 2393 int rc, i; 2394 2395 for (i = 0; i < adapter->num_io_queues; i++) { 2396 rc = ena_create_io_rx_queue(adapter, i); 2397 if (rc) 2398 goto create_err; 2399 INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work); 2400 } 2401 2402 return 0; 2403 2404 create_err: 2405 while (i--) { 2406 cancel_work_sync(&adapter->ena_napi[i].dim.work); 2407 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); 2408 } 2409 2410 return rc; 2411 } 2412 2413 static void set_io_rings_size(struct ena_adapter *adapter, 2414 int new_tx_size, 2415 int new_rx_size) 2416 { 2417 int i; 2418 2419 for (i = 0; i < adapter->num_io_queues; i++) { 2420 adapter->tx_ring[i].ring_size = new_tx_size; 2421 adapter->rx_ring[i].ring_size = new_rx_size; 2422 } 2423 } 2424 2425 /* This function allows queue allocation to backoff when the system is 2426 * low on memory. If there is not enough memory to allocate io queues 2427 * the driver will try to allocate smaller queues. 2428 * 2429 * The backoff algorithm is as follows: 2430 * 1. Try to allocate TX and RX and if successful. 2431 * 1.1. return success 2432 * 2433 * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). 2434 * 2435 * 3. If TX or RX is smaller than 256 2436 * 3.1. return failure. 2437 * 4. else 2438 * 4.1. go back to 1. 
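 * For example (illustrative sizes only): with requested sizes TX=1024 and
 * RX=1024 and repeated -ENOMEM failures, the driver retries with 512/512,
 * then 256/256, and then gives up rather than dropping below the 256 minimum.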
2439 */ 2440 static int create_queues_with_size_backoff(struct ena_adapter *adapter) 2441 { 2442 int rc, cur_rx_ring_size, cur_tx_ring_size; 2443 int new_rx_ring_size, new_tx_ring_size; 2444 2445 /* current queue sizes might be set to smaller than the requested 2446 * ones due to past queue allocation failures. 2447 */ 2448 set_io_rings_size(adapter, adapter->requested_tx_ring_size, 2449 adapter->requested_rx_ring_size); 2450 2451 while (1) { 2452 if (ena_xdp_present(adapter)) { 2453 rc = ena_setup_and_create_all_xdp_queues(adapter); 2454 2455 if (rc) 2456 goto err_setup_tx; 2457 } 2458 rc = ena_setup_tx_resources_in_range(adapter, 2459 0, 2460 adapter->num_io_queues); 2461 if (rc) 2462 goto err_setup_tx; 2463 2464 rc = ena_create_io_tx_queues_in_range(adapter, 2465 0, 2466 adapter->num_io_queues); 2467 if (rc) 2468 goto err_create_tx_queues; 2469 2470 rc = ena_setup_all_rx_resources(adapter); 2471 if (rc) 2472 goto err_setup_rx; 2473 2474 rc = ena_create_all_io_rx_queues(adapter); 2475 if (rc) 2476 goto err_create_rx_queues; 2477 2478 return 0; 2479 2480 err_create_rx_queues: 2481 ena_free_all_io_rx_resources(adapter); 2482 err_setup_rx: 2483 ena_destroy_all_tx_queues(adapter); 2484 err_create_tx_queues: 2485 ena_free_all_io_tx_resources(adapter); 2486 err_setup_tx: 2487 if (rc != -ENOMEM) { 2488 netif_err(adapter, ifup, adapter->netdev, 2489 "Queue creation failed with error code %d\n", 2490 rc); 2491 return rc; 2492 } 2493 2494 cur_tx_ring_size = adapter->tx_ring[0].ring_size; 2495 cur_rx_ring_size = adapter->rx_ring[0].ring_size; 2496 2497 netif_err(adapter, ifup, adapter->netdev, 2498 "Not enough memory to create queues with sizes TX=%d, RX=%d\n", 2499 cur_tx_ring_size, cur_rx_ring_size); 2500 2501 new_tx_ring_size = cur_tx_ring_size; 2502 new_rx_ring_size = cur_rx_ring_size; 2503 2504 /* Decrease the size of the larger queue, or 2505 * decrease both if they are the same size. 2506 */ 2507 if (cur_rx_ring_size <= cur_tx_ring_size) 2508 new_tx_ring_size = cur_tx_ring_size / 2; 2509 if (cur_rx_ring_size >= cur_tx_ring_size) 2510 new_rx_ring_size = cur_rx_ring_size / 2; 2511 2512 if (new_tx_ring_size < ENA_MIN_RING_SIZE || 2513 new_rx_ring_size < ENA_MIN_RING_SIZE) { 2514 netif_err(adapter, ifup, adapter->netdev, 2515 "Queue creation failed with the smallest possible queue size of %d for both queues. 
Not retrying with smaller queues\n", 2516 ENA_MIN_RING_SIZE); 2517 return rc; 2518 } 2519 2520 netif_err(adapter, ifup, adapter->netdev, 2521 "Retrying queue creation with sizes TX=%d, RX=%d\n", 2522 new_tx_ring_size, 2523 new_rx_ring_size); 2524 2525 set_io_rings_size(adapter, new_tx_ring_size, 2526 new_rx_ring_size); 2527 } 2528 } 2529 2530 static int ena_up(struct ena_adapter *adapter) 2531 { 2532 int io_queue_count, rc, i; 2533 2534 netdev_dbg(adapter->netdev, "%s\n", __func__); 2535 2536 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2537 ena_setup_io_intr(adapter); 2538 2539 /* napi poll functions should be initialized before running 2540 * request_irq(), to handle a rare condition where there is a pending 2541 * interrupt, causing the ISR to fire immediately while the poll 2542 * function wasn't set yet, causing a null dereference 2543 */ 2544 ena_init_napi_in_range(adapter, 0, io_queue_count); 2545 2546 rc = ena_request_io_irq(adapter); 2547 if (rc) 2548 goto err_req_irq; 2549 2550 rc = create_queues_with_size_backoff(adapter); 2551 if (rc) 2552 goto err_create_queues_with_backoff; 2553 2554 rc = ena_up_complete(adapter); 2555 if (rc) 2556 goto err_up; 2557 2558 if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) 2559 netif_carrier_on(adapter->netdev); 2560 2561 u64_stats_update_begin(&adapter->syncp); 2562 adapter->dev_stats.interface_up++; 2563 u64_stats_update_end(&adapter->syncp); 2564 2565 set_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2566 2567 /* Enable completion queues interrupt */ 2568 for (i = 0; i < adapter->num_io_queues; i++) 2569 ena_unmask_interrupt(&adapter->tx_ring[i], 2570 &adapter->rx_ring[i]); 2571 2572 /* schedule napi in case we had pending packets 2573 * from the last time we disabled napi 2574 */ 2575 for (i = 0; i < io_queue_count; i++) 2576 napi_schedule(&adapter->ena_napi[i].napi); 2577 2578 return rc; 2579 2580 err_up: 2581 ena_destroy_all_tx_queues(adapter); 2582 ena_free_all_io_tx_resources(adapter); 2583 ena_destroy_all_rx_queues(adapter); 2584 ena_free_all_io_rx_resources(adapter); 2585 err_create_queues_with_backoff: 2586 ena_free_io_irq(adapter); 2587 err_req_irq: 2588 ena_del_napi_in_range(adapter, 0, io_queue_count); 2589 2590 return rc; 2591 } 2592 2593 static void ena_down(struct ena_adapter *adapter) 2594 { 2595 int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2596 2597 netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); 2598 2599 clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2600 2601 u64_stats_update_begin(&adapter->syncp); 2602 adapter->dev_stats.interface_down++; 2603 u64_stats_update_end(&adapter->syncp); 2604 2605 netif_carrier_off(adapter->netdev); 2606 netif_tx_disable(adapter->netdev); 2607 2608 /* After this point the napi handler won't enable the tx queue */ 2609 ena_napi_disable_in_range(adapter, 0, io_queue_count); 2610 2611 /* After the queues are destroyed there won't be any new interrupts */ 2612 2613 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) { 2614 int rc; 2615 2616 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 2617 if (rc) 2618 dev_err(&adapter->pdev->dev, "Device reset failed\n"); 2619 /* stop submitting admin commands on a device that was reset */ 2620 ena_com_set_admin_running_state(adapter->ena_dev, false); 2621 } 2622 2623 ena_destroy_all_io_queues(adapter); 2624 2625 ena_disable_io_intr_sync(adapter); 2626 ena_free_io_irq(adapter); 2627 ena_del_napi_in_range(adapter, 0, io_queue_count); 2628 2629 ena_free_all_tx_bufs(adapter); 2630
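/* At this point the NAPI contexts are deleted, the I/O IRQs are freed and the
 * I/O queues are destroyed, so freeing the remaining buffers and ring
 * resources below cannot race with the datapath.
 */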
ena_free_all_rx_bufs(adapter); 2631 ena_free_all_io_tx_resources(adapter); 2632 ena_free_all_io_rx_resources(adapter); 2633 } 2634 2635 /* ena_open - Called when a network interface is made active 2636 * @netdev: network interface device structure 2637 * 2638 * Returns 0 on success, negative value on failure 2639 * 2640 * The open entry point is called when a network interface is made 2641 * active by the system (IFF_UP). At this point all resources needed 2642 * for transmit and receive operations are allocated, the interrupt 2643 * handler is registered with the OS, the watchdog timer is started, 2644 * and the stack is notified that the interface is ready. 2645 */ 2646 static int ena_open(struct net_device *netdev) 2647 { 2648 struct ena_adapter *adapter = netdev_priv(netdev); 2649 int rc; 2650 2651 /* Notify the stack of the actual queue counts. */ 2652 rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues); 2653 if (rc) { 2654 netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); 2655 return rc; 2656 } 2657 2658 rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues); 2659 if (rc) { 2660 netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); 2661 return rc; 2662 } 2663 2664 rc = ena_up(adapter); 2665 if (rc) 2666 return rc; 2667 2668 return rc; 2669 } 2670 2671 /* ena_close - Disables a network interface 2672 * @netdev: network interface device structure 2673 * 2674 * Returns 0, this is not allowed to fail 2675 * 2676 * The close entry point is called when an interface is de-activated 2677 * by the OS. The hardware is still under the drivers control, but 2678 * needs to be disabled. A global MAC reset is issued to stop the 2679 * hardware, and all transmit and receive resources are freed. 2680 */ 2681 static int ena_close(struct net_device *netdev) 2682 { 2683 struct ena_adapter *adapter = netdev_priv(netdev); 2684 2685 netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); 2686 2687 if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) 2688 return 0; 2689 2690 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 2691 ena_down(adapter); 2692 2693 /* Check for device status and issue reset if needed*/ 2694 check_for_admin_com_state(adapter); 2695 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 2696 netif_err(adapter, ifdown, adapter->netdev, 2697 "Destroy failure, restarting device\n"); 2698 ena_dump_stats_to_dmesg(adapter); 2699 /* rtnl lock already obtained in dev_ioctl() layer */ 2700 ena_destroy_device(adapter, false); 2701 ena_restore_device(adapter); 2702 } 2703 2704 return 0; 2705 } 2706 2707 int ena_update_queue_sizes(struct ena_adapter *adapter, 2708 u32 new_tx_size, 2709 u32 new_rx_size) 2710 { 2711 bool dev_was_up; 2712 2713 dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2714 ena_close(adapter->netdev); 2715 adapter->requested_tx_ring_size = new_tx_size; 2716 adapter->requested_rx_ring_size = new_rx_size; 2717 ena_init_io_rings(adapter, 2718 0, 2719 adapter->xdp_num_queues + 2720 adapter->num_io_queues); 2721 return dev_was_up ? 
ena_up(adapter) : 0; 2722 } 2723 2724 int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) 2725 { 2726 struct ena_com_dev *ena_dev = adapter->ena_dev; 2727 int prev_channel_count; 2728 bool dev_was_up; 2729 2730 dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2731 ena_close(adapter->netdev); 2732 prev_channel_count = adapter->num_io_queues; 2733 adapter->num_io_queues = new_channel_count; 2734 if (ena_xdp_present(adapter) && 2735 ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) { 2736 adapter->xdp_first_ring = new_channel_count; 2737 adapter->xdp_num_queues = new_channel_count; 2738 if (prev_channel_count > new_channel_count) 2739 ena_xdp_exchange_program_rx_in_range(adapter, 2740 NULL, 2741 new_channel_count, 2742 prev_channel_count); 2743 else 2744 ena_xdp_exchange_program_rx_in_range(adapter, 2745 adapter->xdp_bpf_prog, 2746 prev_channel_count, 2747 new_channel_count); 2748 } 2749 2750 /* We need to destroy the rss table so that the indirection 2751 * table will be reinitialized by ena_up() 2752 */ 2753 ena_com_rss_destroy(ena_dev); 2754 ena_init_io_rings(adapter, 2755 0, 2756 adapter->xdp_num_queues + 2757 adapter->num_io_queues); 2758 return dev_was_up ? ena_open(adapter->netdev) : 0; 2759 } 2760 2761 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) 2762 { 2763 u32 mss = skb_shinfo(skb)->gso_size; 2764 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 2765 u8 l4_protocol = 0; 2766 2767 if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) { 2768 ena_tx_ctx->l4_csum_enable = 1; 2769 if (mss) { 2770 ena_tx_ctx->tso_enable = 1; 2771 ena_meta->l4_hdr_len = tcp_hdr(skb)->doff; 2772 ena_tx_ctx->l4_csum_partial = 0; 2773 } else { 2774 ena_tx_ctx->tso_enable = 0; 2775 ena_meta->l4_hdr_len = 0; 2776 ena_tx_ctx->l4_csum_partial = 1; 2777 } 2778 2779 switch (ip_hdr(skb)->version) { 2780 case IPVERSION: 2781 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 2782 if (ip_hdr(skb)->frag_off & htons(IP_DF)) 2783 ena_tx_ctx->df = 1; 2784 if (mss) 2785 ena_tx_ctx->l3_csum_enable = 1; 2786 l4_protocol = ip_hdr(skb)->protocol; 2787 break; 2788 case 6: 2789 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 2790 l4_protocol = ipv6_hdr(skb)->nexthdr; 2791 break; 2792 default: 2793 break; 2794 } 2795 2796 if (l4_protocol == IPPROTO_TCP) 2797 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 2798 else 2799 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 2800 2801 ena_meta->mss = mss; 2802 ena_meta->l3_hdr_len = skb_network_header_len(skb); 2803 ena_meta->l3_hdr_offset = skb_network_offset(skb); 2804 ena_tx_ctx->meta_valid = 1; 2805 2806 } else { 2807 ena_tx_ctx->meta_valid = 0; 2808 } 2809 } 2810 2811 static int ena_check_and_linearize_skb(struct ena_ring *tx_ring, 2812 struct sk_buff *skb) 2813 { 2814 int num_frags, header_len, rc; 2815 2816 num_frags = skb_shinfo(skb)->nr_frags; 2817 header_len = skb_headlen(skb); 2818 2819 if (num_frags < tx_ring->sgl_size) 2820 return 0; 2821 2822 if ((num_frags == tx_ring->sgl_size) && 2823 (header_len < tx_ring->tx_max_header_size)) 2824 return 0; 2825 2826 u64_stats_update_begin(&tx_ring->syncp); 2827 tx_ring->tx_stats.linearize++; 2828 u64_stats_update_end(&tx_ring->syncp); 2829 2830 rc = skb_linearize(skb); 2831 if (unlikely(rc)) { 2832 u64_stats_update_begin(&tx_ring->syncp); 2833 tx_ring->tx_stats.linearize_failed++; 2834 u64_stats_update_end(&tx_ring->syncp); 2835 } 2836 2837 return rc; 2838 } 2839 2840 static int ena_tx_map_skb(struct ena_ring *tx_ring, 2841 struct ena_tx_buffer *tx_info, 2842 struct 
sk_buff *skb, 2843 void **push_hdr, 2844 u16 *header_len) 2845 { 2846 struct ena_adapter *adapter = tx_ring->adapter; 2847 struct ena_com_buf *ena_buf; 2848 dma_addr_t dma; 2849 u32 skb_head_len, frag_len, last_frag; 2850 u16 push_len = 0; 2851 u16 delta = 0; 2852 int i = 0; 2853 2854 skb_head_len = skb_headlen(skb); 2855 tx_info->skb = skb; 2856 ena_buf = tx_info->bufs; 2857 2858 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2859 /* When the device is in LLQ mode, the driver will copy 2860 * the header into the device memory space. 2861 * The ena_com layer assumes the header is in a linear 2862 * memory space. 2863 * This assumption might be wrong since part of the header 2864 * can be in the fragmented buffers. 2865 * Use skb_header_pointer to make sure the header is in a 2866 * linear memory space. 2867 */ 2868 2869 push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size); 2870 *push_hdr = skb_header_pointer(skb, 0, push_len, 2871 tx_ring->push_buf_intermediate_buf); 2872 *header_len = push_len; 2873 if (unlikely(skb->data != *push_hdr)) { 2874 u64_stats_update_begin(&tx_ring->syncp); 2875 tx_ring->tx_stats.llq_buffer_copy++; 2876 u64_stats_update_end(&tx_ring->syncp); 2877 2878 delta = push_len - skb_head_len; 2879 } 2880 } else { 2881 *push_hdr = NULL; 2882 *header_len = min_t(u32, skb_head_len, 2883 tx_ring->tx_max_header_size); 2884 } 2885 2886 netif_dbg(adapter, tx_queued, adapter->netdev, 2887 "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, 2888 *push_hdr, push_len); 2889 2890 if (skb_head_len > push_len) { 2891 dma = dma_map_single(tx_ring->dev, skb->data + push_len, 2892 skb_head_len - push_len, DMA_TO_DEVICE); 2893 if (unlikely(dma_mapping_error(tx_ring->dev, dma))) 2894 goto error_report_dma_error; 2895 2896 ena_buf->paddr = dma; 2897 ena_buf->len = skb_head_len - push_len; 2898 2899 ena_buf++; 2900 tx_info->num_of_bufs++; 2901 tx_info->map_linear_data = 1; 2902 } else { 2903 tx_info->map_linear_data = 0; 2904 } 2905 2906 last_frag = skb_shinfo(skb)->nr_frags; 2907 2908 for (i = 0; i < last_frag; i++) { 2909 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2910 2911 frag_len = skb_frag_size(frag); 2912 2913 if (unlikely(delta >= frag_len)) { 2914 delta -= frag_len; 2915 continue; 2916 } 2917 2918 dma = skb_frag_dma_map(tx_ring->dev, frag, delta, 2919 frag_len - delta, DMA_TO_DEVICE); 2920 if (unlikely(dma_mapping_error(tx_ring->dev, dma))) 2921 goto error_report_dma_error; 2922 2923 ena_buf->paddr = dma; 2924 ena_buf->len = frag_len - delta; 2925 ena_buf++; 2926 tx_info->num_of_bufs++; 2927 delta = 0; 2928 } 2929 2930 return 0; 2931 2932 error_report_dma_error: 2933 u64_stats_update_begin(&tx_ring->syncp); 2934 tx_ring->tx_stats.dma_mapping_err++; 2935 u64_stats_update_end(&tx_ring->syncp); 2936 netdev_warn(adapter->netdev, "failed to map skb\n"); 2937 2938 tx_info->skb = NULL; 2939 2940 tx_info->num_of_bufs += i; 2941 ena_unmap_tx_buff(tx_ring, tx_info); 2942 2943 return -EINVAL; 2944 } 2945 2946 /* Called with netif_tx_lock. 
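 * The stack holds the per-queue xmit lock around ndo_start_xmit(), so
 * transmissions on the same TX queue are already serialized here.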
*/ 2947 static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) 2948 { 2949 struct ena_adapter *adapter = netdev_priv(dev); 2950 struct ena_tx_buffer *tx_info; 2951 struct ena_com_tx_ctx ena_tx_ctx; 2952 struct ena_ring *tx_ring; 2953 struct netdev_queue *txq; 2954 void *push_hdr; 2955 u16 next_to_use, req_id, header_len; 2956 int qid, rc; 2957 2958 netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); 2959 /* Determine which tx ring we will be placed on */ 2960 qid = skb_get_queue_mapping(skb); 2961 tx_ring = &adapter->tx_ring[qid]; 2962 txq = netdev_get_tx_queue(dev, qid); 2963 2964 rc = ena_check_and_linearize_skb(tx_ring, skb); 2965 if (unlikely(rc)) 2966 goto error_drop_packet; 2967 2968 skb_tx_timestamp(skb); 2969 2970 next_to_use = tx_ring->next_to_use; 2971 req_id = tx_ring->free_ids[next_to_use]; 2972 tx_info = &tx_ring->tx_buffer_info[req_id]; 2973 tx_info->num_of_bufs = 0; 2974 2975 WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); 2976 2977 rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len); 2978 if (unlikely(rc)) 2979 goto error_drop_packet; 2980 2981 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); 2982 ena_tx_ctx.ena_bufs = tx_info->bufs; 2983 ena_tx_ctx.push_header = push_hdr; 2984 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2985 ena_tx_ctx.req_id = req_id; 2986 ena_tx_ctx.header_len = header_len; 2987 2988 /* set flags and meta data */ 2989 ena_tx_csum(&ena_tx_ctx, skb); 2990 2991 rc = ena_xmit_common(dev, 2992 tx_ring, 2993 tx_info, 2994 &ena_tx_ctx, 2995 next_to_use, 2996 skb->len); 2997 if (rc) 2998 goto error_unmap_dma; 2999 3000 netdev_tx_sent_queue(txq, skb->len); 3001 3002 /* stop the queue when no more space is available; the packet can need up 3003 * to sgl_size + 2 descriptors: one for the meta descriptor and one for the header 3004 * (if the header is larger than tx_max_header_size). 3005 */ 3006 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3007 tx_ring->sgl_size + 2))) { 3008 netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", 3009 __func__, qid); 3010 3011 netif_tx_stop_queue(txq); 3012 u64_stats_update_begin(&tx_ring->syncp); 3013 tx_ring->tx_stats.queue_stop++; 3014 u64_stats_update_end(&tx_ring->syncp); 3015 3016 /* There is a rare condition where this function decides to 3017 * stop the queue but meanwhile clean_tx_irq updates 3018 * next_to_completion and terminates. 3019 * The queue will remain stopped forever. 3020 * To solve this issue add an mb() to make sure that 3021 * the netif_tx_stop_queue() write is visible before checking if 3022 * there is additional space in the queue. 3023 */ 3024 smp_mb(); 3025 3026 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3027 ENA_TX_WAKEUP_THRESH)) { 3028 netif_tx_wake_queue(txq); 3029 u64_stats_update_begin(&tx_ring->syncp); 3030 tx_ring->tx_stats.queue_wakeup++; 3031 u64_stats_update_end(&tx_ring->syncp); 3032 } 3033 } 3034 3035 if (netif_xmit_stopped(txq) || !netdev_xmit_more()) { 3036 /* trigger the dma engine. 
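 * Doorbell writes are batched: when netdev_xmit_more() reports more pending
 * packets and the queue was not stopped, the doorbell is deferred to a later
 * packet to reduce MMIO writes.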
ena_com_write_sq_doorbell() 3037 * has a mb 3038 */ 3039 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3040 u64_stats_update_begin(&tx_ring->syncp); 3041 tx_ring->tx_stats.doorbells++; 3042 u64_stats_update_end(&tx_ring->syncp); 3043 } 3044 3045 return NETDEV_TX_OK; 3046 3047 error_unmap_dma: 3048 ena_unmap_tx_buff(tx_ring, tx_info); 3049 tx_info->skb = NULL; 3050 3051 error_drop_packet: 3052 dev_kfree_skb(skb); 3053 return NETDEV_TX_OK; 3054 } 3055 3056 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, 3057 struct net_device *sb_dev) 3058 { 3059 u16 qid; 3060 /* we suspect that this is good for in--kernel network services that 3061 * want to loop incoming skb rx to tx in normal user generated traffic, 3062 * most probably we will not get to this 3063 */ 3064 if (skb_rx_queue_recorded(skb)) 3065 qid = skb_get_rx_queue(skb); 3066 else 3067 qid = netdev_pick_tx(dev, skb, NULL); 3068 3069 return qid; 3070 } 3071 3072 static void ena_config_host_info(struct ena_com_dev *ena_dev, 3073 struct pci_dev *pdev) 3074 { 3075 struct ena_admin_host_info *host_info; 3076 int rc; 3077 3078 /* Allocate only the host info */ 3079 rc = ena_com_allocate_host_info(ena_dev); 3080 if (rc) { 3081 pr_err("Cannot allocate host info\n"); 3082 return; 3083 } 3084 3085 host_info = ena_dev->host_attr.host_info; 3086 3087 host_info->bdf = (pdev->bus->number << 8) | pdev->devfn; 3088 host_info->os_type = ENA_ADMIN_OS_LINUX; 3089 host_info->kernel_ver = LINUX_VERSION_CODE; 3090 strlcpy(host_info->kernel_ver_str, utsname()->version, 3091 sizeof(host_info->kernel_ver_str) - 1); 3092 host_info->os_dist = 0; 3093 strncpy(host_info->os_dist_str, utsname()->release, 3094 sizeof(host_info->os_dist_str) - 1); 3095 host_info->driver_version = 3096 (DRV_MODULE_VER_MAJOR) | 3097 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 3098 (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) | 3099 ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT); 3100 host_info->num_cpus = num_online_cpus(); 3101 3102 host_info->driver_supported_features = 3103 ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK; 3104 3105 rc = ena_com_set_host_attributes(ena_dev); 3106 if (rc) { 3107 if (rc == -EOPNOTSUPP) 3108 pr_warn("Cannot set host attributes\n"); 3109 else 3110 pr_err("Cannot set host attributes\n"); 3111 3112 goto err; 3113 } 3114 3115 return; 3116 3117 err: 3118 ena_com_delete_host_info(ena_dev); 3119 } 3120 3121 static void ena_config_debug_area(struct ena_adapter *adapter) 3122 { 3123 u32 debug_area_size; 3124 int rc, ss_count; 3125 3126 ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS); 3127 if (ss_count <= 0) { 3128 netif_err(adapter, drv, adapter->netdev, 3129 "SS count is negative\n"); 3130 return; 3131 } 3132 3133 /* allocate 32 bytes for each string and 64bit for the value */ 3134 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 3135 3136 rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); 3137 if (rc) { 3138 pr_err("Cannot allocate debug area\n"); 3139 return; 3140 } 3141 3142 rc = ena_com_set_host_attributes(adapter->ena_dev); 3143 if (rc) { 3144 if (rc == -EOPNOTSUPP) 3145 netif_warn(adapter, drv, adapter->netdev, 3146 "Cannot set host attributes\n"); 3147 else 3148 netif_err(adapter, drv, adapter->netdev, 3149 "Cannot set host attributes\n"); 3150 goto err; 3151 } 3152 3153 return; 3154 err: 3155 ena_com_delete_debug_area(adapter->ena_dev); 3156 } 3157 3158 static void ena_get_stats64(struct net_device *netdev, 3159 struct rtnl_link_stats64 
*stats) 3160 { 3161 struct ena_adapter *adapter = netdev_priv(netdev); 3162 struct ena_ring *rx_ring, *tx_ring; 3163 unsigned int start; 3164 u64 rx_drops; 3165 int i; 3166 3167 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3168 return; 3169 3170 for (i = 0; i < adapter->num_io_queues; i++) { 3171 u64 bytes, packets; 3172 3173 tx_ring = &adapter->tx_ring[i]; 3174 3175 do { 3176 start = u64_stats_fetch_begin_irq(&tx_ring->syncp); 3177 packets = tx_ring->tx_stats.cnt; 3178 bytes = tx_ring->tx_stats.bytes; 3179 } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); 3180 3181 stats->tx_packets += packets; 3182 stats->tx_bytes += bytes; 3183 3184 rx_ring = &adapter->rx_ring[i]; 3185 3186 do { 3187 start = u64_stats_fetch_begin_irq(&rx_ring->syncp); 3188 packets = rx_ring->rx_stats.cnt; 3189 bytes = rx_ring->rx_stats.bytes; 3190 } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); 3191 3192 stats->rx_packets += packets; 3193 stats->rx_bytes += bytes; 3194 } 3195 3196 do { 3197 start = u64_stats_fetch_begin_irq(&adapter->syncp); 3198 rx_drops = adapter->dev_stats.rx_drops; 3199 } while (u64_stats_fetch_retry_irq(&adapter->syncp, start)); 3200 3201 stats->rx_dropped = rx_drops; 3202 3203 stats->multicast = 0; 3204 stats->collisions = 0; 3205 3206 stats->rx_length_errors = 0; 3207 stats->rx_crc_errors = 0; 3208 stats->rx_frame_errors = 0; 3209 stats->rx_fifo_errors = 0; 3210 stats->rx_missed_errors = 0; 3211 stats->tx_window_errors = 0; 3212 3213 stats->rx_errors = 0; 3214 stats->tx_errors = 0; 3215 } 3216 3217 static const struct net_device_ops ena_netdev_ops = { 3218 .ndo_open = ena_open, 3219 .ndo_stop = ena_close, 3220 .ndo_start_xmit = ena_start_xmit, 3221 .ndo_select_queue = ena_select_queue, 3222 .ndo_get_stats64 = ena_get_stats64, 3223 .ndo_tx_timeout = ena_tx_timeout, 3224 .ndo_change_mtu = ena_change_mtu, 3225 .ndo_set_mac_address = NULL, 3226 .ndo_validate_addr = eth_validate_addr, 3227 .ndo_bpf = ena_xdp, 3228 }; 3229 3230 static int ena_device_validate_params(struct ena_adapter *adapter, 3231 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3232 { 3233 struct net_device *netdev = adapter->netdev; 3234 int rc; 3235 3236 rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr, 3237 adapter->mac_addr); 3238 if (!rc) { 3239 netif_err(adapter, drv, netdev, 3240 "Error, mac address are different\n"); 3241 return -EINVAL; 3242 } 3243 3244 if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { 3245 netif_err(adapter, drv, netdev, 3246 "Error, device max mtu is smaller than netdev MTU\n"); 3247 return -EINVAL; 3248 } 3249 3250 return 0; 3251 } 3252 3253 static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, 3254 struct ena_com_dev_get_features_ctx *get_feat_ctx, 3255 bool *wd_state) 3256 { 3257 struct device *dev = &pdev->dev; 3258 bool readless_supported; 3259 u32 aenq_groups; 3260 int dma_width; 3261 int rc; 3262 3263 rc = ena_com_mmio_reg_read_request_init(ena_dev); 3264 if (rc) { 3265 dev_err(dev, "failed to init mmio read less\n"); 3266 return rc; 3267 } 3268 3269 /* The PCIe configuration space revision id indicate if mmio reg 3270 * read is disabled 3271 */ 3272 readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); 3273 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 3274 3275 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 3276 if (rc) { 3277 dev_err(dev, "Can not reset device\n"); 3278 goto err_mmio_read_less; 3279 } 3280 3281 rc = ena_com_validate_version(ena_dev); 3282 if (rc) { 3283 dev_err(dev, "device version is 
too low\n"); 3284 goto err_mmio_read_less; 3285 } 3286 3287 dma_width = ena_com_get_dma_width(ena_dev); 3288 if (dma_width < 0) { 3289 dev_err(dev, "Invalid dma width value %d", dma_width); 3290 rc = dma_width; 3291 goto err_mmio_read_less; 3292 } 3293 3294 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); 3295 if (rc) { 3296 dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); 3297 goto err_mmio_read_less; 3298 } 3299 3300 rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); 3301 if (rc) { 3302 dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", 3303 rc); 3304 goto err_mmio_read_less; 3305 } 3306 3307 /* ENA admin level init */ 3308 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 3309 if (rc) { 3310 dev_err(dev, 3311 "Can not initialize ena admin queue with device\n"); 3312 goto err_mmio_read_less; 3313 } 3314 3315 /* To enable the msix interrupts the driver needs to know the number 3316 * of queues. So the driver uses polling mode to retrieve this 3317 * information 3318 */ 3319 ena_com_set_admin_polling_mode(ena_dev, true); 3320 3321 ena_config_host_info(ena_dev, pdev); 3322 3323 /* Get Device Attributes*/ 3324 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 3325 if (rc) { 3326 dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc); 3327 goto err_admin_init; 3328 } 3329 3330 /* Try to turn all the available aenq groups */ 3331 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 3332 BIT(ENA_ADMIN_FATAL_ERROR) | 3333 BIT(ENA_ADMIN_WARNING) | 3334 BIT(ENA_ADMIN_NOTIFICATION) | 3335 BIT(ENA_ADMIN_KEEP_ALIVE); 3336 3337 aenq_groups &= get_feat_ctx->aenq.supported_groups; 3338 3339 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 3340 if (rc) { 3341 dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc); 3342 goto err_admin_init; 3343 } 3344 3345 *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 3346 3347 return 0; 3348 3349 err_admin_init: 3350 ena_com_delete_host_info(ena_dev); 3351 ena_com_admin_destroy(ena_dev); 3352 err_mmio_read_less: 3353 ena_com_mmio_reg_read_request_destroy(ena_dev); 3354 3355 return rc; 3356 } 3357 3358 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) 3359 { 3360 struct ena_com_dev *ena_dev = adapter->ena_dev; 3361 struct device *dev = &adapter->pdev->dev; 3362 int rc; 3363 3364 rc = ena_enable_msix(adapter); 3365 if (rc) { 3366 dev_err(dev, "Can not reserve msix vectors\n"); 3367 return rc; 3368 } 3369 3370 ena_setup_mgmnt_intr(adapter); 3371 3372 rc = ena_request_mgmnt_irq(adapter); 3373 if (rc) { 3374 dev_err(dev, "Can not setup management interrupts\n"); 3375 goto err_disable_msix; 3376 } 3377 3378 ena_com_set_admin_polling_mode(ena_dev, false); 3379 3380 ena_com_admin_aenq_enable(ena_dev); 3381 3382 return 0; 3383 3384 err_disable_msix: 3385 ena_disable_msix(adapter); 3386 3387 return rc; 3388 } 3389 3390 static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) 3391 { 3392 struct net_device *netdev = adapter->netdev; 3393 struct ena_com_dev *ena_dev = adapter->ena_dev; 3394 bool dev_up; 3395 3396 if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) 3397 return; 3398 3399 netif_carrier_off(netdev); 3400 3401 del_timer_sync(&adapter->timer_service); 3402 3403 dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 3404 adapter->dev_up_before_reset = dev_up; 3405 if (!graceful) 3406 ena_com_set_admin_running_state(ena_dev, false); 3407 3408 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3409 ena_down(adapter); 3410 3411 /* Stop the device from sending AENQ events 
(in case reset flag is set 3412 * and device is up, ena_down() already reset the device. 3413 */ 3414 if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) 3415 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 3416 3417 ena_free_mgmnt_irq(adapter); 3418 3419 ena_disable_msix(adapter); 3420 3421 ena_com_abort_admin_commands(ena_dev); 3422 3423 ena_com_wait_for_abort_completion(ena_dev); 3424 3425 ena_com_admin_destroy(ena_dev); 3426 3427 ena_com_mmio_reg_read_request_destroy(ena_dev); 3428 3429 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3430 3431 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3432 clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 3433 } 3434 3435 static int ena_restore_device(struct ena_adapter *adapter) 3436 { 3437 struct ena_com_dev_get_features_ctx get_feat_ctx; 3438 struct ena_com_dev *ena_dev = adapter->ena_dev; 3439 struct pci_dev *pdev = adapter->pdev; 3440 bool wd_state; 3441 int rc; 3442 3443 set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); 3444 rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); 3445 if (rc) { 3446 dev_err(&pdev->dev, "Can not initialize device\n"); 3447 goto err; 3448 } 3449 adapter->wd_state = wd_state; 3450 3451 rc = ena_device_validate_params(adapter, &get_feat_ctx); 3452 if (rc) { 3453 dev_err(&pdev->dev, "Validation of device parameters failed\n"); 3454 goto err_device_destroy; 3455 } 3456 3457 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3458 if (rc) { 3459 dev_err(&pdev->dev, "Enable MSI-X failed\n"); 3460 goto err_device_destroy; 3461 } 3462 /* If the interface was up before the reset bring it up */ 3463 if (adapter->dev_up_before_reset) { 3464 rc = ena_up(adapter); 3465 if (rc) { 3466 dev_err(&pdev->dev, "Failed to create I/O queues\n"); 3467 goto err_disable_msix; 3468 } 3469 } 3470 3471 set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 3472 3473 clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); 3474 if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) 3475 netif_carrier_on(adapter->netdev); 3476 3477 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); 3478 dev_err(&pdev->dev, 3479 "Device reset completed successfully, Driver info: %s\n", 3480 version); 3481 3482 return rc; 3483 err_disable_msix: 3484 ena_free_mgmnt_irq(adapter); 3485 ena_disable_msix(adapter); 3486 err_device_destroy: 3487 ena_com_abort_admin_commands(ena_dev); 3488 ena_com_wait_for_abort_completion(ena_dev); 3489 ena_com_admin_destroy(ena_dev); 3490 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); 3491 ena_com_mmio_reg_read_request_destroy(ena_dev); 3492 err: 3493 clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 3494 clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); 3495 dev_err(&pdev->dev, 3496 "Reset attempt failed. 
Can not reset the device\n"); 3497 3498 return rc; 3499 } 3500 3501 static void ena_fw_reset_device(struct work_struct *work) 3502 { 3503 struct ena_adapter *adapter = 3504 container_of(work, struct ena_adapter, reset_task); 3505 struct pci_dev *pdev = adapter->pdev; 3506 3507 if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 3508 dev_err(&pdev->dev, 3509 "device reset schedule while reset bit is off\n"); 3510 return; 3511 } 3512 rtnl_lock(); 3513 ena_destroy_device(adapter, false); 3514 ena_restore_device(adapter); 3515 rtnl_unlock(); 3516 } 3517 3518 static int check_for_rx_interrupt_queue(struct ena_adapter *adapter, 3519 struct ena_ring *rx_ring) 3520 { 3521 if (likely(rx_ring->first_interrupt)) 3522 return 0; 3523 3524 if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) 3525 return 0; 3526 3527 rx_ring->no_interrupt_event_cnt++; 3528 3529 if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) { 3530 netif_err(adapter, rx_err, adapter->netdev, 3531 "Potential MSIX issue on Rx side Queue = %d. Reset the device\n", 3532 rx_ring->qid); 3533 adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT; 3534 smp_mb__before_atomic(); 3535 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3536 return -EIO; 3537 } 3538 3539 return 0; 3540 } 3541 3542 static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, 3543 struct ena_ring *tx_ring) 3544 { 3545 struct ena_tx_buffer *tx_buf; 3546 unsigned long last_jiffies; 3547 u32 missed_tx = 0; 3548 int i, rc = 0; 3549 3550 for (i = 0; i < tx_ring->ring_size; i++) { 3551 tx_buf = &tx_ring->tx_buffer_info[i]; 3552 last_jiffies = tx_buf->last_jiffies; 3553 3554 if (last_jiffies == 0) 3555 /* no pending Tx at this location */ 3556 continue; 3557 3558 if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies + 3559 2 * adapter->missing_tx_completion_to))) { 3560 /* If after graceful period interrupt is still not 3561 * received, we schedule a reset 3562 */ 3563 netif_err(adapter, tx_err, adapter->netdev, 3564 "Potential MSIX issue on Tx side Queue = %d. Reset the device\n", 3565 tx_ring->qid); 3566 adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT; 3567 smp_mb__before_atomic(); 3568 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3569 return -EIO; 3570 } 3571 3572 if (unlikely(time_is_before_jiffies(last_jiffies + 3573 adapter->missing_tx_completion_to))) { 3574 if (!tx_buf->print_once) 3575 netif_notice(adapter, tx_err, adapter->netdev, 3576 "Found a Tx that wasn't completed on time, qid %d, index %d.\n", 3577 tx_ring->qid, i); 3578 3579 tx_buf->print_once = 1; 3580 missed_tx++; 3581 } 3582 } 3583 3584 if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { 3585 netif_err(adapter, tx_err, adapter->netdev, 3586 "The number of lost tx completions is above the threshold (%d > %d). 
Reset the device\n", 3587 missed_tx, 3588 adapter->missing_tx_completion_threshold); 3589 adapter->reset_reason = 3590 ENA_REGS_RESET_MISS_TX_CMPL; 3591 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3592 rc = -EIO; 3593 } 3594 3595 u64_stats_update_begin(&tx_ring->syncp); 3596 tx_ring->tx_stats.missed_tx = missed_tx; 3597 u64_stats_update_end(&tx_ring->syncp); 3598 3599 return rc; 3600 } 3601 3602 static void check_for_missing_completions(struct ena_adapter *adapter) 3603 { 3604 struct ena_ring *tx_ring; 3605 struct ena_ring *rx_ring; 3606 int i, budget, rc; 3607 int io_queue_count; 3608 3609 io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; 3610 /* Make sure the driver doesn't race with another context turning the device off */ 3611 smp_rmb(); 3612 3613 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3614 return; 3615 3616 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) 3617 return; 3618 3619 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 3620 return; 3621 3622 budget = ENA_MONITORED_TX_QUEUES; 3623 3624 for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { 3625 tx_ring = &adapter->tx_ring[i]; 3626 rx_ring = &adapter->rx_ring[i]; 3627 3628 rc = check_missing_comp_in_tx_queue(adapter, tx_ring); 3629 if (unlikely(rc)) 3630 return; 3631 3632 rc = !ENA_IS_XDP_INDEX(adapter, i) ? 3633 check_for_rx_interrupt_queue(adapter, rx_ring) : 0; 3634 if (unlikely(rc)) 3635 return; 3636 3637 budget--; 3638 if (!budget) 3639 break; 3640 } 3641 3642 adapter->last_monitored_tx_qid = i % io_queue_count; 3643 } 3644 3645 /* trigger napi schedule after 2 consecutive detections */ 3646 #define EMPTY_RX_REFILL 2 3647 /* For the rare case where the device runs out of Rx descriptors and the 3648 * napi handler failed to refill new Rx descriptors (due to a lack of memory 3649 * for example). 3650 * This case will lead to a deadlock: 3651 * The device won't send interrupts since all the new Rx packets will be dropped 3652 * The napi handler won't allocate new Rx descriptors so the device won't be 3653 * able to send new packets. 3654 * 3655 * This scenario can happen when the kernel's vm.min_free_kbytes is too small. 3656 * It is recommended to have at least 512MB, with a minimum of 128MB for 3657 * a constrained environment. 
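 * (For example, sysctl -w vm.min_free_kbytes=524288 reserves 512MB.)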
3658 * 3659 * When such a situation is detected - Reschedule napi 3660 */ 3661 static void check_for_empty_rx_ring(struct ena_adapter *adapter) 3662 { 3663 struct ena_ring *rx_ring; 3664 int i, refill_required; 3665 3666 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3667 return; 3668 3669 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) 3670 return; 3671 3672 for (i = 0; i < adapter->num_io_queues; i++) { 3673 rx_ring = &adapter->rx_ring[i]; 3674 3675 refill_required = 3676 ena_com_free_desc(rx_ring->ena_com_io_sq); 3677 if (unlikely(refill_required == (rx_ring->ring_size - 1))) { 3678 rx_ring->empty_rx_queue++; 3679 3680 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { 3681 u64_stats_update_begin(&rx_ring->syncp); 3682 rx_ring->rx_stats.empty_rx_ring++; 3683 u64_stats_update_end(&rx_ring->syncp); 3684 3685 netif_err(adapter, drv, adapter->netdev, 3686 "trigger refill for ring %d\n", i); 3687 3688 napi_schedule(rx_ring->napi); 3689 rx_ring->empty_rx_queue = 0; 3690 } 3691 } else { 3692 rx_ring->empty_rx_queue = 0; 3693 } 3694 } 3695 } 3696 3697 /* Check for keep alive expiration */ 3698 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 3699 { 3700 unsigned long keep_alive_expired; 3701 3702 if (!adapter->wd_state) 3703 return; 3704 3705 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3706 return; 3707 3708 keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + 3709 adapter->keep_alive_timeout); 3710 if (unlikely(time_is_before_jiffies(keep_alive_expired))) { 3711 netif_err(adapter, drv, adapter->netdev, 3712 "Keep alive watchdog timeout.\n"); 3713 u64_stats_update_begin(&adapter->syncp); 3714 adapter->dev_stats.wd_expired++; 3715 u64_stats_update_end(&adapter->syncp); 3716 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; 3717 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3718 } 3719 } 3720 3721 static void check_for_admin_com_state(struct ena_adapter *adapter) 3722 { 3723 if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { 3724 netif_err(adapter, drv, adapter->netdev, 3725 "ENA admin queue is not in running state!\n"); 3726 u64_stats_update_begin(&adapter->syncp); 3727 adapter->dev_stats.admin_q_pause++; 3728 u64_stats_update_end(&adapter->syncp); 3729 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO; 3730 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3731 } 3732 } 3733 3734 static void ena_update_hints(struct ena_adapter *adapter, 3735 struct ena_admin_ena_hw_hints *hints) 3736 { 3737 struct net_device *netdev = adapter->netdev; 3738 3739 if (hints->admin_completion_tx_timeout) 3740 adapter->ena_dev->admin_queue.completion_timeout = 3741 hints->admin_completion_tx_timeout * 1000; 3742 3743 if (hints->mmio_read_timeout) 3744 /* convert to usec */ 3745 adapter->ena_dev->mmio_read.reg_read_to = 3746 hints->mmio_read_timeout * 1000; 3747 3748 if (hints->missed_tx_completion_count_threshold_to_reset) 3749 adapter->missing_tx_completion_threshold = 3750 hints->missed_tx_completion_count_threshold_to_reset; 3751 3752 if (hints->missing_tx_completion_timeout) { 3753 if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3754 adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT; 3755 else 3756 adapter->missing_tx_completion_to = 3757 msecs_to_jiffies(hints->missing_tx_completion_timeout); 3758 } 3759 3760 if (hints->netdev_wd_timeout) 3761 netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout); 3762 3763 if (hints->driver_watchdog_timeout) { 3764 if (hints->driver_watchdog_timeout 
== ENA_HW_HINTS_NO_TIMEOUT) 3765 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3766 else 3767 adapter->keep_alive_timeout = 3768 msecs_to_jiffies(hints->driver_watchdog_timeout); 3769 } 3770 } 3771 3772 static void ena_update_host_info(struct ena_admin_host_info *host_info, 3773 struct net_device *netdev) 3774 { 3775 host_info->supported_network_features[0] = 3776 netdev->features & GENMASK_ULL(31, 0); 3777 host_info->supported_network_features[1] = 3778 (netdev->features & GENMASK_ULL(63, 32)) >> 32; 3779 } 3780 3781 static void ena_timer_service(struct timer_list *t) 3782 { 3783 struct ena_adapter *adapter = from_timer(adapter, t, timer_service); 3784 u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr; 3785 struct ena_admin_host_info *host_info = 3786 adapter->ena_dev->host_attr.host_info; 3787 3788 check_for_missing_keep_alive(adapter); 3789 3790 check_for_admin_com_state(adapter); 3791 3792 check_for_missing_completions(adapter); 3793 3794 check_for_empty_rx_ring(adapter); 3795 3796 if (debug_area) 3797 ena_dump_stats_to_buf(adapter, debug_area); 3798 3799 if (host_info) 3800 ena_update_host_info(host_info, adapter->netdev); 3801 3802 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 3803 netif_err(adapter, drv, adapter->netdev, 3804 "Trigger reset is on\n"); 3805 ena_dump_stats_to_dmesg(adapter); 3806 queue_work(ena_wq, &adapter->reset_task); 3807 return; 3808 } 3809 3810 /* Reset the timer */ 3811 mod_timer(&adapter->timer_service, jiffies + HZ); 3812 } 3813 3814 static int ena_calc_max_io_queue_num(struct pci_dev *pdev, 3815 struct ena_com_dev *ena_dev, 3816 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3817 { 3818 int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 3819 3820 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 3821 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 3822 &get_feat_ctx->max_queue_ext.max_queue_ext; 3823 io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num, 3824 max_queue_ext->max_rx_cq_num); 3825 3826 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 3827 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 3828 } else { 3829 struct ena_admin_queue_feature_desc *max_queues = 3830 &get_feat_ctx->max_queues; 3831 io_tx_sq_num = max_queues->max_sq_num; 3832 io_tx_cq_num = max_queues->max_cq_num; 3833 io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num); 3834 } 3835 3836 /* In case of LLQ use the llq fields for the tx SQ/CQ */ 3837 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 3838 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 3839 3840 max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); 3841 max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num); 3842 max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num); 3843 max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); 3844 /* 1 IRQ for mgmnt and 1 IRQ for each IO queue */ 3845 max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); 3846 if (unlikely(!max_num_io_queues)) { 3847 dev_err(&pdev->dev, "The device doesn't have io queues\n"); 3848 return -EFAULT; 3849 } 3850 3851 return max_num_io_queues; 3852 } 3853 3854 static int ena_set_queues_placement_policy(struct pci_dev *pdev, 3855 struct ena_com_dev *ena_dev, 3856 struct ena_admin_feature_llq_desc *llq, 3857 struct ena_llq_configurations *llq_default_configurations) 3858 { 3859 bool has_mem_bar; 3860 int rc; 3861 u32 llq_feature_mask; 3862 3863 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 3864 if 
(!(ena_dev->supported_features & llq_feature_mask)) { 3865 dev_err(&pdev->dev, 3866 "LLQ is not supported. Fallback to host mode policy.\n"); 3867 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 3868 return 0; 3869 } 3870 3871 has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); 3872 3873 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 3874 if (unlikely(rc)) { 3875 dev_err(&pdev->dev, 3876 "Failed to configure the device mode. Fallback to host mode policy.\n"); 3877 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 3878 return 0; 3879 } 3880 3881 /* Nothing to config, exit */ 3882 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 3883 return 0; 3884 3885 if (!has_mem_bar) { 3886 dev_err(&pdev->dev, 3887 "ENA device does not expose LLQ bar. Fallback to host mode policy.\n"); 3888 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 3889 return 0; 3890 } 3891 3892 ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, 3893 pci_resource_start(pdev, ENA_MEM_BAR), 3894 pci_resource_len(pdev, ENA_MEM_BAR)); 3895 3896 if (!ena_dev->mem_bar) 3897 return -EFAULT; 3898 3899 return 0; 3900 } 3901 3902 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, 3903 struct net_device *netdev) 3904 { 3905 netdev_features_t dev_features = 0; 3906 3907 /* Set offload features */ 3908 if (feat->offload.tx & 3909 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 3910 dev_features |= NETIF_F_IP_CSUM; 3911 3912 if (feat->offload.tx & 3913 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 3914 dev_features |= NETIF_F_IPV6_CSUM; 3915 3916 if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 3917 dev_features |= NETIF_F_TSO; 3918 3919 if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) 3920 dev_features |= NETIF_F_TSO6; 3921 3922 if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK) 3923 dev_features |= NETIF_F_TSO_ECN; 3924 3925 if (feat->offload.rx_supported & 3926 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 3927 dev_features |= NETIF_F_RXCSUM; 3928 3929 if (feat->offload.rx_supported & 3930 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 3931 dev_features |= NETIF_F_RXCSUM; 3932 3933 netdev->features = 3934 dev_features | 3935 NETIF_F_SG | 3936 NETIF_F_RXHASH | 3937 NETIF_F_HIGHDMA; 3938 3939 netdev->hw_features |= netdev->features; 3940 netdev->vlan_features |= netdev->features; 3941 } 3942 3943 static void ena_set_conf_feat_params(struct ena_adapter *adapter, 3944 struct ena_com_dev_get_features_ctx *feat) 3945 { 3946 struct net_device *netdev = adapter->netdev; 3947 3948 /* Copy mac address */ 3949 if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) { 3950 eth_hw_addr_random(netdev); 3951 ether_addr_copy(adapter->mac_addr, netdev->dev_addr); 3952 } else { 3953 ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr); 3954 ether_addr_copy(netdev->dev_addr, adapter->mac_addr); 3955 } 3956 3957 /* Set offload features */ 3958 ena_set_dev_offloads(feat, netdev); 3959 3960 adapter->max_mtu = feat->dev_attr.max_mtu; 3961 netdev->max_mtu = adapter->max_mtu; 3962 netdev->min_mtu = ENA_MIN_MTU; 3963 } 3964 3965 static int ena_rss_init_default(struct ena_adapter *adapter) 3966 { 3967 struct ena_com_dev *ena_dev = adapter->ena_dev; 3968 struct device *dev = &adapter->pdev->dev; 3969 int rc, i; 3970 u32 val; 3971 3972 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 3973 if (unlikely(rc)) { 3974 dev_err(dev, "Cannot init indirect
table\n"); 3975 goto err_rss_init; 3976 } 3977 3978 for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { 3979 val = ethtool_rxfh_indir_default(i, adapter->num_io_queues); 3980 rc = ena_com_indirect_table_fill_entry(ena_dev, i, 3981 ENA_IO_RXQ_IDX(val)); 3982 if (unlikely(rc && (rc != -EOPNOTSUPP))) { 3983 dev_err(dev, "Cannot fill indirect table\n"); 3984 goto err_fill_indir; 3985 } 3986 } 3987 3988 rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL, 3989 ENA_HASH_KEY_SIZE, 0xFFFFFFFF); 3990 if (unlikely(rc && (rc != -EOPNOTSUPP))) { 3991 dev_err(dev, "Cannot fill hash function\n"); 3992 goto err_fill_indir; 3993 } 3994 3995 rc = ena_com_set_default_hash_ctrl(ena_dev); 3996 if (unlikely(rc && (rc != -EOPNOTSUPP))) { 3997 dev_err(dev, "Cannot fill hash control\n"); 3998 goto err_fill_indir; 3999 } 4000 4001 return 0; 4002 4003 err_fill_indir: 4004 ena_com_rss_destroy(ena_dev); 4005 err_rss_init: 4006 4007 return rc; 4008 } 4009 4010 static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) 4011 { 4012 int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; 4013 4014 pci_release_selected_regions(pdev, release_bars); 4015 } 4016 4017 static void set_default_llq_configurations(struct ena_llq_configurations *llq_config) 4018 { 4019 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 4020 llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; 4021 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 4022 llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 4023 llq_config->llq_ring_entry_size_value = 128; 4024 } 4025 4026 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) 4027 { 4028 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 4029 struct ena_com_dev *ena_dev = ctx->ena_dev; 4030 u32 tx_queue_size = ENA_DEFAULT_RING_SIZE; 4031 u32 rx_queue_size = ENA_DEFAULT_RING_SIZE; 4032 u32 max_tx_queue_size; 4033 u32 max_rx_queue_size; 4034 4035 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 4036 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 4037 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 4038 max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, 4039 max_queue_ext->max_rx_sq_depth); 4040 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 4041 4042 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 4043 max_tx_queue_size = min_t(u32, max_tx_queue_size, 4044 llq->max_llq_depth); 4045 else 4046 max_tx_queue_size = min_t(u32, max_tx_queue_size, 4047 max_queue_ext->max_tx_sq_depth); 4048 4049 ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, 4050 max_queue_ext->max_per_packet_tx_descs); 4051 ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, 4052 max_queue_ext->max_per_packet_rx_descs); 4053 } else { 4054 struct ena_admin_queue_feature_desc *max_queues = 4055 &ctx->get_feat_ctx->max_queues; 4056 max_rx_queue_size = min_t(u32, max_queues->max_cq_depth, 4057 max_queues->max_sq_depth); 4058 max_tx_queue_size = max_queues->max_cq_depth; 4059 4060 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 4061 max_tx_queue_size = min_t(u32, max_tx_queue_size, 4062 llq->max_llq_depth); 4063 else 4064 max_tx_queue_size = min_t(u32, max_tx_queue_size, 4065 max_queues->max_sq_depth); 4066 4067 ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, 4068 max_queues->max_packet_tx_descs); 4069 ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, 4070 max_queues->max_packet_rx_descs); 4071 } 4072 4073 
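/* Derive the final ring sizes: round the device-reported maxima down to
 * powers of two, clamp the default TX/RX sizes into
 * [ENA_MIN_RING_SIZE, max], round the clamped values down to powers of
 * two as well, and report the results back through ctx.
 */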
max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); 4074 max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); 4075 4076 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, 4077 max_tx_queue_size); 4078 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, 4079 max_rx_queue_size); 4080 4081 tx_queue_size = rounddown_pow_of_two(tx_queue_size); 4082 rx_queue_size = rounddown_pow_of_two(rx_queue_size); 4083 4084 ctx->max_tx_queue_size = max_tx_queue_size; 4085 ctx->max_rx_queue_size = max_rx_queue_size; 4086 ctx->tx_queue_size = tx_queue_size; 4087 ctx->rx_queue_size = rx_queue_size; 4088 4089 return 0; 4090 } 4091 4092 /* ena_probe - Device Initialization Routine 4093 * @pdev: PCI device information struct 4094 * @ent: entry in ena_pci_tbl 4095 * 4096 * Returns 0 on success, negative on failure 4097 * 4098 * ena_probe initializes an adapter identified by a pci_dev structure. 4099 * It performs the OS initialization, configures the adapter private 4100 * structure, and resets the hardware. 4101 */ 4102 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 4103 { 4104 struct ena_com_dev_get_features_ctx get_feat_ctx; 4105 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 4106 struct ena_llq_configurations llq_config; 4107 struct ena_com_dev *ena_dev = NULL; 4108 struct ena_adapter *adapter; 4109 struct net_device *netdev; 4110 static int adapters_found; 4111 u32 max_num_io_queues; 4112 char *queue_type_str; 4113 bool wd_state; 4114 int bars, rc; 4115 4116 dev_dbg(&pdev->dev, "%s\n", __func__); 4117 4118 dev_info_once(&pdev->dev, "%s", version); 4119 4120 rc = pci_enable_device_mem(pdev); 4121 if (rc) { 4122 dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); 4123 return rc; 4124 } 4125 4126 pci_set_master(pdev); 4127 4128 ena_dev = vzalloc(sizeof(*ena_dev)); 4129 if (!ena_dev) { 4130 rc = -ENOMEM; 4131 goto err_disable_device; 4132 } 4133 4134 bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; 4135 rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); 4136 if (rc) { 4137 dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", 4138 rc); 4139 goto err_free_ena_dev; 4140 } 4141 4142 ena_dev->reg_bar = devm_ioremap(&pdev->dev, 4143 pci_resource_start(pdev, ENA_REG_BAR), 4144 pci_resource_len(pdev, ENA_REG_BAR)); 4145 if (!ena_dev->reg_bar) { 4146 dev_err(&pdev->dev, "failed to remap regs bar\n"); 4147 rc = -EFAULT; 4148 goto err_free_region; 4149 } 4150 4151 ena_dev->dmadev = &pdev->dev; 4152 4153 rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); 4154 if (rc) { 4155 dev_err(&pdev->dev, "ena device init failed\n"); 4156 if (rc == -ETIME) 4157 rc = -EPROBE_DEFER; 4158 goto err_free_region; 4159 } 4160 4161 set_default_llq_configurations(&llq_config); 4162 4163 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq, 4164 &llq_config); 4165 if (rc) { 4166 dev_err(&pdev->dev, "Failed to set queue placement policy\n"); 4167 goto err_device_destroy; 4168 } 4169 4170 calc_queue_ctx.ena_dev = ena_dev; 4171 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 4172 calc_queue_ctx.pdev = pdev; 4173 4174 /* Initial Tx and RX interrupt delay. Assumes 1 usec granularity.
4175 * Updated during device initialization with the real granularity 4176 */ 4177 ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; 4178 ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS; 4179 ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; 4180 max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx); 4181 rc = ena_calc_io_queue_size(&calc_queue_ctx); 4182 if (rc || !max_num_io_queues) { 4183 rc = -EFAULT; 4184 goto err_device_destroy; 4185 } 4186 4187 /* dev zeroed in init_etherdev */ 4188 netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues); 4189 if (!netdev) { 4190 dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); 4191 rc = -ENOMEM; 4192 goto err_device_destroy; 4193 } 4194 4195 SET_NETDEV_DEV(netdev, &pdev->dev); 4196 4197 adapter = netdev_priv(netdev); 4198 pci_set_drvdata(pdev, adapter); 4199 4200 adapter->ena_dev = ena_dev; 4201 adapter->netdev = netdev; 4202 adapter->pdev = pdev; 4203 4204 ena_set_conf_feat_params(adapter, &get_feat_ctx); 4205 4206 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 4207 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 4208 4209 adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; 4210 adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; 4211 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 4212 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 4213 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 4214 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 4215 4216 adapter->num_io_queues = max_num_io_queues; 4217 adapter->max_num_io_queues = max_num_io_queues; 4218 4219 adapter->xdp_first_ring = 0; 4220 adapter->xdp_num_queues = 0; 4221 4222 adapter->last_monitored_tx_qid = 0; 4223 4224 adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; 4225 adapter->wd_state = wd_state; 4226 4227 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found); 4228 4229 rc = ena_com_init_interrupt_moderation(adapter->ena_dev); 4230 if (rc) { 4231 dev_err(&pdev->dev, 4232 "Failed to query interrupt moderation feature\n"); 4233 goto err_netdev_destroy; 4234 } 4235 ena_init_io_rings(adapter, 4236 0, 4237 adapter->xdp_num_queues + 4238 adapter->num_io_queues); 4239 4240 netdev->netdev_ops = &ena_netdev_ops; 4241 netdev->watchdog_timeo = TX_TIMEOUT; 4242 ena_set_ethtool_ops(netdev); 4243 4244 netdev->priv_flags |= IFF_UNICAST_FLT; 4245 4246 u64_stats_init(&adapter->syncp); 4247 4248 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 4249 if (rc) { 4250 dev_err(&pdev->dev, 4251 "Failed to enable and set the admin interrupts\n"); 4252 goto err_worker_destroy; 4253 } 4254 rc = ena_rss_init_default(adapter); 4255 if (rc && (rc != -EOPNOTSUPP)) { 4256 dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc); 4257 goto err_free_msix; 4258 } 4259 4260 ena_config_debug_area(adapter); 4261 4262 memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); 4263 4264 netif_carrier_off(netdev); 4265 4266 rc = register_netdev(netdev); 4267 if (rc) { 4268 dev_err(&pdev->dev, "Cannot register net device\n"); 4269 goto err_rss; 4270 } 4271 4272 INIT_WORK(&adapter->reset_task, ena_fw_reset_device); 4273 4274 adapter->last_keep_alive_jiffies = jiffies; 4275 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 4276 adapter->missing_tx_completion_to = TX_TIMEOUT; 4277 adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS; 4278 4279 ena_update_hints(adapter, &get_feat_ctx.hw_hints); 
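/* Arm the periodic timer service: ena_timer_service() runs once per
 * second to check for missed keep-alives, a stalled admin queue,
 * missing TX completions and empty RX rings, and queues the reset
 * worker when a reset has been triggered.
 */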
4280 4281 timer_setup(&adapter->timer_service, ena_timer_service, 0); 4282 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); 4283 4284 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 4285 queue_type_str = "Regular"; 4286 else 4287 queue_type_str = "Low Latency"; 4288 4289 dev_info(&pdev->dev, 4290 "%s found at mem %lx, mac addr %pM, Placement policy: %s\n", 4291 DEVICE_NAME, (long)pci_resource_start(pdev, 0), 4292 netdev->dev_addr, queue_type_str); 4293 4294 set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 4295 4296 adapters_found++; 4297 4298 return 0; 4299 4300 err_rss: 4301 ena_com_delete_debug_area(ena_dev); 4302 ena_com_rss_destroy(ena_dev); 4303 err_free_msix: 4304 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR); 4305 /* stop submitting admin commands on a device that was reset */ 4306 ena_com_set_admin_running_state(ena_dev, false); 4307 ena_free_mgmnt_irq(adapter); 4308 ena_disable_msix(adapter); 4309 err_worker_destroy: 4310 del_timer(&adapter->timer_service); 4311 err_netdev_destroy: 4312 free_netdev(netdev); 4313 err_device_destroy: 4314 ena_com_delete_host_info(ena_dev); 4315 ena_com_admin_destroy(ena_dev); 4316 err_free_region: 4317 ena_release_bars(ena_dev, pdev); 4318 err_free_ena_dev: 4319 vfree(ena_dev); 4320 err_disable_device: 4321 pci_disable_device(pdev); 4322 return rc; 4323 } 4324 4325 /*****************************************************************************/ 4326 4327 /* ena_remove - Device Removal Routine 4328 * @pdev: PCI device information struct 4329 * 4330 * ena_remove is called by the PCI subsystem to alert the driver 4331 * that it should release a PCI device. 4332 */ 4333 static void ena_remove(struct pci_dev *pdev) 4334 { 4335 struct ena_adapter *adapter = pci_get_drvdata(pdev); 4336 struct ena_com_dev *ena_dev; 4337 struct net_device *netdev; 4338 4339 ena_dev = adapter->ena_dev; 4340 netdev = adapter->netdev; 4341 4342 #ifdef CONFIG_RFS_ACCEL 4343 if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { 4344 free_irq_cpu_rmap(netdev->rx_cpu_rmap); 4345 netdev->rx_cpu_rmap = NULL; 4346 } 4347 #endif /* CONFIG_RFS_ACCEL */ 4348 del_timer_sync(&adapter->timer_service); 4349 4350 cancel_work_sync(&adapter->reset_task); 4351 4352 rtnl_lock(); 4353 ena_destroy_device(adapter, true); 4354 rtnl_unlock(); 4355 4356 unregister_netdev(netdev); 4357 4358 free_netdev(netdev); 4359 4360 ena_com_rss_destroy(ena_dev); 4361 4362 ena_com_delete_debug_area(ena_dev); 4363 4364 ena_com_delete_host_info(ena_dev); 4365 4366 ena_release_bars(ena_dev, pdev); 4367 4368 pci_disable_device(pdev); 4369 4370 vfree(ena_dev); 4371 } 4372 4373 #ifdef CONFIG_PM 4374 /* ena_suspend - PM suspend callback 4375 * @pdev: PCI device information struct 4376 * @state:power state 4377 */ 4378 static int ena_suspend(struct pci_dev *pdev, pm_message_t state) 4379 { 4380 struct ena_adapter *adapter = pci_get_drvdata(pdev); 4381 4382 u64_stats_update_begin(&adapter->syncp); 4383 adapter->dev_stats.suspend++; 4384 u64_stats_update_end(&adapter->syncp); 4385 4386 rtnl_lock(); 4387 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 4388 dev_err(&pdev->dev, 4389 "ignoring device reset request as the device is being suspended\n"); 4390 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 4391 } 4392 ena_destroy_device(adapter, true); 4393 rtnl_unlock(); 4394 return 0; 4395 } 4396 4397 /* ena_resume - PM resume callback 4398 * @pdev: PCI device information struct 4399 * 4400 */ 4401 static int ena_resume(struct pci_dev *pdev) 4402 { 4403 struct 
ena_adapter *adapter = pci_get_drvdata(pdev); 4404 int rc; 4405 4406 u64_stats_update_begin(&adapter->syncp); 4407 adapter->dev_stats.resume++; 4408 u64_stats_update_end(&adapter->syncp); 4409 4410 rtnl_lock(); 4411 rc = ena_restore_device(adapter); 4412 rtnl_unlock(); 4413 return rc; 4414 } 4415 #endif 4416 4417 static struct pci_driver ena_pci_driver = { 4418 .name = DRV_MODULE_NAME, 4419 .id_table = ena_pci_tbl, 4420 .probe = ena_probe, 4421 .remove = ena_remove, 4422 #ifdef CONFIG_PM 4423 .suspend = ena_suspend, 4424 .resume = ena_resume, 4425 #endif 4426 .sriov_configure = pci_sriov_configure_simple, 4427 }; 4428 4429 static int __init ena_init(void) 4430 { 4431 pr_info("%s", version); 4432 4433 ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); 4434 if (!ena_wq) { 4435 pr_err("Failed to create workqueue\n"); 4436 return -ENOMEM; 4437 } 4438 4439 return pci_register_driver(&ena_pci_driver); 4440 } 4441 4442 static void __exit ena_cleanup(void) 4443 { 4444 pci_unregister_driver(&ena_pci_driver); 4445 4446 if (ena_wq) { 4447 destroy_workqueue(ena_wq); 4448 ena_wq = NULL; 4449 } 4450 } 4451 4452 /****************************************************************************** 4453 ******************************** AENQ Handlers ******************************* 4454 *****************************************************************************/ 4455 /* ena_update_on_link_change: 4456 * Notify the network interface about the change in link status 4457 */ 4458 static void ena_update_on_link_change(void *adapter_data, 4459 struct ena_admin_aenq_entry *aenq_e) 4460 { 4461 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 4462 struct ena_admin_aenq_link_change_desc *aenq_desc = 4463 (struct ena_admin_aenq_link_change_desc *)aenq_e; 4464 int status = aenq_desc->flags & 4465 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; 4466 4467 if (status) { 4468 netdev_dbg(adapter->netdev, "%s\n", __func__); 4469 set_bit(ENA_FLAG_LINK_UP, &adapter->flags); 4470 if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags)) 4471 netif_carrier_on(adapter->netdev); 4472 } else { 4473 clear_bit(ENA_FLAG_LINK_UP, &adapter->flags); 4474 netif_carrier_off(adapter->netdev); 4475 } 4476 } 4477 4478 static void ena_keep_alive_wd(void *adapter_data, 4479 struct ena_admin_aenq_entry *aenq_e) 4480 { 4481 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 4482 struct ena_admin_aenq_keep_alive_desc *desc; 4483 u64 rx_drops; 4484 4485 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 4486 adapter->last_keep_alive_jiffies = jiffies; 4487 4488 rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low; 4489 4490 u64_stats_update_begin(&adapter->syncp); 4491 adapter->dev_stats.rx_drops = rx_drops; 4492 u64_stats_update_end(&adapter->syncp); 4493 } 4494 4495 static void ena_notification(void *adapter_data, 4496 struct ena_admin_aenq_entry *aenq_e) 4497 { 4498 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 4499 struct ena_admin_ena_hw_hints *hints; 4500 4501 WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, 4502 "Invalid group(%x) expected %x\n", 4503 aenq_e->aenq_common_desc.group, 4504 ENA_ADMIN_NOTIFICATION); 4505 4506 switch (aenq_e->aenq_common_desc.syndrom) { 4507 case ENA_ADMIN_UPDATE_HINTS: 4508 hints = (struct ena_admin_ena_hw_hints *) 4509 (&aenq_e->inline_data_w4); 4510 ena_update_hints(adapter, hints); 4511 break; 4512 default: 4513 netif_err(adapter, drv, adapter->netdev, 4514 "Invalid aenq notification syndrome %d\n", 4515
aenq_e->aenq_common_desc.syndrom); 4516 } 4517 } 4518 4519 /* This handler will be called for unknown event group or unimplemented handlers */ 4520 static void unimplemented_aenq_handler(void *data, 4521 struct ena_admin_aenq_entry *aenq_e) 4522 { 4523 struct ena_adapter *adapter = (struct ena_adapter *)data; 4524 4525 netif_err(adapter, drv, adapter->netdev, 4526 "Unknown event was received or event with unimplemented handler\n"); 4527 } 4528 4529 static struct ena_aenq_handlers aenq_handlers = { 4530 .handlers = { 4531 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, 4532 [ENA_ADMIN_NOTIFICATION] = ena_notification, 4533 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, 4534 }, 4535 .unimplemented_handler = unimplemented_aenq_handler 4536 }; 4537 4538 module_init(ena_init); 4539 module_exit(ena_cleanup); 4540