// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */

#include <linux/etherdevice.h>
#include <net/page_pool.h>
#include <linux/iopoll.h>
#include <linux/pci.h>

#include "wx_type.h"
#include "wx_lib.h"
#include "wx_hw.h"

/* wx_test_staterr - tests bits in Rx descriptor status and error fields */
static __le32 wx_test_staterr(union wx_rx_desc *rx_desc,
			      const u32 stat_err_bits)
{
	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
}

static bool wx_can_reuse_rx_page(struct wx_rx_buffer *rx_buffer,
				 int rx_buffer_pgcnt)
{
	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
	struct page *page = rx_buffer->page;

	/* avoid re-using remote and pfmemalloc pages */
	if (!dev_page_is_reusable(page))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
		return false;
#endif

	/* If we have drained the page fragment pool we need to update
	 * the pagecnt_bias and page count so that we fully restock the
	 * number of references the driver holds.
	 */
	if (unlikely(pagecnt_bias == 1)) {
		page_ref_add(page, USHRT_MAX - 1);
		rx_buffer->pagecnt_bias = USHRT_MAX;
	}

	return true;
}

/**
 * wx_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the adapter
 **/
static void wx_reuse_rx_page(struct wx_ring *rx_ring,
			     struct wx_rx_buffer *old_buff)
{
	u16 nta = rx_ring->next_to_alloc;
	struct wx_rx_buffer *new_buff;

	new_buff = &rx_ring->rx_buffer_info[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	new_buff->page = old_buff->page;
	new_buff->page_dma = old_buff->page_dma;
	new_buff->page_offset = old_buff->page_offset;
	new_buff->pagecnt_bias = old_buff->pagecnt_bias;
}

static void wx_dma_sync_frag(struct wx_ring *rx_ring,
			     struct wx_rx_buffer *rx_buffer)
{
	struct sk_buff *skb = rx_buffer->skb;
	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];

	dma_sync_single_range_for_cpu(rx_ring->dev,
				      WX_CB(skb)->dma,
				      skb_frag_off(frag),
				      skb_frag_size(frag),
				      DMA_FROM_DEVICE);

	/* If the page was released, just unmap it. */
	if (unlikely(WX_CB(skb)->page_released))
		page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
}

static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring,
					     union wx_rx_desc *rx_desc,
					     struct sk_buff **skb,
					     int *rx_buffer_pgcnt)
{
	struct wx_rx_buffer *rx_buffer;
	unsigned int size;

	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
	size = le16_to_cpu(rx_desc->wb.upper.length);

#if (PAGE_SIZE < 8192)
	*rx_buffer_pgcnt = page_count(rx_buffer->page);
#else
	*rx_buffer_pgcnt = 0;
#endif

	prefetchw(rx_buffer->page);
	*skb = rx_buffer->skb;

	/* Delay unmapping of the first packet. It carries the header
	 * information, HW may still access the header after the writeback.
	 * Only unmap it when EOP is reached
	 */
	if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)) {
		if (!*skb)
			goto skip_sync;
	} else {
		if (*skb)
			wx_dma_sync_frag(rx_ring, rx_buffer);
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      size,
				      DMA_FROM_DEVICE);
skip_sync:
	rx_buffer->pagecnt_bias--;

	return rx_buffer;
}

static void wx_put_rx_buffer(struct wx_ring *rx_ring,
			     struct wx_rx_buffer *rx_buffer,
			     struct sk_buff *skb,
			     int rx_buffer_pgcnt)
{
	if (wx_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
		/* hand second half of page back to the ring */
		wx_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma)
			/* the page has been released from the ring */
			WX_CB(skb)->page_released = true;
		else
			page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);

		__page_frag_cache_drain(rx_buffer->page,
					rx_buffer->pagecnt_bias);
	}

	/* clear contents of rx_buffer */
	rx_buffer->page = NULL;
	rx_buffer->skb = NULL;
}

static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring,
				    struct wx_rx_buffer *rx_buffer,
				    union wx_rx_desc *rx_desc)
{
	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
#if (PAGE_SIZE < 8192)
	unsigned int truesize = WX_RX_BUFSZ;
#else
	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
#endif
	struct sk_buff *skb = rx_buffer->skb;

	if (!skb) {
		void *page_addr = page_address(rx_buffer->page) +
				  rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
		skb = napi_alloc_skb(&rx_ring->q_vector->napi, WX_RXBUFFER_256);
		if (unlikely(!skb))
			return NULL;

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);

		if (size <= WX_RXBUFFER_256) {
			memcpy(__skb_put(skb, size), page_addr,
			       ALIGN(size, sizeof(long)));
			rx_buffer->pagecnt_bias++;

			return skb;
		}

		if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP))
			WX_CB(skb)->dma = rx_buffer->dma;

		skb_add_rx_frag(skb, 0, rx_buffer->page,
				rx_buffer->page_offset,
				size, truesize);
		goto out;

	} else {
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
				rx_buffer->page_offset, size, truesize);
	}

out:
#if (PAGE_SIZE < 8192)
	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= truesize;
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;
#endif

	return skb;
}

static bool wx_alloc_mapped_page(struct wx_ring *rx_ring,
				 struct wx_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* since we are recycling buffers we should seldom need to alloc */
	if (likely(page))
		return true;

	page = page_pool_dev_alloc_pages(rx_ring->page_pool);
	WARN_ON(!page);
	dma = page_pool_get_dma_addr(page);

	bi->page_dma = dma;
	bi->page = page;
	bi->page_offset = 0;
	page_ref_add(page, USHRT_MAX - 1);
	bi->pagecnt_bias = USHRT_MAX;

	return true;
}
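
/* Note on buffer recycling: wx_alloc_mapped_page() above inflates the page
 * reference count to USHRT_MAX up front, so handing out or releasing a
 * buffer only has to adjust the driver-local pagecnt_bias rather than the
 * atomic page refcount.  wx_can_reuse_rx_page() tops the bias back up once
 * it drains to 1, and __page_frag_cache_drain() returns the surplus
 * references when a page finally leaves the ring.
 */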

/**
 * wx_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count)
{
	u16 i = rx_ring->next_to_use;
	union wx_rx_desc *rx_desc;
	struct wx_rx_buffer *bi;

	/* nothing to do */
	if (!cleaned_count)
		return;

	rx_desc = WX_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	i -= rx_ring->count;

	do {
		if (!wx_alloc_mapped_page(rx_ring, bi))
			break;

		/* sync the buffer for use by the device */
		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
						 bi->page_offset,
						 WX_RX_BUFSZ,
						 DMA_FROM_DEVICE);

		rx_desc->read.pkt_addr =
			cpu_to_le64(bi->page_dma + bi->page_offset);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = WX_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->wb.upper.status_error = 0;

		cleaned_count--;
	} while (cleaned_count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;
		/* update next to alloc since we have filled the ring */
		rx_ring->next_to_alloc = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch. (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(i, rx_ring->tail);
	}
}

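/* Number of free descriptors in the ring: account for next_to_clean
 * wrapping behind next_to_use, and keep one slot unused so that
 * next_to_use == next_to_clean unambiguously means "nothing pending".
 */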
u16 wx_desc_unused(struct wx_ring *ring)
{
	u16 ntc = ring->next_to_clean;
	u16 ntu = ring->next_to_use;

	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
}

/**
 * wx_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 * @skb: Current socket buffer containing buffer in progress
 *
 * This function updates next to clean. If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 **/
static bool wx_is_non_eop(struct wx_ring *rx_ring,
			  union wx_rx_desc *rx_desc,
			  struct sk_buff *skb)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(WX_RX_DESC(rx_ring, ntc));

	/* if we are the last buffer then there is nothing else to do */
	if (likely(wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)))
		return false;

	rx_ring->rx_buffer_info[ntc].skb = skb;

	return true;
}

static void wx_pull_tail(struct sk_buff *skb)
{
	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
	unsigned int pull_len;
	unsigned char *va;

	/* it is valid to use page_address instead of kmap since we are
	 * working with pages allocated out of the lomem pool per
	 * alloc_page(GFP_ATOMIC)
	 */
	va = skb_frag_address(frag);

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(skb->dev, va, WX_RXBUFFER_256);

	/* align pull length to size of long to optimize memcpy performance */
	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	skb_frag_size_sub(frag, pull_len);
	skb_frag_off_add(frag, pull_len);
	skb->data_len -= pull_len;
	skb->tail += pull_len;
}

/**
 * wx_cleanup_headers - Correct corrupted or empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being fixed
 *
 * Check for corrupted packet headers caused by senders on the local L2
 * embedded NIC switch not setting up their Tx Descriptors right. These
 * should be very rare.
 *
 * Also address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 **/
static bool wx_cleanup_headers(struct wx_ring *rx_ring,
			       union wx_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	struct net_device *netdev = rx_ring->netdev;

	/* verify that the packet does not have any known errors */
	if (!netdev ||
	    unlikely(wx_test_staterr(rx_desc, WX_RXD_ERR_RXE) &&
		     !(netdev->features & NETIF_F_RXALL))) {
		dev_kfree_skb_any(skb);
		return true;
	}

	/* place header in linear portion of buffer */
	if (!skb_headlen(skb))
		wx_pull_tail(skb);

	/* if eth_skb_pad returns an error the skb was freed */
	if (eth_skb_pad(skb))
		return true;

	return false;
}

/**
 * wx_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @q_vector: structure containing interrupt and ring information
 * @rx_ring: rx descriptor ring to transact packets on
 * @budget: Total limit on number of packets to process
 *
 * This function provides a "bounce buffer" approach to Rx interrupt
 * processing. The advantage to this is that on systems that have
 * expensive overhead for IOMMU access this provides a means of avoiding
 * it by maintaining the mapping of the page to the system.
 *
 * Returns amount of work completed.
 **/
static int wx_clean_rx_irq(struct wx_q_vector *q_vector,
			   struct wx_ring *rx_ring,
			   int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 cleaned_count = wx_desc_unused(rx_ring);

	do {
		struct wx_rx_buffer *rx_buffer;
		union wx_rx_desc *rx_desc;
		struct sk_buff *skb;
		int rx_buffer_pgcnt;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= WX_RX_BUFFER_WRITE) {
			wx_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = WX_RX_DESC(rx_ring, rx_ring->next_to_clean);
		if (!wx_test_staterr(rx_desc, WX_RXD_STAT_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		rx_buffer = wx_get_rx_buffer(rx_ring, rx_desc, &skb, &rx_buffer_pgcnt);

		/* retrieve a buffer from the ring */
		skb = wx_build_skb(rx_ring, rx_buffer, rx_desc);

		/* exit if we failed to retrieve a buffer */
		if (!skb) {
			rx_buffer->pagecnt_bias++;
			break;
		}

		wx_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt);
		cleaned_count++;

		/* place incomplete frames back on ring for completion */
		if (wx_is_non_eop(rx_ring, rx_desc, skb))
			continue;

		/* verify the packet layout is correct */
		if (wx_cleanup_headers(rx_ring, rx_desc, skb))
			continue;

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		skb_record_rx_queue(skb, rx_ring->queue_index);
		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
		napi_gro_receive(&q_vector->napi, skb);

		/* update budget accounting */
		total_rx_packets++;
	} while (likely(total_rx_packets < budget));

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_rx_packets;
	q_vector->rx.total_bytes += total_rx_bytes;

	return total_rx_packets;
}

static struct netdev_queue *wx_txring_txq(const struct wx_ring *ring)
{
	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
}

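/* Tx completion in wx_clean_tx_irq() below is bounded by wx->tx_work_limit
 * rather than by the NAPI packet budget; the NAPI budget is only passed
 * through so that napi_consume_skb() can tell polling and netpoll contexts
 * apart.
 */
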
/**
 * wx_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: structure containing interrupt and ring information
 * @tx_ring: tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 **/
static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
			    struct wx_ring *tx_ring, int napi_budget)
{
	unsigned int budget = q_vector->wx->tx_work_limit;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int i = tx_ring->next_to_clean;
	struct wx_tx_buffer *tx_buffer;
	union wx_tx_desc *tx_desc;

	if (!netif_carrier_ok(tx_ring->netdev))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = WX_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		union wx_tx_desc *eop_desc = tx_buffer->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(WX_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buffer->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		dma_unmap_len_set(tx_buffer, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = WX_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = WX_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	netdev_tx_completed_queue(wx_txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (wx_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();

		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    netif_running(tx_ring->netdev))
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
	}

	return !!budget;
}

/**
 * wx_poll - NAPI polling RX/TX cleanup routine
 * @napi: napi struct with our devices info in it
 * @budget: amount of work driver is allowed to do this pass, in packets
 *
 * This function will clean all queues associated with a q_vector.
 **/
static int wx_poll(struct napi_struct *napi, int budget)
{
	struct wx_q_vector *q_vector = container_of(napi, struct wx_q_vector, napi);
	int per_ring_budget, work_done = 0;
	struct wx *wx = q_vector->wx;
	bool clean_complete = true;
	struct wx_ring *ring;

	wx_for_each_ring(ring, q_vector->tx) {
		if (!wx_clean_tx_irq(q_vector, ring, budget))
			clean_complete = false;
	}

	/* Exit if we are called by netpoll */
	if (budget <= 0)
		return budget;

	/* attempt to distribute budget to each queue fairly, but don't allow
	 * the budget to go below 1 because we'll exit polling
	 */
	if (q_vector->rx.count > 1)
		per_ring_budget = max(budget / q_vector->rx.count, 1);
	else
		per_ring_budget = budget;

	wx_for_each_ring(ring, q_vector->rx) {
		int cleaned = wx_clean_rx_irq(q_vector, ring, per_ring_budget);

		work_done += cleaned;
		if (cleaned >= per_ring_budget)
			clean_complete = false;
	}

	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;

	/* all work done, exit the polling mode */
	if (likely(napi_complete_done(napi, work_done))) {
		if (netif_running(wx->netdev))
			wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx));
	}

	return min(work_done, budget - 1);
}

static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size)
{
	if (likely(wx_desc_unused(tx_ring) >= size))
		return 0;

	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);

	/* For the next check */
	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available.
	 */
	if (likely(wx_desc_unused(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);

	return 0;
}

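/* The stop/wake handshake: wx_maybe_stop_tx() above and the wake-up check
 * in wx_clean_tx_irq() each issue smp_mb() between updating and re-reading
 * the ring indices, so a queue stopped here is re-woken once the cleanup
 * path has made room and is never left stopped with descriptors available.
 */
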
static void wx_tx_map(struct wx_ring *tx_ring,
		      struct wx_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct wx_tx_buffer *tx_buffer;
	u16 i = tx_ring->next_to_use;
	unsigned int data_len, size;
	union wx_tx_desc *tx_desc;
	skb_frag_t *frag;
	dma_addr_t dma;
	u32 cmd_type;

	cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS;
	tx_desc = WX_TX_DESC(tx_ring, i);

	tx_desc->read.olinfo_status = cpu_to_le32(skb->len << WX_TXD_PAYLEN_SHIFT);

	size = skb_headlen(skb);
	data_len = skb->data_len;
	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_buffer = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, size);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		while (unlikely(size > WX_MAX_DATA_PER_TXD)) {
			tx_desc->read.cmd_type_len =
				cpu_to_le32(cmd_type ^ WX_MAX_DATA_PER_TXD);

			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = WX_TX_DESC(tx_ring, 0);
				i = 0;
			}
			tx_desc->read.olinfo_status = 0;

			dma += WX_MAX_DATA_PER_TXD;
			size -= WX_MAX_DATA_PER_TXD;

			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);

		i++;
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = WX_TX_DESC(tx_ring, 0);
			i = 0;
		}
		tx_desc->read.olinfo_status = 0;

		size = skb_frag_size(frag);

		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buffer = &tx_ring->tx_buffer_info[i];
	}

	/* write last descriptor with RS and EOP bits */
	cmd_type |= size | WX_TXD_EOP | WX_TXD_RS;
	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);

	netdev_tx_sent_queue(wx_txring_txq(tx_ring), first->bytecount);

	skb_tx_timestamp(skb);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch. (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	wx_maybe_stop_tx(tx_ring, DESC_NEEDED);

	if (netif_xmit_stopped(wx_txring_txq(tx_ring)) || !netdev_xmit_more())
		writel(i, tx_ring->tail);

	return;
dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer_info map */
	for (;;) {
		tx_buffer = &tx_ring->tx_buffer_info[i];
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_page(tx_ring->dev,
				       dma_unmap_addr(tx_buffer, dma),
				       dma_unmap_len(tx_buffer, len),
				       DMA_TO_DEVICE);
		dma_unmap_len_set(tx_buffer, len, 0);
		if (tx_buffer == first)
			break;
		if (i == 0)
			i += tx_ring->count;
		i--;
	}

	dev_kfree_skb_any(first->skb);
	first->skb = NULL;

	tx_ring->next_to_use = i;
}

static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb,
				      struct wx_ring *tx_ring)
{
	u16 count = TXD_USE_COUNT(skb_headlen(skb));
	struct wx_tx_buffer *first;
	unsigned short f;

	/* need: 1 descriptor per page * PAGE_SIZE/WX_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/WX_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->
						     frags[f]));

	if (wx_maybe_stop_tx(tx_ring, count + 3))
		return NETDEV_TX_BUSY;

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = skb->len;
	first->gso_segs = 1;

	wx_tx_map(tx_ring, first);

	return NETDEV_TX_OK;
}

netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
			  struct net_device *netdev)
{
	unsigned int r_idx = skb->queue_mapping;
	struct wx *wx = netdev_priv(netdev);
	struct wx_ring *tx_ring;

	if (!netif_carrier_ok(netdev)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* The minimum packet size for olinfo paylen is 17 so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (skb_put_padto(skb, 17))
		return NETDEV_TX_OK;

	if (r_idx >= wx->num_tx_queues)
		r_idx = r_idx % wx->num_tx_queues;
	tx_ring = wx->tx_ring[r_idx];

	return wx_xmit_frame_ring(skb, tx_ring);
}
EXPORT_SYMBOL(wx_xmit_frame);

void wx_napi_enable_all(struct wx *wx)
{
	struct wx_q_vector *q_vector;
	int q_idx;

	for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
		q_vector = wx->q_vector[q_idx];
		napi_enable(&q_vector->napi);
	}
}
EXPORT_SYMBOL(wx_napi_enable_all);

void wx_napi_disable_all(struct wx *wx)
{
	struct wx_q_vector *q_vector;
	int q_idx;

	for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
		q_vector = wx->q_vector[q_idx];
		napi_disable(&q_vector->napi);
	}
}
EXPORT_SYMBOL(wx_napi_disable_all);

/**
 * wx_set_rss_queues: Allocate queues for RSS
 * @wx: board private structure to initialize
 *
 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
 *
 **/
static void wx_set_rss_queues(struct wx *wx)
{
	wx->num_rx_queues = wx->mac.max_rx_queues;
	wx->num_tx_queues = wx->mac.max_tx_queues;
}

static void wx_set_num_queues(struct wx *wx)
{
	/* Start with base case */
	wx->num_rx_queues = 1;
	wx->num_tx_queues = 1;
	wx->queues_per_pool = 1;

	wx_set_rss_queues(wx);
}

/**
 * wx_acquire_msix_vectors - acquire MSI-X vectors
 * @wx: board private structure
 *
 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
 * return a negative error code if unable to acquire MSI-X vectors for any
 * reason.
 */
static int wx_acquire_msix_vectors(struct wx *wx)
{
	struct irq_affinity affd = {0, };
	int nvecs, i;

	nvecs = min_t(int, num_online_cpus(), wx->mac.max_msix_vectors);

	wx->msix_entries = kcalloc(nvecs,
				   sizeof(struct msix_entry),
				   GFP_KERNEL);
	if (!wx->msix_entries)
		return -ENOMEM;

	nvecs = pci_alloc_irq_vectors_affinity(wx->pdev, nvecs,
					       nvecs,
					       PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
					       &affd);
	if (nvecs < 0) {
		wx_err(wx, "Failed to allocate MSI-X interrupts. Err: %d\n", nvecs);
		kfree(wx->msix_entries);
		wx->msix_entries = NULL;
		return nvecs;
	}

	for (i = 0; i < nvecs; i++) {
		wx->msix_entries[i].entry = i;
		wx->msix_entries[i].vector = pci_irq_vector(wx->pdev, i);
	}

	/* one for msix_other */
	nvecs -= 1;
	wx->num_q_vectors = nvecs;
	wx->num_rx_queues = nvecs;
	wx->num_tx_queues = nvecs;

	return 0;
}

/**
 * wx_set_interrupt_capability - set MSI-X or MSI if supported
 * @wx: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static int wx_set_interrupt_capability(struct wx *wx)
{
	struct pci_dev *pdev = wx->pdev;
	int nvecs, ret;

	/* We will try to get MSI-X interrupts first */
	ret = wx_acquire_msix_vectors(wx);
	if (ret == 0 || (ret == -ENOMEM))
		return ret;

	wx->num_rx_queues = 1;
	wx->num_tx_queues = 1;
	wx->num_q_vectors = 1;

	/* minimum one for queue, one for misc */
	nvecs = 1;
	nvecs = pci_alloc_irq_vectors(pdev, nvecs,
				      nvecs, PCI_IRQ_MSI | PCI_IRQ_LEGACY);
	if (nvecs == 1) {
		if (pdev->msi_enabled)
			wx_err(wx, "Fallback to MSI.\n");
		else
			wx_err(wx, "Fallback to LEGACY.\n");
	} else {
		wx_err(wx, "Failed to allocate MSI/LEGACY interrupts. Error: %d\n", nvecs);
		return nvecs;
	}

	pdev->irq = pci_irq_vector(pdev, 0);

	return 0;
}

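/* Interrupt setup above falls back in order: MSI-X (one vector per Rx/Tx
 * queue pair plus one reserved for misc causes), then MSI, then legacy
 * INTx.  On the MSI/INTx paths a single queue pair and a single q_vector
 * are used.
 */
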
/**
 * wx_cache_ring_rss - Descriptor ring to register mapping for RSS
 * @wx: board private structure to initialize
 *
 * Cache the descriptor ring offsets for RSS, ATR, FCoE, and SR-IOV.
 *
 **/
static void wx_cache_ring_rss(struct wx *wx)
{
	u16 i;

	for (i = 0; i < wx->num_rx_queues; i++)
		wx->rx_ring[i]->reg_idx = i;

	for (i = 0; i < wx->num_tx_queues; i++)
		wx->tx_ring[i]->reg_idx = i;
}

static void wx_add_ring(struct wx_ring *ring, struct wx_ring_container *head)
{
	ring->next = head->ring;
	head->ring = ring;
	head->count++;
}

/**
 * wx_alloc_q_vector - Allocate memory for a single interrupt vector
 * @wx: board private structure to initialize
 * @v_count: q_vectors allocated on wx, used for ring interleaving
 * @v_idx: index of vector in wx struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector. If allocation fails we return -ENOMEM.
 **/
static int wx_alloc_q_vector(struct wx *wx,
			     unsigned int v_count, unsigned int v_idx,
			     unsigned int txr_count, unsigned int txr_idx,
			     unsigned int rxr_count, unsigned int rxr_idx)
{
	struct wx_q_vector *q_vector;
	int ring_count, default_itr;
	struct wx_ring *ring;

	/* note this will allocate space for the ring structure as well! */
	ring_count = txr_count + rxr_count;

	q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
			   GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* initialize NAPI */
	netif_napi_add(wx->netdev, &q_vector->napi,
		       wx_poll);

	/* tie q_vector and wx together */
	wx->q_vector[v_idx] = q_vector;
	q_vector->wx = wx;
	q_vector->v_idx = v_idx;
	if (cpu_online(v_idx))
		q_vector->numa_node = cpu_to_node(v_idx);

	/* initialize pointer to rings */
	ring = q_vector->ring;

	if (wx->mac.type == wx_mac_sp)
		default_itr = WX_12K_ITR;
	else
		default_itr = WX_7K_ITR;
	/* initialize ITR */
	if (txr_count && !rxr_count)
		/* tx only vector */
		q_vector->itr = wx->tx_itr_setting ?
				default_itr : wx->tx_itr_setting;
	else
		/* rx or rx/tx vector */
		q_vector->itr = wx->rx_itr_setting ?
				default_itr : wx->rx_itr_setting;

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &wx->pdev->dev;
		ring->netdev = wx->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		wx_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = wx->tx_ring_count;

		ring->queue_index = txr_idx;

		/* assign ring to wx */
		wx->tx_ring[txr_idx] = ring;

		/* update count and index */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &wx->pdev->dev;
		ring->netdev = wx->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Rx values */
		wx_add_ring(ring, &q_vector->rx);

		/* apply Rx specific ring traits */
		ring->count = wx->rx_ring_count;
		ring->queue_index = rxr_idx;

		/* assign ring to wx */
		wx->rx_ring[rxr_idx] = ring;

		/* update count and index */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}

/**
 * wx_free_q_vector - Free memory allocated for specific interrupt vector
 * @wx: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector. In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void wx_free_q_vector(struct wx *wx, int v_idx)
{
	struct wx_q_vector *q_vector = wx->q_vector[v_idx];
	struct wx_ring *ring;

	wx_for_each_ring(ring, q_vector->tx)
		wx->tx_ring[ring->queue_index] = NULL;

	wx_for_each_ring(ring, q_vector->rx)
		wx->rx_ring[ring->queue_index] = NULL;

	wx->q_vector[v_idx] = NULL;
	netif_napi_del(&q_vector->napi);
	kfree_rcu(q_vector, rcu);
}

/**
 * wx_alloc_q_vectors - Allocate memory for interrupt vectors
 * @wx: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt. If allocation fails we
 * return -ENOMEM.
 **/
static int wx_alloc_q_vectors(struct wx *wx)
{
	unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	unsigned int rxr_remaining = wx->num_rx_queues;
	unsigned int txr_remaining = wx->num_tx_queues;
	unsigned int q_vectors = wx->num_q_vectors;
	int rqpv, tqpv;
	int err;

	for (; v_idx < q_vectors; v_idx++) {
		rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
		err = wx_alloc_q_vector(wx, q_vectors, v_idx,
					tqpv, txr_idx,
					rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

	return 0;

err_out:
	wx->num_tx_queues = 0;
	wx->num_rx_queues = 0;
	wx->num_q_vectors = 0;

	while (v_idx--)
		wx_free_q_vector(wx, v_idx);

	return -ENOMEM;
}

/**
 * wx_free_q_vectors - Free memory allocated for interrupt vectors
 * @wx: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors. In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void wx_free_q_vectors(struct wx *wx)
{
	int v_idx = wx->num_q_vectors;

	wx->num_tx_queues = 0;
	wx->num_rx_queues = 0;
	wx->num_q_vectors = 0;

	while (v_idx--)
		wx_free_q_vector(wx, v_idx);
}

void wx_reset_interrupt_capability(struct wx *wx)
{
	struct pci_dev *pdev = wx->pdev;

	if (!pdev->msi_enabled && !pdev->msix_enabled)
		return;

	pci_free_irq_vectors(wx->pdev);
	if (pdev->msix_enabled) {
		kfree(wx->msix_entries);
		wx->msix_entries = NULL;
	}
}
EXPORT_SYMBOL(wx_reset_interrupt_capability);

/**
 * wx_clear_interrupt_scheme - Clear the current interrupt scheme settings
 * @wx: board private structure to clear interrupt scheme on
 *
 * We go through and clear interrupt specific resources and reset the structure
 * to pre-load conditions
 **/
void wx_clear_interrupt_scheme(struct wx *wx)
{
	wx_free_q_vectors(wx);
	wx_reset_interrupt_capability(wx);
}
EXPORT_SYMBOL(wx_clear_interrupt_scheme);

int wx_init_interrupt_scheme(struct wx *wx)
{
	int ret;

	/* Number of supported queues */
	wx_set_num_queues(wx);

	/* Set interrupt mode */
	ret = wx_set_interrupt_capability(wx);
	if (ret) {
		wx_err(wx, "Failed to allocate irq vectors.\n");
		return ret;
	}

	/* Allocate memory for queues */
	ret = wx_alloc_q_vectors(wx);
	if (ret) {
		wx_err(wx, "Unable to allocate memory for queue vectors.\n");
		wx_reset_interrupt_capability(wx);
		return ret;
	}

	wx_cache_ring_rss(wx);

	return 0;
}
EXPORT_SYMBOL(wx_init_interrupt_scheme);

irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data)
{
	struct wx_q_vector *q_vector = data;

	/* EIAM disabled interrupts (on this vector) for us */
	if (q_vector->rx.ring || q_vector->tx.ring)
		napi_schedule_irqoff(&q_vector->napi);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL(wx_msix_clean_rings);

void wx_free_irq(struct wx *wx)
{
	struct pci_dev *pdev = wx->pdev;
	int vector;

	if (!(pdev->msix_enabled)) {
		free_irq(pdev->irq, wx);
		return;
	}

	for (vector = 0; vector < wx->num_q_vectors; vector++) {
		struct wx_q_vector *q_vector = wx->q_vector[vector];
		struct msix_entry *entry = &wx->msix_entries[vector];

		/* free only the irqs that were actually requested */
		if (!q_vector->rx.ring && !q_vector->tx.ring)
			continue;

		free_irq(entry->vector, q_vector);
	}

	free_irq(wx->msix_entries[vector].vector, wx);
}
EXPORT_SYMBOL(wx_free_irq);

/**
 * wx_setup_isb_resources - allocate interrupt status resources
 * @wx: board private structure
 *
 * Return 0 on success, negative on failure
 **/
int wx_setup_isb_resources(struct wx *wx)
{
	struct pci_dev *pdev = wx->pdev;

	wx->isb_mem = dma_alloc_coherent(&pdev->dev,
					 sizeof(u32) * 4,
					 &wx->isb_dma,
					 GFP_KERNEL);
	if (!wx->isb_mem) {
		wx_err(wx, "Alloc isb_mem failed\n");
		return -ENOMEM;
	}

	return 0;
}
EXPORT_SYMBOL(wx_setup_isb_resources);

/**
 * wx_free_isb_resources - free interrupt status resources
 * @wx: board private structure
 **/
void wx_free_isb_resources(struct wx *wx)
{
	struct pci_dev *pdev = wx->pdev;

	dma_free_coherent(&pdev->dev, sizeof(u32) * 4,
			  wx->isb_mem, wx->isb_dma);
	wx->isb_mem = NULL;
}
EXPORT_SYMBOL(wx_free_isb_resources);

u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx)
{
	u32 cur_tag = 0;

	cur_tag = wx->isb_mem[WX_ISB_HEADER];
	wx->isb_tag[idx] = cur_tag;

	return (__force u32)cpu_to_le32(wx->isb_mem[idx]);
}
EXPORT_SYMBOL(wx_misc_isb);

/**
 * wx_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
 * @wx: pointer to wx struct
 * @direction: 0 for Rx, 1 for Tx, -1 for other causes
 * @queue: queue to map the corresponding interrupt to
 * @msix_vector: the vector to map to the corresponding queue
 *
 **/
static void wx_set_ivar(struct wx *wx, s8 direction,
			u16 queue, u16 msix_vector)
{
	u32 ivar, index;

	if (direction == -1) {
		/* other causes */
		msix_vector |= WX_PX_IVAR_ALLOC_VAL;
		index = 0;
		ivar = rd32(wx, WX_PX_MISC_IVAR);
		ivar &= ~(0xFF << index);
		ivar |= (msix_vector << index);
		wr32(wx, WX_PX_MISC_IVAR, ivar);
	} else {
		/* tx or rx causes */
		msix_vector |= WX_PX_IVAR_ALLOC_VAL;
		/* each IVAR register holds four 8-bit entries, Rx then Tx
		 * for an even/odd queue pair, so pick the byte lane from
		 * the queue's low bit and the direction
		 */
		index = ((16 * (queue & 1)) + (8 * direction));
		ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
		ivar &= ~(0xFF << index);
		ivar |= (msix_vector << index);
		wr32(wx, WX_PX_IVAR(queue >> 1), ivar);
	}
}

/**
 * wx_write_eitr - write EITR register in hardware specific way
 * @q_vector: structure containing interrupt and ring information
 *
 * This function is made to be called by ethtool and by the driver
 * when it needs to update EITR registers at runtime. Hardware
 * specific quirks/differences are taken care of here.
 */
static void wx_write_eitr(struct wx_q_vector *q_vector)
{
	struct wx *wx = q_vector->wx;
	int v_idx = q_vector->v_idx;
	u32 itr_reg;

	if (wx->mac.type == wx_mac_sp)
		itr_reg = q_vector->itr & WX_SP_MAX_EITR;
	else
		itr_reg = q_vector->itr & WX_EM_MAX_EITR;

	itr_reg |= WX_PX_ITR_CNT_WDIS;

	wr32(wx, WX_PX_ITR(v_idx), itr_reg);
}

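/* In wx_configure_vectors() below, each Rx/Tx ring is mapped to its
 * q_vector through the IVAR table; the vector index left over after the
 * loop (== num_q_vectors) is the one reserved for the misc/other causes by
 * wx_acquire_msix_vectors() and is programmed last.
 */
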
/**
 * wx_configure_vectors - Configure vectors for hardware
 * @wx: board private structure
 *
 * wx_configure_vectors sets up the hardware to properly generate MSI-X/MSI/LEGACY
 * interrupts.
 **/
void wx_configure_vectors(struct wx *wx)
{
	struct pci_dev *pdev = wx->pdev;
	u32 eitrsel = 0;
	u16 v_idx;

	if (pdev->msix_enabled) {
		/* Populate MSIX to EITR Select */
		wr32(wx, WX_PX_ITRSEL, eitrsel);
		/* use EIAM to auto-mask when MSI-X interrupt is asserted
		 * this saves a register write for every interrupt
		 */
		wr32(wx, WX_PX_GPIE, WX_PX_GPIE_MODEL);
	} else {
		/* legacy interrupts, use EIAM to auto-mask when reading EICR,
		 * specifically only auto mask tx and rx interrupts.
		 */
		wr32(wx, WX_PX_GPIE, 0);
	}

	/* Populate the IVAR table and set the ITR values to the
	 * corresponding register.
	 */
	for (v_idx = 0; v_idx < wx->num_q_vectors; v_idx++) {
		struct wx_q_vector *q_vector = wx->q_vector[v_idx];
		struct wx_ring *ring;

		wx_for_each_ring(ring, q_vector->rx)
			wx_set_ivar(wx, 0, ring->reg_idx, v_idx);

		wx_for_each_ring(ring, q_vector->tx)
			wx_set_ivar(wx, 1, ring->reg_idx, v_idx);

		wx_write_eitr(q_vector);
	}

	wx_set_ivar(wx, -1, 0, v_idx);
	if (pdev->msix_enabled)
		wr32(wx, WX_PX_ITR(v_idx), 1950);
}
EXPORT_SYMBOL(wx_configure_vectors);

/**
 * wx_clean_rx_ring - Free Rx Buffers per Queue
 * @rx_ring: ring to free buffers from
 **/
static void wx_clean_rx_ring(struct wx_ring *rx_ring)
{
	struct wx_rx_buffer *rx_buffer;
	u16 i = rx_ring->next_to_clean;

	rx_buffer = &rx_ring->rx_buffer_info[i];

	/* Free all the Rx ring sk_buffs */
	while (i != rx_ring->next_to_alloc) {
		if (rx_buffer->skb) {
			struct sk_buff *skb = rx_buffer->skb;

			if (WX_CB(skb)->page_released)
				page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);

			dev_kfree_skb(skb);
		}

		/* Invalidate cache lines that may have been written to by
		 * device so that we avoid corrupting memory.
		 */
		dma_sync_single_range_for_cpu(rx_ring->dev,
					      rx_buffer->dma,
					      rx_buffer->page_offset,
					      WX_RX_BUFSZ,
					      DMA_FROM_DEVICE);

		/* free resources associated with mapping */
		page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
		__page_frag_cache_drain(rx_buffer->page,
					rx_buffer->pagecnt_bias);

		i++;
		rx_buffer++;
		if (i == rx_ring->count) {
			i = 0;
			rx_buffer = rx_ring->rx_buffer_info;
		}
	}

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * wx_clean_all_rx_rings - Free Rx Buffers for all queues
 * @wx: board private structure
 **/
void wx_clean_all_rx_rings(struct wx *wx)
{
	int i;

	for (i = 0; i < wx->num_rx_queues; i++)
		wx_clean_rx_ring(wx->rx_ring[i]);
}
EXPORT_SYMBOL(wx_clean_all_rx_rings);

/**
 * wx_free_rx_resources - Free Rx Resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
static void wx_free_rx_resources(struct wx_ring *rx_ring)
{
	wx_clean_rx_ring(rx_ring);
	kvfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;

	/* if not set, then don't free */
	if (!rx_ring->desc)
		return;

	dma_free_coherent(rx_ring->dev, rx_ring->size,
			  rx_ring->desc, rx_ring->dma);

	rx_ring->desc = NULL;

	if (rx_ring->page_pool) {
		page_pool_destroy(rx_ring->page_pool);
		rx_ring->page_pool = NULL;
	}
}

/**
 * wx_free_all_rx_resources - Free Rx Resources for All Queues
 * @wx: pointer to hardware structure
 *
 * Free all receive software resources
 **/
static void wx_free_all_rx_resources(struct wx *wx)
{
	int i;

	for (i = 0; i < wx->num_rx_queues; i++)
		wx_free_rx_resources(wx->rx_ring[i]);
}

/**
 * wx_clean_tx_ring - Free Tx Buffers
 * @tx_ring: ring to be cleaned
 **/
static void wx_clean_tx_ring(struct wx_ring *tx_ring)
{
	struct wx_tx_buffer *tx_buffer;
	u16 i = tx_ring->next_to_clean;

	tx_buffer = &tx_ring->tx_buffer_info[i];

	while (i != tx_ring->next_to_use) {
		union wx_tx_desc *eop_desc, *tx_desc;

		/* Free all the Tx ring sk_buffs */
		dev_kfree_skb_any(tx_buffer->skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* check for eop_desc to determine the end of the packet */
		eop_desc = tx_buffer->next_to_watch;
		tx_desc = WX_TX_DESC(tx_ring, i);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(i == tx_ring->count)) {
				i = 0;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = WX_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len))
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		i++;
		if (unlikely(i == tx_ring->count)) {
			i = 0;
			tx_buffer = tx_ring->tx_buffer_info;
		}
	}

	netdev_tx_reset_queue(wx_txring_txq(tx_ring));

	/* reset next_to_use and next_to_clean */
	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
}

/**
 * wx_clean_all_tx_rings - Free Tx Buffers for all queues
 * @wx: board private structure
 **/
void wx_clean_all_tx_rings(struct wx *wx)
{
	int i;

	for (i = 0; i < wx->num_tx_queues; i++)
		wx_clean_tx_ring(wx->tx_ring[i]);
}
EXPORT_SYMBOL(wx_clean_all_tx_rings);

/**
 * wx_free_tx_resources - Free Tx Resources per Queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
static void wx_free_tx_resources(struct wx_ring *tx_ring)
{
	wx_clean_tx_ring(tx_ring);
	kvfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;

	/* if not set, then don't free */
	if (!tx_ring->desc)
		return;

	dma_free_coherent(tx_ring->dev, tx_ring->size,
			  tx_ring->desc, tx_ring->dma);
	tx_ring->desc = NULL;
}

/**
 * wx_free_all_tx_resources - Free Tx Resources for All Queues
 * @wx: pointer to hardware structure
 *
 * Free all transmit software resources
 **/
static void wx_free_all_tx_resources(struct wx *wx)
{
	int i;

	for (i = 0; i < wx->num_tx_queues; i++)
		wx_free_tx_resources(wx->tx_ring[i]);
}

void wx_free_resources(struct wx *wx)
{
	wx_free_isb_resources(wx);
	wx_free_all_rx_resources(wx);
	wx_free_all_tx_resources(wx);
}
EXPORT_SYMBOL(wx_free_resources);

static int wx_alloc_page_pool(struct wx_ring *rx_ring)
{
	int ret = 0;

	struct page_pool_params pp_params = {
		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.order = 0,
		.pool_size = rx_ring->size,
		.nid = dev_to_node(rx_ring->dev),
		.dev = rx_ring->dev,
		.dma_dir = DMA_FROM_DEVICE,
		.offset = 0,
		.max_len = PAGE_SIZE,
	};

	rx_ring->page_pool = page_pool_create(&pp_params);
	if (IS_ERR(rx_ring->page_pool)) {
		ret = PTR_ERR(rx_ring->page_pool);
		rx_ring->page_pool = NULL;
	}

	return ret;
}

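/* The ring setup paths below first try to allocate on the q_vector's NUMA
 * node and fall back to any node (and, for the descriptor DMA memory, to
 * the device's original node) when that fails.
 */
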
/**
 * wx_setup_rx_resources - allocate Rx resources (Descriptors)
 * @rx_ring: rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
static int wx_setup_rx_resources(struct wx_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int orig_node = dev_to_node(dev);
	int numa_node = NUMA_NO_NODE;
	int size, ret;

	size = sizeof(struct wx_rx_buffer) * rx_ring->count;

	if (rx_ring->q_vector)
		numa_node = rx_ring->q_vector->numa_node;

	rx_ring->rx_buffer_info = kvmalloc_node(size, GFP_KERNEL, numa_node);
	if (!rx_ring->rx_buffer_info)
		rx_ring->rx_buffer_info = kvmalloc(size, GFP_KERNEL);
	if (!rx_ring->rx_buffer_info)
		goto err;

	/* Round up to nearest 4K */
	rx_ring->size = rx_ring->count * sizeof(union wx_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);

	set_dev_node(dev, numa_node);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);
	if (!rx_ring->desc) {
		set_dev_node(dev, orig_node);
		rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
						   &rx_ring->dma, GFP_KERNEL);
	}

	if (!rx_ring->desc)
		goto err;

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	ret = wx_alloc_page_pool(rx_ring);
	if (ret < 0) {
		dev_err(rx_ring->dev, "Page pool creation failed: %d\n", ret);
		goto err;
	}

	return 0;
err:
	kvfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;
	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
	return -ENOMEM;
}

/**
 * wx_setup_all_rx_resources - allocate all queues Rx resources
 * @wx: pointer to hardware structure
 *
 * If this function returns with an error, then it's possible one or
 * more of the rings is populated (while the rest are not). It is the
 * caller's duty to clean those orphaned rings.
 *
 * Return 0 on success, negative on failure
 **/
static int wx_setup_all_rx_resources(struct wx *wx)
{
	int i, err = 0;

	for (i = 0; i < wx->num_rx_queues; i++) {
		err = wx_setup_rx_resources(wx->rx_ring[i]);
		if (!err)
			continue;

		wx_err(wx, "Allocation for Rx Queue %u failed\n", i);
		goto err_setup_rx;
	}

	return 0;
err_setup_rx:
	/* rewind the index freeing the rings as we go */
	while (i--)
		wx_free_rx_resources(wx->rx_ring[i]);
	return err;
}

/**
 * wx_setup_tx_resources - allocate Tx resources (Descriptors)
 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 *
 * Return 0 on success, negative on failure
 **/
static int wx_setup_tx_resources(struct wx_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int orig_node = dev_to_node(dev);
	int numa_node = NUMA_NO_NODE;
	int size;

	size = sizeof(struct wx_tx_buffer) * tx_ring->count;

	if (tx_ring->q_vector)
		numa_node = tx_ring->q_vector->numa_node;

	tx_ring->tx_buffer_info = kvmalloc_node(size, GFP_KERNEL, numa_node);
	if (!tx_ring->tx_buffer_info)
		tx_ring->tx_buffer_info = kvmalloc(size, GFP_KERNEL);
	if (!tx_ring->tx_buffer_info)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(union wx_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);

	set_dev_node(dev, numa_node);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		set_dev_node(dev, orig_node);
		tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
						   &tx_ring->dma, GFP_KERNEL);
	}

	if (!tx_ring->desc)
		goto err;

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	return 0;

err:
	kvfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;
	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
	return -ENOMEM;
}

/**
 * wx_setup_all_tx_resources - allocate all queues Tx resources
 * @wx: pointer to private structure
 *
 * If this function returns with an error, then it's possible one or
 * more of the rings is populated (while the rest are not). It is the
 * caller's duty to clean those orphaned rings.
 *
 * Return 0 on success, negative on failure
 **/
static int wx_setup_all_tx_resources(struct wx *wx)
{
	int i, err = 0;

	for (i = 0; i < wx->num_tx_queues; i++) {
		err = wx_setup_tx_resources(wx->tx_ring[i]);
		if (!err)
			continue;

		wx_err(wx, "Allocation for Tx Queue %u failed\n", i);
		goto err_setup_tx;
	}

	return 0;
err_setup_tx:
	/* rewind the index freeing the rings as we go */
	while (i--)
		wx_free_tx_resources(wx->tx_ring[i]);
	return err;
}

int wx_setup_resources(struct wx *wx)
{
	int err;

	/* allocate transmit descriptors */
	err = wx_setup_all_tx_resources(wx);
	if (err)
		return err;

	/* allocate receive descriptors */
	err = wx_setup_all_rx_resources(wx);
	if (err)
		goto err_free_tx;

	err = wx_setup_isb_resources(wx);
	if (err)
		goto err_free_rx;

	return 0;

err_free_rx:
	wx_free_all_rx_resources(wx);
err_free_tx:
	wx_free_all_tx_resources(wx);

	return err;
}
EXPORT_SYMBOL(wx_setup_resources);

/**
 * wx_get_stats64 - Get System Network Statistics
 * @netdev: network interface device structure
 * @stats: storage space for 64bit statistics
 */
void wx_get_stats64(struct net_device *netdev,
		    struct rtnl_link_stats64 *stats)
{
	struct wx *wx = netdev_priv(netdev);
	int i;

	rcu_read_lock();
	for (i = 0; i < wx->num_rx_queues; i++) {
		struct wx_ring *ring = READ_ONCE(wx->rx_ring[i]);
		u64 bytes, packets;
		unsigned int start;

		if (ring) {
			do {
				start = u64_stats_fetch_begin(&ring->syncp);
				packets = ring->stats.packets;
				bytes = ring->stats.bytes;
			} while (u64_stats_fetch_retry(&ring->syncp, start));
			stats->rx_packets += packets;
			stats->rx_bytes += bytes;
		}
	}

	for (i = 0; i < wx->num_tx_queues; i++) {
		struct wx_ring *ring = READ_ONCE(wx->tx_ring[i]);
		u64 bytes, packets;
		unsigned int start;

		if (ring) {
			do {
				start = u64_stats_fetch_begin(&ring->syncp);
				packets = ring->stats.packets;
				bytes = ring->stats.bytes;
			} while (u64_stats_fetch_retry(&ring->syncp,
						       start));
			stats->tx_packets += packets;
			stats->tx_bytes += bytes;
		}
	}

	rcu_read_unlock();
}
EXPORT_SYMBOL(wx_get_stats64);

MODULE_LICENSE("GPL");