// SPDX-License-Identifier: GPL-2.0
/* Intel(R) Ethernet Switch Host Interface Driver
 * Copyright(c) 2013 - 2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 */

#include <linux/types.h>
#include <linux/module.h>
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <linux/if_macvlan.h>
#include <linux/prefetch.h>

#include "fm10k.h"

#define DRV_VERSION	"0.23.4-k"
#define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
	"Copyright(c) 2013 - 2018 Intel Corporation.";

MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/* single workqueue for entire fm10k driver */
struct workqueue_struct *fm10k_workqueue;

/**
 * fm10k_init_module - Driver Registration Routine
 *
 * fm10k_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init fm10k_init_module(void)
{
	pr_info("%s - version %s\n", fm10k_driver_string, fm10k_driver_version);
	pr_info("%s\n", fm10k_copyright);

	/* create driver workqueue */
	fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
					  fm10k_driver_name);

	fm10k_dbg_init();

	return fm10k_register_pci_driver();
}
module_init(fm10k_init_module);

/**
 * fm10k_exit_module - Driver Exit Cleanup Routine
 *
 * fm10k_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit fm10k_exit_module(void)
{
	fm10k_unregister_pci_driver();

	fm10k_dbg_exit();

	/* destroy driver workqueue */
	destroy_workqueue(fm10k_workqueue);
}
module_exit(fm10k_exit_module);

static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
				    struct fm10k_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* Only page will be NULL if buffer was consumed */
	if (likely(page))
		return true;

	/* alloc new page for storage */
	page = dev_alloc_page();
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_page(page);

		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = 0;

	return true;
}

/**
 * fm10k_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
{
	union fm10k_rx_desc *rx_desc;
	struct fm10k_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;

	/* nothing to do */
	if (!cleaned_count)
		return;

	rx_desc = FM10K_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer[i];
	i -= rx_ring->count;

	do {
		if (!fm10k_alloc_mapped_page(rx_ring, bi))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = FM10K_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer;
			i -= rx_ring->count;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->d.staterr = 0;

		cleaned_count--;
	} while (cleaned_count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		/* record the next descriptor to use */
		rx_ring->next_to_use = i;

		/* update next to alloc since we have filled the ring */
		rx_ring->next_to_alloc = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch. (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();

		/* notify hardware of new descriptors */
		writel(i, rx_ring->tail);
	}
}

/**
 * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the interface
 **/
static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
				struct fm10k_rx_buffer *old_buff)
{
	struct fm10k_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_buffer[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	*new_buff = *old_buff;

	/* sync the buffer for use by the device */
	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
					 old_buff->page_offset,
					 FM10K_RX_BUFSZ,
					 DMA_FROM_DEVICE);
}

static inline bool fm10k_page_is_reserved(struct page *page)
{
	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}

static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
				    struct page *page,
				    unsigned int __maybe_unused truesize)
{
	/* avoid re-using remote pages */
	if (unlikely(fm10k_page_is_reserved(page)))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;

	if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
		return false;
#endif

	/* Even if we own the page, we are not allowed to use atomic_set()
	 * This would break get_page_unless_zero() users.
	 */
	page_ref_inc(page);

	return true;
}

/**
 * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
 * @rx_buffer: buffer containing page to add
 * @size: packet size from rx_desc
 * @rx_desc: descriptor containing length of buffer written by hardware
 * @skb: sk_buff to place the data into
 *
 * This function will add the data contained in rx_buffer->page to the skb.
 * This is done either through a direct copy if the data in the buffer is
 * less than the skb header size, otherwise it will just attach the page as
 * a frag to the skb.
 *
 * The function will then update the page offset if necessary and return
 * true if the buffer can be reused by the interface.
 **/
static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
			      unsigned int size,
			      union fm10k_rx_desc *rx_desc,
			      struct sk_buff *skb)
{
	struct page *page = rx_buffer->page;
	unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
	unsigned int truesize = FM10K_RX_BUFSZ;
#else
	unsigned int truesize = ALIGN(size, 512);
#endif
	unsigned int pull_len;

	if (unlikely(skb_is_nonlinear(skb)))
		goto add_tail_frag;

	if (likely(size <= FM10K_RX_HDR_LEN)) {
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!fm10k_page_is_reserved(page)))
			return true;

		/* this page cannot be reused so discard it */
		__free_page(page);
		return false;
	}

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);

	/* align pull length to size of long to optimize memcpy performance */
	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	va += pull_len;
	size -= pull_len;

add_tail_frag:
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			(unsigned long)va & ~PAGE_MASK, size, truesize);

	return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
}

static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
					     union fm10k_rx_desc *rx_desc,
					     struct sk_buff *skb)
{
	unsigned int size = le16_to_cpu(rx_desc->w.length);
	struct fm10k_rx_buffer *rx_buffer;
	struct page *page;

	rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
	page = rx_buffer->page;
	prefetchw(page);

	if (likely(!skb)) {
		void *page_addr = page_address(page) +
				  rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
		skb = napi_alloc_skb(&rx_ring->q_vector->napi,
				     FM10K_RX_HDR_LEN);
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_failed++;
			return NULL;
		}

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      size,
				      DMA_FROM_DEVICE);

	/* pull page into skb */
	if (fm10k_add_rx_frag(rx_buffer, size, rx_desc, skb)) {
		/* hand second half of page back to the ring */
		fm10k_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
			       PAGE_SIZE, DMA_FROM_DEVICE);
	}

	/* clear contents of rx_buffer */
	rx_buffer->page = NULL;

	return skb;
}

static inline void fm10k_rx_checksum(struct fm10k_ring *ring,
				     union fm10k_rx_desc *rx_desc,
				     struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (fm10k_test_staterr(rx_desc,
			       FM10K_RXD_STATUS_L4E |
			       FM10K_RXD_STATUS_L4E2 |
			       FM10K_RXD_STATUS_IPE |
			       FM10K_RXD_STATUS_IPE2)) {
		ring->rx_stats.csum_err++;
		return;
	}

	/* It must be a TCP or UDP packet with a valid checksum */
	if (fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS2))
		skb->encapsulation = true;
	else if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS))
		return;

	skb->ip_summed = CHECKSUM_UNNECESSARY;

	ring->rx_stats.csum_good++;
}

#define FM10K_RSS_L4_TYPES_MASK \
	(BIT(FM10K_RSSTYPE_IPV4_TCP) | \
	 BIT(FM10K_RSSTYPE_IPV4_UDP) | \
	 BIT(FM10K_RSSTYPE_IPV6_TCP) | \
	 BIT(FM10K_RSSTYPE_IPV6_UDP))

static inline void fm10k_rx_hash(struct fm10k_ring *ring,
				 union fm10k_rx_desc *rx_desc,
				 struct sk_buff *skb)
{
	u16 rss_type;

	if (!(ring->netdev->features & NETIF_F_RXHASH))
		return;

	rss_type = le16_to_cpu(rx_desc->w.pkt_info) & FM10K_RXD_RSSTYPE_MASK;
	if (!rss_type)
		return;

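	/* descriptors with a TCP/UDP RSS type carry a 4-tuple (L4) hash,
	 * everything else falls back to a 3-tuple (L3) hash
	 */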
	skb_set_hash(skb, le32_to_cpu(rx_desc->d.rss),
		     (BIT(rss_type) & FM10K_RSS_L4_TYPES_MASK) ?
		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
}

static void fm10k_type_trans(struct fm10k_ring *rx_ring,
			     union fm10k_rx_desc __maybe_unused *rx_desc,
			     struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;
	struct fm10k_l2_accel *l2_accel = rcu_dereference_bh(rx_ring->l2_accel);

	/* check to see if DGLORT belongs to a MACVLAN */
	if (l2_accel) {
		u16 idx = le16_to_cpu(FM10K_CB(skb)->fi.w.dglort) - 1;

		idx -= l2_accel->dglort;
		if (idx < l2_accel->size && l2_accel->macvlan[idx])
			dev = l2_accel->macvlan[idx];
		else
			l2_accel = NULL;
	}

	skb->protocol = eth_type_trans(skb, dev);

	/* Record Rx queue, or update macvlan statistics */
	if (!l2_accel)
		skb_record_rx_queue(skb, rx_ring->queue_index);
	else
		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
				 (skb->pkt_type == PACKET_BROADCAST) ||
				 (skb->pkt_type == PACKET_MULTICAST));
}

/**
 * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, timestamp, protocol, and
 * other fields within the skb.
 **/
static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
					     union fm10k_rx_desc *rx_desc,
					     struct sk_buff *skb)
{
	unsigned int len = skb->len;

	fm10k_rx_hash(rx_ring, rx_desc, skb);

	fm10k_rx_checksum(rx_ring, rx_desc, skb);

	FM10K_CB(skb)->tstamp = rx_desc->q.timestamp;

	FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;

	FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;

	if (rx_desc->w.vlan) {
		u16 vid = le16_to_cpu(rx_desc->w.vlan);

		if ((vid & VLAN_VID_MASK) != rx_ring->vid)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
		else if (vid & VLAN_PRIO_MASK)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       vid & VLAN_PRIO_MASK);
	}

	fm10k_type_trans(rx_ring, rx_desc, skb);

	return len;
}

/**
 * fm10k_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 *
 * This function updates next to clean. If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 **/
static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
			     union fm10k_rx_desc *rx_desc)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(FM10K_RX_DESC(rx_ring, ntc));

	if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
		return false;

	return true;
}

/**
 * fm10k_cleanup_headers - Correct corrupted or empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being fixed
 *
 * Address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 **/
static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
				  union fm10k_rx_desc *rx_desc,
				  struct sk_buff *skb)
{
	if (unlikely((fm10k_test_staterr(rx_desc,
					 FM10K_RXD_STATUS_RXE)))) {
#define FM10K_TEST_RXD_BIT(rxd, bit) \
	((rxd)->w.csum_err & cpu_to_le16(bit))
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_ERROR))
			rx_ring->rx_stats.switch_errors++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_NO_DESCRIPTOR))
			rx_ring->rx_stats.drops++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_PP_ERROR))
			rx_ring->rx_stats.pp_errors++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_READY))
			rx_ring->rx_stats.link_errors++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_TOO_BIG))
			rx_ring->rx_stats.length_errors++;
		dev_kfree_skb_any(skb);
		rx_ring->rx_stats.errors++;
		return true;
	}

	/* if eth_skb_pad returns an error the skb was freed */
	if (eth_skb_pad(skb))
		return true;

	return false;
}

/**
 * fm10k_receive_skb - helper function to handle rx indications
 * @q_vector: structure containing interrupt and ring information
 * @skb: packet to send up
 **/
static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
			      struct sk_buff *skb)
{
	napi_gro_receive(&q_vector->napi, skb);
}

static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
			      struct fm10k_ring *rx_ring,
			      int budget)
{
	struct sk_buff *skb = rx_ring->skb;
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = fm10k_desc_unused(rx_ring);

	while (likely(total_packets < budget)) {
		union fm10k_rx_desc *rx_desc;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
			fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);

		if (!rx_desc->d.staterr)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		/* retrieve a buffer from the ring */
		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

		cleaned_count++;

		/* fetch next buffer in frame if non-eop */
		if (fm10k_is_non_eop(rx_ring, rx_desc))
			continue;

		/* verify the packet layout is correct */
		if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
			skb = NULL;
			continue;
		}

		/* populate checksum, timestamp, VLAN, and protocol */
		total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);

		fm10k_receive_skb(q_vector, skb);

		/* reset skb pointer */
		skb = NULL;

		/* update budget accounting */
		total_packets++;
	}

	/* place incomplete frames back on ring for completion */
	rx_ring->skb = skb;

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_packets;
	rx_ring->stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	return total_packets;
}

#define VXLAN_HLEN (sizeof(struct udphdr) + 8)
static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
{
	struct fm10k_intfc *interface = netdev_priv(skb->dev);
	struct fm10k_udp_port *vxlan_port;

	/* we can only offload a vxlan if we recognize it as such */
	vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
					      struct fm10k_udp_port, list);

	if (!vxlan_port)
		return NULL;
	if (vxlan_port->port != udp_hdr(skb)->dest)
		return NULL;

	/* return offset of udp_hdr plus 8 bytes for VXLAN header */
	return (struct ethhdr *)(skb_transport_header(skb) + VXLAN_HLEN);
}

#define FM10K_NVGRE_RESERVED0_FLAGS htons(0x9FFF)
#define NVGRE_TNI htons(0x2000)
struct fm10k_nvgre_hdr {
	__be16 flags;
	__be16 proto;
	__be32 tni;
};

static struct ethhdr *fm10k_gre_is_nvgre(struct sk_buff *skb)
{
	struct fm10k_nvgre_hdr *nvgre_hdr;
	int hlen = ip_hdrlen(skb);

	/* currently only IPv4 is supported due to hlen above */
	if (vlan_get_protocol(skb) != htons(ETH_P_IP))
		return NULL;

	/* our transport header should be NVGRE */
	nvgre_hdr = (struct fm10k_nvgre_hdr *)(skb_network_header(skb) + hlen);

	/* verify all reserved flags are 0 */
	if (nvgre_hdr->flags & FM10K_NVGRE_RESERVED0_FLAGS)
		return NULL;

	/* report start of ethernet header */
	if (nvgre_hdr->flags & NVGRE_TNI)
		return (struct ethhdr *)(nvgre_hdr + 1);

	return (struct ethhdr *)(&nvgre_hdr->tni);
}

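/* Determine whether a tunneled frame can be offloaded: returns the inner
 * EtherType when the frame is a recognized VXLAN or NVGRE encapsulation
 * with a TCP or UDP inner payload and combined headers that fit within the
 * hardware tunnel header limit, or 0 if the offload cannot be used.
 */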
__be16 fm10k_tx_encap_offload(struct sk_buff *skb)
{
	u8 l4_hdr = 0, inner_l4_hdr = 0, inner_l4_hlen;
	struct ethhdr *eth_hdr;

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
	    skb->inner_protocol != htons(ETH_P_TEB))
		return 0;

	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IP):
		l4_hdr = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return 0;
	}

	switch (l4_hdr) {
	case IPPROTO_UDP:
		eth_hdr = fm10k_port_is_vxlan(skb);
		break;
	case IPPROTO_GRE:
		eth_hdr = fm10k_gre_is_nvgre(skb);
		break;
	default:
		return 0;
	}

	if (!eth_hdr)
		return 0;

	switch (eth_hdr->h_proto) {
	case htons(ETH_P_IP):
		inner_l4_hdr = inner_ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		inner_l4_hdr = inner_ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return 0;
	}

	switch (inner_l4_hdr) {
	case IPPROTO_TCP:
		inner_l4_hlen = inner_tcp_hdrlen(skb);
		break;
	case IPPROTO_UDP:
		inner_l4_hlen = 8;
		break;
	default:
		return 0;
	}

	/* The hardware allows tunnel offloads only if the combined inner and
	 * outer header is 184 bytes or less
	 */
	if (skb_inner_transport_header(skb) + inner_l4_hlen -
	    skb_mac_header(skb) > FM10K_TUNNEL_HEADER_LENGTH)
		return 0;

	return eth_hdr->h_proto;
}

static int fm10k_tso(struct fm10k_ring *tx_ring,
		     struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_desc *tx_desc;
	unsigned char *th;
	u8 hdrlen;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (!skb_is_gso(skb))
		return 0;

	/* compute header lengths */
	if (skb->encapsulation) {
		if (!fm10k_tx_encap_offload(skb))
			goto err_vxlan;
		th = skb_inner_transport_header(skb);
	} else {
		th = skb_transport_header(skb);
	}

	/* compute offset from SOF to transport header and add header len */
	hdrlen = (th - skb->data) + (((struct tcphdr *)th)->doff << 2);

	first->tx_flags |= FM10K_TX_FLAGS_CSUM;

	/* update gso size and bytecount with header size */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	first->bytecount += (first->gso_segs - 1) * hdrlen;

	/* populate Tx descriptor header size and mss */
	tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
	tx_desc->hdrlen = hdrlen;
	tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);

	return 1;

err_vxlan:
	tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
	if (net_ratelimit())
		netdev_err(tx_ring->netdev,
			   "TSO requested for unsupported tunnel, disabling offload\n");
	return -1;
}

static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
			  struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_desc *tx_desc;
	union {
		struct iphdr *ipv4;
		struct ipv6hdr *ipv6;
		u8 *raw;
	} network_hdr;
	u8 *transport_hdr;
	__be16 frag_off;
	__be16 protocol;
	u8 l4_hdr = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		goto no_csum;

	if (skb->encapsulation) {
		protocol = fm10k_tx_encap_offload(skb);
		if (!protocol) {
			if (skb_checksum_help(skb)) {
				dev_warn(tx_ring->dev,
					 "failed to offload encap csum!\n");
				tx_ring->tx_stats.csum_err++;
			}
			goto no_csum;
		}
		network_hdr.raw = skb_inner_network_header(skb);
		transport_hdr = skb_inner_transport_header(skb);
	} else {
		protocol = vlan_get_protocol(skb);
		network_hdr.raw = skb_network_header(skb);
		transport_hdr = skb_transport_header(skb);
	}

	switch (protocol) {
	case htons(ETH_P_IP):
		l4_hdr = network_hdr.ipv4->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = network_hdr.ipv6->nexthdr;
		if (likely((transport_hdr - network_hdr.raw) ==
			   sizeof(struct ipv6hdr)))
			break;
		ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
				      sizeof(struct ipv6hdr),
				 &l4_hdr, &frag_off);
		if (unlikely(frag_off))
			l4_hdr = NEXTHDR_FRAGMENT;
		break;
	default:
		break;
	}

	switch (l4_hdr) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	case IPPROTO_GRE:
		if (skb->encapsulation)
			break;
		/* fall through */
	default:
		if (unlikely(net_ratelimit())) {
			dev_warn(tx_ring->dev,
				 "partial checksum, version=%d l4 proto=%x\n",
				 protocol, l4_hdr);
		}
		skb_checksum_help(skb);
		tx_ring->tx_stats.csum_err++;
		goto no_csum;
	}

	/* update TX checksum flag */
	first->tx_flags |= FM10K_TX_FLAGS_CSUM;
	tx_ring->tx_stats.csum_good++;

no_csum:
	/* populate Tx descriptor header size and mss */
	tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
	tx_desc->hdrlen = 0;
	tx_desc->mss = 0;
}

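/* FM10K_SET_FLAG tests _flag in _input and, when it is set, produces
 * _result by scaling the bit into place (multiply when moving the bit up,
 * divide when moving it down), avoiding a conditional branch.
 */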
#define FM10K_SET_FLAG(_input, _flag, _result) \
	((_flag <= _result) ? \
	 ((u32)(_input & _flag) * (_result / _flag)) : \
	 ((u32)(_input & _flag) / (_flag / _result)))

static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
{
	/* set type for advanced descriptor with frame checksum insertion */
	u32 desc_flags = 0;

	/* set checksum offload bits */
	desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
				     FM10K_TXD_FLAG_CSUM);

	return desc_flags;
}

static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
			       struct fm10k_tx_desc *tx_desc, u16 i,
			       dma_addr_t dma, unsigned int size, u8 desc_flags)
{
	/* set RS and INT for last frame in a cache line */
	if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
		desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;

	/* record values to descriptor */
	tx_desc->buffer_addr = cpu_to_le64(dma);
	tx_desc->flags = desc_flags;
	tx_desc->buflen = cpu_to_le16(size);

	/* return true if we just wrapped the ring */
	return i == tx_ring->count;
}

static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);

	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available */
	if (likely(fm10k_desc_unused(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}

static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
{
	if (likely(fm10k_desc_unused(tx_ring) >= size))
		return 0;
	return __fm10k_maybe_stop_tx(tx_ring, size);
}

static void fm10k_tx_map(struct fm10k_ring *tx_ring,
			 struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_buffer *tx_buffer;
	struct fm10k_tx_desc *tx_desc;
	struct skb_frag_struct *frag;
	unsigned char *data;
	dma_addr_t dma;
	unsigned int data_len, size;
	u32 tx_flags = first->tx_flags;
	u16 i = tx_ring->next_to_use;
	u8 flags = fm10k_tx_desc_flags(skb, tx_flags);

	tx_desc = FM10K_TX_DESC(tx_ring, i);

	/* add HW VLAN tag */
	if (skb_vlan_tag_present(skb))
		tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
	else
		tx_desc->vlan = 0;

	size = skb_headlen(skb);
	data = skb->data;

	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);

	data_len = skb->data_len;
	tx_buffer = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, size);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
					       FM10K_MAX_DATA_PER_TXD, flags)) {
				tx_desc = FM10K_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += FM10K_MAX_DATA_PER_TXD;
			size -= FM10K_MAX_DATA_PER_TXD;
		}

		if (likely(!data_len))
			break;

		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
				       dma, size, flags)) {
			tx_desc = FM10K_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buffer = &tx_ring->tx_buffer[i];
	}

	/* write last descriptor with LAST bit set */
	flags |= FM10K_TXD_FLAG_LAST;

	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
		i = 0;

	/* record bytecount for BQL */
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* record SW timestamp if HW timestamp is not available */
	skb_tx_timestamp(first->skb);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch. (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	tx_ring->next_to_use = i;

	/* Make sure there is space in the ring for the next send. */
	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);

	/* notify HW of packet */
	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
		writel(i, tx_ring->tail);

		/* we need this if more than one processor can write to our tail
		 * at a time, it synchronizes IO on IA64/Altix systems
		 */
		mmiowb();
	}

	return;
dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer map */
	for (;;) {
		tx_buffer = &tx_ring->tx_buffer[i];
		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
		if (tx_buffer == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
				  struct fm10k_ring *tx_ring)
{
	u16 count = TXD_USE_COUNT(skb_headlen(skb));
	struct fm10k_tx_buffer *first;
	unsigned short f;
	u32 tx_flags = 0;
	int tso;

	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head
	 * otherwise try next time
	 */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
	first->gso_segs = 1;

	/* record initial flags and protocol */
	first->tx_flags = tx_flags;

	tso = fm10k_tso(tx_ring, first);
	if (tso < 0)
		goto out_drop;
	else if (!tso)
		fm10k_tx_csum(tx_ring, first);

	fm10k_tx_map(tx_ring, first);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(first->skb);
	first->skb = NULL;

	return NETDEV_TX_OK;
}

static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
{
	return ring->stats.packets;
}

/**
 * fm10k_get_tx_pending - how many Tx descriptors not processed
 * @ring: the ring structure
 * @in_sw: is tx_pending being checked in SW or in HW?
 */
u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw)
{
	struct fm10k_intfc *interface = ring->q_vector->interface;
	struct fm10k_hw *hw = &interface->hw;
	u32 head, tail;

	if (likely(in_sw)) {
		head = ring->next_to_clean;
		tail = ring->next_to_use;
	} else {
		head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
		tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
	}

	return ((head <= tail) ? tail : tail + ring->count) - head;
}

bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
{
	u32 tx_done = fm10k_get_tx_completed(tx_ring);
	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
	u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);

	clear_check_for_tx_hang(tx_ring);

	/* Check for a hung queue, but be thorough. This verifies
	 * that a transmit has been completed since the previous
	 * check AND there is at least one packet pending. By
	 * requiring this to fail twice we avoid races with
	 * clearing the ARMED bit and conditions where we
	 * run the check_tx_hang logic with a transmit completion
	 * pending but without time to complete it yet.
	 */
	if (!tx_pending || (tx_done_old != tx_done)) {
		/* update completed stats and continue */
		tx_ring->tx_stats.tx_done_old = tx_done;
		/* reset the countdown */
		clear_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);

		return false;
	}

	/* make sure it is true for two checks in a row */
	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
}

/**
 * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
 * @interface: driver private struct
 **/
void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
{
	/* Do the reset outside of interrupt context */
	if (!test_bit(__FM10K_DOWN, interface->state)) {
		interface->tx_timeout_count++;
		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
		fm10k_service_event_schedule(interface);
	}
}

/**
 * fm10k_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: structure containing interrupt and ring information
 * @tx_ring: tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 **/
static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
			       struct fm10k_ring *tx_ring, int napi_budget)
{
	struct fm10k_intfc *interface = q_vector->interface;
	struct fm10k_tx_buffer *tx_buffer;
	struct fm10k_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__FM10K_DOWN, interface->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer[i];
	tx_desc = FM10K_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buffer->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buffer->skb = NULL;
		dma_unmap_len_set(tx_buffer, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer;
				tx_desc = FM10K_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer;
			tx_desc = FM10K_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
		/* schedule immediate reset if we believe we hung */
		struct fm10k_hw *hw = &interface->hw;

		netif_err(interface, drv, tx_ring->netdev,
			  "Detected Tx Unit Hang\n"
			  "  Tx Queue             <%d>\n"
			  "  TDH, TDT             <%x>, <%x>\n"
			  "  next_to_use          <%x>\n"
			  "  next_to_clean        <%x>\n",
			  tx_ring->queue_index,
			  fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
			  fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
			  tx_ring->next_to_use, i);

		netif_stop_subqueue(tx_ring->netdev,
				    tx_ring->queue_index);

		netif_info(interface, probe, tx_ring->netdev,
			   "tx hang %d detected on queue %d, resetting interface\n",
			   interface->tx_timeout_count + 1,
			   tx_ring->queue_index);

		fm10k_tx_timeout_reset(interface);

		/* the netdev is about to reset, no point in enabling stuff */
		return true;
	}

	/* notify netdev of completed buffers */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__FM10K_DOWN, interface->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}

/**
 * fm10k_update_itr - update the dynamic ITR value based on packet size
 *
 * Stores a new ITR value based strictly on packet size. The
 * divisors and thresholds used by this function were determined based
 * on theoretical maximum wire speed and testing data, in order to
 * minimize response time while increasing bulk throughput.
 *
 * @ring_container: Container for rings to have ITR updated
 **/
static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
{
	unsigned int avg_wire_size, packets, itr_round;

	/* Only update ITR if we are using adaptive setting */
	if (!ITR_IS_ADAPTIVE(ring_container->itr))
		goto clear_counts;

	packets = ring_container->total_packets;
	if (!packets)
		goto clear_counts;

	avg_wire_size = ring_container->total_bytes / packets;

	/* The following is a crude approximation of:
	 *  wmem_default / (size + overhead) = desired_pkts_per_int
	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
	 *
	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
	 * formula down to
	 *
	 *  (34 * (size + 24)) / (size + 640) = ITR
	 *
	 * We first do some math on the packet size and then finally bitshift
	 * by 8 after rounding up. We also have to account for PCIe link speed
	 * difference as ITR scales based on this.
	 */
	if (avg_wire_size <= 360) {
		/* Start at 250K ints/sec and gradually drop to 77K ints/sec */
		avg_wire_size *= 8;
		avg_wire_size += 376;
	} else if (avg_wire_size <= 1152) {
		/* 77K ints/sec to 45K ints/sec */
		avg_wire_size *= 3;
		avg_wire_size += 2176;
	} else if (avg_wire_size <= 1920) {
		/* 45K ints/sec to 38K ints/sec */
		avg_wire_size += 4480;
	} else {
		/* plateau at a limit of 38K ints/sec */
		avg_wire_size = 6656;
	}

	/* Perform final bitshift for division after rounding up to ensure
	 * that the calculation will never get below a 1. The bit shift
	 * accounts for changes in the ITR due to PCIe link speed.
	 */
	itr_round = READ_ONCE(ring_container->itr_scale) + 8;
	avg_wire_size += BIT(itr_round) - 1;
	avg_wire_size >>= itr_round;

	/* write back value and retain adaptive flag */
	ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;

clear_counts:
	ring_container->total_bytes = 0;
	ring_container->total_packets = 0;
}

static void fm10k_qv_enable(struct fm10k_q_vector *q_vector)
{
	/* Enable auto-mask and clear the current mask */
	u32 itr = FM10K_ITR_ENABLE;

	/* Update Tx ITR */
	fm10k_update_itr(&q_vector->tx);

	/* Update Rx ITR */
	fm10k_update_itr(&q_vector->rx);

	/* Store Tx itr in timer slot 0 */
	itr |= (q_vector->tx.itr & FM10K_ITR_MAX);

	/* Shift Rx itr to timer slot 1 */
	itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT;

	/* Write the final value to the ITR register */
	writel(itr, q_vector->itr);
}

static int fm10k_poll(struct napi_struct *napi, int budget)
{
	struct fm10k_q_vector *q_vector =
			       container_of(napi, struct fm10k_q_vector, napi);
	struct fm10k_ring *ring;
	int per_ring_budget, work_done = 0;
	bool clean_complete = true;

	fm10k_for_each_ring(ring, q_vector->tx) {
		if (!fm10k_clean_tx_irq(q_vector, ring, budget))
			clean_complete = false;
	}

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0)
		return budget;

	/* attempt to distribute budget to each queue fairly, but don't
	 * allow the budget to go below 1 because we'll exit polling
	 */
	if (q_vector->rx.count > 1)
		per_ring_budget = max(budget / q_vector->rx.count, 1);
	else
		per_ring_budget = budget;

	fm10k_for_each_ring(ring, q_vector->rx) {
		int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget);

		work_done += work;
		if (work >= per_ring_budget)
			clean_complete = false;
	}

	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;

	/* all work done, exit the polling mode */
	napi_complete_done(napi, work_done);

	/* re-enable the q_vector */
	fm10k_qv_enable(q_vector);

	return min(work_done, budget - 1);
}

/**
 * fm10k_set_qos_queues: Allocate queues for a QOS-enabled device
 * @interface: board private structure to initialize
 *
 * When QoS (Quality of Service) is enabled, allocate queues for
 * each traffic class. If multiqueue isn't available, then abort QoS
 * initialization.
 *
 * This function handles all combinations of QoS and RSS.
 *
 **/
static bool fm10k_set_qos_queues(struct fm10k_intfc *interface)
{
	struct net_device *dev = interface->netdev;
	struct fm10k_ring_feature *f;
	int rss_i, i;
	int pcs;

	/* Map queue offset and counts onto allocated tx queues */
	pcs = netdev_get_num_tc(dev);

	if (pcs <= 1)
		return false;

	/* set QoS mask and indices */
	f = &interface->ring_feature[RING_F_QOS];
	f->indices = pcs;
	f->mask = BIT(fls(pcs - 1)) - 1;

	/* determine the upper limit for our current DCB mode */
	rss_i = interface->hw.mac.max_queues / pcs;
	rss_i = BIT(fls(rss_i) - 1);

	/* set RSS mask and indices */
	f = &interface->ring_feature[RING_F_RSS];
	rss_i = min_t(u16, rss_i, f->limit);
	f->indices = rss_i;
	f->mask = BIT(fls(rss_i - 1)) - 1;

	/* configure pause class to queue mapping */
	for (i = 0; i < pcs; i++)
		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);

	interface->num_rx_queues = rss_i * pcs;
	interface->num_tx_queues = rss_i * pcs;

	return true;
}

/**
 * fm10k_set_rss_queues: Allocate queues for RSS
 * @interface: board private structure to initialize
 *
 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
 *
 **/
static bool fm10k_set_rss_queues(struct fm10k_intfc *interface)
{
	struct fm10k_ring_feature *f;
	u16 rss_i;

	f = &interface->ring_feature[RING_F_RSS];
	rss_i = min_t(u16, interface->hw.mac.max_queues, f->limit);

	/* record indices and power of 2 mask for RSS */
	f->indices = rss_i;
	f->mask = BIT(fls(rss_i - 1)) - 1;

	interface->num_rx_queues = rss_i;
	interface->num_tx_queues = rss_i;

	return true;
}

/**
 * fm10k_set_num_queues: Allocate queues for device, feature dependent
 * @interface: board private structure to initialize
 *
 * This is the top level queue allocation routine. The order here is very
 * important, starting with the "most" number of features turned on at once,
 * and ending with the smallest set of features. This way large combinations
 * can be allocated if they're turned on, and smaller combinations are the
 * fallthrough conditions.
 *
 **/
static void fm10k_set_num_queues(struct fm10k_intfc *interface)
{
	/* Attempt to setup QoS and RSS first */
	if (fm10k_set_qos_queues(interface))
		return;

	/* If we don't have QoS, just fallback to only RSS. */
	fm10k_set_rss_queues(interface);
}

/**
 * fm10k_reset_num_queues - Reset the number of queues to zero
 * @interface: board private structure
 *
 * This function should be called whenever we need to reset the number of
 * queues after an error condition.
 */
static void fm10k_reset_num_queues(struct fm10k_intfc *interface)
{
	interface->num_tx_queues = 0;
	interface->num_rx_queues = 0;
	interface->num_q_vectors = 0;
}

/**
 * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector
 * @interface: board private structure to initialize
 * @v_count: q_vectors allocated on interface, used for ring interleaving
 * @v_idx: index of vector in interface struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector. If allocation fails we return -ENOMEM.
 **/
static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
				unsigned int v_count, unsigned int v_idx,
				unsigned int txr_count, unsigned int txr_idx,
				unsigned int rxr_count, unsigned int rxr_idx)
{
	struct fm10k_q_vector *q_vector;
	struct fm10k_ring *ring;
	int ring_count, size;

	ring_count = txr_count + rxr_count;
	size = sizeof(struct fm10k_q_vector) +
	       (sizeof(struct fm10k_ring) * ring_count);

	/* allocate q_vector and rings */
	q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* initialize NAPI */
	netif_napi_add(interface->netdev, &q_vector->napi,
		       fm10k_poll, NAPI_POLL_WEIGHT);

	/* tie q_vector and interface together */
	interface->q_vector[v_idx] = q_vector;
	q_vector->interface = interface;
	q_vector->v_idx = v_idx;

	/* initialize pointer to rings */
	ring = q_vector->ring;

	/* save Tx ring container info */
	q_vector->tx.ring = ring;
	q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK;
	q_vector->tx.itr = interface->tx_itr;
	q_vector->tx.itr_scale = interface->hw.mac.itr_scale;
	q_vector->tx.count = txr_count;

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &interface->pdev->dev;
		ring->netdev = interface->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* apply Tx specific ring traits */
		ring->count = interface->tx_ring_count;
		ring->queue_index = txr_idx;

		/* assign ring to interface */
		interface->tx_ring[txr_idx] = ring;

		/* update count and index */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	/* save Rx ring container info */
	q_vector->rx.ring = ring;
	q_vector->rx.itr = interface->rx_itr;
	q_vector->rx.itr_scale = interface->hw.mac.itr_scale;
	q_vector->rx.count = rxr_count;

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &interface->pdev->dev;
		ring->netdev = interface->netdev;
		rcu_assign_pointer(ring->l2_accel, interface->l2_accel);

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* apply Rx specific ring traits */
		ring->count = interface->rx_ring_count;
		ring->queue_index = rxr_idx;

		/* assign ring to interface */
		interface->rx_ring[rxr_idx] = ring;

		/* update count and index */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	fm10k_dbg_q_vector_init(q_vector);

	return 0;
}

/**
 * fm10k_free_q_vector - Free memory allocated for specific interrupt vector
 * @interface: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector. In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx)
{
	struct fm10k_q_vector *q_vector = interface->q_vector[v_idx];
	struct fm10k_ring *ring;

	fm10k_dbg_q_vector_exit(q_vector);

	fm10k_for_each_ring(ring, q_vector->tx)
		interface->tx_ring[ring->queue_index] = NULL;

	fm10k_for_each_ring(ring, q_vector->rx)
		interface->rx_ring[ring->queue_index] = NULL;

	interface->q_vector[v_idx] = NULL;
	netif_napi_del(&q_vector->napi);
	kfree_rcu(q_vector, rcu);
}

/**
 * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors
 * @interface: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt. If allocation fails we
 * return -ENOMEM.
 **/
static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface)
{
	unsigned int q_vectors = interface->num_q_vectors;
	unsigned int rxr_remaining = interface->num_rx_queues;
	unsigned int txr_remaining = interface->num_tx_queues;
	unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	int err;

	if (q_vectors >= (rxr_remaining + txr_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
						   0, 0, 1, rxr_idx);
			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
		}
	}

	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);

		err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
					   tqpv, txr_idx,
					   rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

	return 0;

err_out:
	fm10k_reset_num_queues(interface);

	while (v_idx--)
		fm10k_free_q_vector(interface, v_idx);

	return -ENOMEM;
}

/**
 * fm10k_free_q_vectors - Free memory allocated for interrupt vectors
 * @interface: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors. In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
{
	int v_idx = interface->num_q_vectors;

	fm10k_reset_num_queues(interface);

	while (v_idx--)
		fm10k_free_q_vector(interface, v_idx);
}

/**
 * fm10k_reset_msix_capability - reset MSI-X capability
 * @interface: board private structure to initialize
 *
 * Reset the MSI-X capability back to its starting state
 **/
static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
{
	pci_disable_msix(interface->pdev);
	kfree(interface->msix_entries);
	interface->msix_entries = NULL;
}

/**
 * fm10k_init_msix_capability - configure MSI-X capability
 * @interface: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static int fm10k_init_msix_capability(struct fm10k_intfc *interface)
{
	struct fm10k_hw *hw = &interface->hw;
	int v_budget, vector;

	/* It's easy to be greedy for MSI-X vectors, but it really
	 * doesn't do us much good if we have a lot more vectors
	 * than CPU's. So let's be conservative and only ask for
	 * (roughly) the same number of vectors as there are CPU's.
	 * the default is to use pairs of vectors
	 */
	v_budget = max(interface->num_rx_queues, interface->num_tx_queues);
	v_budget = min_t(u16, v_budget, num_online_cpus());

	/* account for vectors not related to queues */
	v_budget += NON_Q_VECTORS(hw);

	/* At the same time, hardware can only support a maximum of
	 * hw.mac->max_msix_vectors vectors. With features
	 * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
	 * descriptor queues supported by our device. Thus, we cap it off in
	 * those rare cases where the cpu count also exceeds our vector limit.
	 */
	v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);

	/* A failure in MSI-X entry allocation is fatal. */
	interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
					  GFP_KERNEL);
	if (!interface->msix_entries)
		return -ENOMEM;

	/* populate entry values */
	for (vector = 0; vector < v_budget; vector++)
		interface->msix_entries[vector].entry = vector;

	/* Attempt to enable MSI-X with requested value */
	v_budget = pci_enable_msix_range(interface->pdev,
					 interface->msix_entries,
					 MIN_MSIX_COUNT(hw),
					 v_budget);
	if (v_budget < 0) {
		kfree(interface->msix_entries);
		interface->msix_entries = NULL;
		return v_budget;
	}

	/* record the number of queues available for q_vectors */
	interface->num_q_vectors = v_budget - NON_Q_VECTORS(hw);

	return 0;
}

/**
 * fm10k_cache_ring_qos - Descriptor ring to register mapping for QoS
 * @interface: Interface structure containing rings and devices
 *
 * Cache the descriptor ring offsets for QoS
 **/
static bool fm10k_cache_ring_qos(struct fm10k_intfc *interface)
{
	struct net_device *dev = interface->netdev;
	int pc, offset, rss_i, i, q_idx;
	u16 pc_stride = interface->ring_feature[RING_F_QOS].mask + 1;
	u8 num_pcs = netdev_get_num_tc(dev);

	if (num_pcs <= 1)
		return false;

	rss_i = interface->ring_feature[RING_F_RSS].indices;

	for (pc = 0, offset = 0; pc < num_pcs; pc++, offset += rss_i) {
		q_idx = pc;
		for (i = 0; i < rss_i; i++) {
			interface->tx_ring[offset + i]->reg_idx = q_idx;
			interface->tx_ring[offset + i]->qos_pc = pc;
			interface->rx_ring[offset + i]->reg_idx = q_idx;
			interface->rx_ring[offset + i]->qos_pc = pc;
			q_idx += pc_stride;
		}
	}

	return true;
}

/**
 * fm10k_cache_ring_rss - Descriptor ring to register mapping for RSS
 * @interface: Interface structure containing rings and devices
 *
 * Cache the descriptor ring offsets for RSS
 **/
static void fm10k_cache_ring_rss(struct fm10k_intfc *interface)
{
	int i;

	for (i = 0; i < interface->num_rx_queues; i++)
		interface->rx_ring[i]->reg_idx = i;

	for (i = 0; i < interface->num_tx_queues; i++)
		interface->tx_ring[i]->reg_idx = i;
}

/**
 * fm10k_assign_rings - Map rings to network devices
 * @interface: Interface structure containing rings and devices
 *
 * This function is meant to go through and configure both the network
 * devices so that they contain rings, and configure the rings so that
 * they function with their network devices.
 **/
static void fm10k_assign_rings(struct fm10k_intfc *interface)
{
	if (fm10k_cache_ring_qos(interface))
		return;

	fm10k_cache_ring_rss(interface);
}

static void fm10k_init_reta(struct fm10k_intfc *interface)
{
	u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
	u32 reta;

	/* If the Rx flow indirection table has been configured manually, we
	 * need to maintain it when possible.
	 */
	if (netif_is_rxfh_configured(interface->netdev)) {
		for (i = FM10K_RETA_SIZE; i--;) {
			reta = interface->reta[i];
			if ((((reta << 24) >> 24) < rss_i) &&
			    (((reta << 16) >> 24) < rss_i) &&
			    (((reta <<  8) >> 24) < rss_i) &&
			    (((reta)       >> 24) < rss_i))
				continue;

			/* this should never happen */
			dev_err(&interface->pdev->dev,
				"RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n");
			goto repopulate_reta;
		}

		/* do nothing if all of the elements are in bounds */
		return;
	}

repopulate_reta:
	fm10k_write_reta(interface, NULL);
}

/**
 * fm10k_init_queueing_scheme - Determine proper queueing scheme
 * @interface: board private structure to initialize
 *
 * We determine which queueing scheme to use based on...
 * - Hardware queue count (num_*_queues)
 *   - defined by miscellaneous hardware support/features (RSS, etc.)
 **/
int fm10k_init_queueing_scheme(struct fm10k_intfc *interface)
{
	int err;

	/* Number of supported queues */
	fm10k_set_num_queues(interface);

	/* Configure MSI-X capability */
	err = fm10k_init_msix_capability(interface);
	if (err) {
		dev_err(&interface->pdev->dev,
			"Unable to initialize MSI-X capability\n");
		goto err_init_msix;
	}

	/* Allocate memory for queues */
	err = fm10k_alloc_q_vectors(interface);
	if (err) {
		dev_err(&interface->pdev->dev,
			"Unable to allocate queue vectors\n");
		goto err_alloc_q_vectors;
	}

	/* Map rings to devices, and map devices to physical queues */
	fm10k_assign_rings(interface);

	/* Initialize RSS redirection table */
	fm10k_init_reta(interface);

	return 0;

err_alloc_q_vectors:
	fm10k_reset_msix_capability(interface);
err_init_msix:
	fm10k_reset_num_queues(interface);
	return err;
}

/**
 * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings
 * @interface: board private structure to clear queueing scheme on
 *
 * We go through and clear queueing specific resources and reset the structure
 * to pre-load conditions
 **/
void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface)
{
	fm10k_free_q_vectors(interface);
	fm10k_reset_msix_capability(interface);
}