// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/prefetch.h>
#include <linux/moduleparam.h>
#include <linux/iommu.h>
#include <net/ip.h>
#include <net/checksum.h>
#include "net_driver.h"
#include "efx.h"
#include "filter.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"

/* Preferred number of descriptors to fill at once */
#define EF4_RX_PREFERRED_BATCH 8U

/* Number of RX buffers to recycle pages for.  When creating the RX page recycle
 * ring, this number is divided by the number of buffers per page to calculate
 * the number of pages to store in the RX page recycle ring.
 */
#define EF4_RECYCLE_RING_SIZE_IOMMU 4096
#define EF4_RECYCLE_RING_SIZE_NOIOMMU (2 * EF4_RX_PREFERRED_BATCH)

/* Size of buffer allocated for skb header area. */
#define EF4_SKB_HEADERS  128u

/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 */
static unsigned int rx_refill_threshold;

/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EF4_RX_MAX_FRAGS DIV_ROUND_UP(EF4_MAX_FRAME_LEN(EF4_MAX_MTU), \
				      EF4_RX_USR_BUF_SIZE)

/*
 * RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow, plus one packet-worth
 * to allow pipelined receives.
 */
#define EF4_RXD_HEAD_ROOM (1 + EF4_RX_MAX_FRAGS)

static inline u8 *ef4_rx_buf_va(struct ef4_rx_buffer *buf)
{
	return page_address(buf->page) + buf->page_offset;
}

static inline u32 ef4_rx_buf_hash(struct ef4_nic *efx, const u8 *eh)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
	return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
#else
	const u8 *data = eh + efx->rx_packet_hash_offset;

	return (u32)data[0]	  |
	       (u32)data[1] << 8  |
	       (u32)data[2] << 16 |
	       (u32)data[3] << 24;
#endif
}

static inline struct ef4_rx_buffer *
ef4_rx_buf_next(struct ef4_rx_queue *rx_queue, struct ef4_rx_buffer *rx_buf)
{
	if (unlikely(rx_buf == ef4_rx_buffer(rx_queue, rx_queue->ptr_mask)))
		return ef4_rx_buffer(rx_queue, 0);
	else
		return rx_buf + 1;
}

static inline void ef4_sync_rx_buffer(struct ef4_nic *efx,
				      struct ef4_rx_buffer *rx_buf,
				      unsigned int len)
{
	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
				DMA_FROM_DEVICE);
}

void ef4_rx_config_page_split(struct ef4_nic *efx)
{
	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
				      EF4_RX_BUF_ALIGNMENT);
	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
		((PAGE_SIZE - sizeof(struct ef4_rx_page_state)) /
		 efx->rx_page_buf_step);
	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
		efx->rx_bufs_per_page;
	efx->rx_pages_per_batch = DIV_ROUND_UP(EF4_RX_PREFERRED_BATCH,
					       efx->rx_bufs_per_page);
}

/* Check the RX page recycle ring for a page that can be reused.
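 * A page can only be reused once the network stack has dropped all of its
 * references to it, i.e. page_count() has fallen back to one (the reference
 * left over from the original allocation).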
 */
static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue)
{
	struct ef4_nic *efx = rx_queue->efx;
	struct page *page;
	struct ef4_rx_page_state *state;
	unsigned index;

	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
	page = rx_queue->page_ring[index];
	if (page == NULL)
		return NULL;

	rx_queue->page_ring[index] = NULL;
	/* page_remove cannot exceed page_add. */
	if (rx_queue->page_remove != rx_queue->page_add)
		++rx_queue->page_remove;

	/* If page_count is 1 then we hold the only reference to this page. */
	if (page_count(page) == 1) {
		++rx_queue->page_recycle_count;
		return page;
	} else {
		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
		++rx_queue->page_recycle_failed;
	}

	return NULL;
}

/**
 * ef4_init_rx_buffers - create EF4_RX_BATCH page-based RX buffers
 *
 * @rx_queue:		Efx RX queue
 * @atomic:		control memory allocation flags
 *
 * This allocates a batch of pages, maps them for DMA, and populates
 * struct ef4_rx_buffers for each one. Return a negative error code or
 * 0 on success. If a single page can be used for multiple buffers,
 * then the page will either be inserted fully, or not at all.
 */
static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic)
{
	struct ef4_nic *efx = rx_queue->efx;
	struct ef4_rx_buffer *rx_buf;
	struct page *page;
	unsigned int page_offset;
	struct ef4_rx_page_state *state;
	dma_addr_t dma_addr;
	unsigned index, count;

	count = 0;
	do {
		page = ef4_reuse_page(rx_queue);
		if (page == NULL) {
			page = alloc_pages(__GFP_COMP |
					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
					   efx->rx_buffer_order);
			if (unlikely(page == NULL))
				return -ENOMEM;
			dma_addr =
				dma_map_page(&efx->pci_dev->dev, page, 0,
					     PAGE_SIZE << efx->rx_buffer_order,
					     DMA_FROM_DEVICE);
			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
						       dma_addr))) {
				__free_pages(page, efx->rx_buffer_order);
				return -EIO;
			}
			state = page_address(page);
			state->dma_addr = dma_addr;
		} else {
			state = page_address(page);
			dma_addr = state->dma_addr;
		}

		dma_addr += sizeof(struct ef4_rx_page_state);
		page_offset = sizeof(struct ef4_rx_page_state);

		do {
			index = rx_queue->added_count & rx_queue->ptr_mask;
			rx_buf = ef4_rx_buffer(rx_queue, index);
			rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
			rx_buf->page = page;
			rx_buf->page_offset = page_offset + efx->rx_ip_align;
			rx_buf->len = efx->rx_dma_len;
			rx_buf->flags = 0;
			++rx_queue->added_count;
			get_page(page);
			dma_addr += efx->rx_page_buf_step;
			page_offset += efx->rx_page_buf_step;
		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);

		rx_buf->flags = EF4_RX_BUF_LAST_IN_PAGE;
	} while (++count < efx->rx_pages_per_batch);

	return 0;
}

/* Unmap a DMA-mapped page.  This function is only called for the final RX
 * buffer in a page.
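 * The whole page was mapped with a single dma_map_page() call in
 * ef4_init_rx_buffers(), so one dma_unmap_page() here covers every buffer
 * carved from it.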
 */
static void ef4_unmap_rx_buffer(struct ef4_nic *efx,
				struct ef4_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;

	if (page) {
		struct ef4_rx_page_state *state = page_address(page);

		dma_unmap_page(&efx->pci_dev->dev,
			       state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
	}
}

static void ef4_free_rx_buffers(struct ef4_rx_queue *rx_queue,
				struct ef4_rx_buffer *rx_buf,
				unsigned int num_bufs)
{
	do {
		if (rx_buf->page) {
			put_page(rx_buf->page);
			rx_buf->page = NULL;
		}
		rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
	} while (--num_bufs);
}

/* Attempt to recycle the page if there is an RX recycle ring; the page can
 * only be added if this is the final RX buffer, to prevent pages being used in
 * the descriptor ring and appearing in the recycle ring simultaneously.
 */
static void ef4_recycle_rx_page(struct ef4_channel *channel,
				struct ef4_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;
	struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
	struct ef4_nic *efx = rx_queue->efx;
	unsigned index;

	/* Only recycle the page after processing the final buffer. */
	if (!(rx_buf->flags & EF4_RX_BUF_LAST_IN_PAGE))
		return;

	index = rx_queue->page_add & rx_queue->page_ptr_mask;
	if (rx_queue->page_ring[index] == NULL) {
		unsigned read_index = rx_queue->page_remove &
			rx_queue->page_ptr_mask;

		/* The next slot in the recycle ring is available, but
		 * increment page_remove if the read pointer currently
		 * points here.
		 */
		if (read_index == index)
			++rx_queue->page_remove;
		rx_queue->page_ring[index] = page;
		++rx_queue->page_add;
		return;
	}
	++rx_queue->page_recycle_full;
	ef4_unmap_rx_buffer(efx, rx_buf);
	put_page(rx_buf->page);
}

static void ef4_fini_rx_buffer(struct ef4_rx_queue *rx_queue,
			       struct ef4_rx_buffer *rx_buf)
{
	/* Release the page reference we hold for the buffer. */
	if (rx_buf->page)
		put_page(rx_buf->page);

	/* If this is the last buffer in a page, unmap and free it. */
	if (rx_buf->flags & EF4_RX_BUF_LAST_IN_PAGE) {
		ef4_unmap_rx_buffer(rx_queue->efx, rx_buf);
		ef4_free_rx_buffers(rx_queue, rx_buf, 1);
	}
	rx_buf->page = NULL;
}

/* Recycle the pages that are used by buffers that have just been received. */
static void ef4_recycle_rx_pages(struct ef4_channel *channel,
				 struct ef4_rx_buffer *rx_buf,
				 unsigned int n_frags)
{
	struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);

	do {
		ef4_recycle_rx_page(channel, rx_buf);
		rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
	} while (--n_frags);
}

static void ef4_discard_rx_packet(struct ef4_channel *channel,
				  struct ef4_rx_buffer *rx_buf,
				  unsigned int n_frags)
{
	struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);

	ef4_recycle_rx_pages(channel, rx_buf, n_frags);

	ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
}

/**
 * ef4_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue:		RX descriptor queue
 * @atomic:		control memory allocation flags
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->@max_fill. If there is insufficient atomic
 * memory to do so, a slow fill will be scheduled.
 *
 * The caller must provide serialisation (none is used here). In practice,
 * this means this function must run from the NAPI handler, or be called
 * when NAPI is disabled.
 */
void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic)
{
	struct ef4_nic *efx = rx_queue->efx;
	unsigned int fill_level, batch_size;
	int space, rc = 0;

	if (!rx_queue->refill_enabled)
		return;

	/* Calculate current fill level, and exit if we don't need to fill */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EF4_BUG_ON_PARANOID(fill_level > rx_queue->efx->rxq_entries);
	if (fill_level >= rx_queue->fast_fill_trigger)
		goto out;

	/* Record minimum fill level */
	if (unlikely(fill_level < rx_queue->min_fill)) {
		if (fill_level)
			rx_queue->min_fill = fill_level;
	}

	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
	space = rx_queue->max_fill - fill_level;
	EF4_BUG_ON_PARANOID(space < batch_size);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filling descriptor ring from"
		   " level %d to level %d\n",
		   ef4_rx_queue_index(rx_queue), fill_level,
		   rx_queue->max_fill);

	do {
		rc = ef4_init_rx_buffers(rx_queue, atomic);
		if (unlikely(rc)) {
			/* Ensure that we don't leave the rx queue empty */
			if (rx_queue->added_count == rx_queue->removed_count)
				ef4_schedule_slow_fill(rx_queue);
			goto out;
		}
	} while ((space -= batch_size) >= batch_size);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filled descriptor ring "
		   "to level %d\n", ef4_rx_queue_index(rx_queue),
		   rx_queue->added_count - rx_queue->removed_count);

out:
	if (rx_queue->notified_count != rx_queue->added_count)
		ef4_nic_notify_rx_desc(rx_queue);
}

void ef4_rx_slow_fill(struct timer_list *t)
{
	struct ef4_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);

	/* Post an event to cause NAPI to run and refill the queue */
	ef4_nic_generate_fill_event(rx_queue);
	++rx_queue->slow_fill_count;
}

static void ef4_rx_packet__check_len(struct ef4_rx_queue *rx_queue,
				     struct ef4_rx_buffer *rx_buf,
				     int len)
{
	struct ef4_nic *efx = rx_queue->efx;
	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;

	if (likely(len <= max_len))
		return;

	/* The packet must be discarded, but this is only a fatal error
	 * if the caller indicated it was
	 */
	rx_buf->flags |= EF4_RX_PKT_DISCARD;

	if ((len > rx_buf->len) && EF4_WORKAROUND_8071(efx)) {
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  " RX queue %d seriously overlength "
				  "RX event (0x%x > 0x%x+0x%x). Leaking\n",
				  ef4_rx_queue_index(rx_queue), len, max_len,
				  efx->type->rx_buffer_padding);
		ef4_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
	} else {
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  " RX queue %d overlength RX event "
				  "(0x%x > 0x%x)\n",
				  ef4_rx_queue_index(rx_queue), len, max_len);
	}

	ef4_rx_queue_channel(rx_queue)->n_rx_overlength++;
}

/* Pass a received packet up through GRO.  GRO can handle pages
 * regardless of checksum state and skbs with a good checksum.
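 * The page references held by the RX buffers are transferred to the skb
 * fragments below; napi_gro_frags() then takes ownership of the skb.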
 */
static void
ef4_rx_packet_gro(struct ef4_channel *channel, struct ef4_rx_buffer *rx_buf,
		  unsigned int n_frags, u8 *eh)
{
	struct napi_struct *napi = &channel->napi_str;
	struct ef4_nic *efx = channel->efx;
	struct sk_buff *skb;

	skb = napi_get_frags(napi);
	if (unlikely(!skb)) {
		struct ef4_rx_queue *rx_queue;

		rx_queue = ef4_channel_get_rx_queue(channel);
		ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
		return;
	}

	if (efx->net_dev->features & NETIF_F_RXHASH)
		skb_set_hash(skb, ef4_rx_buf_hash(efx, eh),
			     PKT_HASH_TYPE_L3);
	skb->ip_summed = ((rx_buf->flags & EF4_RX_PKT_CSUMMED) ?
			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);

	for (;;) {
		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
				   rx_buf->page, rx_buf->page_offset,
				   rx_buf->len);
		rx_buf->page = NULL;
		skb->len += rx_buf->len;
		if (skb_shinfo(skb)->nr_frags == n_frags)
			break;

		rx_buf = ef4_rx_buf_next(&channel->rx_queue, rx_buf);
	}

	skb->data_len = skb->len;
	skb->truesize += n_frags * efx->rx_buffer_truesize;

	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	napi_gro_frags(napi);
}

/* Allocate and construct an SKB around page fragments */
static struct sk_buff *ef4_rx_mk_skb(struct ef4_channel *channel,
				     struct ef4_rx_buffer *rx_buf,
				     unsigned int n_frags,
				     u8 *eh, int hdr_len)
{
	struct ef4_nic *efx = channel->efx;
	struct sk_buff *skb;

	/* Allocate an SKB to store the headers */
	skb = netdev_alloc_skb(efx->net_dev,
			       efx->rx_ip_align + efx->rx_prefix_size +
			       hdr_len);
	if (unlikely(skb == NULL)) {
		atomic_inc(&efx->n_rx_noskb_drops);
		return NULL;
	}

	EF4_BUG_ON_PARANOID(rx_buf->len < hdr_len);

	memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
	       efx->rx_prefix_size + hdr_len);
	skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
	__skb_put(skb, hdr_len);

	/* Append the remaining page(s) onto the frag list */
	if (rx_buf->len > hdr_len) {
		rx_buf->page_offset += hdr_len;
		rx_buf->len -= hdr_len;

		for (;;) {
			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   rx_buf->page, rx_buf->page_offset,
					   rx_buf->len);
			rx_buf->page = NULL;
			skb->len += rx_buf->len;
			skb->data_len += rx_buf->len;
			if (skb_shinfo(skb)->nr_frags == n_frags)
				break;

			rx_buf = ef4_rx_buf_next(&channel->rx_queue, rx_buf);
		}
	} else {
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
		n_frags = 0;
	}

	skb->truesize += n_frags * efx->rx_buffer_truesize;

	/* Move past the ethernet header */
	skb->protocol = eth_type_trans(skb, efx->net_dev);

	skb_mark_napi_id(skb, &channel->napi_str);

	return skb;
}

void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index,
		   unsigned int n_frags, unsigned int len, u16 flags)
{
	struct ef4_nic *efx = rx_queue->efx;
	struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
	struct ef4_rx_buffer *rx_buf;

	rx_queue->rx_packets++;

	rx_buf = ef4_rx_buffer(rx_queue, index);
	rx_buf->flags |= flags;

	/* Validate the number of fragments and completed length */
	if (n_frags == 1) {
		if (!(flags & EF4_RX_PKT_PREFIX_LEN))
			ef4_rx_packet__check_len(rx_queue, rx_buf, len);
	} else if (unlikely(n_frags > EF4_RX_MAX_FRAGS) ||
		   unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
		   unlikely(len > n_frags * efx->rx_dma_len) ||
		   unlikely(!efx->rx_scatter)) {
		/* If this isn't an explicit discard request, either
		 * the hardware or the driver is broken.
		 */
		WARN_ON(!(len == 0 && rx_buf->flags & EF4_RX_PKT_DISCARD));
		rx_buf->flags |= EF4_RX_PKT_DISCARD;
	}

	netif_vdbg(efx, rx_status, efx->net_dev,
		   "RX queue %d received ids %x-%x len %d %s%s\n",
		   ef4_rx_queue_index(rx_queue), index,
		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
		   (rx_buf->flags & EF4_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
		   (rx_buf->flags & EF4_RX_PKT_DISCARD) ? " [DISCARD]" : "");

	/* Discard packet, if instructed to do so.  Process the
	 * previous receive first.
	 */
	if (unlikely(rx_buf->flags & EF4_RX_PKT_DISCARD)) {
		ef4_rx_flush_packet(channel);
		ef4_discard_rx_packet(channel, rx_buf, n_frags);
		return;
	}

	if (n_frags == 1 && !(flags & EF4_RX_PKT_PREFIX_LEN))
		rx_buf->len = len;

	/* Release and/or sync the DMA mapping - assumes all RX buffers
	 * consumed in-order per RX queue.
	 */
	ef4_sync_rx_buffer(efx, rx_buf, rx_buf->len);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(ef4_rx_buf_va(rx_buf));

	rx_buf->page_offset += efx->rx_prefix_size;
	rx_buf->len -= efx->rx_prefix_size;

	if (n_frags > 1) {
		/* Release/sync DMA mapping for additional fragments.
		 * Fix length for last fragment.
		 */
		unsigned int tail_frags = n_frags - 1;

		for (;;) {
			rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
			if (--tail_frags == 0)
				break;
			ef4_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
		}
		rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
		ef4_sync_rx_buffer(efx, rx_buf, rx_buf->len);
	}

	/* All fragments have been DMA-synced, so recycle pages. */
	rx_buf = ef4_rx_buffer(rx_queue, index);
	ef4_recycle_rx_pages(channel, rx_buf, n_frags);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	ef4_rx_flush_packet(channel);
	channel->rx_pkt_n_frags = n_frags;
	channel->rx_pkt_index = index;
}

static void ef4_rx_deliver(struct ef4_channel *channel, u8 *eh,
			   struct ef4_rx_buffer *rx_buf,
			   unsigned int n_frags)
{
	struct sk_buff *skb;
	u16 hdr_len = min_t(u16, rx_buf->len, EF4_SKB_HEADERS);

	skb = ef4_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
	if (unlikely(skb == NULL)) {
		struct ef4_rx_queue *rx_queue;

		rx_queue = ef4_channel_get_rx_queue(channel);
		ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
		return;
	}
	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	/* Set the SKB flags */
	skb_checksum_none_assert(skb);
	if (likely(rx_buf->flags & EF4_RX_PKT_CSUMMED))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (channel->type->receive_skb)
		if (channel->type->receive_skb(channel, skb))
			return;

	/* Pass the packet up */
	netif_receive_skb(skb);
}

/* Handle a received packet.  Second half: Touches packet payload. */
void __ef4_rx_packet(struct ef4_channel *channel)
{
	struct ef4_nic *efx = channel->efx;
	struct ef4_rx_buffer *rx_buf =
		ef4_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
	u8 *eh = ef4_rx_buf_va(rx_buf);

	/* Read length from the prefix if necessary.  This already
	 * excludes the length of the prefix itself.
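	 * rx_packet_len_offset is relative to eh, the start of the Ethernet
	 * header, which ef4_rx_packet() has already advanced past the prefix.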
	 */
	if (rx_buf->flags & EF4_RX_PKT_PREFIX_LEN)
		rx_buf->len = le16_to_cpup((__le16 *)
					   (eh + efx->rx_packet_len_offset));

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		struct ef4_rx_queue *rx_queue;

		ef4_loopback_rx_packet(efx, eh, rx_buf->len);
		rx_queue = ef4_channel_get_rx_queue(channel);
		ef4_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		goto out;
	}

	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
		rx_buf->flags &= ~EF4_RX_PKT_CSUMMED;

	if ((rx_buf->flags & EF4_RX_PKT_TCP) && !channel->type->receive_skb)
		ef4_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
	else
		ef4_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
	channel->rx_pkt_n_frags = 0;
}

int ef4_probe_rx_queue(struct ef4_rx_queue *rx_queue)
{
	struct ef4_nic *efx = rx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->rxq_entries), EF4_MIN_DMAQ_SIZE);
	EF4_BUG_ON_PARANOID(entries > EF4_MAX_DMAQ_SIZE);
	rx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating RX queue %d size %#x mask %#x\n",
		  ef4_rx_queue_index(rx_queue), efx->rxq_entries,
		  rx_queue->ptr_mask);

	/* Allocate RX buffers */
	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
				   GFP_KERNEL);
	if (!rx_queue->buffer)
		return -ENOMEM;

	rc = ef4_nic_probe_rx(rx_queue);
	if (rc) {
		kfree(rx_queue->buffer);
		rx_queue->buffer = NULL;
	}

	return rc;
}

static void ef4_init_rx_recycle_ring(struct ef4_nic *efx,
				     struct ef4_rx_queue *rx_queue)
{
	unsigned int bufs_in_recycle_ring, page_ring_size;

	/* Set the RX recycle ring size */
#ifdef CONFIG_PPC64
	bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
#else
	if (iommu_present(&pci_bus_type))
		bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
	else
		bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_NOIOMMU;
#endif /* CONFIG_PPC64 */

	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
					    efx->rx_bufs_per_page);
	rx_queue->page_ring = kcalloc(page_ring_size,
				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
	rx_queue->page_ptr_mask = page_ring_size - 1;
}

void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue)
{
	struct ef4_nic *efx = rx_queue->efx;
	unsigned int max_fill, trigger, max_trigger;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "initialising RX queue %d\n", ef4_rx_queue_index(rx_queue));

	/* Initialise ptr fields */
	rx_queue->added_count = 0;
	rx_queue->notified_count = 0;
	rx_queue->removed_count = 0;
	rx_queue->min_fill = -1U;
	ef4_init_rx_recycle_ring(efx, rx_queue);

	rx_queue->page_remove = 0;
	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
	rx_queue->page_recycle_count = 0;
	rx_queue->page_recycle_failed = 0;
	rx_queue->page_recycle_full = 0;

	/* Initialise limit fields */
	max_fill = efx->rxq_entries - EF4_RXD_HEAD_ROOM;
	max_trigger =
		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
	if (rx_refill_threshold != 0) {
		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
		if (trigger > max_trigger)
			trigger = max_trigger;
	} else {
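		/* By default, refill as soon as there is room for at
		 * least one full batch of buffers.
		 */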
		trigger = max_trigger;
	}

	rx_queue->max_fill = max_fill;
	rx_queue->fast_fill_trigger = trigger;
	rx_queue->refill_enabled = true;

	/* Set up RX descriptor ring */
	ef4_nic_init_rx(rx_queue);
}

void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue)
{
	int i;
	struct ef4_nic *efx = rx_queue->efx;
	struct ef4_rx_buffer *rx_buf;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "shutting down RX queue %d\n", ef4_rx_queue_index(rx_queue));

	del_timer_sync(&rx_queue->slow_fill);

	/* Release RX buffers from the current read ptr to the write ptr */
	if (rx_queue->buffer) {
		for (i = rx_queue->removed_count; i < rx_queue->added_count;
		     i++) {
			unsigned index = i & rx_queue->ptr_mask;

			rx_buf = ef4_rx_buffer(rx_queue, index);
			ef4_fini_rx_buffer(rx_queue, rx_buf);
		}
	}

	/* Unmap and release the pages in the recycle ring. Remove the ring. */
	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
		struct page *page = rx_queue->page_ring[i];
		struct ef4_rx_page_state *state;

		if (page == NULL)
			continue;

		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
	}
	kfree(rx_queue->page_ring);
	rx_queue->page_ring = NULL;
}

void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue)
{
	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "destroying RX queue %d\n", ef4_rx_queue_index(rx_queue));

	ef4_nic_remove_rx(rx_queue);

	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
}


module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring refill threshold (%)");

#ifdef CONFIG_RFS_ACCEL

int ef4_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
		   u16 rxq_index, u32 flow_id)
{
	struct ef4_nic *efx = netdev_priv(net_dev);
	struct ef4_channel *channel;
	struct ef4_filter_spec spec;
	struct flow_keys fk;
	int rc;

	if (flow_id == RPS_FLOW_ID_INVALID)
		return -EINVAL;

	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
		return -EPROTONOSUPPORT;

	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6))
		return -EPROTONOSUPPORT;
	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
		return -EPROTONOSUPPORT;

	ef4_filter_init_rx(&spec, EF4_FILTER_PRI_HINT,
			   efx->rx_scatter ? EF4_FILTER_FLAG_RX_SCATTER : 0,
			   rxq_index);
	spec.match_flags =
		EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
		EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
		EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT;
	spec.ether_type = fk.basic.n_proto;
	spec.ip_proto = fk.basic.ip_proto;

	if (fk.basic.n_proto == htons(ETH_P_IP)) {
		spec.rem_host[0] = fk.addrs.v4addrs.src;
		spec.loc_host[0] = fk.addrs.v4addrs.dst;
	} else {
		memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
		memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
	}

	spec.rem_port = fk.ports.src;
	spec.loc_port = fk.ports.dst;

	rc = efx->type->filter_rfs_insert(efx, &spec);
	if (rc < 0)
		return rc;

	/* Remember this so we can check whether to expire the filter later */
	channel = ef4_get_channel(efx, rxq_index);
	channel->rps_flow_id[rc] = flow_id;
	++channel->rfs_filters_added;

	if (spec.ether_type == htons(ETH_P_IP))
		netif_info(efx, rx_status, efx->net_dev,
			   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
			   (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
			   spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
			   ntohs(spec.loc_port), rxq_index, flow_id, rc);
	else
		netif_info(efx, rx_status, efx->net_dev,
			   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
			   (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
			   spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
			   ntohs(spec.loc_port), rxq_index, flow_id, rc);

	return rc;
}

bool __ef4_filter_rfs_expire(struct ef4_nic *efx, unsigned int quota)
{
	bool (*expire_one)(struct ef4_nic *efx, u32 flow_id, unsigned int index);
	unsigned int channel_idx, index, size;
	u32 flow_id;

	if (!spin_trylock_bh(&efx->filter_lock))
		return false;

	expire_one = efx->type->filter_rfs_expire_one;
	channel_idx = efx->rps_expire_channel;
	index = efx->rps_expire_index;
	size = efx->type->max_rx_ip_filters;
	while (quota--) {
		struct ef4_channel *channel = ef4_get_channel(efx, channel_idx);

		flow_id = channel->rps_flow_id[index];
		if (flow_id != RPS_FLOW_ID_INVALID &&
		    expire_one(efx, flow_id, index)) {
			netif_info(efx, rx_status, efx->net_dev,
				   "expired filter %d [queue %u flow %u]\n",
				   index, channel_idx, flow_id);
			channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
		}
		if (++index == size) {
			if (++channel_idx == efx->n_channels)
				channel_idx = 0;
			index = 0;
		}
	}
	efx->rps_expire_channel = channel_idx;
	efx->rps_expire_index = index;

	spin_unlock_bh(&efx->filter_lock);
	return true;
}

#endif /* CONFIG_RFS_ACCEL */

/**
 * ef4_filter_is_mc_recipient - test whether spec is a multicast recipient
 * @spec: Specification to test
 *
 * Return: %true if the specification is a non-drop RX filter that
 * matches a local MAC address I/G bit value of 1 or matches a local
 * IPv4 or IPv6 address value in the respective multicast address
 * range.  Otherwise %false.
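 * For IPv6, any local address in the ff00::/8 multicast range matches.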
 */
bool ef4_filter_is_mc_recipient(const struct ef4_filter_spec *spec)
{
	if (!(spec->flags & EF4_FILTER_FLAG_RX) ||
	    spec->dmaq_id == EF4_FILTER_RX_DMAQ_ID_DROP)
		return false;

	if (spec->match_flags &
	    (EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG) &&
	    is_multicast_ether_addr(spec->loc_mac))
		return true;

	if ((spec->match_flags &
	     (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_LOC_HOST)) ==
	    (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_LOC_HOST)) {
		if (spec->ether_type == htons(ETH_P_IP) &&
		    ipv4_is_multicast(spec->loc_host[0]))
			return true;
		if (spec->ether_type == htons(ETH_P_IPV6) &&
		    ((const u8 *)spec->loc_host)[0] == 0xff)
			return true;
	}

	return false;
}