1 /* 2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <linux/skbuff.h> 33 #include <linux/netdevice.h> 34 #include <linux/etherdevice.h> 35 #include <linux/if_vlan.h> 36 #include <linux/ip.h> 37 #include <linux/tcp.h> 38 #include <linux/dma-mapping.h> 39 #include <linux/slab.h> 40 #include <linux/prefetch.h> 41 #include <net/arp.h> 42 #include "common.h" 43 #include "regs.h" 44 #include "sge_defs.h" 45 #include "t3_cpl.h" 46 #include "firmware_exports.h" 47 #include "cxgb3_offload.h" 48 49 #define USE_GTS 0 50 51 #define SGE_RX_SM_BUF_SIZE 1536 52 53 #define SGE_RX_COPY_THRES 256 54 #define SGE_RX_PULL_LEN 128 55 56 #define SGE_PG_RSVD SMP_CACHE_BYTES 57 /* 58 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks. 59 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs 60 * directly. 61 */ 62 #define FL0_PG_CHUNK_SIZE 2048 63 #define FL0_PG_ORDER 0 64 #define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER) 65 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192) 66 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1) 67 #define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER) 68 69 #define SGE_RX_DROP_THRES 16 70 #define RX_RECLAIM_PERIOD (HZ/4) 71 72 /* 73 * Max number of Rx buffers we replenish at a time. 74 */ 75 #define MAX_RX_REFILL 16U 76 /* 77 * Period of the Tx buffer reclaim timer. This timer does not need to run 78 * frequently as Tx buffers are usually reclaimed by new Tx packets. 79 */ 80 #define TX_RECLAIM_PERIOD (HZ / 4) 81 #define TX_RECLAIM_TIMER_CHUNK 64U 82 #define TX_RECLAIM_CHUNK 16U 83 84 /* WR size in bytes */ 85 #define WR_LEN (WR_FLITS * 8) 86 87 /* 88 * Types of Tx queues in each queue set. Order here matters, do not change. 
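 * (For orientation: TXQ_ETH carries ordinary Ethernet frames via t3_eth_xmit(),
 * TXQ_OFLD carries offload work requests via ofld_xmit(), and TXQ_CTRL carries
 * small immediate-data control work requests via ctrl_xmit(), all defined below.)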
89 */ 90 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL }; 91 92 /* Values for sge_txq.flags */ 93 enum { 94 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 95 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 96 }; 97 98 struct tx_desc { 99 __be64 flit[TX_DESC_FLITS]; 100 }; 101 102 struct rx_desc { 103 __be32 addr_lo; 104 __be32 len_gen; 105 __be32 gen2; 106 __be32 addr_hi; 107 }; 108 109 struct tx_sw_desc { /* SW state per Tx descriptor */ 110 struct sk_buff *skb; 111 u8 eop; /* set if last descriptor for packet */ 112 u8 addr_idx; /* buffer index of first SGL entry in descriptor */ 113 u8 fragidx; /* first page fragment associated with descriptor */ 114 s8 sflit; /* start flit of first SGL entry in descriptor */ 115 }; 116 117 struct rx_sw_desc { /* SW state per Rx descriptor */ 118 union { 119 struct sk_buff *skb; 120 struct fl_pg_chunk pg_chunk; 121 }; 122 DEFINE_DMA_UNMAP_ADDR(dma_addr); 123 }; 124 125 struct rsp_desc { /* response queue descriptor */ 126 struct rss_header rss_hdr; 127 __be32 flags; 128 __be32 len_cq; 129 struct_group(immediate, 130 u8 imm_data[47]; 131 u8 intr_gen; 132 ); 133 }; 134 135 /* 136 * Holds unmapping information for Tx packets that need deferred unmapping. 137 * This structure lives at skb->head and must be allocated by callers. 138 */ 139 struct deferred_unmap_info { 140 struct pci_dev *pdev; 141 dma_addr_t addr[MAX_SKB_FRAGS + 1]; 142 }; 143 144 /* 145 * Maps a number of flits to the number of Tx descriptors that can hold them. 146 * The formula is 147 * 148 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 149 * 150 * HW allows up to 4 descriptors to be combined into a WR. 151 */ 152 static u8 flit_desc_map[] = { 153 0, 154 #if SGE_NUM_GENBITS == 1 155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 156 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 157 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 158 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 159 #elif SGE_NUM_GENBITS == 2 160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 161 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 162 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 163 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 164 #else 165 # error "SGE_NUM_GENBITS must be 1 or 2" 166 #endif 167 }; 168 169 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx) 170 { 171 return container_of(q, struct sge_qset, fl[qidx]); 172 } 173 174 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q) 175 { 176 return container_of(q, struct sge_qset, rspq); 177 } 178 179 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx) 180 { 181 return container_of(q, struct sge_qset, txq[qidx]); 182 } 183 184 /** 185 * refill_rspq - replenish an SGE response queue 186 * @adapter: the adapter 187 * @q: the response queue to replenish 188 * @credits: how many new responses to make available 189 * 190 * Replenishes a response queue by making the supplied number of responses 191 * available to HW. 192 */ 193 static inline void refill_rspq(struct adapter *adapter, 194 const struct sge_rspq *q, unsigned int credits) 195 { 196 rmb(); 197 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN, 198 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 199 } 200 201 /** 202 * need_skb_unmap - does the platform need unmapping of sk_buffs? 203 * 204 * Returns true if the platform needs sk_buff unmapping. The compiler 205 * optimizes away unnecessary code if this returns true. 
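 * (free_tx_desc() below, for instance, combines this check with the queue's
 * context id to decide whether unmap_skb() has to be called at all.)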
206 */ 207 static inline int need_skb_unmap(void) 208 { 209 #ifdef CONFIG_NEED_DMA_MAP_STATE 210 return 1; 211 #else 212 return 0; 213 #endif 214 } 215 216 /** 217 * unmap_skb - unmap a packet main body and its page fragments 218 * @skb: the packet 219 * @q: the Tx queue containing Tx descriptors for the packet 220 * @cidx: index of Tx descriptor 221 * @pdev: the PCI device 222 * 223 * Unmap the main body of an sk_buff and its page fragments, if any. 224 * Because of the fairly complicated structure of our SGLs and the desire 225 * to conserve space for metadata, the information necessary to unmap an 226 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx 227 * descriptors (the physical addresses of the various data buffers), and 228 * the SW descriptor state (assorted indices). The send functions 229 * initialize the indices for the first packet descriptor so we can unmap 230 * the buffers held in the first Tx descriptor here, and we have enough 231 * information at this point to set the state for the next Tx descriptor. 232 * 233 * Note that it is possible to clean up the first descriptor of a packet 234 * before the send routines have written the next descriptors, but this 235 * race does not cause any problem. We just end up writing the unmapping 236 * info for the descriptor first. 237 */ 238 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q, 239 unsigned int cidx, struct pci_dev *pdev) 240 { 241 const struct sg_ent *sgp; 242 struct tx_sw_desc *d = &q->sdesc[cidx]; 243 int nfrags, frag_idx, curflit, j = d->addr_idx; 244 245 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit]; 246 frag_idx = d->fragidx; 247 248 if (frag_idx == 0 && skb_headlen(skb)) { 249 dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]), 250 skb_headlen(skb), DMA_TO_DEVICE); 251 j = 1; 252 } 253 254 curflit = d->sflit + 1 + j; 255 nfrags = skb_shinfo(skb)->nr_frags; 256 257 while (frag_idx < nfrags && curflit < WR_FLITS) { 258 dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]), 259 skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]), 260 DMA_TO_DEVICE); 261 j ^= 1; 262 if (j == 0) { 263 sgp++; 264 curflit++; 265 } 266 curflit++; 267 frag_idx++; 268 } 269 270 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */ 271 d = cidx + 1 == q->size ? q->sdesc : d + 1; 272 d->fragidx = frag_idx; 273 d->addr_idx = j; 274 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */ 275 } 276 } 277 278 /** 279 * free_tx_desc - reclaims Tx descriptors and their buffers 280 * @adapter: the adapter 281 * @q: the Tx queue to reclaim descriptors from 282 * @n: the number of descriptors to reclaim 283 * 284 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 285 * Tx buffers. Called with the Tx queue lock held. 
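 * (reclaim_completed_tx() below is the usual caller; it limits each call to a
 * small chunk of descriptors, e.g. TX_RECLAIM_CHUNK, per invocation.)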
286 */ 287 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q, 288 unsigned int n) 289 { 290 struct tx_sw_desc *d; 291 struct pci_dev *pdev = adapter->pdev; 292 unsigned int cidx = q->cidx; 293 294 const int need_unmap = need_skb_unmap() && 295 q->cntxt_id >= FW_TUNNEL_SGEEC_START; 296 297 d = &q->sdesc[cidx]; 298 while (n--) { 299 if (d->skb) { /* an SGL is present */ 300 if (need_unmap) 301 unmap_skb(d->skb, q, cidx, pdev); 302 if (d->eop) { 303 dev_consume_skb_any(d->skb); 304 d->skb = NULL; 305 } 306 } 307 ++d; 308 if (++cidx == q->size) { 309 cidx = 0; 310 d = q->sdesc; 311 } 312 } 313 q->cidx = cidx; 314 } 315 316 /** 317 * reclaim_completed_tx - reclaims completed Tx descriptors 318 * @adapter: the adapter 319 * @q: the Tx queue to reclaim completed descriptors from 320 * @chunk: maximum number of descriptors to reclaim 321 * 322 * Reclaims Tx descriptors that the SGE has indicated it has processed, 323 * and frees the associated buffers if possible. Called with the Tx 324 * queue's lock held. 325 */ 326 static inline unsigned int reclaim_completed_tx(struct adapter *adapter, 327 struct sge_txq *q, 328 unsigned int chunk) 329 { 330 unsigned int reclaim = q->processed - q->cleaned; 331 332 reclaim = min(chunk, reclaim); 333 if (reclaim) { 334 free_tx_desc(adapter, q, reclaim); 335 q->cleaned += reclaim; 336 q->in_use -= reclaim; 337 } 338 return q->processed - q->cleaned; 339 } 340 341 /** 342 * should_restart_tx - are there enough resources to restart a Tx queue? 343 * @q: the Tx queue 344 * 345 * Checks if there are enough descriptors to restart a suspended Tx queue. 346 */ 347 static inline int should_restart_tx(const struct sge_txq *q) 348 { 349 unsigned int r = q->processed - q->cleaned; 350 351 return q->in_use - r < (q->size >> 1); 352 } 353 354 static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q, 355 struct rx_sw_desc *d) 356 { 357 if (q->use_pages && d->pg_chunk.page) { 358 (*d->pg_chunk.p_cnt)--; 359 if (!*d->pg_chunk.p_cnt) 360 dma_unmap_page(&pdev->dev, d->pg_chunk.mapping, 361 q->alloc_size, DMA_FROM_DEVICE); 362 363 put_page(d->pg_chunk.page); 364 d->pg_chunk.page = NULL; 365 } else { 366 dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr), 367 q->buf_size, DMA_FROM_DEVICE); 368 kfree_skb(d->skb); 369 d->skb = NULL; 370 } 371 } 372 373 /** 374 * free_rx_bufs - free the Rx buffers on an SGE free list 375 * @pdev: the PCI device associated with the adapter 376 * @q: the SGE free list to clean up 377 * 378 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 379 * this queue should be stopped before calling this function. 380 */ 381 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q) 382 { 383 unsigned int cidx = q->cidx; 384 385 while (q->credits--) { 386 struct rx_sw_desc *d = &q->sdesc[cidx]; 387 388 389 clear_rx_desc(pdev, q, d); 390 if (++cidx == q->size) 391 cidx = 0; 392 } 393 394 if (q->pg_chunk.page) { 395 __free_pages(q->pg_chunk.page, q->order); 396 q->pg_chunk.page = NULL; 397 } 398 } 399 400 /** 401 * add_one_rx_buf - add a packet buffer to a free-buffer list 402 * @va: buffer start VA 403 * @len: the buffer length 404 * @d: the HW Rx descriptor to write 405 * @sd: the SW Rx descriptor to write 406 * @gen: the generation bit value 407 * @pdev: the PCI device associated with the adapter 408 * 409 * Add a buffer of the given length to the supplied HW and SW Rx 410 * descriptors. 
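 * Returns 0 on success or -ENOMEM if the buffer cannot be DMA-mapped.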
411 */ 412 static inline int add_one_rx_buf(void *va, unsigned int len, 413 struct rx_desc *d, struct rx_sw_desc *sd, 414 unsigned int gen, struct pci_dev *pdev) 415 { 416 dma_addr_t mapping; 417 418 mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE); 419 if (unlikely(dma_mapping_error(&pdev->dev, mapping))) 420 return -ENOMEM; 421 422 dma_unmap_addr_set(sd, dma_addr, mapping); 423 424 d->addr_lo = cpu_to_be32(mapping); 425 d->addr_hi = cpu_to_be32((u64) mapping >> 32); 426 dma_wmb(); 427 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen)); 428 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen)); 429 return 0; 430 } 431 432 static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d, 433 unsigned int gen) 434 { 435 d->addr_lo = cpu_to_be32(mapping); 436 d->addr_hi = cpu_to_be32((u64) mapping >> 32); 437 dma_wmb(); 438 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen)); 439 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen)); 440 return 0; 441 } 442 443 static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q, 444 struct rx_sw_desc *sd, gfp_t gfp, 445 unsigned int order) 446 { 447 if (!q->pg_chunk.page) { 448 dma_addr_t mapping; 449 450 q->pg_chunk.page = alloc_pages(gfp, order); 451 if (unlikely(!q->pg_chunk.page)) 452 return -ENOMEM; 453 q->pg_chunk.va = page_address(q->pg_chunk.page); 454 q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) - 455 SGE_PG_RSVD; 456 q->pg_chunk.offset = 0; 457 mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page, 458 0, q->alloc_size, DMA_FROM_DEVICE); 459 if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) { 460 __free_pages(q->pg_chunk.page, order); 461 q->pg_chunk.page = NULL; 462 return -EIO; 463 } 464 q->pg_chunk.mapping = mapping; 465 } 466 sd->pg_chunk = q->pg_chunk; 467 468 prefetch(sd->pg_chunk.p_cnt); 469 470 q->pg_chunk.offset += q->buf_size; 471 if (q->pg_chunk.offset == (PAGE_SIZE << order)) 472 q->pg_chunk.page = NULL; 473 else { 474 q->pg_chunk.va += q->buf_size; 475 get_page(q->pg_chunk.page); 476 } 477 478 if (sd->pg_chunk.offset == 0) 479 *sd->pg_chunk.p_cnt = 1; 480 else 481 *sd->pg_chunk.p_cnt += 1; 482 483 return 0; 484 } 485 486 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) 487 { 488 if (q->pend_cred >= q->credits / 4) { 489 q->pend_cred = 0; 490 wmb(); 491 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 492 } 493 } 494 495 /** 496 * refill_fl - refill an SGE free-buffer list 497 * @adap: the adapter 498 * @q: the free-list to refill 499 * @n: the number of new buffers to allocate 500 * @gfp: the gfp flags for allocating new buffers 501 * 502 * (Re)populate an SGE free-buffer list with up to @n new packet buffers, 503 * allocated with the supplied gfp flags. The caller must assure that 504 * @n does not exceed the queue's capacity. 
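 * For example, __refill_fl() below replenishes at most
 * min(MAX_RX_REFILL, fl->size - fl->credits) buffers using
 * GFP_ATOMIC | __GFP_COMP.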
505 */ 506 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) 507 { 508 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 509 struct rx_desc *d = &q->desc[q->pidx]; 510 unsigned int count = 0; 511 512 while (n--) { 513 dma_addr_t mapping; 514 int err; 515 516 if (q->use_pages) { 517 if (unlikely(alloc_pg_chunk(adap, q, sd, gfp, 518 q->order))) { 519 nomem: q->alloc_failed++; 520 break; 521 } 522 mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset; 523 dma_unmap_addr_set(sd, dma_addr, mapping); 524 525 add_one_rx_chunk(mapping, d, q->gen); 526 dma_sync_single_for_device(&adap->pdev->dev, mapping, 527 q->buf_size - SGE_PG_RSVD, 528 DMA_FROM_DEVICE); 529 } else { 530 void *buf_start; 531 532 struct sk_buff *skb = alloc_skb(q->buf_size, gfp); 533 if (!skb) 534 goto nomem; 535 536 sd->skb = skb; 537 buf_start = skb->data; 538 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, 539 q->gen, adap->pdev); 540 if (unlikely(err)) { 541 clear_rx_desc(adap->pdev, q, sd); 542 break; 543 } 544 } 545 546 d++; 547 sd++; 548 if (++q->pidx == q->size) { 549 q->pidx = 0; 550 q->gen ^= 1; 551 sd = q->sdesc; 552 d = q->desc; 553 } 554 count++; 555 } 556 557 q->credits += count; 558 q->pend_cred += count; 559 ring_fl_db(adap, q); 560 561 return count; 562 } 563 564 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl) 565 { 566 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits), 567 GFP_ATOMIC | __GFP_COMP); 568 } 569 570 /** 571 * recycle_rx_buf - recycle a receive buffer 572 * @adap: the adapter 573 * @q: the SGE free list 574 * @idx: index of buffer to recycle 575 * 576 * Recycles the specified buffer on the given free list by adding it at 577 * the next available slot on the list. 578 */ 579 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q, 580 unsigned int idx) 581 { 582 struct rx_desc *from = &q->desc[idx]; 583 struct rx_desc *to = &q->desc[q->pidx]; 584 585 q->sdesc[q->pidx] = q->sdesc[idx]; 586 to->addr_lo = from->addr_lo; /* already big endian */ 587 to->addr_hi = from->addr_hi; /* likewise */ 588 dma_wmb(); 589 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen)); 590 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen)); 591 592 if (++q->pidx == q->size) { 593 q->pidx = 0; 594 q->gen ^= 1; 595 } 596 597 q->credits++; 598 q->pend_cred++; 599 ring_fl_db(adap, q); 600 } 601 602 /** 603 * alloc_ring - allocate resources for an SGE descriptor ring 604 * @pdev: the PCI device 605 * @nelem: the number of descriptors 606 * @elem_size: the size of each descriptor 607 * @sw_size: the size of the SW state associated with each ring element 608 * @phys: the physical address of the allocated ring 609 * @metadata: address of the array holding the SW state for the ring 610 * 611 * Allocates resources for an SGE descriptor ring, such as Tx queues, 612 * free buffer lists, or response queues. Each SGE ring requires 613 * space for its HW descriptors plus, optionally, space for the SW state 614 * associated with each HW entry (the metadata). The function returns 615 * three values: the virtual address for the HW ring (the return value 616 * of the function), the physical address of the HW ring, and the address 617 * of the SW ring. 
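 * An illustrative call for a free-list ring (field names as used by the
 * queue-set setup code) might look roughly like:
 *
 *	fl->desc = alloc_ring(pdev, fl->size, sizeof(struct rx_desc),
 *			      sizeof(struct rx_sw_desc),
 *			      &fl->phys_addr, &fl->sdesc);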
618 */ 619 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size, 620 size_t sw_size, dma_addr_t * phys, void *metadata) 621 { 622 size_t len = nelem * elem_size; 623 void *s = NULL; 624 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL); 625 626 if (!p) 627 return NULL; 628 if (sw_size && metadata) { 629 s = kcalloc(nelem, sw_size, GFP_KERNEL); 630 631 if (!s) { 632 dma_free_coherent(&pdev->dev, len, p, *phys); 633 return NULL; 634 } 635 *(void **)metadata = s; 636 } 637 return p; 638 } 639 640 /** 641 * t3_reset_qset - reset a sge qset 642 * @q: the queue set 643 * 644 * Reset the qset structure. 645 * the NAPI structure is preserved in the event of 646 * the qset's reincarnation, for example during EEH recovery. 647 */ 648 static void t3_reset_qset(struct sge_qset *q) 649 { 650 if (q->adap && 651 !(q->adap->flags & NAPI_INIT)) { 652 memset(q, 0, sizeof(*q)); 653 return; 654 } 655 656 q->adap = NULL; 657 memset(&q->rspq, 0, sizeof(q->rspq)); 658 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET); 659 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); 660 q->txq_stopped = 0; 661 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ 662 q->rx_reclaim_timer.function = NULL; 663 q->nomem = 0; 664 napi_free_frags(&q->napi); 665 } 666 667 668 /** 669 * t3_free_qset - free the resources of an SGE queue set 670 * @adapter: the adapter owning the queue set 671 * @q: the queue set 672 * 673 * Release the HW and SW resources associated with an SGE queue set, such 674 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 675 * queue set must be quiesced prior to calling this. 676 */ 677 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q) 678 { 679 int i; 680 struct pci_dev *pdev = adapter->pdev; 681 682 for (i = 0; i < SGE_RXQ_PER_SET; ++i) 683 if (q->fl[i].desc) { 684 spin_lock_irq(&adapter->sge.reg_lock); 685 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id); 686 spin_unlock_irq(&adapter->sge.reg_lock); 687 free_rx_bufs(pdev, &q->fl[i]); 688 kfree(q->fl[i].sdesc); 689 dma_free_coherent(&pdev->dev, 690 q->fl[i].size * 691 sizeof(struct rx_desc), q->fl[i].desc, 692 q->fl[i].phys_addr); 693 } 694 695 for (i = 0; i < SGE_TXQ_PER_SET; ++i) 696 if (q->txq[i].desc) { 697 spin_lock_irq(&adapter->sge.reg_lock); 698 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0); 699 spin_unlock_irq(&adapter->sge.reg_lock); 700 if (q->txq[i].sdesc) { 701 free_tx_desc(adapter, &q->txq[i], 702 q->txq[i].in_use); 703 kfree(q->txq[i].sdesc); 704 } 705 dma_free_coherent(&pdev->dev, 706 q->txq[i].size * 707 sizeof(struct tx_desc), 708 q->txq[i].desc, q->txq[i].phys_addr); 709 __skb_queue_purge(&q->txq[i].sendq); 710 } 711 712 if (q->rspq.desc) { 713 spin_lock_irq(&adapter->sge.reg_lock); 714 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id); 715 spin_unlock_irq(&adapter->sge.reg_lock); 716 dma_free_coherent(&pdev->dev, 717 q->rspq.size * sizeof(struct rsp_desc), 718 q->rspq.desc, q->rspq.phys_addr); 719 } 720 721 t3_reset_qset(q); 722 } 723 724 /** 725 * init_qset_cntxt - initialize an SGE queue set context info 726 * @qs: the queue set 727 * @id: the queue set id 728 * 729 * Initializes the TIDs and context ids for the queues of a queue set. 
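 * For example, queue set 1 gets response-queue context 1, free-list contexts
 * 2 and 3, and egress contexts FW_TUNNEL_SGEEC_START + 1,
 * FW_OFLD_SGEEC_START + 1 and FW_CTRL_SGEEC_START + 1 for TXQ_ETH, TXQ_OFLD
 * and TXQ_CTRL respectively.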
730 */ 731 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id) 732 { 733 qs->rspq.cntxt_id = id; 734 qs->fl[0].cntxt_id = 2 * id; 735 qs->fl[1].cntxt_id = 2 * id + 1; 736 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 737 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 738 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 739 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 740 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 741 } 742 743 /** 744 * sgl_len - calculates the size of an SGL of the given capacity 745 * @n: the number of SGL entries 746 * 747 * Calculates the number of flits needed for a scatter/gather list that 748 * can hold the given number of entries. 749 */ 750 static inline unsigned int sgl_len(unsigned int n) 751 { 752 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */ 753 return (3 * n) / 2 + (n & 1); 754 } 755 756 /** 757 * flits_to_desc - returns the num of Tx descriptors for the given flits 758 * @n: the number of flits 759 * 760 * Calculates the number of Tx descriptors needed for the supplied number 761 * of flits. 762 */ 763 static inline unsigned int flits_to_desc(unsigned int n) 764 { 765 BUG_ON(n >= ARRAY_SIZE(flit_desc_map)); 766 return flit_desc_map[n]; 767 } 768 769 /** 770 * get_packet - return the next ingress packet buffer from a free list 771 * @adap: the adapter that received the packet 772 * @fl: the SGE free list holding the packet 773 * @len: the packet length including any SGE padding 774 * @drop_thres: # of remaining buffers before we start dropping packets 775 * 776 * Get the next packet from a free list and complete setup of the 777 * sk_buff. If the packet is small we make a copy and recycle the 778 * original buffer, otherwise we use the original buffer itself. If a 779 * positive drop threshold is supplied packets are dropped and their 780 * buffers recycled if (a) the number of remaining buffers is under the 781 * threshold and the packet is too big to copy, or (b) the packet should 782 * be copied but there is no memory for the copy. 
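 * With the defaults above, packets of at most SGE_RX_COPY_THRES (256) bytes
 * are copied into a freshly allocated sk_buff and the Rx buffer is recycled;
 * larger packets are handed over in the original buffer.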
783 */ 784 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl, 785 unsigned int len, unsigned int drop_thres) 786 { 787 struct sk_buff *skb = NULL; 788 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 789 790 prefetch(sd->skb->data); 791 fl->credits--; 792 793 if (len <= SGE_RX_COPY_THRES) { 794 skb = alloc_skb(len, GFP_ATOMIC); 795 if (likely(skb != NULL)) { 796 __skb_put(skb, len); 797 dma_sync_single_for_cpu(&adap->pdev->dev, 798 dma_unmap_addr(sd, dma_addr), 799 len, DMA_FROM_DEVICE); 800 memcpy(skb->data, sd->skb->data, len); 801 dma_sync_single_for_device(&adap->pdev->dev, 802 dma_unmap_addr(sd, dma_addr), 803 len, DMA_FROM_DEVICE); 804 } else if (!drop_thres) 805 goto use_orig_buf; 806 recycle: 807 recycle_rx_buf(adap, fl, fl->cidx); 808 return skb; 809 } 810 811 if (unlikely(fl->credits < drop_thres) && 812 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1), 813 GFP_ATOMIC | __GFP_COMP) == 0) 814 goto recycle; 815 816 use_orig_buf: 817 dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr), 818 fl->buf_size, DMA_FROM_DEVICE); 819 skb = sd->skb; 820 skb_put(skb, len); 821 __refill_fl(adap, fl); 822 return skb; 823 } 824 825 /** 826 * get_packet_pg - return the next ingress packet buffer from a free list 827 * @adap: the adapter that received the packet 828 * @fl: the SGE free list holding the packet 829 * @q: the queue 830 * @len: the packet length including any SGE padding 831 * @drop_thres: # of remaining buffers before we start dropping packets 832 * 833 * Get the next packet from a free list populated with page chunks. 834 * If the packet is small we make a copy and recycle the original buffer, 835 * otherwise we attach the original buffer as a page fragment to a fresh 836 * sk_buff. If a positive drop threshold is supplied packets are dropped 837 * and their buffers recycled if (a) the number of remaining buffers is 838 * under the threshold and the packet is too big to copy, or (b) there's 839 * no system memory. 840 * 841 * Note: this function is similar to @get_packet but deals with Rx buffers 842 * that are page chunks rather than sk_buffs. 
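 * For larger packets the first SGE_RX_PULL_LEN (128) bytes are copied into
 * the sk_buff's linear area and the rest of the chunk is attached as a page
 * fragment.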
843 */ 844 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, 845 struct sge_rspq *q, unsigned int len, 846 unsigned int drop_thres) 847 { 848 struct sk_buff *newskb, *skb; 849 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 850 851 dma_addr_t dma_addr = dma_unmap_addr(sd, dma_addr); 852 853 newskb = skb = q->pg_skb; 854 if (!skb && (len <= SGE_RX_COPY_THRES)) { 855 newskb = alloc_skb(len, GFP_ATOMIC); 856 if (likely(newskb != NULL)) { 857 __skb_put(newskb, len); 858 dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, 859 len, DMA_FROM_DEVICE); 860 memcpy(newskb->data, sd->pg_chunk.va, len); 861 dma_sync_single_for_device(&adap->pdev->dev, dma_addr, 862 len, DMA_FROM_DEVICE); 863 } else if (!drop_thres) 864 return NULL; 865 recycle: 866 fl->credits--; 867 recycle_rx_buf(adap, fl, fl->cidx); 868 q->rx_recycle_buf++; 869 return newskb; 870 } 871 872 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres))) 873 goto recycle; 874 875 prefetch(sd->pg_chunk.p_cnt); 876 877 if (!skb) 878 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC); 879 880 if (unlikely(!newskb)) { 881 if (!drop_thres) 882 return NULL; 883 goto recycle; 884 } 885 886 dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len, 887 DMA_FROM_DEVICE); 888 (*sd->pg_chunk.p_cnt)--; 889 if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page) 890 dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping, 891 fl->alloc_size, DMA_FROM_DEVICE); 892 if (!skb) { 893 __skb_put(newskb, SGE_RX_PULL_LEN); 894 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); 895 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page, 896 sd->pg_chunk.offset + SGE_RX_PULL_LEN, 897 len - SGE_RX_PULL_LEN); 898 newskb->len = len; 899 newskb->data_len = len - SGE_RX_PULL_LEN; 900 newskb->truesize += newskb->data_len; 901 } else { 902 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags, 903 sd->pg_chunk.page, 904 sd->pg_chunk.offset, len); 905 newskb->len += len; 906 newskb->data_len += len; 907 newskb->truesize += len; 908 } 909 910 fl->credits--; 911 /* 912 * We do not refill FLs here, we let the caller do it to overlap a 913 * prefetch. 914 */ 915 return newskb; 916 } 917 918 /** 919 * get_imm_packet - return the next ingress packet buffer from a response 920 * @resp: the response descriptor containing the packet data 921 * 922 * Return a packet containing the immediate data of the given response. 923 */ 924 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp) 925 { 926 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC); 927 928 if (skb) { 929 __skb_put(skb, IMMED_PKT_SIZE); 930 BUILD_BUG_ON(IMMED_PKT_SIZE != sizeof(resp->immediate)); 931 skb_copy_to_linear_data(skb, &resp->immediate, IMMED_PKT_SIZE); 932 } 933 return skb; 934 } 935 936 /** 937 * calc_tx_descs - calculate the number of Tx descriptors for a packet 938 * @skb: the packet 939 * 940 * Returns the number of Tx descriptors needed for the given Ethernet 941 * packet. Ethernet packets require addition of WR and CPL headers. 
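 * For example, a packet with data in its linear area plus three page
 * fragments needs sgl_len(4) = 6 SGL flits plus 2 flits of WR/CPL header
 * (one more if TSO is used); flits_to_desc() then converts the flit count
 * into a descriptor count.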
 */
static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
{
	unsigned int flits;

	if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;

	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
	if (skb_shinfo(skb)->gso_size)
		flits++;
	return flits_to_desc(flits);
}

/* map_skb - map a packet main body and its page fragments
 * @pdev: the PCI device
 * @skb: the packet
 * @addr: placeholder to save the mapped addresses
 *
 * Map the main body of an sk_buff and its page fragments, if any.
 */
static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
		   dma_addr_t *addr)
{
	const skb_frag_t *fp, *end;
	const struct skb_shared_info *si;

	if (skb_headlen(skb)) {
		*addr = dma_map_single(&pdev->dev, skb->data,
				       skb_headlen(skb), DMA_TO_DEVICE);
		if (dma_mapping_error(&pdev->dev, *addr))
			goto out_err;
		addr++;
	}

	si = skb_shinfo(skb);
	end = &si->frags[si->nr_frags];

	for (fp = si->frags; fp < end; fp++) {
		*addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
					 DMA_TO_DEVICE);
		if (dma_mapping_error(&pdev->dev, *addr))
			goto unwind;
		addr++;
	}
	return 0;

unwind:
	while (fp-- > si->frags)
		dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
			       DMA_TO_DEVICE);

	dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb),
			 DMA_TO_DEVICE);
out_err:
	return -ENOMEM;
}

/**
 * write_sgl - populate a scatter/gather list for a packet
 * @skb: the packet
 * @sgp: the SGL to populate
 * @start: start address of skb main body data to include in the SGL
 * @len: length of skb main body data to include in the SGL
 * @addr: the list of the mapped addresses
 *
 * Copies the scatter/gather list for the buffers that make up a packet
 * and returns the SGL size in 8-byte words.  The caller must size the SGL
 * appropriately.
 */
static inline unsigned int write_sgl(const struct sk_buff *skb,
				     struct sg_ent *sgp, unsigned char *start,
				     unsigned int len, const dma_addr_t *addr)
{
	unsigned int i, j = 0, k = 0, nfrags;

	if (len) {
		sgp->len[0] = cpu_to_be32(len);
		sgp->addr[j++] = cpu_to_be64(addr[k++]);
	}

	nfrags = skb_shinfo(skb)->nr_frags;
	for (i = 0; i < nfrags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		sgp->len[j] = cpu_to_be32(skb_frag_size(frag));
		sgp->addr[j] = cpu_to_be64(addr[k++]);
		j ^= 1;
		if (j == 0)
			++sgp;
	}
	if (j)
		sgp->len[j] = 0;
	return ((nfrags + (len != 0)) * 3) / 2 + j;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 * where the HW is going to sleep just after we checked, however,
 * then the interrupt handler will detect the outstanding TX packet
 * and ring the doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
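 * (USE_GTS is defined as 0 in this file, so the unconditional doorbell path
 * below is the one normally compiled in.)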
1049 */ 1050 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q) 1051 { 1052 #if USE_GTS 1053 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1054 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1055 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1056 t3_write_reg(adap, A_SG_KDOORBELL, 1057 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1058 } 1059 #else 1060 wmb(); /* write descriptors before telling HW */ 1061 t3_write_reg(adap, A_SG_KDOORBELL, 1062 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1063 #endif 1064 } 1065 1066 static inline void wr_gen2(struct tx_desc *d, unsigned int gen) 1067 { 1068 #if SGE_NUM_GENBITS == 2 1069 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen); 1070 #endif 1071 } 1072 1073 /** 1074 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1075 * @ndesc: number of Tx descriptors spanned by the SGL 1076 * @skb: the packet corresponding to the WR 1077 * @d: first Tx descriptor to be written 1078 * @pidx: index of above descriptors 1079 * @q: the SGE Tx queue 1080 * @sgl: the SGL 1081 * @flits: number of flits to the start of the SGL in the first descriptor 1082 * @sgl_flits: the SGL size in flits 1083 * @gen: the Tx descriptor generation 1084 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1085 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1086 * 1087 * Write a work request header and an associated SGL. If the SGL is 1088 * small enough to fit into one Tx descriptor it has already been written 1089 * and we just need to write the WR header. Otherwise we distribute the 1090 * SGL across the number of descriptors it spans. 1091 */ 1092 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb, 1093 struct tx_desc *d, unsigned int pidx, 1094 const struct sge_txq *q, 1095 const struct sg_ent *sgl, 1096 unsigned int flits, unsigned int sgl_flits, 1097 unsigned int gen, __be32 wr_hi, 1098 __be32 wr_lo) 1099 { 1100 struct work_request_hdr *wrp = (struct work_request_hdr *)d; 1101 struct tx_sw_desc *sd = &q->sdesc[pidx]; 1102 1103 sd->skb = skb; 1104 if (need_skb_unmap()) { 1105 sd->fragidx = 0; 1106 sd->addr_idx = 0; 1107 sd->sflit = flits; 1108 } 1109 1110 if (likely(ndesc == 1)) { 1111 sd->eop = 1; 1112 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1113 V_WR_SGLSFLT(flits)) | wr_hi; 1114 dma_wmb(); 1115 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1116 V_WR_GEN(gen)) | wr_lo; 1117 wr_gen2(d, gen); 1118 } else { 1119 unsigned int ogen = gen; 1120 const u64 *fp = (const u64 *)sgl; 1121 struct work_request_hdr *wp = wrp; 1122 1123 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1124 V_WR_SGLSFLT(flits)) | wr_hi; 1125 1126 while (sgl_flits) { 1127 unsigned int avail = WR_FLITS - flits; 1128 1129 if (avail > sgl_flits) 1130 avail = sgl_flits; 1131 memcpy(&d->flit[flits], fp, avail * sizeof(*fp)); 1132 sgl_flits -= avail; 1133 ndesc--; 1134 if (!sgl_flits) 1135 break; 1136 1137 fp += avail; 1138 d++; 1139 sd->eop = 0; 1140 sd++; 1141 if (++pidx == q->size) { 1142 pidx = 0; 1143 gen ^= 1; 1144 d = q->desc; 1145 sd = q->sdesc; 1146 } 1147 1148 sd->skb = skb; 1149 wrp = (struct work_request_hdr *)d; 1150 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1151 V_WR_SGLSFLT(1)) | wr_hi; 1152 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1153 sgl_flits + 1)) | 1154 V_WR_GEN(gen)) | wr_lo; 1155 wr_gen2(d, gen); 1156 flits = 1; 1157 } 1158 sd->eop = 1; 1159 wrp->wr_hi |= htonl(F_WR_EOP); 1160 dma_wmb(); 1161 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1162 wr_gen2((struct tx_desc *)wp, ogen); 1163 WARN_ON(ndesc 
!= 0); 1164 } 1165 } 1166 1167 /** 1168 * write_tx_pkt_wr - write a TX_PKT work request 1169 * @adap: the adapter 1170 * @skb: the packet to send 1171 * @pi: the egress interface 1172 * @pidx: index of the first Tx descriptor to write 1173 * @gen: the generation value to use 1174 * @q: the Tx queue 1175 * @ndesc: number of descriptors the packet will occupy 1176 * @compl: the value of the COMPL bit to use 1177 * @addr: address 1178 * 1179 * Generate a TX_PKT work request to send the supplied packet. 1180 */ 1181 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, 1182 const struct port_info *pi, 1183 unsigned int pidx, unsigned int gen, 1184 struct sge_txq *q, unsigned int ndesc, 1185 unsigned int compl, const dma_addr_t *addr) 1186 { 1187 unsigned int flits, sgl_flits, cntrl, tso_info; 1188 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1]; 1189 struct tx_desc *d = &q->desc[pidx]; 1190 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d; 1191 1192 cpl->len = htonl(skb->len); 1193 cntrl = V_TXPKT_INTF(pi->port_id); 1194 1195 if (skb_vlan_tag_present(skb)) 1196 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(skb_vlan_tag_get(skb)); 1197 1198 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size); 1199 if (tso_info) { 1200 int eth_type; 1201 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl; 1202 1203 d->flit[2] = 0; 1204 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1205 hdr->cntrl = htonl(cntrl); 1206 eth_type = skb_network_offset(skb) == ETH_HLEN ? 1207 CPL_ETH_II : CPL_ETH_II_VLAN; 1208 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1209 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) | 1210 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff); 1211 hdr->lso_info = htonl(tso_info); 1212 flits = 3; 1213 } else { 1214 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1215 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */ 1216 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL); 1217 cpl->cntrl = htonl(cntrl); 1218 1219 if (skb->len <= WR_LEN - sizeof(*cpl)) { 1220 q->sdesc[pidx].skb = NULL; 1221 if (!skb->data_len) 1222 skb_copy_from_linear_data(skb, &d->flit[2], 1223 skb->len); 1224 else 1225 skb_copy_bits(skb, 0, &d->flit[2], skb->len); 1226 1227 flits = (skb->len + 7) / 8 + 2; 1228 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) | 1229 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) 1230 | F_WR_SOP | F_WR_EOP | compl); 1231 dma_wmb(); 1232 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) | 1233 V_WR_TID(q->token)); 1234 wr_gen2(d, gen); 1235 dev_consume_skb_any(skb); 1236 return; 1237 } 1238 1239 flits = 2; 1240 } 1241 1242 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; 1243 sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr); 1244 1245 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen, 1246 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl), 1247 htonl(V_WR_TID(q->token))); 1248 } 1249 1250 static inline void t3_stop_tx_queue(struct netdev_queue *txq, 1251 struct sge_qset *qs, struct sge_txq *q) 1252 { 1253 netif_tx_stop_queue(txq); 1254 set_bit(TXQ_ETH, &qs->txq_stopped); 1255 q->stops++; 1256 } 1257 1258 /** 1259 * t3_eth_xmit - add a packet to the Ethernet Tx queue 1260 * @skb: the packet 1261 * @dev: the egress net device 1262 * 1263 * Add a packet to an SGE Tx queue. Runs with softirqs disabled. 
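 * Roughly every eight descriptors the outgoing work request asks the SGE for
 * a completion (see the q->unacked accounting below), which is what lets
 * reclaim_completed_tx() free up used descriptors later.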
1264 */ 1265 netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) 1266 { 1267 int qidx; 1268 unsigned int ndesc, pidx, credits, gen, compl; 1269 const struct port_info *pi = netdev_priv(dev); 1270 struct adapter *adap = pi->adapter; 1271 struct netdev_queue *txq; 1272 struct sge_qset *qs; 1273 struct sge_txq *q; 1274 dma_addr_t addr[MAX_SKB_FRAGS + 1]; 1275 1276 /* 1277 * The chip min packet length is 9 octets but play safe and reject 1278 * anything shorter than an Ethernet header. 1279 */ 1280 if (unlikely(skb->len < ETH_HLEN)) { 1281 dev_kfree_skb_any(skb); 1282 return NETDEV_TX_OK; 1283 } 1284 1285 qidx = skb_get_queue_mapping(skb); 1286 qs = &pi->qs[qidx]; 1287 q = &qs->txq[TXQ_ETH]; 1288 txq = netdev_get_tx_queue(dev, qidx); 1289 1290 reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); 1291 1292 credits = q->size - q->in_use; 1293 ndesc = calc_tx_descs(skb); 1294 1295 if (unlikely(credits < ndesc)) { 1296 t3_stop_tx_queue(txq, qs, q); 1297 dev_err(&adap->pdev->dev, 1298 "%s: Tx ring %u full while queue awake!\n", 1299 dev->name, q->cntxt_id & 7); 1300 return NETDEV_TX_BUSY; 1301 } 1302 1303 /* Check if ethernet packet can't be sent as immediate data */ 1304 if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) { 1305 if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) { 1306 dev_kfree_skb(skb); 1307 return NETDEV_TX_OK; 1308 } 1309 } 1310 1311 q->in_use += ndesc; 1312 if (unlikely(credits - ndesc < q->stop_thres)) { 1313 t3_stop_tx_queue(txq, qs, q); 1314 1315 if (should_restart_tx(q) && 1316 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) { 1317 q->restarts++; 1318 netif_tx_start_queue(txq); 1319 } 1320 } 1321 1322 gen = q->gen; 1323 q->unacked += ndesc; 1324 compl = (q->unacked & 8) << (S_WR_COMPL - 3); 1325 q->unacked &= 7; 1326 pidx = q->pidx; 1327 q->pidx += ndesc; 1328 if (q->pidx >= q->size) { 1329 q->pidx -= q->size; 1330 q->gen ^= 1; 1331 } 1332 1333 /* update port statistics */ 1334 if (skb->ip_summed == CHECKSUM_PARTIAL) 1335 qs->port_stats[SGE_PSTAT_TX_CSUM]++; 1336 if (skb_shinfo(skb)->gso_size) 1337 qs->port_stats[SGE_PSTAT_TSO]++; 1338 if (skb_vlan_tag_present(skb)) 1339 qs->port_stats[SGE_PSTAT_VLANINS]++; 1340 1341 /* 1342 * We do not use Tx completion interrupts to free DMAd Tx packets. 1343 * This is good for performance but means that we rely on new Tx 1344 * packets arriving to run the destructors of completed packets, 1345 * which open up space in their sockets' send queues. Sometimes 1346 * we do not get such new packets causing Tx to stall. A single 1347 * UDP transmitter is a good example of this situation. We have 1348 * a clean up timer that periodically reclaims completed packets 1349 * but it doesn't run often enough (nor do we want it to) to prevent 1350 * lengthy stalls. A solution to this problem is to run the 1351 * destructor early, after the packet is queued but before it's DMAd. 1352 * A cons is that we lie to socket memory accounting, but the amount 1353 * of extra memory is reasonable (limited by the number of Tx 1354 * descriptors), the packets do actually get freed quickly by new 1355 * packets almost always, and for protocols like TCP that wait for 1356 * acks to really free up the data the extra memory is even less. 1357 * On the positive side we run the destructors on the sending CPU 1358 * rather than on a potentially different completing CPU, usually a 1359 * good thing. We also run them without holding our Tx queue lock, 1360 * unlike what reclaim_completed_tx() would otherwise do. 
1361 * 1362 * Run the destructor before telling the DMA engine about the packet 1363 * to make sure it doesn't complete and get freed prematurely. 1364 */ 1365 if (likely(!skb_shared(skb))) 1366 skb_orphan(skb); 1367 1368 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr); 1369 check_ring_tx_db(adap, q); 1370 return NETDEV_TX_OK; 1371 } 1372 1373 /** 1374 * write_imm - write a packet into a Tx descriptor as immediate data 1375 * @d: the Tx descriptor to write 1376 * @skb: the packet 1377 * @len: the length of packet data to write as immediate data 1378 * @gen: the generation bit value to write 1379 * 1380 * Writes a packet as immediate data into a Tx descriptor. The packet 1381 * contains a work request at its beginning. We must write the packet 1382 * carefully so the SGE doesn't read it accidentally before it's written 1383 * in its entirety. 1384 */ 1385 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb, 1386 unsigned int len, unsigned int gen) 1387 { 1388 struct work_request_hdr *from = (struct work_request_hdr *)skb->data; 1389 struct work_request_hdr *to = (struct work_request_hdr *)d; 1390 1391 if (likely(!skb->data_len)) 1392 memcpy(&to[1], &from[1], len - sizeof(*from)); 1393 else 1394 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from)); 1395 1396 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1397 V_WR_BCNTLFLT(len & 7)); 1398 dma_wmb(); 1399 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1400 V_WR_LEN((len + 7) / 8)); 1401 wr_gen2(d, gen); 1402 kfree_skb(skb); 1403 } 1404 1405 /** 1406 * check_desc_avail - check descriptor availability on a send queue 1407 * @adap: the adapter 1408 * @q: the send queue 1409 * @skb: the packet needing the descriptors 1410 * @ndesc: the number of Tx descriptors needed 1411 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1412 * 1413 * Checks if the requested number of Tx descriptors is available on an 1414 * SGE send queue. If the queue is already suspended or not enough 1415 * descriptors are available the packet is queued for later transmission. 1416 * Must be called with the Tx queue locked. 1417 * 1418 * Returns 0 if enough descriptors are available, 1 if there aren't 1419 * enough descriptors and the packet has been queued, and 2 if the caller 1420 * needs to retry because there weren't enough descriptors at the 1421 * beginning of the call but some freed up in the mean time. 1422 */ 1423 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q, 1424 struct sk_buff *skb, unsigned int ndesc, 1425 unsigned int qid) 1426 { 1427 if (unlikely(!skb_queue_empty(&q->sendq))) { 1428 addq_exit:__skb_queue_tail(&q->sendq, skb); 1429 return 1; 1430 } 1431 if (unlikely(q->size - q->in_use < ndesc)) { 1432 struct sge_qset *qs = txq_to_qset(q, qid); 1433 1434 set_bit(qid, &qs->txq_stopped); 1435 smp_mb__after_atomic(); 1436 1437 if (should_restart_tx(q) && 1438 test_and_clear_bit(qid, &qs->txq_stopped)) 1439 return 2; 1440 1441 q->stops++; 1442 goto addq_exit; 1443 } 1444 return 0; 1445 } 1446 1447 /** 1448 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1449 * @q: the SGE control Tx queue 1450 * 1451 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1452 * that send only immediate data (presently just the control queues) and 1453 * thus do not have any sk_buffs to release. 
1454 */ 1455 static inline void reclaim_completed_tx_imm(struct sge_txq *q) 1456 { 1457 unsigned int reclaim = q->processed - q->cleaned; 1458 1459 q->in_use -= reclaim; 1460 q->cleaned += reclaim; 1461 } 1462 1463 static inline int immediate(const struct sk_buff *skb) 1464 { 1465 return skb->len <= WR_LEN; 1466 } 1467 1468 /** 1469 * ctrl_xmit - send a packet through an SGE control Tx queue 1470 * @adap: the adapter 1471 * @q: the control queue 1472 * @skb: the packet 1473 * 1474 * Send a packet through an SGE control Tx queue. Packets sent through 1475 * a control queue must fit entirely as immediate data in a single Tx 1476 * descriptor and have no page fragments. 1477 */ 1478 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q, 1479 struct sk_buff *skb) 1480 { 1481 int ret; 1482 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data; 1483 1484 if (unlikely(!immediate(skb))) { 1485 WARN_ON(1); 1486 dev_kfree_skb(skb); 1487 return NET_XMIT_SUCCESS; 1488 } 1489 1490 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1491 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1492 1493 spin_lock(&q->lock); 1494 again:reclaim_completed_tx_imm(q); 1495 1496 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL); 1497 if (unlikely(ret)) { 1498 if (ret == 1) { 1499 spin_unlock(&q->lock); 1500 return NET_XMIT_CN; 1501 } 1502 goto again; 1503 } 1504 1505 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); 1506 1507 q->in_use++; 1508 if (++q->pidx >= q->size) { 1509 q->pidx = 0; 1510 q->gen ^= 1; 1511 } 1512 spin_unlock(&q->lock); 1513 wmb(); 1514 t3_write_reg(adap, A_SG_KDOORBELL, 1515 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1516 return NET_XMIT_SUCCESS; 1517 } 1518 1519 /** 1520 * restart_ctrlq - restart a suspended control queue 1521 * @w: pointer to the work associated with this handler 1522 * 1523 * Resumes transmission on a suspended Tx control queue. 1524 */ 1525 static void restart_ctrlq(struct work_struct *w) 1526 { 1527 struct sk_buff *skb; 1528 struct sge_qset *qs = container_of(w, struct sge_qset, 1529 txq[TXQ_CTRL].qresume_task); 1530 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1531 1532 spin_lock(&q->lock); 1533 again:reclaim_completed_tx_imm(q); 1534 1535 while (q->in_use < q->size && 1536 (skb = __skb_dequeue(&q->sendq)) != NULL) { 1537 1538 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); 1539 1540 if (++q->pidx >= q->size) { 1541 q->pidx = 0; 1542 q->gen ^= 1; 1543 } 1544 q->in_use++; 1545 } 1546 1547 if (!skb_queue_empty(&q->sendq)) { 1548 set_bit(TXQ_CTRL, &qs->txq_stopped); 1549 smp_mb__after_atomic(); 1550 1551 if (should_restart_tx(q) && 1552 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1553 goto again; 1554 q->stops++; 1555 } 1556 1557 spin_unlock(&q->lock); 1558 wmb(); 1559 t3_write_reg(qs->adap, A_SG_KDOORBELL, 1560 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1561 } 1562 1563 /* 1564 * Send a management message through control queue 0 1565 */ 1566 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb) 1567 { 1568 int ret; 1569 local_bh_disable(); 1570 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb); 1571 local_bh_enable(); 1572 1573 return ret; 1574 } 1575 1576 /** 1577 * deferred_unmap_destructor - unmap a packet when it is freed 1578 * @skb: the packet 1579 * 1580 * This is the packet destructor used for Tx packets that need to remain 1581 * mapped until they are freed rather than until their Tx descriptors are 1582 * freed. 
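 * The addresses to unmap are stored at skb->head in a struct
 * deferred_unmap_info, filled in by setup_deferred_unmapping() when the
 * offload work request is written.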
1583 */ 1584 static void deferred_unmap_destructor(struct sk_buff *skb) 1585 { 1586 int i; 1587 const dma_addr_t *p; 1588 const struct skb_shared_info *si; 1589 const struct deferred_unmap_info *dui; 1590 1591 dui = (struct deferred_unmap_info *)skb->head; 1592 p = dui->addr; 1593 1594 if (skb_tail_pointer(skb) - skb_transport_header(skb)) 1595 dma_unmap_single(&dui->pdev->dev, *p++, 1596 skb_tail_pointer(skb) - skb_transport_header(skb), 1597 DMA_TO_DEVICE); 1598 1599 si = skb_shinfo(skb); 1600 for (i = 0; i < si->nr_frags; i++) 1601 dma_unmap_page(&dui->pdev->dev, *p++, 1602 skb_frag_size(&si->frags[i]), DMA_TO_DEVICE); 1603 } 1604 1605 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, 1606 const struct sg_ent *sgl, int sgl_flits) 1607 { 1608 dma_addr_t *p; 1609 struct deferred_unmap_info *dui; 1610 1611 dui = (struct deferred_unmap_info *)skb->head; 1612 dui->pdev = pdev; 1613 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) { 1614 *p++ = be64_to_cpu(sgl->addr[0]); 1615 *p++ = be64_to_cpu(sgl->addr[1]); 1616 } 1617 if (sgl_flits) 1618 *p = be64_to_cpu(sgl->addr[0]); 1619 } 1620 1621 /** 1622 * write_ofld_wr - write an offload work request 1623 * @adap: the adapter 1624 * @skb: the packet to send 1625 * @q: the Tx queue 1626 * @pidx: index of the first Tx descriptor to write 1627 * @gen: the generation value to use 1628 * @ndesc: number of descriptors the packet will occupy 1629 * @addr: the address 1630 * 1631 * Write an offload work request to send the supplied packet. The packet 1632 * data already carry the work request with most fields populated. 1633 */ 1634 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, 1635 struct sge_txq *q, unsigned int pidx, 1636 unsigned int gen, unsigned int ndesc, 1637 const dma_addr_t *addr) 1638 { 1639 unsigned int sgl_flits, flits; 1640 struct work_request_hdr *from; 1641 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1]; 1642 struct tx_desc *d = &q->desc[pidx]; 1643 1644 if (immediate(skb)) { 1645 q->sdesc[pidx].skb = NULL; 1646 write_imm(d, skb, skb->len, gen); 1647 return; 1648 } 1649 1650 /* Only TX_DATA builds SGLs */ 1651 1652 from = (struct work_request_hdr *)skb->data; 1653 memcpy(&d->flit[1], &from[1], 1654 skb_transport_offset(skb) - sizeof(*from)); 1655 1656 flits = skb_transport_offset(skb) / 8; 1657 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; 1658 sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb), 1659 skb_tail_pointer(skb) - skb_transport_header(skb), 1660 addr); 1661 if (need_skb_unmap()) { 1662 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); 1663 skb->destructor = deferred_unmap_destructor; 1664 } 1665 1666 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, 1667 gen, from->wr_hi, from->wr_lo); 1668 } 1669 1670 /** 1671 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1672 * @skb: the packet 1673 * 1674 * Returns the number of Tx descriptors needed for the given offload 1675 * packet. These packets are already fully constructed. 
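 * For example, a non-immediate packet whose work request and headers occupy
 * the first 24 bytes uses 24 / 8 = 3 header flits plus sgl_len() flits for
 * its payload buffers.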
1676 */ 1677 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb) 1678 { 1679 unsigned int flits, cnt; 1680 1681 if (skb->len <= WR_LEN) 1682 return 1; /* packet fits as immediate data */ 1683 1684 flits = skb_transport_offset(skb) / 8; /* headers */ 1685 cnt = skb_shinfo(skb)->nr_frags; 1686 if (skb_tail_pointer(skb) != skb_transport_header(skb)) 1687 cnt++; 1688 return flits_to_desc(flits + sgl_len(cnt)); 1689 } 1690 1691 /** 1692 * ofld_xmit - send a packet through an offload queue 1693 * @adap: the adapter 1694 * @q: the Tx offload queue 1695 * @skb: the packet 1696 * 1697 * Send an offload packet through an SGE offload queue. 1698 */ 1699 static int ofld_xmit(struct adapter *adap, struct sge_txq *q, 1700 struct sk_buff *skb) 1701 { 1702 int ret; 1703 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen; 1704 1705 spin_lock(&q->lock); 1706 again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); 1707 1708 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD); 1709 if (unlikely(ret)) { 1710 if (ret == 1) { 1711 skb->priority = ndesc; /* save for restart */ 1712 spin_unlock(&q->lock); 1713 return NET_XMIT_CN; 1714 } 1715 goto again; 1716 } 1717 1718 if (!immediate(skb) && 1719 map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) { 1720 spin_unlock(&q->lock); 1721 return NET_XMIT_SUCCESS; 1722 } 1723 1724 gen = q->gen; 1725 q->in_use += ndesc; 1726 pidx = q->pidx; 1727 q->pidx += ndesc; 1728 if (q->pidx >= q->size) { 1729 q->pidx -= q->size; 1730 q->gen ^= 1; 1731 } 1732 spin_unlock(&q->lock); 1733 1734 write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head); 1735 check_ring_tx_db(adap, q); 1736 return NET_XMIT_SUCCESS; 1737 } 1738 1739 /** 1740 * restart_offloadq - restart a suspended offload queue 1741 * @w: pointer to the work associated with this handler 1742 * 1743 * Resumes transmission on a suspended Tx offload queue. 
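 * Scheduled via the queue's qresume_task from restart_tx() on the driver's
 * cxgb3_wq workqueue once enough descriptors have been freed.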
1744 */ 1745 static void restart_offloadq(struct work_struct *w) 1746 { 1747 struct sk_buff *skb; 1748 struct sge_qset *qs = container_of(w, struct sge_qset, 1749 txq[TXQ_OFLD].qresume_task); 1750 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 1751 const struct port_info *pi = netdev_priv(qs->netdev); 1752 struct adapter *adap = pi->adapter; 1753 unsigned int written = 0; 1754 1755 spin_lock(&q->lock); 1756 again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); 1757 1758 while ((skb = skb_peek(&q->sendq)) != NULL) { 1759 unsigned int gen, pidx; 1760 unsigned int ndesc = skb->priority; 1761 1762 if (unlikely(q->size - q->in_use < ndesc)) { 1763 set_bit(TXQ_OFLD, &qs->txq_stopped); 1764 smp_mb__after_atomic(); 1765 1766 if (should_restart_tx(q) && 1767 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 1768 goto again; 1769 q->stops++; 1770 break; 1771 } 1772 1773 if (!immediate(skb) && 1774 map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) 1775 break; 1776 1777 gen = q->gen; 1778 q->in_use += ndesc; 1779 pidx = q->pidx; 1780 q->pidx += ndesc; 1781 written += ndesc; 1782 if (q->pidx >= q->size) { 1783 q->pidx -= q->size; 1784 q->gen ^= 1; 1785 } 1786 __skb_unlink(skb, &q->sendq); 1787 spin_unlock(&q->lock); 1788 1789 write_ofld_wr(adap, skb, q, pidx, gen, ndesc, 1790 (dma_addr_t *)skb->head); 1791 spin_lock(&q->lock); 1792 } 1793 spin_unlock(&q->lock); 1794 1795 #if USE_GTS 1796 set_bit(TXQ_RUNNING, &q->flags); 1797 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1798 #endif 1799 wmb(); 1800 if (likely(written)) 1801 t3_write_reg(adap, A_SG_KDOORBELL, 1802 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1803 } 1804 1805 /** 1806 * queue_set - return the queue set a packet should use 1807 * @skb: the packet 1808 * 1809 * Maps a packet to the SGE queue set it should use. The desired queue 1810 * set is carried in bits 1-3 in the packet's priority. 1811 */ 1812 static inline int queue_set(const struct sk_buff *skb) 1813 { 1814 return skb->priority >> 1; 1815 } 1816 1817 /** 1818 * is_ctrl_pkt - return whether an offload packet is a control packet 1819 * @skb: the packet 1820 * 1821 * Determines whether an offload packet should use an OFLD or a CTRL 1822 * Tx queue. This is indicated by bit 0 in the packet's priority. 1823 */ 1824 static inline int is_ctrl_pkt(const struct sk_buff *skb) 1825 { 1826 return skb->priority & 1; 1827 } 1828 1829 /** 1830 * t3_offload_tx - send an offload packet 1831 * @tdev: the offload device to send to 1832 * @skb: the packet 1833 * 1834 * Sends an offload packet. We use the packet priority to select the 1835 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1836 * should be sent as regular or control, bits 1-3 select the queue set. 1837 */ 1838 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb) 1839 { 1840 struct adapter *adap = tdev2adap(tdev); 1841 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)]; 1842 1843 if (unlikely(is_ctrl_pkt(skb))) 1844 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb); 1845 1846 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb); 1847 } 1848 1849 /** 1850 * offload_enqueue - add an offload packet to an SGE offload receive queue 1851 * @q: the SGE response queue 1852 * @skb: the packet 1853 * 1854 * Add a new offload packet to an SGE response queue's offload packet 1855 * queue. If the packet is the first on the queue it schedules the RX 1856 * softirq to process the queue. 
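 * Queued packets are later drained in bundles of up to RX_BUNDLE_SIZE by
 * ofld_poll().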
1857 */ 1858 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb) 1859 { 1860 int was_empty = skb_queue_empty(&q->rx_queue); 1861 1862 __skb_queue_tail(&q->rx_queue, skb); 1863 1864 if (was_empty) { 1865 struct sge_qset *qs = rspq_to_qset(q); 1866 1867 napi_schedule(&qs->napi); 1868 } 1869 } 1870 1871 /** 1872 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 1873 * @tdev: the offload device that will be receiving the packets 1874 * @q: the SGE response queue that assembled the bundle 1875 * @skbs: the partial bundle 1876 * @n: the number of packets in the bundle 1877 * 1878 * Delivers a (partial) bundle of Rx offload packets to an offload device. 1879 */ 1880 static inline void deliver_partial_bundle(struct t3cdev *tdev, 1881 struct sge_rspq *q, 1882 struct sk_buff *skbs[], int n) 1883 { 1884 if (n) { 1885 q->offload_bundles++; 1886 tdev->recv(tdev, skbs, n); 1887 } 1888 } 1889 1890 /** 1891 * ofld_poll - NAPI handler for offload packets in interrupt mode 1892 * @napi: the network device doing the polling 1893 * @budget: polling budget 1894 * 1895 * The NAPI handler for offload packets when a response queue is serviced 1896 * by the hard interrupt handler, i.e., when it's operating in non-polling 1897 * mode. Creates small packet batches and sends them through the offload 1898 * receive handler. Batches need to be of modest size as we do prefetches 1899 * on the packets in each. 1900 */ 1901 static int ofld_poll(struct napi_struct *napi, int budget) 1902 { 1903 struct sge_qset *qs = container_of(napi, struct sge_qset, napi); 1904 struct sge_rspq *q = &qs->rspq; 1905 struct adapter *adapter = qs->adap; 1906 int work_done = 0; 1907 1908 while (work_done < budget) { 1909 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE]; 1910 struct sk_buff_head queue; 1911 int ngathered; 1912 1913 spin_lock_irq(&q->lock); 1914 __skb_queue_head_init(&queue); 1915 skb_queue_splice_init(&q->rx_queue, &queue); 1916 if (skb_queue_empty(&queue)) { 1917 napi_complete_done(napi, work_done); 1918 spin_unlock_irq(&q->lock); 1919 return work_done; 1920 } 1921 spin_unlock_irq(&q->lock); 1922 1923 ngathered = 0; 1924 skb_queue_walk_safe(&queue, skb, tmp) { 1925 if (work_done >= budget) 1926 break; 1927 work_done++; 1928 1929 __skb_unlink(skb, &queue); 1930 prefetch(skb->data); 1931 skbs[ngathered] = skb; 1932 if (++ngathered == RX_BUNDLE_SIZE) { 1933 q->offload_bundles++; 1934 adapter->tdev.recv(&adapter->tdev, skbs, 1935 ngathered); 1936 ngathered = 0; 1937 } 1938 } 1939 if (!skb_queue_empty(&queue)) { 1940 /* splice remaining packets back onto Rx queue */ 1941 spin_lock_irq(&q->lock); 1942 skb_queue_splice(&queue, &q->rx_queue); 1943 spin_unlock_irq(&q->lock); 1944 } 1945 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered); 1946 } 1947 1948 return work_done; 1949 } 1950 1951 /** 1952 * rx_offload - process a received offload packet 1953 * @tdev: the offload device receiving the packet 1954 * @rq: the response queue that received the packet 1955 * @skb: the packet 1956 * @rx_gather: a gather list of packets if we are building a bundle 1957 * @gather_idx: index of the next available slot in the bundle 1958 * 1959 * Process an ingress offload packet and add it to the offload ingress 1960 * queue. Returns the index of the next available slot in the bundle. 
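 *
 *	The usual calling pattern, mirroring process_responses() below,
 *	threads the running index through successive calls and flushes any
 *	leftover packets once the loop ends:
 *
 *		ngathered = rx_offload(&adap->tdev, q, skb,
 *				       offload_skbs, ngathered);
 *		...
 *		deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);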
1961 */ 1962 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 1963 struct sk_buff *skb, struct sk_buff *rx_gather[], 1964 unsigned int gather_idx) 1965 { 1966 skb_reset_mac_header(skb); 1967 skb_reset_network_header(skb); 1968 skb_reset_transport_header(skb); 1969 1970 if (rq->polling) { 1971 rx_gather[gather_idx++] = skb; 1972 if (gather_idx == RX_BUNDLE_SIZE) { 1973 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE); 1974 gather_idx = 0; 1975 rq->offload_bundles++; 1976 } 1977 } else 1978 offload_enqueue(rq, skb); 1979 1980 return gather_idx; 1981 } 1982 1983 /** 1984 * restart_tx - check whether to restart suspended Tx queues 1985 * @qs: the queue set to resume 1986 * 1987 * Restarts suspended Tx queues of an SGE queue set if they have enough 1988 * free resources to resume operation. 1989 */ 1990 static void restart_tx(struct sge_qset *qs) 1991 { 1992 if (test_bit(TXQ_ETH, &qs->txq_stopped) && 1993 should_restart_tx(&qs->txq[TXQ_ETH]) && 1994 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) { 1995 qs->txq[TXQ_ETH].restarts++; 1996 if (netif_running(qs->netdev)) 1997 netif_tx_wake_queue(qs->tx_q); 1998 } 1999 2000 if (test_bit(TXQ_OFLD, &qs->txq_stopped) && 2001 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2002 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2003 qs->txq[TXQ_OFLD].restarts++; 2004 2005 /* The work can be quite lengthy so we use the driver's own workqueue */ 2006 queue_work(cxgb3_wq, &qs->txq[TXQ_OFLD].qresume_task); 2007 } 2008 if (test_bit(TXQ_CTRL, &qs->txq_stopped) && 2009 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2010 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2011 qs->txq[TXQ_CTRL].restarts++; 2012 2013 /* The work can be quite lengthy so we use the driver's own workqueue */ 2014 queue_work(cxgb3_wq, &qs->txq[TXQ_CTRL].qresume_task); 2015 } 2016 } 2017 2018 /** 2019 * cxgb3_arp_process - process an ARP request probing a private IP address 2020 * @pi: the port info 2021 * @skb: the skbuff containing the ARP request 2022 * 2023 * Check if the ARP request is probing the private IP address 2024 * dedicated to iSCSI and generate an ARP reply if so.
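 *
 *	The ARP payload is parsed by hand: the pointer walk below reads the
 *	sender MAC, sender IP, skips the target MAC and finally reads the
 *	target IP, which is the standard Ethernet/IPv4 ARP layout.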
2025 */ 2026 static void cxgb3_arp_process(struct port_info *pi, struct sk_buff *skb) 2027 { 2028 struct net_device *dev = skb->dev; 2029 struct arphdr *arp; 2030 unsigned char *arp_ptr; 2031 unsigned char *sha; 2032 __be32 sip, tip; 2033 2034 if (!dev) 2035 return; 2036 2037 skb_reset_network_header(skb); 2038 arp = arp_hdr(skb); 2039 2040 if (arp->ar_op != htons(ARPOP_REQUEST)) 2041 return; 2042 2043 arp_ptr = (unsigned char *)(arp + 1); 2044 sha = arp_ptr; 2045 arp_ptr += dev->addr_len; 2046 memcpy(&sip, arp_ptr, sizeof(sip)); 2047 arp_ptr += sizeof(sip); 2048 arp_ptr += dev->addr_len; 2049 memcpy(&tip, arp_ptr, sizeof(tip)); 2050 2051 if (tip != pi->iscsi_ipv4addr) 2052 return; 2053 2054 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, 2055 pi->iscsic.mac_addr, sha); 2056 2057 } 2058 2059 static inline int is_arp(struct sk_buff *skb) 2060 { 2061 return skb->protocol == htons(ETH_P_ARP); 2062 } 2063 2064 static void cxgb3_process_iscsi_prov_pack(struct port_info *pi, 2065 struct sk_buff *skb) 2066 { 2067 if (is_arp(skb)) { 2068 cxgb3_arp_process(pi, skb); 2069 return; 2070 } 2071 2072 if (pi->iscsic.recv) 2073 pi->iscsic.recv(pi, skb); 2074 2075 } 2076 2077 /** 2078 * rx_eth - process an ingress ethernet packet 2079 * @adap: the adapter 2080 * @rq: the response queue that received the packet 2081 * @skb: the packet 2082 * @pad: padding 2083 * @lro: large receive offload 2084 * 2085 * Process an ingress ethernet packet and deliver it to the stack. 2086 * The padding is 2 if the packet was delivered in an Rx buffer and 0 2087 * if it was immediate data in a response. 2088 */ 2089 static void rx_eth(struct adapter *adap, struct sge_rspq *rq, 2090 struct sk_buff *skb, int pad, int lro) 2091 { 2092 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad); 2093 struct sge_qset *qs = rspq_to_qset(rq); 2094 struct port_info *pi; 2095 2096 skb_pull(skb, sizeof(*p) + pad); 2097 skb->protocol = eth_type_trans(skb, adap->port[p->iff]); 2098 pi = netdev_priv(skb->dev); 2099 if ((skb->dev->features & NETIF_F_RXCSUM) && p->csum_valid && 2100 p->csum == htons(0xffff) && !p->fragment) { 2101 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2102 skb->ip_summed = CHECKSUM_UNNECESSARY; 2103 } else 2104 skb_checksum_none_assert(skb); 2105 skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]); 2106 2107 if (p->vlan_valid) { 2108 qs->port_stats[SGE_PSTAT_VLANEX]++; 2109 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan)); 2110 } 2111 if (rq->polling) { 2112 if (lro) 2113 napi_gro_receive(&qs->napi, skb); 2114 else { 2115 if (unlikely(pi->iscsic.flags)) 2116 cxgb3_process_iscsi_prov_pack(pi, skb); 2117 netif_receive_skb(skb); 2118 } 2119 } else 2120 netif_rx(skb); 2121 } 2122 2123 static inline int is_eth_tcp(u32 rss) 2124 { 2125 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE; 2126 } 2127 2128 /** 2129 * lro_add_page - add a page chunk to an LRO session 2130 * @adap: the adapter 2131 * @qs: the associated queue set 2132 * @fl: the free list containing the page chunk to add 2133 * @len: packet length 2134 * @complete: Indicates the last fragment of a frame 2135 * 2136 * Add a received packet contained in a page chunk to an existing LRO 2137 * session. 
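 *
 *	The chunk is appended as a new page fragment to the skb obtained
 *	from napi_get_frags(); checksum status is taken from the CPL header
 *	found in the first fragment, and the assembled skb is passed to
 *	napi_gro_frags() once the final (@complete) fragment has arrived.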
2138 */ 2139 static void lro_add_page(struct adapter *adap, struct sge_qset *qs, 2140 struct sge_fl *fl, int len, int complete) 2141 { 2142 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2143 struct port_info *pi = netdev_priv(qs->netdev); 2144 struct sk_buff *skb = NULL; 2145 struct cpl_rx_pkt *cpl; 2146 skb_frag_t *rx_frag; 2147 int nr_frags; 2148 int offset = 0; 2149 2150 if (!qs->nomem) { 2151 skb = napi_get_frags(&qs->napi); 2152 qs->nomem = !skb; 2153 } 2154 2155 fl->credits--; 2156 2157 dma_sync_single_for_cpu(&adap->pdev->dev, 2158 dma_unmap_addr(sd, dma_addr), 2159 fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE); 2160 2161 (*sd->pg_chunk.p_cnt)--; 2162 if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page) 2163 dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping, 2164 fl->alloc_size, DMA_FROM_DEVICE); 2165 2166 if (!skb) { 2167 put_page(sd->pg_chunk.page); 2168 if (complete) 2169 qs->nomem = 0; 2170 return; 2171 } 2172 2173 rx_frag = skb_shinfo(skb)->frags; 2174 nr_frags = skb_shinfo(skb)->nr_frags; 2175 2176 if (!nr_frags) { 2177 offset = 2 + sizeof(struct cpl_rx_pkt); 2178 cpl = qs->lro_va = sd->pg_chunk.va + 2; 2179 2180 if ((qs->netdev->features & NETIF_F_RXCSUM) && 2181 cpl->csum_valid && cpl->csum == htons(0xffff)) { 2182 skb->ip_summed = CHECKSUM_UNNECESSARY; 2183 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2184 } else 2185 skb->ip_summed = CHECKSUM_NONE; 2186 } else 2187 cpl = qs->lro_va; 2188 2189 len -= offset; 2190 2191 rx_frag += nr_frags; 2192 __skb_frag_set_page(rx_frag, sd->pg_chunk.page); 2193 skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset); 2194 skb_frag_size_set(rx_frag, len); 2195 2196 skb->len += len; 2197 skb->data_len += len; 2198 skb->truesize += len; 2199 skb_shinfo(skb)->nr_frags++; 2200 2201 if (!complete) 2202 return; 2203 2204 skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]); 2205 2206 if (cpl->vlan_valid) { 2207 qs->port_stats[SGE_PSTAT_VLANEX]++; 2208 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); 2209 } 2210 napi_gro_frags(&qs->napi); 2211 } 2212 2213 /** 2214 * handle_rsp_cntrl_info - handles control information in a response 2215 * @qs: the queue set corresponding to the response 2216 * @flags: the response control flags 2217 * 2218 * Handles the control information of an SGE response, such as GTS 2219 * indications and completion credits for the queue set's Tx queues. 2220 * HW coalesces credits, we don't do any extra SW coalescing. 2221 */ 2222 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags) 2223 { 2224 unsigned int credits; 2225 2226 #if USE_GTS 2227 if (flags & F_RSPD_TXQ0_GTS) 2228 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2229 #endif 2230 2231 credits = G_RSPD_TXQ0_CR(flags); 2232 if (credits) 2233 qs->txq[TXQ_ETH].processed += credits; 2234 2235 credits = G_RSPD_TXQ2_CR(flags); 2236 if (credits) 2237 qs->txq[TXQ_CTRL].processed += credits; 2238 2239 # if USE_GTS 2240 if (flags & F_RSPD_TXQ1_GTS) 2241 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2242 # endif 2243 credits = G_RSPD_TXQ1_CR(flags); 2244 if (credits) 2245 qs->txq[TXQ_OFLD].processed += credits; 2246 } 2247 2248 /** 2249 * check_ring_db - check if we need to ring any doorbells 2250 * @adap: the adapter 2251 * @qs: the queue set whose Tx queues are to be examined 2252 * @sleeping: indicates which Tx queue sent GTS 2253 * 2254 * Checks if some of a queue set's Tx queues need to ring their doorbells 2255 * to resume transmission after idling while they still have unprocessed 2256 * descriptors. 
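 *
 *	A queue is considered to still have pending work when
 *	cleaned + in_use != processed; in that case TXQ_RUNNING is set again
 *	and the egress doorbell for the queue's context is rung.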
2257 */ 2258 static void check_ring_db(struct adapter *adap, struct sge_qset *qs, 2259 unsigned int sleeping) 2260 { 2261 if (sleeping & F_RSPD_TXQ0_GTS) { 2262 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 2263 2264 if (txq->cleaned + txq->in_use != txq->processed && 2265 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) { 2266 set_bit(TXQ_RUNNING, &txq->flags); 2267 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | 2268 V_EGRCNTX(txq->cntxt_id)); 2269 } 2270 } 2271 2272 if (sleeping & F_RSPD_TXQ1_GTS) { 2273 struct sge_txq *txq = &qs->txq[TXQ_OFLD]; 2274 2275 if (txq->cleaned + txq->in_use != txq->processed && 2276 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) { 2277 set_bit(TXQ_RUNNING, &txq->flags); 2278 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | 2279 V_EGRCNTX(txq->cntxt_id)); 2280 } 2281 } 2282 } 2283 2284 /** 2285 * is_new_response - check if a response is newly written 2286 * @r: the response descriptor 2287 * @q: the response queue 2288 * 2289 * Returns true if a response descriptor contains a yet unprocessed 2290 * response. 2291 */ 2292 static inline int is_new_response(const struct rsp_desc *r, 2293 const struct sge_rspq *q) 2294 { 2295 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2296 } 2297 2298 static inline void clear_rspq_bufstate(struct sge_rspq * const q) 2299 { 2300 q->pg_skb = NULL; 2301 q->rx_recycle_buf = 0; 2302 } 2303 2304 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2305 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2306 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2307 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2308 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2309 2310 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2311 #define NOMEM_INTR_DELAY 2500 2312 2313 /** 2314 * process_responses - process responses from an SGE response queue 2315 * @adap: the adapter 2316 * @qs: the queue set to which the response queue belongs 2317 * @budget: how many responses can be processed in this round 2318 * 2319 * Process responses from an SGE response queue up to the supplied budget. 2320 * Responses include received packets as well as credits and other events 2321 * for the queues that belong to the response queue's queue set. 2322 * A negative budget is effectively unlimited. 2323 * 2324 * Additionally choose the interrupt holdoff time for the next interrupt 2325 * on this queue. If the system is under memory shortage use a fairly 2326 * long delay to help recovery. 
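 *
 *	With NOMEM_INTR_DELAY defined as 2500 in units of 0.1 us, the
 *	holdoff applied after an allocation failure works out to about
 *	250 us.  The return value is the number of responses actually
 *	processed, i.e. the budget minus whatever was left of it.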
2327 */ 2328 static int process_responses(struct adapter *adap, struct sge_qset *qs, 2329 int budget) 2330 { 2331 struct sge_rspq *q = &qs->rspq; 2332 struct rsp_desc *r = &q->desc[q->cidx]; 2333 int budget_left = budget; 2334 unsigned int sleeping = 0; 2335 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE]; 2336 int ngathered = 0; 2337 2338 q->next_holdoff = q->holdoff_tmr; 2339 2340 while (likely(budget_left && is_new_response(r, q))) { 2341 int packet_complete, eth, ethpad = 2; 2342 int lro = !!(qs->netdev->features & NETIF_F_GRO); 2343 struct sk_buff *skb = NULL; 2344 u32 len, flags; 2345 __be32 rss_hi, rss_lo; 2346 2347 dma_rmb(); 2348 eth = r->rss_hdr.opcode == CPL_RX_PKT; 2349 rss_hi = *(const __be32 *)r; 2350 rss_lo = r->rss_hdr.rss_hash_val; 2351 flags = ntohl(r->flags); 2352 2353 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) { 2354 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC); 2355 if (!skb) 2356 goto no_mem; 2357 2358 __skb_put_data(skb, r, AN_PKT_SIZE); 2359 skb->data[0] = CPL_ASYNC_NOTIF; 2360 rss_hi = htonl(CPL_ASYNC_NOTIF << 24); 2361 q->async_notif++; 2362 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2363 skb = get_imm_packet(r); 2364 if (unlikely(!skb)) { 2365 no_mem: 2366 q->next_holdoff = NOMEM_INTR_DELAY; 2367 q->nomem++; 2368 /* consume one credit since we tried */ 2369 budget_left--; 2370 break; 2371 } 2372 q->imm_data++; 2373 ethpad = 0; 2374 } else if ((len = ntohl(r->len_cq)) != 0) { 2375 struct sge_fl *fl; 2376 2377 lro &= eth && is_eth_tcp(rss_hi); 2378 2379 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2380 if (fl->use_pages) { 2381 void *addr = fl->sdesc[fl->cidx].pg_chunk.va; 2382 2383 net_prefetch(addr); 2384 __refill_fl(adap, fl); 2385 if (lro > 0) { 2386 lro_add_page(adap, qs, fl, 2387 G_RSPD_LEN(len), 2388 flags & F_RSPD_EOP); 2389 goto next_fl; 2390 } 2391 2392 skb = get_packet_pg(adap, fl, q, 2393 G_RSPD_LEN(len), 2394 eth ? 2395 SGE_RX_DROP_THRES : 0); 2396 q->pg_skb = skb; 2397 } else 2398 skb = get_packet(adap, fl, G_RSPD_LEN(len), 2399 eth ? 
SGE_RX_DROP_THRES : 0); 2400 if (unlikely(!skb)) { 2401 if (!eth) 2402 goto no_mem; 2403 q->rx_drops++; 2404 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT)) 2405 __skb_pull(skb, 2); 2406 next_fl: 2407 if (++fl->cidx == fl->size) 2408 fl->cidx = 0; 2409 } else 2410 q->pure_rsps++; 2411 2412 if (flags & RSPD_CTRL_MASK) { 2413 sleeping |= flags & RSPD_GTS_MASK; 2414 handle_rsp_cntrl_info(qs, flags); 2415 } 2416 2417 r++; 2418 if (unlikely(++q->cidx == q->size)) { 2419 q->cidx = 0; 2420 q->gen ^= 1; 2421 r = q->desc; 2422 } 2423 prefetch(r); 2424 2425 if (++q->credits >= (q->size / 4)) { 2426 refill_rspq(adap, q, q->credits); 2427 q->credits = 0; 2428 } 2429 2430 packet_complete = flags & 2431 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID | 2432 F_RSPD_ASYNC_NOTIF); 2433 2434 if (skb != NULL && packet_complete) { 2435 if (eth) 2436 rx_eth(adap, q, skb, ethpad, lro); 2437 else { 2438 q->offload_pkts++; 2439 /* Preserve the RSS info in csum & priority */ 2440 skb->csum = rss_hi; 2441 skb->priority = rss_lo; 2442 ngathered = rx_offload(&adap->tdev, q, skb, 2443 offload_skbs, 2444 ngathered); 2445 } 2446 2447 if (flags & F_RSPD_EOP) 2448 clear_rspq_bufstate(q); 2449 } 2450 --budget_left; 2451 } 2452 2453 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered); 2454 2455 if (sleeping) 2456 check_ring_db(adap, qs, sleeping); 2457 2458 smp_mb(); /* commit Tx queue .processed updates */ 2459 if (unlikely(qs->txq_stopped != 0)) 2460 restart_tx(qs); 2461 2462 budget -= budget_left; 2463 return budget; 2464 } 2465 2466 static inline int is_pure_response(const struct rsp_desc *r) 2467 { 2468 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID); 2469 2470 return (n | r->len_cq) == 0; 2471 } 2472 2473 /** 2474 * napi_rx_handler - the NAPI handler for Rx processing 2475 * @napi: the napi instance 2476 * @budget: how many packets we can process in this round 2477 * 2478 * Handler for new data events when using NAPI. 2479 */ 2480 static int napi_rx_handler(struct napi_struct *napi, int budget) 2481 { 2482 struct sge_qset *qs = container_of(napi, struct sge_qset, napi); 2483 struct adapter *adap = qs->adap; 2484 int work_done = process_responses(adap, qs, budget); 2485 2486 if (likely(work_done < budget)) { 2487 napi_complete_done(napi, work_done); 2488 2489 /* 2490 * Because we don't atomically flush the following 2491 * write it is possible that in very rare cases it can 2492 * reach the device in a way that races with a new 2493 * response being written plus an error interrupt 2494 * causing the NAPI interrupt handler below to return 2495 * unhandled status to the OS. To protect against 2496 * this would require flushing the write and doing 2497 * both the write and the flush with interrupts off. 2498 * Way too expensive and unjustifiable given the 2499 * rarity of the race. 2500 * 2501 * The race cannot happen at all with MSI-X. 2502 */ 2503 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 2504 V_NEWTIMER(qs->rspq.next_holdoff) | 2505 V_NEWINDEX(qs->rspq.cidx)); 2506 } 2507 return work_done; 2508 } 2509 2510 /* 2511 * Returns true if the device is already scheduled for polling. 
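 * Used by rspq_check_napi() below to decide whether NAPI still needs to be
 * scheduled and by sge_timer_rx() to back off while a poll is in progress.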
2512 */ 2513 static inline int napi_is_scheduled(struct napi_struct *napi) 2514 { 2515 return test_bit(NAPI_STATE_SCHED, &napi->state); 2516 } 2517 2518 /** 2519 * process_pure_responses - process pure responses from a response queue 2520 * @adap: the adapter 2521 * @qs: the queue set owning the response queue 2522 * @r: the first pure response to process 2523 * 2524 * A simpler version of process_responses() that handles only pure (i.e., 2525 * non-data-carrying) responses. Such responses are too lightweight to 2526 * justify calling a softirq under NAPI, so we handle them specially in 2527 * the interrupt handler. The function is called with a pointer to a 2528 * response, which the caller must ensure is a valid pure response. 2529 * 2530 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise. 2531 */ 2532 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs, 2533 struct rsp_desc *r) 2534 { 2535 struct sge_rspq *q = &qs->rspq; 2536 unsigned int sleeping = 0; 2537 2538 do { 2539 u32 flags = ntohl(r->flags); 2540 2541 r++; 2542 if (unlikely(++q->cidx == q->size)) { 2543 q->cidx = 0; 2544 q->gen ^= 1; 2545 r = q->desc; 2546 } 2547 prefetch(r); 2548 2549 if (flags & RSPD_CTRL_MASK) { 2550 sleeping |= flags & RSPD_GTS_MASK; 2551 handle_rsp_cntrl_info(qs, flags); 2552 } 2553 2554 q->pure_rsps++; 2555 if (++q->credits >= (q->size / 4)) { 2556 refill_rspq(adap, q, q->credits); 2557 q->credits = 0; 2558 } 2559 if (!is_new_response(r, q)) 2560 break; 2561 dma_rmb(); 2562 } while (is_pure_response(r)); 2563 2564 if (sleeping) 2565 check_ring_db(adap, qs, sleeping); 2566 2567 smp_mb(); /* commit Tx queue .processed updates */ 2568 if (unlikely(qs->txq_stopped != 0)) 2569 restart_tx(qs); 2570 2571 return is_new_response(r, q); 2572 } 2573 2574 /** 2575 * handle_responses - decide what to do with new responses in NAPI mode 2576 * @adap: the adapter 2577 * @q: the response queue 2578 * 2579 * This is used by the NAPI interrupt handlers to decide what to do with 2580 * new SGE responses. If there are no new responses it returns -1. If 2581 * there are new responses and they are pure (i.e., non-data-carrying) 2582 * it handles them straight in hard interrupt context as they are very 2583 * cheap and don't deliver any packets. Finally, if there are any data 2584 * signaling responses it schedules the NAPI handler. Returns 1 if it 2585 * schedules NAPI, 0 if all new responses were pure. 2586 * 2587 * The caller must ascertain NAPI is not already running. 2588 */ 2589 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q) 2590 { 2591 struct sge_qset *qs = rspq_to_qset(q); 2592 struct rsp_desc *r = &q->desc[q->cidx]; 2593 2594 if (!is_new_response(r, q)) 2595 return -1; 2596 dma_rmb(); 2597 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) { 2598 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) | 2599 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx)); 2600 return 0; 2601 } 2602 napi_schedule(&qs->napi); 2603 return 1; 2604 } 2605 2606 /* 2607 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case 2608 * (i.e., response queue serviced in hard interrupt).
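 * Each queue set owns its own MSI-X vector, so the handler only touches its
 * own response queue: it processes responses with an unlimited budget (-1)
 * and then writes the next holdoff timer and updated consumer index to the
 * GTS register.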
2609 */ 2610 static irqreturn_t t3_sge_intr_msix(int irq, void *cookie) 2611 { 2612 struct sge_qset *qs = cookie; 2613 struct adapter *adap = qs->adap; 2614 struct sge_rspq *q = &qs->rspq; 2615 2616 spin_lock(&q->lock); 2617 if (process_responses(adap, qs, -1) == 0) 2618 q->unhandled_irqs++; 2619 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) | 2620 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx)); 2621 spin_unlock(&q->lock); 2622 return IRQ_HANDLED; 2623 } 2624 2625 /* 2626 * The MSI-X interrupt handler for an SGE response queue for the NAPI case 2627 * (i.e., response queue serviced by NAPI polling). 2628 */ 2629 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie) 2630 { 2631 struct sge_qset *qs = cookie; 2632 struct sge_rspq *q = &qs->rspq; 2633 2634 spin_lock(&q->lock); 2635 2636 if (handle_responses(qs->adap, q) < 0) 2637 q->unhandled_irqs++; 2638 spin_unlock(&q->lock); 2639 return IRQ_HANDLED; 2640 } 2641 2642 /* 2643 * The non-NAPI MSI interrupt handler. This needs to handle data events from 2644 * SGE response queues as well as error and other async events as they all use 2645 * the same MSI vector. We use one SGE response queue per port in this mode 2646 * and protect all response queues with queue 0's lock. 2647 */ 2648 static irqreturn_t t3_intr_msi(int irq, void *cookie) 2649 { 2650 int new_packets = 0; 2651 struct adapter *adap = cookie; 2652 struct sge_rspq *q = &adap->sge.qs[0].rspq; 2653 2654 spin_lock(&q->lock); 2655 2656 if (process_responses(adap, &adap->sge.qs[0], -1)) { 2657 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) | 2658 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx)); 2659 new_packets = 1; 2660 } 2661 2662 if (adap->params.nports == 2 && 2663 process_responses(adap, &adap->sge.qs[1], -1)) { 2664 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2665 2666 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) | 2667 V_NEWTIMER(q1->next_holdoff) | 2668 V_NEWINDEX(q1->cidx)); 2669 new_packets = 1; 2670 } 2671 2672 if (!new_packets && t3_slow_intr_handler(adap) == 0) 2673 q->unhandled_irqs++; 2674 2675 spin_unlock(&q->lock); 2676 return IRQ_HANDLED; 2677 } 2678 2679 static int rspq_check_napi(struct sge_qset *qs) 2680 { 2681 struct sge_rspq *q = &qs->rspq; 2682 2683 if (!napi_is_scheduled(&qs->napi) && 2684 is_new_response(&q->desc[q->cidx], q)) { 2685 napi_schedule(&qs->napi); 2686 return 1; 2687 } 2688 return 0; 2689 } 2690 2691 /* 2692 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced 2693 * by NAPI polling). Handles data events from SGE response queues as well as 2694 * error and other async events as they all use the same MSI vector. We use 2695 * one SGE response queue per port in this mode and protect all response 2696 * queues with queue 0's lock. 2697 */ 2698 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie) 2699 { 2700 int new_packets; 2701 struct adapter *adap = cookie; 2702 struct sge_rspq *q = &adap->sge.qs[0].rspq; 2703 2704 spin_lock(&q->lock); 2705 2706 new_packets = rspq_check_napi(&adap->sge.qs[0]); 2707 if (adap->params.nports == 2) 2708 new_packets += rspq_check_napi(&adap->sge.qs[1]); 2709 if (!new_packets && t3_slow_intr_handler(adap) == 0) 2710 q->unhandled_irqs++; 2711 2712 spin_unlock(&q->lock); 2713 return IRQ_HANDLED; 2714 } 2715 2716 /* 2717 * A helper function that processes responses and issues GTS. 
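 * The GTS write selects the response queue context (V_RSPQ) and passes the
 * chip the next interrupt holdoff (V_NEWTIMER) along with the updated
 * consumer index (V_NEWINDEX).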
2718 */ 2719 static inline int process_responses_gts(struct adapter *adap, 2720 struct sge_rspq *rq) 2721 { 2722 int work; 2723 2724 work = process_responses(adap, rspq_to_qset(rq), -1); 2725 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2726 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2727 return work; 2728 } 2729 2730 /* 2731 * The legacy INTx interrupt handler. This needs to handle data events from 2732 * SGE response queues as well as error and other async events as they all use 2733 * the same interrupt pin. We use one SGE response queue per port in this mode 2734 * and protect all response queues with queue 0's lock. 2735 */ 2736 static irqreturn_t t3_intr(int irq, void *cookie) 2737 { 2738 int work_done, w0, w1; 2739 struct adapter *adap = cookie; 2740 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2741 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2742 2743 spin_lock(&q0->lock); 2744 2745 w0 = is_new_response(&q0->desc[q0->cidx], q0); 2746 w1 = adap->params.nports == 2 && 2747 is_new_response(&q1->desc[q1->cidx], q1); 2748 2749 if (likely(w0 | w1)) { 2750 t3_write_reg(adap, A_PL_CLI, 0); 2751 t3_read_reg(adap, A_PL_CLI); /* flush */ 2752 2753 if (likely(w0)) 2754 process_responses_gts(adap, q0); 2755 2756 if (w1) 2757 process_responses_gts(adap, q1); 2758 2759 work_done = w0 | w1; 2760 } else 2761 work_done = t3_slow_intr_handler(adap); 2762 2763 spin_unlock(&q0->lock); 2764 return IRQ_RETVAL(work_done != 0); 2765 } 2766 2767 /* 2768 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2769 * Handles data events from SGE response queues as well as error and other 2770 * async events as they all use the same interrupt pin. We use one SGE 2771 * response queue per port in this mode and protect all response queues with 2772 * queue 0's lock. 2773 */ 2774 static irqreturn_t t3b_intr(int irq, void *cookie) 2775 { 2776 u32 map; 2777 struct adapter *adap = cookie; 2778 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2779 2780 t3_write_reg(adap, A_PL_CLI, 0); 2781 map = t3_read_reg(adap, A_SG_DATA_INTR); 2782 2783 if (unlikely(!map)) /* shared interrupt, most likely */ 2784 return IRQ_NONE; 2785 2786 spin_lock(&q0->lock); 2787 2788 if (unlikely(map & F_ERRINTR)) 2789 t3_slow_intr_handler(adap); 2790 2791 if (likely(map & 1)) 2792 process_responses_gts(adap, q0); 2793 2794 if (map & 2) 2795 process_responses_gts(adap, &adap->sge.qs[1].rspq); 2796 2797 spin_unlock(&q0->lock); 2798 return IRQ_HANDLED; 2799 } 2800 2801 /* 2802 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards. 2803 * Handles data events from SGE response queues as well as error and other 2804 * async events as they all use the same interrupt pin. We use one SGE 2805 * response queue per port in this mode and protect all response queues with 2806 * queue 0's lock. 
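 * A_SG_DATA_INTR reports which queues have work: bit 0 corresponds to queue
 * set 0, bit 1 to queue set 1, and F_ERRINTR flags an async/error event that
 * is handed to t3_slow_intr_handler().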
2807 */ 2808 static irqreturn_t t3b_intr_napi(int irq, void *cookie) 2809 { 2810 u32 map; 2811 struct adapter *adap = cookie; 2812 struct sge_qset *qs0 = &adap->sge.qs[0]; 2813 struct sge_rspq *q0 = &qs0->rspq; 2814 2815 t3_write_reg(adap, A_PL_CLI, 0); 2816 map = t3_read_reg(adap, A_SG_DATA_INTR); 2817 2818 if (unlikely(!map)) /* shared interrupt, most likely */ 2819 return IRQ_NONE; 2820 2821 spin_lock(&q0->lock); 2822 2823 if (unlikely(map & F_ERRINTR)) 2824 t3_slow_intr_handler(adap); 2825 2826 if (likely(map & 1)) 2827 napi_schedule(&qs0->napi); 2828 2829 if (map & 2) 2830 napi_schedule(&adap->sge.qs[1].napi); 2831 2832 spin_unlock(&q0->lock); 2833 return IRQ_HANDLED; 2834 } 2835 2836 /** 2837 * t3_intr_handler - select the top-level interrupt handler 2838 * @adap: the adapter 2839 * @polling: whether using NAPI to service response queues 2840 * 2841 * Selects the top-level interrupt handler based on the type of interrupts 2842 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the 2843 * response queues. 2844 */ 2845 irq_handler_t t3_intr_handler(struct adapter *adap, int polling) 2846 { 2847 if (adap->flags & USING_MSIX) 2848 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix; 2849 if (adap->flags & USING_MSI) 2850 return polling ? t3_intr_msi_napi : t3_intr_msi; 2851 if (adap->params.rev > 0) 2852 return polling ? t3b_intr_napi : t3b_intr; 2853 return t3_intr; 2854 } 2855 2856 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 2857 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 2858 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 2859 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 2860 F_HIRCQPARITYERROR) 2861 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 2862 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 2863 F_RSPQDISABLED) 2864 2865 /** 2866 * t3_sge_err_intr_handler - SGE async event interrupt handler 2867 * @adapter: the adapter 2868 * 2869 * Interrupt handler for SGE asynchronous (non-data) events. 2870 */ 2871 void t3_sge_err_intr_handler(struct adapter *adapter) 2872 { 2873 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) & 2874 ~F_FLEMPTY; 2875 2876 if (status & SGE_PARERR) 2877 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 2878 status & SGE_PARERR); 2879 if (status & SGE_FRAMINGERR) 2880 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 2881 status & SGE_FRAMINGERR); 2882 2883 if (status & F_RSPQCREDITOVERFOW) 2884 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 2885 2886 if (status & F_RSPQDISABLED) { 2887 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 2888 2889 CH_ALERT(adapter, 2890 "packet delivered to disabled response queue " 2891 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff); 2892 } 2893 2894 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR)) 2895 queue_work(cxgb3_wq, &adapter->db_drop_task); 2896 2897 if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL)) 2898 queue_work(cxgb3_wq, &adapter->db_full_task); 2899 2900 if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY)) 2901 queue_work(cxgb3_wq, &adapter->db_empty_task); 2902 2903 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 2904 if (status & SGE_FATALERR) 2905 t3_fatal_err(adapter); 2906 } 2907 2908 /** 2909 * sge_timer_tx - perform periodic maintenance of an SGE qset 2910 * @t: a timer list containing the SGE queue set to maintain 2911 * 2912 * Runs periodically from a timer to perform maintenance of an SGE queue 2913 * set. 
It performs two tasks: 2914 * 2915 * Cleans up any completed Tx descriptors that may still be pending. 2916 * Normal descriptor cleanup happens when new packets are added to a Tx 2917 * queue so this timer is relatively infrequent and does any cleanup only 2918 * if the Tx queue has not seen any new packets in a while. We make a 2919 * best effort attempt to reclaim descriptors, in that we don't wait 2920 * around if we cannot get a queue's lock (which most likely is because 2921 * someone else is queueing new packets and so will also handle the clean 2922 * up). Since control queues use immediate data exclusively we don't 2923 * bother cleaning them up here. 2924 * 2925 */ 2926 static void sge_timer_tx(struct timer_list *t) 2927 { 2928 struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer); 2929 struct port_info *pi = netdev_priv(qs->netdev); 2930 struct adapter *adap = pi->adapter; 2931 unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; 2932 unsigned long next_period; 2933 2934 if (__netif_tx_trylock(qs->tx_q)) { 2935 tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH], 2936 TX_RECLAIM_TIMER_CHUNK); 2937 __netif_tx_unlock(qs->tx_q); 2938 } 2939 2940 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) { 2941 tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD], 2942 TX_RECLAIM_TIMER_CHUNK); 2943 spin_unlock(&qs->txq[TXQ_OFLD].lock); 2944 } 2945 2946 next_period = TX_RECLAIM_PERIOD >> 2947 (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) / 2948 TX_RECLAIM_TIMER_CHUNK); 2949 mod_timer(&qs->tx_reclaim_timer, jiffies + next_period); 2950 } 2951 2952 /** 2953 * sge_timer_rx - perform periodic maintenance of an SGE qset 2954 * @t: the timer list containing the SGE queue set to maintain 2955 * 2956 * a) Replenishes Rx queues that have run out due to memory shortage. 2957 * Normally new Rx buffers are added when existing ones are consumed but 2958 * when out of memory a queue can become empty. We try to add only a few 2959 * buffers here, the queue will be replenished fully as these new buffers 2960 * are used up if memory shortage has subsided. 2961 * 2962 * b) Return coalesced response queue credits in case a response queue is 2963 * starved. 2964 * 2965 */ 2966 static void sge_timer_rx(struct timer_list *t) 2967 { 2968 spinlock_t *lock; 2969 struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer); 2970 struct port_info *pi = netdev_priv(qs->netdev); 2971 struct adapter *adap = pi->adapter; 2972 u32 status; 2973 2974 lock = adap->params.rev > 0 ? 2975 &qs->rspq.lock : &adap->sge.qs[0].rspq.lock; 2976 2977 if (!spin_trylock_irq(lock)) 2978 goto out; 2979 2980 if (napi_is_scheduled(&qs->napi)) 2981 goto unlock; 2982 2983 if (adap->params.rev < 4) { 2984 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS); 2985 2986 if (status & (1 << qs->rspq.cntxt_id)) { 2987 qs->rspq.starved++; 2988 if (qs->rspq.credits) { 2989 qs->rspq.credits--; 2990 refill_rspq(adap, &qs->rspq, 1); 2991 qs->rspq.restarted++; 2992 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS, 2993 1 << qs->rspq.cntxt_id); 2994 } 2995 } 2996 } 2997 2998 if (qs->fl[0].credits < qs->fl[0].size) 2999 __refill_fl(adap, &qs->fl[0]); 3000 if (qs->fl[1].credits < qs->fl[1].size) 3001 __refill_fl(adap, &qs->fl[1]); 3002 3003 unlock: 3004 spin_unlock_irq(lock); 3005 out: 3006 mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD); 3007 } 3008 3009 /** 3010 * t3_update_qset_coalesce - update coalescing settings for a queue set 3011 * @qs: the SGE queue set 3012 * @p: new queue set parameters 3013 * 3014 * Update the coalescing settings for an SGE queue set. 
Nothing is done 3015 * if the queue set is not initialized yet. 3016 */ 3017 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 3018 { 3019 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */ 3020 qs->rspq.polling = p->polling; 3021 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll; 3022 } 3023 3024 /** 3025 * t3_sge_alloc_qset - initialize an SGE queue set 3026 * @adapter: the adapter 3027 * @id: the queue set id 3028 * @nports: how many Ethernet ports will be using this queue set 3029 * @irq_vec_idx: the IRQ vector index for response queue interrupts 3030 * @p: configuration parameters for this queue set 3031 * @ntxq: number of Tx queues for the queue set 3032 * @dev: net device associated with this queue set 3033 * @netdevq: net device TX queue associated with this queue set 3034 * 3035 * Allocate resources and initialize an SGE queue set. A queue set 3036 * comprises a response queue, two Rx free-buffer queues, and up to 3 3037 * Tx queues. The Tx queues are assigned roles in the order Ethernet 3038 * queue, offload queue, and control queue. 3039 */ 3040 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, 3041 int irq_vec_idx, const struct qset_params *p, 3042 int ntxq, struct net_device *dev, 3043 struct netdev_queue *netdevq) 3044 { 3045 int i, avail, ret = -ENOMEM; 3046 struct sge_qset *q = &adapter->sge.qs[id]; 3047 3048 init_qset_cntxt(q, id); 3049 timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0); 3050 timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0); 3051 3052 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, 3053 sizeof(struct rx_desc), 3054 sizeof(struct rx_sw_desc), 3055 &q->fl[0].phys_addr, &q->fl[0].sdesc); 3056 if (!q->fl[0].desc) 3057 goto err; 3058 3059 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size, 3060 sizeof(struct rx_desc), 3061 sizeof(struct rx_sw_desc), 3062 &q->fl[1].phys_addr, &q->fl[1].sdesc); 3063 if (!q->fl[1].desc) 3064 goto err; 3065 3066 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size, 3067 sizeof(struct rsp_desc), 0, 3068 &q->rspq.phys_addr, NULL); 3069 if (!q->rspq.desc) 3070 goto err; 3071 3072 for (i = 0; i < ntxq; ++i) { 3073 /* 3074 * The control queue always uses immediate data so does not 3075 * need to keep track of any sk_buffs. 3076 */ 3077 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 3078 3079 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i], 3080 sizeof(struct tx_desc), sz, 3081 &q->txq[i].phys_addr, 3082 &q->txq[i].sdesc); 3083 if (!q->txq[i].desc) 3084 goto err; 3085 3086 q->txq[i].gen = 1; 3087 q->txq[i].size = p->txq_size[i]; 3088 spin_lock_init(&q->txq[i].lock); 3089 skb_queue_head_init(&q->txq[i].sendq); 3090 } 3091 3092 INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq); 3093 INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq); 3094 3095 q->fl[0].gen = q->fl[1].gen = 1; 3096 q->fl[0].size = p->fl_size; 3097 q->fl[1].size = p->jumbo_size; 3098 3099 q->rspq.gen = 1; 3100 q->rspq.size = p->rspq_size; 3101 spin_lock_init(&q->rspq.lock); 3102 skb_queue_head_init(&q->rspq.rx_queue); 3103 3104 q->txq[TXQ_ETH].stop_thres = nports * 3105 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3); 3106 3107 #if FL0_PG_CHUNK_SIZE > 0 3108 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE; 3109 #else 3110 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data); 3111 #endif 3112 #if FL1_PG_CHUNK_SIZE > 0 3113 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE; 3114 #else 3115 q->fl[1].buf_size = is_offload(adapter) ? 
3116 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 3117 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt); 3118 #endif 3119 3120 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0; 3121 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0; 3122 q->fl[0].order = FL0_PG_ORDER; 3123 q->fl[1].order = FL1_PG_ORDER; 3124 q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE; 3125 q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE; 3126 3127 spin_lock_irq(&adapter->sge.reg_lock); 3128 3129 /* FL threshold comparison uses < */ 3130 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx, 3131 q->rspq.phys_addr, q->rspq.size, 3132 q->fl[0].buf_size - SGE_PG_RSVD, 1, 0); 3133 if (ret) 3134 goto err_unlock; 3135 3136 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 3137 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0, 3138 q->fl[i].phys_addr, q->fl[i].size, 3139 q->fl[i].buf_size - SGE_PG_RSVD, 3140 p->cong_thres, 1, 0); 3141 if (ret) 3142 goto err_unlock; 3143 } 3144 3145 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 3146 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 3147 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 3148 1, 0); 3149 if (ret) 3150 goto err_unlock; 3151 3152 if (ntxq > 1) { 3153 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id, 3154 USE_GTS, SGE_CNTXT_OFLD, id, 3155 q->txq[TXQ_OFLD].phys_addr, 3156 q->txq[TXQ_OFLD].size, 0, 1, 0); 3157 if (ret) 3158 goto err_unlock; 3159 } 3160 3161 if (ntxq > 2) { 3162 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0, 3163 SGE_CNTXT_CTRL, id, 3164 q->txq[TXQ_CTRL].phys_addr, 3165 q->txq[TXQ_CTRL].size, 3166 q->txq[TXQ_CTRL].token, 1, 0); 3167 if (ret) 3168 goto err_unlock; 3169 } 3170 3171 spin_unlock_irq(&adapter->sge.reg_lock); 3172 3173 q->adap = adapter; 3174 q->netdev = dev; 3175 q->tx_q = netdevq; 3176 t3_update_qset_coalesce(q, p); 3177 3178 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size, 3179 GFP_KERNEL | __GFP_COMP); 3180 if (!avail) { 3181 CH_ALERT(adapter, "free list queue 0 initialization failed\n"); 3182 ret = -ENOMEM; 3183 goto err; 3184 } 3185 if (avail < q->fl[0].size) 3186 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n", 3187 avail); 3188 3189 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size, 3190 GFP_KERNEL | __GFP_COMP); 3191 if (avail < q->fl[1].size) 3192 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n", 3193 avail); 3194 refill_rspq(adapter, &q->rspq, q->rspq.size - 1); 3195 3196 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 3197 V_NEWTIMER(q->rspq.holdoff_tmr)); 3198 3199 return 0; 3200 3201 err_unlock: 3202 spin_unlock_irq(&adapter->sge.reg_lock); 3203 err: 3204 t3_free_qset(adapter, q); 3205 return ret; 3206 } 3207 3208 /** 3209 * t3_start_sge_timers - start SGE timer call backs 3210 * @adap: the adapter 3211 * 3212 * Starts each SGE queue set's timer call back 3213 */ 3214 void t3_start_sge_timers(struct adapter *adap) 3215 { 3216 int i; 3217 3218 for (i = 0; i < SGE_QSETS; ++i) { 3219 struct sge_qset *q = &adap->sge.qs[i]; 3220 3221 if (q->tx_reclaim_timer.function) 3222 mod_timer(&q->tx_reclaim_timer, 3223 jiffies + TX_RECLAIM_PERIOD); 3224 3225 if (q->rx_reclaim_timer.function) 3226 mod_timer(&q->rx_reclaim_timer, 3227 jiffies + RX_RECLAIM_PERIOD); 3228 } 3229 } 3230 3231 /** 3232 * t3_stop_sge_timers - stop SGE timer call backs 3233 * @adap: the adapter 3234 * 3235 * Stops each SGE queue set's timer call back 3236 */ 3237 void t3_stop_sge_timers(struct adapter *adap) 3238 { 3239 int i; 3240 3241 for (i = 0; i < SGE_QSETS; ++i) { 3242 struct 
sge_qset *q = &adap->sge.qs[i]; 3243 3244 if (q->tx_reclaim_timer.function) 3245 del_timer_sync(&q->tx_reclaim_timer); 3246 if (q->rx_reclaim_timer.function) 3247 del_timer_sync(&q->rx_reclaim_timer); 3248 } 3249 } 3250 3251 /** 3252 * t3_free_sge_resources - free SGE resources 3253 * @adap: the adapter 3254 * 3255 * Frees resources used by the SGE queue sets. 3256 */ 3257 void t3_free_sge_resources(struct adapter *adap) 3258 { 3259 int i; 3260 3261 for (i = 0; i < SGE_QSETS; ++i) 3262 t3_free_qset(adap, &adap->sge.qs[i]); 3263 } 3264 3265 /** 3266 * t3_sge_start - enable SGE 3267 * @adap: the adapter 3268 * 3269 * Enables the SGE for DMAs. This is the last step in starting packet 3270 * transfers. 3271 */ 3272 void t3_sge_start(struct adapter *adap) 3273 { 3274 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 3275 } 3276 3277 /** 3278 * t3_sge_stop_dma - Disable SGE DMA engine operation 3279 * @adap: the adapter 3280 * 3281 * Can be invoked from interrupt context, e.g. from the error handler. 3282 * 3283 * Note that this function cannot cancel the queue-restart work items as 3284 * it cannot wait if called from interrupt context; however, those work 3285 * items will have no effect since the doorbells are disabled. The 3286 * driver will call t3_sge_stop() later from process context, at 3287 * which time the work items will be cancelled if they are still pending. 3288 */ 3289 void t3_sge_stop_dma(struct adapter *adap) 3290 { 3291 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0); 3292 } 3293 3294 /** 3295 * t3_sge_stop - disable SGE operation completely 3296 * @adap: the adapter 3297 * 3298 * Called from process context. Disables the DMA engine and cancels any 3299 * pending queue-restart work items. 3300 */ 3301 void t3_sge_stop(struct adapter *adap) 3302 { 3303 int i; 3304 3305 t3_sge_stop_dma(adap); 3306 3307 /* workqueues aren't initialized otherwise */ 3308 if (!(adap->flags & FULL_INIT_DONE)) 3309 return; 3310 for (i = 0; i < SGE_QSETS; ++i) { 3311 struct sge_qset *qs = &adap->sge.qs[i]; 3312 3313 cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task); 3314 cancel_work_sync(&qs->txq[TXQ_CTRL].qresume_task); 3315 } 3316 } 3317 3318 /** 3319 * t3_sge_init - initialize SGE 3320 * @adap: the adapter 3321 * @p: the SGE parameters 3322 * 3323 * Performs SGE initialization needed every time after a chip reset. 3324 * We do not initialize any of the queue sets here; instead, the driver 3325 * top-level must request those individually. We also do not enable DMA 3326 * here; that should be done after the queues have been set up. 3327 */ 3328 void t3_sge_init(struct adapter *adap, struct sge_params *p) 3329 { 3330 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12); 3331 3332 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 3333 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 3334 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 3335 V_USERSPACESIZE(ups ?
ups - 1 : 0) | F_ISCSICOALESCING; 3336 #if SGE_NUM_GENBITS == 1 3337 ctrl |= F_EGRGENCTRL; 3338 #endif 3339 if (adap->params.rev > 0) { 3340 if (!(adap->flags & (USING_MSIX | USING_MSI))) 3341 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 3342 } 3343 t3_write_reg(adap, A_SG_CONTROL, ctrl); 3344 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 3345 V_LORCQDRBTHRSH(512)); 3346 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 3347 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 3348 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 3349 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 3350 adap->params.rev < T3_REV_C ? 1000 : 500); 3351 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 3352 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 3353 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 3354 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 3355 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 3356 } 3357 3358 /** 3359 * t3_sge_prep - one-time SGE initialization 3360 * @adap: the associated adapter 3361 * @p: SGE parameters 3362 * 3363 * Performs one-time initialization of SGE SW state. Includes determining 3364 * defaults for the assorted SGE parameters, which admins can change until 3365 * they are used to initialize the SGE. 3366 */ 3367 void t3_sge_prep(struct adapter *adap, struct sge_params *p) 3368 { 3369 int i; 3370 3371 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) - 3372 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 3373 3374 for (i = 0; i < SGE_QSETS; ++i) { 3375 struct qset_params *q = p->qset + i; 3376 3377 q->polling = adap->params.rev > 0; 3378 q->coalesce_usecs = 5; 3379 q->rspq_size = 1024; 3380 q->fl_size = 1024; 3381 q->jumbo_size = 512; 3382 q->txq_size[TXQ_ETH] = 1024; 3383 q->txq_size[TXQ_OFLD] = 1024; 3384 q->txq_size[TXQ_CTRL] = 256; 3385 q->cong_thres = 0; 3386 } 3387 3388 spin_lock_init(&adap->sge.reg_lock); 3389 } 3390
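/*
 * Illustrative sketch, for orientation only: the rough order in which the
 * driver top-level is expected to use the entry points above when bringing
 * the SGE up.  Argument names are placeholders (p and qp stand for the SGE
 * and queue set parameters); the actual call sites live outside this file.
 *
 *	t3_sge_prep(adap, p);			one-time SW defaults
 *	t3_sge_init(adap, p);			after each chip reset
 *	t3_sge_alloc_qset(adap, id, nports, irq_vec_idx, qp, ntxq, dev, netdevq);
 *	t3_start_sge_timers(adap);		start reclaim/refill timers
 *	t3_sge_start(adap);			last step: enable DMA
 */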