1 /* 2 * This file is part of the Chelsio T4 Ethernet driver for Linux. 3 * 4 * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 #include <linux/skbuff.h> 36 #include <linux/netdevice.h> 37 #include <linux/etherdevice.h> 38 #include <linux/if_vlan.h> 39 #include <linux/ip.h> 40 #include <linux/dma-mapping.h> 41 #include <linux/jiffies.h> 42 #include <linux/prefetch.h> 43 #include <linux/export.h> 44 #include <net/ipv6.h> 45 #include <net/tcp.h> 46 #include "cxgb4.h" 47 #include "t4_regs.h" 48 #include "t4_msg.h" 49 #include "t4fw_api.h" 50 51 /* 52 * Rx buffer size. We use largish buffers if possible but settle for single 53 * pages under memory shortage. 54 */ 55 #if PAGE_SHIFT >= 16 56 # define FL_PG_ORDER 0 57 #else 58 # define FL_PG_ORDER (16 - PAGE_SHIFT) 59 #endif 60 61 /* RX_PULL_LEN should be <= RX_COPY_THRES */ 62 #define RX_COPY_THRES 256 63 #define RX_PULL_LEN 128 64 65 /* 66 * Main body length for sk_buffs used for Rx Ethernet packets with fragments. 67 * Should be >= RX_PULL_LEN but possibly bigger to give pskb_may_pull some room. 68 */ 69 #define RX_PKT_SKB_LEN 512 70 71 /* 72 * Max number of Tx descriptors we clean up at a time. Should be modest as 73 * freeing skbs isn't cheap and it happens while holding locks. We just need 74 * to free packets faster than they arrive, we eventually catch up and keep 75 * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. 76 */ 77 #define MAX_TX_RECLAIM 16 78 79 /* 80 * Max number of Rx buffers we replenish at a time. Again keep this modest, 81 * allocating buffers isn't cheap either. 82 */ 83 #define MAX_RX_REFILL 16U 84 85 /* 86 * Period of the Rx queue check timer. This timer is infrequent as it has 87 * something to do only when the system experiences severe memory shortage. 88 */ 89 #define RX_QCHECK_PERIOD (HZ / 2) 90 91 /* 92 * Period of the Tx queue check timer. 93 */ 94 #define TX_QCHECK_PERIOD (HZ / 2) 95 96 /* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate 97 * (in RX_QCHECK_PERIOD multiples). 
If we find one of the SGE Ingress DMA 98 * State Machines in the same state for this amount of time (in HZ) then we'll 99 * issue a warning about a potential hang. We'll repeat the warning as the 100 * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till 101 * the situation clears. If the situation clears, we'll note that as well. 102 */ 103 #define SGE_IDMA_WARN_THRESH (1 * HZ) 104 #define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) 105 106 /* 107 * Max number of Tx descriptors to be reclaimed by the Tx timer. 108 */ 109 #define MAX_TIMER_TX_RECLAIM 100 110 111 /* 112 * Timer index used when backing off due to memory shortage. 113 */ 114 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1) 115 116 /* 117 * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will 118 * attempt to refill it. 119 */ 120 #define FL_STARVE_THRES 4 121 122 /* 123 * Suspend an Ethernet Tx queue with fewer available descriptors than this. 124 * This is the same as calc_tx_descs() for a TSO packet with 125 * nr_frags == MAX_SKB_FRAGS. 126 */ 127 #define ETHTXQ_STOP_THRES \ 128 (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) 129 130 /* 131 * Suspension threshold for non-Ethernet Tx queues. We require enough room 132 * for a full sized WR. 133 */ 134 #define TXQ_STOP_THRES (SGE_MAX_WR_LEN / sizeof(struct tx_desc)) 135 136 /* 137 * Max Tx descriptor space we allow for an Ethernet packet to be inlined 138 * into a WR. 139 */ 140 #define MAX_IMM_TX_PKT_LEN 128 141 142 /* 143 * Max size of a WR sent through a control Tx queue. 144 */ 145 #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN 146 147 struct tx_sw_desc { /* SW state per Tx descriptor */ 148 struct sk_buff *skb; 149 struct ulptx_sgl *sgl; 150 }; 151 152 struct rx_sw_desc { /* SW state per Rx descriptor */ 153 struct page *page; 154 dma_addr_t dma_addr; 155 }; 156 157 /* 158 * Rx buffer sizes for "useskbs" Free List buffers (one ingress packet pe skb 159 * buffer). We currently only support two sizes for 1500- and 9000-byte MTUs. 160 * We could easily support more but there doesn't seem to be much need for 161 * that ... 162 */ 163 #define FL_MTU_SMALL 1500 164 #define FL_MTU_LARGE 9000 165 166 static inline unsigned int fl_mtu_bufsize(struct adapter *adapter, 167 unsigned int mtu) 168 { 169 struct sge *s = &adapter->sge; 170 171 return ALIGN(s->pktshift + ETH_HLEN + VLAN_HLEN + mtu, s->fl_align); 172 } 173 174 #define FL_MTU_SMALL_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_SMALL) 175 #define FL_MTU_LARGE_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_LARGE) 176 177 /* 178 * Bits 0..3 of rx_sw_desc.dma_addr have special meaning. The hardware uses 179 * these to specify the buffer size as an index into the SGE Free List Buffer 180 * Size register array. We also use bit 4, when the buffer has been unmapped 181 * for DMA, but this is of course never sent to the hardware and is only used 182 * to prevent double unmappings. All of the above requires that the Free List 183 * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are 184 * 32-byte or or a power of 2 greater in alignment. Since the SGE's minimal 185 * Free List Buffer alignment is 32 bytes, this works out for us ... 186 */ 187 enum { 188 RX_BUF_FLAGS = 0x1f, /* bottom five bits are special */ 189 RX_BUF_SIZE = 0x0f, /* bottom three bits are for buf sizes */ 190 RX_UNMAPPED_BUF = 0x10, /* buffer is not mapped */ 191 192 /* 193 * XXX We shouldn't depend on being able to use these indices. 
194 * XXX Especially when some other Master PF has initialized the 195 * XXX adapter or we use the Firmware Configuration File. We 196 * XXX should really search through the Host Buffer Size register 197 * XXX array for the appropriately sized buffer indices. 198 */ 199 RX_SMALL_PG_BUF = 0x0, /* small (PAGE_SIZE) page buffer */ 200 RX_LARGE_PG_BUF = 0x1, /* buffer large (FL_PG_ORDER) page buffer */ 201 202 RX_SMALL_MTU_BUF = 0x2, /* small MTU buffer */ 203 RX_LARGE_MTU_BUF = 0x3, /* large MTU buffer */ 204 }; 205 206 static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5}; 207 #define MIN_NAPI_WORK 1 208 209 static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d) 210 { 211 return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS; 212 } 213 214 static inline bool is_buf_mapped(const struct rx_sw_desc *d) 215 { 216 return !(d->dma_addr & RX_UNMAPPED_BUF); 217 } 218 219 /** 220 * txq_avail - return the number of available slots in a Tx queue 221 * @q: the Tx queue 222 * 223 * Returns the number of descriptors in a Tx queue available to write new 224 * packets. 225 */ 226 static inline unsigned int txq_avail(const struct sge_txq *q) 227 { 228 return q->size - 1 - q->in_use; 229 } 230 231 /** 232 * fl_cap - return the capacity of a free-buffer list 233 * @fl: the FL 234 * 235 * Returns the capacity of a free-buffer list. The capacity is less than 236 * the size because one descriptor needs to be left unpopulated, otherwise 237 * HW will think the FL is empty. 238 */ 239 static inline unsigned int fl_cap(const struct sge_fl *fl) 240 { 241 return fl->size - 8; /* 1 descriptor = 8 buffers */ 242 } 243 244 static inline bool fl_starving(const struct sge_fl *fl) 245 { 246 return fl->avail - fl->pend_cred <= FL_STARVE_THRES; 247 } 248 249 static int map_skb(struct device *dev, const struct sk_buff *skb, 250 dma_addr_t *addr) 251 { 252 const skb_frag_t *fp, *end; 253 const struct skb_shared_info *si; 254 255 *addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); 256 if (dma_mapping_error(dev, *addr)) 257 goto out_err; 258 259 si = skb_shinfo(skb); 260 end = &si->frags[si->nr_frags]; 261 262 for (fp = si->frags; fp < end; fp++) { 263 *++addr = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp), 264 DMA_TO_DEVICE); 265 if (dma_mapping_error(dev, *addr)) 266 goto unwind; 267 } 268 return 0; 269 270 unwind: 271 while (fp-- > si->frags) 272 dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); 273 274 dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE); 275 out_err: 276 return -ENOMEM; 277 } 278 279 #ifdef CONFIG_NEED_DMA_MAP_STATE 280 static void unmap_skb(struct device *dev, const struct sk_buff *skb, 281 const dma_addr_t *addr) 282 { 283 const skb_frag_t *fp, *end; 284 const struct skb_shared_info *si; 285 286 dma_unmap_single(dev, *addr++, skb_headlen(skb), DMA_TO_DEVICE); 287 288 si = skb_shinfo(skb); 289 end = &si->frags[si->nr_frags]; 290 for (fp = si->frags; fp < end; fp++) 291 dma_unmap_page(dev, *addr++, skb_frag_size(fp), DMA_TO_DEVICE); 292 } 293 294 /** 295 * deferred_unmap_destructor - unmap a packet when it is freed 296 * @skb: the packet 297 * 298 * This is the packet destructor used for Tx packets that need to remain 299 * mapped until they are freed rather than until their Tx descriptors are 300 * freed. 
301 */ 302 static void deferred_unmap_destructor(struct sk_buff *skb) 303 { 304 unmap_skb(skb->dev->dev.parent, skb, (dma_addr_t *)skb->head); 305 } 306 #endif 307 308 static void unmap_sgl(struct device *dev, const struct sk_buff *skb, 309 const struct ulptx_sgl *sgl, const struct sge_txq *q) 310 { 311 const struct ulptx_sge_pair *p; 312 unsigned int nfrags = skb_shinfo(skb)->nr_frags; 313 314 if (likely(skb_headlen(skb))) 315 dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0), 316 DMA_TO_DEVICE); 317 else { 318 dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0), 319 DMA_TO_DEVICE); 320 nfrags--; 321 } 322 323 /* 324 * the complexity below is because of the possibility of a wrap-around 325 * in the middle of an SGL 326 */ 327 for (p = sgl->sge; nfrags >= 2; nfrags -= 2) { 328 if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) { 329 unmap: dma_unmap_page(dev, be64_to_cpu(p->addr[0]), 330 ntohl(p->len[0]), DMA_TO_DEVICE); 331 dma_unmap_page(dev, be64_to_cpu(p->addr[1]), 332 ntohl(p->len[1]), DMA_TO_DEVICE); 333 p++; 334 } else if ((u8 *)p == (u8 *)q->stat) { 335 p = (const struct ulptx_sge_pair *)q->desc; 336 goto unmap; 337 } else if ((u8 *)p + 8 == (u8 *)q->stat) { 338 const __be64 *addr = (const __be64 *)q->desc; 339 340 dma_unmap_page(dev, be64_to_cpu(addr[0]), 341 ntohl(p->len[0]), DMA_TO_DEVICE); 342 dma_unmap_page(dev, be64_to_cpu(addr[1]), 343 ntohl(p->len[1]), DMA_TO_DEVICE); 344 p = (const struct ulptx_sge_pair *)&addr[2]; 345 } else { 346 const __be64 *addr = (const __be64 *)q->desc; 347 348 dma_unmap_page(dev, be64_to_cpu(p->addr[0]), 349 ntohl(p->len[0]), DMA_TO_DEVICE); 350 dma_unmap_page(dev, be64_to_cpu(addr[0]), 351 ntohl(p->len[1]), DMA_TO_DEVICE); 352 p = (const struct ulptx_sge_pair *)&addr[1]; 353 } 354 } 355 if (nfrags) { 356 __be64 addr; 357 358 if ((u8 *)p == (u8 *)q->stat) 359 p = (const struct ulptx_sge_pair *)q->desc; 360 addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] : 361 *(const __be64 *)q->desc; 362 dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]), 363 DMA_TO_DEVICE); 364 } 365 } 366 367 /** 368 * free_tx_desc - reclaims Tx descriptors and their buffers 369 * @adapter: the adapter 370 * @q: the Tx queue to reclaim descriptors from 371 * @n: the number of descriptors to reclaim 372 * @unmap: whether the buffers should be unmapped for DMA 373 * 374 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 375 * Tx buffers. Called with the Tx queue lock held. 376 */ 377 static void free_tx_desc(struct adapter *adap, struct sge_txq *q, 378 unsigned int n, bool unmap) 379 { 380 struct tx_sw_desc *d; 381 unsigned int cidx = q->cidx; 382 struct device *dev = adap->pdev_dev; 383 384 d = &q->sdesc[cidx]; 385 while (n--) { 386 if (d->skb) { /* an SGL is present */ 387 if (unmap) 388 unmap_sgl(dev, d->skb, d->sgl, q); 389 dev_consume_skb_any(d->skb); 390 d->skb = NULL; 391 } 392 ++d; 393 if (++cidx == q->size) { 394 cidx = 0; 395 d = q->sdesc; 396 } 397 } 398 q->cidx = cidx; 399 } 400 401 /* 402 * Return the number of reclaimable descriptors in a Tx queue. 403 */ 404 static inline int reclaimable(const struct sge_txq *q) 405 { 406 int hw_cidx = ntohs(q->stat->cidx); 407 hw_cidx -= q->cidx; 408 return hw_cidx < 0 ? 
hw_cidx + q->size : hw_cidx; 409 } 410 411 /** 412 * reclaim_completed_tx - reclaims completed Tx descriptors 413 * @adap: the adapter 414 * @q: the Tx queue to reclaim completed descriptors from 415 * @unmap: whether the buffers should be unmapped for DMA 416 * 417 * Reclaims Tx descriptors that the SGE has indicated it has processed, 418 * and frees the associated buffers if possible. Called with the Tx 419 * queue locked. 420 */ 421 static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, 422 bool unmap) 423 { 424 int avail = reclaimable(q); 425 426 if (avail) { 427 /* 428 * Limit the amount of clean up work we do at a time to keep 429 * the Tx lock hold time O(1). 430 */ 431 if (avail > MAX_TX_RECLAIM) 432 avail = MAX_TX_RECLAIM; 433 434 free_tx_desc(adap, q, avail, unmap); 435 q->in_use -= avail; 436 } 437 } 438 439 static inline int get_buf_size(struct adapter *adapter, 440 const struct rx_sw_desc *d) 441 { 442 struct sge *s = &adapter->sge; 443 unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE; 444 int buf_size; 445 446 switch (rx_buf_size_idx) { 447 case RX_SMALL_PG_BUF: 448 buf_size = PAGE_SIZE; 449 break; 450 451 case RX_LARGE_PG_BUF: 452 buf_size = PAGE_SIZE << s->fl_pg_order; 453 break; 454 455 case RX_SMALL_MTU_BUF: 456 buf_size = FL_MTU_SMALL_BUFSIZE(adapter); 457 break; 458 459 case RX_LARGE_MTU_BUF: 460 buf_size = FL_MTU_LARGE_BUFSIZE(adapter); 461 break; 462 463 default: 464 BUG_ON(1); 465 } 466 467 return buf_size; 468 } 469 470 /** 471 * free_rx_bufs - free the Rx buffers on an SGE free list 472 * @adap: the adapter 473 * @q: the SGE free list to free buffers from 474 * @n: how many buffers to free 475 * 476 * Release the next @n buffers on an SGE free-buffer Rx queue. The 477 * buffers must be made inaccessible to HW before calling this function. 478 */ 479 static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n) 480 { 481 while (n--) { 482 struct rx_sw_desc *d = &q->sdesc[q->cidx]; 483 484 if (is_buf_mapped(d)) 485 dma_unmap_page(adap->pdev_dev, get_buf_addr(d), 486 get_buf_size(adap, d), 487 PCI_DMA_FROMDEVICE); 488 put_page(d->page); 489 d->page = NULL; 490 if (++q->cidx == q->size) 491 q->cidx = 0; 492 q->avail--; 493 } 494 } 495 496 /** 497 * unmap_rx_buf - unmap the current Rx buffer on an SGE free list 498 * @adap: the adapter 499 * @q: the SGE free list 500 * 501 * Unmap the current buffer on an SGE free-buffer Rx queue. The 502 * buffer must be made inaccessible to HW before calling this function. 503 * 504 * This is similar to @free_rx_bufs above but does not free the buffer. 505 * Do note that the FL still loses any further access to the buffer. 506 */ 507 static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q) 508 { 509 struct rx_sw_desc *d = &q->sdesc[q->cidx]; 510 511 if (is_buf_mapped(d)) 512 dma_unmap_page(adap->pdev_dev, get_buf_addr(d), 513 get_buf_size(adap, d), PCI_DMA_FROMDEVICE); 514 d->page = NULL; 515 if (++q->cidx == q->size) 516 q->cidx = 0; 517 q->avail--; 518 } 519 520 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) 521 { 522 u32 val; 523 if (q->pend_cred >= 8) { 524 val = PIDX(q->pend_cred / 8); 525 if (!is_t4(adap->params.chip)) 526 val |= DBTYPE(1); 527 val |= DBPRIO(1); 528 wmb(); 529 530 /* If we don't have access to the new User Doorbell (T5+), use 531 * the old doorbell mechanism; otherwise use the new BAR2 532 * mechanism. 
533 */ 534 if (unlikely(q->bar2_addr == NULL)) { 535 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), 536 val | QID(q->cntxt_id)); 537 } else { 538 writel(val | QID(q->bar2_qid), 539 q->bar2_addr + SGE_UDB_KDOORBELL); 540 541 /* This Write memory Barrier will force the write to 542 * the User Doorbell area to be flushed. 543 */ 544 wmb(); 545 } 546 q->pend_cred &= 7; 547 } 548 } 549 550 static inline void set_rx_sw_desc(struct rx_sw_desc *sd, struct page *pg, 551 dma_addr_t mapping) 552 { 553 sd->page = pg; 554 sd->dma_addr = mapping; /* includes size low bits */ 555 } 556 557 /** 558 * refill_fl - refill an SGE Rx buffer ring 559 * @adap: the adapter 560 * @q: the ring to refill 561 * @n: the number of new buffers to allocate 562 * @gfp: the gfp flags for the allocations 563 * 564 * (Re)populate an SGE free-buffer queue with up to @n new packet buffers, 565 * allocated with the supplied gfp flags. The caller must assure that 566 * @n does not exceed the queue's capacity. If afterwards the queue is 567 * found critically low mark it as starving in the bitmap of starving FLs. 568 * 569 * Returns the number of buffers allocated. 570 */ 571 static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, 572 gfp_t gfp) 573 { 574 struct sge *s = &adap->sge; 575 struct page *pg; 576 dma_addr_t mapping; 577 unsigned int cred = q->avail; 578 __be64 *d = &q->desc[q->pidx]; 579 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 580 581 gfp |= __GFP_NOWARN; 582 583 if (s->fl_pg_order == 0) 584 goto alloc_small_pages; 585 586 /* 587 * Prefer large buffers 588 */ 589 while (n) { 590 pg = __dev_alloc_pages(gfp, s->fl_pg_order); 591 if (unlikely(!pg)) { 592 q->large_alloc_failed++; 593 break; /* fall back to single pages */ 594 } 595 596 mapping = dma_map_page(adap->pdev_dev, pg, 0, 597 PAGE_SIZE << s->fl_pg_order, 598 PCI_DMA_FROMDEVICE); 599 if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { 600 __free_pages(pg, s->fl_pg_order); 601 goto out; /* do not try small pages for this error */ 602 } 603 mapping |= RX_LARGE_PG_BUF; 604 *d++ = cpu_to_be64(mapping); 605 606 set_rx_sw_desc(sd, pg, mapping); 607 sd++; 608 609 q->avail++; 610 if (++q->pidx == q->size) { 611 q->pidx = 0; 612 sd = q->sdesc; 613 d = q->desc; 614 } 615 n--; 616 } 617 618 alloc_small_pages: 619 while (n--) { 620 pg = __dev_alloc_page(gfp); 621 if (unlikely(!pg)) { 622 q->alloc_failed++; 623 break; 624 } 625 626 mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE, 627 PCI_DMA_FROMDEVICE); 628 if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { 629 put_page(pg); 630 goto out; 631 } 632 *d++ = cpu_to_be64(mapping); 633 634 set_rx_sw_desc(sd, pg, mapping); 635 sd++; 636 637 q->avail++; 638 if (++q->pidx == q->size) { 639 q->pidx = 0; 640 sd = q->sdesc; 641 d = q->desc; 642 } 643 } 644 645 out: cred = q->avail - cred; 646 q->pend_cred += cred; 647 ring_fl_db(adap, q); 648 649 if (unlikely(fl_starving(q))) { 650 smp_wmb(); 651 set_bit(q->cntxt_id - adap->sge.egr_start, 652 adap->sge.starving_fl); 653 } 654 655 return cred; 656 } 657 658 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl) 659 { 660 refill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail), 661 GFP_ATOMIC); 662 } 663 664 /** 665 * alloc_ring - allocate resources for an SGE descriptor ring 666 * @dev: the PCI device's core device 667 * @nelem: the number of descriptors 668 * @elem_size: the size of each descriptor 669 * @sw_size: the size of the SW state associated with each ring element 670 * @phys: the physical address of the 
allocated ring 671 * @metadata: address of the array holding the SW state for the ring 672 * @stat_size: extra space in HW ring for status information 673 * @node: preferred node for memory allocations 674 * 675 * Allocates resources for an SGE descriptor ring, such as Tx queues, 676 * free buffer lists, or response queues. Each SGE ring requires 677 * space for its HW descriptors plus, optionally, space for the SW state 678 * associated with each HW entry (the metadata). The function returns 679 * three values: the virtual address for the HW ring (the return value 680 * of the function), the bus address of the HW ring, and the address 681 * of the SW ring. 682 */ 683 static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size, 684 size_t sw_size, dma_addr_t *phys, void *metadata, 685 size_t stat_size, int node) 686 { 687 size_t len = nelem * elem_size + stat_size; 688 void *s = NULL; 689 void *p = dma_alloc_coherent(dev, len, phys, GFP_KERNEL); 690 691 if (!p) 692 return NULL; 693 if (sw_size) { 694 s = kzalloc_node(nelem * sw_size, GFP_KERNEL, node); 695 696 if (!s) { 697 dma_free_coherent(dev, len, p, *phys); 698 return NULL; 699 } 700 } 701 if (metadata) 702 *(void **)metadata = s; 703 memset(p, 0, len); 704 return p; 705 } 706 707 /** 708 * sgl_len - calculates the size of an SGL of the given capacity 709 * @n: the number of SGL entries 710 * 711 * Calculates the number of flits needed for a scatter/gather list that 712 * can hold the given number of entries. 713 */ 714 static inline unsigned int sgl_len(unsigned int n) 715 { 716 n--; 717 return (3 * n) / 2 + (n & 1) + 2; 718 } 719 720 /** 721 * flits_to_desc - returns the num of Tx descriptors for the given flits 722 * @n: the number of flits 723 * 724 * Returns the number of Tx descriptors needed for the supplied number 725 * of flits. 726 */ 727 static inline unsigned int flits_to_desc(unsigned int n) 728 { 729 BUG_ON(n > SGE_MAX_WR_LEN / 8); 730 return DIV_ROUND_UP(n, 8); 731 } 732 733 /** 734 * is_eth_imm - can an Ethernet packet be sent as immediate data? 735 * @skb: the packet 736 * 737 * Returns whether an Ethernet packet is small enough to fit as 738 * immediate data. Return value corresponds to headroom required. 739 */ 740 static inline int is_eth_imm(const struct sk_buff *skb) 741 { 742 int hdrlen = skb_shinfo(skb)->gso_size ? 743 sizeof(struct cpl_tx_pkt_lso_core) : 0; 744 745 hdrlen += sizeof(struct cpl_tx_pkt); 746 if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) 747 return hdrlen; 748 return 0; 749 } 750 751 /** 752 * calc_tx_flits - calculate the number of flits for a packet Tx WR 753 * @skb: the packet 754 * 755 * Returns the number of flits needed for a Tx WR for the given Ethernet 756 * packet, including the needed WR and CPL headers. 757 */ 758 static inline unsigned int calc_tx_flits(const struct sk_buff *skb) 759 { 760 unsigned int flits; 761 int hdrlen = is_eth_imm(skb); 762 763 if (hdrlen) 764 return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64)); 765 766 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4; 767 if (skb_shinfo(skb)->gso_size) 768 flits += 2; 769 return flits; 770 } 771 772 /** 773 * calc_tx_descs - calculate the number of Tx descriptors for a packet 774 * @skb: the packet 775 * 776 * Returns the number of Tx descriptors needed for the given Ethernet 777 * packet, including the needed WR and CPL headers. 
778 */ 779 static inline unsigned int calc_tx_descs(const struct sk_buff *skb) 780 { 781 return flits_to_desc(calc_tx_flits(skb)); 782 } 783 784 /** 785 * write_sgl - populate a scatter/gather list for a packet 786 * @skb: the packet 787 * @q: the Tx queue we are writing into 788 * @sgl: starting location for writing the SGL 789 * @end: points right after the end of the SGL 790 * @start: start offset into skb main-body data to include in the SGL 791 * @addr: the list of bus addresses for the SGL elements 792 * 793 * Generates a gather list for the buffers that make up a packet. 794 * The caller must provide adequate space for the SGL that will be written. 795 * The SGL includes all of the packet's page fragments and the data in its 796 * main body except for the first @start bytes. @sgl must be 16-byte 797 * aligned and within a Tx descriptor with available space. @end points 798 * right after the end of the SGL but does not account for any potential 799 * wrap around, i.e., @end > @sgl. 800 */ 801 static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, 802 struct ulptx_sgl *sgl, u64 *end, unsigned int start, 803 const dma_addr_t *addr) 804 { 805 unsigned int i, len; 806 struct ulptx_sge_pair *to; 807 const struct skb_shared_info *si = skb_shinfo(skb); 808 unsigned int nfrags = si->nr_frags; 809 struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1]; 810 811 len = skb_headlen(skb) - start; 812 if (likely(len)) { 813 sgl->len0 = htonl(len); 814 sgl->addr0 = cpu_to_be64(addr[0] + start); 815 nfrags++; 816 } else { 817 sgl->len0 = htonl(skb_frag_size(&si->frags[0])); 818 sgl->addr0 = cpu_to_be64(addr[1]); 819 } 820 821 sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) | ULPTX_NSGE(nfrags)); 822 if (likely(--nfrags == 0)) 823 return; 824 /* 825 * Most of the complexity below deals with the possibility we hit the 826 * end of the queue in the middle of writing the SGL. For this case 827 * only we create the SGL in a temporary buffer and then copy it. 828 */ 829 to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge; 830 831 for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) { 832 to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i])); 833 to->len[1] = cpu_to_be32(skb_frag_size(&si->frags[++i])); 834 to->addr[0] = cpu_to_be64(addr[i]); 835 to->addr[1] = cpu_to_be64(addr[++i]); 836 } 837 if (nfrags) { 838 to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i])); 839 to->len[1] = cpu_to_be32(0); 840 to->addr[0] = cpu_to_be64(addr[i + 1]); 841 } 842 if (unlikely((u8 *)end > (u8 *)q->stat)) { 843 unsigned int part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1; 844 845 if (likely(part0)) 846 memcpy(sgl->sge, buf, part0); 847 part1 = (u8 *)end - (u8 *)q->stat; 848 memcpy(q->desc, (u8 *)buf + part0, part1); 849 end = (void *)q->desc + part1; 850 } 851 if ((uintptr_t)end & 8) /* 0-pad to multiple of 16 */ 852 *end = 0; 853 } 854 855 /* This function copies 64 byte coalesced work request to 856 * memory mapped BAR2 space. For coalesced WR SGE fetches 857 * data from the FIFO instead of from Host. 858 */ 859 static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) 860 { 861 int count = 8; 862 863 while (count) { 864 writeq(*src, dst); 865 src++; 866 dst++; 867 count--; 868 } 869 } 870 871 /** 872 * ring_tx_db - check and potentially ring a Tx queue's doorbell 873 * @adap: the adapter 874 * @q: the Tx queue 875 * @n: number of new descriptors to give to HW 876 * 877 * Ring the doorbel for a Tx queue. 
878 */ 879 static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) 880 { 881 wmb(); /* write descriptors before telling HW */ 882 883 /* If we don't have access to the new User Doorbell (T5+), use the old 884 * doorbell mechanism; otherwise use the new BAR2 mechanism. 885 */ 886 if (unlikely(q->bar2_addr == NULL)) { 887 u32 val = PIDX(n); 888 unsigned long flags; 889 890 /* For T4 we need to participate in the Doorbell Recovery 891 * mechanism. 892 */ 893 spin_lock_irqsave(&q->db_lock, flags); 894 if (!q->db_disabled) 895 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), 896 QID(q->cntxt_id) | val); 897 else 898 q->db_pidx_inc += n; 899 q->db_pidx = q->pidx; 900 spin_unlock_irqrestore(&q->db_lock, flags); 901 } else { 902 u32 val = PIDX_T5(n); 903 904 /* T4 and later chips share the same PIDX field offset within 905 * the doorbell, but T5 and later shrank the field in order to 906 * gain a bit for Doorbell Priority. The field was absurdly 907 * large in the first place (14 bits) so we just use the T5 908 * and later limits and warn if a Queue ID is too large. 909 */ 910 WARN_ON(val & DBPRIO(1)); 911 912 /* If we're only writing a single TX Descriptor and we can use 913 * Inferred QID registers, we can use the Write Combining 914 * Gather Buffer; otherwise we use the simple doorbell. 915 */ 916 if (n == 1 && q->bar2_qid == 0) { 917 int index = (q->pidx 918 ? (q->pidx - 1) 919 : (q->size - 1)); 920 u64 *wr = (u64 *)&q->desc[index]; 921 922 cxgb_pio_copy((u64 __iomem *) 923 (q->bar2_addr + SGE_UDB_WCDOORBELL), 924 wr); 925 } else { 926 writel(val | QID(q->bar2_qid), 927 q->bar2_addr + SGE_UDB_KDOORBELL); 928 } 929 930 /* This Write Memory Barrier will force the write to the User 931 * Doorbell area to be flushed. This is needed to prevent 932 * writes on different CPUs for the same queue from hitting 933 * the adapter out of order. This is required when some Work 934 * Requests take the Write Combine Gather Buffer path (user 935 * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some 936 * take the traditional path where we simply increment the 937 * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the 938 * hardware DMA read the actual Work Request. 939 */ 940 wmb(); 941 } 942 } 943 944 /** 945 * inline_tx_skb - inline a packet's data into Tx descriptors 946 * @skb: the packet 947 * @q: the Tx queue where the packet will be inlined 948 * @pos: starting position in the Tx queue where to inline the packet 949 * 950 * Inline a packet's contents directly into Tx descriptors, starting at 951 * the given position within the Tx DMA ring. 952 * Most of the complexity of this operation is dealing with wrap arounds 953 * in the middle of the packet we want to inline. 954 */ 955 static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, 956 void *pos) 957 { 958 u64 *p; 959 int left = (void *)q->stat - pos; 960 961 if (likely(skb->len <= left)) { 962 if (likely(!skb->data_len)) 963 skb_copy_from_linear_data(skb, pos, skb->len); 964 else 965 skb_copy_bits(skb, 0, pos, skb->len); 966 pos += skb->len; 967 } else { 968 skb_copy_bits(skb, 0, pos, left); 969 skb_copy_bits(skb, left, q->desc, skb->len - left); 970 pos = (void *)q->desc + (skb->len - left); 971 } 972 973 /* 0-pad to multiple of 16 */ 974 p = PTR_ALIGN(pos, 8); 975 if ((uintptr_t)p & 8) 976 *p = 0; 977 } 978 979 /* 980 * Figure out what HW csum a packet wants and return the appropriate control 981 * bits. 
982 */ 983 static u64 hwcsum(const struct sk_buff *skb) 984 { 985 int csum_type; 986 const struct iphdr *iph = ip_hdr(skb); 987 988 if (iph->version == 4) { 989 if (iph->protocol == IPPROTO_TCP) 990 csum_type = TX_CSUM_TCPIP; 991 else if (iph->protocol == IPPROTO_UDP) 992 csum_type = TX_CSUM_UDPIP; 993 else { 994 nocsum: /* 995 * unknown protocol, disable HW csum 996 * and hope a bad packet is detected 997 */ 998 return TXPKT_L4CSUM_DIS; 999 } 1000 } else { 1001 /* 1002 * this doesn't work with extension headers 1003 */ 1004 const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph; 1005 1006 if (ip6h->nexthdr == IPPROTO_TCP) 1007 csum_type = TX_CSUM_TCPIP6; 1008 else if (ip6h->nexthdr == IPPROTO_UDP) 1009 csum_type = TX_CSUM_UDPIP6; 1010 else 1011 goto nocsum; 1012 } 1013 1014 if (likely(csum_type >= TX_CSUM_TCPIP)) 1015 return TXPKT_CSUM_TYPE(csum_type) | 1016 TXPKT_IPHDR_LEN(skb_network_header_len(skb)) | 1017 TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN); 1018 else { 1019 int start = skb_transport_offset(skb); 1020 1021 return TXPKT_CSUM_TYPE(csum_type) | TXPKT_CSUM_START(start) | 1022 TXPKT_CSUM_LOC(start + skb->csum_offset); 1023 } 1024 } 1025 1026 static void eth_txq_stop(struct sge_eth_txq *q) 1027 { 1028 netif_tx_stop_queue(q->txq); 1029 q->q.stops++; 1030 } 1031 1032 static inline void txq_advance(struct sge_txq *q, unsigned int n) 1033 { 1034 q->in_use += n; 1035 q->pidx += n; 1036 if (q->pidx >= q->size) 1037 q->pidx -= q->size; 1038 } 1039 1040 /** 1041 * t4_eth_xmit - add a packet to an Ethernet Tx queue 1042 * @skb: the packet 1043 * @dev: the egress net device 1044 * 1045 * Add a packet to an SGE Ethernet Tx queue. Runs with softirqs disabled. 1046 */ 1047 netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) 1048 { 1049 int len; 1050 u32 wr_mid; 1051 u64 cntrl, *end; 1052 int qidx, credits; 1053 unsigned int flits, ndesc; 1054 struct adapter *adap; 1055 struct sge_eth_txq *q; 1056 const struct port_info *pi; 1057 struct fw_eth_tx_pkt_wr *wr; 1058 struct cpl_tx_pkt_core *cpl; 1059 const struct skb_shared_info *ssi; 1060 dma_addr_t addr[MAX_SKB_FRAGS + 1]; 1061 bool immediate = false; 1062 1063 /* 1064 * The chip min packet length is 10 octets but play safe and reject 1065 * anything shorter than an Ethernet header. 1066 */ 1067 if (unlikely(skb->len < ETH_HLEN)) { 1068 out_free: dev_kfree_skb_any(skb); 1069 return NETDEV_TX_OK; 1070 } 1071 1072 pi = netdev_priv(dev); 1073 adap = pi->adapter; 1074 qidx = skb_get_queue_mapping(skb); 1075 q = &adap->sge.ethtxq[qidx + pi->first_qset]; 1076 1077 reclaim_completed_tx(adap, &q->q, true); 1078 1079 flits = calc_tx_flits(skb); 1080 ndesc = flits_to_desc(flits); 1081 credits = txq_avail(&q->q) - ndesc; 1082 1083 if (unlikely(credits < 0)) { 1084 eth_txq_stop(q); 1085 dev_err(adap->pdev_dev, 1086 "%s: Tx ring %u full while queue awake!\n", 1087 dev->name, qidx); 1088 return NETDEV_TX_BUSY; 1089 } 1090 1091 if (is_eth_imm(skb)) 1092 immediate = true; 1093 1094 if (!immediate && 1095 unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) { 1096 q->mapping_err++; 1097 goto out_free; 1098 } 1099 1100 wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); 1101 if (unlikely(credits < ETHTXQ_STOP_THRES)) { 1102 eth_txq_stop(q); 1103 wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; 1104 } 1105 1106 wr = (void *)&q->q.desc[q->q.pidx]; 1107 wr->equiq_to_len16 = htonl(wr_mid); 1108 wr->r3 = cpu_to_be64(0); 1109 end = (u64 *)wr + flits; 1110 1111 len = immediate ? 
skb->len : 0; 1112 ssi = skb_shinfo(skb); 1113 if (ssi->gso_size) { 1114 struct cpl_tx_pkt_lso *lso = (void *)wr; 1115 bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0; 1116 int l3hdr_len = skb_network_header_len(skb); 1117 int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN; 1118 1119 len += sizeof(*lso); 1120 wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | 1121 FW_WR_IMMDLEN_V(len)); 1122 lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) | 1123 LSO_FIRST_SLICE | LSO_LAST_SLICE | 1124 LSO_IPV6(v6) | 1125 LSO_ETHHDR_LEN(eth_xtra_len / 4) | 1126 LSO_IPHDR_LEN(l3hdr_len / 4) | 1127 LSO_TCPHDR_LEN(tcp_hdr(skb)->doff)); 1128 lso->c.ipid_ofst = htons(0); 1129 lso->c.mss = htons(ssi->gso_size); 1130 lso->c.seqno_offset = htonl(0); 1131 if (is_t4(adap->params.chip)) 1132 lso->c.len = htonl(skb->len); 1133 else 1134 lso->c.len = htonl(LSO_T5_XFER_SIZE(skb->len)); 1135 cpl = (void *)(lso + 1); 1136 cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | 1137 TXPKT_IPHDR_LEN(l3hdr_len) | 1138 TXPKT_ETHHDR_LEN(eth_xtra_len); 1139 q->tso++; 1140 q->tx_cso += ssi->gso_segs; 1141 } else { 1142 len += sizeof(*cpl); 1143 wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | 1144 FW_WR_IMMDLEN_V(len)); 1145 cpl = (void *)(wr + 1); 1146 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1147 cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; 1148 q->tx_cso++; 1149 } else 1150 cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; 1151 } 1152 1153 if (vlan_tx_tag_present(skb)) { 1154 q->vlan_ins++; 1155 cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb)); 1156 } 1157 1158 cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) | 1159 TXPKT_INTF(pi->tx_chan) | TXPKT_PF(adap->fn)); 1160 cpl->pack = htons(0); 1161 cpl->len = htons(skb->len); 1162 cpl->ctrl1 = cpu_to_be64(cntrl); 1163 1164 if (immediate) { 1165 inline_tx_skb(skb, &q->q, cpl + 1); 1166 dev_consume_skb_any(skb); 1167 } else { 1168 int last_desc; 1169 1170 write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0, 1171 addr); 1172 skb_orphan(skb); 1173 1174 last_desc = q->q.pidx + ndesc - 1; 1175 if (last_desc >= q->q.size) 1176 last_desc -= q->q.size; 1177 q->q.sdesc[last_desc].skb = skb; 1178 q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1); 1179 } 1180 1181 txq_advance(&q->q, ndesc); 1182 1183 ring_tx_db(adap, &q->q, ndesc); 1184 return NETDEV_TX_OK; 1185 } 1186 1187 /** 1188 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1189 * @q: the SGE control Tx queue 1190 * 1191 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1192 * that send only immediate data (presently just the control queues) and 1193 * thus do not have any sk_buffs to release. 1194 */ 1195 static inline void reclaim_completed_tx_imm(struct sge_txq *q) 1196 { 1197 int hw_cidx = ntohs(q->stat->cidx); 1198 int reclaim = hw_cidx - q->cidx; 1199 1200 if (reclaim < 0) 1201 reclaim += q->size; 1202 1203 q->in_use -= reclaim; 1204 q->cidx = hw_cidx; 1205 } 1206 1207 /** 1208 * is_imm - check whether a packet can be sent as immediate data 1209 * @skb: the packet 1210 * 1211 * Returns true if a packet can be sent as a WR with immediate data. 1212 */ 1213 static inline int is_imm(const struct sk_buff *skb) 1214 { 1215 return skb->len <= MAX_CTRL_WR_LEN; 1216 } 1217 1218 /** 1219 * ctrlq_check_stop - check if a control queue is full and should stop 1220 * @q: the queue 1221 * @wr: most recent WR written to the queue 1222 * 1223 * Check if a control queue has become full and should be stopped. 
1224 * We clean up control queue descriptors very lazily, only when we are out. 1225 * If the queue is still full after reclaiming any completed descriptors 1226 * we suspend it and have the last WR wake it up. 1227 */ 1228 static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr) 1229 { 1230 reclaim_completed_tx_imm(&q->q); 1231 if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { 1232 wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F); 1233 q->q.stops++; 1234 q->full = 1; 1235 } 1236 } 1237 1238 /** 1239 * ctrl_xmit - send a packet through an SGE control Tx queue 1240 * @q: the control queue 1241 * @skb: the packet 1242 * 1243 * Send a packet through an SGE control Tx queue. Packets sent through 1244 * a control queue must fit entirely as immediate data. 1245 */ 1246 static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb) 1247 { 1248 unsigned int ndesc; 1249 struct fw_wr_hdr *wr; 1250 1251 if (unlikely(!is_imm(skb))) { 1252 WARN_ON(1); 1253 dev_kfree_skb(skb); 1254 return NET_XMIT_DROP; 1255 } 1256 1257 ndesc = DIV_ROUND_UP(skb->len, sizeof(struct tx_desc)); 1258 spin_lock(&q->sendq.lock); 1259 1260 if (unlikely(q->full)) { 1261 skb->priority = ndesc; /* save for restart */ 1262 __skb_queue_tail(&q->sendq, skb); 1263 spin_unlock(&q->sendq.lock); 1264 return NET_XMIT_CN; 1265 } 1266 1267 wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx]; 1268 inline_tx_skb(skb, &q->q, wr); 1269 1270 txq_advance(&q->q, ndesc); 1271 if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) 1272 ctrlq_check_stop(q, wr); 1273 1274 ring_tx_db(q->adap, &q->q, ndesc); 1275 spin_unlock(&q->sendq.lock); 1276 1277 kfree_skb(skb); 1278 return NET_XMIT_SUCCESS; 1279 } 1280 1281 /** 1282 * restart_ctrlq - restart a suspended control queue 1283 * @data: the control queue to restart 1284 * 1285 * Resumes transmission on a suspended Tx control queue. 1286 */ 1287 static void restart_ctrlq(unsigned long data) 1288 { 1289 struct sk_buff *skb; 1290 unsigned int written = 0; 1291 struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data; 1292 1293 spin_lock(&q->sendq.lock); 1294 reclaim_completed_tx_imm(&q->q); 1295 BUG_ON(txq_avail(&q->q) < TXQ_STOP_THRES); /* q should be empty */ 1296 1297 while ((skb = __skb_dequeue(&q->sendq)) != NULL) { 1298 struct fw_wr_hdr *wr; 1299 unsigned int ndesc = skb->priority; /* previously saved */ 1300 1301 /* 1302 * Write descriptors and free skbs outside the lock to limit 1303 * wait times. q->full is still set so new skbs will be queued. 1304 */ 1305 spin_unlock(&q->sendq.lock); 1306 1307 wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx]; 1308 inline_tx_skb(skb, &q->q, wr); 1309 kfree_skb(skb); 1310 1311 written += ndesc; 1312 txq_advance(&q->q, ndesc); 1313 if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { 1314 unsigned long old = q->q.stops; 1315 1316 ctrlq_check_stop(q, wr); 1317 if (q->q.stops != old) { /* suspended anew */ 1318 spin_lock(&q->sendq.lock); 1319 goto ringdb; 1320 } 1321 } 1322 if (written > 16) { 1323 ring_tx_db(q->adap, &q->q, written); 1324 written = 0; 1325 } 1326 spin_lock(&q->sendq.lock); 1327 } 1328 q->full = 0; 1329 ringdb: if (written) 1330 ring_tx_db(q->adap, &q->q, written); 1331 spin_unlock(&q->sendq.lock); 1332 } 1333 1334 /** 1335 * t4_mgmt_tx - send a management message 1336 * @adap: the adapter 1337 * @skb: the packet containing the management message 1338 * 1339 * Send a management message through control queue 0. 
1340 */ 1341 int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb) 1342 { 1343 int ret; 1344 1345 local_bh_disable(); 1346 ret = ctrl_xmit(&adap->sge.ctrlq[0], skb); 1347 local_bh_enable(); 1348 return ret; 1349 } 1350 1351 /** 1352 * is_ofld_imm - check whether a packet can be sent as immediate data 1353 * @skb: the packet 1354 * 1355 * Returns true if a packet can be sent as an offload WR with immediate 1356 * data. We currently use the same limit as for Ethernet packets. 1357 */ 1358 static inline int is_ofld_imm(const struct sk_buff *skb) 1359 { 1360 return skb->len <= MAX_IMM_TX_PKT_LEN; 1361 } 1362 1363 /** 1364 * calc_tx_flits_ofld - calculate # of flits for an offload packet 1365 * @skb: the packet 1366 * 1367 * Returns the number of flits needed for the given offload packet. 1368 * These packets are already fully constructed and no additional headers 1369 * will be added. 1370 */ 1371 static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) 1372 { 1373 unsigned int flits, cnt; 1374 1375 if (is_ofld_imm(skb)) 1376 return DIV_ROUND_UP(skb->len, 8); 1377 1378 flits = skb_transport_offset(skb) / 8U; /* headers */ 1379 cnt = skb_shinfo(skb)->nr_frags; 1380 if (skb_tail_pointer(skb) != skb_transport_header(skb)) 1381 cnt++; 1382 return flits + sgl_len(cnt); 1383 } 1384 1385 /** 1386 * txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion 1387 * @adap: the adapter 1388 * @q: the queue to stop 1389 * 1390 * Mark a Tx queue stopped due to I/O MMU exhaustion and resulting 1391 * inability to map packets. A periodic timer attempts to restart 1392 * queues so marked. 1393 */ 1394 static void txq_stop_maperr(struct sge_ofld_txq *q) 1395 { 1396 q->mapping_err++; 1397 q->q.stops++; 1398 set_bit(q->q.cntxt_id - q->adap->sge.egr_start, 1399 q->adap->sge.txq_maperr); 1400 } 1401 1402 /** 1403 * ofldtxq_stop - stop an offload Tx queue that has become full 1404 * @q: the queue to stop 1405 * @skb: the packet causing the queue to become full 1406 * 1407 * Stops an offload Tx queue that has become full and modifies the packet 1408 * being written to request a wakeup. 1409 */ 1410 static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb) 1411 { 1412 struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data; 1413 1414 wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F); 1415 q->q.stops++; 1416 q->full = 1; 1417 } 1418 1419 /** 1420 * service_ofldq - restart a suspended offload queue 1421 * @q: the offload queue 1422 * 1423 * Services an offload Tx queue by moving packets from its packet queue 1424 * to the HW Tx ring. The function starts and ends with the queue locked. 1425 */ 1426 static void service_ofldq(struct sge_ofld_txq *q) 1427 { 1428 u64 *pos; 1429 int credits; 1430 struct sk_buff *skb; 1431 unsigned int written = 0; 1432 unsigned int flits, ndesc; 1433 1434 while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) { 1435 /* 1436 * We drop the lock but leave skb on sendq, thus retaining 1437 * exclusive access to the state of the queue. 
1438 */ 1439 spin_unlock(&q->sendq.lock); 1440 1441 reclaim_completed_tx(q->adap, &q->q, false); 1442 1443 flits = skb->priority; /* previously saved */ 1444 ndesc = flits_to_desc(flits); 1445 credits = txq_avail(&q->q) - ndesc; 1446 BUG_ON(credits < 0); 1447 if (unlikely(credits < TXQ_STOP_THRES)) 1448 ofldtxq_stop(q, skb); 1449 1450 pos = (u64 *)&q->q.desc[q->q.pidx]; 1451 if (is_ofld_imm(skb)) 1452 inline_tx_skb(skb, &q->q, pos); 1453 else if (map_skb(q->adap->pdev_dev, skb, 1454 (dma_addr_t *)skb->head)) { 1455 txq_stop_maperr(q); 1456 spin_lock(&q->sendq.lock); 1457 break; 1458 } else { 1459 int last_desc, hdr_len = skb_transport_offset(skb); 1460 1461 memcpy(pos, skb->data, hdr_len); 1462 write_sgl(skb, &q->q, (void *)pos + hdr_len, 1463 pos + flits, hdr_len, 1464 (dma_addr_t *)skb->head); 1465 #ifdef CONFIG_NEED_DMA_MAP_STATE 1466 skb->dev = q->adap->port[0]; 1467 skb->destructor = deferred_unmap_destructor; 1468 #endif 1469 last_desc = q->q.pidx + ndesc - 1; 1470 if (last_desc >= q->q.size) 1471 last_desc -= q->q.size; 1472 q->q.sdesc[last_desc].skb = skb; 1473 } 1474 1475 txq_advance(&q->q, ndesc); 1476 written += ndesc; 1477 if (unlikely(written > 32)) { 1478 ring_tx_db(q->adap, &q->q, written); 1479 written = 0; 1480 } 1481 1482 spin_lock(&q->sendq.lock); 1483 __skb_unlink(skb, &q->sendq); 1484 if (is_ofld_imm(skb)) 1485 kfree_skb(skb); 1486 } 1487 if (likely(written)) 1488 ring_tx_db(q->adap, &q->q, written); 1489 } 1490 1491 /** 1492 * ofld_xmit - send a packet through an offload queue 1493 * @q: the Tx offload queue 1494 * @skb: the packet 1495 * 1496 * Send an offload packet through an SGE offload queue. 1497 */ 1498 static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb) 1499 { 1500 skb->priority = calc_tx_flits_ofld(skb); /* save for restart */ 1501 spin_lock(&q->sendq.lock); 1502 __skb_queue_tail(&q->sendq, skb); 1503 if (q->sendq.qlen == 1) 1504 service_ofldq(q); 1505 spin_unlock(&q->sendq.lock); 1506 return NET_XMIT_SUCCESS; 1507 } 1508 1509 /** 1510 * restart_ofldq - restart a suspended offload queue 1511 * @data: the offload queue to restart 1512 * 1513 * Resumes transmission on a suspended Tx offload queue. 1514 */ 1515 static void restart_ofldq(unsigned long data) 1516 { 1517 struct sge_ofld_txq *q = (struct sge_ofld_txq *)data; 1518 1519 spin_lock(&q->sendq.lock); 1520 q->full = 0; /* the queue actually is completely empty now */ 1521 service_ofldq(q); 1522 spin_unlock(&q->sendq.lock); 1523 } 1524 1525 /** 1526 * skb_txq - return the Tx queue an offload packet should use 1527 * @skb: the packet 1528 * 1529 * Returns the Tx queue an offload packet should use as indicated by bits 1530 * 1-15 in the packet's queue_mapping. 1531 */ 1532 static inline unsigned int skb_txq(const struct sk_buff *skb) 1533 { 1534 return skb->queue_mapping >> 1; 1535 } 1536 1537 /** 1538 * is_ctrl_pkt - return whether an offload packet is a control packet 1539 * @skb: the packet 1540 * 1541 * Returns whether an offload packet should use an OFLD or a CTRL 1542 * Tx queue as indicated by bit 0 in the packet's queue_mapping. 
1543 */ 1544 static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb) 1545 { 1546 return skb->queue_mapping & 1; 1547 } 1548 1549 static inline int ofld_send(struct adapter *adap, struct sk_buff *skb) 1550 { 1551 unsigned int idx = skb_txq(skb); 1552 1553 if (unlikely(is_ctrl_pkt(skb))) { 1554 /* Single ctrl queue is a requirement for LE workaround path */ 1555 if (adap->tids.nsftids) 1556 idx = 0; 1557 return ctrl_xmit(&adap->sge.ctrlq[idx], skb); 1558 } 1559 return ofld_xmit(&adap->sge.ofldtxq[idx], skb); 1560 } 1561 1562 /** 1563 * t4_ofld_send - send an offload packet 1564 * @adap: the adapter 1565 * @skb: the packet 1566 * 1567 * Sends an offload packet. We use the packet queue_mapping to select the 1568 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1569 * should be sent as regular or control, bits 1-15 select the queue. 1570 */ 1571 int t4_ofld_send(struct adapter *adap, struct sk_buff *skb) 1572 { 1573 int ret; 1574 1575 local_bh_disable(); 1576 ret = ofld_send(adap, skb); 1577 local_bh_enable(); 1578 return ret; 1579 } 1580 1581 /** 1582 * cxgb4_ofld_send - send an offload packet 1583 * @dev: the net device 1584 * @skb: the packet 1585 * 1586 * Sends an offload packet. This is an exported version of @t4_ofld_send, 1587 * intended for ULDs. 1588 */ 1589 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb) 1590 { 1591 return t4_ofld_send(netdev2adap(dev), skb); 1592 } 1593 EXPORT_SYMBOL(cxgb4_ofld_send); 1594 1595 static inline void copy_frags(struct sk_buff *skb, 1596 const struct pkt_gl *gl, unsigned int offset) 1597 { 1598 int i; 1599 1600 /* usually there's just one frag */ 1601 __skb_fill_page_desc(skb, 0, gl->frags[0].page, 1602 gl->frags[0].offset + offset, 1603 gl->frags[0].size - offset); 1604 skb_shinfo(skb)->nr_frags = gl->nfrags; 1605 for (i = 1; i < gl->nfrags; i++) 1606 __skb_fill_page_desc(skb, i, gl->frags[i].page, 1607 gl->frags[i].offset, 1608 gl->frags[i].size); 1609 1610 /* get a reference to the last page, we don't own it */ 1611 get_page(gl->frags[gl->nfrags - 1].page); 1612 } 1613 1614 /** 1615 * cxgb4_pktgl_to_skb - build an sk_buff from a packet gather list 1616 * @gl: the gather list 1617 * @skb_len: size of sk_buff main body if it carries fragments 1618 * @pull_len: amount of data to move to the sk_buff's main body 1619 * 1620 * Builds an sk_buff from the given packet gather list. Returns the 1621 * sk_buff or %NULL if sk_buff allocation failed. 1622 */ 1623 struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl, 1624 unsigned int skb_len, unsigned int pull_len) 1625 { 1626 struct sk_buff *skb; 1627 1628 /* 1629 * Below we rely on RX_COPY_THRES being less than the smallest Rx buffer 1630 * size, which is expected since buffers are at least PAGE_SIZEd. 1631 * In this case packets up to RX_COPY_THRES have only one fragment. 
1632 */ 1633 if (gl->tot_len <= RX_COPY_THRES) { 1634 skb = dev_alloc_skb(gl->tot_len); 1635 if (unlikely(!skb)) 1636 goto out; 1637 __skb_put(skb, gl->tot_len); 1638 skb_copy_to_linear_data(skb, gl->va, gl->tot_len); 1639 } else { 1640 skb = dev_alloc_skb(skb_len); 1641 if (unlikely(!skb)) 1642 goto out; 1643 __skb_put(skb, pull_len); 1644 skb_copy_to_linear_data(skb, gl->va, pull_len); 1645 1646 copy_frags(skb, gl, pull_len); 1647 skb->len = gl->tot_len; 1648 skb->data_len = skb->len - pull_len; 1649 skb->truesize += skb->data_len; 1650 } 1651 out: return skb; 1652 } 1653 EXPORT_SYMBOL(cxgb4_pktgl_to_skb); 1654 1655 /** 1656 * t4_pktgl_free - free a packet gather list 1657 * @gl: the gather list 1658 * 1659 * Releases the pages of a packet gather list. We do not own the last 1660 * page on the list and do not free it. 1661 */ 1662 static void t4_pktgl_free(const struct pkt_gl *gl) 1663 { 1664 int n; 1665 const struct page_frag *p; 1666 1667 for (p = gl->frags, n = gl->nfrags - 1; n--; p++) 1668 put_page(p->page); 1669 } 1670 1671 /* 1672 * Process an MPS trace packet. Give it an unused protocol number so it won't 1673 * be delivered to anyone and send it to the stack for capture. 1674 */ 1675 static noinline int handle_trace_pkt(struct adapter *adap, 1676 const struct pkt_gl *gl) 1677 { 1678 struct sk_buff *skb; 1679 1680 skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN); 1681 if (unlikely(!skb)) { 1682 t4_pktgl_free(gl); 1683 return 0; 1684 } 1685 1686 if (is_t4(adap->params.chip)) 1687 __skb_pull(skb, sizeof(struct cpl_trace_pkt)); 1688 else 1689 __skb_pull(skb, sizeof(struct cpl_t5_trace_pkt)); 1690 1691 skb_reset_mac_header(skb); 1692 skb->protocol = htons(0xffff); 1693 skb->dev = adap->port[0]; 1694 netif_receive_skb(skb); 1695 return 0; 1696 } 1697 1698 static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, 1699 const struct cpl_rx_pkt *pkt) 1700 { 1701 struct adapter *adapter = rxq->rspq.adap; 1702 struct sge *s = &adapter->sge; 1703 int ret; 1704 struct sk_buff *skb; 1705 1706 skb = napi_get_frags(&rxq->rspq.napi); 1707 if (unlikely(!skb)) { 1708 t4_pktgl_free(gl); 1709 rxq->stats.rx_drops++; 1710 return; 1711 } 1712 1713 copy_frags(skb, gl, s->pktshift); 1714 skb->len = gl->tot_len - s->pktshift; 1715 skb->data_len = skb->len; 1716 skb->truesize += skb->data_len; 1717 skb->ip_summed = CHECKSUM_UNNECESSARY; 1718 skb_record_rx_queue(skb, rxq->rspq.idx); 1719 if (rxq->rspq.netdev->features & NETIF_F_RXHASH) 1720 skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, 1721 PKT_HASH_TYPE_L3); 1722 1723 if (unlikely(pkt->vlan_ex)) { 1724 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); 1725 rxq->stats.vlan_ex++; 1726 } 1727 ret = napi_gro_frags(&rxq->rspq.napi); 1728 if (ret == GRO_HELD) 1729 rxq->stats.lro_pkts++; 1730 else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE) 1731 rxq->stats.lro_merged++; 1732 rxq->stats.pkts++; 1733 rxq->stats.rx_cso++; 1734 } 1735 1736 /** 1737 * t4_ethrx_handler - process an ingress ethernet packet 1738 * @q: the response queue that received the packet 1739 * @rsp: the response queue descriptor holding the RX_PKT message 1740 * @si: the gather list of packet fragments 1741 * 1742 * Process an ingress ethernet packet and deliver it to the stack. 
1743 */ 1744 int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, 1745 const struct pkt_gl *si) 1746 { 1747 bool csum_ok; 1748 struct sk_buff *skb; 1749 const struct cpl_rx_pkt *pkt; 1750 struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); 1751 struct sge *s = &q->adap->sge; 1752 int cpl_trace_pkt = is_t4(q->adap->params.chip) ? 1753 CPL_TRACE_PKT : CPL_TRACE_PKT_T5; 1754 1755 if (unlikely(*(u8 *)rsp == cpl_trace_pkt)) 1756 return handle_trace_pkt(q->adap, si); 1757 1758 pkt = (const struct cpl_rx_pkt *)rsp; 1759 csum_ok = pkt->csum_calc && !pkt->err_vec && 1760 (q->netdev->features & NETIF_F_RXCSUM); 1761 if ((pkt->l2info & htonl(RXF_TCP)) && 1762 (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) { 1763 do_gro(rxq, si, pkt); 1764 return 0; 1765 } 1766 1767 skb = cxgb4_pktgl_to_skb(si, RX_PKT_SKB_LEN, RX_PULL_LEN); 1768 if (unlikely(!skb)) { 1769 t4_pktgl_free(si); 1770 rxq->stats.rx_drops++; 1771 return 0; 1772 } 1773 1774 __skb_pull(skb, s->pktshift); /* remove ethernet header padding */ 1775 skb->protocol = eth_type_trans(skb, q->netdev); 1776 skb_record_rx_queue(skb, q->idx); 1777 if (skb->dev->features & NETIF_F_RXHASH) 1778 skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, 1779 PKT_HASH_TYPE_L3); 1780 1781 rxq->stats.pkts++; 1782 1783 if (csum_ok && (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) { 1784 if (!pkt->ip_frag) { 1785 skb->ip_summed = CHECKSUM_UNNECESSARY; 1786 rxq->stats.rx_cso++; 1787 } else if (pkt->l2info & htonl(RXF_IP)) { 1788 __sum16 c = (__force __sum16)pkt->csum; 1789 skb->csum = csum_unfold(c); 1790 skb->ip_summed = CHECKSUM_COMPLETE; 1791 rxq->stats.rx_cso++; 1792 } 1793 } else 1794 skb_checksum_none_assert(skb); 1795 1796 if (unlikely(pkt->vlan_ex)) { 1797 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); 1798 rxq->stats.vlan_ex++; 1799 } 1800 netif_receive_skb(skb); 1801 return 0; 1802 } 1803 1804 /** 1805 * restore_rx_bufs - put back a packet's Rx buffers 1806 * @si: the packet gather list 1807 * @q: the SGE free list 1808 * @frags: number of FL buffers to restore 1809 * 1810 * Puts back on an FL the Rx buffers associated with @si. The buffers 1811 * have already been unmapped and are left unmapped, we mark them so to 1812 * prevent further unmapping attempts. 1813 * 1814 * This function undoes a series of @unmap_rx_buf calls when we find out 1815 * that the current packet can't be processed right away afterall and we 1816 * need to come back to it later. This is a very rare event and there's 1817 * no effort to make this particularly efficient. 1818 */ 1819 static void restore_rx_bufs(const struct pkt_gl *si, struct sge_fl *q, 1820 int frags) 1821 { 1822 struct rx_sw_desc *d; 1823 1824 while (frags--) { 1825 if (q->cidx == 0) 1826 q->cidx = q->size - 1; 1827 else 1828 q->cidx--; 1829 d = &q->sdesc[q->cidx]; 1830 d->page = si->frags[frags].page; 1831 d->dma_addr |= RX_UNMAPPED_BUF; 1832 q->avail++; 1833 } 1834 } 1835 1836 /** 1837 * is_new_response - check if a response is newly written 1838 * @r: the response descriptor 1839 * @q: the response queue 1840 * 1841 * Returns true if a response descriptor contains a yet unprocessed 1842 * response. 1843 */ 1844 static inline bool is_new_response(const struct rsp_ctrl *r, 1845 const struct sge_rspq *q) 1846 { 1847 return RSPD_GEN(r->type_gen) == q->gen; 1848 } 1849 1850 /** 1851 * rspq_next - advance to the next entry in a response queue 1852 * @q: the queue 1853 * 1854 * Updates the state of a response queue to advance it to the next entry. 
1855 */ 1856 static inline void rspq_next(struct sge_rspq *q) 1857 { 1858 q->cur_desc = (void *)q->cur_desc + q->iqe_len; 1859 if (unlikely(++q->cidx == q->size)) { 1860 q->cidx = 0; 1861 q->gen ^= 1; 1862 q->cur_desc = q->desc; 1863 } 1864 } 1865 1866 /** 1867 * process_responses - process responses from an SGE response queue 1868 * @q: the ingress queue to process 1869 * @budget: how many responses can be processed in this round 1870 * 1871 * Process responses from an SGE response queue up to the supplied budget. 1872 * Responses include received packets as well as control messages from FW 1873 * or HW. 1874 * 1875 * Additionally choose the interrupt holdoff time for the next interrupt 1876 * on this queue. If the system is under memory shortage use a fairly 1877 * long delay to help recovery. 1878 */ 1879 static int process_responses(struct sge_rspq *q, int budget) 1880 { 1881 int ret, rsp_type; 1882 int budget_left = budget; 1883 const struct rsp_ctrl *rc; 1884 struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); 1885 struct adapter *adapter = q->adap; 1886 struct sge *s = &adapter->sge; 1887 1888 while (likely(budget_left)) { 1889 rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc)); 1890 if (!is_new_response(rc, q)) 1891 break; 1892 1893 rmb(); 1894 rsp_type = RSPD_TYPE(rc->type_gen); 1895 if (likely(rsp_type == RSP_TYPE_FLBUF)) { 1896 struct page_frag *fp; 1897 struct pkt_gl si; 1898 const struct rx_sw_desc *rsd; 1899 u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags; 1900 1901 if (len & RSPD_NEWBUF) { 1902 if (likely(q->offset > 0)) { 1903 free_rx_bufs(q->adap, &rxq->fl, 1); 1904 q->offset = 0; 1905 } 1906 len = RSPD_LEN(len); 1907 } 1908 si.tot_len = len; 1909 1910 /* gather packet fragments */ 1911 for (frags = 0, fp = si.frags; ; frags++, fp++) { 1912 rsd = &rxq->fl.sdesc[rxq->fl.cidx]; 1913 bufsz = get_buf_size(adapter, rsd); 1914 fp->page = rsd->page; 1915 fp->offset = q->offset; 1916 fp->size = min(bufsz, len); 1917 len -= fp->size; 1918 if (!len) 1919 break; 1920 unmap_rx_buf(q->adap, &rxq->fl); 1921 } 1922 1923 /* 1924 * Last buffer remains mapped so explicitly make it 1925 * coherent for CPU access. 1926 */ 1927 dma_sync_single_for_cpu(q->adap->pdev_dev, 1928 get_buf_addr(rsd), 1929 fp->size, DMA_FROM_DEVICE); 1930 1931 si.va = page_address(si.frags[0].page) + 1932 si.frags[0].offset; 1933 prefetch(si.va); 1934 1935 si.nfrags = frags + 1; 1936 ret = q->handler(q, q->cur_desc, &si); 1937 if (likely(ret == 0)) 1938 q->offset += ALIGN(fp->size, s->fl_align); 1939 else 1940 restore_rx_bufs(&si, &rxq->fl, frags); 1941 } else if (likely(rsp_type == RSP_TYPE_CPL)) { 1942 ret = q->handler(q, q->cur_desc, NULL); 1943 } else { 1944 ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN); 1945 } 1946 1947 if (unlikely(ret)) { 1948 /* couldn't process descriptor, back off for recovery */ 1949 q->next_intr_params = QINTR_TIMER_IDX(NOMEM_TMR_IDX); 1950 break; 1951 } 1952 1953 rspq_next(q); 1954 budget_left--; 1955 } 1956 1957 if (q->offset >= 0 && rxq->fl.size - rxq->fl.avail >= 16) 1958 __refill_fl(q->adap, &rxq->fl); 1959 return budget - budget_left; 1960 } 1961 1962 /** 1963 * napi_rx_handler - the NAPI handler for Rx processing 1964 * @napi: the napi instance 1965 * @budget: how many packets we can process in this round 1966 * 1967 * Handler for new data events when using NAPI. 
/**
 * napi_rx_handler - the NAPI handler for Rx processing
 * @napi: the napi instance
 * @budget: how many packets we can process in this round
 *
 * Handler for new data events when using NAPI.  This does not need any
 * locking or protection from interrupts as data interrupts are off at
 * this point and other adapter interrupts do not interfere (the latter
 * is not a concern at all with MSI-X as non-data interrupts then have
 * a separate handler).
 */
static int napi_rx_handler(struct napi_struct *napi, int budget)
{
	unsigned int params;
	struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
	int work_done = process_responses(q, budget);
	u32 val;

	if (likely(work_done < budget)) {
		int timer_index;

		napi_complete(napi);
		timer_index = QINTR_TIMER_IDX_GET(q->next_intr_params);

		if (q->adaptive_rx) {
			if (work_done > max(timer_pkt_quota[timer_index],
					    MIN_NAPI_WORK))
				timer_index = timer_index + 1;
			else
				timer_index = timer_index - 1;

			timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1);
			q->next_intr_params = QINTR_TIMER_IDX(timer_index) |
					      V_QINTR_CNT_EN;
			params = q->next_intr_params;
		} else {
			params = q->next_intr_params;
			q->next_intr_params = q->intr_params;
		}
	} else
		params = QINTR_TIMER_IDX(7);

	val = CIDXINC(work_done) | SEINTARM(params);

	/* If we don't have access to the new User GTS (T5+), use the old
	 * doorbell mechanism; otherwise use the new BAR2 mechanism.
	 */
	if (unlikely(q->bar2_addr == NULL)) {
		t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS),
			     val | INGRESSQID((u32)q->cntxt_id));
	} else {
		writel(val | INGRESSQID(q->bar2_qid),
		       q->bar2_addr + SGE_UDB_GTS);
		wmb();
	}
	return work_done;
}

/*
 * The MSI-X interrupt handler for an SGE response queue.
 */
irqreturn_t t4_sge_intr_msix(int irq, void *cookie)
{
	struct sge_rspq *q = cookie;

	napi_schedule(&q->napi);
	return IRQ_HANDLED;
}

/*
 * Process the indirect interrupt entries in the interrupt queue and kick off
 * NAPI for each queue that has generated an entry.
 */
static unsigned int process_intrq(struct adapter *adap)
{
	unsigned int credits;
	const struct rsp_ctrl *rc;
	struct sge_rspq *q = &adap->sge.intrq;
	u32 val;

	spin_lock(&adap->sge.intrq_lock);
	for (credits = 0; ; credits++) {
		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
		if (!is_new_response(rc, q))
			break;

		rmb();
		if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) {
			unsigned int qid = ntohl(rc->pldbuflen_qid);

			qid -= adap->sge.ingr_start;
			napi_schedule(&adap->sge.ingr_map[qid]->napi);
		}

		rspq_next(q);
	}

	val = CIDXINC(credits) | SEINTARM(q->intr_params);

	/* If we don't have access to the new User GTS (T5+), use the old
	 * doorbell mechanism; otherwise use the new BAR2 mechanism.
	 */
	if (unlikely(q->bar2_addr == NULL)) {
		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
			     val | INGRESSQID(q->cntxt_id));
	} else {
		writel(val | INGRESSQID(q->bar2_qid),
		       q->bar2_addr + SGE_UDB_GTS);
		wmb();
	}
	spin_unlock(&adap->sge.intrq_lock);
	return credits;
}
/*
 * The MSI interrupt handler, which handles data events from SGE response
 * queues as well as error and other async events as they all use the same
 * MSI vector.
 */
static irqreturn_t t4_intr_msi(int irq, void *cookie)
{
	struct adapter *adap = cookie;

	t4_slow_intr_handler(adap);
	process_intrq(adap);
	return IRQ_HANDLED;
}

/*
 * Interrupt handler for legacy INTx interrupts.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt line.
 */
static irqreturn_t t4_intr_intx(int irq, void *cookie)
{
	struct adapter *adap = cookie;

	t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI), 0);
	if (t4_slow_intr_handler(adap) | process_intrq(adap))
		return IRQ_HANDLED;
	return IRQ_NONE;             /* probably shared interrupt */
}

/**
 * t4_intr_handler - select the top-level interrupt handler
 * @adap: the adapter
 *
 * Selects the top-level interrupt handler based on the type of interrupts
 * (MSI-X, MSI, or INTx).
 */
irq_handler_t t4_intr_handler(struct adapter *adap)
{
	if (adap->flags & USING_MSIX)
		return t4_sge_intr_msix;
	if (adap->flags & USING_MSI)
		return t4_intr_msi;
	return t4_intr_intx;
}
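/*
 * For illustration only (the actual IRQ setup lives in the driver's probe
 * code, not in this file): the handler returned above is what would be
 * passed to request_irq() for the MSI/INTx case, e.g. something like
 *
 *	err = request_irq(pdev->irq, t4_intr_handler(adap), IRQF_SHARED,
 *			  "cxgb4", adap);
 *
 * With MSI-X, t4_sge_intr_msix() is instead requested once per response
 * queue with that queue as the cookie.
 */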
static void sge_rx_timer_cb(unsigned long data)
{
	unsigned long m;
	unsigned int i, idma_same_state_cnt[2];
	struct adapter *adap = (struct adapter *)data;
	struct sge *s = &adap->sge;

	for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++)
		for (m = s->starving_fl[i]; m; m &= m - 1) {
			struct sge_eth_rxq *rxq;
			unsigned int id = __ffs(m) + i * BITS_PER_LONG;
			struct sge_fl *fl = s->egr_map[id];

			clear_bit(id, s->starving_fl);
			smp_mb__after_atomic();

			if (fl_starving(fl)) {
				rxq = container_of(fl, struct sge_eth_rxq, fl);
				if (napi_reschedule(&rxq->rspq.napi))
					fl->starving++;
				else
					set_bit(id, s->starving_fl);
			}
		}

	t4_write_reg(adap, SGE_DEBUG_INDEX, 13);
	idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH);
	idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);

	for (i = 0; i < 2; i++) {
		u32 debug0, debug11;

		/* If the Ingress DMA Same State Counter ("timer") is less
		 * than 1s, then we can reset our synthesized Stall Timer and
		 * continue.  If we have previously emitted warnings about a
		 * potential stalled Ingress Queue, issue a note indicating
		 * that the Ingress Queue has resumed forward progress.
		 */
		if (idma_same_state_cnt[i] < s->idma_1s_thresh) {
			if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH)
				CH_WARN(adap, "SGE idma%d, queue%u, resumed after %d sec\n",
					i, s->idma_qid[i],
					s->idma_stalled[i] / HZ);
			s->idma_stalled[i] = 0;
			continue;
		}

		/* Synthesize an SGE Ingress DMA Same State Timer in the Hz
		 * domain.  The first time we get here it'll be because we
		 * passed the 1s Threshold; each additional time it'll be
		 * because the RX Timer Callback is being fired on its regular
		 * schedule.
		 *
		 * If the stall is below our Potential Hung Ingress Queue
		 * Warning Threshold, continue.
		 */
		if (s->idma_stalled[i] == 0)
			s->idma_stalled[i] = HZ;
		else
			s->idma_stalled[i] += RX_QCHECK_PERIOD;

		if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH)
			continue;

		/* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */
		if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0)
			continue;

		/* Read and save the SGE IDMA State and Queue ID information.
		 * We do this every time in case it changes across time ...
		 */
		t4_write_reg(adap, SGE_DEBUG_INDEX, 0);
		debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
		s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;

		t4_write_reg(adap, SGE_DEBUG_INDEX, 11);
		debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
		s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;

		CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n",
			i, s->idma_qid[i], s->idma_state[i],
			s->idma_stalled[i] / HZ, debug0, debug11);
		t4_sge_decode_idma_state(adap, s->idma_state[i]);
	}

	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
}
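/*
 * Periodic Tx queue maintenance: kick the restart tasklet of any Tx queue
 * flagged in txq_maperr, then reclaim completed descriptors from the
 * Ethernet Tx queues in round-robin order starting at ethtxq_rover, within
 * a bounded budget.  If the budget is exhausted, the timer is rearmed after
 * 2 jiffies rather than waiting a full TX_QCHECK_PERIOD.
 */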
static void sge_tx_timer_cb(unsigned long data)
{
	unsigned long m;
	unsigned int i, budget;
	struct adapter *adap = (struct adapter *)data;
	struct sge *s = &adap->sge;

	for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++)
		for (m = s->txq_maperr[i]; m; m &= m - 1) {
			unsigned long id = __ffs(m) + i * BITS_PER_LONG;
			struct sge_ofld_txq *txq = s->egr_map[id];

			clear_bit(id, s->txq_maperr);
			tasklet_schedule(&txq->qresume_tsk);
		}

	budget = MAX_TIMER_TX_RECLAIM;
	i = s->ethtxq_rover;
	do {
		struct sge_eth_txq *q = &s->ethtxq[i];

		if (q->q.in_use &&
		    time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
		    __netif_tx_trylock(q->txq)) {
			int avail = reclaimable(&q->q);

			if (avail) {
				if (avail > budget)
					avail = budget;

				free_tx_desc(adap, &q->q, avail, true);
				q->q.in_use -= avail;
				budget -= avail;
			}
			__netif_tx_unlock(q->txq);
		}

		if (++i >= s->ethqsets)
			i = 0;
	} while (budget && i != s->ethtxq_rover);
	s->ethtxq_rover = i;
	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
}

/**
 * bar2_address - return the BAR2 address for an SGE Queue's Registers
 * @adapter: the adapter
 * @qid: the SGE Queue ID
 * @qtype: the SGE Queue Type (Egress or Ingress)
 * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues
 *
 * Returns the BAR2 address for the SGE Queue Registers associated with
 * @qid.  If BAR2 SGE Registers aren't available, returns NULL.  Also
 * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE
 * Queue Registers.  If the BAR2 Queue ID is 0, then "Inferred Queue ID"
 * Registers are supported (e.g. the Write Combining Doorbell Buffer).
 */
static void __iomem *bar2_address(struct adapter *adapter,
				  unsigned int qid,
				  enum t4_bar2_qtype qtype,
				  unsigned int *pbar2_qid)
{
	u64 bar2_qoffset;
	int ret;

	ret = cxgb4_t4_bar2_sge_qregs(adapter, qid, qtype,
				      &bar2_qoffset, pbar2_qid);
	if (ret)
		return NULL;

	return adapter->bar2 + bar2_qoffset;
}
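/*
 * Callers below resolve and cache the BAR2 doorbell address once per queue
 * at allocation time, for example:
 *
 *	iq->bar2_addr = bar2_address(adap, iq->cntxt_id,
 *				     T4_BAR2_QTYPE_INGRESS, &iq->bar2_qid);
 *
 * A NULL result simply means BAR2 access isn't available, in which case the
 * fast paths (see napi_rx_handler() above) fall back to the legacy
 * SGE_PF_GTS doorbell.
 */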
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
		     struct net_device *dev, int intr_idx,
		     struct sge_fl *fl, rspq_handler_t hnd)
{
	int ret, flsz = 0;
	struct fw_iq_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);

	/* Size needs to be a multiple of 16, including the status entry. */
	iq->size = roundup(iq->size, 16);

	iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
			      &iq->phys_addr, NULL, 0, NUMA_NO_NODE);
	if (!iq->desc)
		return -ENOMEM;

	memset(&c, 0, sizeof(c));
	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_IQ_CMD_PFN_V(adap->fn) | FW_IQ_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC_F | FW_IQ_CMD_IQSTART_F |
				 FW_LEN16(c));
	c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) |
		FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) |
		FW_IQ_CMD_IQANDST_V(intr_idx < 0) | FW_IQ_CMD_IQANUD_V(1) |
		FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? intr_idx :
					 -intr_idx - 1));
	c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) |
		FW_IQ_CMD_IQGTSMODE_F |
		FW_IQ_CMD_IQINTCNTTHRESH_V(iq->pktcnt_idx) |
		FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4));
	c.iqsize = htons(iq->size);
	c.iqaddr = cpu_to_be64(iq->phys_addr);

	if (fl) {
		fl->size = roundup(fl->size, 8);
		fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
				      sizeof(struct rx_sw_desc), &fl->addr,
				      &fl->sdesc, s->stat_len, NUMA_NO_NODE);
		if (!fl->desc)
			goto fl_nomem;

		flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F |
					    FW_IQ_CMD_FL0FETCHRO_F |
					    FW_IQ_CMD_FL0DATARO_F |
					    FW_IQ_CMD_FL0PADEN_F);
		c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) |
						     FW_IQ_CMD_FL0FBMAX_V(3));
		c.fl0size = htons(flsz);
		c.fl0addr = cpu_to_be64(fl->addr);
	}

	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
	if (ret)
		goto err;

	netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
	iq->cur_desc = iq->desc;
	iq->cidx = 0;
	iq->gen = 1;
	iq->next_intr_params = iq->intr_params;
	iq->cntxt_id = ntohs(c.iqid);
	iq->abs_id = ntohs(c.physiqid);
	iq->bar2_addr = bar2_address(adap, iq->cntxt_id,
				     T4_BAR2_QTYPE_INGRESS, &iq->bar2_qid);
	iq->size--;                           /* subtract status entry */
	iq->netdev = dev;
	iq->handler = hnd;

	/* set offset to -1 to distinguish ingress queues without FL */
	iq->offset = fl ? 0 : -1;

	adap->sge.ingr_map[iq->cntxt_id - adap->sge.ingr_start] = iq;

	if (fl) {
		fl->cntxt_id = ntohs(c.fl0id);
		fl->avail = fl->pend_cred = 0;
		fl->pidx = fl->cidx = 0;
		fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
		adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;

		/* Note, we must initialize the BAR2 Free List User Doorbell
		 * information before refilling the Free List!
		 */
		fl->bar2_addr = bar2_address(adap, fl->cntxt_id,
					     T4_BAR2_QTYPE_EGRESS,
					     &fl->bar2_qid);
		refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
	}
	return 0;

fl_nomem:
	ret = -ENOMEM;
err:
	if (iq->desc) {
		dma_free_coherent(adap->pdev_dev, iq->size * iq->iqe_len,
				  iq->desc, iq->phys_addr);
		iq->desc = NULL;
	}
	if (fl && fl->desc) {
		kfree(fl->sdesc);
		fl->sdesc = NULL;
		dma_free_coherent(adap->pdev_dev, flsz * sizeof(struct tx_desc),
				  fl->desc, fl->addr);
		fl->desc = NULL;
	}
	return ret;
}

static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
{
	q->cntxt_id = id;
	q->bar2_addr = bar2_address(adap, q->cntxt_id,
				    T4_BAR2_QTYPE_EGRESS, &q->bar2_qid);
	q->in_use = 0;
	q->cidx = q->pidx = 0;
	q->stops = q->restarts = 0;
	q->stat = (void *)&q->desc[q->size];
	spin_lock_init(&q->db_lock);
	adap->sge.egr_map[id - adap->sge.egr_start] = q;
}

int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
			 struct net_device *dev, struct netdev_queue *netdevq,
			 unsigned int iqid)
{
	int ret, nentries;
	struct fw_eq_eth_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);

	/* Add status entries */
	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);

	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
			netdev_queue_numa_node_read(netdevq));
	if (!txq->q.desc)
		return -ENOMEM;

	memset(&c, 0, sizeof(c));
	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_ETH_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_EQ_ETH_CMD_PFN_V(adap->fn) |
			    FW_EQ_ETH_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
				 FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
	c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
			   FW_EQ_ETH_CMD_VIID_V(pi->viid));
	c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(2) |
				   FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
				   FW_EQ_ETH_CMD_FETCHRO_V(1) |
				   FW_EQ_ETH_CMD_IQID_V(iqid));
	c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN_V(2) |
				  FW_EQ_ETH_CMD_FBMAX_V(3) |
				  FW_EQ_ETH_CMD_CIDXFTHRESH_V(5) |
				  FW_EQ_ETH_CMD_EQSIZE_V(nentries));
	c.eqaddr = cpu_to_be64(txq->q.phys_addr);

	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
	if (ret) {
		kfree(txq->q.sdesc);
		txq->q.sdesc = NULL;
		dma_free_coherent(adap->pdev_dev,
				  nentries * sizeof(struct tx_desc),
				  txq->q.desc, txq->q.phys_addr);
		txq->q.desc = NULL;
		return ret;
	}

	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
	txq->txq = netdevq;
	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
	txq->mapping_err = 0;
	return 0;
}
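/*
 * Allocate and initialize a control Tx queue: no per-descriptor software
 * state is allocated (sw_size is 0), and the queue is given a backlog skb
 * list plus a restart tasklet for when it fills up.
 */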
int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
			  struct net_device *dev, unsigned int iqid,
			  unsigned int cmplqid)
{
	int ret, nentries;
	struct fw_eq_ctrl_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);

	/* Add status entries */
	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);

	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
				 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
				 NULL, 0, NUMA_NO_NODE);
	if (!txq->q.desc)
		return -ENOMEM;

	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_EQ_CTRL_CMD_PFN_V(adap->fn) |
			    FW_EQ_CTRL_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC_F |
				 FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c));
	c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid));
	c.physeqid_pkd = htonl(0);
	c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(2) |
				   FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
				   FW_EQ_CTRL_CMD_FETCHRO_F |
				   FW_EQ_CTRL_CMD_IQID_V(iqid));
	c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN_V(2) |
				  FW_EQ_CTRL_CMD_FBMAX_V(3) |
				  FW_EQ_CTRL_CMD_CIDXFTHRESH_V(5) |
				  FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
	c.eqaddr = cpu_to_be64(txq->q.phys_addr);

	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
	if (ret) {
		dma_free_coherent(adap->pdev_dev,
				  nentries * sizeof(struct tx_desc),
				  txq->q.desc, txq->q.phys_addr);
		txq->q.desc = NULL;
		return ret;
	}

	init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
	txq->adap = adap;
	skb_queue_head_init(&txq->sendq);
	tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
	txq->full = 0;
	return 0;
}

int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
			  struct net_device *dev, unsigned int iqid)
{
	int ret, nentries;
	struct fw_eq_ofld_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);

	/* Add status entries */
	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);

	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
			NUMA_NO_NODE);
	if (!txq->q.desc)
		return -ENOMEM;

	memset(&c, 0, sizeof(c));
	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_EQ_OFLD_CMD_PFN_V(adap->fn) |
			    FW_EQ_OFLD_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F |
				 FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c));
	c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(2) |
				   FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
				   FW_EQ_OFLD_CMD_FETCHRO_F |
				   FW_EQ_OFLD_CMD_IQID_V(iqid));
	c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN_V(2) |
				  FW_EQ_OFLD_CMD_FBMAX_V(3) |
				  FW_EQ_OFLD_CMD_CIDXFTHRESH_V(5) |
				  FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
	c.eqaddr = cpu_to_be64(txq->q.phys_addr);

	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
	if (ret) {
		kfree(txq->q.sdesc);
		txq->q.sdesc = NULL;
		dma_free_coherent(adap->pdev_dev,
				  nentries * sizeof(struct tx_desc),
				  txq->q.desc, txq->q.phys_addr);
		txq->q.desc = NULL;
		return ret;
	}

	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
	txq->adap = adap;
	skb_queue_head_init(&txq->sendq);
	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
	txq->full = 0;
	txq->mapping_err = 0;
	return 0;
}

static void free_txq(struct adapter *adap, struct sge_txq *q)
{
	struct sge *s = &adap->sge;

	dma_free_coherent(adap->pdev_dev,
			  q->size * sizeof(struct tx_desc) + s->stat_len,
			  q->desc, q->phys_addr);
	q->cntxt_id = 0;
	q->sdesc = NULL;
	q->desc = NULL;
}
static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
			 struct sge_fl *fl)
{
	struct sge *s = &adap->sge;
	unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;

	adap->sge.ingr_map[rq->cntxt_id - adap->sge.ingr_start] = NULL;
	t4_iq_free(adap, adap->fn, adap->fn, 0, FW_IQ_TYPE_FL_INT_CAP,
		   rq->cntxt_id, fl_id, 0xffff);
	dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
			  rq->desc, rq->phys_addr);
	netif_napi_del(&rq->napi);
	rq->netdev = NULL;
	rq->cntxt_id = rq->abs_id = 0;
	rq->desc = NULL;

	if (fl) {
		free_rx_bufs(adap, fl, fl->avail);
		dma_free_coherent(adap->pdev_dev, fl->size * 8 + s->stat_len,
				  fl->desc, fl->addr);
		kfree(fl->sdesc);
		fl->sdesc = NULL;
		fl->cntxt_id = 0;
		fl->desc = NULL;
	}
}

/**
 * t4_free_ofld_rxqs - free a block of consecutive Rx queues
 * @adap: the adapter
 * @n: number of queues
 * @q: pointer to first queue
 *
 * Release the resources of a consecutive block of offload Rx queues.
 */
void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
{
	for ( ; n; n--, q++)
		if (q->rspq.desc)
			free_rspq_fl(adap, &q->rspq,
				     q->fl.size ? &q->fl : NULL);
}
/**
 * t4_free_sge_resources - free SGE resources
 * @adap: the adapter
 *
 * Frees resources used by the SGE queue sets.
 */
void t4_free_sge_resources(struct adapter *adap)
{
	int i;
	struct sge_eth_rxq *eq = adap->sge.ethrxq;
	struct sge_eth_txq *etq = adap->sge.ethtxq;

	/* clean up Ethernet Tx/Rx queues */
	for (i = 0; i < adap->sge.ethqsets; i++, eq++, etq++) {
		if (eq->rspq.desc)
			free_rspq_fl(adap, &eq->rspq,
				     eq->fl.size ? &eq->fl : NULL);
		if (etq->q.desc) {
			t4_eth_eq_free(adap, adap->fn, adap->fn, 0,
				       etq->q.cntxt_id);
			free_tx_desc(adap, &etq->q, etq->q.in_use, true);
			kfree(etq->q.sdesc);
			free_txq(adap, &etq->q);
		}
	}

	/* clean up RDMA and iSCSI Rx queues */
	t4_free_ofld_rxqs(adap, adap->sge.ofldqsets, adap->sge.ofldrxq);
	t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq);
	t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq);

	/* clean up offload Tx queues */
	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
		struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];

		if (q->q.desc) {
			tasklet_kill(&q->qresume_tsk);
			t4_ofld_eq_free(adap, adap->fn, adap->fn, 0,
					q->q.cntxt_id);
			free_tx_desc(adap, &q->q, q->q.in_use, false);
			kfree(q->q.sdesc);
			__skb_queue_purge(&q->sendq);
			free_txq(adap, &q->q);
		}
	}

	/* clean up control Tx queues */
	for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
		struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];

		if (cq->q.desc) {
			tasklet_kill(&cq->qresume_tsk);
			t4_ctrl_eq_free(adap, adap->fn, adap->fn, 0,
					cq->q.cntxt_id);
			__skb_queue_purge(&cq->sendq);
			free_txq(adap, &cq->q);
		}
	}

	if (adap->sge.fw_evtq.desc)
		free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);

	if (adap->sge.intrq.desc)
		free_rspq_fl(adap, &adap->sge.intrq, NULL);

	/* clear the reverse egress queue map */
	memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map));
}

void t4_sge_start(struct adapter *adap)
{
	adap->sge.ethtxq_rover = 0;
	mod_timer(&adap->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
	mod_timer(&adap->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
}

/**
 * t4_sge_stop - disable SGE operation
 * @adap: the adapter
 *
 * Stop tasklets and timers associated with the DMA engine.  Note that
 * this is effective only if measures have been taken to disable any HW
 * events that may restart them.
 */
void t4_sge_stop(struct adapter *adap)
{
	int i;
	struct sge *s = &adap->sge;

	if (in_interrupt())  /* actions below require waiting */
		return;

	if (s->rx_timer.function)
		del_timer_sync(&s->rx_timer);
	if (s->tx_timer.function)
		del_timer_sync(&s->tx_timer);

	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) {
		struct sge_ofld_txq *q = &s->ofldtxq[i];

		if (q->q.desc)
			tasklet_kill(&q->qresume_tsk);
	}
	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
		struct sge_ctrl_txq *cq = &s->ctrlq[i];

		if (cq->q.desc)
			tasklet_kill(&cq->qresume_tsk);
	}
}
/**
 * t4_sge_init - initialize SGE
 * @adap: the adapter
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queues here; instead the driver
 * top-level must request them individually.
 *
 * Called in two different modes:
 *
 *  1. Perform actual hardware initialization and record hard-coded
 *     parameters which were used.  This gets used when we're the
 *     Master PF and the Firmware Configuration File support didn't
 *     work for some reason.
 *
 *  2. We're not the Master PF or initialization was performed with
 *     a Firmware Configuration File.  In this case we need to grab
 *     any of the SGE operating parameters that we need to have in
 *     order to do our job and make sure we can live with them ...
 */

static int t4_sge_init_soft(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	u32 fl_small_pg, fl_large_pg, fl_small_mtu, fl_large_mtu;
	u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
	u32 ingress_rx_threshold;

	/*
	 * Verify that CPL messages are going to the Ingress Queue for
	 * process_responses() and that only packet data is going to the
	 * Free Lists.
	 */
	if ((t4_read_reg(adap, SGE_CONTROL) & RXPKTCPLMODE_MASK) !=
	    RXPKTCPLMODE(X_RXPKTCPLMODE_SPLIT)) {
		dev_err(adap->pdev_dev, "bad SGE CPL MODE\n");
		return -EINVAL;
	}

	/*
	 * Validate the Host Buffer Register Array indices that we want to
	 * use ...
	 *
	 * XXX Note that we should really read through the Host Buffer Size
	 * XXX register array and find the indices of the Buffer Sizes which
	 * XXX meet our needs!
	 */
#define READ_FL_BUF(x) \
	t4_read_reg(adap, SGE_FL_BUFFER_SIZE0 + (x) * sizeof(u32))

	fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF);
	fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF);
	fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF);
	fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF);

	/* We only bother using the Large Page logic if the Large Page Buffer
	 * is larger than our Page Size Buffer.
	 */
	if (fl_large_pg <= fl_small_pg)
		fl_large_pg = 0;

#undef READ_FL_BUF

	/* The Page Size Buffer must be exactly equal to our Page Size and the
	 * Large Page Size Buffer should be 0 (per above) or a power of 2.
	 */
	if (fl_small_pg != PAGE_SIZE ||
	    (fl_large_pg & (fl_large_pg - 1)) != 0) {
		dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n",
			fl_small_pg, fl_large_pg);
		return -EINVAL;
	}
	if (fl_large_pg)
		s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT;
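	/*
	 * Worked example (illustrative values, not read from hardware): with
	 * a 4KB PAGE_SIZE the check above requires fl_small_pg == 4096, and
	 * a 64KB fl_large_pg would give fl_pg_order = ilog2(65536) - 12 = 4,
	 * i.e. large Free List buffers of 16 pages.
	 */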
	if (fl_small_mtu < FL_MTU_SMALL_BUFSIZE(adap) ||
	    fl_large_mtu < FL_MTU_LARGE_BUFSIZE(adap)) {
		dev_err(adap->pdev_dev, "bad SGE FL MTU sizes [%d, %d]\n",
			fl_small_mtu, fl_large_mtu);
		return -EINVAL;
	}

	/*
	 * Retrieve our RX interrupt holdoff timer values and counter
	 * threshold values from the SGE parameters.
	 */
	timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1);
	timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3);
	timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5);
	s->timer_val[0] = core_ticks_to_us(adap,
					   TIMERVALUE0_GET(timer_value_0_and_1));
	s->timer_val[1] = core_ticks_to_us(adap,
					   TIMERVALUE1_GET(timer_value_0_and_1));
	s->timer_val[2] = core_ticks_to_us(adap,
					   TIMERVALUE2_GET(timer_value_2_and_3));
	s->timer_val[3] = core_ticks_to_us(adap,
					   TIMERVALUE3_GET(timer_value_2_and_3));
	s->timer_val[4] = core_ticks_to_us(adap,
					   TIMERVALUE4_GET(timer_value_4_and_5));
	s->timer_val[5] = core_ticks_to_us(adap,
					   TIMERVALUE5_GET(timer_value_4_and_5));

	ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD);
	s->counter_val[0] = THRESHOLD_0_GET(ingress_rx_threshold);
	s->counter_val[1] = THRESHOLD_1_GET(ingress_rx_threshold);
	s->counter_val[2] = THRESHOLD_2_GET(ingress_rx_threshold);
	s->counter_val[3] = THRESHOLD_3_GET(ingress_rx_threshold);

	return 0;
}

static int t4_sge_init_hard(struct adapter *adap)
{
	struct sge *s = &adap->sge;

	/*
	 * Set up our basic SGE mode to deliver CPL messages to our Ingress
	 * Queue and Packet Data to the Free List.
	 */
	t4_set_reg_field(adap, SGE_CONTROL, RXPKTCPLMODE_MASK,
			 RXPKTCPLMODE_MASK);

	/*
	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
	 * and generate an interrupt when this occurs so we can recover.
	 */
	if (is_t4(adap->params.chip)) {
		t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS,
				 V_HP_INT_THRESH(M_HP_INT_THRESH) |
				 V_LP_INT_THRESH(M_LP_INT_THRESH),
				 V_HP_INT_THRESH(dbfifo_int_thresh) |
				 V_LP_INT_THRESH(dbfifo_int_thresh));
	} else {
		t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS,
				 V_LP_INT_THRESH_T5(M_LP_INT_THRESH_T5),
				 V_LP_INT_THRESH_T5(dbfifo_int_thresh));
		t4_set_reg_field(adap, SGE_DBFIFO_STATUS2,
				 V_HP_INT_THRESH_T5(M_HP_INT_THRESH_T5),
				 V_HP_INT_THRESH_T5(dbfifo_int_thresh));
	}
	t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP,
			 F_ENABLE_DROP);

	/*
	 * SGE_FL_BUFFER_SIZE0 (RX_SMALL_PG_BUF) is set up by
	 * t4_fixup_host_params().
	 */
	s->fl_pg_order = FL_PG_ORDER;
	if (s->fl_pg_order)
		t4_write_reg(adap,
			     SGE_FL_BUFFER_SIZE0 + RX_LARGE_PG_BUF * sizeof(u32),
			     PAGE_SIZE << FL_PG_ORDER);
	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0 + RX_SMALL_MTU_BUF * sizeof(u32),
		     FL_MTU_SMALL_BUFSIZE(adap));
	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0 + RX_LARGE_MTU_BUF * sizeof(u32),
		     FL_MTU_LARGE_BUFSIZE(adap));
	/*
	 * Note that the SGE Ingress Packet Count Interrupt Threshold and
	 * Timer Holdoff values must be supplied by our caller.
	 */
	t4_write_reg(adap, SGE_INGRESS_RX_THRESHOLD,
		     THRESHOLD_0(s->counter_val[0]) |
		     THRESHOLD_1(s->counter_val[1]) |
		     THRESHOLD_2(s->counter_val[2]) |
		     THRESHOLD_3(s->counter_val[3]));
	t4_write_reg(adap, SGE_TIMER_VALUE_0_AND_1,
		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[0])) |
		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[1])));
	t4_write_reg(adap, SGE_TIMER_VALUE_2_AND_3,
		     TIMERVALUE2(us_to_core_ticks(adap, s->timer_val[2])) |
		     TIMERVALUE3(us_to_core_ticks(adap, s->timer_val[3])));
	t4_write_reg(adap, SGE_TIMER_VALUE_4_AND_5,
		     TIMERVALUE4(us_to_core_ticks(adap, s->timer_val[4])) |
		     TIMERVALUE5(us_to_core_ticks(adap, s->timer_val[5])));

	return 0;
}

int t4_sge_init(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	u32 sge_control, sge_control2, sge_conm_ctrl;
	unsigned int ingpadboundary, ingpackboundary;
	int ret, egress_threshold;

	/*
	 * Ingress Padding Boundary and Egress Status Page Size are set up by
	 * t4_fixup_host_params().
	 */
	sge_control = t4_read_reg(adap, SGE_CONTROL);
	s->pktshift = PKTSHIFT_GET(sge_control);
	s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64;

	/* T4 uses a single control field to specify both the PCIe Padding and
	 * Packing Boundary.  T5 introduced the ability to specify these
	 * separately.  The actual Ingress Packet Data alignment boundary
	 * within Packed Buffer Mode is the maximum of these two
	 * specifications.
	 */
	ingpadboundary = 1 << (INGPADBOUNDARY_GET(sge_control) +
			       X_INGPADBOUNDARY_SHIFT);
	if (is_t4(adap->params.chip)) {
		s->fl_align = ingpadboundary;
	} else {
		/* T5 has a different interpretation of one of the PCIe Packing
		 * Boundary values.
		 */
		sge_control2 = t4_read_reg(adap, SGE_CONTROL2_A);
		ingpackboundary = INGPACKBOUNDARY_G(sge_control2);
		if (ingpackboundary == INGPACKBOUNDARY_16B_X)
			ingpackboundary = 16;
		else
			ingpackboundary = 1 << (ingpackboundary +
						INGPACKBOUNDARY_SHIFT_X);

		s->fl_align = max(ingpadboundary, ingpackboundary);
	}
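	/*
	 * Worked example (illustrative register values): on T5, an ingress
	 * padding boundary of 32 bytes combined with a packing boundary of
	 * 64 bytes yields fl_align = max(32, 64) = 64, so Free List packet
	 * data is placed on 64-byte boundaries.
	 */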
	if (adap->flags & USING_SOFT_PARAMS)
		ret = t4_sge_init_soft(adap);
	else
		ret = t4_sge_init_hard(adap);
	if (ret < 0)
		return ret;

	/*
	 * A FL with <= fl_starve_thres buffers is starving and a periodic
	 * timer will attempt to refill it.  This needs to be larger than the
	 * SGE's Egress Congestion Threshold.  If it isn't, then we can get
	 * stuck waiting for new packets while the SGE is waiting for us to
	 * give it more Free List entries.  (Note that the SGE's Egress
	 * Congestion Threshold is in units of 2 Free List pointers.)  For T4,
	 * there was only a single field to control this.  For T5 there's the
	 * original field which now only applies to Unpacked Mode Free List
	 * buffers and a new field which only applies to Packed Mode Free List
	 * buffers.
	 */
	sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL);
	if (is_t4(adap->params.chip))
		egress_threshold = EGRTHRESHOLD_GET(sge_conm_ctrl);
	else
		egress_threshold = EGRTHRESHOLDPACKING_GET(sge_conm_ctrl);
	s->fl_starve_thres = 2 * egress_threshold + 1;

	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
	s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000;  /* 1 s */
	s->idma_stalled[0] = 0;
	s->idma_stalled[1] = 0;
	spin_lock_init(&s->intrq_lock);

	return 0;
}