// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)

#include <linux/bpf_trace.h>
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
#include "funeth_txrx.h"
#include "funeth.h"
#include "fun_queue.h"

#define CREATE_TRACE_POINTS
#include "funeth_trace.h"

/* Given the device's max supported MTU and pages of at least 4KB a packet can
 * be scattered into at most 4 buffers.
 */
#define RX_MAX_FRAGS 4

/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */
#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)

/* We try to reuse pages for our buffers. To avoid frequent page ref writes we
 * take EXTRA_PAGE_REFS references at once and then hand them out one per packet
 * occupying the buffer.
 */
#define EXTRA_PAGE_REFS 1000000
#define MIN_PAGE_REFS 1000

enum {
	FUN_XDP_FLUSH_REDIR = 1,
	FUN_XDP_FLUSH_TX = 2,
};

/* See if a page is running low on refs we are holding and if so take more. */
static void refresh_refs(struct funeth_rxbuf *buf)
{
	if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) {
		buf->pg_refs += EXTRA_PAGE_REFS;
		page_ref_add(buf->page, EXTRA_PAGE_REFS);
	}
}

/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its
 * page is worth retaining and there's room for it. Otherwise the page is
 * unmapped and our references released.
 */
static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
{
	struct funeth_rx_cache *c = &q->cache;

	if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) {
		c->bufs[c->prod_cnt & c->mask] = *buf;
		c->prod_cnt++;
	} else {
		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
		__page_frag_cache_drain(buf->page, buf->pg_refs);
	}
}

/* Get a page from the Rx buffer cache. We only consider the next available
 * page and return it if we own all its references.
 */
static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
{
	struct funeth_rx_cache *c = &q->cache;
	struct funeth_rxbuf *buf;

	if (c->prod_cnt == c->cons_cnt)
		return false; /* empty cache */

	buf = &c->bufs[c->cons_cnt & c->mask];
	if (page_ref_count(buf->page) == buf->pg_refs) {
		dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
					   PAGE_SIZE, DMA_FROM_DEVICE);
		*rb = *buf;
		buf->page = NULL;
		refresh_refs(rb);
		c->cons_cnt++;
		return true;
	}

	/* Page can't be reused. If the cache is full drop this page. */
	if (c->prod_cnt - c->cons_cnt > c->mask) {
		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
		__page_frag_cache_drain(buf->page, buf->pg_refs);
		buf->page = NULL;
		c->cons_cnt++;
	}
	return false;
}
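
/* Summary of the refcounting used by refresh_refs() and the cache above
 * (an illustrative restatement of the existing logic, not a new mechanism):
 * pg_refs counts how many references to the page this driver still holds.
 * References are taken in bulk (EXTRA_PAGE_REFS) so handing one out per
 * packet only touches the local counter, and a page is considered reusable
 * when page_ref_count() equals pg_refs, i.e. no one outside the driver
 * (stack, XDP) still holds it.
 */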

/* Allocate and DMA-map a page for receive. */
static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
			     int node, gfp_t gfp)
{
	struct page *p;

	if (cache_get(q, rb))
		return 0;

	p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
	if (unlikely(!p))
		return -ENOMEM;

	rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE,
				    DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) {
		FUN_QSTAT_INC(q, rx_map_err);
		__free_page(p);
		return -ENOMEM;
	}

	FUN_QSTAT_INC(q, rx_page_alloc);

	rb->page = p;
	rb->pg_refs = 1;
	refresh_refs(rb);
	rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p);
	return 0;
}

static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
{
	if (rb->page) {
		dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE,
			       DMA_FROM_DEVICE);
		__page_frag_cache_drain(rb->page, rb->pg_refs);
		rb->page = NULL;
	}
}

/* Run the XDP program assigned to an Rx queue.
 * Return %NULL if the buffer is consumed, or the virtual address of the packet
 * to turn into an skb.
 */
static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
			 int ref_ok, struct funeth_txq *xdp_q)
{
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	struct xdp_buff xdp;
	u32 act;

	/* VA includes the headroom, frag size includes headroom + tailroom */
	xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
		      &q->xdp_rxq);
	xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) -
			 (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false);

	xdp_prog = READ_ONCE(q->xdp_prog);
	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	switch (act) {
	case XDP_PASS:
		/* remove headroom, which may not be FUN_XDP_HEADROOM now */
		skb_frag_size_set(frags, xdp.data_end - xdp.data);
		skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
		goto pass;
	case XDP_TX:
		if (unlikely(!ref_ok))
			goto pass;

		xdpf = xdp_convert_buff_to_frame(&xdp);
		if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
			goto xdp_error;
		FUN_QSTAT_INC(q, xdp_tx);
		q->xdp_flush |= FUN_XDP_FLUSH_TX;
		break;
	case XDP_REDIRECT:
		if (unlikely(!ref_ok))
			goto pass;
		if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog)))
			goto xdp_error;
		FUN_QSTAT_INC(q, xdp_redir);
		q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
		break;
	default:
		bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(q->netdev, xdp_prog, act);
xdp_error:
		q->cur_buf->pg_refs++; /* return frags' page reference */
		FUN_QSTAT_INC(q, xdp_err);
		break;
	case XDP_DROP:
		q->cur_buf->pg_refs++;
		FUN_QSTAT_INC(q, xdp_drops);
		break;
	}
	return NULL;

pass:
	return xdp.data;
}
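
/* Note on the xdp_buff geometry set up above (a summary of the existing
 * logic, not a new constraint): the frame size passed to xdp_init_buff() is
 * the whole aligned fragment, while xdp_prepare_buff() exposes only the
 * packet bytes, i.e. the fragment minus FUN_XDP_HEADROOM at the front and
 * FUN_RX_TAILROOM at the back.  On XDP_PASS any head adjustment the program
 * made is folded back into the frag's offset and size before skb creation.
 */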

/* A CQE contains a fixed completion structure along with optional metadata and
 * even packet data. Given the start address of a CQE return the start of the
 * contained fixed structure, which lies at the end.
 */
static const void *cqe_to_info(const void *cqe)
{
	return cqe + FUNETH_CQE_INFO_OFFSET;
}

/* The inverse of cqe_to_info(). */
static const void *info_to_cqe(const void *cqe_info)
{
	return cqe_info - FUNETH_CQE_INFO_OFFSET;
}

/* Return the type of hash provided by the device based on the L3 and L4
 * protocols it parsed for the packet.
 */
static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse)
{
	static const enum pkt_hash_types htype_map[] = {
		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4,
		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3
	};
	u16 key;

	/* Build the key from the TCP/UDP and IP/IPv6 bits */
	key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) |
	      ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1);

	return htype_map[key];
}

/* Each received packet can be scattered across several Rx buffers or can
 * share a buffer with previously received packets depending on the buffer
 * and packet sizes and the room available in the most recently used buffer.
 *
 * The rules are:
 * - If the buffer at the head of an RQ has not been used it gets (part of) the
 *   next incoming packet.
 * - Otherwise, if the packet fully fits in the buffer's remaining space the
 *   packet is written there.
 * - Otherwise, the packet goes into the next Rx buffer.
 *
 * This function returns the Rx buffer for a packet or fragment thereof of the
 * given length. If it isn't @buf it either recycles or frees that buffer
 * before advancing the queue to the next buffer.
 *
 * If called repeatedly with the remaining length of a packet it will walk
 * through all the buffers containing the packet.
 */
static struct funeth_rxbuf *
get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len)
{
	if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset)
		return buf; /* @buf holds (part of) the packet */

	/* The packet occupies part of the next buffer. Move there after
	 * replenishing the current buffer slot either with the spare page or
	 * by reusing the slot's existing page. Note that if a spare page isn't
	 * available and the current packet occupies @buf it is a multi-frag
	 * packet that will be dropped leaving @buf available for reuse.
	 */
	if ((page_ref_count(buf->page) == buf->pg_refs &&
	     buf->node == numa_mem_id()) || !q->spare_buf.page) {
		dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
					   PAGE_SIZE, DMA_FROM_DEVICE);
		refresh_refs(buf);
	} else {
		cache_offer(q, buf);
		*buf = q->spare_buf;
		q->spare_buf.page = NULL;
		q->rqes[q->rq_cons & q->rq_mask] =
			FUN_EPRQ_RQBUF_INIT(buf->dma_addr);
	}
	q->buf_offset = 0;
	q->rq_cons++;
	return &q->bufs[q->rq_cons & q->rq_mask];
}
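
/* Worked example of the packing rules above (illustrative numbers, assuming
 * 4KB pages): with buf_offset at 3KB a 512B packet still fits and is placed
 * at offset 3KB of the same buffer, whereas a 2KB packet does not fit, so the
 * current buffer is recycled or cached and the packet starts at offset 0 of
 * the next buffer.
 */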

/* Gather the page fragments making up the first Rx packet on @q. Its total
 * length @tot_len includes optional head- and tail-rooms.
 *
 * Return 0 if the device retains ownership of at least some of the pages.
 * In this case the caller may only copy the packet.
 *
 * A non-zero return value gives the caller permission to use references to the
 * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least
 * one of the pages is PF_MEMALLOC.
 *
 * Regardless of outcome the caller is granted a reference to each of the pages.
 */
static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
			  skb_frag_t *frags)
{
	struct funeth_rxbuf *buf = q->cur_buf;
	unsigned int frag_len;
	int ref_ok = 1;

	for (;;) {
		buf = get_buf(q, buf, tot_len);

		/* We always keep the RQ full of buffers so before we can give
		 * one of our pages to the stack we require that we can obtain
		 * a replacement page. If we can't the packet will either be
		 * copied or dropped so we can retain ownership of the page and
		 * reuse it.
		 */
		if (!q->spare_buf.page &&
		    funeth_alloc_page(q, &q->spare_buf, numa_mem_id(),
				      GFP_ATOMIC | __GFP_MEMALLOC))
			ref_ok = 0;

		frag_len = min_t(unsigned int, tot_len,
				 PAGE_SIZE - q->buf_offset);
		dma_sync_single_for_cpu(q->dma_dev,
					buf->dma_addr + q->buf_offset,
					frag_len, DMA_FROM_DEVICE);
		buf->pg_refs--;
		if (ref_ok)
			ref_ok |= buf->node;

		__skb_frag_set_page(frags, buf->page);
		skb_frag_off_set(frags, q->buf_offset);
		skb_frag_size_set(frags++, frag_len);

		tot_len -= frag_len;
		if (!tot_len)
			break;

		q->buf_offset = PAGE_SIZE;
	}
	q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN);
	q->cur_buf = buf;
	return ref_ok;
}
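
/* How the fun_gather_pkt() return value is encoded (restating the code
 * above): ref_ok drops to 0 as soon as a replacement page can't be allocated,
 * otherwise it ORs in each buffer's node, which funeth_alloc_page() set to -1
 * for pfmemalloc pages.  Hence 0 means "copy only", a negative value means
 * "usable, but mark the skb pfmemalloc".
 */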

static bool rx_hwtstamp_enabled(const struct net_device *dev)
{
	const struct funeth_priv *d = netdev_priv(dev);

	return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL;
}

/* Advance the CQ pointers and phase tag to the next CQE. */
static void advance_cq(struct funeth_rxq *q)
{
	if (unlikely(q->cq_head == q->cq_mask)) {
		q->cq_head = 0;
		q->phase ^= 1;
		q->next_cqe_info = cqe_to_info(q->cqes);
	} else {
		q->cq_head++;
		q->next_cqe_info += FUNETH_CQE_SIZE;
	}
	prefetch(q->next_cqe_info);
}

/* Process the packet represented by the head CQE of @q. Gather the packet's
 * fragments, run it through the optional XDP program, and if needed construct
 * an skb and pass it to the stack.
 */
static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q)
{
	const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info);
	unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len);
	struct net_device *ndev = q->netdev;
	skb_frag_t frags[RX_MAX_FRAGS];
	struct skb_shared_info *si;
	unsigned int headroom;
	gro_result_t gro_res;
	struct sk_buff *skb;
	int ref_ok;
	void *va;
	u16 cv;

	u64_stats_update_begin(&q->syncp);
	q->stats.rx_pkts++;
	q->stats.rx_bytes += pkt_len;
	u64_stats_update_end(&q->syncp);

	advance_cq(q);

	/* account for head- and tail-room, present only for 1-buffer packets */
	tot_len = pkt_len;
	headroom = be16_to_cpu(rxreq->headroom);
	if (likely(headroom))
		tot_len += FUN_RX_TAILROOM + headroom;

	ref_ok = fun_gather_pkt(q, tot_len, frags);
	va = skb_frag_address(frags);
	if (xdp_q && headroom == FUN_XDP_HEADROOM) {
		va = fun_run_xdp(q, frags, va, ref_ok, xdp_q);
		if (!va)
			return;
		headroom = 0; /* XDP_PASS trims it */
	}
	if (unlikely(!ref_ok))
		goto no_mem;

	if (likely(headroom)) {
		/* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */
		prefetch(va + headroom);
		skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN));
		if (unlikely(!skb))
			goto no_mem;

		skb_reserve(skb, headroom);
		__skb_put(skb, pkt_len);
		skb->protocol = eth_type_trans(skb, ndev);
	} else {
		prefetch(va);
		skb = napi_get_frags(q->napi);
		if (unlikely(!skb))
			goto no_mem;

		if (ref_ok < 0)
			skb->pfmemalloc = 1;

		si = skb_shinfo(skb);
		si->nr_frags = rxreq->nsgl;
		for (i = 0; i < si->nr_frags; i++)
			si->frags[i] = frags[i];

		skb->len = pkt_len;
		skb->data_len = pkt_len;
		skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN);
	}

	skb_record_rx_queue(skb, q->qidx);
	cv = be16_to_cpu(rxreq->pkt_cv);
	if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash))
		skb_set_hash(skb, be32_to_cpu(rxreq->hash),
			     cqe_to_pkt_hash_type(cv));
	if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) {
		FUN_QSTAT_INC(q, rx_cso);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = be16_to_cpu(rxreq->csum) - 1;
	}
	if (unlikely(rx_hwtstamp_enabled(q->netdev)))
		skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp);

	trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv);

	gro_res = skb->data_len ? napi_gro_frags(q->napi) :
				  napi_gro_receive(q->napi, skb);
	if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE)
		FUN_QSTAT_INC(q, gro_merged);
	else if (gro_res == GRO_HELD)
		FUN_QSTAT_INC(q, gro_pkts);
	return;

no_mem:
	FUN_QSTAT_INC(q, rx_mem_drops);

	/* Release the references we've been granted for the frag pages.
	 * We return the ref of the last frag and free the rest.
	 */
	q->cur_buf->pg_refs++;
	for (i = 0; i < rxreq->nsgl - 1; i++)
		__free_page(skb_frag_page(frags + i));
}

/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations
 * indicating the CQE is new.
 */
static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase)
{
	u16 sf_p = be16_to_cpu(ci->sf_p);

	return (sf_p & 1) ^ phase;
}
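
/* Background on the phase check above (inferred from this file's logic):
 * each CQE carries a phase bit and, as implied by advance_cq() flipping
 * q->phase whenever the CQ wraps, the value the device writes alternates on
 * every pass through the ring.  A CQE whose phase bit still matches the
 * previous pass is therefore recognized as stale and processing stops there.
 */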

/* Walk through a CQ identifying and processing fresh CQEs up to the given
 * budget. Return the remaining budget.
 */
static int fun_process_cqes(struct funeth_rxq *q, int budget)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	struct funeth_txq **xdpqs, *xdp_q = NULL;

	xdpqs = rcu_dereference_bh(fp->xdpqs);
	if (xdpqs)
		xdp_q = xdpqs[smp_processor_id()];

	while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) {
		/* access other descriptor fields after the phase check */
		dma_rmb();

		fun_handle_cqe_pkt(q, xdp_q);
		budget--;
	}

	if (unlikely(q->xdp_flush)) {
		if (q->xdp_flush & FUN_XDP_FLUSH_TX)
			fun_txq_wr_db(xdp_q);
		if (q->xdp_flush & FUN_XDP_FLUSH_REDIR)
			xdp_do_flush();
		q->xdp_flush = 0;
	}

	return budget;
}

/* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ
 * doorbells as needed.
 */
int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
{
	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
	struct funeth_rxq *q = irq->rxq;
	int work_done = budget - fun_process_cqes(q, budget);
	u32 cq_db_val = q->cq_head;

	if (unlikely(work_done >= budget))
		FUN_QSTAT_INC(q, rx_budget);
	else if (napi_complete_done(napi, work_done))
		cq_db_val |= q->irq_db_val;

	/* check whether to post new Rx buffers */
	if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
		u64_stats_update_begin(&q->syncp);
		q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
		u64_stats_update_end(&q->syncp);
		q->rq_cons_db = q->rq_cons;
		writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
	}

	writel(cq_db_val, q->cq_db);
	return work_done;
}
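
/* Note on the doorbell handling above (descriptive only): Rx buffers are
 * replenished in place as packets are consumed, so the RQ doorbell merely
 * tells the device how far the host has refilled.  It is rung lazily, only
 * once at least rq_db_thres (a quarter of the ring, see fun_rxq_create_sw())
 * buffers have been turned over since the last write.
 */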

/* Free the Rx buffers of an Rx queue. */
static void fun_rxq_free_bufs(struct funeth_rxq *q)
{
	struct funeth_rxbuf *b = q->bufs;
	unsigned int i;

	for (i = 0; i <= q->rq_mask; i++, b++)
		funeth_free_page(q, b);

	funeth_free_page(q, &q->spare_buf);
	q->cur_buf = NULL;
}

/* Initially provision an Rx queue with Rx buffers. */
static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
{
	struct funeth_rxbuf *b = q->bufs;
	unsigned int i;

	for (i = 0; i <= q->rq_mask; i++, b++) {
		if (funeth_alloc_page(q, b, node, GFP_KERNEL)) {
			fun_rxq_free_bufs(q);
			return -ENOMEM;
		}
		q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr);
	}
	q->cur_buf = q->bufs;
	return 0;
}

/* Initialize a used-buffer cache of the given depth. */
static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
			      int node)
{
	c->mask = depth - 1;
	c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
	return c->bufs ? 0 : -ENOMEM;
}

/* Deallocate an Rx queue's used-buffer cache and its contents. */
static void fun_rxq_free_cache(struct funeth_rxq *q)
{
	struct funeth_rxbuf *b = q->cache.bufs;
	unsigned int i;

	for (i = 0; i <= q->cache.mask; i++, b++)
		funeth_free_page(q, b);

	kvfree(q->cache.bufs);
	q->cache.bufs = NULL;
}

int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	struct fun_admin_epcq_req cmd;
	u16 headroom;
	int err;

	headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
	if (headroom != q->headroom) {
		cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
							sizeof(cmd));
		cmd.u.modify =
			FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY,
						       0, q->hw_cqid, headroom);
		err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0,
						0);
		if (err)
			return err;
		q->headroom = headroom;
	}

	WRITE_ONCE(q->xdp_prog, prog);
	return 0;
}

/* Create an Rx queue, allocating the host memory it needs. */
static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev,
					    unsigned int qidx,
					    unsigned int ncqe,
					    unsigned int nrqe,
					    struct fun_irq *irq)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct funeth_rxq *q;
	int err = -ENOMEM;
	int numa_node;

	numa_node = fun_irq_node(irq);
	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
	if (!q)
		goto err;

	q->qidx = qidx;
	q->netdev = dev;
	q->cq_mask = ncqe - 1;
	q->rq_mask = nrqe - 1;
	q->numa_node = numa_node;
	q->rq_db_thres = nrqe / 4;
	u64_stats_init(&q->syncp);
	q->dma_dev = &fp->pdev->dev;

	q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes),
				     sizeof(*q->bufs), false, numa_node,
				     &q->rq_dma_addr, (void **)&q->bufs, NULL);
	if (!q->rqes)
		goto free_q;

	q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0,
				     false, numa_node, &q->cq_dma_addr, NULL,
				     NULL);
	if (!q->cqes)
		goto free_rqes;

	err = fun_rxq_init_cache(&q->cache, nrqe, numa_node);
	if (err)
		goto free_cqes;

	err = fun_rxq_alloc_bufs(q, numa_node);
	if (err)
		goto free_cache;

	q->stats.rx_bufs = q->rq_mask;
	q->init_state = FUN_QSTATE_INIT_SW;
	return q;

free_cache:
	fun_rxq_free_cache(q);
free_cqes:
	dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes,
			  q->cq_dma_addr);
free_rqes:
	fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes,
			  q->rq_dma_addr, q->bufs);
free_q:
	kfree(q);
err:
	netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx);
	return ERR_PTR(err);
}

static void fun_rxq_free_sw(struct funeth_rxq *q)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);

	fun_rxq_free_cache(q);
	fun_rxq_free_bufs(q);
	fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false,
			  q->rqes, q->rq_dma_addr, q->bufs);
	dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE,
			  q->cqes, q->cq_dma_addr);

	/* Before freeing the queue transfer its key counters to the netdev's
	 * totals.
	 */
	fp->rx_packets += q->stats.rx_pkts;
	fp->rx_bytes += q->stats.rx_bytes;
	fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops;

	kfree(q);
}
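
/* Queue lifecycle overview (descriptive summary of the setup/teardown paths
 * in this file): an Rx queue is brought up in two stages.
 * fun_rxq_create_sw() allocates the host-side rings, buffer cache and pages
 * (FUN_QSTATE_INIT_SW); fun_rxq_create_dev() then registers the XDP rxq info
 * and creates the RQ/CQ on the device (FUN_QSTATE_INIT_FULL).
 * funeth_rxq_create() and funeth_rxq_free() move a queue between these
 * states and FUN_QSTATE_DESTROYED.
 */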

/* Create an Rx queue's resources on the device. */
int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	unsigned int ncqe = q->cq_mask + 1;
	unsigned int nrqe = q->rq_mask + 1;
	int err;

	err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx,
			       irq->napi.napi_id);
	if (err)
		goto out;

	err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED,
					 NULL);
	if (err)
		goto xdp_unreg;

	q->phase = 1;
	q->irq_cnt = 0;
	q->cq_head = 0;
	q->rq_cons = 0;
	q->rq_cons_db = 0;
	q->buf_offset = 0;
	q->napi = &irq->napi;
	q->irq_db_val = fp->cq_irq_db;
	q->next_cqe_info = cqe_to_info(q->cqes);

	q->xdp_prog = fp->xdp_prog;
	q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;

	err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
			    FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0,
			    FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0,
			    0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT,
			    &q->hw_sqid, &q->rq_db);
	if (err)
		goto xdp_unreg;

	err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
			    FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0,
			    q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe,
			    q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0,
			    irq->irq_idx, 0, fp->fdev->kern_end_qid,
			    &q->hw_cqid, &q->cq_db);
	if (err)
		goto free_rq;

	irq->rxq = q;
	writel(q->rq_mask, q->rq_db);
	q->init_state = FUN_QSTATE_INIT_FULL;

	netif_info(fp, ifup, q->netdev,
		   "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n",
		   q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx,
		   q->numa_node, q->headroom);
	return 0;

free_rq:
	fun_destroy_sq(fp->fdev, q->hw_sqid);
xdp_unreg:
	xdp_rxq_info_unreg(&q->xdp_rxq);
out:
	netdev_err(q->netdev,
		   "Failed to create Rx queue %u on device, error %d\n",
		   q->qidx, err);
	return err;
}

static void fun_rxq_free_dev(struct funeth_rxq *q)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	struct fun_irq *irq;

	if (q->init_state < FUN_QSTATE_INIT_FULL)
		return;

	irq = container_of(q->napi, struct fun_irq, napi);
	netif_info(fp, ifdown, q->netdev,
		   "Freeing Rx queue %u (id %u/%u), IRQ %u\n",
		   q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx);

	irq->rxq = NULL;
	xdp_rxq_info_unreg(&q->xdp_rxq);
	fun_destroy_sq(fp->fdev, q->hw_sqid);
	fun_destroy_cq(fp->fdev, q->hw_cqid);
	q->init_state = FUN_QSTATE_INIT_SW;
}

/* Create or advance an Rx queue, allocating all the host and device resources
 * needed to reach the target state.
 */
int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
		      unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
		      int state, struct funeth_rxq **qp)
{
	struct funeth_rxq *q = *qp;
	int err;

	if (!q) {
		q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq);
		if (IS_ERR(q))
			return PTR_ERR(q);
	}

	if (q->init_state >= state)
		goto out;

	err = fun_rxq_create_dev(q, irq);
	if (err) {
		if (!*qp)
			fun_rxq_free_sw(q);
		return err;
	}

out:
	*qp = q;
	return 0;
}

/* Free an Rx queue's resources until it reaches the target state. */
struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state)
{
	if (state < FUN_QSTATE_INIT_FULL)
		fun_rxq_free_dev(q);

	if (state == FUN_QSTATE_DESTROYED) {
		fun_rxq_free_sw(q);
		q = NULL;
	}

	return q;
}