1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 /* Copyright (C) 2015-2019 Netronome Systems, Inc. */ 3 4 #include <linux/bpf_trace.h> 5 #include <linux/netdevice.h> 6 #include <linux/overflow.h> 7 #include <linux/sizes.h> 8 #include <linux/bitfield.h> 9 10 #include "../nfp_app.h" 11 #include "../nfp_net.h" 12 #include "../nfp_net_dp.h" 13 #include "../crypto/crypto.h" 14 #include "../crypto/fw.h" 15 #include "nfdk.h" 16 17 static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) 18 { 19 return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2); 20 } 21 22 static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) 23 { 24 return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT); 25 } 26 27 static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q, 28 struct nfp_net_tx_ring *tx_ring) 29 { 30 netif_tx_stop_queue(nd_q); 31 32 /* We can race with the TX completion out of NAPI so recheck */ 33 smp_mb(); 34 if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring))) 35 netif_tx_start_queue(nd_q); 36 } 37 38 static __le64 39 nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf, 40 struct sk_buff *skb) 41 { 42 u32 segs, hdrlen, l3_offset, l4_offset; 43 struct nfp_nfdk_tx_desc txd; 44 u16 mss; 45 46 if (!skb->encapsulation) { 47 l3_offset = skb_network_offset(skb); 48 l4_offset = skb_transport_offset(skb); 49 hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); 50 } else { 51 l3_offset = skb_inner_network_offset(skb); 52 l4_offset = skb_inner_transport_offset(skb); 53 hdrlen = skb_inner_transport_header(skb) - skb->data + 54 inner_tcp_hdrlen(skb); 55 } 56 57 segs = skb_shinfo(skb)->gso_segs; 58 mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK; 59 60 /* Note: TSO of the packet with metadata prepended to skb is not 61 * supported yet, in which case l3/l4_offset and lso_hdrlen need 62 * be correctly handled here. 63 * Concern: 64 * The driver doesn't have md_bytes easily available at this point. 65 * The PCI.IN PD ME won't have md_bytes bytes to add to lso_hdrlen, 66 * so it needs the full length there. The app MEs might prefer 67 * l3_offset and l4_offset relative to the start of packet data, 68 * but could probably cope with it being relative to the CTM buf 69 * data offset. 70 */ 71 txd.l3_offset = l3_offset; 72 txd.l4_offset = l4_offset; 73 txd.lso_meta_res = 0; 74 txd.mss = cpu_to_le16(mss); 75 txd.lso_hdrlen = hdrlen; 76 txd.lso_totsegs = segs; 77 78 txbuf->pkt_cnt = segs; 79 txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1); 80 81 u64_stats_update_begin(&r_vec->tx_sync); 82 r_vec->tx_lso++; 83 u64_stats_update_end(&r_vec->tx_sync); 84 85 return txd.raw; 86 } 87 88 static u8 89 nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, 90 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags) 91 { 92 struct ipv6hdr *ipv6h; 93 struct iphdr *iph; 94 95 if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) 96 return flags; 97 98 if (skb->ip_summed != CHECKSUM_PARTIAL) 99 return flags; 100 101 flags |= NFDK_DESC_TX_L4_CSUM; 102 103 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); 104 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); 105 106 /* L3 checksum offloading flag is not required for ipv6 */ 107 if (iph->version == 4) { 108 flags |= NFDK_DESC_TX_L3_CSUM; 109 } else if (ipv6h->version != 6) { 110 nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version); 111 return flags; 112 } 113 114 u64_stats_update_begin(&r_vec->tx_sync); 115 if (!skb->encapsulation) { 116 r_vec->hw_csum_tx += pkt_cnt; 117 } else { 118 flags |= NFDK_DESC_TX_ENCAP; 119 r_vec->hw_csum_tx_inner += pkt_cnt; 120 } 121 u64_stats_update_end(&r_vec->tx_sync); 122 123 return flags; 124 } 125 126 static int 127 nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring, 128 unsigned int nr_frags, struct sk_buff *skb) 129 { 130 unsigned int n_descs, wr_p, nop_slots; 131 const skb_frag_t *frag, *fend; 132 struct nfp_nfdk_tx_desc *txd; 133 unsigned int wr_idx; 134 int err; 135 136 recount_descs: 137 n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb)); 138 139 frag = skb_shinfo(skb)->frags; 140 fend = frag + nr_frags; 141 for (; frag < fend; frag++) 142 n_descs += DIV_ROUND_UP(skb_frag_size(frag), 143 NFDK_TX_MAX_DATA_PER_DESC); 144 145 if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) { 146 if (skb_is_nonlinear(skb)) { 147 err = skb_linearize(skb); 148 if (err) 149 return err; 150 goto recount_descs; 151 } 152 return -EINVAL; 153 } 154 155 /* Under count by 1 (don't count meta) for the round down to work out */ 156 n_descs += !!skb_is_gso(skb); 157 158 if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) != 159 round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) 160 goto close_block; 161 162 if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK) 163 goto close_block; 164 165 return 0; 166 167 close_block: 168 wr_p = tx_ring->wr_p; 169 nop_slots = D_BLOCK_CPL(wr_p); 170 171 wr_idx = D_IDX(tx_ring, wr_p); 172 tx_ring->ktxbufs[wr_idx].skb = NULL; 173 txd = &tx_ring->ktxds[wr_idx]; 174 175 memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc))); 176 177 tx_ring->data_pending = 0; 178 tx_ring->wr_p += nop_slots; 179 tx_ring->wr_ptr_add += nop_slots; 180 181 return 0; 182 } 183 184 static int nfp_nfdk_prep_port_id(struct sk_buff *skb) 185 { 186 struct metadata_dst *md_dst = skb_metadata_dst(skb); 187 unsigned char *data; 188 189 if (likely(!md_dst)) 190 return 0; 191 if (unlikely(md_dst->type != METADATA_HW_PORT_MUX)) 192 return 0; 193 194 /* Note: Unsupported case when TSO a skb with metedata prepended. 195 * See the comments in `nfp_nfdk_tx_tso` for details. 196 */ 197 if (unlikely(md_dst && skb_is_gso(skb))) 198 return -EOPNOTSUPP; 199 200 if (unlikely(skb_cow_head(skb, sizeof(md_dst->u.port_info.port_id)))) 201 return -ENOMEM; 202 203 data = skb_push(skb, sizeof(md_dst->u.port_info.port_id)); 204 put_unaligned_be32(md_dst->u.port_info.port_id, data); 205 206 return sizeof(md_dst->u.port_info.port_id); 207 } 208 209 static int 210 nfp_nfdk_prep_tx_meta(struct nfp_app *app, struct sk_buff *skb, 211 struct nfp_net_r_vector *r_vec) 212 { 213 unsigned char *data; 214 int res, md_bytes; 215 u32 meta_id = 0; 216 217 res = nfp_nfdk_prep_port_id(skb); 218 if (unlikely(res <= 0)) 219 return res; 220 221 md_bytes = res; 222 meta_id = NFP_NET_META_PORTID; 223 224 if (unlikely(skb_cow_head(skb, sizeof(meta_id)))) 225 return -ENOMEM; 226 227 md_bytes += sizeof(meta_id); 228 229 meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) | 230 FIELD_PREP(NFDK_META_FIELDS, meta_id); 231 232 data = skb_push(skb, sizeof(meta_id)); 233 put_unaligned_be32(meta_id, data); 234 235 return NFDK_DESC_TX_CHAIN_META; 236 } 237 238 /** 239 * nfp_nfdk_tx() - Main transmit entry point 240 * @skb: SKB to transmit 241 * @netdev: netdev structure 242 * 243 * Return: NETDEV_TX_OK on success. 244 */ 245 netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) 246 { 247 struct nfp_net *nn = netdev_priv(netdev); 248 struct nfp_nfdk_tx_buf *txbuf, *etxbuf; 249 u32 cnt, tmp_dlen, dlen_type = 0; 250 struct nfp_net_tx_ring *tx_ring; 251 struct nfp_net_r_vector *r_vec; 252 const skb_frag_t *frag, *fend; 253 struct nfp_nfdk_tx_desc *txd; 254 unsigned int real_len, qidx; 255 unsigned int dma_len, type; 256 struct netdev_queue *nd_q; 257 struct nfp_net_dp *dp; 258 int nr_frags, wr_idx; 259 dma_addr_t dma_addr; 260 u64 metadata; 261 262 dp = &nn->dp; 263 qidx = skb_get_queue_mapping(skb); 264 tx_ring = &dp->tx_rings[qidx]; 265 r_vec = tx_ring->r_vec; 266 nd_q = netdev_get_tx_queue(dp->netdev, qidx); 267 268 /* Don't bother counting frags, assume the worst */ 269 if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { 270 nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n", 271 qidx, tx_ring->wr_p, tx_ring->rd_p); 272 netif_tx_stop_queue(nd_q); 273 nfp_net_tx_xmit_more_flush(tx_ring); 274 u64_stats_update_begin(&r_vec->tx_sync); 275 r_vec->tx_busy++; 276 u64_stats_update_end(&r_vec->tx_sync); 277 return NETDEV_TX_BUSY; 278 } 279 280 metadata = nfp_nfdk_prep_tx_meta(nn->app, skb, r_vec); 281 if (unlikely((int)metadata < 0)) 282 goto err_flush; 283 284 nr_frags = skb_shinfo(skb)->nr_frags; 285 if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb)) 286 goto err_flush; 287 288 /* DMA map all */ 289 wr_idx = D_IDX(tx_ring, tx_ring->wr_p); 290 txd = &tx_ring->ktxds[wr_idx]; 291 txbuf = &tx_ring->ktxbufs[wr_idx]; 292 293 dma_len = skb_headlen(skb); 294 if (skb_is_gso(skb)) 295 type = NFDK_DESC_TX_TYPE_TSO; 296 else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) 297 type = NFDK_DESC_TX_TYPE_SIMPLE; 298 else 299 type = NFDK_DESC_TX_TYPE_GATHER; 300 301 dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE); 302 if (dma_mapping_error(dp->dev, dma_addr)) 303 goto err_warn_dma; 304 305 txbuf->skb = skb; 306 txbuf++; 307 308 txbuf->dma_addr = dma_addr; 309 txbuf++; 310 311 /* FIELD_PREP() implicitly truncates to chunk */ 312 dma_len -= 1; 313 dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | 314 FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); 315 316 txd->dma_len_type = cpu_to_le16(dlen_type); 317 nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); 318 319 /* starts at bit 0 */ 320 BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1)); 321 322 /* Preserve the original dlen_type, this way below the EOP logic 323 * can use dlen_type. 324 */ 325 tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; 326 dma_len -= tmp_dlen; 327 dma_addr += tmp_dlen + 1; 328 txd++; 329 330 /* The rest of the data (if any) will be in larger dma descritors 331 * and is handled with the fragment loop. 332 */ 333 frag = skb_shinfo(skb)->frags; 334 fend = frag + nr_frags; 335 336 while (true) { 337 while (dma_len > 0) { 338 dma_len -= 1; 339 dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); 340 341 txd->dma_len_type = cpu_to_le16(dlen_type); 342 nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); 343 344 dma_len -= dlen_type; 345 dma_addr += dlen_type + 1; 346 txd++; 347 } 348 349 if (frag >= fend) 350 break; 351 352 dma_len = skb_frag_size(frag); 353 dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len, 354 DMA_TO_DEVICE); 355 if (dma_mapping_error(dp->dev, dma_addr)) 356 goto err_unmap; 357 358 txbuf->dma_addr = dma_addr; 359 txbuf++; 360 361 frag++; 362 } 363 364 (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); 365 366 if (!skb_is_gso(skb)) { 367 real_len = skb->len; 368 /* Metadata desc */ 369 metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata); 370 txd->raw = cpu_to_le64(metadata); 371 txd++; 372 } else { 373 /* lso desc should be placed after metadata desc */ 374 (txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb); 375 real_len = txbuf->real_len; 376 /* Metadata desc */ 377 metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata); 378 txd->raw = cpu_to_le64(metadata); 379 txd += 2; 380 txbuf++; 381 } 382 383 cnt = txd - tx_ring->ktxds - wr_idx; 384 if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) != 385 round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT))) 386 goto err_warn_overflow; 387 388 skb_tx_timestamp(skb); 389 390 tx_ring->wr_p += cnt; 391 if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) 392 tx_ring->data_pending += skb->len; 393 else 394 tx_ring->data_pending = 0; 395 396 if (nfp_nfdk_tx_ring_should_stop(tx_ring)) 397 nfp_nfdk_tx_ring_stop(nd_q, tx_ring); 398 399 tx_ring->wr_ptr_add += cnt; 400 if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more())) 401 nfp_net_tx_xmit_more_flush(tx_ring); 402 403 return NETDEV_TX_OK; 404 405 err_warn_overflow: 406 WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d", 407 wr_idx, skb_headlen(skb), nr_frags, cnt); 408 if (skb_is_gso(skb)) 409 txbuf--; 410 err_unmap: 411 /* txbuf pointed to the next-to-use */ 412 etxbuf = txbuf; 413 /* first txbuf holds the skb */ 414 txbuf = &tx_ring->ktxbufs[wr_idx + 1]; 415 if (txbuf < etxbuf) { 416 dma_unmap_single(dp->dev, txbuf->dma_addr, 417 skb_headlen(skb), DMA_TO_DEVICE); 418 txbuf->raw = 0; 419 txbuf++; 420 } 421 frag = skb_shinfo(skb)->frags; 422 while (etxbuf < txbuf) { 423 dma_unmap_page(dp->dev, txbuf->dma_addr, 424 skb_frag_size(frag), DMA_TO_DEVICE); 425 txbuf->raw = 0; 426 frag++; 427 txbuf++; 428 } 429 err_warn_dma: 430 nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); 431 err_flush: 432 nfp_net_tx_xmit_more_flush(tx_ring); 433 u64_stats_update_begin(&r_vec->tx_sync); 434 r_vec->tx_errors++; 435 u64_stats_update_end(&r_vec->tx_sync); 436 dev_kfree_skb_any(skb); 437 return NETDEV_TX_OK; 438 } 439 440 /** 441 * nfp_nfdk_tx_complete() - Handled completed TX packets 442 * @tx_ring: TX ring structure 443 * @budget: NAPI budget (only used as bool to determine if in NAPI context) 444 */ 445 static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) 446 { 447 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 448 struct nfp_net_dp *dp = &r_vec->nfp_net->dp; 449 u32 done_pkts = 0, done_bytes = 0; 450 struct nfp_nfdk_tx_buf *ktxbufs; 451 struct device *dev = dp->dev; 452 struct netdev_queue *nd_q; 453 u32 rd_p, qcp_rd_p; 454 int todo; 455 456 rd_p = tx_ring->rd_p; 457 if (tx_ring->wr_p == rd_p) 458 return; 459 460 /* Work out how many descriptors have been transmitted */ 461 qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); 462 463 if (qcp_rd_p == tx_ring->qcp_rd_p) 464 return; 465 466 todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); 467 ktxbufs = tx_ring->ktxbufs; 468 469 while (todo > 0) { 470 const skb_frag_t *frag, *fend; 471 unsigned int size, n_descs = 1; 472 struct nfp_nfdk_tx_buf *txbuf; 473 struct sk_buff *skb; 474 475 txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)]; 476 skb = txbuf->skb; 477 txbuf++; 478 479 /* Closed block */ 480 if (!skb) { 481 n_descs = D_BLOCK_CPL(rd_p); 482 goto next; 483 } 484 485 /* Unmap head */ 486 size = skb_headlen(skb); 487 n_descs += nfp_nfdk_headlen_to_segs(size); 488 dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE); 489 txbuf++; 490 491 /* Unmap frags */ 492 frag = skb_shinfo(skb)->frags; 493 fend = frag + skb_shinfo(skb)->nr_frags; 494 for (; frag < fend; frag++) { 495 size = skb_frag_size(frag); 496 n_descs += DIV_ROUND_UP(size, 497 NFDK_TX_MAX_DATA_PER_DESC); 498 dma_unmap_page(dev, txbuf->dma_addr, 499 skb_frag_size(frag), DMA_TO_DEVICE); 500 txbuf++; 501 } 502 503 if (!skb_is_gso(skb)) { 504 done_bytes += skb->len; 505 done_pkts++; 506 } else { 507 done_bytes += txbuf->real_len; 508 done_pkts += txbuf->pkt_cnt; 509 n_descs++; 510 } 511 512 napi_consume_skb(skb, budget); 513 next: 514 rd_p += n_descs; 515 todo -= n_descs; 516 } 517 518 tx_ring->rd_p = rd_p; 519 tx_ring->qcp_rd_p = qcp_rd_p; 520 521 u64_stats_update_begin(&r_vec->tx_sync); 522 r_vec->tx_bytes += done_bytes; 523 r_vec->tx_pkts += done_pkts; 524 u64_stats_update_end(&r_vec->tx_sync); 525 526 if (!dp->netdev) 527 return; 528 529 nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); 530 netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); 531 if (nfp_nfdk_tx_ring_should_wake(tx_ring)) { 532 /* Make sure TX thread will see updated tx_ring->rd_p */ 533 smp_mb(); 534 535 if (unlikely(netif_tx_queue_stopped(nd_q))) 536 netif_tx_wake_queue(nd_q); 537 } 538 539 WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, 540 "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", 541 tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); 542 } 543 544 /* Receive processing */ 545 static void * 546 nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) 547 { 548 void *frag; 549 550 if (!dp->xdp_prog) { 551 frag = napi_alloc_frag(dp->fl_bufsz); 552 if (unlikely(!frag)) 553 return NULL; 554 } else { 555 struct page *page; 556 557 page = dev_alloc_page(); 558 if (unlikely(!page)) 559 return NULL; 560 frag = page_address(page); 561 } 562 563 *dma_addr = nfp_net_dma_map_rx(dp, frag); 564 if (dma_mapping_error(dp->dev, *dma_addr)) { 565 nfp_net_free_frag(frag, dp->xdp_prog); 566 nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); 567 return NULL; 568 } 569 570 return frag; 571 } 572 573 /** 574 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings 575 * @dp: NFP Net data path struct 576 * @rx_ring: RX ring structure 577 * @frag: page fragment buffer 578 * @dma_addr: DMA address of skb mapping 579 */ 580 static void 581 nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp, 582 struct nfp_net_rx_ring *rx_ring, 583 void *frag, dma_addr_t dma_addr) 584 { 585 unsigned int wr_idx; 586 587 wr_idx = D_IDX(rx_ring, rx_ring->wr_p); 588 589 nfp_net_dma_sync_dev_rx(dp, dma_addr); 590 591 /* Stash SKB and DMA address away */ 592 rx_ring->rxbufs[wr_idx].frag = frag; 593 rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; 594 595 /* Fill freelist descriptor */ 596 rx_ring->rxds[wr_idx].fld.reserved = 0; 597 rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; 598 nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, 599 dma_addr + dp->rx_dma_off); 600 601 rx_ring->wr_p++; 602 if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { 603 /* Update write pointer of the freelist queue. Make 604 * sure all writes are flushed before telling the hardware. 605 */ 606 wmb(); 607 nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH); 608 } 609 } 610 611 /** 612 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW 613 * @dp: NFP Net data path struct 614 * @rx_ring: RX ring to fill 615 */ 616 void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp, 617 struct nfp_net_rx_ring *rx_ring) 618 { 619 unsigned int i; 620 621 for (i = 0; i < rx_ring->cnt - 1; i++) 622 nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, 623 rx_ring->rxbufs[i].dma_addr); 624 } 625 626 /** 627 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors 628 * @flags: RX descriptor flags field in CPU byte order 629 */ 630 static int nfp_nfdk_rx_csum_has_errors(u16 flags) 631 { 632 u16 csum_all_checked, csum_all_ok; 633 634 csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; 635 csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; 636 637 return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); 638 } 639 640 /** 641 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags 642 * @dp: NFP Net data path struct 643 * @r_vec: per-ring structure 644 * @rxd: Pointer to RX descriptor 645 * @meta: Parsed metadata prepend 646 * @skb: Pointer to SKB 647 */ 648 static void 649 nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, 650 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta, 651 struct sk_buff *skb) 652 { 653 skb_checksum_none_assert(skb); 654 655 if (!(dp->netdev->features & NETIF_F_RXCSUM)) 656 return; 657 658 if (meta->csum_type) { 659 skb->ip_summed = meta->csum_type; 660 skb->csum = meta->csum; 661 u64_stats_update_begin(&r_vec->rx_sync); 662 r_vec->hw_csum_rx_complete++; 663 u64_stats_update_end(&r_vec->rx_sync); 664 return; 665 } 666 667 if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { 668 u64_stats_update_begin(&r_vec->rx_sync); 669 r_vec->hw_csum_rx_error++; 670 u64_stats_update_end(&r_vec->rx_sync); 671 return; 672 } 673 674 /* Assume that the firmware will never report inner CSUM_OK unless outer 675 * L4 headers were successfully parsed. FW will always report zero UDP 676 * checksum as CSUM_OK. 677 */ 678 if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || 679 rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { 680 __skb_incr_checksum_unnecessary(skb); 681 u64_stats_update_begin(&r_vec->rx_sync); 682 r_vec->hw_csum_rx_ok++; 683 u64_stats_update_end(&r_vec->rx_sync); 684 } 685 686 if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || 687 rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { 688 __skb_incr_checksum_unnecessary(skb); 689 u64_stats_update_begin(&r_vec->rx_sync); 690 r_vec->hw_csum_rx_inner_ok++; 691 u64_stats_update_end(&r_vec->rx_sync); 692 } 693 } 694 695 static void 696 nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, 697 unsigned int type, __be32 *hash) 698 { 699 if (!(netdev->features & NETIF_F_RXHASH)) 700 return; 701 702 switch (type) { 703 case NFP_NET_RSS_IPV4: 704 case NFP_NET_RSS_IPV6: 705 case NFP_NET_RSS_IPV6_EX: 706 meta->hash_type = PKT_HASH_TYPE_L3; 707 break; 708 default: 709 meta->hash_type = PKT_HASH_TYPE_L4; 710 break; 711 } 712 713 meta->hash = get_unaligned_be32(hash); 714 } 715 716 static bool 717 nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, 718 void *data, void *pkt, unsigned int pkt_len, int meta_len) 719 { 720 u32 meta_info; 721 722 meta_info = get_unaligned_be32(data); 723 data += 4; 724 725 while (meta_info) { 726 switch (meta_info & NFP_NET_META_FIELD_MASK) { 727 case NFP_NET_META_HASH: 728 meta_info >>= NFP_NET_META_FIELD_SIZE; 729 nfp_nfdk_set_hash(netdev, meta, 730 meta_info & NFP_NET_META_FIELD_MASK, 731 (__be32 *)data); 732 data += 4; 733 break; 734 case NFP_NET_META_MARK: 735 meta->mark = get_unaligned_be32(data); 736 data += 4; 737 break; 738 case NFP_NET_META_PORTID: 739 meta->portid = get_unaligned_be32(data); 740 data += 4; 741 break; 742 case NFP_NET_META_CSUM: 743 meta->csum_type = CHECKSUM_COMPLETE; 744 meta->csum = 745 (__force __wsum)__get_unaligned_cpu32(data); 746 data += 4; 747 break; 748 case NFP_NET_META_RESYNC_INFO: 749 if (nfp_net_tls_rx_resync_req(netdev, data, pkt, 750 pkt_len)) 751 return false; 752 data += sizeof(struct nfp_net_tls_resync_req); 753 break; 754 default: 755 return true; 756 } 757 758 meta_info >>= NFP_NET_META_FIELD_SIZE; 759 } 760 761 return data != pkt; 762 } 763 764 static void 765 nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, 766 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, 767 struct sk_buff *skb) 768 { 769 u64_stats_update_begin(&r_vec->rx_sync); 770 r_vec->rx_drops++; 771 /* If we have both skb and rxbuf the replacement buffer allocation 772 * must have failed, count this as an alloc failure. 773 */ 774 if (skb && rxbuf) 775 r_vec->rx_replace_buf_alloc_fail++; 776 u64_stats_update_end(&r_vec->rx_sync); 777 778 /* skb is build based on the frag, free_skb() would free the frag 779 * so to be able to reuse it we need an extra ref. 780 */ 781 if (skb && rxbuf && skb->head == rxbuf->frag) 782 page_ref_inc(virt_to_head_page(rxbuf->frag)); 783 if (rxbuf) 784 nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr); 785 if (skb) 786 dev_kfree_skb_any(skb); 787 } 788 789 static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring) 790 { 791 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 792 struct nfp_net_dp *dp = &r_vec->nfp_net->dp; 793 struct nfp_net_rx_ring *rx_ring; 794 u32 qcp_rd_p, done = 0; 795 bool done_all; 796 int todo; 797 798 /* Work out how many descriptors have been transmitted */ 799 qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); 800 if (qcp_rd_p == tx_ring->qcp_rd_p) 801 return true; 802 803 todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); 804 805 done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; 806 todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); 807 808 rx_ring = r_vec->rx_ring; 809 while (todo > 0) { 810 int idx = D_IDX(tx_ring, tx_ring->rd_p + done); 811 struct nfp_nfdk_tx_buf *txbuf; 812 unsigned int step = 1; 813 814 txbuf = &tx_ring->ktxbufs[idx]; 815 if (!txbuf->raw) 816 goto next; 817 818 if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) { 819 WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n"); 820 goto next; 821 } 822 823 /* Two successive txbufs are used to stash virtual and dma 824 * address respectively, recycle and clean them here. 825 */ 826 nfp_nfdk_rx_give_one(dp, rx_ring, 827 (void *)NFDK_TX_BUF_PTR(txbuf[0].val), 828 txbuf[1].dma_addr); 829 txbuf[0].raw = 0; 830 txbuf[1].raw = 0; 831 step = 2; 832 833 u64_stats_update_begin(&r_vec->tx_sync); 834 /* Note: tx_bytes not accumulated. */ 835 r_vec->tx_pkts++; 836 u64_stats_update_end(&r_vec->tx_sync); 837 next: 838 todo -= step; 839 done += step; 840 } 841 842 tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done); 843 tx_ring->rd_p += done; 844 845 WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, 846 "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", 847 tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); 848 849 return done_all; 850 } 851 852 static bool 853 nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, 854 struct nfp_net_tx_ring *tx_ring, 855 struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, 856 unsigned int pkt_len, bool *completed) 857 { 858 unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; 859 unsigned int dma_len, type, cnt, dlen_type, tmp_dlen; 860 struct nfp_nfdk_tx_buf *txbuf; 861 struct nfp_nfdk_tx_desc *txd; 862 unsigned int n_descs; 863 dma_addr_t dma_addr; 864 int wr_idx; 865 866 /* Reject if xdp_adjust_tail grow packet beyond DMA area */ 867 if (pkt_len + dma_off > dma_map_sz) 868 return false; 869 870 /* Make sure there's still at least one block available after 871 * aligning to block boundary, so that the txds used below 872 * won't wrap around the tx_ring. 873 */ 874 if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { 875 if (!*completed) { 876 nfp_nfdk_xdp_complete(tx_ring); 877 *completed = true; 878 } 879 880 if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { 881 nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, 882 NULL); 883 return false; 884 } 885 } 886 887 /* Check if cross block boundary */ 888 n_descs = nfp_nfdk_headlen_to_segs(pkt_len); 889 if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) != 890 round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) || 891 ((u32)tx_ring->data_pending + pkt_len > 892 NFDK_TX_MAX_DATA_PER_BLOCK)) { 893 unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p); 894 895 wr_idx = D_IDX(tx_ring, tx_ring->wr_p); 896 txd = &tx_ring->ktxds[wr_idx]; 897 memset(txd, 0, 898 array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc))); 899 900 tx_ring->data_pending = 0; 901 tx_ring->wr_p += nop_slots; 902 tx_ring->wr_ptr_add += nop_slots; 903 } 904 905 wr_idx = D_IDX(tx_ring, tx_ring->wr_p); 906 907 txbuf = &tx_ring->ktxbufs[wr_idx]; 908 909 txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP; 910 txbuf[1].dma_addr = rxbuf->dma_addr; 911 /* Note: pkt len not stored */ 912 913 dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off, 914 pkt_len, DMA_BIDIRECTIONAL); 915 916 /* Build TX descriptor */ 917 txd = &tx_ring->ktxds[wr_idx]; 918 dma_len = pkt_len; 919 dma_addr = rxbuf->dma_addr + dma_off; 920 921 if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) 922 type = NFDK_DESC_TX_TYPE_SIMPLE; 923 else 924 type = NFDK_DESC_TX_TYPE_GATHER; 925 926 /* FIELD_PREP() implicitly truncates to chunk */ 927 dma_len -= 1; 928 dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | 929 FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); 930 931 txd->dma_len_type = cpu_to_le16(dlen_type); 932 nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); 933 934 tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; 935 dma_len -= tmp_dlen; 936 dma_addr += tmp_dlen + 1; 937 txd++; 938 939 while (dma_len > 0) { 940 dma_len -= 1; 941 dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); 942 txd->dma_len_type = cpu_to_le16(dlen_type); 943 nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); 944 945 dlen_type &= NFDK_DESC_TX_DMA_LEN; 946 dma_len -= dlen_type; 947 dma_addr += dlen_type + 1; 948 txd++; 949 } 950 951 (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); 952 953 /* Metadata desc */ 954 txd->raw = 0; 955 txd++; 956 957 cnt = txd - tx_ring->ktxds - wr_idx; 958 tx_ring->wr_p += cnt; 959 if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) 960 tx_ring->data_pending += pkt_len; 961 else 962 tx_ring->data_pending = 0; 963 964 tx_ring->wr_ptr_add += cnt; 965 return true; 966 } 967 968 /** 969 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring 970 * @rx_ring: RX ring to receive from 971 * @budget: NAPI budget 972 * 973 * Note, this function is separated out from the napi poll function to 974 * more cleanly separate packet receive code from other bookkeeping 975 * functions performed in the napi poll function. 976 * 977 * Return: Number of packets received. 978 */ 979 static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget) 980 { 981 struct nfp_net_r_vector *r_vec = rx_ring->r_vec; 982 struct nfp_net_dp *dp = &r_vec->nfp_net->dp; 983 struct nfp_net_tx_ring *tx_ring; 984 struct bpf_prog *xdp_prog; 985 bool xdp_tx_cmpl = false; 986 unsigned int true_bufsz; 987 struct sk_buff *skb; 988 int pkts_polled = 0; 989 struct xdp_buff xdp; 990 int idx; 991 992 xdp_prog = READ_ONCE(dp->xdp_prog); 993 true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; 994 xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, 995 &rx_ring->xdp_rxq); 996 tx_ring = r_vec->xdp_ring; 997 998 while (pkts_polled < budget) { 999 unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; 1000 struct nfp_net_rx_buf *rxbuf; 1001 struct nfp_net_rx_desc *rxd; 1002 struct nfp_meta_parsed meta; 1003 bool redir_egress = false; 1004 struct net_device *netdev; 1005 dma_addr_t new_dma_addr; 1006 u32 meta_len_xdp = 0; 1007 void *new_frag; 1008 1009 idx = D_IDX(rx_ring, rx_ring->rd_p); 1010 1011 rxd = &rx_ring->rxds[idx]; 1012 if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) 1013 break; 1014 1015 /* Memory barrier to ensure that we won't do other reads 1016 * before the DD bit. 1017 */ 1018 dma_rmb(); 1019 1020 memset(&meta, 0, sizeof(meta)); 1021 1022 rx_ring->rd_p++; 1023 pkts_polled++; 1024 1025 rxbuf = &rx_ring->rxbufs[idx]; 1026 /* < meta_len > 1027 * <-- [rx_offset] --> 1028 * --------------------------------------------------------- 1029 * | [XX] | metadata | packet | XXXX | 1030 * --------------------------------------------------------- 1031 * <---------------- data_len ---------------> 1032 * 1033 * The rx_offset is fixed for all packets, the meta_len can vary 1034 * on a packet by packet basis. If rx_offset is set to zero 1035 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the 1036 * buffer and is immediately followed by the packet (no [XX]). 1037 */ 1038 meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; 1039 data_len = le16_to_cpu(rxd->rxd.data_len); 1040 pkt_len = data_len - meta_len; 1041 1042 pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; 1043 if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) 1044 pkt_off += meta_len; 1045 else 1046 pkt_off += dp->rx_offset; 1047 meta_off = pkt_off - meta_len; 1048 1049 /* Stats update */ 1050 u64_stats_update_begin(&r_vec->rx_sync); 1051 r_vec->rx_pkts++; 1052 r_vec->rx_bytes += pkt_len; 1053 u64_stats_update_end(&r_vec->rx_sync); 1054 1055 if (unlikely(meta_len > NFP_NET_MAX_PREPEND || 1056 (dp->rx_offset && meta_len > dp->rx_offset))) { 1057 nn_dp_warn(dp, "oversized RX packet metadata %u\n", 1058 meta_len); 1059 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); 1060 continue; 1061 } 1062 1063 nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, 1064 data_len); 1065 1066 if (meta_len) { 1067 if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta, 1068 rxbuf->frag + meta_off, 1069 rxbuf->frag + pkt_off, 1070 pkt_len, meta_len))) { 1071 nn_dp_warn(dp, "invalid RX packet metadata\n"); 1072 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, 1073 NULL); 1074 continue; 1075 } 1076 } 1077 1078 if (xdp_prog && !meta.portid) { 1079 void *orig_data = rxbuf->frag + pkt_off; 1080 unsigned int dma_off; 1081 int act; 1082 1083 xdp_prepare_buff(&xdp, 1084 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, 1085 pkt_off - NFP_NET_RX_BUF_HEADROOM, 1086 pkt_len, true); 1087 1088 act = bpf_prog_run_xdp(xdp_prog, &xdp); 1089 1090 pkt_len = xdp.data_end - xdp.data; 1091 pkt_off += xdp.data - orig_data; 1092 1093 switch (act) { 1094 case XDP_PASS: 1095 meta_len_xdp = xdp.data - xdp.data_meta; 1096 break; 1097 case XDP_TX: 1098 dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; 1099 if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring, 1100 tx_ring, 1101 rxbuf, 1102 dma_off, 1103 pkt_len, 1104 &xdp_tx_cmpl))) 1105 trace_xdp_exception(dp->netdev, 1106 xdp_prog, act); 1107 continue; 1108 default: 1109 bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); 1110 fallthrough; 1111 case XDP_ABORTED: 1112 trace_xdp_exception(dp->netdev, xdp_prog, act); 1113 fallthrough; 1114 case XDP_DROP: 1115 nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, 1116 rxbuf->dma_addr); 1117 continue; 1118 } 1119 } 1120 1121 if (likely(!meta.portid)) { 1122 netdev = dp->netdev; 1123 } else if (meta.portid == NFP_META_PORT_ID_CTRL) { 1124 struct nfp_net *nn = netdev_priv(dp->netdev); 1125 1126 nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, 1127 pkt_len); 1128 nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, 1129 rxbuf->dma_addr); 1130 continue; 1131 } else { 1132 struct nfp_net *nn; 1133 1134 nn = netdev_priv(dp->netdev); 1135 netdev = nfp_app_dev_get(nn->app, meta.portid, 1136 &redir_egress); 1137 if (unlikely(!netdev)) { 1138 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, 1139 NULL); 1140 continue; 1141 } 1142 1143 if (nfp_netdev_is_nfp_repr(netdev)) 1144 nfp_repr_inc_rx_stats(netdev, pkt_len); 1145 } 1146 1147 skb = build_skb(rxbuf->frag, true_bufsz); 1148 if (unlikely(!skb)) { 1149 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); 1150 continue; 1151 } 1152 new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr); 1153 if (unlikely(!new_frag)) { 1154 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); 1155 continue; 1156 } 1157 1158 nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); 1159 1160 nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); 1161 1162 skb_reserve(skb, pkt_off); 1163 skb_put(skb, pkt_len); 1164 1165 skb->mark = meta.mark; 1166 skb_set_hash(skb, meta.hash, meta.hash_type); 1167 1168 skb_record_rx_queue(skb, rx_ring->idx); 1169 skb->protocol = eth_type_trans(skb, netdev); 1170 1171 nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb); 1172 1173 if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) 1174 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 1175 le16_to_cpu(rxd->rxd.vlan)); 1176 if (meta_len_xdp) 1177 skb_metadata_set(skb, meta_len_xdp); 1178 1179 if (likely(!redir_egress)) { 1180 napi_gro_receive(&rx_ring->r_vec->napi, skb); 1181 } else { 1182 skb->dev = netdev; 1183 skb_reset_network_header(skb); 1184 __skb_push(skb, ETH_HLEN); 1185 dev_queue_xmit(skb); 1186 } 1187 } 1188 1189 if (xdp_prog) { 1190 if (tx_ring->wr_ptr_add) 1191 nfp_net_tx_xmit_more_flush(tx_ring); 1192 else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && 1193 !xdp_tx_cmpl) 1194 if (!nfp_nfdk_xdp_complete(tx_ring)) 1195 pkts_polled = budget; 1196 } 1197 1198 return pkts_polled; 1199 } 1200 1201 /** 1202 * nfp_nfdk_poll() - napi poll function 1203 * @napi: NAPI structure 1204 * @budget: NAPI budget 1205 * 1206 * Return: number of packets polled. 1207 */ 1208 int nfp_nfdk_poll(struct napi_struct *napi, int budget) 1209 { 1210 struct nfp_net_r_vector *r_vec = 1211 container_of(napi, struct nfp_net_r_vector, napi); 1212 unsigned int pkts_polled = 0; 1213 1214 if (r_vec->tx_ring) 1215 nfp_nfdk_tx_complete(r_vec->tx_ring, budget); 1216 if (r_vec->rx_ring) 1217 pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget); 1218 1219 if (pkts_polled < budget) 1220 if (napi_complete_done(napi, pkts_polled)) 1221 nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); 1222 1223 if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { 1224 struct dim_sample dim_sample = {}; 1225 unsigned int start; 1226 u64 pkts, bytes; 1227 1228 do { 1229 start = u64_stats_fetch_begin(&r_vec->rx_sync); 1230 pkts = r_vec->rx_pkts; 1231 bytes = r_vec->rx_bytes; 1232 } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); 1233 1234 dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); 1235 net_dim(&r_vec->rx_dim, dim_sample); 1236 } 1237 1238 if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { 1239 struct dim_sample dim_sample = {}; 1240 unsigned int start; 1241 u64 pkts, bytes; 1242 1243 do { 1244 start = u64_stats_fetch_begin(&r_vec->tx_sync); 1245 pkts = r_vec->tx_pkts; 1246 bytes = r_vec->tx_bytes; 1247 } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); 1248 1249 dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); 1250 net_dim(&r_vec->tx_dim, dim_sample); 1251 } 1252 1253 return pkts_polled; 1254 } 1255 1256 /* Control device data path 1257 */ 1258 1259 bool 1260 nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, 1261 struct sk_buff *skb, bool old) 1262 { 1263 u32 cnt, tmp_dlen, dlen_type = 0; 1264 struct nfp_net_tx_ring *tx_ring; 1265 struct nfp_nfdk_tx_buf *txbuf; 1266 struct nfp_nfdk_tx_desc *txd; 1267 unsigned int dma_len, type; 1268 struct nfp_net_dp *dp; 1269 dma_addr_t dma_addr; 1270 u64 metadata = 0; 1271 int wr_idx; 1272 1273 dp = &r_vec->nfp_net->dp; 1274 tx_ring = r_vec->tx_ring; 1275 1276 if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { 1277 nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n"); 1278 goto err_free; 1279 } 1280 1281 /* Don't bother counting frags, assume the worst */ 1282 if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { 1283 u64_stats_update_begin(&r_vec->tx_sync); 1284 r_vec->tx_busy++; 1285 u64_stats_update_end(&r_vec->tx_sync); 1286 if (!old) 1287 __skb_queue_tail(&r_vec->queue, skb); 1288 else 1289 __skb_queue_head(&r_vec->queue, skb); 1290 return NETDEV_TX_BUSY; 1291 } 1292 1293 if (nfp_app_ctrl_has_meta(nn->app)) { 1294 if (unlikely(skb_headroom(skb) < 8)) { 1295 nn_dp_warn(dp, "CTRL TX on skb without headroom\n"); 1296 goto err_free; 1297 } 1298 metadata = NFDK_DESC_TX_CHAIN_META; 1299 put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4)); 1300 put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) | 1301 FIELD_PREP(NFDK_META_FIELDS, 1302 NFP_NET_META_PORTID), 1303 skb_push(skb, 4)); 1304 } 1305 1306 if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb)) 1307 goto err_free; 1308 1309 /* DMA map all */ 1310 wr_idx = D_IDX(tx_ring, tx_ring->wr_p); 1311 txd = &tx_ring->ktxds[wr_idx]; 1312 txbuf = &tx_ring->ktxbufs[wr_idx]; 1313 1314 dma_len = skb_headlen(skb); 1315 if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) 1316 type = NFDK_DESC_TX_TYPE_SIMPLE; 1317 else 1318 type = NFDK_DESC_TX_TYPE_GATHER; 1319 1320 dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE); 1321 if (dma_mapping_error(dp->dev, dma_addr)) 1322 goto err_warn_dma; 1323 1324 txbuf->skb = skb; 1325 txbuf++; 1326 1327 txbuf->dma_addr = dma_addr; 1328 txbuf++; 1329 1330 dma_len -= 1; 1331 dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | 1332 FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); 1333 1334 txd->dma_len_type = cpu_to_le16(dlen_type); 1335 nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); 1336 1337 tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; 1338 dma_len -= tmp_dlen; 1339 dma_addr += tmp_dlen + 1; 1340 txd++; 1341 1342 while (dma_len > 0) { 1343 dma_len -= 1; 1344 dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); 1345 txd->dma_len_type = cpu_to_le16(dlen_type); 1346 nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); 1347 1348 dlen_type &= NFDK_DESC_TX_DMA_LEN; 1349 dma_len -= dlen_type; 1350 dma_addr += dlen_type + 1; 1351 txd++; 1352 } 1353 1354 (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); 1355 1356 /* Metadata desc */ 1357 txd->raw = cpu_to_le64(metadata); 1358 txd++; 1359 1360 cnt = txd - tx_ring->ktxds - wr_idx; 1361 if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) != 1362 round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT))) 1363 goto err_warn_overflow; 1364 1365 tx_ring->wr_p += cnt; 1366 if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) 1367 tx_ring->data_pending += skb->len; 1368 else 1369 tx_ring->data_pending = 0; 1370 1371 tx_ring->wr_ptr_add += cnt; 1372 nfp_net_tx_xmit_more_flush(tx_ring); 1373 1374 return NETDEV_TX_OK; 1375 1376 err_warn_overflow: 1377 WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d", 1378 wr_idx, skb_headlen(skb), 0, cnt); 1379 txbuf--; 1380 dma_unmap_single(dp->dev, txbuf->dma_addr, 1381 skb_headlen(skb), DMA_TO_DEVICE); 1382 txbuf->raw = 0; 1383 err_warn_dma: 1384 nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); 1385 err_free: 1386 u64_stats_update_begin(&r_vec->tx_sync); 1387 r_vec->tx_errors++; 1388 u64_stats_update_end(&r_vec->tx_sync); 1389 dev_kfree_skb_any(skb); 1390 return NETDEV_TX_OK; 1391 } 1392 1393 static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) 1394 { 1395 struct sk_buff *skb; 1396 1397 while ((skb = __skb_dequeue(&r_vec->queue))) 1398 if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) 1399 return; 1400 } 1401 1402 static bool 1403 nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) 1404 { 1405 u32 meta_type, meta_tag; 1406 1407 if (!nfp_app_ctrl_has_meta(nn->app)) 1408 return !meta_len; 1409 1410 if (meta_len != 8) 1411 return false; 1412 1413 meta_type = get_unaligned_be32(data); 1414 meta_tag = get_unaligned_be32(data + 4); 1415 1416 return (meta_type == NFP_NET_META_PORTID && 1417 meta_tag == NFP_META_PORT_ID_CTRL); 1418 } 1419 1420 static bool 1421 nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, 1422 struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) 1423 { 1424 unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; 1425 struct nfp_net_rx_buf *rxbuf; 1426 struct nfp_net_rx_desc *rxd; 1427 dma_addr_t new_dma_addr; 1428 struct sk_buff *skb; 1429 void *new_frag; 1430 int idx; 1431 1432 idx = D_IDX(rx_ring, rx_ring->rd_p); 1433 1434 rxd = &rx_ring->rxds[idx]; 1435 if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) 1436 return false; 1437 1438 /* Memory barrier to ensure that we won't do other reads 1439 * before the DD bit. 1440 */ 1441 dma_rmb(); 1442 1443 rx_ring->rd_p++; 1444 1445 rxbuf = &rx_ring->rxbufs[idx]; 1446 meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; 1447 data_len = le16_to_cpu(rxd->rxd.data_len); 1448 pkt_len = data_len - meta_len; 1449 1450 pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; 1451 if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) 1452 pkt_off += meta_len; 1453 else 1454 pkt_off += dp->rx_offset; 1455 meta_off = pkt_off - meta_len; 1456 1457 /* Stats update */ 1458 u64_stats_update_begin(&r_vec->rx_sync); 1459 r_vec->rx_pkts++; 1460 r_vec->rx_bytes += pkt_len; 1461 u64_stats_update_end(&r_vec->rx_sync); 1462 1463 nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len); 1464 1465 if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) { 1466 nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n", 1467 meta_len); 1468 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); 1469 return true; 1470 } 1471 1472 skb = build_skb(rxbuf->frag, dp->fl_bufsz); 1473 if (unlikely(!skb)) { 1474 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); 1475 return true; 1476 } 1477 new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr); 1478 if (unlikely(!new_frag)) { 1479 nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); 1480 return true; 1481 } 1482 1483 nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); 1484 1485 nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); 1486 1487 skb_reserve(skb, pkt_off); 1488 skb_put(skb, pkt_len); 1489 1490 nfp_app_ctrl_rx(nn->app, skb); 1491 1492 return true; 1493 } 1494 1495 static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) 1496 { 1497 struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; 1498 struct nfp_net *nn = r_vec->nfp_net; 1499 struct nfp_net_dp *dp = &nn->dp; 1500 unsigned int budget = 512; 1501 1502 while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) 1503 continue; 1504 1505 return budget; 1506 } 1507 1508 void nfp_nfdk_ctrl_poll(struct tasklet_struct *t) 1509 { 1510 struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); 1511 1512 spin_lock(&r_vec->lock); 1513 nfp_nfdk_tx_complete(r_vec->tx_ring, 0); 1514 __nfp_ctrl_tx_queued(r_vec); 1515 spin_unlock(&r_vec->lock); 1516 1517 if (nfp_ctrl_rx(r_vec)) { 1518 nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); 1519 } else { 1520 tasklet_schedule(&r_vec->tasklet); 1521 nn_dp_warn(&r_vec->nfp_net->dp, 1522 "control message budget exceeded!\n"); 1523 } 1524 } 1525