// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"

static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2);
}

static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT);
}

static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
				  struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}

static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (!skb->encapsulation) {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = skb_tcp_all_headers(skb);
	} else {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_tcp_all_headers(skb);
	}

	segs = skb_shinfo(skb)->gso_segs;
	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;

	/* Note: TSO of the packet with metadata prepended to skb is not
	 * supported yet, in which case l3/l4_offset and lso_hdrlen need
	 * to be correctly handled here.
	 * Concern:
	 * The driver doesn't have md_bytes easily available at this point.
	 * The PCI.IN PD ME won't have md_bytes bytes to add to lso_hdrlen,
	 * so it needs the full length there. The app MEs might prefer
	 * l3_offset and l4_offset relative to the start of packet data,
	 * but could probably cope with it being relative to the CTM buf
	 * data offset.
	 */
	txd.l3_offset = l3_offset;
	txd.l4_offset = l4_offset;
	txd.lso_meta_res = 0;
	txd.mss = cpu_to_le16(mss);
	txd.lso_hdrlen = hdrlen;
	txd.lso_totsegs = segs;

	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}

static u64
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return flags;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ?
		inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (!skb->encapsulation) {
		r_vec->hw_csum_tx += pkt_cnt;
	} else {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}

static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      unsigned int nr_frags, struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));

	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Under count by 1 (don't count meta) for the round down to work out */
	n_descs += !!skb_is_gso(skb);

	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

static int nfp_nfdk_prep_port_id(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;

	if (likely(!md_dst))
		return 0;
	if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
		return 0;

	/* Note: TSO of an skb with metadata prepended is not supported.
	 * See the comments in `nfp_nfdk_tx_tso` for details.
	 */
	if (unlikely(md_dst && skb_is_gso(skb)))
		return -EOPNOTSUPP;

	if (unlikely(skb_cow_head(skb, sizeof(md_dst->u.port_info.port_id))))
		return -ENOMEM;

	data = skb_push(skb, sizeof(md_dst->u.port_info.port_id));
	put_unaligned_be32(md_dst->u.port_info.port_id, data);

	return sizeof(md_dst->u.port_info.port_id);
}

static int
nfp_nfdk_prep_tx_meta(struct nfp_app *app, struct sk_buff *skb,
		      struct nfp_net_r_vector *r_vec)
{
	unsigned char *data;
	int res, md_bytes;
	u32 meta_id = 0;

	res = nfp_nfdk_prep_port_id(skb);
	if (unlikely(res <= 0))
		return res;

	md_bytes = res;
	meta_id = NFP_NET_META_PORTID;

	if (unlikely(skb_cow_head(skb, sizeof(meta_id))))
		return -ENOMEM;

	md_bytes += sizeof(meta_id);

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

	data = skb_push(skb, sizeof(meta_id));
	put_unaligned_be32(meta_id, data);

	return NFDK_DESC_TX_CHAIN_META;
}

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb: SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	metadata = nfp_nfdk_prep_tx_meta(nn->app, skb, r_vec);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb))
		goto err_flush;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger DMA descriptors
	 * and is handled with the fragment loop.
	 */
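	/* Each gather descriptor carries a chunk length stored as (len - 1);
	 * FIELD_PREP() above truncated the head chunk to the field width, so
	 * dma_len/dma_addr now describe whatever is left of the linear area.
	 * The loop below keeps emitting descriptors for the remainder of the
	 * current buffer, then maps and chains the next frag, until all data
	 * is covered.  The last descriptor written gets NFDK_DESC_TX_EOP set
	 * once the loop exits.
	 */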
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;

	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_desc_set_dma_addr_48b(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf points at the next-to-use slot */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 * @budget: NAPI budget (only used as bool to determine if in NAPI context)
 */
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 done_pkts = 0, done_bytes = 0;
	struct nfp_nfdk_tx_buf *ktxbufs;
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;
	u32 rd_p, qcp_rd_p;
	int todo;

	rd_p = tx_ring->rd_p;
	if (tx_ring->wr_p == rd_p)
		return;
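
	/* The completion walk below re-derives how many descriptors each
	 * finished packet consumed: one for the metadata descriptor, the
	 * gather descriptors covering the linear area and each frag, plus
	 * one more for the LSO descriptor of GSO packets.  A txbuf with no
	 * skb marks a block that was closed early with NOP descriptors; it
	 * is skipped in one step via D_BLOCK_CPL().
	 */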

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
	ktxbufs = tx_ring->ktxbufs;

	while (todo > 0) {
		const skb_frag_t *frag, *fend;
		unsigned int size, n_descs = 1;
		struct nfp_nfdk_tx_buf *txbuf;
		struct sk_buff *skb;

		txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
		skb = txbuf->skb;
		txbuf++;

		/* Closed block */
		if (!skb) {
			n_descs = D_BLOCK_CPL(rd_p);
			goto next;
		}

		/* Unmap head */
		size = skb_headlen(skb);
		n_descs += nfp_nfdk_headlen_to_segs(size);
		dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
		txbuf++;

		/* Unmap frags */
		frag = skb_shinfo(skb)->frags;
		fend = frag + skb_shinfo(skb)->nr_frags;
		for (; frag < fend; frag++) {
			size = skb_frag_size(frag);
			n_descs += DIV_ROUND_UP(size,
						NFDK_TX_MAX_DATA_PER_DESC);
			dma_unmap_page(dev, txbuf->dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
			txbuf++;
		}

		if (!skb_is_gso(skb)) {
			done_bytes += skb->len;
			done_pkts++;
		} else {
			done_bytes += txbuf->real_len;
			done_pkts += txbuf->pkt_cnt;
			n_descs++;
		}

		napi_consume_skb(skb, budget);
next:
		rd_p += n_descs;
		todo -= n_descs;
	}

	tx_ring->rd_p = rd_p;
	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	if (!dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

/* Receive processing */
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag;

	if (!dp->xdp_prog) {
		frag = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!frag))
			return NULL;
	} else {
		struct page *page;

		page = dev_alloc_page();
		if (unlikely(!page))
			return NULL;
		frag = page_address(page);
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}

/**
 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp: NFP Net data path struct
 * @rx_ring: RX ring structure
 * @frag: page fragment buffer
 * @dma_addr: DMA address of skb mapping
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
				  dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}

/**
 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp: NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
				    struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp: NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd: Pointer to RX descriptor
 * @meta: Parsed metadata prepend
 * @skb: Pointer to SKB
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
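	/* Each of the two checks below bumps the checksum-unnecessary state:
	 * the first CSUM_OK flag switches the skb to CHECKSUM_UNNECESSARY,
	 * and a second (inner) CSUM_OK raises skb->csum_level so the stack
	 * knows the inner headers were verified as well.
	 */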
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}

static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		meta->hash_type = PKT_HASH_TYPE_L3;
		break;
	default:
		meta->hash_type = PKT_HASH_TYPE_L4;
		break;
	}

	meta->hash = get_unaligned_be32(hash);
}

static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info, vlan_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_VLAN:
			vlan_info = get_unaligned_be32(data);
			if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) {
				meta->vlan.stripped = true;
				meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK,
							    vlan_info);
				meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK,
							   vlan_info);
			}
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
		default:
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data != pkt;
}

static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		 struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* The skb is built on top of the frag, so freeing the skb would free
	 * the frag too; take an extra page reference so the frag can be reused.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}

static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 qcp_rd_p, done = 0;
	bool done_all;
	int todo;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (todo > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + done);
		struct nfp_nfdk_tx_buf *txbuf;
		unsigned int step = 1;

		txbuf = &tx_ring->ktxbufs[idx];
		if (!txbuf->raw)
			goto next;

		if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) {
			WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			goto next;
		}

		/* Two successive txbufs are used to stash virtual and dma
		 * address respectively, recycle and clean them here.
		 */
		nfp_nfdk_rx_give_one(dp, rx_ring,
				     (void *)NFDK_TX_BUF_PTR(txbuf[0].val),
				     txbuf[1].dma_addr);
		txbuf[0].raw = 0;
		txbuf[1].raw = 0;
		step = 2;

		u64_stats_update_begin(&r_vec->tx_sync);
		/* Note: tx_bytes not accumulated. */
		r_vec->tx_pkts++;
		u64_stats_update_end(&r_vec->tx_sync);
next:
		todo -= step;
		done += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done);
	tx_ring->rd_p += done;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}

static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grew the packet beyond the DMA area */
	if (pkt_len + dma_off > dma_map_sz)
		return false;

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if the packet would cross a block boundary */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->ktxbufs[wr_idx];

	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}

/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget: NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf = &rx_ring->rxbufs[idx];
		/*          < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |            packet            | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
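		/* Worked example (values assumed for illustration): with a
		 * fixed rx_offset of 64 and meta_len of 8, the packet starts
		 * at headroom + rx_dma_off + 64 and the metadata at
		 * pkt_off - 8.  With the dynamic scheme (rx_offset == 0) the
		 * packet simply starts meta_len bytes into the data area.
		 */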
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (meta_len) {
			if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
							 rxbuf->frag + meta_off,
							 rxbuf->frag + pkt_off,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}
		}

		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
								  tx_ring,
								  rxbuf,
								  dma_off,
								  pkt_len,
								  &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
				continue;
			}
		}

		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);

		if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, NULL, skb);
			continue;
		}

		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_nfdk_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}

/**
 * nfp_nfdk_poll() - napi poll function
 * @napi: NAPI structure
 * @budget: NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}

/* Control device data path
 */

bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int dma_len, type;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	u64 metadata = 0;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
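		/* Ring full: park the skb on the vector's backlog queue.  A
		 * fresh skb goes to the tail; one we had already dequeued
		 * (old == true) goes back to the head so the original
		 * ordering of queued control messages is preserved.
		 */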
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return NETDEV_TX_BUSY;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		metadata = NFDK_DESC_TX_CHAIN_META;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
				   FIELD_PREP(NFDK_META_FIELDS,
					      NFP_NET_META_PORTID),
				   skb_push(skb, 4));
	}

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb))
		goto err_free;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = cpu_to_le64(metadata);
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), 0, cnt);
	txbuf--;
	dma_unmap_single(dp->dev, txbuf->dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	txbuf->raw = 0;
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
}

static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

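	/* With app metadata enabled, control frames from the FW are expected
	 * to carry an 8-byte prepend: a NFP_NET_META_PORTID type word
	 * followed by the NFP_META_PORT_ID_CTRL tag, which is what the check
	 * below validates before the frame is handed to the app.
	 */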
	if (meta_len != 8)
		return false;

	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);

	return (meta_type == NFP_NET_META_PORTID &&
		meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	struct nfp_net_rx_buf *rxbuf;
	struct nfp_net_rx_desc *rxd;
	dma_addr_t new_dma_addr;
	struct sk_buff *skb;
	void *new_frag;
	int idx;

	idx = D_IDX(rx_ring, rx_ring->rd_p);

	rxd = &rx_ring->rxds[idx];
	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;

	rxbuf = &rx_ring->rxbufs[idx];
	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		pkt_off += meta_len;
	else
		pkt_off += dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}
	new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
		continue;

	return budget;
}

void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
	}
}