// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"

static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2);
}

static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT);
}

static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
				  struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}

static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (!skb->encapsulation) {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = skb_tcp_all_headers(skb);
	} else {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_tcp_all_headers(skb);
	}

	segs = skb_shinfo(skb)->gso_segs;
	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;

	/* Note: TSO of a packet with metadata prepended to the skb is not
	 * supported yet, in which case l3/l4_offset and lso_hdrlen need to
	 * be handled correctly here.
	 * Concern:
	 * The driver doesn't have md_bytes easily available at this point.
	 * The PCI.IN PD ME won't have md_bytes bytes to add to lso_hdrlen,
	 * so it needs the full length there. The app MEs might prefer
	 * l3_offset and l4_offset relative to the start of packet data,
	 * but could probably cope with it being relative to the CTM buf
	 * data offset.
	 */
	txd.l3_offset = l3_offset;
	txd.l4_offset = l4_offset;
	txd.lso_meta_res = 0;
	txd.mss = cpu_to_le16(mss);
	txd.lso_hdrlen = hdrlen;
	txd.lso_totsegs = segs;

	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}

static u64
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return flags;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
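
	/* iph and ipv6h point at the same (outer or inner) network header;
	 * the version nibble sits in the same place for IPv4 and IPv6, so
	 * checking both fields below is safe regardless of the L3 protocol.
	 */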

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (!skb->encapsulation) {
		r_vec->hw_csum_tx += pkt_cnt;
	} else {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}

static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      unsigned int nr_frags, struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));

	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Under count by 1 (don't count meta) for the round down to work out */
	n_descs += !!skb_is_gso(skb);

	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

static int nfp_nfdk_prep_port_id(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;

	if (likely(!md_dst))
		return 0;
	if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
		return 0;

	/* Note: Unsupported case when TSO a skb with metadata prepended.
	 * See the comments in `nfp_nfdk_tx_tso` for details.
	 */
	if (unlikely(md_dst && skb_is_gso(skb)))
		return -EOPNOTSUPP;

	if (unlikely(skb_cow_head(skb, sizeof(md_dst->u.port_info.port_id))))
		return -ENOMEM;

	data = skb_push(skb, sizeof(md_dst->u.port_info.port_id));
	put_unaligned_be32(md_dst->u.port_info.port_id, data);

	return sizeof(md_dst->u.port_info.port_id);
}

static int
nfp_nfdk_prep_tx_meta(struct nfp_app *app, struct sk_buff *skb,
		      struct nfp_net_r_vector *r_vec)
{
	unsigned char *data;
	int res, md_bytes;
	u32 meta_id = 0;

	res = nfp_nfdk_prep_port_id(skb);
	if (unlikely(res <= 0))
		return res;

	md_bytes = res;
	meta_id = NFP_NET_META_PORTID;

	if (unlikely(skb_cow_head(skb, sizeof(meta_id))))
		return -ENOMEM;

	md_bytes += sizeof(meta_id);

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

	data = skb_push(skb, sizeof(meta_id));
	put_unaligned_be32(meta_id, data);

	return NFDK_DESC_TX_CHAIN_META;
}

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	metadata = nfp_nfdk_prep_tx_meta(nn->app, skb, r_vec);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb))
		goto err_flush;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger DMA descriptors
	 * and is handled with the fragment loop.
	 */
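	/* Gather descriptors encode the chunk length minus one, so the
	 * FIELD_PREP() masking above and in the loop below yields both the
	 * value written to the descriptor and the number of bytes consumed
	 * (hence the "+ 1" when advancing dma_addr).
	 */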
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;

	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_desc_set_dma_addr_48b(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf pointed to the next-to-use */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 * @budget:  NAPI budget (only used as bool to determine if in NAPI context)
 */
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 done_pkts = 0, done_bytes = 0;
	struct nfp_nfdk_tx_buf *ktxbufs;
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;
	u32 rd_p, qcp_rd_p;
	int todo;

	rd_p = tx_ring->rd_p;
	if (tx_ring->wr_p == rd_p)
		return;
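
	/* Descriptor usage is not recorded at TX time, so the loop below
	 * recomputes it per packet: one slot for the metadata descriptor,
	 * plus the head/frag gather slots, plus one more for the TSO
	 * descriptor of GSO skbs. A NULL skb marks a block that was closed
	 * early and padded with NOP descriptors.
	 */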

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
	ktxbufs = tx_ring->ktxbufs;

	while (todo > 0) {
		const skb_frag_t *frag, *fend;
		unsigned int size, n_descs = 1;
		struct nfp_nfdk_tx_buf *txbuf;
		struct sk_buff *skb;

		txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
		skb = txbuf->skb;
		txbuf++;

		/* Closed block */
		if (!skb) {
			n_descs = D_BLOCK_CPL(rd_p);
			goto next;
		}

		/* Unmap head */
		size = skb_headlen(skb);
		n_descs += nfp_nfdk_headlen_to_segs(size);
		dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
		txbuf++;

		/* Unmap frags */
		frag = skb_shinfo(skb)->frags;
		fend = frag + skb_shinfo(skb)->nr_frags;
		for (; frag < fend; frag++) {
			size = skb_frag_size(frag);
			n_descs += DIV_ROUND_UP(size,
						NFDK_TX_MAX_DATA_PER_DESC);
			dma_unmap_page(dev, txbuf->dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
			txbuf++;
		}

		if (!skb_is_gso(skb)) {
			done_bytes += skb->len;
			done_pkts++;
		} else {
			done_bytes += txbuf->real_len;
			done_pkts += txbuf->pkt_cnt;
			n_descs++;
		}

		napi_consume_skb(skb, budget);
next:
		rd_p += n_descs;
		todo -= n_descs;
	}

	tx_ring->rd_p = rd_p;
	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	if (!dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

/* Receive processing */
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag;

	if (!dp->xdp_prog) {
		frag = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!frag))
			return NULL;
	} else {
		struct page *page;

		page = dev_alloc_page();
		if (unlikely(!page))
			return NULL;
		frag = page_address(page);
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}

/**
 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp:	      NFP Net data path struct
 * @rx_ring:  RX ring structure
 * @frag:     page fragment buffer
 * @dma_addr: DMA address of skb mapping
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
				  dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}

/**
 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp:	     NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
				    struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp:	   NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd:   Pointer to RX descriptor
 * @meta:  Parsed metadata prepend
 * @skb:   Pointer to SKB
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
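	/* Each match below bumps the checksum-unnecessary level via
	 * __skb_incr_checksum_unnecessary(), so an outer hit followed by an
	 * inner hit reports two validated checksums to the stack.
	 */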
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}

static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		meta->hash_type = PKT_HASH_TYPE_L3;
		break;
	default:
		meta->hash_type = PKT_HASH_TYPE_L4;
		break;
	}

	meta->hash = get_unaligned_be32(hash);
}

static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
		default:
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data != pkt;
}

static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		 struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* The skb is built based on the frag; free_skb() would free the frag,
	 * so to be able to reuse it we need an extra ref.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}

static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 qcp_rd_p, done = 0;
	bool done_all;
	int todo;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (todo > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + done);
		struct nfp_nfdk_tx_buf *txbuf;
		unsigned int step = 1;

		txbuf = &tx_ring->ktxbufs[idx];
		if (!txbuf->raw)
			goto next;

		if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) {
			WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			goto next;
		}

		/* Two successive txbufs are used to stash virtual and dma
		 * address respectively, recycle and clean them here.
		 */
		nfp_nfdk_rx_give_one(dp, rx_ring,
				     (void *)NFDK_TX_BUF_PTR(txbuf[0].val),
				     txbuf[1].dma_addr);
		txbuf[0].raw = 0;
		txbuf[1].raw = 0;
		step = 2;

		u64_stats_update_begin(&r_vec->tx_sync);
		/* Note: tx_bytes not accumulated. */
		r_vec->tx_pkts++;
		u64_stats_update_end(&r_vec->tx_sync);
next:
		todo -= step;
		done += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done);
	tx_ring->rd_p += done;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}

static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grows the packet beyond the DMA area */
	if (pkt_len + dma_off > dma_map_sz)
		return false;

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if we cross a block boundary */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->ktxbufs[wr_idx];

	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}

/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget:  NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf = &rx_ring->rxbufs[idx];
		/*	     < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |             packet           | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (meta_len) {
			if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
							 rxbuf->frag + meta_off,
							 rxbuf->frag + pkt_off,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}
		}

		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
								  tx_ring,
								  rxbuf,
								  dma_off,
								  pkt_len,
								  &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
				continue;
			}
		}

		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);

		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       le16_to_cpu(rxd->rxd.vlan));
		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_nfdk_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}

/**
 * nfp_nfdk_poll() - napi poll function
 * @napi:   NAPI structure
 * @budget: NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}

/* Control device data path
 */

bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int dma_len, type;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	u64 metadata = 0;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return NETDEV_TX_BUSY;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		metadata = NFDK_DESC_TX_CHAIN_META;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
				   FIELD_PREP(NFDK_META_FIELDS,
					      NFP_NET_META_PORTID),
				   skb_push(skb, 4));
	}

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb))
		goto err_free;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = cpu_to_le64(metadata);
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), 0, cnt);
	txbuf--;
	dma_unmap_single(dp->dev, txbuf->dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	txbuf->raw = 0;
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
}

static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

	if (meta_len != 8)
		return false;
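
	/* Expect exactly the 8 byte control prepend: a NFP_NET_META_PORTID
	 * type word followed by the NFP_META_PORT_ID_CTRL tag.
	 */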

	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);

	return (meta_type == NFP_NET_META_PORTID &&
		meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	struct nfp_net_rx_buf *rxbuf;
	struct nfp_net_rx_desc *rxd;
	dma_addr_t new_dma_addr;
	struct sk_buff *skb;
	void *new_frag;
	int idx;

	idx = D_IDX(rx_ring, rx_ring->rd_p);

	rxd = &rx_ring->rxds[idx];
	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;

	rxbuf = &rx_ring->rxbufs[idx];
	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		pkt_off += meta_len;
	else
		pkt_off += dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}
	new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
		continue;

	return budget;
}

void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
	}
}