// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"

static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2);
}

static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT);
}

static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
				  struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}

static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (!skb->encapsulation) {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = skb_tcp_all_headers(skb);
	} else {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_tcp_all_headers(skb);
	}

	segs = skb_shinfo(skb)->gso_segs;
	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;

	txd.l3_offset = l3_offset;
	txd.l4_offset = l4_offset;
	txd.lso_meta_res = 0;
	txd.mss = cpu_to_le16(mss);
	txd.lso_hdrlen = hdrlen;
	txd.lso_totsegs = segs;

	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}

static u8
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return flags;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

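	/* Both header pointers are fetched up front; the version field sits
	 * in the same bits for IPv4 and IPv6, so iph->version below tells
	 * which of the two actually describes this packet.
	 */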
	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (!skb->encapsulation) {
		r_vec->hw_csum_tx += pkt_cnt;
	} else {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}

static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int nr_frags;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));
	nr_frags = skb_shinfo(skb)->nr_frags;
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Under count by 1 (don't count meta) for the round down to work out */
	n_descs += !!skb_is_gso(skb);

	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

static int
nfp_nfdk_prep_tx_meta(struct nfp_net_dp *dp, struct nfp_app *app,
		      struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;
	bool vlan_insert;
	u32 meta_id = 0;
	int md_bytes;

	if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX))
		md_dst = NULL;

	vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2);

	if (!(md_dst || vlan_insert))
		return 0;

	md_bytes = sizeof(meta_id) +
		   !!md_dst * NFP_NET_META_PORTID_SIZE +
		   vlan_insert * NFP_NET_META_VLAN_SIZE;

	if (unlikely(skb_cow_head(skb, md_bytes)))
		return -ENOMEM;

	data = skb_push(skb, md_bytes) + md_bytes;
	if (md_dst) {
		data -= NFP_NET_META_PORTID_SIZE;
		put_unaligned_be32(md_dst->u.port_info.port_id, data);
		meta_id = NFP_NET_META_PORTID;
	}
	if (vlan_insert) {
		data -= NFP_NET_META_VLAN_SIZE;
		/* data type of skb->vlan_proto is __be16
		 * so it fills metadata without calling put_unaligned_be16
		 */
		memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto));
		put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto));
		meta_id <<= NFP_NET_META_FIELD_SIZE;
		meta_id |= NFP_NET_META_VLAN;
	}

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

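	/* The length/field-type word sits at the very front of the prepend
	 * and describes the metadata layout that follows it.
	 */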
	data -= sizeof(meta_id);
	put_unaligned_be32(meta_id, data);

	return NFDK_DESC_TX_CHAIN_META;
}

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	metadata = nfp_nfdk_prep_tx_meta(dp, nn->app, skb);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_flush;

	/* nr_frags will change after skb_linearize so we get nr_frags after
	 * the nfp_nfdk_tx_maybe_close_block() call
	 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;

	/* We will do our best to pass as much data as we can in the
	 * descriptor and we need to make sure the first descriptor includes
	 * the whole head since there is a limitation on the firmware side.
	 * Sometimes the value of "dma_len & NFDK_DESC_TX_DMA_LEN_HEAD" will
	 * be less than headlen.
	 */
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger DMA descriptors
	 * and is handled with the fragment loop.
	 */
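	/* Length fields in the descriptors hold "chunk length - 1", which is
	 * why dma_len stays pre-decremented and dma_addr advances by the
	 * consumed chunk length plus one.
	 */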
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;

	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_desc_set_dma_addr_48b(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf pointed to the next-to-use */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
 * @tx_ring:	TX ring structure
 * @budget:	NAPI budget (only used as bool to determine if in NAPI context)
 */
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 done_pkts = 0, done_bytes = 0;
	struct nfp_nfdk_tx_buf *ktxbufs;
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;
	u32 rd_p, qcp_rd_p;
	int todo;

	rd_p = tx_ring->rd_p;
	if (tx_ring->wr_p == rd_p)
		return;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
	ktxbufs = tx_ring->ktxbufs;

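	/* The descriptor count of each completed packet is not stored at
	 * transmit time, so it is reconstructed from the skb layout while
	 * walking the completions below.
	 */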
	while (todo > 0) {
		const skb_frag_t *frag, *fend;
		unsigned int size, n_descs = 1;
		struct nfp_nfdk_tx_buf *txbuf;
		struct sk_buff *skb;

		txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
		skb = txbuf->skb;
		txbuf++;

		/* Closed block */
		if (!skb) {
			n_descs = D_BLOCK_CPL(rd_p);
			goto next;
		}

		/* Unmap head */
		size = skb_headlen(skb);
		n_descs += nfp_nfdk_headlen_to_segs(size);
		dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
		txbuf++;

		/* Unmap frags */
		frag = skb_shinfo(skb)->frags;
		fend = frag + skb_shinfo(skb)->nr_frags;
		for (; frag < fend; frag++) {
			size = skb_frag_size(frag);
			n_descs += DIV_ROUND_UP(size,
						NFDK_TX_MAX_DATA_PER_DESC);
			dma_unmap_page(dev, txbuf->dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
			txbuf++;
		}

		if (!skb_is_gso(skb)) {
			done_bytes += skb->len;
			done_pkts++;
		} else {
			done_bytes += txbuf->real_len;
			done_pkts += txbuf->pkt_cnt;
			n_descs++;
		}

		napi_consume_skb(skb, budget);
next:
		rd_p += n_descs;
		todo -= n_descs;
	}

	tx_ring->rd_p = rd_p;
	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	if (!dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

/* Receive processing */
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag;

	if (!dp->xdp_prog) {
		frag = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!frag))
			return NULL;
	} else {
		struct page *page;

		page = dev_alloc_page();
		if (unlikely(!page))
			return NULL;
		frag = page_address(page);
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}

/**
 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring structure
 * @frag:	page fragment buffer
 * @dma_addr:	DMA address of skb mapping
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
				  dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}

/**
 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp:	     NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
				    struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp:    NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd:   Pointer to RX descriptor
 * @meta:  Parsed metadata prepend
 * @skb:   Pointer to SKB
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}

static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		meta->hash_type = PKT_HASH_TYPE_L3;
		break;
	default:
		meta->hash_type = PKT_HASH_TYPE_L4;
		break;
	}

	meta->hash = get_unaligned_be32(hash);
}

static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info, vlan_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_VLAN:
			vlan_info = get_unaligned_be32(data);
			if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) {
				meta->vlan.stripped = true;
				meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK,
							    vlan_info);
				meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK,
							   vlan_info);
			}
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
		default:
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data != pkt;
}

static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		 struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* skb is built based on the frag, free_skb() would free the frag
	 * so to be able to reuse it we need an extra ref.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}

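/* Reclaim descriptors used by XDP_TX frames and return their buffers to the
 * RX ring. Returns true once all pending completions have been processed.
 */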
static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 qcp_rd_p, done = 0;
	bool done_all;
	int todo;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (todo > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + done);
		struct nfp_nfdk_tx_buf *txbuf;
		unsigned int step = 1;

		txbuf = &tx_ring->ktxbufs[idx];
		if (!txbuf->raw)
			goto next;

		if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) {
			WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			goto next;
		}

		/* Two successive txbufs are used to stash virtual and dma
		 * address respectively, recycle and clean them here.
		 */
		nfp_nfdk_rx_give_one(dp, rx_ring,
				     (void *)NFDK_TX_BUF_PTR(txbuf[0].val),
				     txbuf[1].dma_addr);
		txbuf[0].raw = 0;
		txbuf[1].raw = 0;
		step = 2;

		u64_stats_update_begin(&r_vec->tx_sync);
		/* Note: tx_bytes not accumulated. */
		r_vec->tx_pkts++;
		u64_stats_update_end(&r_vec->tx_sync);
next:
		todo -= step;
		done += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done);
	tx_ring->rd_p += done;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}

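/* Transmit the frame resulting from an XDP_TX action by reusing the RX
 * buffer mapping in place; the buffer is recycled back to the RX ring once
 * the device reports the transmission complete.
 */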
static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grows the packet beyond the DMA area */
	if (pkt_len + dma_off > dma_map_sz)
		return false;

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if cross block boundary */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->ktxbufs[wr_idx];

	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}

/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring:   RX ring to receive from
 * @budget:    NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

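	/* Each iteration below consumes one RX descriptor; its buffer is
	 * either recycled straight back to the freelist (XDP verdicts,
	 * control messages, errors) or replaced and wrapped into an skb.
	 */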
	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf =	&rx_ring->rxbufs[idx];
		/*         <            meta_len             >
		 *  <-- [rx_offset] -->
		 * ---------------------------------------------------------
		 * | [XX] |  metadata  |             packet          | XXXX |
		 * ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (meta_len) {
			if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
							 rxbuf->frag + meta_off,
							 rxbuf->frag + pkt_off,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}
		}

		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
								  tx_ring,
								  rxbuf,
								  dma_off,
								  pkt_len,
								  &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
				continue;
			}
		}

		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

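		/* Propagate the parsed prepend metadata (mark, hash, csum,
		 * VLAN) into the skb before handing it to the stack.
		 */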
		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);

		if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, NULL, skb);
			continue;
		}

		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_nfdk_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}

/**
 * nfp_nfdk_poll() - napi poll function
 * @napi:    NAPI structure
 * @budget:  NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}

/* Control device data path
 */

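/* Send a single control message. If the TX ring is full the message is put
 * back on the r_vec queue ("old" selects head or tail) and retried from the
 * control tasklet.
 */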
bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int dma_len, type;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	u64 metadata = 0;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return NETDEV_TX_BUSY;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		metadata = NFDK_DESC_TX_CHAIN_META;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
				   FIELD_PREP(NFDK_META_FIELDS,
					      NFP_NET_META_PORTID),
				   skb_push(skb, 4));
	}

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_free;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = cpu_to_le64(metadata);
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), 0, cnt);
	txbuf--;
	dma_unmap_single(dp->dev, txbuf->dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	txbuf->raw = 0;
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
}

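/* Validate the metadata prepend of a control message frame; when the app
 * uses control metadata this must be a single PORTID field addressed to
 * the control port.
 */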
static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

	if (meta_len != 8)
		return false;

	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);

	return (meta_type == NFP_NET_META_PORTID &&
		meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	struct nfp_net_rx_buf *rxbuf;
	struct nfp_net_rx_desc *rxd;
	dma_addr_t new_dma_addr;
	struct sk_buff *skb;
	void *new_frag;
	int idx;

	idx = D_IDX(rx_ring, rx_ring->rd_p);

	rxd = &rx_ring->rxds[idx];
	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;

	rxbuf = &rx_ring->rxbufs[idx];
	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		pkt_off += meta_len;
	else
		pkt_off += dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}
	new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
		continue;

	return budget;
}

void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
	}
}