// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

static inline void gve_tx_put_doorbell(struct gve_priv *priv,
				       struct gve_queue_resources *q_resources,
				       u32 val)
{
	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}

/* gvnic can only transmit from a Registered Segment.
 * We copy skb payloads into the registered segment before writing Tx
 * descriptors and ringing the Tx doorbell.
 *
 * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
 * free allocations in the order they were allocated.
 */

static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
			  PAGE_KERNEL);
	if (unlikely(!fifo->base)) {
		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
			  fifo->qpl->id);
		return -ENOMEM;
	}

	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
	atomic_set(&fifo->available, fifo->size);
	fifo->head = 0;
	return 0;
}

static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	WARN(atomic_read(&fifo->available) != fifo->size,
	     "Releasing non-empty fifo");

	vunmap(fifo->base);
}

static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
					  size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_read(&fifo->available) <= bytes) ? false : true;
}

/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
 * @fifo: FIFO to allocate from
 * @bytes: Allocation size
 * @iov: Scatter-gather elements to fill with allocation fragment base/len
 *
 * Returns number of valid elements in iov[] or negative on error.
 *
 * Allocations from a given FIFO must be externally synchronized but concurrent
 * allocation and frees are allowed.
 */
static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
			     struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	u32 aligned_head;
	int nfrags = 0;

	if (!bytes)
		return 0;

	/* This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine
	 * because the FIFO head always starts aligned, and the FIFO's
	 * boundaries are aligned, so if there is space for the data, there is
	 * space for the padding to the next alignment.
	 */
	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
	     "Reached %s when there's not enough space in the fifo", __func__);

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/* If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of FIFO */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = L1_CACHE_ALIGN(fifo->head);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_sub(bytes + padding, &fifo->available);
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return nfrags;
}

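/* Illustrative example of gve_tx_alloc_fifo() (assuming 4096-byte pages and
 * 64-byte cachelines): with a 2-page FIFO (size = 8192) and head = 8100,
 * allocating 200 bytes wraps into two fragments: iov[0] = {offset 8100,
 * len 92} and iov[1] = {offset 0, len 108}. The new head (108) is then padded
 * up to the next cacheline (128), so 220 bytes (200 + 20 padding) are
 * consumed from fifo->available.
 */
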
/* gve_tx_free_fifo - Return space to Tx FIFO
 * @fifo: FIFO to return fragments to
 * @bytes: Bytes to free
 */
static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add(bytes, &fifo->available);
}

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake);

static void gve_tx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_tx_remove_from_block(priv, idx);
	slots = tx->mask + 1;
	gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
	netdev_tx_reset_queue(tx->netdev_txq);

	dma_free_coherent(hdev, sizeof(*tx->q_resources),
			  tx->q_resources, tx->q_resources_bus);
	tx->q_resources = NULL;

	if (!tx->raw_addressing) {
		gve_tx_fifo_release(priv, &tx->tx_fifo);
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
		tx->tx_fifo.qpl = NULL;
	}

	bytes = sizeof(*tx->desc) * slots;
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;

	vfree(tx->info);
	tx->info = NULL;

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots = priv->tx_desc_cnt;
	size_t bytes;

	/* Make sure everything is zeroed to start */
	memset(tx, 0, sizeof(*tx));
	spin_lock_init(&tx->clean_lock);
	tx->q_num = idx;

	tx->mask = slots - 1;

	/* alloc metadata */
	tx->info = vzalloc(sizeof(*tx->info) * slots);
	if (!tx->info)
		return -ENOMEM;

	/* alloc tx queue */
	bytes = sizeof(*tx->desc) * slots;
	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->desc)
		goto abort_with_info;

	tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
	tx->dev = &priv->pdev->dev;
	if (!tx->raw_addressing) {
		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
		if (!tx->tx_fifo.qpl)
			goto abort_with_desc;
		/* map Tx FIFO */
		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
			goto abort_with_qpl;
	}

	tx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*tx->q_resources),
				   &tx->q_resources_bus,
				   GFP_KERNEL);
	if (!tx->q_resources)
		goto abort_with_fifo;

	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
		  (unsigned long)tx->bus);
	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_tx_add_to_block(priv, idx);

	return 0;

abort_with_fifo:
	if (!tx->raw_addressing)
		gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
	if (!tx->raw_addressing)
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
abort_with_desc:
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;
abort_with_info:
	vfree(tx->info);
	tx->info = NULL;
	return -ENOMEM;
}

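/* gve_tx_alloc_rings - Allocate one GQI Tx ring per configured queue
 *
 * If any ring fails to allocate, the rings that were already set up are
 * freed again, so the caller sees an all-or-nothing result.
 */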
int gve_tx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_tx_free_ring(priv, j);
	}
	return err;
}

void gve_tx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);
}

/* gve_tx_avail - Calculates the number of slots available in the ring
 * @tx: tx ring to check
 *
 * Returns the number of slots available
 *
 * The capacity of the queue is mask + 1. We don't need to reserve an entry.
 **/
static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
{
	return tx->mask + 1 - (tx->req - tx->done);
}

static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
					      struct sk_buff *skb)
{
	int pad_bytes, align_hdr_pad;
	int bytes;
	int hlen;

	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
				 tcp_hdrlen(skb) : skb_headlen(skb);

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
						   hlen);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
	bytes = align_hdr_pad + pad_bytes + skb->len;

	return bytes;
}

/* The most descriptors we could need is MAX_SKB_FRAGS + 4 :
 * 1 for each skb frag
 * 1 for the skb linear portion
 * 1 for when tcp hdr needs to be in separate descriptor
 * 1 if the payload wraps to the beginning of the FIFO
 * 1 for metadata descriptor
 */
#define MAX_TX_DESC_NEEDED	(MAX_SKB_FRAGS + 4)
static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
{
	if (info->skb) {
		dma_unmap_single(dev, dma_unmap_addr(info, dma),
				 dma_unmap_len(info, len),
				 DMA_TO_DEVICE);
		dma_unmap_len_set(info, len, 0);
	} else {
		dma_unmap_page(dev, dma_unmap_addr(info, dma),
			       dma_unmap_len(info, len),
			       DMA_TO_DEVICE);
		dma_unmap_len_set(info, len, 0);
	}
}

/* Check if sufficient resources (descriptor ring space, FIFO space) are
 * available to transmit the given number of bytes.
 */
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	bool can_alloc = true;

	if (!tx->raw_addressing)
		can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);

	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
}

static_assert(NAPI_POLL_WEIGHT >= MAX_TX_DESC_NEEDED);

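/* Illustrative note: with the common MAX_SKB_FRAGS value of 17,
 * MAX_TX_DESC_NEEDED works out to 21 descriptors, so the queue is stopped
 * below whenever fewer than 21 slots are free, or (in the QPL format) the
 * FIFO cannot hold the skb's bytes, even after trying to reclaim completed
 * descriptors.
 */
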
/* Stops the queue if the skb cannot be transmitted. */
static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
			     struct sk_buff *skb)
{
	int bytes_required = 0;
	u32 nic_done;
	u32 to_do;
	int ret;

	if (!tx->raw_addressing)
		bytes_required = gve_skb_fifo_bytes_required(tx, skb);

	if (likely(gve_can_tx(tx, bytes_required)))
		return 0;

	ret = -EBUSY;
	spin_lock(&tx->clean_lock);
	nic_done = gve_tx_load_event_counter(priv, tx);
	to_do = nic_done - tx->done;

	/* Only try to clean if there is hope for TX */
	if (to_do + gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED) {
		if (to_do > 0) {
			to_do = min_t(u32, to_do, NAPI_POLL_WEIGHT);
			gve_clean_tx_done(priv, tx, to_do, false);
		}
		if (likely(gve_can_tx(tx, bytes_required)))
			ret = 0;
	}
	if (ret) {
		/* No space, so stop the queue */
		tx->stop_queue++;
		netif_tx_stop_queue(tx->netdev_txq);
	}
	spin_unlock(&tx->clean_lock);

	return ret;
}

static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
				 struct sk_buff *skb, bool is_gso,
				 int l4_hdr_offset, u32 desc_cnt,
				 u16 hlen, u64 addr)
{
	/* l4_hdr_offset and csum_offset are in units of 16-bit words */
	if (is_gso) {
		pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->pkt.type_flags = GVE_TXD_STD;
		pkt_desc->pkt.l4_csum_offset = 0;
		pkt_desc->pkt.l4_hdr_offset = 0;
	}
	pkt_desc->pkt.desc_cnt = desc_cnt;
	pkt_desc->pkt.len = cpu_to_be16(skb->len);
	pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}

static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc,
				 struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(mtd_desc->mtd) != sizeof(mtd_desc->pkt));

	mtd_desc->mtd.type_flags = GVE_TXD_MTD | GVE_MTD_SUBTYPE_PATH;
	mtd_desc->mtd.path_state = GVE_MTD_PATH_STATE_DEFAULT |
				   GVE_MTD_PATH_HASH_L4;
	mtd_desc->mtd.path_hash = cpu_to_be32(skb->hash);
	mtd_desc->mtd.reserved0 = 0;
	mtd_desc->mtd.reserved1 = 0;
}

static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
				 struct sk_buff *skb, bool is_gso,
				 u16 len, u64 addr)
{
	seg_desc->seg.type_flags = GVE_TXD_SEG;
	if (is_gso) {
		if (skb_is_gso_v6(skb))
			seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
		seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
		seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
	}
	seg_desc->seg.seg_len = cpu_to_be16(len);
	seg_desc->seg.seg_addr = cpu_to_be64(addr);
}

static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
				    u64 iov_offset, u64 iov_len)
{
	u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
	u64 first_page = iov_offset / PAGE_SIZE;
	u64 page;

	for (page = first_page; page <= last_page; page++)
		dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE);
}

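/* gve_tx_add_skb_copy - Post an skb on the QPL (copy) Tx path
 *
 * Copies the first segment (the linear portion, or everything up to the end
 * of the TCP header for GSO skbs) and then the remaining payload into the
 * registered Tx FIFO, filling a packet descriptor, an optional metadata
 * descriptor, and one segment descriptor per payload fragment. Returns the
 * number of descriptors written.
 */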
static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
			       struct sk_buff *skb)
{
	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	int mtd_desc_nr = !!skb->l4_hash;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	int payload_iov = 2;
	int copy_offset;
	u32 next_idx;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want the tcp header in the first segment;
	 * otherwise we want the linear portion of the skb (which will contain
	 * the checksum because skb->csum_start and skb->csum_offset are given
	 * relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
			skb_headlen(skb);

	info->skb = skb;
	/* We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
				       &info->iov[0]);
	WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
	payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
					   &info->iov[payload_iov]);

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     1 + mtd_desc_nr + payload_nfrags, hlen,
			     info->iov[hdr_nfrags - 1].iov_offset);

	skb_copy_bits(skb, 0,
		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
		      hlen);
	gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
				info->iov[hdr_nfrags - 1].iov_offset,
				info->iov[hdr_nfrags - 1].iov_len);
	copy_offset = hlen;

	if (mtd_desc_nr) {
		next_idx = (tx->req + 1) & tx->mask;
		gve_tx_fill_mtd_desc(&tx->desc[next_idx], skb);
	}

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc[next_idx];

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
				     info->iov[i].iov_len,
				     info->iov[i].iov_offset);

		skb_copy_bits(skb, copy_offset,
			      tx->tx_fifo.base + info->iov[i].iov_offset,
			      info->iov[i].iov_len);
		gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
					info->iov[i].iov_offset,
					info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	return 1 + mtd_desc_nr + payload_nfrags;
}

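/* gve_tx_add_skb_no_copy - Post an skb on the raw-addressing (RDA) Tx path
 *
 * Instead of copying into a FIFO, DMA-maps the skb's linear portion and each
 * page fragment and points the descriptors at those mappings. Returns the
 * number of descriptors written, or 0 if a DMA mapping failed and the packet
 * is to be dropped.
 */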
static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
				  struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int hlen, num_descriptors, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *mtd_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	int mtd_desc_nr = !!skb->l4_hash;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	u64 addr;
	u32 len;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want only up to the tcp header in the
	 * first segment so it can be replicated efficiently for each segment;
	 * otherwise we want the linear portion of the skb (which will contain
	 * the checksum because skb->csum_start and skb->csum_offset are given
	 * relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb);
	len = skb_headlen(skb);

	info->skb = skb;

	addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(tx->dev, addr))) {
		tx->dma_mapping_error++;
		goto drop;
	}
	dma_unmap_len_set(info, len, len);
	dma_unmap_addr_set(info, dma, addr);

	num_descriptors = 1 + shinfo->nr_frags;
	if (hlen < len)
		num_descriptors++;
	if (mtd_desc_nr)
		num_descriptors++;

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     num_descriptors, hlen, addr);

	if (mtd_desc_nr) {
		idx = (idx + 1) & tx->mask;
		mtd_desc = &tx->desc[idx];
		gve_tx_fill_mtd_desc(mtd_desc, skb);
	}

	if (hlen < len) {
		/* For gso the rest of the linear portion of the skb needs to
		 * be in its own descriptor.
		 */
		len -= hlen;
		addr += hlen;
		idx = (idx + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		idx = (idx + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		len = skb_frag_size(frag);
		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr))) {
			tx->dma_mapping_error++;
			goto unmap_drop;
		}
		tx->info[idx].skb = NULL;
		dma_unmap_len_set(&tx->info[idx], len, len);
		dma_unmap_addr_set(&tx->info[idx], dma, addr);

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	}

	return num_descriptors;

unmap_drop:
	i += num_descriptors - shinfo->nr_frags;
	while (i--) {
		/* Skip metadata descriptor, if set */
		if (i == 1 && mtd_desc_nr == 1)
			continue;
		idx--;
		gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
	}
drop:
	tx->dropped_pkt++;
	return 0;
}

netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;
	int nsegs;

	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
	     "skb queue index out of range");
	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_maybe_stop_tx(priv, tx, skb))) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */

		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
		return NETDEV_TX_BUSY;
	}
	if (tx->raw_addressing)
		nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
	else
		nsegs = gve_tx_add_skb_copy(priv, tx, skb);

	/* If the packet is getting sent, we need to update the skb */
	if (nsegs) {
		netdev_tx_sent_queue(tx->netdev_txq, skb->len);
		skb_tx_timestamp(skb);
		tx->req += nsegs;
	} else {
		dev_kfree_skb_any(skb);
	}

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	/* Give packets to NIC. Even if this packet failed to send, the
	 * doorbell might need to be rung because of xmit_more.
	 */
	gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
	return NETDEV_TX_OK;
}

#define GVE_TX_START_THRESH	PAGE_SIZE

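/* gve_clean_tx_done - Reclaim completed Tx descriptors
 *
 * Walks up to @to_do completed descriptors, unmapping DMA buffers in the
 * raw-addressing case or returning FIFO space in the QPL case, updates the
 * ring statistics and BQL accounting, and, if @try_to_wake is set, restarts
 * a stopped queue once enough descriptors and FIFO space are free again.
 */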
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake)
{
	struct gve_tx_buffer_state *info;
	u64 pkts = 0, bytes = 0;
	size_t space_freed = 0;
	struct sk_buff *skb;
	int i, j;
	u32 idx;

	for (j = 0; j < to_do; j++) {
		idx = tx->done & tx->mask;
		netif_info(priv, tx_done, priv->dev,
			   "[%d] %s: idx=%d (req=%u done=%u)\n",
			   tx->q_num, __func__, idx, tx->req, tx->done);
		info = &tx->info[idx];
		skb = info->skb;

		/* Unmap the buffer */
		if (tx->raw_addressing)
			gve_tx_unmap_buf(tx->dev, info);
		tx->done++;
		/* Mark as free */
		if (skb) {
			info->skb = NULL;
			bytes += skb->len;
			pkts++;
			dev_consume_skb_any(skb);
			if (tx->raw_addressing)
				continue;
			/* FIFO free */
			for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
				space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
				info->iov[i].iov_len = 0;
				info->iov[i].iov_padding = 0;
			}
		}
	}

	if (!tx->raw_addressing)
		gve_tx_free_fifo(&tx->tx_fifo, space_freed);
	u64_stats_update_begin(&tx->statss);
	tx->bytes_done += bytes;
	tx->pkt_done += pkts;
	u64_stats_update_end(&tx->statss);
	netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);

	/* start the queue if we've stopped it */
#ifndef CONFIG_BQL
	/* Make sure that the doorbells are synced */
	smp_mb();
#endif
	if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
	    likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
		tx->wake_queue++;
		netif_tx_wake_queue(tx->netdev_txq);
	}

	return pkts;
}

u32 gve_tx_load_event_counter(struct gve_priv *priv,
			      struct gve_tx_ring *tx)
{
	u32 counter_index = be32_to_cpu(tx->q_resources->counter_index);
	__be32 counter = READ_ONCE(priv->counter_array[counter_index]);

	return be32_to_cpu(counter);
}

bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_priv *priv = block->priv;
	struct gve_tx_ring *tx = block->tx;
	u32 nic_done;
	u32 to_do;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	/* The xmit path may also try to clean completed packets in order to
	 * transmit; take the spin_lock() to avoid a cleaning conflict. It
	 * yields better concurrency between xmit and clean than netif's lock.
	 */
	spin_lock(&tx->clean_lock);
	/* Find out how much work there is to be done */
	nic_done = gve_tx_load_event_counter(priv, tx);
	to_do = min_t(u32, (nic_done - tx->done), budget);
	gve_clean_tx_done(priv, tx, to_do, true);
	spin_unlock(&tx->clean_lock);
	/* If we still have work we want to repoll */
	return nic_done != tx->done;
}

bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	u32 nic_done = gve_tx_load_event_counter(priv, tx);

	return nic_done != tx->done;
}