// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

static inline void gve_tx_put_doorbell(struct gve_priv *priv,
				       struct gve_queue_resources *q_resources,
				       u32 val)
{
	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}

/* gvnic can only transmit from a Registered Segment.
 * We copy skb payloads into the registered segment before writing Tx
 * descriptors and ringing the Tx doorbell.
 *
 * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
 * free allocations in the order they were allocated.
 */

static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
			  PAGE_KERNEL);
	if (unlikely(!fifo->base)) {
		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
			  fifo->qpl->id);
		return -ENOMEM;
	}

	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
	atomic_set(&fifo->available, fifo->size);
	fifo->head = 0;
	return 0;
}

static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	WARN(atomic_read(&fifo->available) != fifo->size,
	     "Releasing non-empty fifo");

	vunmap(fifo->base);
}

static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
					  size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_read(&fifo->available) <= bytes) ? false : true;
}

/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
 * @fifo: FIFO to allocate from
 * @bytes: Allocation size
 * @iov: Scatter-gather elements to fill with allocation fragment base/len
 *
 * Returns number of valid elements in iov[] or negative on error.
 *
 * Allocations from a given FIFO must be externally synchronized but concurrent
 * allocation and frees are allowed.
 */
static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
			     struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	u32 aligned_head;
	int nfrags = 0;

	if (!bytes)
		return 0;

	/* This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine,
	 * because the FIFO head always starts aligned, and the FIFO's boundaries
	 * are aligned, so if there is space for the data, there is space for
	 * the padding to the next alignment.
	 */
	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
	     "Reached %s when there's not enough space in the fifo", __func__);

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/* If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
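		 * Callers check gve_tx_fifo_can_alloc() first, so bytes is
		 * always less than fifo->size and the allocation wraps around
		 * at most once; two iovec entries are therefore enough.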
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = L1_CACHE_ALIGN(fifo->head);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_sub(bytes + padding, &fifo->available);
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return nfrags;
}

/* gve_tx_free_fifo - Return space to Tx FIFO
 * @fifo: FIFO to return fragments to
 * @bytes: Bytes to free
 */
static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add(bytes, &fifo->available);
}

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake);

static void gve_tx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_tx_remove_from_block(priv, idx);
	slots = tx->mask + 1;
	gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
	netdev_tx_reset_queue(tx->netdev_txq);

	dma_free_coherent(hdev, sizeof(*tx->q_resources),
			  tx->q_resources, tx->q_resources_bus);
	tx->q_resources = NULL;

	if (!tx->raw_addressing) {
		gve_tx_fifo_release(priv, &tx->tx_fifo);
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
		tx->tx_fifo.qpl = NULL;
	}

	bytes = sizeof(*tx->desc) * slots;
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;

	vfree(tx->info);
	tx->info = NULL;

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots = priv->tx_desc_cnt;
	size_t bytes;

	/* Make sure everything is zeroed to start */
	memset(tx, 0, sizeof(*tx));
	spin_lock_init(&tx->clean_lock);
	tx->q_num = idx;

	tx->mask = slots - 1;

	/* alloc metadata */
	tx->info = vzalloc(sizeof(*tx->info) * slots);
	if (!tx->info)
		return -ENOMEM;

	/* alloc tx queue */
	bytes = sizeof(*tx->desc) * slots;
	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->desc)
		goto abort_with_info;

	tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
	tx->dev = &priv->pdev->dev;
	if (!tx->raw_addressing) {
		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
		if (!tx->tx_fifo.qpl)
			goto abort_with_desc;
		/* map Tx FIFO */
		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
			goto abort_with_qpl;
	}

	tx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*tx->q_resources),
				   &tx->q_resources_bus,
				   GFP_KERNEL);
	if (!tx->q_resources)
		goto abort_with_fifo;

	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
		  (unsigned long)tx->bus);
	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_tx_add_to_block(priv, idx);

	return 0;

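	/* The error labels below fall through: jumping to one releases the
	 * resources set up before the failing step, in reverse order.
	 */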
abort_with_fifo:
	if (!tx->raw_addressing)
		gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
	if (!tx->raw_addressing)
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
abort_with_desc:
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;
abort_with_info:
	vfree(tx->info);
	tx->info = NULL;
	return -ENOMEM;
}

int gve_tx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Free any rings that were allocated before the error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_tx_free_ring(priv, j);
	}
	return err;
}

void gve_tx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);
}

/* gve_tx_avail - Calculates the number of slots available in the ring
 * @tx: tx ring to check
 *
 * Returns the number of slots available
 *
 * The capacity of the queue is mask + 1. We don't need to reserve an entry.
 **/
static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
{
	return tx->mask + 1 - (tx->req - tx->done);
}

static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
					      struct sk_buff *skb)
{
	int pad_bytes, align_hdr_pad;
	int bytes;
	int hlen;

	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
				 tcp_hdrlen(skb) : skb_headlen(skb);

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
						   hlen);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
	bytes = align_hdr_pad + pad_bytes + skb->len;

	return bytes;
}

/* The most descriptors we could need is MAX_SKB_FRAGS + 3: one for each skb
 * frag, +1 for the skb linear portion, +1 for when the tcp hdr needs to be in
 * its own descriptor, and +1 if the payload wraps to the beginning of the FIFO.
 */
#define MAX_TX_DESC_NEEDED (MAX_SKB_FRAGS + 3)
static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
{
	if (info->skb) {
		dma_unmap_single(dev, dma_unmap_addr(info, dma),
				 dma_unmap_len(info, len),
				 DMA_TO_DEVICE);
		dma_unmap_len_set(info, len, 0);
	} else {
		dma_unmap_page(dev, dma_unmap_addr(info, dma),
			       dma_unmap_len(info, len),
			       DMA_TO_DEVICE);
		dma_unmap_len_set(info, len, 0);
	}
}

/* Check if sufficient resources (descriptor ring space, FIFO space) are
 * available to transmit the given number of bytes.
 */
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	bool can_alloc = true;

	if (!tx->raw_addressing)
		can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);

	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
}

static_assert(NAPI_POLL_WEIGHT >= MAX_TX_DESC_NEEDED);

/* Stops the queue if the skb cannot be transmitted.
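 * Before stopping, it first tries, under tx->clean_lock, to reclaim descriptors
 * the NIC has already completed, so a transient shortage does not stop the queue.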
 */
static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
			     struct sk_buff *skb)
{
	int bytes_required = 0;
	u32 nic_done;
	u32 to_do;
	int ret;

	if (!tx->raw_addressing)
		bytes_required = gve_skb_fifo_bytes_required(tx, skb);

	if (likely(gve_can_tx(tx, bytes_required)))
		return 0;

	ret = -EBUSY;
	spin_lock(&tx->clean_lock);
	nic_done = gve_tx_load_event_counter(priv, tx);
	to_do = nic_done - tx->done;

	/* Only try to clean if there is hope for TX */
	if (to_do + gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED) {
		if (to_do > 0) {
			to_do = min_t(u32, to_do, NAPI_POLL_WEIGHT);
			gve_clean_tx_done(priv, tx, to_do, false);
		}
		if (likely(gve_can_tx(tx, bytes_required)))
			ret = 0;
	}
	if (ret) {
		/* No space, so stop the queue */
		tx->stop_queue++;
		netif_tx_stop_queue(tx->netdev_txq);
	}
	spin_unlock(&tx->clean_lock);

	return ret;
}

static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
				 struct sk_buff *skb, bool is_gso,
				 int l4_hdr_offset, u32 desc_cnt,
				 u16 hlen, u64 addr)
{
	/* l4_hdr_offset and csum_offset are in units of 16-bit words */
	if (is_gso) {
		pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->pkt.type_flags = GVE_TXD_STD;
		pkt_desc->pkt.l4_csum_offset = 0;
		pkt_desc->pkt.l4_hdr_offset = 0;
	}
	pkt_desc->pkt.desc_cnt = desc_cnt;
	pkt_desc->pkt.len = cpu_to_be16(skb->len);
	pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}

static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
				 struct sk_buff *skb, bool is_gso,
				 u16 len, u64 addr)
{
	seg_desc->seg.type_flags = GVE_TXD_SEG;
	if (is_gso) {
		if (skb_is_gso_v6(skb))
			seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
		seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
		seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
	}
	seg_desc->seg.seg_len = cpu_to_be16(len);
	seg_desc->seg.seg_addr = cpu_to_be64(addr);
}

static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
				    u64 iov_offset, u64 iov_len)
{
	u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
	u64 first_page = iov_offset / PAGE_SIZE;
	u64 page;

	for (page = first_page; page <= last_page; page++)
		dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE);
}

static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	int payload_iov = 2;
	int copy_offset;
	u32 next_idx;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want the tcp header in the first segment;
	 * otherwise we want the linear portion of the skb (which will contain
	 * the checksum because skb->csum_start and skb->csum_offset are given
	 * relative to skb->head) in the first segment.
	 */
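	/* hlen bytes are copied into the first (header) FIFO fragment and are
	 * described by the packet descriptor; the remaining skb->len - hlen
	 * bytes are copied as payload and get their own segment descriptors.
	 */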
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
			skb_headlen(skb);

	info->skb = skb;
	/* We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
				       &info->iov[0]);
	WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
	payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
					   &info->iov[payload_iov]);

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     1 + payload_nfrags, hlen,
			     info->iov[hdr_nfrags - 1].iov_offset);

	skb_copy_bits(skb, 0,
		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
		      hlen);
	gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
				info->iov[hdr_nfrags - 1].iov_offset,
				info->iov[hdr_nfrags - 1].iov_len);
	copy_offset = hlen;

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc[next_idx];

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
				     info->iov[i].iov_len,
				     info->iov[i].iov_offset);

		skb_copy_bits(skb, copy_offset,
			      tx->tx_fifo.base + info->iov[i].iov_offset,
			      info->iov[i].iov_len);
		gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
					info->iov[i].iov_offset,
					info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	return 1 + payload_nfrags;
}

static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
				  struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int hlen, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	u64 addr;
	u32 len;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want only up to the tcp header in the
	 * first segment so it can be replicated efficiently for each segment;
	 * otherwise we want the linear portion of the skb (which will contain
	 * the checksum because skb->csum_start and skb->csum_offset are given
	 * relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb);
	len = skb_headlen(skb);

	info->skb = skb;

	addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(tx->dev, addr))) {
		tx->dma_mapping_error++;
		goto drop;
	}
	dma_unmap_len_set(info, len, len);
	dma_unmap_addr_set(info, dma, addr);

	payload_nfrags = shinfo->nr_frags;
	if (hlen < len) {
		/* For gso the rest of the linear portion of the skb needs to
		 * be in its own descriptor.
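		 * The packet descriptor only covers the first hlen bytes, so
		 * the remaining len - hlen linear bytes get an extra segment
		 * descriptor ahead of the page frags.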
		 */
		payload_nfrags++;
		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
				     1 + payload_nfrags, hlen, addr);

		len -= hlen;
		addr += hlen;
		idx = (tx->req + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	} else {
		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
				     1 + payload_nfrags, hlen, addr);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		idx = (idx + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		len = skb_frag_size(frag);
		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr))) {
			tx->dma_mapping_error++;
			goto unmap_drop;
		}
		tx->info[idx].skb = NULL;
		dma_unmap_len_set(&tx->info[idx], len, len);
		dma_unmap_addr_set(&tx->info[idx], dma, addr);

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	}

	return 1 + payload_nfrags;

unmap_drop:
	i += (payload_nfrags == shinfo->nr_frags ? 1 : 2);
	while (i--) {
		idx--;
		gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
	}
drop:
	tx->dropped_pkt++;
	return 0;
}

netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;
	int nsegs;

	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
	     "skb queue index out of range");
	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_maybe_stop_tx(priv, tx, skb))) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */

		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
		return NETDEV_TX_BUSY;
	}
	if (tx->raw_addressing)
		nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
	else
		nsegs = gve_tx_add_skb_copy(priv, tx, skb);

	/* If the packet is getting sent, we need to update the skb */
	if (nsegs) {
		netdev_tx_sent_queue(tx->netdev_txq, skb->len);
		skb_tx_timestamp(skb);
		tx->req += nsegs;
	} else {
		dev_kfree_skb_any(skb);
	}

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	/* Give packets to NIC. Even if this packet failed to send, the
	 * doorbell might need to be rung because of xmit_more.
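	 * An earlier skb may have been queued with xmit_more set and its
	 * doorbell deferred, so always write the latest tx->req here.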
613 */ 614 gve_tx_put_doorbell(priv, tx->q_resources, tx->req); 615 return NETDEV_TX_OK; 616 } 617 618 #define GVE_TX_START_THRESH PAGE_SIZE 619 620 static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, 621 u32 to_do, bool try_to_wake) 622 { 623 struct gve_tx_buffer_state *info; 624 u64 pkts = 0, bytes = 0; 625 size_t space_freed = 0; 626 struct sk_buff *skb; 627 int i, j; 628 u32 idx; 629 630 for (j = 0; j < to_do; j++) { 631 idx = tx->done & tx->mask; 632 netif_info(priv, tx_done, priv->dev, 633 "[%d] %s: idx=%d (req=%u done=%u)\n", 634 tx->q_num, __func__, idx, tx->req, tx->done); 635 info = &tx->info[idx]; 636 skb = info->skb; 637 638 /* Unmap the buffer */ 639 if (tx->raw_addressing) 640 gve_tx_unmap_buf(tx->dev, info); 641 tx->done++; 642 /* Mark as free */ 643 if (skb) { 644 info->skb = NULL; 645 bytes += skb->len; 646 pkts++; 647 dev_consume_skb_any(skb); 648 if (tx->raw_addressing) 649 continue; 650 /* FIFO free */ 651 for (i = 0; i < ARRAY_SIZE(info->iov); i++) { 652 space_freed += info->iov[i].iov_len + info->iov[i].iov_padding; 653 info->iov[i].iov_len = 0; 654 info->iov[i].iov_padding = 0; 655 } 656 } 657 } 658 659 if (!tx->raw_addressing) 660 gve_tx_free_fifo(&tx->tx_fifo, space_freed); 661 u64_stats_update_begin(&tx->statss); 662 tx->bytes_done += bytes; 663 tx->pkt_done += pkts; 664 u64_stats_update_end(&tx->statss); 665 netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes); 666 667 /* start the queue if we've stopped it */ 668 #ifndef CONFIG_BQL 669 /* Make sure that the doorbells are synced */ 670 smp_mb(); 671 #endif 672 if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) && 673 likely(gve_can_tx(tx, GVE_TX_START_THRESH))) { 674 tx->wake_queue++; 675 netif_tx_wake_queue(tx->netdev_txq); 676 } 677 678 return pkts; 679 } 680 681 u32 gve_tx_load_event_counter(struct gve_priv *priv, 682 struct gve_tx_ring *tx) 683 { 684 u32 counter_index = be32_to_cpu(tx->q_resources->counter_index); 685 __be32 counter = READ_ONCE(priv->counter_array[counter_index]); 686 687 return be32_to_cpu(counter); 688 } 689 690 bool gve_tx_poll(struct gve_notify_block *block, int budget) 691 { 692 struct gve_priv *priv = block->priv; 693 struct gve_tx_ring *tx = block->tx; 694 u32 nic_done; 695 u32 to_do; 696 697 /* If budget is 0, do all the work */ 698 if (budget == 0) 699 budget = INT_MAX; 700 701 /* In TX path, it may try to clean completed pkts in order to xmit, 702 * to avoid cleaning conflict, use spin_lock(), it yields better 703 * concurrency between xmit/clean than netif's lock. 704 */ 705 spin_lock(&tx->clean_lock); 706 /* Find out how much work there is to be done */ 707 nic_done = gve_tx_load_event_counter(priv, tx); 708 to_do = min_t(u32, (nic_done - tx->done), budget); 709 gve_clean_tx_done(priv, tx, to_do, true); 710 spin_unlock(&tx->clean_lock); 711 /* If we still have work we want to repoll */ 712 return nic_done != tx->done; 713 } 714 715 bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx) 716 { 717 u32 nic_done = gve_tx_load_event_counter(priv, tx); 718 719 return nic_done != tx->done; 720 } 721