// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

static inline void gve_tx_put_doorbell(struct gve_priv *priv,
				       struct gve_queue_resources *q_resources,
				       u32 val)
{
	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}

/* gvnic can only transmit from a Registered Segment.
 * We copy skb payloads into the registered segment before writing Tx
 * descriptors and ringing the Tx doorbell.
 *
 * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
 * free allocations in the order they were allocated.
 */

static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
			  PAGE_KERNEL);
	if (unlikely(!fifo->base)) {
		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
			  fifo->qpl->id);
		return -ENOMEM;
	}

	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
	atomic_set(&fifo->available, fifo->size);
	fifo->head = 0;
	return 0;
}

static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	WARN(atomic_read(&fifo->available) != fifo->size,
	     "Releasing non-empty fifo");

	vunmap(fifo->base);
}

static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
					  size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_read(&fifo->available) <= bytes) ? false : true;
}

/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
 * @fifo: FIFO to allocate from
 * @bytes: Allocation size
 * @iov: Scatter-gather elements to fill with allocation fragment base/len
 *
 * Returns number of valid elements in iov[] or negative on error.
 *
 * Allocations from a given FIFO must be externally synchronized but concurrent
 * allocation and frees are allowed.
 */
static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
			     struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	u32 aligned_head;
	int nfrags = 0;

	if (!bytes)
		return 0;

	/* This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine,
	 * because the FIFO head always starts aligned, and the FIFO's
	 * boundaries are aligned, so if there is space for the data, there is
	 * space for the padding to the next alignment.
	 */
	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
	     "Reached %s when there's not enough space in the fifo", __func__);

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/* If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = L1_CACHE_ALIGN(fifo->head);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_sub(bytes + padding, &fifo->available);
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return nfrags;
}
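
/* Worked example of the gve_tx_alloc_fifo() wrap-around above (illustrative
 * values only, assuming an 8192-byte FIFO and a 64-byte L1 cache line):
 *
 *	head = 8000, bytes = 400
 *	iov[0] = { .iov_offset = 8000, .iov_len = 192 }	(tail fragment)
 *	iov[1] = { .iov_offset = 0,    .iov_len = 208 }	(wrapped head fragment)
 *
 * head is then re-aligned from 208 up to 256, so iov[1].iov_padding = 48 and
 * "available" drops by 400 + 48 = 448 bytes.
 */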

/* gve_tx_free_fifo - Return space to Tx FIFO
 * @fifo: FIFO to return fragments to
 * @bytes: Bytes to free
 */
static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add(bytes, &fifo->available);
}

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake);

static void gve_tx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_tx_remove_from_block(priv, idx);
	slots = tx->mask + 1;
	gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
	netdev_tx_reset_queue(tx->netdev_txq);

	dma_free_coherent(hdev, sizeof(*tx->q_resources),
			  tx->q_resources, tx->q_resources_bus);
	tx->q_resources = NULL;

	if (!tx->raw_addressing) {
		gve_tx_fifo_release(priv, &tx->tx_fifo);
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
		tx->tx_fifo.qpl = NULL;
	}

	bytes = sizeof(*tx->desc) * slots;
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;

	vfree(tx->info);
	tx->info = NULL;

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots = priv->tx_desc_cnt;
	size_t bytes;

	/* Make sure everything is zeroed to start */
	memset(tx, 0, sizeof(*tx));
	spin_lock_init(&tx->clean_lock);
	tx->q_num = idx;

	tx->mask = slots - 1;

	/* alloc metadata */
	tx->info = vzalloc(sizeof(*tx->info) * slots);
	if (!tx->info)
		return -ENOMEM;

	/* alloc tx queue */
	bytes = sizeof(*tx->desc) * slots;
	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->desc)
		goto abort_with_info;

	tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
	tx->dev = &priv->pdev->dev;
	if (!tx->raw_addressing) {
		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
		if (!tx->tx_fifo.qpl)
			goto abort_with_desc;
		/* map Tx FIFO */
		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
			goto abort_with_qpl;
	}

	tx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*tx->q_resources),
				   &tx->q_resources_bus,
				   GFP_KERNEL);
	if (!tx->q_resources)
		goto abort_with_fifo;

	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
		  (unsigned long)tx->bus);
	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_tx_add_to_block(priv, idx);

	return 0;

abort_with_fifo:
	if (!tx->raw_addressing)
		gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
	if (!tx->raw_addressing)
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
abort_with_desc:
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;
abort_with_info:
	vfree(tx->info);
	tx->info = NULL;
	return -ENOMEM;
}
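
/* Note on ring sizing: the hot path indexes tx->desc[] and tx->info[] with
 * "& tx->mask", and tx->mask is set to slots - 1 above, so the masking only
 * works if priv->tx_desc_cnt is a power of two. That requirement is an
 * assumption made explicit here; the descriptor count itself comes from the
 * device configuration.
 */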

int gve_tx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Clean up the rings allocated so far if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_tx_free_ring(priv, j);
	}
	return err;
}

void gve_tx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);
}

/* gve_tx_avail - Calculates the number of slots available in the ring
 * @tx: tx ring to check
 *
 * Returns the number of slots available
 *
 * The capacity of the queue is mask + 1. We don't need to reserve an entry.
 **/
static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
{
	return tx->mask + 1 - (tx->req - tx->done);
}
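
/* tx->req and tx->done are free-running u32 counters, so "tx->req - tx->done"
 * is the number of descriptors currently outstanding even after the counters
 * wrap. Illustrative example (hypothetical values): req = 5 after wrapping and
 * done = 0xfffffffe give req - done = 7 descriptors in flight.
 */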

static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
					      struct sk_buff *skb)
{
	int pad_bytes, align_hdr_pad;
	int bytes;
	int hlen;

	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + tcp_hdrlen(skb) :
				 min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
						   hlen);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
	bytes = align_hdr_pad + pad_bytes + skb->len;

	return bytes;
}

/* The most descriptors we could need is MAX_SKB_FRAGS + 4 :
 * 1 for each skb frag
 * 1 for the skb linear portion
 * 1 for when tcp hdr needs to be in separate descriptor
 * 1 if the payload wraps to the beginning of the FIFO
 * 1 for metadata descriptor
 */
#define MAX_TX_DESC_NEEDED	(MAX_SKB_FRAGS + 4)
static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
{
	if (info->skb) {
		dma_unmap_single(dev, dma_unmap_addr(info, dma),
				 dma_unmap_len(info, len),
				 DMA_TO_DEVICE);
		dma_unmap_len_set(info, len, 0);
	} else {
		dma_unmap_page(dev, dma_unmap_addr(info, dma),
			       dma_unmap_len(info, len),
			       DMA_TO_DEVICE);
		dma_unmap_len_set(info, len, 0);
	}
}

/* Check if sufficient resources (descriptor ring space, FIFO space) are
 * available to transmit the given number of bytes.
 */
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	bool can_alloc = true;

	if (!tx->raw_addressing)
		can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);

	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
}

static_assert(NAPI_POLL_WEIGHT >= MAX_TX_DESC_NEEDED);

/* Stops the queue if the skb cannot be transmitted. */
static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
			     struct sk_buff *skb)
{
	int bytes_required = 0;
	u32 nic_done;
	u32 to_do;
	int ret;

	if (!tx->raw_addressing)
		bytes_required = gve_skb_fifo_bytes_required(tx, skb);

	if (likely(gve_can_tx(tx, bytes_required)))
		return 0;

	ret = -EBUSY;
	spin_lock(&tx->clean_lock);
	nic_done = gve_tx_load_event_counter(priv, tx);
	to_do = nic_done - tx->done;

	/* Only try to clean if there is hope for TX */
	if (to_do + gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED) {
		if (to_do > 0) {
			to_do = min_t(u32, to_do, NAPI_POLL_WEIGHT);
			gve_clean_tx_done(priv, tx, to_do, false);
		}
		if (likely(gve_can_tx(tx, bytes_required)))
			ret = 0;
	}
	if (ret) {
		/* No space, so stop the queue */
		tx->stop_queue++;
		netif_tx_stop_queue(tx->netdev_txq);
	}
	spin_unlock(&tx->clean_lock);

	return ret;
}

static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
				 struct sk_buff *skb, bool is_gso,
				 int l4_hdr_offset, u32 desc_cnt,
				 u16 hlen, u64 addr)
{
	/* l4_hdr_offset and csum_offset are in units of 16-bit words */
	if (is_gso) {
		pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->pkt.type_flags = GVE_TXD_STD;
		pkt_desc->pkt.l4_csum_offset = 0;
		pkt_desc->pkt.l4_hdr_offset = 0;
	}
	pkt_desc->pkt.desc_cnt = desc_cnt;
	pkt_desc->pkt.len = cpu_to_be16(skb->len);
	pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}

static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc,
				 struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(mtd_desc->mtd) != sizeof(mtd_desc->pkt));

	mtd_desc->mtd.type_flags = GVE_TXD_MTD | GVE_MTD_SUBTYPE_PATH;
	mtd_desc->mtd.path_state = GVE_MTD_PATH_STATE_DEFAULT |
				   GVE_MTD_PATH_HASH_L4;
	mtd_desc->mtd.path_hash = cpu_to_be32(skb->hash);
	mtd_desc->mtd.reserved0 = 0;
	mtd_desc->mtd.reserved1 = 0;
}

static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
				 struct sk_buff *skb, bool is_gso,
				 u16 len, u64 addr)
{
	seg_desc->seg.type_flags = GVE_TXD_SEG;
	if (is_gso) {
		if (skb_is_gso_v6(skb))
			seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
		seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
		seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
	}
	seg_desc->seg.seg_len = cpu_to_be16(len);
	seg_desc->seg.seg_addr = cpu_to_be64(addr);
}

static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
				    u64 iov_offset, u64 iov_len)
{
	u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
	u64 first_page = iov_offset / PAGE_SIZE;
	u64 page;

	for (page = first_page; page <= last_page; page++)
		dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE);
}
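
/* The QPL pages backing the FIFO were DMA-mapped one page at a time (see
 * page_buses), so an iovec is synced page by page above. Illustrative example,
 * assuming 4 KiB pages: iov_offset = 4000 and iov_len = 300 cover bytes
 * 4000-4299, i.e. pages 0 and 1, so both pages are synced for the device.
 */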

static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	int mtd_desc_nr = !!skb->l4_hash;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	int payload_iov = 2;
	int copy_offset;
	u32 next_idx;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want the tcp header alone in the first segment
	 * otherwise we want the minimum required by the gVNIC spec.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
			min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);

	info->skb = skb;
	/* We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
				       &info->iov[0]);
	WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
	payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
					   &info->iov[payload_iov]);

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     1 + mtd_desc_nr + payload_nfrags, hlen,
			     info->iov[hdr_nfrags - 1].iov_offset);

	skb_copy_bits(skb, 0,
		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
		      hlen);
	gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
				info->iov[hdr_nfrags - 1].iov_offset,
				info->iov[hdr_nfrags - 1].iov_len);
	copy_offset = hlen;

	if (mtd_desc_nr) {
		next_idx = (tx->req + 1) & tx->mask;
		gve_tx_fill_mtd_desc(&tx->desc[next_idx], skb);
	}

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc[next_idx];

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
				     info->iov[i].iov_len,
				     info->iov[i].iov_offset);

		skb_copy_bits(skb, copy_offset,
			      tx->tx_fifo.base + info->iov[i].iov_offset,
			      info->iov[i].iov_len);
		gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
					info->iov[i].iov_offset,
					info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	return 1 + mtd_desc_nr + payload_nfrags;
}
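
/* Descriptor layout produced by gve_tx_add_skb_copy() (descriptive note, not
 * taken from the device spec): one packet descriptor pointing at the header
 * bytes in the FIFO, an optional metadata descriptor when the skb carries an
 * L4 hash, then one segment descriptor per payload FIFO fragment (at most two,
 * since the payload can wrap the FIFO once). The return value,
 * 1 + mtd_desc_nr + payload_nfrags, is the number of ring slots consumed.
 */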

static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
				  struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int hlen, num_descriptors, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *mtd_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	int mtd_desc_nr = !!skb->l4_hash;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	u64 addr;
	u32 len;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want only up to the tcp header in the first segment
	 * to efficiently replicate on each segment otherwise we want the linear portion
	 * of the skb (which will contain the checksum because skb->csum_start and
	 * skb->csum_offset are given relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb);
	len = skb_headlen(skb);

	info->skb = skb;

	addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(tx->dev, addr))) {
		tx->dma_mapping_error++;
		goto drop;
	}
	dma_unmap_len_set(info, len, len);
	dma_unmap_addr_set(info, dma, addr);

	num_descriptors = 1 + shinfo->nr_frags;
	if (hlen < len)
		num_descriptors++;
	if (mtd_desc_nr)
		num_descriptors++;

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     num_descriptors, hlen, addr);

	if (mtd_desc_nr) {
		idx = (idx + 1) & tx->mask;
		mtd_desc = &tx->desc[idx];
		gve_tx_fill_mtd_desc(mtd_desc, skb);
	}

	if (hlen < len) {
		/* For gso the rest of the linear portion of the skb needs to
		 * be in its own descriptor.
		 */
		len -= hlen;
		addr += hlen;
		idx = (idx + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		idx = (idx + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		len = skb_frag_size(frag);
		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr))) {
			tx->dma_mapping_error++;
			goto unmap_drop;
		}
		tx->info[idx].skb = NULL;
		dma_unmap_len_set(&tx->info[idx], len, len);
		dma_unmap_addr_set(&tx->info[idx], dma, addr);

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	}

	return num_descriptors;

unmap_drop:
	i += num_descriptors - shinfo->nr_frags;
	while (i--) {
		/* Skip metadata descriptor, if set */
		if (i == 1 && mtd_desc_nr == 1)
			continue;
		idx--;
		gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
	}
drop:
	tx->dropped_pkt++;
	return 0;
}

netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;
	int nsegs;

	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
	     "skb queue index out of range");
	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_maybe_stop_tx(priv, tx, skb))) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */

		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
		return NETDEV_TX_BUSY;
	}
	if (tx->raw_addressing)
		nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
	else
		nsegs = gve_tx_add_skb_copy(priv, tx, skb);

	/* If the packet is getting sent, we need to update the skb */
	if (nsegs) {
		netdev_tx_sent_queue(tx->netdev_txq, skb->len);
		skb_tx_timestamp(skb);
		tx->req += nsegs;
	} else {
		dev_kfree_skb_any(skb);
	}

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	/* Give packets to NIC. Even if this packet failed to send the doorbell
	 * might need to be rung because of xmit_more.
	 */
	gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
	return NETDEV_TX_OK;
}
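
/* Doorbell batching note: with netdev_xmit_more() the doorbell write is
 * deferred, so several back-to-back skbs can be posted with a single
 * iowrite32be() of the final tx->req value. For example (illustrative only),
 * three skbs taking 2 + 3 + 2 descriptors with xmit_more set on the first two
 * advance tx->req by 7 and ring the doorbell once.
 */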

#define GVE_TX_START_THRESH	PAGE_SIZE

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake)
{
	struct gve_tx_buffer_state *info;
	u64 pkts = 0, bytes = 0;
	size_t space_freed = 0;
	struct sk_buff *skb;
	int i, j;
	u32 idx;

	for (j = 0; j < to_do; j++) {
		idx = tx->done & tx->mask;
		netif_info(priv, tx_done, priv->dev,
			   "[%d] %s: idx=%d (req=%u done=%u)\n",
			   tx->q_num, __func__, idx, tx->req, tx->done);
		info = &tx->info[idx];
		skb = info->skb;

		/* Unmap the buffer */
		if (tx->raw_addressing)
			gve_tx_unmap_buf(tx->dev, info);
		tx->done++;
		/* Mark as free */
		if (skb) {
			info->skb = NULL;
			bytes += skb->len;
			pkts++;
			dev_consume_skb_any(skb);
			if (tx->raw_addressing)
				continue;
			/* FIFO free */
			for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
				space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
				info->iov[i].iov_len = 0;
				info->iov[i].iov_padding = 0;
			}
		}
	}

	if (!tx->raw_addressing)
		gve_tx_free_fifo(&tx->tx_fifo, space_freed);
	u64_stats_update_begin(&tx->statss);
	tx->bytes_done += bytes;
	tx->pkt_done += pkts;
	u64_stats_update_end(&tx->statss);
	netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);

	/* start the queue if we've stopped it */
#ifndef CONFIG_BQL
	/* Make sure that the doorbells are synced */
	smp_mb();
#endif
	if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
	    likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
		tx->wake_queue++;
		netif_tx_wake_queue(tx->netdev_txq);
	}

	return pkts;
}

u32 gve_tx_load_event_counter(struct gve_priv *priv,
			      struct gve_tx_ring *tx)
{
	u32 counter_index = be32_to_cpu(tx->q_resources->counter_index);
	__be32 counter = READ_ONCE(priv->counter_array[counter_index]);

	return be32_to_cpu(counter);
}

bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_priv *priv = block->priv;
	struct gve_tx_ring *tx = block->tx;
	u32 nic_done;
	u32 to_do;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	/* In the TX path, we may try to clean completed packets in order to
	 * xmit more. To avoid a cleaning conflict, use spin_lock(); it yields
	 * better concurrency between xmit and clean than netif's lock.
	 */
	spin_lock(&tx->clean_lock);
	/* Find out how much work there is to be done */
	nic_done = gve_tx_load_event_counter(priv, tx);
	to_do = min_t(u32, (nic_done - tx->done), budget);
	gve_clean_tx_done(priv, tx, to_do, true);
	spin_unlock(&tx->clean_lock);
	/* If we still have work we want to repoll */
	return nic_done != tx->done;
}

bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	u32 nic_done = gve_tx_load_event_counter(priv, tx);

	return nic_done != tx->done;
}
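
/* Completion model recap (descriptive note): the NIC reports progress by
 * advancing a big-endian counter in priv->counter_array rather than by writing
 * per-descriptor completions. gve_clean_tx_done() walks forward from tx->done
 * toward that counter value, returning FIFO space for copied packets or
 * unmapping DMA for raw-addressing packets, and only wakes a stopped queue
 * once GVE_TX_START_THRESH bytes of FIFO space and MAX_TX_DESC_NEEDED ring
 * slots are available again.
 */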