// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 slots = rx->mask + 1;
	int i;

	if (rx->data.raw_addressing) {
		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		for (i = 0; i < slots; i++)
			page_ref_sub(rx->data.page_info[i].page,
				     rx->data.page_info[i].pagecnt_bias - 1);
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;

		for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) {
			page_ref_sub(rx->qpl_copy_pool[i].page,
				     rx->qpl_copy_pool[i].pagecnt_bias - 1);
			put_page(rx->qpl_copy_pool[i].page);
		}
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;

	kvfree(rx->qpl_copy_pool);
	rx->qpl_copy_pool = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

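/* Buffer pages carry a large artificial reference count (pagecnt_bias) so the
 * hot path never touches the page allocator: handing a fragment to the stack
 * costs one unit of bias (gve_dec_pagecnt_bias()), and a buffer can be
 * recycled once page_count() has dropped back to the remaining bias
 * (see gve_rx_can_recycle_buffer()).
 */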
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
	/* The page already has 1 ref */
	page_ref_add(page, INT_MAX - 1);
	page_info->pagecnt_bias = INT_MAX;
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
			     GFP_ATOMIC);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;
	int j;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing) {
		rx->data.qpl = gve_assign_rx_qpl(priv);
		if (!rx->data.qpl) {
			kvfree(rx->data.page_info);
			rx->data.page_info = NULL;
			return -ENOMEM;
		}
	}
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	if (!rx->data.raw_addressing) {
		for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) {
			struct page *page = alloc_page(GFP_KERNEL);

			if (!page)
				goto alloc_err_qpl;

			rx->qpl_copy_pool[j].page = page;
			rx->qpl_copy_pool[j].page_offset = 0;
			rx->qpl_copy_pool[j].page_address = page_address(page);

			/* The page already has 1 ref. */
			page_ref_add(page, INT_MAX - 1);
			rx->qpl_copy_pool[j].pagecnt_bias = INT_MAX;
		}
	}

	return slots;

alloc_err_qpl:
	while (j--) {
		page_ref_sub(rx->qpl_copy_pool[j].page,
			     rx->qpl_copy_pool[j].pagecnt_bias - 1);
		put_page(rx->qpl_copy_pool[j].page);
	}
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}

static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
{
	ctx->skb_head = NULL;
	ctx->skb_tail = NULL;
	ctx->total_size = 0;
	ctx->frag_cnt = 0;
	ctx->drop_pkt = false;
}

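/* Sets up one GQI Rx ring: the device-visible data slot ring (raw DMA
 * addresses in RDA mode, queue-page-list offsets otherwise), the queue
 * resources block, the descriptor ring, and one page of packet buffers per
 * slot.
 */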
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;

	rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1;
	rx->qpl_copy_pool_head = 0;
	rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1,
				     sizeof(rx->qpl_copy_pool[0]),
				     GFP_KERNEL);

	if (!rx->qpl_copy_pool) {
		err = -ENOMEM;
		goto abort_with_slots;
	}

	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_copy_pool;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;

	/* Allocating half-page buffers allows page-flipping which is faster
	 * than copying or allocating new pages.
	 */
	rx->packet_buffer_size = PAGE_SIZE / 2;
	gve_rx_ctx_clear(&rx->ctx);
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_copy_pool:
	kvfree(rx->qpl_copy_pool);
	rx->qpl_copy_pool = NULL;
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 packet_buffer_size, u16 len,
					struct gve_rx_ctx *ctx)
{
	u32 offset = page_info->page_offset + page_info->pad;
	struct sk_buff *skb = ctx->skb_tail;
	int num_frags = 0;

	if (!skb) {
		skb = napi_get_frags(napi);
		if (unlikely(!skb))
			return NULL;

		ctx->skb_head = skb;
		ctx->skb_tail = skb;
	} else {
		num_frags = skb_shinfo(ctx->skb_tail)->nr_frags;
		if (num_frags == MAX_SKB_FRAGS) {
			skb = napi_alloc_skb(napi, 0);
			if (!skb)
				return NULL;

			// We will never chain more than two SKBs: 2 * 16 * 2k > 64k
			// which is why we do not need to chain by using skb->next
			skb_shinfo(ctx->skb_tail)->frag_list = skb;

			ctx->skb_tail = skb;
			num_frags = 0;
		}
	}

	if (skb != ctx->skb_head) {
		ctx->skb_head->len += len;
		ctx->skb_head->data_len += len;
		ctx->skb_head->truesize += packet_buffer_size;
	}
	skb_add_rx_frag(skb, num_frags, page_info->page,
			offset, len, packet_buffer_size);

	return ctx->skb_head;
}

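/* Each page backs two half-page packet buffers. Flipping toggles both the
 * driver's offset and the device-visible slot address by PAGE_SIZE / 2, so
 * the next fill reuses the half the stack is not holding.
 */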
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= PAGE_SIZE / 2;
	*(slot_addr) ^= offset;
}

static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
{
	int pagecount = page_count(page_info->page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == page_info->pagecnt_bias)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount > page_info->pagecnt_bias)
		return 0;
	WARN(pagecount < page_info->pagecnt_bias,
	     "Pagecount should never be less than the bias.");
	return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot,
		      u16 packet_buffer_size, struct gve_rx_ctx *ctx)
{
	struct sk_buff *skb = gve_rx_add_frags(napi, page_info, packet_buffer_size, len, ctx);

	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page.
	 * We will check again in refill to determine if we need to alloc a
	 * new page.
	 */
	gve_dec_pagecnt_bias(page_info);

	return skb;
}

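/* QPL pages must stay registered with the device, so when a buffer cannot be
 * recycled the payload is copied into a small ring of spare pages (the
 * qpl_copy_pool) and those pages are handed to the stack instead.
 */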
static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
					   struct gve_rx_slot_page_info *page_info,
					   u16 len, struct napi_struct *napi)
{
	u32 pool_idx = rx->qpl_copy_pool_head & rx->qpl_copy_pool_mask;
	void *src = page_info->page_address + page_info->page_offset;
	struct gve_rx_slot_page_info *copy_page_info;
	struct gve_rx_ctx *ctx = &rx->ctx;
	bool alloc_page = false;
	struct sk_buff *skb;
	void *dst;

	copy_page_info = &rx->qpl_copy_pool[pool_idx];
	if (!copy_page_info->can_flip) {
		int recycle = gve_rx_can_recycle_buffer(copy_page_info);

		if (unlikely(recycle < 0)) {
			gve_schedule_reset(rx->gve);
			return NULL;
		}
		alloc_page = !recycle;
	}

	if (alloc_page) {
		struct gve_rx_slot_page_info alloc_page_info;
		struct page *page;

		/* The least recently used page turned out to be
		 * still in use by the kernel. Ignoring it and moving
		 * on alleviates head-of-line blocking.
		 */
		rx->qpl_copy_pool_head++;

		page = alloc_page(GFP_ATOMIC);
		if (!page)
			return NULL;

		alloc_page_info.page = page;
		alloc_page_info.page_offset = 0;
		alloc_page_info.page_address = page_address(page);
		alloc_page_info.pad = page_info->pad;

		memcpy(alloc_page_info.page_address, src, page_info->pad + len);
		skb = gve_rx_add_frags(napi, &alloc_page_info,
				       rx->packet_buffer_size,
				       len, ctx);

		u64_stats_update_begin(&rx->statss);
		rx->rx_frag_copy_cnt++;
		rx->rx_frag_alloc_cnt++;
		u64_stats_update_end(&rx->statss);

		return skb;
	}

	dst = copy_page_info->page_address + copy_page_info->page_offset;
	memcpy(dst, src, page_info->pad + len);
	copy_page_info->pad = page_info->pad;

	skb = gve_rx_add_frags(napi, copy_page_info,
			       rx->packet_buffer_size, len, ctx);
	if (unlikely(!skb))
		return NULL;

	gve_dec_pagecnt_bias(copy_page_info);
	copy_page_info->page_offset += rx->packet_buffer_size;
	copy_page_info->page_offset &= (PAGE_SIZE - 1);

	if (copy_page_info->can_flip) {
		/* We have used both halves of this copy page, it
		 * is time for it to go to the back of the queue.
		 */
		copy_page_info->can_flip = false;
		rx->qpl_copy_pool_head++;
		prefetch(rx->qpl_copy_pool[rx->qpl_copy_pool_head & rx->qpl_copy_pool_mask].page);
	} else {
		copy_page_info->can_flip = true;
	}

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_copy_cnt++;
	u64_stats_update_end(&rx->statss);

	return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (page_info->can_flip) {
		skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			gve_dec_pagecnt_bias(page_info);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		skb = gve_rx_copy_to_pool(rx, page_info, len, napi);
	}
	return skb;
}

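/* Builds the skb for one fragment: packets at or below rx_copybreak that
 * consist of a single fragment are copied into a new skb; everything else is
 * attached as a page fragment, recycling the half-page buffer when possible.
 */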
static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
				  struct gve_rx_slot_page_info *page_info, struct napi_struct *napi,
				  u16 len, union gve_rx_data_slot *data_slot,
				  bool is_only_frag)
{
	struct net_device *netdev = priv->dev;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct sk_buff *skb = NULL;

	if (len <= priv->rx_copybreak && is_only_frag) {
		/* Just copy small packets */
		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			rx->rx_frag_copy_cnt++;
			rx->rx_copybreak_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	} else {
		int recycle = gve_rx_can_recycle_buffer(page_info);

		if (unlikely(recycle < 0)) {
			gve_schedule_reset(priv);
			return NULL;
		}
		page_info->can_flip = recycle;
		if (page_info->can_flip) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_frag_flip_cnt++;
			u64_stats_update_end(&rx->statss);
		}

		if (rx->data.raw_addressing) {
			skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
						    page_info, len, napi,
						    data_slot,
						    rx->packet_buffer_size, ctx);
		} else {
			skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
					 page_info, len, napi, data_slot);
		}
	}
	return skb;
}

#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
		   struct gve_rx_desc *desc, u32 idx,
		   struct gve_rx_cnts *cnts)
{
	bool is_last_frag = !GVE_PKTCONT_BIT_IS_SET(desc->flags_seq);
	struct gve_rx_slot_page_info *page_info;
	u16 frag_size = be16_to_cpu(desc->len);
	struct gve_rx_ctx *ctx = &rx->ctx;
	union gve_rx_data_slot *data_slot;
	struct gve_priv *priv = rx->gve;
	struct sk_buff *skb = NULL;
	dma_addr_t page_bus;
	void *va;

	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	bool is_first_frag = ctx->frag_cnt == 0;

	bool is_only_frag = is_first_frag && is_last_frag;

	if (unlikely(ctx->drop_pkt))
		goto finish_frag;

	if (desc->flags_seq & GVE_RXF_ERR) {
		ctx->drop_pkt = true;
		cnts->desc_err_pkt_cnt++;
		napi_free_frags(napi);
		goto finish_frag;
	}

	if (unlikely(frag_size > rx->packet_buffer_size)) {
		netdev_warn(priv->dev, "Unexpected frag size %d, can't exceed %d, scheduling reset",
			    frag_size, rx->packet_buffer_size);
		ctx->drop_pkt = true;
		napi_free_frags(napi);
		gve_schedule_reset(rx->gve);
		goto finish_frag;
	}

	/* Prefetch two packet buffers ahead, we will need it soon. */
	page_info = &rx->data.page_info[(idx + 2) & rx->mask];
	va = page_info->page_address + page_info->page_offset;
	prefetch(page_info->page); /* Kernel page struct. */
	prefetch(va);              /* Packet header. */
	prefetch(va + 64);         /* Next cacheline too. */

	page_info = &rx->data.page_info[idx];
	data_slot = &rx->data.data_ring[idx];
	page_bus = (rx->data.raw_addressing) ?
		be64_to_cpu(data_slot->addr) - page_info->page_offset :
		rx->data.qpl->page_buses[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
				PAGE_SIZE, DMA_FROM_DEVICE);
	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
	frag_size -= page_info->pad;

	skb = gve_rx_skb(priv, rx, page_info, napi, frag_size,
			 data_slot, is_only_frag);
	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);

		napi_free_frags(napi);
		ctx->drop_pkt = true;
		goto finish_frag;
	}
	ctx->total_size += frag_size;

	if (is_first_frag) {
		if (likely(feat & NETIF_F_RXCSUM)) {
			/* NIC passes up the partial sum */
			if (desc->csum)
				skb->ip_summed = CHECKSUM_COMPLETE;
			else
				skb->ip_summed = CHECKSUM_NONE;
			skb->csum = csum_unfold(desc->csum);
		}

		/* parse flags & pass relevant info up */
		if (likely(feat & NETIF_F_RXHASH) &&
		    gve_needs_rss(desc->flags_seq))
			skb_set_hash(skb, be32_to_cpu(desc->rss_hash),
				     gve_rss_type(desc->flags_seq));
	}

	if (is_last_frag) {
		skb_record_rx_queue(skb, rx->q_num);
		if (skb_is_nonlinear(skb))
			napi_gro_frags(napi);
		else
			napi_gro_receive(napi, skb);
		goto finish_ok_pkt;
	}

	goto finish_frag;

finish_ok_pkt:
	cnts->ok_pkt_bytes += ctx->total_size;
	cnts->ok_pkt_cnt++;
finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag) {
		cnts->total_pkt_cnt++;
		cnts->cont_pkt_cnt += (ctx->frag_cnt > 1);
		gve_rx_ctx_clear(ctx);
	}
}

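/* Completions are detected by sequence number rather than a head pointer:
 * the device stamps flags_seq in each descriptor it writes, the driver
 * advances rx->desc.seqno for every descriptor it consumes, and a descriptor
 * is new when the two match.
 */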
bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one */
				union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;
				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info,
							data_slot)) {
					u64_stats_update_begin(&rx->statss);
					rx->rx_buf_alloc_fail++;
					u64_stats_update_end(&rx->statss);
					break;
				}
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}

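/* Processes completed descriptors up to the NAPI budget, overrunning it only
 * to finish an in-flight multi-fragment packet, then restocks buffer slots
 * and rings the doorbell with the updated fill count.
 */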
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
			     netdev_features_t feat)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_priv *priv = rx->gve;
	struct gve_rx_cnts cnts = {0};
	struct gve_rx_desc *next_desc;
	u32 idx = rx->cnt & rx->mask;
	u32 work_done = 0;

	struct gve_rx_desc *desc = &rx->desc.desc_ring[idx];

	// Exceed budget only if (and till) the inflight packet is consumed.
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       (work_done < budget || ctx->frag_cnt)) {
		next_desc = &rx->desc.desc_ring[(idx + 1) & rx->mask];
		prefetch(next_desc);

		gve_rx(rx, feat, desc, idx, &cnts);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc.desc_ring[idx];
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	// The device will only send whole packets.
	if (unlikely(ctx->frag_cnt)) {
		struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		napi_free_frags(napi);
		gve_rx_ctx_clear(&rx->ctx);
		netdev_warn(priv->dev, "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
			    GVE_SEQNO(desc->flags_seq), rx->desc.seqno);
		gve_schedule_reset(rx->gve);
	}

	if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold)
		return 0;

	if (work_done) {
		u64_stats_update_begin(&rx->statss);
		rx->rpackets += cnts.ok_pkt_cnt;
		rx->rbytes += cnts.ok_pkt_bytes;
		rx->rx_cont_packet_cnt += cnts.cont_pkt_cnt;
		rx->rx_desc_err_dropped_pkt += cnts.desc_err_pkt_cnt;
		u64_stats_update_end(&rx->statss);
	}

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffs are refilled as the desc are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
		/* In raw addressing mode buffs are only refilled if the avail
		 * falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return 0;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return budget;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return cnts.total_pkt_cnt;
}

int gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	int work_done = 0;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		work_done = gve_clean_rx_done(rx, budget, feat);

	return work_done;
}