// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 slots = rx->mask + 1;
	int i;

	if (rx->data.raw_addressing) {
		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		for (i = 0; i < slots; i++)
			page_ref_sub(rx->data.page_info[i].page,
				     rx->data.page_info[i].pagecnt_bias - 1);
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;

		for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) {
			page_ref_sub(rx->qpl_copy_pool[i].page,
				     rx->qpl_copy_pool[i].pagecnt_bias - 1);
			put_page(rx->qpl_copy_pool[i].page);
		}
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;

	kvfree(rx->qpl_copy_pool);
	rx->qpl_copy_pool = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
	/* The page already has 1 ref */
	page_ref_add(page, INT_MAX - 1);
	page_info->pagecnt_bias = INT_MAX;
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
			     GFP_ATOMIC);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;
	int j;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing) {
		rx->data.qpl = gve_assign_rx_qpl(priv);
		if (!rx->data.qpl) {
			kvfree(rx->data.page_info);
			rx->data.page_info = NULL;
			return -ENOMEM;
		}
	}
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	if (!rx->data.raw_addressing) {
		for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) {
			struct page *page = alloc_page(GFP_KERNEL);

			if (!page) {
				err = -ENOMEM;
				goto alloc_err_qpl;
			}

			rx->qpl_copy_pool[j].page = page;
			rx->qpl_copy_pool[j].page_offset = 0;
			rx->qpl_copy_pool[j].page_address = page_address(page);

			/* The page already has 1 ref. */
			page_ref_add(page, INT_MAX - 1);
			rx->qpl_copy_pool[j].pagecnt_bias = INT_MAX;
		}
	}

	return slots;

alloc_err_qpl:
	while (j--) {
		page_ref_sub(rx->qpl_copy_pool[j].page,
			     rx->qpl_copy_pool[j].pagecnt_bias - 1);
		put_page(rx->qpl_copy_pool[j].page);
	}
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}

static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
{
	ctx->skb_head = NULL;
	ctx->skb_tail = NULL;
	ctx->total_size = 0;
	ctx->frag_cnt = 0;
	ctx->drop_pkt = false;
}

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;

	rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1;
	rx->qpl_copy_pool_head = 0;
	rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1,
				     sizeof(rx->qpl_copy_pool[0]),
				     GFP_KERNEL);

	if (!rx->qpl_copy_pool) {
		err = -ENOMEM;
		goto abort_with_slots;
	}

	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_copy_pool;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;

	/* Allocating half-page buffers allows page-flipping which is faster
	 * than copying or allocating new pages.
	 */
	rx->packet_buffer_size = PAGE_SIZE / 2;
	gve_rx_ctx_clear(&rx->ctx);
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_copy_pool:
	kvfree(rx->qpl_copy_pool);
	rx->qpl_copy_pool = NULL;
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 packet_buffer_size, u16 len,
					struct gve_rx_ctx *ctx)
{
	u32 offset = page_info->page_offset + page_info->pad;
	struct sk_buff *skb = ctx->skb_tail;
	int num_frags = 0;

	if (!skb) {
		skb = napi_get_frags(napi);
		if (unlikely(!skb))
			return NULL;

		ctx->skb_head = skb;
		ctx->skb_tail = skb;
	} else {
		num_frags = skb_shinfo(ctx->skb_tail)->nr_frags;
		if (num_frags == MAX_SKB_FRAGS) {
			skb = napi_alloc_skb(napi, 0);
			if (!skb)
				return NULL;

			// We will never chain more than two SKBs: 2 * 16 * 2k > 64k
			// which is why we do not need to chain by using skb->next
			skb_shinfo(ctx->skb_tail)->frag_list = skb;

			ctx->skb_tail = skb;
			num_frags = 0;
		}
	}

	if (skb != ctx->skb_head) {
		ctx->skb_head->len += len;
		ctx->skb_head->data_len += len;
		ctx->skb_head->truesize += packet_buffer_size;
	}
	skb_add_rx_frag(skb, num_frags, page_info->page,
			offset, len, packet_buffer_size);

	return ctx->skb_head;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= PAGE_SIZE / 2;
	*(slot_addr) ^= offset;
}

static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
{
	int pagecount = page_count(page_info->page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == page_info->pagecnt_bias)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount > page_info->pagecnt_bias)
		return 0;
	WARN(pagecount < page_info->pagecnt_bias,
	     "Pagecount should never be less than the bias.");
	return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot,
		      u16 packet_buffer_size, struct gve_rx_ctx *ctx)
{
	struct sk_buff *skb = gve_rx_add_frags(napi, page_info, packet_buffer_size, len, ctx);

	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page.
	 * We will check again in refill to determine if we need to alloc a
	 * new page.
	 */
	gve_dec_pagecnt_bias(page_info);

	return skb;
}

static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
					   struct gve_rx_slot_page_info *page_info,
					   u16 len, struct napi_struct *napi)
{
	u32 pool_idx = rx->qpl_copy_pool_head & rx->qpl_copy_pool_mask;
	void *src = page_info->page_address + page_info->page_offset;
	struct gve_rx_slot_page_info *copy_page_info;
	struct gve_rx_ctx *ctx = &rx->ctx;
	bool alloc_page = false;
	struct sk_buff *skb;
	void *dst;

	copy_page_info = &rx->qpl_copy_pool[pool_idx];
	if (!copy_page_info->can_flip) {
		int recycle = gve_rx_can_recycle_buffer(copy_page_info);

		if (unlikely(recycle < 0)) {
			gve_schedule_reset(rx->gve);
			return NULL;
		}
		alloc_page = !recycle;
	}

	if (alloc_page) {
		struct gve_rx_slot_page_info alloc_page_info;
		struct page *page;

		/* The least recently used page turned out to be
		 * still in use by the kernel. Ignoring it and moving
		 * on alleviates head-of-line blocking.
		 */
		rx->qpl_copy_pool_head++;

		page = alloc_page(GFP_ATOMIC);
		if (!page)
			return NULL;

		alloc_page_info.page = page;
		alloc_page_info.page_offset = 0;
		alloc_page_info.page_address = page_address(page);
		alloc_page_info.pad = page_info->pad;

		memcpy(alloc_page_info.page_address, src, page_info->pad + len);
		skb = gve_rx_add_frags(napi, &alloc_page_info,
				       rx->packet_buffer_size,
				       len, ctx);

		u64_stats_update_begin(&rx->statss);
		rx->rx_frag_copy_cnt++;
		rx->rx_frag_alloc_cnt++;
		u64_stats_update_end(&rx->statss);

		return skb;
	}

	dst = copy_page_info->page_address + copy_page_info->page_offset;
	memcpy(dst, src, page_info->pad + len);
	copy_page_info->pad = page_info->pad;

	skb = gve_rx_add_frags(napi, copy_page_info,
			       rx->packet_buffer_size, len, ctx);
	if (unlikely(!skb))
		return NULL;

	gve_dec_pagecnt_bias(copy_page_info);
	copy_page_info->page_offset += rx->packet_buffer_size;
	copy_page_info->page_offset &= (PAGE_SIZE - 1);

	if (copy_page_info->can_flip) {
		/* We have used both halves of this copy page, it
		 * is time for it to go to the back of the queue.
		 */
		copy_page_info->can_flip = false;
		rx->qpl_copy_pool_head++;
		prefetch(rx->qpl_copy_pool[rx->qpl_copy_pool_head & rx->qpl_copy_pool_mask].page);
	} else {
		copy_page_info->can_flip = true;
	}

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_copy_cnt++;
	u64_stats_update_end(&rx->statss);

	return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (page_info->can_flip) {
		skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			gve_dec_pagecnt_bias(page_info);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		skb = gve_rx_copy_to_pool(rx, page_info, len, napi);
	}
	return skb;
}

static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
				  struct gve_rx_slot_page_info *page_info, struct napi_struct *napi,
				  u16 len, union gve_rx_data_slot *data_slot,
				  bool is_only_frag)
{
	struct net_device *netdev = priv->dev;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct sk_buff *skb = NULL;

	if (len <= priv->rx_copybreak && is_only_frag) {
		/* Just copy small packets */
		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			rx->rx_frag_copy_cnt++;
			rx->rx_copybreak_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	} else {
		int recycle = gve_rx_can_recycle_buffer(page_info);

		if (unlikely(recycle < 0)) {
			gve_schedule_reset(priv);
			return NULL;
		}
		page_info->can_flip = recycle;
		if (page_info->can_flip) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_frag_flip_cnt++;
			u64_stats_update_end(&rx->statss);
		}

		if (rx->data.raw_addressing) {
			skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
						    page_info, len, napi,
						    data_slot,
						    rx->packet_buffer_size, ctx);
		} else {
			skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
					 page_info, len, napi, data_slot);
		}
	}
	return skb;
}

#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
		   struct gve_rx_desc *desc, u32 idx,
		   struct gve_rx_cnts *cnts)
{
	bool is_last_frag = !GVE_PKTCONT_BIT_IS_SET(desc->flags_seq);
	struct gve_rx_slot_page_info *page_info;
	u16 frag_size = be16_to_cpu(desc->len);
	struct gve_rx_ctx *ctx = &rx->ctx;
	union gve_rx_data_slot *data_slot;
	struct gve_priv *priv = rx->gve;
	struct sk_buff *skb = NULL;
	dma_addr_t page_bus;
	void *va;

	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	bool is_first_frag = ctx->frag_cnt == 0;

	bool is_only_frag = is_first_frag && is_last_frag;

	if (unlikely(ctx->drop_pkt))
		goto finish_frag;

	if (desc->flags_seq & GVE_RXF_ERR) {
		ctx->drop_pkt = true;
		cnts->desc_err_pkt_cnt++;
		napi_free_frags(napi);
		goto finish_frag;
	}

	if (unlikely(frag_size > rx->packet_buffer_size)) {
		netdev_warn(priv->dev, "Unexpected frag size %d, can't exceed %d, scheduling reset",
			    frag_size, rx->packet_buffer_size);
		ctx->drop_pkt = true;
		napi_free_frags(napi);
		gve_schedule_reset(rx->gve);
		goto finish_frag;
	}

	/* Prefetch two packet buffers ahead, we will need it soon. */
	page_info = &rx->data.page_info[(idx + 2) & rx->mask];
	va = page_info->page_address + page_info->page_offset;
	prefetch(page_info->page); /* Kernel page struct. */
	prefetch(va);              /* Packet header. */
	prefetch(va + 64);         /* Next cacheline too. */

	page_info = &rx->data.page_info[idx];
	data_slot = &rx->data.data_ring[idx];
	page_bus = (rx->data.raw_addressing) ?
		be64_to_cpu(data_slot->addr) - page_info->page_offset :
		rx->data.qpl->page_buses[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
				PAGE_SIZE, DMA_FROM_DEVICE);
	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
	frag_size -= page_info->pad;

	skb = gve_rx_skb(priv, rx, page_info, napi, frag_size,
			 data_slot, is_only_frag);
	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);

		napi_free_frags(napi);
		ctx->drop_pkt = true;
		goto finish_frag;
	}
	ctx->total_size += frag_size;

	if (is_first_frag) {
		if (likely(feat & NETIF_F_RXCSUM)) {
			/* NIC passes up the partial sum */
			if (desc->csum)
				skb->ip_summed = CHECKSUM_COMPLETE;
			else
				skb->ip_summed = CHECKSUM_NONE;
			skb->csum = csum_unfold(desc->csum);
		}

		/* parse flags & pass relevant info up */
		if (likely(feat & NETIF_F_RXHASH) &&
		    gve_needs_rss(desc->flags_seq))
			skb_set_hash(skb, be32_to_cpu(desc->rss_hash),
				     gve_rss_type(desc->flags_seq));
	}

	if (is_last_frag) {
		skb_record_rx_queue(skb, rx->q_num);
		if (skb_is_nonlinear(skb))
			napi_gro_frags(napi);
		else
			napi_gro_receive(napi, skb);
		goto finish_ok_pkt;
	}

	goto finish_frag;

finish_ok_pkt:
	cnts->ok_pkt_bytes += ctx->total_size;
	cnts->ok_pkt_cnt++;
finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag) {
		cnts->total_pkt_cnt++;
		cnts->cont_pkt_cnt += (ctx->frag_cnt > 1);
		gve_rx_ctx_clear(ctx);
	}
}

bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one */
				union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;

				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info,
							data_slot)) {
					u64_stats_update_begin(&rx->statss);
					rx->rx_buf_alloc_fail++;
					u64_stats_update_end(&rx->statss);
					break;
				}
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}

static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
			     netdev_features_t feat)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_priv *priv = rx->gve;
	struct gve_rx_cnts cnts = {0};
	struct gve_rx_desc *next_desc;
	u32 idx = rx->cnt & rx->mask;
	u32 work_done = 0;

	struct gve_rx_desc *desc = &rx->desc.desc_ring[idx];

	// Exceed budget only if (and till) the inflight packet is consumed.
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       (work_done < budget || ctx->frag_cnt)) {
		next_desc = &rx->desc.desc_ring[(idx + 1) & rx->mask];
		prefetch(next_desc);

		gve_rx(rx, feat, desc, idx, &cnts);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc.desc_ring[idx];
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	// The device will only send whole packets.
	if (unlikely(ctx->frag_cnt)) {
		struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		napi_free_frags(napi);
		gve_rx_ctx_clear(&rx->ctx);
		netdev_warn(priv->dev, "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
			    GVE_SEQNO(desc->flags_seq), rx->desc.seqno);
		gve_schedule_reset(rx->gve);
	}

	if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold)
		return 0;

	if (work_done) {
		u64_stats_update_begin(&rx->statss);
		rx->rpackets += cnts.ok_pkt_cnt;
		rx->rbytes += cnts.ok_pkt_bytes;
		rx->rx_cont_packet_cnt += cnts.cont_pkt_cnt;
		rx->rx_desc_err_dropped_pkt += cnts.desc_err_pkt_cnt;
		u64_stats_update_end(&rx->statss);
	}

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffs are refilled as the desc are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
		/* In raw addressing mode buffs are only refilled if the avail
		 * falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return 0;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return budget;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return cnts.total_pkt_cnt;
}

int gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	int work_done = 0;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		work_done = gve_clean_rx_done(rx, budget, feat);

	return work_done;
}