// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

	block->rx = NULL;
}

static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	if (rx->data.raw_addressing) {
		u32 slots = rx->mask + 1;
		int i;

		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing)
		rx->data.qpl = gve_assign_rx_qpl(priv);
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	return slots;
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}

static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_rx_ring *rx = &priv->rx[queue_idx];

	block->rx = rx;
	rx->ntfy_id = ntfy_idx;
}

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->raw_addressing;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Free any rings that were allocated before the error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

/* Post newly filled buffers by writing the ring's fill_cnt to its doorbell */
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_copy(struct net_device *dev,
				   struct napi_struct *napi,
				   struct gve_rx_slot_page_info *page_info,
				   u16 len)
{
	struct sk_buff *skb = napi_alloc_skb(napi, len);
	void *va = page_info->page_address + GVE_RX_PAD +
		   (page_info->page_offset ? PAGE_SIZE / 2 : 0);

	if (unlikely(!skb))
		return NULL;

	__skb_put(skb, len);

	skb_copy_to_linear_data(skb, va, len);

	skb->protocol = eth_type_trans(skb, dev);

	return skb;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			(page_info->page_offset ? PAGE_SIZE / 2 : 0) +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= 0x1;
	*(slot_addr) ^= offset;
}

/* Page flipping is only possible when each half of a 4K page can hold a
 * full MTU-sized frame plus the padding and Ethernet header.
 */
static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
	return PAGE_SIZE == 4096
		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}

static int gve_rx_can_recycle_buffer(struct page *page)
{
	int pagecount = page_count(page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == 1)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount >= 2)
		return 0;
	WARN(pagecount < 1, "Pagecount should never be < 1");
	return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	skb = gve_rx_add_frags(napi, page_info, len);
	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page by increasing
	 * the page bias. We will check the refcount in refill to determine if
	 * we need to alloc a new page.
	 */
	get_page(page_info->page);

	return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (page_info->can_flip) {
		skb = gve_rx_add_frags(napi, page_info, len);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			get_page(page_info->page);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		skb = gve_rx_copy(netdev, napi, page_info, len);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	}
	return skb;
}

static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat, u32 idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	union gve_rx_data_slot *data_slot;
	struct sk_buff *skb = NULL;
	dma_addr_t page_bus;
	u16 len;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	page_info = &rx->data.page_info[idx];

	data_slot = &rx->data.data_ring[idx];
	page_bus = (rx->data.raw_addressing) ?
		   be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
		   rx->data.qpl->page_buses[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
				PAGE_SIZE, DMA_FROM_DEVICE);

	if (len <= priv->rx_copybreak) {
		/* Just copy small packets */
		skb = gve_rx_copy(dev, napi, page_info, len);
		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);
	} else {
		u8 can_flip = gve_rx_can_flip_buffers(dev);
		int recycle = 0;

		if (can_flip) {
			recycle = gve_rx_can_recycle_buffer(page_info->page);
			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
		}

		page_info->can_flip = can_flip && recycle;
		if (rx->data.raw_addressing) {
			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
						    page_info, len, napi,
						    data_slot);
		} else {
			skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
					 page_info, len, napi, data_slot);
		}
	}

	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info->page);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one */
				union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;

				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
					break;
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}

bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	u32 work_done = 0, packets = 0;
	struct gve_rx_desc *desc;
	u32 cnt = rx->cnt;
	u32 idx = cnt & rx->mask;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		bool dropped;

		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		dropped = !gve_rx(rx, desc, feat, idx);
		if (!dropped) {
			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
			packets++;
		}
		cnt++;
		idx = cnt & rx->mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += packets;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->cnt = cnt;

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffers are refilled as descriptors are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - cnt <= rx->db_threshold) {
		/* In raw addressing mode buffers are only refilled if the
		 * available count falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return false;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return true;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}

bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}