// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	if (rx->data.raw_addressing) {
		u32 slots = rx->mask + 1;
		int i;

		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing)
		rx->data.qpl = gve_assign_rx_qpl(priv);
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	return slots;
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= PAGE_SIZE / 2;
	*(slot_addr) ^= offset;
}

static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
	return PAGE_SIZE == 4096
		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}

static int gve_rx_can_recycle_buffer(struct page *page)
{
	int pagecount = page_count(page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == 1)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount >= 2)
		return 0;
	WARN(pagecount < 1, "Pagecount should never be < 1");
	return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	skb = gve_rx_add_frags(napi, page_info, len);
	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page by increasing
	 * the page bias. We will check the refcount in refill to determine if
	 * we need to alloc a new page.
	 */
	get_page(page_info->page);

	return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (page_info->can_flip) {
		skb = gve_rx_add_frags(napi, page_info, len);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			get_page(page_info->page);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	}
	return skb;
}

static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat, u32 idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	union gve_rx_data_slot *data_slot;
	struct sk_buff *skb = NULL;
	dma_addr_t page_bus;
	u16 len;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	page_info = &rx->data.page_info[idx];

	data_slot = &rx->data.data_ring[idx];
	page_bus = (rx->data.raw_addressing) ?
		   be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
		   rx->data.qpl->page_buses[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
				PAGE_SIZE, DMA_FROM_DEVICE);

	if (len <= priv->rx_copybreak) {
		/* Just copy small packets */
		skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);
	} else {
		u8 can_flip = gve_rx_can_flip_buffers(dev);
		int recycle = 0;

		if (can_flip) {
			recycle = gve_rx_can_recycle_buffer(page_info->page);
			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
		}

		page_info->can_flip = can_flip && recycle;
		if (rx->data.raw_addressing) {
			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
						    page_info, len, napi,
						    data_slot);
		} else {
			skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
					 page_info, len, napi, data_slot);
		}
	}

	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
				&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info->page);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one */
				union gve_rx_data_slot *data_slot =
					&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;

				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
					break;
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}

bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	u32 work_done = 0, packets = 0;
	struct gve_rx_desc *desc;
	u32 cnt = rx->cnt;
	u32 idx = cnt & rx->mask;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		bool dropped;

		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		dropped = !gve_rx(rx, desc, feat, idx);
		if (!dropped) {
			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
			packets++;
		}
		cnt++;
		idx = cnt & rx->mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += packets;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->cnt = cnt;

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffs are refilled as the desc are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - cnt <= rx->db_threshold) {
		/* In raw addressing mode buffs are only refilled if the avail
		 * falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return false;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return true;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}

bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}