// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

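/* Buffer states live in rx->dqo.buf_states[] and are chained by s16 index
 * through the helpers above: -1 terminates a list, and a state whose ->next
 * points at its own index is considered allocated.
 *
 * gve_get_recycled_buf_state() below prefers the recycled list (states whose
 * buffer can be reposted immediately), then scans a few entries of the used
 * list looking for one whose page reference count has dropped to zero before
 * giving up.
 */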
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

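/* Page reference scheme: when a page is attached to a buffer state,
 * gve_alloc_page_dqo() takes INT_MAX references up front and records the
 * same value in pagecnt_bias. Each fragment handed to the stack then donates
 * one of those pre-taken references by decrementing the driver-side bias
 * (gve_dec_pagecnt_bias()) instead of taking a fresh page reference, and
 * gve_buf_ref_cnt() recovers the number of outstanding stack references as
 * page_count() - pagecnt_bias. The surplus references are returned in
 * gve_free_page_dqo() via page_ref_sub().
 */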
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	gve_rx_remove_from_block(priv, idx);

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(priv, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

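/* Ring geometry: the mask fields are set to (slots - 1) and indices are
 * advanced with '& mask', which assumes power-of-two ring sizes; e.g. with
 * 256 slots a tail of 255 wraps as (255 + 1) & 255 = 0. RDA rings keep up
 * to four buffer states per buffer-queue slot (capped at S16_MAX), while
 * QPL rings keep one per registered page.
 */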
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots =
		priv->queue_format == GVE_DQO_RDA_FORMAT ?
		priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
		rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_add_to_block(priv, idx);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, idx);
	return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, i);

	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring_dqo(priv, i);
}

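/* Buffer posting: gve_rx_post_buffers_dqo() fills the buffer queue with as
 * many descriptors as both the buffer and completion rings have room for,
 * preferring recycled buffer states and falling back to allocating a new
 * state and page. The doorbell is only rung once every GVE_RX_BUF_THRESH_DQO
 * posted buffers so that MMIO writes are batched.
 */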
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const int data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

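/* QPL copy-on-demand: a QPL ring cannot grow its pool of registered pages,
 * so once nearly all buffer states sit on the used list (within
 * GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD of the total) the payload is copied
 * into a freshly allocated page instead, letting the original QPL buffer be
 * recycled immediately.
 */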
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

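/* Per-completion handling: a packet may span several buffer completions,
 * each carrying a buffer ID that indexes rx->dqo.buf_states[]. The first
 * completion either copies short single-buffer packets (end_of_packet with
 * buf_len <= rx_copybreak) into a new skb or starts a frag-based skb via
 * napi_get_frags(); subsequent completions are appended with
 * gve_rx_append_frags() until end_of_packet is seen.
 */
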
/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool eop = compl_desc->end_of_packet != 0;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, a negative error otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size, otherwise the TCP stack will
	 * complain that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

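/* NAPI poll: completion descriptors are owned by the driver while their
 * generation bit differs from cur_gen_bit, which is toggled each time the
 * completion head wraps to zero. work_done counts only end-of-packet
 * completions so the budget reflects delivered packets, and freed slots are
 * refilled via gve_rx_post_buffers_dqo() before returning.
 */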
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}