// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

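/* Number of page references currently held outside the driver's bias,
 * i.e. by SKBs that still reference this buffer's page.
 */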
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

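/* Returns the driver's remaining bias references on the page and, if
 * free_page is set, unmaps and frees the page itself.
 */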
static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

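/* Pops a buffer state off the free list, or returns NULL if none are left. */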
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

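/* An allocated buf_state points at itself; one sitting on a list points at
 * the next entry (or -1).
 */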
static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

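/* Pushes a buffer state back onto the free list. */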
static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

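/* Removes the buffer state at the head of the given list and marks it as
 * allocated. Returns NULL if the list is empty.
 */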
static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

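/* Appends a buffer state to the tail of the given list. */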
static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

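/* Tries to find a buffer state whose page can be reposted to the device:
 * first from the recycled list, then from the used list once no SKBs hold
 * references to the page anymore.
 */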
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

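/* Assigns a page to the buffer state: a freshly allocated, DMA-mapped page
 * for RDA, or the next unused page of the ring's QPL. A large page ref bias
 * is taken up front so the stack's put_page() calls never free the page
 * while the driver still owns it.
 */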
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

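/* Frees everything gve_rx_alloc_ring_dqo() set up for this ring: buffer
 * pages, the QPL assignment, both descriptor rings, the queue resources and
 * the buffer state array.
 */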
static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	gve_rx_remove_from_block(priv, idx);

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(priv, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

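/* Allocates one DQO RX ring: the buffer state array and its free list, the
 * completion and buffer descriptor rings, the QPL (for non-RDA formats) and
 * the queue resources shared with the device.
 */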
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots =
		priv->queue_format == GVE_DQO_RDA_FORMAT ?
		priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
		rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_add_to_block(priv, idx);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, idx);
	return -ENOMEM;
}

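/* Writes the buffer queue tail to the queue's doorbell so the device knows
 * which buffers have been posted.
 */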
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, i);

	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring_dqo(priv, i);
}

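/* Posts as many buffers as both the buffer queue and the completion queue
 * can accommodate, ringing the doorbell every GVE_RX_BUF_THRESH_DQO slots.
 */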
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

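/* Advances the buffer to the next chunk of its page and queues it for
 * reposting when the new offset is known to be free; otherwise parks it on
 * the used list until all SKB references have been dropped.
 */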
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const int data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer-sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

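/* Sets CHECKSUM_UNNECESSARY when the device parsed the headers and reported
 * no checksum errors for a recognized L4 type.
 */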
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

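/* Records the device-computed hash with the most specific hash type the
 * parsed packet type allows.
 */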
static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

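/* Drops any partially built skb, e.g. after an error mid-packet. */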
static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

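/* QPL mode only: returns true when so many buffers are tied up in
 * outstanding SKBs that received data should be copied into freshly
 * allocated pages instead.
 */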
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
		     (rx->dqo.num_buf_states -
		     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

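/* Copies the received fragment into a newly allocated page so the QPL
 * buffer can be recycled immediately.
 */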
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool eop = compl_desc->end_of_packet != 0;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;

	/* Page might not have been used for a while and was likely last written
	 * by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv) != 0)) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

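/* Fills in GSO metadata for a hardware-coalesced (RSC) TCP packet so the
 * stack can segment it again if needed.
 */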
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

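/* NAPI poll: processes up to @budget completed packets, then refills the
 * buffer queue and updates statistics. Returns the number of packets
 * completed.
 */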
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}