xref: /openbmc/linux/drivers/net/ethernet/google/gve/gve_rx.c (revision 19b438592238b3b40c3f945bb5f9c4ca971c0c45)
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6 
7 #include "gve.h"
8 #include "gve_adminq.h"
9 #include "gve_utils.h"
10 #include <linux/etherdevice.h>
11 
12 static void gve_rx_free_buffer(struct device *dev,
13 			       struct gve_rx_slot_page_info *page_info,
14 			       union gve_rx_data_slot *data_slot)
15 {
16 	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
17 				      GVE_DATA_SLOT_ADDR_PAGE_MASK);
18 
19 	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
20 }
21 
22 static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
23 {
24 	if (rx->data.raw_addressing) {
25 		u32 slots = rx->mask + 1;
26 		int i;
27 
28 		for (i = 0; i < slots; i++)
29 			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
30 					   &rx->data.data_ring[i]);
31 	} else {
32 		gve_unassign_qpl(priv, rx->data.qpl->id);
33 		rx->data.qpl = NULL;
34 	}
35 	kvfree(rx->data.page_info);
36 	rx->data.page_info = NULL;
37 }
38 
39 static void gve_rx_free_ring(struct gve_priv *priv, int idx)
40 {
41 	struct gve_rx_ring *rx = &priv->rx[idx];
42 	struct device *dev = &priv->pdev->dev;
43 	u32 slots = rx->mask + 1;
44 	size_t bytes;
45 
46 	gve_rx_remove_from_block(priv, idx);
47 
48 	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
49 	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
50 	rx->desc.desc_ring = NULL;
51 
52 	dma_free_coherent(dev, sizeof(*rx->q_resources),
53 			  rx->q_resources, rx->q_resources_bus);
54 	rx->q_resources = NULL;
55 
56 	gve_rx_unfill_pages(priv, rx);
57 
58 	bytes = sizeof(*rx->data.data_ring) * slots;
59 	dma_free_coherent(dev, bytes, rx->data.data_ring,
60 			  rx->data.data_bus);
61 	rx->data.data_ring = NULL;
62 	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
63 }
64 
65 static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
66 			     dma_addr_t addr, struct page *page, __be64 *slot_addr)
67 {
68 	page_info->page = page;
69 	page_info->page_offset = 0;
70 	page_info->page_address = page_address(page);
71 	*slot_addr = cpu_to_be64(addr);
72 }
73 
74 static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
75 			       struct gve_rx_slot_page_info *page_info,
76 			       union gve_rx_data_slot *data_slot)
77 {
78 	struct page *page;
79 	dma_addr_t dma;
80 	int err;
81 
82 	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
83 	if (err)
84 		return err;
85 
86 	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
87 	return 0;
88 }
89 
90 static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
91 {
92 	struct gve_priv *priv = rx->gve;
93 	u32 slots;
94 	int err;
95 	int i;
96 
97 	/* Allocate one page per Rx queue slot. Each page is split into two
98 	 * packet buffers, when possible we "page flip" between the two.
99 	 */
100 	slots = rx->mask + 1;
101 
102 	rx->data.page_info = kvzalloc(slots *
103 				      sizeof(*rx->data.page_info), GFP_KERNEL);
104 	if (!rx->data.page_info)
105 		return -ENOMEM;
106 
107 	if (!rx->data.raw_addressing)
108 		rx->data.qpl = gve_assign_rx_qpl(priv);
109 	for (i = 0; i < slots; i++) {
110 		if (!rx->data.raw_addressing) {
111 			struct page *page = rx->data.qpl->pages[i];
112 			dma_addr_t addr = i * PAGE_SIZE;
113 
114 			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
115 					    &rx->data.data_ring[i].qpl_offset);
116 			continue;
117 		}
118 		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
119 					  &rx->data.data_ring[i]);
120 		if (err)
121 			goto alloc_err;
122 	}
123 
124 	return slots;
125 alloc_err:
126 	while (i--)
127 		gve_rx_free_buffer(&priv->pdev->dev,
128 				   &rx->data.page_info[i],
129 				   &rx->data.data_ring[i]);
130 	return err;
131 }
132 
133 static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
134 {
135 	struct gve_rx_ring *rx = &priv->rx[idx];
136 	struct device *hdev = &priv->pdev->dev;
137 	u32 slots, npages;
138 	int filled_pages;
139 	size_t bytes;
140 	int err;
141 
142 	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
143 	/* Make sure everything is zeroed to start with */
144 	memset(rx, 0, sizeof(*rx));
145 
146 	rx->gve = priv;
147 	rx->q_num = idx;
148 
149 	slots = priv->rx_data_slot_cnt;
150 	rx->mask = slots - 1;
151 	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
152 
153 	/* alloc rx data ring */
154 	bytes = sizeof(*rx->data.data_ring) * slots;
155 	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
156 						&rx->data.data_bus,
157 						GFP_KERNEL);
158 	if (!rx->data.data_ring)
159 		return -ENOMEM;
160 	filled_pages = gve_prefill_rx_pages(rx);
161 	if (filled_pages < 0) {
162 		err = -ENOMEM;
163 		goto abort_with_slots;
164 	}
165 	rx->fill_cnt = filled_pages;
166 	/* Ensure data ring slots (packet buffers) are visible. */
167 	dma_wmb();
168 
169 	/* Alloc gve_queue_resources */
170 	rx->q_resources =
171 		dma_alloc_coherent(hdev,
172 				   sizeof(*rx->q_resources),
173 				   &rx->q_resources_bus,
174 				   GFP_KERNEL);
175 	if (!rx->q_resources) {
176 		err = -ENOMEM;
177 		goto abort_filled;
178 	}
179 	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
180 		  (unsigned long)rx->data.data_bus);
181 
182 	/* alloc rx desc ring */
183 	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
184 	npages = bytes / PAGE_SIZE;
185 	if (npages * PAGE_SIZE != bytes) {
186 		err = -EIO;
187 		goto abort_with_q_resources;
188 	}
189 
190 	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
191 						GFP_KERNEL);
192 	if (!rx->desc.desc_ring) {
193 		err = -ENOMEM;
194 		goto abort_with_q_resources;
195 	}
196 	rx->cnt = 0;
197 	rx->db_threshold = priv->rx_desc_cnt / 2;
198 	rx->desc.seqno = 1;
199 	gve_rx_add_to_block(priv, idx);
200 
201 	return 0;
202 
203 abort_with_q_resources:
204 	dma_free_coherent(hdev, sizeof(*rx->q_resources),
205 			  rx->q_resources, rx->q_resources_bus);
206 	rx->q_resources = NULL;
207 abort_filled:
208 	gve_rx_unfill_pages(priv, rx);
209 abort_with_slots:
210 	bytes = sizeof(*rx->data.data_ring) * slots;
211 	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
212 	rx->data.data_ring = NULL;
213 
214 	return err;
215 }
216 
217 int gve_rx_alloc_rings(struct gve_priv *priv)
218 {
219 	int err = 0;
220 	int i;
221 
222 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
223 		err = gve_rx_alloc_ring(priv, i);
224 		if (err) {
225 			netif_err(priv, drv, priv->dev,
226 				  "Failed to alloc rx ring=%d: err=%d\n",
227 				  i, err);
228 			break;
229 		}
230 	}
231 	/* Unallocate if there was an error */
232 	if (err) {
233 		int j;
234 
235 		for (j = 0; j < i; j++)
236 			gve_rx_free_ring(priv, j);
237 	}
238 	return err;
239 }
240 
241 void gve_rx_free_rings_gqi(struct gve_priv *priv)
242 {
243 	int i;
244 
245 	for (i = 0; i < priv->rx_cfg.num_queues; i++)
246 		gve_rx_free_ring(priv, i);
247 }
248 
249 void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
250 {
251 	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);
252 
253 	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
254 }
255 
256 static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
257 {
258 	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
259 		return PKT_HASH_TYPE_L4;
260 	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
261 		return PKT_HASH_TYPE_L3;
262 	return PKT_HASH_TYPE_L2;
263 }
264 
265 static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
266 					struct gve_rx_slot_page_info *page_info,
267 					u16 len)
268 {
269 	struct sk_buff *skb = napi_get_frags(napi);
270 
271 	if (unlikely(!skb))
272 		return NULL;
273 
274 	skb_add_rx_frag(skb, 0, page_info->page,
275 			page_info->page_offset +
276 			GVE_RX_PAD, len, PAGE_SIZE / 2);
277 
278 	return skb;
279 }
280 
281 static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
282 {
283 	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);
284 
285 	/* "flip" to other packet buffer on this page */
286 	page_info->page_offset ^= PAGE_SIZE / 2;
287 	*(slot_addr) ^= offset;
288 }
289 
290 static bool gve_rx_can_flip_buffers(struct net_device *netdev)
291 {
292 	return PAGE_SIZE == 4096
293 		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
294 }
295 
/* Decide from the page reference count whether the driver may reuse @page.
 *
 * Returns 1 if the count is exactly 1 (no SKB is using the page), 0 if it
 * is 2 or more (an SKB still uses it), and -1 after a WARN if the count is
 * below 1.
 */
static int gve_rx_can_recycle_buffer(struct page *page)
{
	int refs = page_count(page);

	/* Still referenced by an SKB - we can't reuse */
	if (refs >= 2)
		return 0;

	/* Not referenced by any SKB - reuse */
	if (refs == 1)
		return 1;

	WARN(refs < 1, "Pagecount should never be < 1");
	return -1;
}
309 
310 static struct sk_buff *
311 gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
312 		      struct gve_rx_slot_page_info *page_info, u16 len,
313 		      struct napi_struct *napi,
314 		      union gve_rx_data_slot *data_slot)
315 {
316 	struct sk_buff *skb;
317 
318 	skb = gve_rx_add_frags(napi, page_info, len);
319 	if (!skb)
320 		return NULL;
321 
322 	/* Optimistically stop the kernel from freeing the page by increasing
323 	 * the page bias. We will check the refcount in refill to determine if
324 	 * we need to alloc a new page.
325 	 */
326 	get_page(page_info->page);
327 
328 	return skb;
329 }
330 
331 static struct sk_buff *
332 gve_rx_qpl(struct device *dev, struct net_device *netdev,
333 	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
334 	   u16 len, struct napi_struct *napi,
335 	   union gve_rx_data_slot *data_slot)
336 {
337 	struct sk_buff *skb;
338 
339 	/* if raw_addressing mode is not enabled gvnic can only receive into
340 	 * registered segments. If the buffer can't be recycled, our only
341 	 * choice is to copy the data out of it so that we can return it to the
342 	 * device.
343 	 */
344 	if (page_info->can_flip) {
345 		skb = gve_rx_add_frags(napi, page_info, len);
346 		/* No point in recycling if we didn't get the skb */
347 		if (skb) {
348 			/* Make sure that the page isn't freed. */
349 			get_page(page_info->page);
350 			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
351 		}
352 	} else {
353 		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
354 		if (skb) {
355 			u64_stats_update_begin(&rx->statss);
356 			rx->rx_copied_pkt++;
357 			u64_stats_update_end(&rx->statss);
358 		}
359 	}
360 	return skb;
361 }
362 
363 static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
364 		   netdev_features_t feat, u32 idx)
365 {
366 	struct gve_rx_slot_page_info *page_info;
367 	struct gve_priv *priv = rx->gve;
368 	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
369 	struct net_device *dev = priv->dev;
370 	union gve_rx_data_slot *data_slot;
371 	struct sk_buff *skb = NULL;
372 	dma_addr_t page_bus;
373 	u16 len;
374 
375 	/* drop this packet */
376 	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
377 		u64_stats_update_begin(&rx->statss);
378 		rx->rx_desc_err_dropped_pkt++;
379 		u64_stats_update_end(&rx->statss);
380 		return false;
381 	}
382 
383 	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
384 	page_info = &rx->data.page_info[idx];
385 
386 	data_slot = &rx->data.data_ring[idx];
387 	page_bus = (rx->data.raw_addressing) ?
388 			be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
389 			rx->data.qpl->page_buses[idx];
390 	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
391 				PAGE_SIZE, DMA_FROM_DEVICE);
392 
393 	if (len <= priv->rx_copybreak) {
394 		/* Just copy small packets */
395 		skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
396 		u64_stats_update_begin(&rx->statss);
397 		rx->rx_copied_pkt++;
398 		rx->rx_copybreak_pkt++;
399 		u64_stats_update_end(&rx->statss);
400 	} else {
401 		u8 can_flip = gve_rx_can_flip_buffers(dev);
402 		int recycle = 0;
403 
404 		if (can_flip) {
405 			recycle = gve_rx_can_recycle_buffer(page_info->page);
406 			if (recycle < 0) {
407 				if (!rx->data.raw_addressing)
408 					gve_schedule_reset(priv);
409 				return false;
410 			}
411 		}
412 
413 		page_info->can_flip = can_flip && recycle;
414 		if (rx->data.raw_addressing) {
415 			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
416 						    page_info, len, napi,
417 						    data_slot);
418 		} else {
419 			skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
420 					 page_info, len, napi, data_slot);
421 		}
422 	}
423 
424 	if (!skb) {
425 		u64_stats_update_begin(&rx->statss);
426 		rx->rx_skb_alloc_fail++;
427 		u64_stats_update_end(&rx->statss);
428 		return false;
429 	}
430 
431 	if (likely(feat & NETIF_F_RXCSUM)) {
432 		/* NIC passes up the partial sum */
433 		if (rx_desc->csum)
434 			skb->ip_summed = CHECKSUM_COMPLETE;
435 		else
436 			skb->ip_summed = CHECKSUM_NONE;
437 		skb->csum = csum_unfold(rx_desc->csum);
438 	}
439 
440 	/* parse flags & pass relevant info up */
441 	if (likely(feat & NETIF_F_RXHASH) &&
442 	    gve_needs_rss(rx_desc->flags_seq))
443 		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
444 			     gve_rss_type(rx_desc->flags_seq));
445 
446 	if (skb_is_nonlinear(skb))
447 		napi_gro_frags(napi);
448 	else
449 		napi_gro_receive(napi, skb);
450 	return true;
451 }
452 
453 static bool gve_rx_work_pending(struct gve_rx_ring *rx)
454 {
455 	struct gve_rx_desc *desc;
456 	__be16 flags_seq;
457 	u32 next_idx;
458 
459 	next_idx = rx->cnt & rx->mask;
460 	desc = rx->desc.desc_ring + next_idx;
461 
462 	flags_seq = desc->flags_seq;
463 	/* Make sure we have synchronized the seq no with the device */
464 	smp_rmb();
465 
466 	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
467 }
468 
469 static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
470 {
471 	int refill_target = rx->mask + 1;
472 	u32 fill_cnt = rx->fill_cnt;
473 
474 	while (fill_cnt - rx->cnt < refill_target) {
475 		struct gve_rx_slot_page_info *page_info;
476 		u32 idx = fill_cnt & rx->mask;
477 
478 		page_info = &rx->data.page_info[idx];
479 		if (page_info->can_flip) {
480 			/* The other half of the page is free because it was
481 			 * free when we processed the descriptor. Flip to it.
482 			 */
483 			union gve_rx_data_slot *data_slot =
484 						&rx->data.data_ring[idx];
485 
486 			gve_rx_flip_buff(page_info, &data_slot->addr);
487 			page_info->can_flip = 0;
488 		} else {
489 			/* It is possible that the networking stack has already
490 			 * finished processing all outstanding packets in the buffer
491 			 * and it can be reused.
492 			 * Flipping is unnecessary here - if the networking stack still
493 			 * owns half the page it is impossible to tell which half. Either
494 			 * the whole page is free or it needs to be replaced.
495 			 */
496 			int recycle = gve_rx_can_recycle_buffer(page_info->page);
497 
498 			if (recycle < 0) {
499 				if (!rx->data.raw_addressing)
500 					gve_schedule_reset(priv);
501 				return false;
502 			}
503 			if (!recycle) {
504 				/* We can't reuse the buffer - alloc a new one*/
505 				union gve_rx_data_slot *data_slot =
506 						&rx->data.data_ring[idx];
507 				struct device *dev = &priv->pdev->dev;
508 
509 				gve_rx_free_buffer(dev, page_info, data_slot);
510 				page_info->page = NULL;
511 				if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
512 					break;
513 			}
514 		}
515 		fill_cnt++;
516 	}
517 	rx->fill_cnt = fill_cnt;
518 	return true;
519 }
520 
521 bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
522 		       netdev_features_t feat)
523 {
524 	struct gve_priv *priv = rx->gve;
525 	u32 work_done = 0, packets = 0;
526 	struct gve_rx_desc *desc;
527 	u32 cnt = rx->cnt;
528 	u32 idx = cnt & rx->mask;
529 	u64 bytes = 0;
530 
531 	desc = rx->desc.desc_ring + idx;
532 	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
533 	       work_done < budget) {
534 		bool dropped;
535 
536 		netif_info(priv, rx_status, priv->dev,
537 			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
538 			   rx->q_num, idx, desc, desc->flags_seq);
539 		netif_info(priv, rx_status, priv->dev,
540 			   "[%d] seqno=%d rx->desc.seqno=%d\n",
541 			   rx->q_num, GVE_SEQNO(desc->flags_seq),
542 			   rx->desc.seqno);
543 		dropped = !gve_rx(rx, desc, feat, idx);
544 		if (!dropped) {
545 			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
546 			packets++;
547 		}
548 		cnt++;
549 		idx = cnt & rx->mask;
550 		desc = rx->desc.desc_ring + idx;
551 		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
552 		work_done++;
553 	}
554 
555 	if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
556 		return false;
557 
558 	u64_stats_update_begin(&rx->statss);
559 	rx->rpackets += packets;
560 	rx->rbytes += bytes;
561 	u64_stats_update_end(&rx->statss);
562 	rx->cnt = cnt;
563 
564 	/* restock ring slots */
565 	if (!rx->data.raw_addressing) {
566 		/* In QPL mode buffs are refilled as the desc are processed */
567 		rx->fill_cnt += work_done;
568 	} else if (rx->fill_cnt - cnt <= rx->db_threshold) {
569 		/* In raw addressing mode buffs are only refilled if the avail
570 		 * falls below a threshold.
571 		 */
572 		if (!gve_rx_refill_buffers(priv, rx))
573 			return false;
574 
575 		/* If we were not able to completely refill buffers, we'll want
576 		 * to schedule this queue for work again to refill buffers.
577 		 */
578 		if (rx->fill_cnt - cnt <= rx->db_threshold) {
579 			gve_rx_write_doorbell(priv, rx);
580 			return true;
581 		}
582 	}
583 
584 	gve_rx_write_doorbell(priv, rx);
585 	return gve_rx_work_pending(rx);
586 }
587 
588 bool gve_rx_poll(struct gve_notify_block *block, int budget)
589 {
590 	struct gve_rx_ring *rx = block->rx;
591 	netdev_features_t feat;
592 	bool repoll = false;
593 
594 	feat = block->napi.dev->features;
595 
596 	/* If budget is 0, do all the work */
597 	if (budget == 0)
598 		budget = INT_MAX;
599 
600 	if (budget > 0)
601 		repoll |= gve_clean_rx_done(rx, budget, feat);
602 	else
603 		repoll |= gve_rx_work_pending(rx);
604 	return repoll;
605 }
606