// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

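/* Each RX buffer page is given a large page_ref bias (pagecnt_bias) when it
 * is set up in gve_alloc_page_dqo(), and the bias is decremented each time a
 * fragment of the page is handed to the stack (gve_dec_pagecnt_bias()). The
 * difference between page_count() and the bias is therefore the number of
 * references the networking stack still holds; zero means the page can be
 * reused or freed.
 */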
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

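/* Buffer states are kept on singly linked lists threaded through
 * buf_state->next: the free list (free_buf_states) plus the recycled and used
 * lists. A value of -1 terminates a list, and a state whose ->next points
 * back at its own index is currently allocated (posted to hardware or held by
 * the driver).
 */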
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

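/* Look for a buffer whose page can be reposted to the device. Recycled buffer
 * states are reusable immediately; used ones are reusable only once the stack
 * has dropped all of its page references (gve_buf_ref_cnt() == 0). Only a
 * handful of used entries are scanned per call to bound the cost.
 */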
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

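/* Attach a page to @buf_state. In RDA mode a fresh page is allocated and DMA
 * mapped; in QPL mode the next unused page of the preregistered queue page
 * list is consumed instead, since the QPL pool is fixed at ring setup time.
 */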
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	gve_rx_remove_from_block(priv, idx);

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(priv, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots =
		priv->queue_format == GVE_DQO_RDA_FORMAT ?
		priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
		rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_add_to_block(priv, idx);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, idx);
	return -ENOMEM;
}

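/* Ring the RX doorbell: the value written is the buffer queue tail, which
 * tells the device how many buffer descriptors have been posted and are ready
 * to be filled.
 */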
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, i);

	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring_dqo(priv, i);
}

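/* Refill the buffer queue with as many buffers as currently fit. Posting is
 * capped by the number of free completion-queue slots so that every posted
 * buffer is guaranteed a completion slot, and the doorbell is rung every
 * GVE_RX_BUF_THRESH_DQO buffers rather than once per buffer.
 */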
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

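/* Decide what to do with a buffer whose data was just handed to the stack.
 * If the page can hold more than one data buffer, advance to the next chunk
 * and put the state back on the recycled list. If we wrap all the way around
 * the page without ever observing a single remaining reference, the chunk we
 * are about to reuse may still be in flight, so the state is parked on the
 * used list until the stack releases its references.
 */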
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const int data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer-sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

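/* In QPL mode the buffer pool is fixed, so when nearly all buffer states are
 * sitting on the used list (i.e. their pages are still held by the stack),
 * incoming data is copied into freshly allocated pages instead of pinning yet
 * another QPL buffer. Copying kicks in once no more than
 * GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD buffer states remain off the used
 * list.
 */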
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

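/* Copy the received data into a newly allocated page and attach that page to
 * the in-progress skb, so the original QPL buffer can be returned to the
 * recycled list right away instead of waiting for the stack to release it.
 */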
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, a negative value otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger on-demand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool eop = compl_desc->end_of_packet != 0;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;

	/* The page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Sync the portion of the DMA buffer for the CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if the skb is completed successfully, a negative errno otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

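/* NAPI poll handler. Completion descriptors are consumed until either the
 * budget of completed packets is reached or a descriptor whose generation bit
 * still matches cur_gen_bit is seen, which means the device has not written
 * it yet; the driver flips cur_gen_bit each time the completion ring wraps.
 * Buffers are reposted at the end of every poll.
 */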
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}