xref: /openbmc/linux/drivers/net/ethernet/cavium/thunder/nicvf_queues.c (revision f43e47c090dc7fe32d5410d8740c3a004eb2676f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Cavium, Inc.
4  */
5 
6 #include <linux/pci.h>
7 #include <linux/netdevice.h>
8 #include <linux/ip.h>
9 #include <linux/etherdevice.h>
10 #include <linux/iommu.h>
11 #include <net/ip.h>
12 #include <net/tso.h>
13 #include <uapi/linux/bpf.h>
14 
15 #include "nic_reg.h"
16 #include "nic.h"
17 #include "q_struct.h"
18 #include "nicvf_queues.h"
19 
20 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
21 					       int size, u64 data);
22 static void nicvf_get_page(struct nicvf *nic)
23 {
24 	if (!nic->rb_pageref || !nic->rb_page)
25 		return;
26 
27 	page_ref_add(nic->rb_page, nic->rb_pageref);
28 	nic->rb_pageref = 0;
29 }
30 
31 /* Poll a register for a specific value */
32 static int nicvf_poll_reg(struct nicvf *nic, int qidx,
33 			  u64 reg, int bit_pos, int bits, int val)
34 {
35 	u64 bit_mask;
36 	u64 reg_val;
37 	int timeout = 10;
38 
39 	bit_mask = (1ULL << bits) - 1;
40 	bit_mask = (bit_mask << bit_pos);
41 
42 	while (timeout) {
43 		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
44 		if (((reg_val & bit_mask) >> bit_pos) == val)
45 			return 0;
46 		usleep_range(1000, 2000);
47 		timeout--;
48 	}
49 	netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
50 	return 1;
51 }
52 
53 /* Allocate memory for a queue's descriptors */
54 static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
55 				  int q_len, int desc_size, int align_bytes)
56 {
57 	dmem->q_len = q_len;
58 	dmem->size = (desc_size * q_len) + align_bytes;
59 	/* Save address, need it while freeing */
60 	dmem->unalign_base = dma_alloc_coherent(&nic->pdev->dev, dmem->size,
61 						&dmem->dma, GFP_KERNEL);
62 	if (!dmem->unalign_base)
63 		return -ENOMEM;
64 
65 	/* Align memory address for 'align_bytes' */
66 	dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
67 	dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
68 	return 0;
69 }
70 
71 /* Free queue's descriptor memory */
72 static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
73 {
74 	if (!dmem)
75 		return;
76 
77 	dma_free_coherent(&nic->pdev->dev, dmem->size,
78 			  dmem->unalign_base, dmem->dma);
79 	dmem->unalign_base = NULL;
80 	dmem->base = NULL;
81 }
82 
83 #define XDP_PAGE_REFCNT_REFILL 256
84 
85 /* Allocate a new page or recycle one if possible
86  *
87  * We cannot optimize dma mapping here, since
88  * 1. It's only one RBDR ring for 8 Rx queues.
89  * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
90  *    and not idx into RBDR ring, so can't refer to saved info.
91  * 3. There are multiple receive buffers per page
92  */
93 static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
94 					       struct rbdr *rbdr, gfp_t gfp)
95 {
96 	int ref_count;
97 	struct page *page = NULL;
98 	struct pgcache *pgcache, *next;
99 
100 	/* Check if page is already allocated */
101 	pgcache = &rbdr->pgcache[rbdr->pgidx];
102 	page = pgcache->page;
103 	/* Check if page can be recycled */
104 	if (page) {
105 		ref_count = page_ref_count(page);
106 		/* This page can be recycled if internal ref_count and page's
107 		 * ref_count are equal, indicating that the page has been used
108 		 * once for packet transmission. For non-XDP mode, internal
109 		 * ref_count is always '1'.
110 		 */
111 		if (rbdr->is_xdp) {
112 			if (ref_count == pgcache->ref_count)
113 				pgcache->ref_count--;
114 			else
115 				page = NULL;
116 		} else if (ref_count != 1) {
117 			page = NULL;
118 		}
119 	}
120 
121 	if (!page) {
122 		page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
123 		if (!page)
124 			return NULL;
125 
126 		this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);
127 
128 		/* Check for space */
129 		if (rbdr->pgalloc >= rbdr->pgcnt) {
130 			/* Page can still be used */
131 			nic->rb_page = page;
132 			return NULL;
133 		}
134 
135 		/* Save the page in page cache */
136 		pgcache->page = page;
137 		pgcache->dma_addr = 0;
138 		pgcache->ref_count = 0;
139 		rbdr->pgalloc++;
140 	}
141 
142 	/* Take additional page references for recycling */
143 	if (rbdr->is_xdp) {
144 		/* Since there is single RBDR (i.e single core doing
145 		 * page recycling) per 8 Rx queues, in XDP mode adjusting
146 		 * page references atomically is the biggest bottleneck, so
147 		 * take bunch of references at a time.
148 		 *
149 		 * So here, below reference counts defer by '1'.
150 		 */
151 		if (!pgcache->ref_count) {
152 			pgcache->ref_count = XDP_PAGE_REFCNT_REFILL;
153 			page_ref_add(page, XDP_PAGE_REFCNT_REFILL);
154 		}
155 	} else {
156 		/* In non-XDP case, single 64K page is divided across multiple
157 		 * receive buffers, so cost of recycling is less anyway.
158 		 * So we can do with just one extra reference.
159 		 */
160 		page_ref_add(page, 1);
161 	}
162 
163 	rbdr->pgidx++;
164 	rbdr->pgidx &= (rbdr->pgcnt - 1);
165 
166 	/* Prefetch refcount of next page in page cache */
167 	next = &rbdr->pgcache[rbdr->pgidx];
168 	page = next->page;
169 	if (page)
170 		prefetch(&page->_refcount);
171 
172 	return pgcache;
173 }
174 
175 /* Allocate buffer for packet reception */
176 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
177 					 gfp_t gfp, u32 buf_len, u64 *rbuf)
178 {
179 	struct pgcache *pgcache = NULL;
180 
181 	/* Check if request can be accomodated in previous allocated page.
182 	 * But in XDP mode only one buffer per page is permitted.
183 	 */
184 	if (!rbdr->is_xdp && nic->rb_page &&
185 	    ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
186 		nic->rb_pageref++;
187 		goto ret;
188 	}
189 
190 	nicvf_get_page(nic);
191 	nic->rb_page = NULL;
192 
193 	/* Get new page, either recycled or new one */
194 	pgcache = nicvf_alloc_page(nic, rbdr, gfp);
195 	if (!pgcache && !nic->rb_page) {
196 		this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
197 		return -ENOMEM;
198 	}
199 
200 	nic->rb_page_offset = 0;
201 
202 	/* Reserve space for header modifications by BPF program */
203 	if (rbdr->is_xdp)
204 		buf_len += XDP_PACKET_HEADROOM;
205 
206 	/* Check if it's recycled */
207 	if (pgcache)
208 		nic->rb_page = pgcache->page;
209 ret:
210 	if (rbdr->is_xdp && pgcache && pgcache->dma_addr) {
211 		*rbuf = pgcache->dma_addr;
212 	} else {
213 		/* HW will ensure data coherency, CPU sync not required */
214 		*rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
215 						nic->rb_page_offset, buf_len,
216 						DMA_FROM_DEVICE,
217 						DMA_ATTR_SKIP_CPU_SYNC);
218 		if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
219 			if (!nic->rb_page_offset)
220 				__free_pages(nic->rb_page, 0);
221 			nic->rb_page = NULL;
222 			return -ENOMEM;
223 		}
224 		if (pgcache)
225 			pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
226 		nic->rb_page_offset += buf_len;
227 	}
228 
229 	return 0;
230 }
231 
232 /* Build skb around receive buffer */
233 static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
234 					   u64 rb_ptr, int len)
235 {
236 	void *data;
237 	struct sk_buff *skb;
238 
239 	data = phys_to_virt(rb_ptr);
240 
241 	/* Now build an skb to give to stack */
242 	skb = build_skb(data, RCV_FRAG_LEN);
243 	if (!skb) {
244 		put_page(virt_to_page(data));
245 		return NULL;
246 	}
247 
248 	prefetch(skb->data);
249 	return skb;
250 }
251 
252 /* Allocate RBDR ring and populate receive buffers */
253 static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
254 			    int ring_len, int buf_size)
255 {
256 	int idx;
257 	u64 rbuf;
258 	struct rbdr_entry_t *desc;
259 	int err;
260 
261 	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
262 				     sizeof(struct rbdr_entry_t),
263 				     NICVF_RCV_BUF_ALIGN_BYTES);
264 	if (err)
265 		return err;
266 
267 	rbdr->desc = rbdr->dmem.base;
268 	/* Buffer size has to be in multiples of 128 bytes */
269 	rbdr->dma_size = buf_size;
270 	rbdr->enable = true;
271 	rbdr->thresh = RBDR_THRESH;
272 	rbdr->head = 0;
273 	rbdr->tail = 0;
274 
275 	/* Initialize page recycling stuff.
276 	 *
277 	 * Can't use single buffer per page especially with 64K pages.
278 	 * On embedded platforms i.e 81xx/83xx available memory itself
279 	 * is low and minimum ring size of RBDR is 8K, that takes away
280 	 * lots of memory.
281 	 *
282 	 * But for XDP it has to be a single buffer per page.
283 	 */
284 	if (!nic->pnicvf->xdp_prog) {
285 		rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
286 		rbdr->is_xdp = false;
287 	} else {
288 		rbdr->pgcnt = ring_len;
289 		rbdr->is_xdp = true;
290 	}
291 	rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
292 	rbdr->pgcache = kcalloc(rbdr->pgcnt, sizeof(*rbdr->pgcache),
293 				GFP_KERNEL);
294 	if (!rbdr->pgcache)
295 		return -ENOMEM;
296 	rbdr->pgidx = 0;
297 	rbdr->pgalloc = 0;
298 
299 	nic->rb_page = NULL;
300 	for (idx = 0; idx < ring_len; idx++) {
301 		err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
302 					     RCV_FRAG_LEN, &rbuf);
303 		if (err) {
304 			/* To free already allocated and mapped ones */
305 			rbdr->tail = idx - 1;
306 			return err;
307 		}
308 
309 		desc = GET_RBDR_DESC(rbdr, idx);
310 		desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
311 	}
312 
313 	nicvf_get_page(nic);
314 
315 	return 0;
316 }
317 
318 /* Free RBDR ring and its receive buffers */
319 static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
320 {
321 	int head, tail;
322 	u64 buf_addr, phys_addr;
323 	struct pgcache *pgcache;
324 	struct rbdr_entry_t *desc;
325 
326 	if (!rbdr)
327 		return;
328 
329 	rbdr->enable = false;
330 	if (!rbdr->dmem.base)
331 		return;
332 
333 	head = rbdr->head;
334 	tail = rbdr->tail;
335 
336 	/* Release page references */
337 	while (head != tail) {
338 		desc = GET_RBDR_DESC(rbdr, head);
339 		buf_addr = desc->buf_addr;
340 		phys_addr = nicvf_iova_to_phys(nic, buf_addr);
341 		dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
342 				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
343 		if (phys_addr)
344 			put_page(virt_to_page(phys_to_virt(phys_addr)));
345 		head++;
346 		head &= (rbdr->dmem.q_len - 1);
347 	}
348 	/* Release buffer of tail desc */
349 	desc = GET_RBDR_DESC(rbdr, tail);
350 	buf_addr = desc->buf_addr;
351 	phys_addr = nicvf_iova_to_phys(nic, buf_addr);
352 	dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
353 			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
354 	if (phys_addr)
355 		put_page(virt_to_page(phys_to_virt(phys_addr)));
356 
357 	/* Sync page cache info */
358 	smp_rmb();
359 
360 	/* Release additional page references held for recycling */
361 	head = 0;
362 	while (head < rbdr->pgcnt) {
363 		pgcache = &rbdr->pgcache[head];
364 		if (pgcache->page && page_ref_count(pgcache->page) != 0) {
365 			if (rbdr->is_xdp) {
366 				page_ref_sub(pgcache->page,
367 					     pgcache->ref_count - 1);
368 			}
369 			put_page(pgcache->page);
370 		}
371 		head++;
372 	}
373 
374 	/* Free RBDR ring */
375 	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
376 }
377 
378 /* Refill receive buffer descriptors with new buffers.
379  */
380 static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
381 {
382 	struct queue_set *qs = nic->qs;
383 	int rbdr_idx = qs->rbdr_cnt;
384 	int tail, qcount;
385 	int refill_rb_cnt;
386 	struct rbdr *rbdr;
387 	struct rbdr_entry_t *desc;
388 	u64 rbuf;
389 	int new_rb = 0;
390 
391 refill:
392 	if (!rbdr_idx)
393 		return;
394 	rbdr_idx--;
395 	rbdr = &qs->rbdr[rbdr_idx];
396 	/* Check if it's enabled */
397 	if (!rbdr->enable)
398 		goto next_rbdr;
399 
400 	/* Get no of desc's to be refilled */
401 	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
402 	qcount &= 0x7FFFF;
403 	/* Doorbell can be ringed with a max of ring size minus 1 */
404 	if (qcount >= (qs->rbdr_len - 1))
405 		goto next_rbdr;
406 	else
407 		refill_rb_cnt = qs->rbdr_len - qcount - 1;
408 
409 	/* Sync page cache info */
410 	smp_rmb();
411 
412 	/* Start filling descs from tail */
413 	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
414 	while (refill_rb_cnt) {
415 		tail++;
416 		tail &= (rbdr->dmem.q_len - 1);
417 
418 		if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
419 			break;
420 
421 		desc = GET_RBDR_DESC(rbdr, tail);
422 		desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
423 		refill_rb_cnt--;
424 		new_rb++;
425 	}
426 
427 	nicvf_get_page(nic);
428 
429 	/* make sure all memory stores are done before ringing doorbell */
430 	smp_wmb();
431 
432 	/* Check if buffer allocation failed */
433 	if (refill_rb_cnt)
434 		nic->rb_alloc_fail = true;
435 	else
436 		nic->rb_alloc_fail = false;
437 
438 	/* Notify HW */
439 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
440 			      rbdr_idx, new_rb);
441 next_rbdr:
442 	/* Re-enable RBDR interrupts only if buffer allocation is success */
443 	if (!nic->rb_alloc_fail && rbdr->enable &&
444 	    netif_running(nic->pnicvf->netdev))
445 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
446 
447 	if (rbdr_idx)
448 		goto refill;
449 }
450 
451 /* Alloc rcv buffers in non-atomic mode for better success */
452 void nicvf_rbdr_work(struct work_struct *work)
453 {
454 	struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);
455 
456 	nicvf_refill_rbdr(nic, GFP_KERNEL);
457 	if (nic->rb_alloc_fail)
458 		schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
459 	else
460 		nic->rb_work_scheduled = false;
461 }
462 
463 /* In Softirq context, alloc rcv buffers in atomic mode */
464 void nicvf_rbdr_task(struct tasklet_struct *t)
465 {
466 	struct nicvf *nic = from_tasklet(nic, t, rbdr_task);
467 
468 	nicvf_refill_rbdr(nic, GFP_ATOMIC);
469 	if (nic->rb_alloc_fail) {
470 		nic->rb_work_scheduled = true;
471 		schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
472 	}
473 }
474 
475 /* Initialize completion queue */
476 static int nicvf_init_cmp_queue(struct nicvf *nic,
477 				struct cmp_queue *cq, int q_len)
478 {
479 	int err;
480 
481 	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
482 				     NICVF_CQ_BASE_ALIGN_BYTES);
483 	if (err)
484 		return err;
485 
486 	cq->desc = cq->dmem.base;
487 	cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
488 	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
489 
490 	return 0;
491 }
492 
493 static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
494 {
495 	if (!cq)
496 		return;
497 	if (!cq->dmem.base)
498 		return;
499 
500 	nicvf_free_q_desc_mem(nic, &cq->dmem);
501 }
502 
503 /* Initialize transmit queue */
504 static int nicvf_init_snd_queue(struct nicvf *nic,
505 				struct snd_queue *sq, int q_len, int qidx)
506 {
507 	int err;
508 
509 	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
510 				     NICVF_SQ_BASE_ALIGN_BYTES);
511 	if (err)
512 		return err;
513 
514 	sq->desc = sq->dmem.base;
515 	sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
516 	if (!sq->skbuff)
517 		return -ENOMEM;
518 
519 	sq->head = 0;
520 	sq->tail = 0;
521 	sq->thresh = SND_QUEUE_THRESH;
522 
523 	/* Check if this SQ is a XDP TX queue */
524 	if (nic->sqs_mode)
525 		qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
526 	if (qidx < nic->pnicvf->xdp_tx_queues) {
527 		/* Alloc memory to save page pointers for XDP_TX */
528 		sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
529 		if (!sq->xdp_page)
530 			return -ENOMEM;
531 		sq->xdp_desc_cnt = 0;
532 		sq->xdp_free_cnt = q_len - 1;
533 		sq->is_xdp = true;
534 	} else {
535 		sq->xdp_page = NULL;
536 		sq->xdp_desc_cnt = 0;
537 		sq->xdp_free_cnt = 0;
538 		sq->is_xdp = false;
539 
540 		atomic_set(&sq->free_cnt, q_len - 1);
541 
542 		/* Preallocate memory for TSO segment's header */
543 		sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
544 						  q_len * TSO_HEADER_SIZE,
545 						  &sq->tso_hdrs_phys,
546 						  GFP_KERNEL);
547 		if (!sq->tso_hdrs)
548 			return -ENOMEM;
549 	}
550 
551 	return 0;
552 }
553 
554 void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
555 			      int hdr_sqe, u8 subdesc_cnt)
556 {
557 	u8 idx;
558 	struct sq_gather_subdesc *gather;
559 
560 	/* Unmap DMA mapped skb data buffers */
561 	for (idx = 0; idx < subdesc_cnt; idx++) {
562 		hdr_sqe++;
563 		hdr_sqe &= (sq->dmem.q_len - 1);
564 		gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
565 		/* HW will ensure data coherency, CPU sync not required */
566 		dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
567 				     gather->size, DMA_TO_DEVICE,
568 				     DMA_ATTR_SKIP_CPU_SYNC);
569 	}
570 }
571 
572 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
573 {
574 	struct sk_buff *skb;
575 	struct page *page;
576 	struct sq_hdr_subdesc *hdr;
577 	struct sq_hdr_subdesc *tso_sqe;
578 
579 	if (!sq)
580 		return;
581 	if (!sq->dmem.base)
582 		return;
583 
584 	if (sq->tso_hdrs) {
585 		dma_free_coherent(&nic->pdev->dev,
586 				  sq->dmem.q_len * TSO_HEADER_SIZE,
587 				  sq->tso_hdrs, sq->tso_hdrs_phys);
588 		sq->tso_hdrs = NULL;
589 	}
590 
591 	/* Free pending skbs in the queue */
592 	smp_rmb();
593 	while (sq->head != sq->tail) {
594 		skb = (struct sk_buff *)sq->skbuff[sq->head];
595 		if (!skb || !sq->xdp_page)
596 			goto next;
597 
598 		page = (struct page *)sq->xdp_page[sq->head];
599 		if (!page)
600 			goto next;
601 		else
602 			put_page(page);
603 
604 		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
605 		/* Check for dummy descriptor used for HW TSO offload on 88xx */
606 		if (hdr->dont_send) {
607 			/* Get actual TSO descriptors and unmap them */
608 			tso_sqe =
609 			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
610 			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
611 						 tso_sqe->subdesc_cnt);
612 		} else {
613 			nicvf_unmap_sndq_buffers(nic, sq, sq->head,
614 						 hdr->subdesc_cnt);
615 		}
616 		if (skb)
617 			dev_kfree_skb_any(skb);
618 next:
619 		sq->head++;
620 		sq->head &= (sq->dmem.q_len - 1);
621 	}
622 	kfree(sq->skbuff);
623 	kfree(sq->xdp_page);
624 	nicvf_free_q_desc_mem(nic, &sq->dmem);
625 }
626 
627 static void nicvf_reclaim_snd_queue(struct nicvf *nic,
628 				    struct queue_set *qs, int qidx)
629 {
630 	/* Disable send queue */
631 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
632 	/* Check if SQ is stopped */
633 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
634 		return;
635 	/* Reset send queue */
636 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
637 }
638 
639 static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
640 				    struct queue_set *qs, int qidx)
641 {
642 	union nic_mbx mbx = {};
643 
644 	/* Make sure all packets in the pipeline are written back into mem */
645 	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
646 	nicvf_send_msg_to_pf(nic, &mbx);
647 }
648 
649 static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
650 				    struct queue_set *qs, int qidx)
651 {
652 	/* Disable timer threshold (doesn't get reset upon CQ reset */
653 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
654 	/* Disable completion queue */
655 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
656 	/* Reset completion queue */
657 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
658 }
659 
660 static void nicvf_reclaim_rbdr(struct nicvf *nic,
661 			       struct rbdr *rbdr, int qidx)
662 {
663 	u64 tmp, fifo_state;
664 	int timeout = 10;
665 
666 	/* Save head and tail pointers for feeing up buffers */
667 	rbdr->head = nicvf_queue_reg_read(nic,
668 					  NIC_QSET_RBDR_0_1_HEAD,
669 					  qidx) >> 3;
670 	rbdr->tail = nicvf_queue_reg_read(nic,
671 					  NIC_QSET_RBDR_0_1_TAIL,
672 					  qidx) >> 3;
673 
674 	/* If RBDR FIFO is in 'FAIL' state then do a reset first
675 	 * before relaiming.
676 	 */
677 	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
678 	if (((fifo_state >> 62) & 0x03) == 0x3)
679 		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
680 				      qidx, NICVF_RBDR_RESET);
681 
682 	/* Disable RBDR */
683 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
684 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
685 		return;
686 	while (1) {
687 		tmp = nicvf_queue_reg_read(nic,
688 					   NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
689 					   qidx);
690 		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
691 			break;
692 		usleep_range(1000, 2000);
693 		timeout--;
694 		if (!timeout) {
695 			netdev_err(nic->netdev,
696 				   "Failed polling on prefetch status\n");
697 			return;
698 		}
699 	}
700 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
701 			      qidx, NICVF_RBDR_RESET);
702 
703 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
704 		return;
705 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
706 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
707 		return;
708 }
709 
710 void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
711 {
712 	u64 rq_cfg;
713 	int sqs;
714 
715 	rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
716 
717 	/* Enable first VLAN stripping */
718 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
719 		rq_cfg |= (1ULL << 25);
720 	else
721 		rq_cfg &= ~(1ULL << 25);
722 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
723 
724 	/* Configure Secondary Qsets, if any */
725 	for (sqs = 0; sqs < nic->sqs_count; sqs++)
726 		if (nic->snicvf[sqs])
727 			nicvf_queue_reg_write(nic->snicvf[sqs],
728 					      NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
729 }
730 
731 static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
732 {
733 	union nic_mbx mbx = {};
734 
735 	/* Reset all RQ/SQ and VF stats */
736 	mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
737 	mbx.reset_stat.rx_stat_mask = 0x3FFF;
738 	mbx.reset_stat.tx_stat_mask = 0x1F;
739 	mbx.reset_stat.rq_stat_mask = 0xFFFF;
740 	mbx.reset_stat.sq_stat_mask = 0xFFFF;
741 	nicvf_send_msg_to_pf(nic, &mbx);
742 }
743 
744 /* Configures receive queue */
745 static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
746 				   int qidx, bool enable)
747 {
748 	union nic_mbx mbx = {};
749 	struct rcv_queue *rq;
750 	struct rq_cfg rq_cfg;
751 
752 	rq = &qs->rq[qidx];
753 	rq->enable = enable;
754 
755 	/* Disable receive queue */
756 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
757 
758 	if (!rq->enable) {
759 		nicvf_reclaim_rcv_queue(nic, qs, qidx);
760 		xdp_rxq_info_unreg(&rq->xdp_rxq);
761 		return;
762 	}
763 
764 	rq->cq_qs = qs->vnic_id;
765 	rq->cq_idx = qidx;
766 	rq->start_rbdr_qs = qs->vnic_id;
767 	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
768 	rq->cont_rbdr_qs = qs->vnic_id;
769 	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
770 	/* all writes of RBDR data to be loaded into L2 Cache as well*/
771 	rq->caching = 1;
772 
773 	/* Driver have no proper error path for failed XDP RX-queue info reg */
774 	WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx, 0) < 0);
775 
776 	/* Send a mailbox msg to PF to config RQ */
777 	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
778 	mbx.rq.qs_num = qs->vnic_id;
779 	mbx.rq.rq_num = qidx;
780 	mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) |
781 			  (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
782 			  (rq->cont_qs_rbdr_idx << 8) |
783 			  (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
784 	nicvf_send_msg_to_pf(nic, &mbx);
785 
786 	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
787 	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
788 		     (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
789 		     (qs->vnic_id << 0);
790 	nicvf_send_msg_to_pf(nic, &mbx);
791 
792 	/* RQ drop config
793 	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
794 	 */
795 	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
796 	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
797 		     (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
798 		     (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
799 	nicvf_send_msg_to_pf(nic, &mbx);
800 
801 	if (!nic->sqs_mode && (qidx == 0)) {
802 		/* Enable checking L3/L4 length and TCP/UDP checksums
803 		 * Also allow IPv6 pkts with zero UDP checksum.
804 		 */
805 		nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
806 				      (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
807 		nicvf_config_vlan_stripping(nic, nic->netdev->features);
808 	}
809 
810 	/* Enable Receive queue */
811 	memset(&rq_cfg, 0, sizeof(struct rq_cfg));
812 	rq_cfg.ena = 1;
813 	rq_cfg.tcp_ena = 0;
814 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
815 }
816 
817 /* Configures completion queue */
818 void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
819 			    int qidx, bool enable)
820 {
821 	struct cmp_queue *cq;
822 	struct cq_cfg cq_cfg;
823 
824 	cq = &qs->cq[qidx];
825 	cq->enable = enable;
826 
827 	if (!cq->enable) {
828 		nicvf_reclaim_cmp_queue(nic, qs, qidx);
829 		return;
830 	}
831 
832 	/* Reset completion queue */
833 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
834 
835 	if (!cq->enable)
836 		return;
837 
838 	spin_lock_init(&cq->lock);
839 	/* Set completion queue base address */
840 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
841 			      qidx, (u64)(cq->dmem.phys_base));
842 
843 	/* Enable Completion queue */
844 	memset(&cq_cfg, 0, sizeof(struct cq_cfg));
845 	cq_cfg.ena = 1;
846 	cq_cfg.reset = 0;
847 	cq_cfg.caching = 0;
848 	cq_cfg.qsize = ilog2(qs->cq_len >> 10);
849 	cq_cfg.avg_con = 0;
850 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);
851 
852 	/* Set threshold value for interrupt generation */
853 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
854 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
855 			      qidx, CMP_QUEUE_TIMER_THRESH);
856 }
857 
858 /* Configures transmit queue */
859 static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
860 				   int qidx, bool enable)
861 {
862 	union nic_mbx mbx = {};
863 	struct snd_queue *sq;
864 	struct sq_cfg sq_cfg;
865 
866 	sq = &qs->sq[qidx];
867 	sq->enable = enable;
868 
869 	if (!sq->enable) {
870 		nicvf_reclaim_snd_queue(nic, qs, qidx);
871 		return;
872 	}
873 
874 	/* Reset send queue */
875 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
876 
877 	sq->cq_qs = qs->vnic_id;
878 	sq->cq_idx = qidx;
879 
880 	/* Send a mailbox msg to PF to config SQ */
881 	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
882 	mbx.sq.qs_num = qs->vnic_id;
883 	mbx.sq.sq_num = qidx;
884 	mbx.sq.sqs_mode = nic->sqs_mode;
885 	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
886 	nicvf_send_msg_to_pf(nic, &mbx);
887 
888 	/* Set queue base address */
889 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
890 			      qidx, (u64)(sq->dmem.phys_base));
891 
892 	/* Enable send queue  & set queue size */
893 	memset(&sq_cfg, 0, sizeof(struct sq_cfg));
894 	sq_cfg.ena = 1;
895 	sq_cfg.reset = 0;
896 	sq_cfg.ldwb = 0;
897 	sq_cfg.qsize = ilog2(qs->sq_len >> 10);
898 	sq_cfg.tstmp_bgx_intf = 0;
899 	/* CQ's level at which HW will stop processing SQEs to avoid
900 	 * transmitting a pkt with no space in CQ to post CQE_TX.
901 	 */
902 	sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len;
903 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
904 
905 	/* Set threshold value for interrupt generation */
906 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
907 
908 	/* Set queue:cpu affinity for better load distribution */
909 	if (cpu_online(qidx)) {
910 		cpumask_set_cpu(qidx, &sq->affinity_mask);
911 		netif_set_xps_queue(nic->netdev,
912 				    &sq->affinity_mask, qidx);
913 	}
914 }
915 
916 /* Configures receive buffer descriptor ring */
917 static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
918 			      int qidx, bool enable)
919 {
920 	struct rbdr *rbdr;
921 	struct rbdr_cfg rbdr_cfg;
922 
923 	rbdr = &qs->rbdr[qidx];
924 	nicvf_reclaim_rbdr(nic, rbdr, qidx);
925 	if (!enable)
926 		return;
927 
928 	/* Set descriptor base address */
929 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
930 			      qidx, (u64)(rbdr->dmem.phys_base));
931 
932 	/* Enable RBDR  & set queue size */
933 	/* Buffer size should be in multiples of 128 bytes */
934 	memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
935 	rbdr_cfg.ena = 1;
936 	rbdr_cfg.reset = 0;
937 	rbdr_cfg.ldwb = 0;
938 	rbdr_cfg.qsize = RBDR_SIZE;
939 	rbdr_cfg.avg_con = 0;
940 	rbdr_cfg.lines = rbdr->dma_size / 128;
941 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
942 			      qidx, *(u64 *)&rbdr_cfg);
943 
944 	/* Notify HW */
945 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
946 			      qidx, qs->rbdr_len - 1);
947 
948 	/* Set threshold value for interrupt generation */
949 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
950 			      qidx, rbdr->thresh - 1);
951 }
952 
953 /* Requests PF to assign and enable Qset */
954 void nicvf_qset_config(struct nicvf *nic, bool enable)
955 {
956 	union nic_mbx mbx = {};
957 	struct queue_set *qs = nic->qs;
958 	struct qs_cfg *qs_cfg;
959 
960 	if (!qs) {
961 		netdev_warn(nic->netdev,
962 			    "Qset is still not allocated, don't init queues\n");
963 		return;
964 	}
965 
966 	qs->enable = enable;
967 	qs->vnic_id = nic->vf_id;
968 
969 	/* Send a mailbox msg to PF to config Qset */
970 	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
971 	mbx.qs.num = qs->vnic_id;
972 	mbx.qs.sqs_count = nic->sqs_count;
973 
974 	mbx.qs.cfg = 0;
975 	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
976 	if (qs->enable) {
977 		qs_cfg->ena = 1;
978 #ifdef __BIG_ENDIAN
979 		qs_cfg->be = 1;
980 #endif
981 		qs_cfg->vnic = qs->vnic_id;
982 		/* Enable Tx timestamping capability */
983 		if (nic->ptp_clock)
984 			qs_cfg->send_tstmp_ena = 1;
985 	}
986 	nicvf_send_msg_to_pf(nic, &mbx);
987 }
988 
989 static void nicvf_free_resources(struct nicvf *nic)
990 {
991 	int qidx;
992 	struct queue_set *qs = nic->qs;
993 
994 	/* Free receive buffer descriptor ring */
995 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
996 		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
997 
998 	/* Free completion queue */
999 	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1000 		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
1001 
1002 	/* Free send queue */
1003 	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1004 		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
1005 }
1006 
1007 static int nicvf_alloc_resources(struct nicvf *nic)
1008 {
1009 	int qidx;
1010 	struct queue_set *qs = nic->qs;
1011 
1012 	/* Alloc receive buffer descriptor ring */
1013 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1014 		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
1015 				    DMA_BUFFER_LEN))
1016 			goto alloc_fail;
1017 	}
1018 
1019 	/* Alloc send queue */
1020 	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
1021 		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
1022 			goto alloc_fail;
1023 	}
1024 
1025 	/* Alloc completion queue */
1026 	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
1027 		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
1028 			goto alloc_fail;
1029 	}
1030 
1031 	return 0;
1032 alloc_fail:
1033 	nicvf_free_resources(nic);
1034 	return -ENOMEM;
1035 }
1036 
1037 int nicvf_set_qset_resources(struct nicvf *nic)
1038 {
1039 	struct queue_set *qs;
1040 
1041 	qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
1042 	if (!qs)
1043 		return -ENOMEM;
1044 	nic->qs = qs;
1045 
1046 	/* Set count of each queue */
1047 	qs->rbdr_cnt = DEFAULT_RBDR_CNT;
1048 	qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
1049 	qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
1050 	qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
1051 
1052 	/* Set queue lengths */
1053 	qs->rbdr_len = RCV_BUF_COUNT;
1054 	qs->sq_len = SND_QUEUE_LEN;
1055 	qs->cq_len = CMP_QUEUE_LEN;
1056 
1057 	nic->rx_queues = qs->rq_cnt;
1058 	nic->tx_queues = qs->sq_cnt;
1059 	nic->xdp_tx_queues = 0;
1060 
1061 	return 0;
1062 }
1063 
1064 int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
1065 {
1066 	bool disable = false;
1067 	struct queue_set *qs = nic->qs;
1068 	struct queue_set *pqs = nic->pnicvf->qs;
1069 	int qidx;
1070 
1071 	if (!qs)
1072 		return 0;
1073 
1074 	/* Take primary VF's queue lengths.
1075 	 * This is needed to take queue lengths set from ethtool
1076 	 * into consideration.
1077 	 */
1078 	if (nic->sqs_mode && pqs) {
1079 		qs->cq_len = pqs->cq_len;
1080 		qs->sq_len = pqs->sq_len;
1081 	}
1082 
1083 	if (enable) {
1084 		if (nicvf_alloc_resources(nic))
1085 			return -ENOMEM;
1086 
1087 		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1088 			nicvf_snd_queue_config(nic, qs, qidx, enable);
1089 		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1090 			nicvf_cmp_queue_config(nic, qs, qidx, enable);
1091 		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1092 			nicvf_rbdr_config(nic, qs, qidx, enable);
1093 		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1094 			nicvf_rcv_queue_config(nic, qs, qidx, enable);
1095 	} else {
1096 		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1097 			nicvf_rcv_queue_config(nic, qs, qidx, disable);
1098 		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1099 			nicvf_rbdr_config(nic, qs, qidx, disable);
1100 		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1101 			nicvf_snd_queue_config(nic, qs, qidx, disable);
1102 		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1103 			nicvf_cmp_queue_config(nic, qs, qidx, disable);
1104 
1105 		nicvf_free_resources(nic);
1106 	}
1107 
1108 	/* Reset RXQ's stats.
1109 	 * SQ's stats will get reset automatically once SQ is reset.
1110 	 */
1111 	nicvf_reset_rcv_queue_stats(nic);
1112 
1113 	return 0;
1114 }
1115 
1116 /* Get a free desc from SQ
1117  * returns descriptor ponter & descriptor number
1118  */
1119 static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1120 {
1121 	int qentry;
1122 
1123 	qentry = sq->tail;
1124 	if (!sq->is_xdp)
1125 		atomic_sub(desc_cnt, &sq->free_cnt);
1126 	else
1127 		sq->xdp_free_cnt -= desc_cnt;
1128 	sq->tail += desc_cnt;
1129 	sq->tail &= (sq->dmem.q_len - 1);
1130 
1131 	return qentry;
1132 }
1133 
1134 /* Rollback to previous tail pointer when descriptors not used */
1135 static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
1136 					  int qentry, int desc_cnt)
1137 {
1138 	sq->tail = qentry;
1139 	atomic_add(desc_cnt, &sq->free_cnt);
1140 }
1141 
1142 /* Free descriptor back to SQ for future use */
1143 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1144 {
1145 	if (!sq->is_xdp)
1146 		atomic_add(desc_cnt, &sq->free_cnt);
1147 	else
1148 		sq->xdp_free_cnt += desc_cnt;
1149 	sq->head += desc_cnt;
1150 	sq->head &= (sq->dmem.q_len - 1);
1151 }
1152 
1153 static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1154 {
1155 	qentry++;
1156 	qentry &= (sq->dmem.q_len - 1);
1157 	return qentry;
1158 }
1159 
1160 void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
1161 {
1162 	u64 sq_cfg;
1163 
1164 	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1165 	sq_cfg |= NICVF_SQ_EN;
1166 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1167 	/* Ring doorbell so that H/W restarts processing SQEs */
1168 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
1169 }
1170 
1171 void nicvf_sq_disable(struct nicvf *nic, int qidx)
1172 {
1173 	u64 sq_cfg;
1174 
1175 	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1176 	sq_cfg &= ~NICVF_SQ_EN;
1177 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1178 }
1179 
1180 void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
1181 			      int qidx)
1182 {
1183 	u64 head;
1184 	struct sk_buff *skb;
1185 	struct nicvf *nic = netdev_priv(netdev);
1186 	struct sq_hdr_subdesc *hdr;
1187 
1188 	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
1189 	while (sq->head != head) {
1190 		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
1191 		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
1192 			nicvf_put_sq_desc(sq, 1);
1193 			continue;
1194 		}
1195 		skb = (struct sk_buff *)sq->skbuff[sq->head];
1196 		if (skb)
1197 			dev_kfree_skb_any(skb);
1198 		atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
1199 		atomic64_add(hdr->tot_len,
1200 			     (atomic64_t *)&netdev->stats.tx_bytes);
1201 		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
1202 	}
1203 }
1204 
1205 /* XDP Transmit APIs */
1206 void nicvf_xdp_sq_doorbell(struct nicvf *nic,
1207 			   struct snd_queue *sq, int sq_num)
1208 {
1209 	if (!sq->xdp_desc_cnt)
1210 		return;
1211 
1212 	/* make sure all memory stores are done before ringing doorbell */
1213 	wmb();
1214 
1215 	/* Inform HW to xmit all TSO segments */
1216 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
1217 			      sq_num, sq->xdp_desc_cnt);
1218 	sq->xdp_desc_cnt = 0;
1219 }
1220 
1221 static inline void
1222 nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1223 			     int subdesc_cnt, u64 data, int len)
1224 {
1225 	struct sq_hdr_subdesc *hdr;
1226 
1227 	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1228 	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1229 	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1230 	hdr->subdesc_cnt = subdesc_cnt;
1231 	hdr->tot_len = len;
1232 	hdr->post_cqe = 1;
1233 	sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
1234 }
1235 
1236 int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
1237 			    u64 bufaddr, u64 dma_addr, u16 len)
1238 {
1239 	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1240 	int qentry;
1241 
1242 	if (subdesc_cnt > sq->xdp_free_cnt)
1243 		return 0;
1244 
1245 	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1246 
1247 	nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);
1248 
1249 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
1250 	nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);
1251 
1252 	sq->xdp_desc_cnt += subdesc_cnt;
1253 
1254 	return 1;
1255 }
1256 
/* Calculate no of SQ subdescriptors needed to transmit all
 * segments of this TSO packet.
 * Taken from 'Tilera network driver' with a minor modification.
 *
 * The payload is walked segment by segment (gso_size bytes each, the last
 * one possibly shorter).  For every segment one descriptor is counted for
 * its header plus one for each contiguous piece of payload, where a piece
 * ends at a segment boundary or at the end of the current source area
 * (the linear part of the skb first, then each page fragment in turn).
 * The returned total additionally contains one descriptor per segment
 * (gso_segs) for the SQ header subdescriptor.
 */
static int nicvf_tso_count_subdescs(struct sk_buff *skb)
{
	struct skb_shared_info *sh = skb_shinfo(skb);
	unsigned int sh_len = skb_tcp_all_headers(skb);
	unsigned int data_len = skb->len - sh_len;
	unsigned int p_len = sh->gso_size;
	long f_id = -1;    /* id of the current fragment, -1 = linear area */
	long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
	long f_used = 0;  /* bytes used from the current fragment */
	long n;            /* size of the current piece of payload */
	int num_edescs = 0;
	int segment;

	for (segment = 0; segment < sh->gso_segs; segment++) {
		unsigned int p_used = 0;

		/* One edesc for header and for each piece of the payload. */
		for (num_edescs++; p_used < p_len; num_edescs++) {
			/* Advance as needed. */
			while (f_used >= f_size) {
				f_id++;
				f_size = skb_frag_size(&sh->frags[f_id]);
				f_used = 0;
			}

			/* Use bytes from the current fragment. */
			n = p_len - p_used;
			if (n > f_size - f_used)
				n = f_size - f_used;
			f_used += n;
			p_used += n;
		}

		/* The last segment may be less than gso_size. */
		data_len -= p_len;
		if (data_len < p_len)
			p_len = data_len;
	}

	/* '+ gso_segs' for SQ_HDR_SUBDESCs for each segment */
	return num_edescs + sh->gso_segs;
}
1303 
/* Number of extra SQ descriptors (one HEADER + one IMMEDIATE) that are
 * appended after a HW TSO packet on 88xx so that a completion CQE gets
 * posted for it.
 */
#define POST_CQE_DESC_COUNT 2
1305 
1306 /* Get the number of SQ descriptors needed to xmit this skb */
1307 static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
1308 {
1309 	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1310 
1311 	if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
1312 		subdesc_cnt = nicvf_tso_count_subdescs(skb);
1313 		return subdesc_cnt;
1314 	}
1315 
1316 	/* Dummy descriptors to get TSO pkt completion notification */
1317 	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
1318 		subdesc_cnt += POST_CQE_DESC_COUNT;
1319 
1320 	if (skb_shinfo(skb)->nr_frags)
1321 		subdesc_cnt += skb_shinfo(skb)->nr_frags;
1322 
1323 	return subdesc_cnt;
1324 }
1325 
/* Add SQ HEADER subdescriptor.
 * First subdescriptor for every send descriptor.
 *
 * @nic:         VF the send queue belongs to
 * @sq:          send queue being filled
 * @qentry:      index of the SQ entry to write the header into
 * @subdesc_cnt: number of subdescriptors that follow this header
 * @skb:         packet being transmitted
 * @len:         total length stored in the header's tot_len field
 *
 * Besides the basic header fields this sets up checksum offload for
 * CHECKSUM_PARTIAL skbs, the HW TSO fields when HW TSO applies, and TX
 * hardware timestamping when it was requested and can be honoured.
 */
static inline void
nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
			 int subdesc_cnt, struct sk_buff *skb, int len)
{
	int proto;
	struct sq_hdr_subdesc *hdr;
	/* Views of the network header as IPv4, IPv6 or raw bytes */
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;

	ip.hdr = skb_network_header(skb);
	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;

	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
		/* post_cqe = 0, to avoid HW posting a CQE for every TSO
		 * segment transmitted on 88xx.
		 * The skb is not recorded for this entry either; the
		 * trailing POST_CQE_DESC_COUNT descriptors are excluded
		 * from the count stored in the header.
		 */
		hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
	} else {
		sq->skbuff[qentry] = (u64)skb;
		/* Enable notification via CQE after processing SQE */
		hdr->post_cqe = 1;
		/* No of subdescriptors following this */
		hdr->subdesc_cnt = subdesc_cnt;
	}
	hdr->tot_len = len;

	/* Offload checksum calculation to HW */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (ip.v4->version == 4)
			hdr->csum_l3 = 1; /* Enable IP csum calculation */
		hdr->l3_offset = skb_network_offset(skb);
		hdr->l4_offset = skb_transport_offset(skb);

		/* L4 protocol comes from the IPv4 or the IPv6 header */
		proto = (ip.v4->version == 4) ? ip.v4->protocol :
			ip.v6->nexthdr;

		/* Other protocols leave csum_l4 at its zeroed value */
		switch (proto) {
		case IPPROTO_TCP:
			hdr->csum_l4 = SEND_L4_CSUM_TCP;
			break;
		case IPPROTO_UDP:
			hdr->csum_l4 = SEND_L4_CSUM_UDP;
			break;
		case IPPROTO_SCTP:
			hdr->csum_l4 = SEND_L4_CSUM_SCTP;
			break;
		}
	}

	if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
		hdr->tso = 1;
		hdr->tso_start = skb_tcp_all_headers(skb);
		hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
		/* For non-tunneled pkts, point this to L2 ethertype */
		hdr->inner_l3_offset = skb_network_offset(skb) - 2;
		this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
	}

	/* Check if timestamp is requested */
	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		skb_tx_timestamp(skb);
		return;
	}

	/* Tx timestamping not supported along with TSO, so ignore request */
	if (skb_shinfo(skb)->gso_size)
		return;

	/* HW supports only a single outstanding packet to timestamp */
	if (!atomic_add_unless(&nic->pnicvf->tx_ptp_skbs, 1, 1))
		return;

	/* Mark the SKB for later reference */
	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;

	/* Finally enable timestamp generation
	 * Since 'post_cqe' is also set, two CQEs will be posted
	 * for this packet i.e CQE_TYPE_SEND and CQE_TYPE_SEND_PTP.
	 */
	hdr->tstmp = 1;
}
1415 
1416 /* SQ GATHER subdescriptor
1417  * Must follow HDR descriptor
1418  */
1419 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1420 					       int size, u64 data)
1421 {
1422 	struct sq_gather_subdesc *gather;
1423 
1424 	qentry &= (sq->dmem.q_len - 1);
1425 	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1426 
1427 	memset(gather, 0, SND_QUEUE_DESC_SIZE);
1428 	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1429 	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1430 	gather->size = size;
1431 	gather->addr = data;
1432 }
1433 
1434 /* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
1435  * packet so that a CQE is posted as a notifation for transmission of
1436  * TSO packet.
1437  */
1438 static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
1439 					    int tso_sqe, struct sk_buff *skb)
1440 {
1441 	struct sq_imm_subdesc *imm;
1442 	struct sq_hdr_subdesc *hdr;
1443 
1444 	sq->skbuff[qentry] = (u64)skb;
1445 
1446 	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1447 	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1448 	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1449 	/* Enable notification via CQE after processing SQE */
1450 	hdr->post_cqe = 1;
1451 	/* There is no packet to transmit here */
1452 	hdr->dont_send = 1;
1453 	hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
1454 	hdr->tot_len = 1;
1455 	/* Actual TSO header SQE index, needed for cleanup */
1456 	hdr->rsvd2 = tso_sqe;
1457 
1458 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
1459 	imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
1460 	memset(imm, 0, SND_QUEUE_DESC_SIZE);
1461 	imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
1462 	imm->len = 1;
1463 }
1464 
1465 static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
1466 				     int sq_num, int desc_cnt)
1467 {
1468 	struct netdev_queue *txq;
1469 
1470 	txq = netdev_get_tx_queue(nic->pnicvf->netdev,
1471 				  skb_get_queue_mapping(skb));
1472 
1473 	netdev_tx_sent_queue(txq, skb->len);
1474 
1475 	/* make sure all memory stores are done before ringing doorbell */
1476 	smp_wmb();
1477 
1478 	/* Inform HW to xmit all TSO segments */
1479 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
1480 			      sq_num, desc_cnt);
1481 }
1482 
/* Segment a TSO packet into 'gso_size' segments and append
 * them to SQ for transfer
 *
 * @nic:    VF owning the send queue
 * @sq:     send queue to append to
 * @sq_num: index of @sq, used for the doorbell
 * @qentry: first SQ entry reserved for this packet
 * @skb:    TSO packet to segment
 *
 * Each segment is laid out as HEADER, a GATHER for the rebuilt protocol
 * headers (stored in the queue's tso_hdrs area) and one GATHER per piece
 * of payload.  The segment's HEADER is written last, once the number of
 * subdescriptors and the segment length are known.  Always returns 1.
 */
static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
			       int sq_num, int qentry, struct sk_buff *skb)
{
	struct tso_t tso;
	int seg_subdescs = 0, desc_cnt = 0;
	int seg_len, total_len, data_left;
	int hdr_qentry = qentry;
	int hdr_len;

	hdr_len = tso_start(skb, &tso);

	/* Payload bytes still to be segmented */
	total_len = skb->len - hdr_len;
	while (total_len > 0) {
		char *hdr;

		/* Save Qentry for adding HDR_SUBDESC at the end */
		hdr_qentry = qentry;

		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
		total_len -= data_left;

		/* Add segment's header */
		qentry = nicvf_get_nxt_sqentry(sq, qentry);
		hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE;
		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
		nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len,
					    sq->tso_hdrs_phys +
					    qentry * TSO_HEADER_SIZE);
		/* HDR_SUBDESC + GATHER */
		seg_subdescs = 2;
		seg_len = hdr_len;

		/* Add segment's payload fragments */
		while (data_left > 0) {
			int size;

			size = min_t(int, tso.size, data_left);

			qentry = nicvf_get_nxt_sqentry(sq, qentry);
			nicvf_sq_add_gather_subdesc(sq, qentry, size,
						    virt_to_phys(tso.data));
			seg_subdescs++;
			seg_len += size;

			data_left -= size;
			tso_build_data(skb, &tso, size);
		}
		nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
					 seg_subdescs - 1, skb, seg_len);
		/* The header helper recorded the skb for this entry; clear
		 * it so that only the last segment ends up holding the skb.
		 */
		sq->skbuff[hdr_qentry] = (u64)NULL;
		qentry = nicvf_get_nxt_sqentry(sq, qentry);

		desc_cnt += seg_subdescs;
	}
	/* Save SKB in the last segment for freeing */
	sq->skbuff[hdr_qentry] = (u64)skb;

	nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);

	this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
	return 1;
}
1548 
1549 /* Append an skb to a SQ for packet transfer. */
1550 int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
1551 			struct sk_buff *skb, u8 sq_num)
1552 {
1553 	int i, size;
1554 	int subdesc_cnt, hdr_sqe = 0;
1555 	int qentry;
1556 	u64 dma_addr;
1557 
1558 	subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
1559 	if (subdesc_cnt > atomic_read(&sq->free_cnt))
1560 		goto append_fail;
1561 
1562 	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1563 
1564 	/* Check if its a TSO packet */
1565 	if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
1566 		return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
1567 
1568 	/* Add SQ header subdesc */
1569 	nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
1570 				 skb, skb->len);
1571 	hdr_sqe = qentry;
1572 
1573 	/* Add SQ gather subdescs */
1574 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
1575 	size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
1576 	/* HW will ensure data coherency, CPU sync not required */
1577 	dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
1578 				      offset_in_page(skb->data), size,
1579 				      DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1580 	if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
1581 		nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
1582 		return 0;
1583 	}
1584 
1585 	nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
1586 
1587 	/* Check for scattered buffer */
1588 	if (!skb_is_nonlinear(skb))
1589 		goto doorbell;
1590 
1591 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1592 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1593 
1594 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1595 		size = skb_frag_size(frag);
1596 		dma_addr = dma_map_page_attrs(&nic->pdev->dev,
1597 					      skb_frag_page(frag),
1598 					      skb_frag_off(frag), size,
1599 					      DMA_TO_DEVICE,
1600 					      DMA_ATTR_SKIP_CPU_SYNC);
1601 		if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
1602 			/* Free entire chain of mapped buffers
1603 			 * here 'i' = frags mapped + above mapped skb->data
1604 			 */
1605 			nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
1606 			nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
1607 			return 0;
1608 		}
1609 		nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
1610 	}
1611 
1612 doorbell:
1613 	if (nic->t88 && skb_shinfo(skb)->gso_size) {
1614 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1615 		nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
1616 	}
1617 
1618 	nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
1619 
1620 	return 1;
1621 
1622 append_fail:
1623 	/* Use original PCI dev for debug log */
1624 	nic = nic->pnicvf;
1625 	netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
1626 	return 0;
1627 }
1628 
/* Translate a fragment number into the index used to look up its length.
 * On big-endian builds the position inside each group of four is
 * reversed; otherwise the number is used unchanged.
 */
static inline unsigned frag_num(unsigned i)
{
#ifdef __BIG_ENDIAN
	/* Flipping the two low bits is the same as (i & ~3) + 3 - (i & 3) */
	return i ^ 3;
#else
	return i;
#endif
}
1637 
1638 static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
1639 				   u64 buf_addr, bool xdp)
1640 {
1641 	struct page *page = NULL;
1642 	int len = RCV_FRAG_LEN;
1643 
1644 	if (xdp) {
1645 		page = virt_to_page(phys_to_virt(buf_addr));
1646 		/* Check if it's a recycled page, if not
1647 		 * unmap the DMA mapping.
1648 		 *
1649 		 * Recycled page holds an extra reference.
1650 		 */
1651 		if (page_ref_count(page) != 1)
1652 			return;
1653 
1654 		len += XDP_PACKET_HEADROOM;
1655 		/* Receive buffers in XDP mode are mapped from page start */
1656 		dma_addr &= PAGE_MASK;
1657 	}
1658 	dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len,
1659 			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1660 }
1661 
/* Returns SKB for a received packet
 *
 * @nic:    VF the packet was received on
 * @cqe_rx: receive completion entry describing the packet's buffers
 * @xdp:    passed through to nicvf_unmap_rcv_buffer() for each buffer
 *
 * The first receive buffer becomes the skb's linear data, every further
 * buffer is attached as a page fragment.  Returns NULL when a buffer
 * address cannot be translated (an skb built so far is freed) or when the
 * skb for the first buffer cannot be created.
 */
struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
				  struct cqe_rx_t *cqe_rx, bool xdp)
{
	int frag;
	int payload_len = 0;
	struct sk_buff *skb = NULL;
	struct page *page;
	int offset;
	u16 *rb_lens = NULL;
	u64 *rb_ptrs = NULL;
	u64 phys_addr;

	/* Buffer lengths start at the fourth 64-bit word of the CQE */
	rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
	/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
	 * CQE_RX at word6, hence buffer pointers move by word
	 *
	 * Use existing 'hw_tso' flag which will be set for all chips
	 * except 88xx pass1 instead of a additional cache line
	 * access (or miss) by using pci dev's revision.
	 */
	if (!nic->hw_tso)
		rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
	else
		rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));

	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
		payload_len = rb_lens[frag_num(frag)];
		phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
		if (!phys_addr) {
			/* Translation failed: drop what was built so far */
			if (skb)
				dev_kfree_skb_any(skb);
			return NULL;
		}

		if (!frag) {
			/* First fragment */
			/* Step back by align_pad to the start of the buffer */
			nicvf_unmap_rcv_buffer(nic,
					       *rb_ptrs - cqe_rx->align_pad,
					       phys_addr, xdp);
			skb = nicvf_rb_ptr_to_skb(nic,
						  phys_addr - cqe_rx->align_pad,
						  payload_len);
			if (!skb)
				return NULL;
			/* Skip the pad so that skb data starts at the packet */
			skb_reserve(skb, cqe_rx->align_pad);
			skb_put(skb, payload_len);
		} else {
			/* Add fragments */
			nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp);
			page = virt_to_page(phys_to_virt(phys_addr));
			offset = phys_to_virt(phys_addr) - page_address(page);
			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
					offset, payload_len, RCV_FRAG_LEN);
		}
		/* Next buffer pointer */
		rb_ptrs++;
	}
	return skb;
}
1722 
1723 static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
1724 {
1725 	u64 reg_val;
1726 
1727 	switch (int_type) {
1728 	case NICVF_INTR_CQ:
1729 		reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
1730 		break;
1731 	case NICVF_INTR_SQ:
1732 		reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
1733 		break;
1734 	case NICVF_INTR_RBDR:
1735 		reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
1736 		break;
1737 	case NICVF_INTR_PKT_DROP:
1738 		reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
1739 		break;
1740 	case NICVF_INTR_TCP_TIMER:
1741 		reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
1742 		break;
1743 	case NICVF_INTR_MBOX:
1744 		reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
1745 		break;
1746 	case NICVF_INTR_QS_ERR:
1747 		reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
1748 		break;
1749 	default:
1750 		reg_val = 0;
1751 	}
1752 
1753 	return reg_val;
1754 }
1755 
1756 /* Enable interrupt */
1757 void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
1758 {
1759 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1760 
1761 	if (!mask) {
1762 		netdev_dbg(nic->netdev,
1763 			   "Failed to enable interrupt: unknown type\n");
1764 		return;
1765 	}
1766 	nicvf_reg_write(nic, NIC_VF_ENA_W1S,
1767 			nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
1768 }
1769 
1770 /* Disable interrupt */
1771 void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
1772 {
1773 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1774 
1775 	if (!mask) {
1776 		netdev_dbg(nic->netdev,
1777 			   "Failed to disable interrupt: unknown type\n");
1778 		return;
1779 	}
1780 
1781 	nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
1782 }
1783 
1784 /* Clear interrupt */
1785 void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
1786 {
1787 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1788 
1789 	if (!mask) {
1790 		netdev_dbg(nic->netdev,
1791 			   "Failed to clear interrupt: unknown type\n");
1792 		return;
1793 	}
1794 
1795 	nicvf_reg_write(nic, NIC_VF_INT, mask);
1796 }
1797 
1798 /* Check if interrupt is enabled */
1799 int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
1800 {
1801 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1802 	/* If interrupt type is unknown, we treat it disabled. */
1803 	if (!mask) {
1804 		netdev_dbg(nic->netdev,
1805 			   "Failed to check interrupt enable: unknown type\n");
1806 		return 0;
1807 	}
1808 
1809 	return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
1810 }
1811 
1812 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
1813 {
1814 	struct rcv_queue *rq;
1815 
1816 #define GET_RQ_STATS(reg) \
1817 	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
1818 			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1819 
1820 	rq = &nic->qs->rq[rq_idx];
1821 	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
1822 	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
1823 }
1824 
1825 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
1826 {
1827 	struct snd_queue *sq;
1828 
1829 #define GET_SQ_STATS(reg) \
1830 	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
1831 			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1832 
1833 	sq = &nic->qs->sq[sq_idx];
1834 	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
1835 	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
1836 }
1837 
/* Check for errors in the receive cmp.queue entry
 *
 * Logs the CQE's error level and opcode (under the rx_err message type)
 * and increments the per-cpu driver statistic matching the error opcode.
 * Opcodes without a case below are logged but not counted.
 * Always returns 1.
 */
int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
	netif_err(nic, rx_err, nic->netdev,
		  "RX error CQE err_level 0x%x err_opcode 0x%x\n",
		  cqe_rx->err_level, cqe_rx->err_opcode);

	switch (cqe_rx->err_opcode) {
	case CQ_RX_ERROP_RE_PARTIAL:
		this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
		break;
	case CQ_RX_ERROP_RE_JABBER:
		this_cpu_inc(nic->drv_stats->rx_jabber_errs);
		break;
	case CQ_RX_ERROP_RE_FCS:
		this_cpu_inc(nic->drv_stats->rx_fcs_errs);
		break;
	case CQ_RX_ERROP_RE_RX_CTL:
		this_cpu_inc(nic->drv_stats->rx_bgx_errs);
		break;
	case CQ_RX_ERROP_PREL2_ERR:
		this_cpu_inc(nic->drv_stats->rx_prel2_errs);
		break;
	case CQ_RX_ERROP_L2_MAL:
		this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
		break;
	case CQ_RX_ERROP_L2_OVERSIZE:
		this_cpu_inc(nic->drv_stats->rx_oversize);
		break;
	case CQ_RX_ERROP_L2_UNDERSIZE:
		this_cpu_inc(nic->drv_stats->rx_undersize);
		break;
	case CQ_RX_ERROP_L2_LENMISM:
		this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
		break;
	case CQ_RX_ERROP_L2_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l2_pclp);
		break;
	case CQ_RX_ERROP_IP_NOT:
		this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
		break;
	case CQ_RX_ERROP_IP_CSUM_ERR:
		this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
		break;
	case CQ_RX_ERROP_IP_MAL:
		this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
		break;
	case CQ_RX_ERROP_IP_MALD:
		this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
		break;
	case CQ_RX_ERROP_IP_HOP:
		this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
		break;
	case CQ_RX_ERROP_L3_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l3_pclp);
		break;
	case CQ_RX_ERROP_L4_MAL:
		this_cpu_inc(nic->drv_stats->rx_l4_malformed);
		break;
	case CQ_RX_ERROP_L4_CHK:
		this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
		break;
	case CQ_RX_ERROP_UDP_LEN:
		this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
		break;
	case CQ_RX_ERROP_L4_PORT:
		this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
		break;
	case CQ_RX_ERROP_TCP_FLAG:
		this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
		break;
	case CQ_RX_ERROP_TCP_OFFSET:
		this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
		break;
	case CQ_RX_ERROP_L4_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l4_pclp);
		break;
	case CQ_RX_ERROP_RBDR_TRUNC:
		this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
		break;
	}

	return 1;
}
1922 
/* Check for errors in the send cmp.queue entry
 *
 * Increments the per-cpu driver statistic matching the CQE's send status.
 * Status values without a case below are not counted.
 * Always returns 1.
 */
int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
{
	switch (cqe_tx->send_status) {
	case CQ_TX_ERROP_DESC_FAULT:
		this_cpu_inc(nic->drv_stats->tx_desc_fault);
		break;
	case CQ_TX_ERROP_HDR_CONS_ERR:
		this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
		break;
	case CQ_TX_ERROP_SUBDC_ERR:
		this_cpu_inc(nic->drv_stats->tx_subdesc_err);
		break;
	case CQ_TX_ERROP_MAX_SIZE_VIOL:
		this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
		break;
	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
		this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
		break;
	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
		this_cpu_inc(nic->drv_stats->tx_data_seq_err);
		break;
	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
		this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
		break;
	case CQ_TX_ERROP_LOCK_VIOL:
		this_cpu_inc(nic->drv_stats->tx_lock_viol);
		break;
	case CQ_TX_ERROP_DATA_FAULT:
		this_cpu_inc(nic->drv_stats->tx_data_fault);
		break;
	case CQ_TX_ERROP_TSTMP_CONFLICT:
		this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
		break;
	case CQ_TX_ERROP_TSTMP_TIMEOUT:
		this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
		break;
	case CQ_TX_ERROP_MEM_FAULT:
		this_cpu_inc(nic->drv_stats->tx_mem_fault);
		break;
	case CQ_TX_ERROP_CK_OVERLAP:
		this_cpu_inc(nic->drv_stats->tx_csum_overlap);
		break;
	case CQ_TX_ERROP_CK_OFLOW:
		this_cpu_inc(nic->drv_stats->tx_csum_overflow);
		break;
	}

	return 1;
}
1973