1 /*
2  * Copyright (C) 2015 Cavium, Inc.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License
6  * as published by the Free Software Foundation.
7  */
8 
9 #include <linux/pci.h>
10 #include <linux/netdevice.h>
11 #include <linux/ip.h>
12 #include <linux/etherdevice.h>
13 #include <linux/iommu.h>
14 #include <net/ip.h>
15 #include <net/tso.h>
16 
17 #include "nic_reg.h"
18 #include "nic.h"
19 #include "q_struct.h"
20 #include "nicvf_queues.h"
21 
22 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
23 					       int size, u64 data);
24 static void nicvf_get_page(struct nicvf *nic)
25 {
26 	if (!nic->rb_pageref || !nic->rb_page)
27 		return;
28 
29 	page_ref_add(nic->rb_page, nic->rb_pageref);
30 	nic->rb_pageref = 0;
31 }
32 
33 /* Poll a register for a specific value */
34 static int nicvf_poll_reg(struct nicvf *nic, int qidx,
35 			  u64 reg, int bit_pos, int bits, int val)
36 {
37 	u64 bit_mask;
38 	u64 reg_val;
39 	int timeout = 10;
40 
41 	bit_mask = (1ULL << bits) - 1;
42 	bit_mask = (bit_mask << bit_pos);
43 
44 	while (timeout) {
45 		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
46 		if (((reg_val & bit_mask) >> bit_pos) == val)
47 			return 0;
48 		usleep_range(1000, 2000);
49 		timeout--;
50 	}
51 	netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
52 	return 1;
53 }
54 
55 /* Allocate memory for a queue's descriptors */
56 static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
57 				  int q_len, int desc_size, int align_bytes)
58 {
59 	dmem->q_len = q_len;
60 	dmem->size = (desc_size * q_len) + align_bytes;
61 	/* Save address, need it while freeing */
62 	dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
63 						&dmem->dma, GFP_KERNEL);
64 	if (!dmem->unalign_base)
65 		return -ENOMEM;
66 
67 	/* Align memory address for 'align_bytes' */
68 	dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
69 	dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
70 	return 0;
71 }
72 
73 /* Free queue's descriptor memory */
74 static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
75 {
76 	if (!dmem)
77 		return;
78 
79 	dma_free_coherent(&nic->pdev->dev, dmem->size,
80 			  dmem->unalign_base, dmem->dma);
81 	dmem->unalign_base = NULL;
82 	dmem->base = NULL;
83 }
84 
85 #define XDP_PAGE_REFCNT_REFILL 256
86 
87 /* Allocate a new page or recycle one if possible
88  *
89  * We cannot optimize dma mapping here, since
90  * 1. It's only one RBDR ring for 8 Rx queues.
91  * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
92  *    and not idx into RBDR ring, so can't refer to saved info.
93  * 3. There are multiple receive buffers per page
94  */
95 static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
96 					       struct rbdr *rbdr, gfp_t gfp)
97 {
98 	int ref_count;
99 	struct page *page = NULL;
100 	struct pgcache *pgcache, *next;
101 
102 	/* Check if page is already allocated */
103 	pgcache = &rbdr->pgcache[rbdr->pgidx];
104 	page = pgcache->page;
105 	/* Check if page can be recycled */
106 	if (page) {
107 		ref_count = page_ref_count(page);
108 		/* Check if this page has been used once i.e 'put_page'
109 		 * called after packet transmission i.e internal ref_count
110 		 * and page's ref_count are equal i.e page can be recycled.
111 		 */
112 		if (rbdr->is_xdp && (ref_count == pgcache->ref_count))
113 			pgcache->ref_count--;
114 		else
115 			page = NULL;
116 
117 		/* In non-XDP mode, page's ref_count needs to be '1' for it
118 		 * to be recycled.
119 		 */
120 		if (!rbdr->is_xdp && (ref_count != 1))
121 			page = NULL;
122 	}
123 
124 	if (!page) {
125 		page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
126 		if (!page)
127 			return NULL;
128 
129 		this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);
130 
131 		/* Check for space */
132 		if (rbdr->pgalloc >= rbdr->pgcnt) {
133 			/* Page can still be used */
134 			nic->rb_page = page;
135 			return NULL;
136 		}
137 
138 		/* Save the page in page cache */
139 		pgcache->page = page;
140 		pgcache->dma_addr = 0;
141 		pgcache->ref_count = 0;
142 		rbdr->pgalloc++;
143 	}
144 
145 	/* Take additional page references for recycling */
146 	if (rbdr->is_xdp) {
147 		/* Since there is single RBDR (i.e single core doing
148 		 * page recycling) per 8 Rx queues, in XDP mode adjusting
149 		 * page references atomically is the biggest bottleneck, so
150 		 * take bunch of references at a time.
151 		 *
152 		 * So here, below reference counts defer by '1'.
153 		 */
154 		if (!pgcache->ref_count) {
155 			pgcache->ref_count = XDP_PAGE_REFCNT_REFILL;
156 			page_ref_add(page, XDP_PAGE_REFCNT_REFILL);
157 		}
158 	} else {
159 		/* In non-XDP case, single 64K page is divided across multiple
160 		 * receive buffers, so cost of recycling is less anyway.
161 		 * So we can do with just one extra reference.
162 		 */
163 		page_ref_add(page, 1);
164 	}
165 
166 	rbdr->pgidx++;
167 	rbdr->pgidx &= (rbdr->pgcnt - 1);
168 
169 	/* Prefetch refcount of next page in page cache */
170 	next = &rbdr->pgcache[rbdr->pgidx];
171 	page = next->page;
172 	if (page)
173 		prefetch(&page->_refcount);
174 
175 	return pgcache;
176 }
177 
178 /* Allocate buffer for packet reception */
179 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
180 					 gfp_t gfp, u32 buf_len, u64 *rbuf)
181 {
182 	struct pgcache *pgcache = NULL;
183 
184 	/* Check if request can be accomodated in previous allocated page.
185 	 * But in XDP mode only one buffer per page is permitted.
186 	 */
187 	if (!rbdr->is_xdp && nic->rb_page &&
188 	    ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
189 		nic->rb_pageref++;
190 		goto ret;
191 	}
192 
193 	nicvf_get_page(nic);
194 	nic->rb_page = NULL;
195 
196 	/* Get new page, either recycled or new one */
197 	pgcache = nicvf_alloc_page(nic, rbdr, gfp);
198 	if (!pgcache && !nic->rb_page) {
199 		this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
200 		return -ENOMEM;
201 	}
202 
203 	nic->rb_page_offset = 0;
204 
205 	/* Reserve space for header modifications by BPF program */
206 	if (rbdr->is_xdp)
207 		buf_len += XDP_PACKET_HEADROOM;
208 
209 	/* Check if it's recycled */
210 	if (pgcache)
211 		nic->rb_page = pgcache->page;
212 ret:
213 	if (rbdr->is_xdp && pgcache && pgcache->dma_addr) {
214 		*rbuf = pgcache->dma_addr;
215 	} else {
216 		/* HW will ensure data coherency, CPU sync not required */
217 		*rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
218 						nic->rb_page_offset, buf_len,
219 						DMA_FROM_DEVICE,
220 						DMA_ATTR_SKIP_CPU_SYNC);
221 		if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
222 			if (!nic->rb_page_offset)
223 				__free_pages(nic->rb_page, 0);
224 			nic->rb_page = NULL;
225 			return -ENOMEM;
226 		}
227 		if (pgcache)
228 			pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
229 		nic->rb_page_offset += buf_len;
230 	}
231 
232 	return 0;
233 }
234 
235 /* Build skb around receive buffer */
236 static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
237 					   u64 rb_ptr, int len)
238 {
239 	void *data;
240 	struct sk_buff *skb;
241 
242 	data = phys_to_virt(rb_ptr);
243 
244 	/* Now build an skb to give to stack */
245 	skb = build_skb(data, RCV_FRAG_LEN);
246 	if (!skb) {
247 		put_page(virt_to_page(data));
248 		return NULL;
249 	}
250 
251 	prefetch(skb->data);
252 	return skb;
253 }
254 
255 /* Allocate RBDR ring and populate receive buffers */
256 static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
257 			    int ring_len, int buf_size)
258 {
259 	int idx;
260 	u64 rbuf;
261 	struct rbdr_entry_t *desc;
262 	int err;
263 
264 	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
265 				     sizeof(struct rbdr_entry_t),
266 				     NICVF_RCV_BUF_ALIGN_BYTES);
267 	if (err)
268 		return err;
269 
270 	rbdr->desc = rbdr->dmem.base;
271 	/* Buffer size has to be in multiples of 128 bytes */
272 	rbdr->dma_size = buf_size;
273 	rbdr->enable = true;
274 	rbdr->thresh = RBDR_THRESH;
275 	rbdr->head = 0;
276 	rbdr->tail = 0;
277 
278 	/* Initialize page recycling stuff.
279 	 *
280 	 * Can't use single buffer per page especially with 64K pages.
281 	 * On embedded platforms i.e 81xx/83xx available memory itself
282 	 * is low and minimum ring size of RBDR is 8K, that takes away
283 	 * lots of memory.
284 	 *
285 	 * But for XDP it has to be a single buffer per page.
286 	 */
287 	if (!nic->pnicvf->xdp_prog) {
288 		rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
289 		rbdr->is_xdp = false;
290 	} else {
291 		rbdr->pgcnt = ring_len;
292 		rbdr->is_xdp = true;
293 	}
294 	rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
295 	rbdr->pgcache = kcalloc(rbdr->pgcnt, sizeof(*rbdr->pgcache),
296 				GFP_KERNEL);
297 	if (!rbdr->pgcache)
298 		return -ENOMEM;
299 	rbdr->pgidx = 0;
300 	rbdr->pgalloc = 0;
301 
302 	nic->rb_page = NULL;
303 	for (idx = 0; idx < ring_len; idx++) {
304 		err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
305 					     RCV_FRAG_LEN, &rbuf);
306 		if (err) {
307 			/* To free already allocated and mapped ones */
308 			rbdr->tail = idx - 1;
309 			return err;
310 		}
311 
312 		desc = GET_RBDR_DESC(rbdr, idx);
313 		desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
314 	}
315 
316 	nicvf_get_page(nic);
317 
318 	return 0;
319 }
320 
321 /* Free RBDR ring and its receive buffers */
322 static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
323 {
324 	int head, tail;
325 	u64 buf_addr, phys_addr;
326 	struct pgcache *pgcache;
327 	struct rbdr_entry_t *desc;
328 
329 	if (!rbdr)
330 		return;
331 
332 	rbdr->enable = false;
333 	if (!rbdr->dmem.base)
334 		return;
335 
336 	head = rbdr->head;
337 	tail = rbdr->tail;
338 
339 	/* Release page references */
340 	while (head != tail) {
341 		desc = GET_RBDR_DESC(rbdr, head);
342 		buf_addr = desc->buf_addr;
343 		phys_addr = nicvf_iova_to_phys(nic, buf_addr);
344 		dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
345 				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
346 		if (phys_addr)
347 			put_page(virt_to_page(phys_to_virt(phys_addr)));
348 		head++;
349 		head &= (rbdr->dmem.q_len - 1);
350 	}
351 	/* Release buffer of tail desc */
352 	desc = GET_RBDR_DESC(rbdr, tail);
353 	buf_addr = desc->buf_addr;
354 	phys_addr = nicvf_iova_to_phys(nic, buf_addr);
355 	dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
356 			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
357 	if (phys_addr)
358 		put_page(virt_to_page(phys_to_virt(phys_addr)));
359 
360 	/* Sync page cache info */
361 	smp_rmb();
362 
363 	/* Release additional page references held for recycling */
364 	head = 0;
365 	while (head < rbdr->pgcnt) {
366 		pgcache = &rbdr->pgcache[head];
367 		if (pgcache->page && page_ref_count(pgcache->page) != 0) {
368 			if (!rbdr->is_xdp) {
369 				put_page(pgcache->page);
370 				continue;
371 			}
372 			page_ref_sub(pgcache->page, pgcache->ref_count - 1);
373 			put_page(pgcache->page);
374 		}
375 		head++;
376 	}
377 
378 	/* Free RBDR ring */
379 	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
380 }
381 
382 /* Refill receive buffer descriptors with new buffers.
383  */
384 static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
385 {
386 	struct queue_set *qs = nic->qs;
387 	int rbdr_idx = qs->rbdr_cnt;
388 	int tail, qcount;
389 	int refill_rb_cnt;
390 	struct rbdr *rbdr;
391 	struct rbdr_entry_t *desc;
392 	u64 rbuf;
393 	int new_rb = 0;
394 
395 refill:
396 	if (!rbdr_idx)
397 		return;
398 	rbdr_idx--;
399 	rbdr = &qs->rbdr[rbdr_idx];
400 	/* Check if it's enabled */
401 	if (!rbdr->enable)
402 		goto next_rbdr;
403 
404 	/* Get no of desc's to be refilled */
405 	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
406 	qcount &= 0x7FFFF;
407 	/* Doorbell can be ringed with a max of ring size minus 1 */
408 	if (qcount >= (qs->rbdr_len - 1))
409 		goto next_rbdr;
410 	else
411 		refill_rb_cnt = qs->rbdr_len - qcount - 1;
412 
413 	/* Sync page cache info */
414 	smp_rmb();
415 
416 	/* Start filling descs from tail */
417 	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
418 	while (refill_rb_cnt) {
419 		tail++;
420 		tail &= (rbdr->dmem.q_len - 1);
421 
422 		if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
423 			break;
424 
425 		desc = GET_RBDR_DESC(rbdr, tail);
426 		desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
427 		refill_rb_cnt--;
428 		new_rb++;
429 	}
430 
431 	nicvf_get_page(nic);
432 
433 	/* make sure all memory stores are done before ringing doorbell */
434 	smp_wmb();
435 
436 	/* Check if buffer allocation failed */
437 	if (refill_rb_cnt)
438 		nic->rb_alloc_fail = true;
439 	else
440 		nic->rb_alloc_fail = false;
441 
442 	/* Notify HW */
443 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
444 			      rbdr_idx, new_rb);
445 next_rbdr:
446 	/* Re-enable RBDR interrupts only if buffer allocation is success */
447 	if (!nic->rb_alloc_fail && rbdr->enable &&
448 	    netif_running(nic->pnicvf->netdev))
449 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
450 
451 	if (rbdr_idx)
452 		goto refill;
453 }
454 
455 /* Alloc rcv buffers in non-atomic mode for better success */
456 void nicvf_rbdr_work(struct work_struct *work)
457 {
458 	struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);
459 
460 	nicvf_refill_rbdr(nic, GFP_KERNEL);
461 	if (nic->rb_alloc_fail)
462 		schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
463 	else
464 		nic->rb_work_scheduled = false;
465 }
466 
467 /* In Softirq context, alloc rcv buffers in atomic mode */
468 void nicvf_rbdr_task(unsigned long data)
469 {
470 	struct nicvf *nic = (struct nicvf *)data;
471 
472 	nicvf_refill_rbdr(nic, GFP_ATOMIC);
473 	if (nic->rb_alloc_fail) {
474 		nic->rb_work_scheduled = true;
475 		schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
476 	}
477 }
478 
479 /* Initialize completion queue */
480 static int nicvf_init_cmp_queue(struct nicvf *nic,
481 				struct cmp_queue *cq, int q_len)
482 {
483 	int err;
484 
485 	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
486 				     NICVF_CQ_BASE_ALIGN_BYTES);
487 	if (err)
488 		return err;
489 
490 	cq->desc = cq->dmem.base;
491 	cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
492 	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
493 
494 	return 0;
495 }
496 
497 static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
498 {
499 	if (!cq)
500 		return;
501 	if (!cq->dmem.base)
502 		return;
503 
504 	nicvf_free_q_desc_mem(nic, &cq->dmem);
505 }
506 
507 /* Initialize transmit queue */
508 static int nicvf_init_snd_queue(struct nicvf *nic,
509 				struct snd_queue *sq, int q_len, int qidx)
510 {
511 	int err;
512 
513 	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
514 				     NICVF_SQ_BASE_ALIGN_BYTES);
515 	if (err)
516 		return err;
517 
518 	sq->desc = sq->dmem.base;
519 	sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
520 	if (!sq->skbuff)
521 		return -ENOMEM;
522 
523 	sq->head = 0;
524 	sq->tail = 0;
525 	sq->thresh = SND_QUEUE_THRESH;
526 
527 	/* Check if this SQ is a XDP TX queue */
528 	if (nic->sqs_mode)
529 		qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
530 	if (qidx < nic->pnicvf->xdp_tx_queues) {
531 		/* Alloc memory to save page pointers for XDP_TX */
532 		sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
533 		if (!sq->xdp_page)
534 			return -ENOMEM;
535 		sq->xdp_desc_cnt = 0;
536 		sq->xdp_free_cnt = q_len - 1;
537 		sq->is_xdp = true;
538 	} else {
539 		sq->xdp_page = NULL;
540 		sq->xdp_desc_cnt = 0;
541 		sq->xdp_free_cnt = 0;
542 		sq->is_xdp = false;
543 
544 		atomic_set(&sq->free_cnt, q_len - 1);
545 
546 		/* Preallocate memory for TSO segment's header */
547 		sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
548 						  q_len * TSO_HEADER_SIZE,
549 						  &sq->tso_hdrs_phys,
550 						  GFP_KERNEL);
551 		if (!sq->tso_hdrs)
552 			return -ENOMEM;
553 	}
554 
555 	return 0;
556 }
557 
558 void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
559 			      int hdr_sqe, u8 subdesc_cnt)
560 {
561 	u8 idx;
562 	struct sq_gather_subdesc *gather;
563 
564 	/* Unmap DMA mapped skb data buffers */
565 	for (idx = 0; idx < subdesc_cnt; idx++) {
566 		hdr_sqe++;
567 		hdr_sqe &= (sq->dmem.q_len - 1);
568 		gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
569 		/* HW will ensure data coherency, CPU sync not required */
570 		dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
571 				     gather->size, DMA_TO_DEVICE,
572 				     DMA_ATTR_SKIP_CPU_SYNC);
573 	}
574 }
575 
576 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
577 {
578 	struct sk_buff *skb;
579 	struct page *page;
580 	struct sq_hdr_subdesc *hdr;
581 	struct sq_hdr_subdesc *tso_sqe;
582 
583 	if (!sq)
584 		return;
585 	if (!sq->dmem.base)
586 		return;
587 
588 	if (sq->tso_hdrs) {
589 		dma_free_coherent(&nic->pdev->dev,
590 				  sq->dmem.q_len * TSO_HEADER_SIZE,
591 				  sq->tso_hdrs, sq->tso_hdrs_phys);
592 		sq->tso_hdrs = NULL;
593 	}
594 
595 	/* Free pending skbs in the queue */
596 	smp_rmb();
597 	while (sq->head != sq->tail) {
598 		skb = (struct sk_buff *)sq->skbuff[sq->head];
599 		if (!skb || !sq->xdp_page)
600 			goto next;
601 
602 		page = (struct page *)sq->xdp_page[sq->head];
603 		if (!page)
604 			goto next;
605 		else
606 			put_page(page);
607 
608 		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
609 		/* Check for dummy descriptor used for HW TSO offload on 88xx */
610 		if (hdr->dont_send) {
611 			/* Get actual TSO descriptors and unmap them */
612 			tso_sqe =
613 			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
614 			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
615 						 tso_sqe->subdesc_cnt);
616 		} else {
617 			nicvf_unmap_sndq_buffers(nic, sq, sq->head,
618 						 hdr->subdesc_cnt);
619 		}
620 		if (skb)
621 			dev_kfree_skb_any(skb);
622 next:
623 		sq->head++;
624 		sq->head &= (sq->dmem.q_len - 1);
625 	}
626 	kfree(sq->skbuff);
627 	kfree(sq->xdp_page);
628 	nicvf_free_q_desc_mem(nic, &sq->dmem);
629 }
630 
631 static void nicvf_reclaim_snd_queue(struct nicvf *nic,
632 				    struct queue_set *qs, int qidx)
633 {
634 	/* Disable send queue */
635 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
636 	/* Check if SQ is stopped */
637 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
638 		return;
639 	/* Reset send queue */
640 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
641 }
642 
643 static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
644 				    struct queue_set *qs, int qidx)
645 {
646 	union nic_mbx mbx = {};
647 
648 	/* Make sure all packets in the pipeline are written back into mem */
649 	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
650 	nicvf_send_msg_to_pf(nic, &mbx);
651 }
652 
653 static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
654 				    struct queue_set *qs, int qidx)
655 {
656 	/* Disable timer threshold (doesn't get reset upon CQ reset */
657 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
658 	/* Disable completion queue */
659 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
660 	/* Reset completion queue */
661 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
662 }
663 
664 static void nicvf_reclaim_rbdr(struct nicvf *nic,
665 			       struct rbdr *rbdr, int qidx)
666 {
667 	u64 tmp, fifo_state;
668 	int timeout = 10;
669 
670 	/* Save head and tail pointers for feeing up buffers */
671 	rbdr->head = nicvf_queue_reg_read(nic,
672 					  NIC_QSET_RBDR_0_1_HEAD,
673 					  qidx) >> 3;
674 	rbdr->tail = nicvf_queue_reg_read(nic,
675 					  NIC_QSET_RBDR_0_1_TAIL,
676 					  qidx) >> 3;
677 
678 	/* If RBDR FIFO is in 'FAIL' state then do a reset first
679 	 * before relaiming.
680 	 */
681 	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
682 	if (((fifo_state >> 62) & 0x03) == 0x3)
683 		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
684 				      qidx, NICVF_RBDR_RESET);
685 
686 	/* Disable RBDR */
687 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
688 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
689 		return;
690 	while (1) {
691 		tmp = nicvf_queue_reg_read(nic,
692 					   NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
693 					   qidx);
694 		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
695 			break;
696 		usleep_range(1000, 2000);
697 		timeout--;
698 		if (!timeout) {
699 			netdev_err(nic->netdev,
700 				   "Failed polling on prefetch status\n");
701 			return;
702 		}
703 	}
704 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
705 			      qidx, NICVF_RBDR_RESET);
706 
707 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
708 		return;
709 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
710 	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
711 		return;
712 }
713 
714 void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
715 {
716 	u64 rq_cfg;
717 	int sqs;
718 
719 	rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
720 
721 	/* Enable first VLAN stripping */
722 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
723 		rq_cfg |= (1ULL << 25);
724 	else
725 		rq_cfg &= ~(1ULL << 25);
726 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
727 
728 	/* Configure Secondary Qsets, if any */
729 	for (sqs = 0; sqs < nic->sqs_count; sqs++)
730 		if (nic->snicvf[sqs])
731 			nicvf_queue_reg_write(nic->snicvf[sqs],
732 					      NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
733 }
734 
735 static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
736 {
737 	union nic_mbx mbx = {};
738 
739 	/* Reset all RQ/SQ and VF stats */
740 	mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
741 	mbx.reset_stat.rx_stat_mask = 0x3FFF;
742 	mbx.reset_stat.tx_stat_mask = 0x1F;
743 	mbx.reset_stat.rq_stat_mask = 0xFFFF;
744 	mbx.reset_stat.sq_stat_mask = 0xFFFF;
745 	nicvf_send_msg_to_pf(nic, &mbx);
746 }
747 
748 /* Configures receive queue */
749 static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
750 				   int qidx, bool enable)
751 {
752 	union nic_mbx mbx = {};
753 	struct rcv_queue *rq;
754 	struct rq_cfg rq_cfg;
755 
756 	rq = &qs->rq[qidx];
757 	rq->enable = enable;
758 
759 	/* Disable receive queue */
760 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
761 
762 	if (!rq->enable) {
763 		nicvf_reclaim_rcv_queue(nic, qs, qidx);
764 		xdp_rxq_info_unreg(&rq->xdp_rxq);
765 		return;
766 	}
767 
768 	rq->cq_qs = qs->vnic_id;
769 	rq->cq_idx = qidx;
770 	rq->start_rbdr_qs = qs->vnic_id;
771 	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
772 	rq->cont_rbdr_qs = qs->vnic_id;
773 	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
774 	/* all writes of RBDR data to be loaded into L2 Cache as well*/
775 	rq->caching = 1;
776 
777 	/* Driver have no proper error path for failed XDP RX-queue info reg */
778 	WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
779 
780 	/* Send a mailbox msg to PF to config RQ */
781 	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
782 	mbx.rq.qs_num = qs->vnic_id;
783 	mbx.rq.rq_num = qidx;
784 	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
785 			  (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
786 			  (rq->cont_qs_rbdr_idx << 8) |
787 			  (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
788 	nicvf_send_msg_to_pf(nic, &mbx);
789 
790 	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
791 	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
792 		     (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
793 		     (qs->vnic_id << 0);
794 	nicvf_send_msg_to_pf(nic, &mbx);
795 
796 	/* RQ drop config
797 	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
798 	 */
799 	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
800 	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
801 		     (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
802 		     (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
803 	nicvf_send_msg_to_pf(nic, &mbx);
804 
805 	if (!nic->sqs_mode && (qidx == 0)) {
806 		/* Enable checking L3/L4 length and TCP/UDP checksums
807 		 * Also allow IPv6 pkts with zero UDP checksum.
808 		 */
809 		nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
810 				      (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
811 		nicvf_config_vlan_stripping(nic, nic->netdev->features);
812 	}
813 
814 	/* Enable Receive queue */
815 	memset(&rq_cfg, 0, sizeof(struct rq_cfg));
816 	rq_cfg.ena = 1;
817 	rq_cfg.tcp_ena = 0;
818 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
819 }
820 
821 /* Configures completion queue */
822 void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
823 			    int qidx, bool enable)
824 {
825 	struct cmp_queue *cq;
826 	struct cq_cfg cq_cfg;
827 
828 	cq = &qs->cq[qidx];
829 	cq->enable = enable;
830 
831 	if (!cq->enable) {
832 		nicvf_reclaim_cmp_queue(nic, qs, qidx);
833 		return;
834 	}
835 
836 	/* Reset completion queue */
837 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
838 
839 	if (!cq->enable)
840 		return;
841 
842 	spin_lock_init(&cq->lock);
843 	/* Set completion queue base address */
844 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
845 			      qidx, (u64)(cq->dmem.phys_base));
846 
847 	/* Enable Completion queue */
848 	memset(&cq_cfg, 0, sizeof(struct cq_cfg));
849 	cq_cfg.ena = 1;
850 	cq_cfg.reset = 0;
851 	cq_cfg.caching = 0;
852 	cq_cfg.qsize = ilog2(qs->cq_len >> 10);
853 	cq_cfg.avg_con = 0;
854 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);
855 
856 	/* Set threshold value for interrupt generation */
857 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
858 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
859 			      qidx, CMP_QUEUE_TIMER_THRESH);
860 }
861 
862 /* Configures transmit queue */
863 static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
864 				   int qidx, bool enable)
865 {
866 	union nic_mbx mbx = {};
867 	struct snd_queue *sq;
868 	struct sq_cfg sq_cfg;
869 
870 	sq = &qs->sq[qidx];
871 	sq->enable = enable;
872 
873 	if (!sq->enable) {
874 		nicvf_reclaim_snd_queue(nic, qs, qidx);
875 		return;
876 	}
877 
878 	/* Reset send queue */
879 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
880 
881 	sq->cq_qs = qs->vnic_id;
882 	sq->cq_idx = qidx;
883 
884 	/* Send a mailbox msg to PF to config SQ */
885 	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
886 	mbx.sq.qs_num = qs->vnic_id;
887 	mbx.sq.sq_num = qidx;
888 	mbx.sq.sqs_mode = nic->sqs_mode;
889 	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
890 	nicvf_send_msg_to_pf(nic, &mbx);
891 
892 	/* Set queue base address */
893 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
894 			      qidx, (u64)(sq->dmem.phys_base));
895 
896 	/* Enable send queue  & set queue size */
897 	memset(&sq_cfg, 0, sizeof(struct sq_cfg));
898 	sq_cfg.ena = 1;
899 	sq_cfg.reset = 0;
900 	sq_cfg.ldwb = 0;
901 	sq_cfg.qsize = ilog2(qs->sq_len >> 10);
902 	sq_cfg.tstmp_bgx_intf = 0;
903 	/* CQ's level at which HW will stop processing SQEs to avoid
904 	 * transmitting a pkt with no space in CQ to post CQE_TX.
905 	 */
906 	sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len;
907 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
908 
909 	/* Set threshold value for interrupt generation */
910 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
911 
912 	/* Set queue:cpu affinity for better load distribution */
913 	if (cpu_online(qidx)) {
914 		cpumask_set_cpu(qidx, &sq->affinity_mask);
915 		netif_set_xps_queue(nic->netdev,
916 				    &sq->affinity_mask, qidx);
917 	}
918 }
919 
920 /* Configures receive buffer descriptor ring */
921 static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
922 			      int qidx, bool enable)
923 {
924 	struct rbdr *rbdr;
925 	struct rbdr_cfg rbdr_cfg;
926 
927 	rbdr = &qs->rbdr[qidx];
928 	nicvf_reclaim_rbdr(nic, rbdr, qidx);
929 	if (!enable)
930 		return;
931 
932 	/* Set descriptor base address */
933 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
934 			      qidx, (u64)(rbdr->dmem.phys_base));
935 
936 	/* Enable RBDR  & set queue size */
937 	/* Buffer size should be in multiples of 128 bytes */
938 	memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
939 	rbdr_cfg.ena = 1;
940 	rbdr_cfg.reset = 0;
941 	rbdr_cfg.ldwb = 0;
942 	rbdr_cfg.qsize = RBDR_SIZE;
943 	rbdr_cfg.avg_con = 0;
944 	rbdr_cfg.lines = rbdr->dma_size / 128;
945 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
946 			      qidx, *(u64 *)&rbdr_cfg);
947 
948 	/* Notify HW */
949 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
950 			      qidx, qs->rbdr_len - 1);
951 
952 	/* Set threshold value for interrupt generation */
953 	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
954 			      qidx, rbdr->thresh - 1);
955 }
956 
957 /* Requests PF to assign and enable Qset */
958 void nicvf_qset_config(struct nicvf *nic, bool enable)
959 {
960 	union nic_mbx mbx = {};
961 	struct queue_set *qs = nic->qs;
962 	struct qs_cfg *qs_cfg;
963 
964 	if (!qs) {
965 		netdev_warn(nic->netdev,
966 			    "Qset is still not allocated, don't init queues\n");
967 		return;
968 	}
969 
970 	qs->enable = enable;
971 	qs->vnic_id = nic->vf_id;
972 
973 	/* Send a mailbox msg to PF to config Qset */
974 	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
975 	mbx.qs.num = qs->vnic_id;
976 	mbx.qs.sqs_count = nic->sqs_count;
977 
978 	mbx.qs.cfg = 0;
979 	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
980 	if (qs->enable) {
981 		qs_cfg->ena = 1;
982 #ifdef __BIG_ENDIAN
983 		qs_cfg->be = 1;
984 #endif
985 		qs_cfg->vnic = qs->vnic_id;
986 		/* Enable Tx timestamping capability */
987 		if (nic->ptp_clock)
988 			qs_cfg->send_tstmp_ena = 1;
989 	}
990 	nicvf_send_msg_to_pf(nic, &mbx);
991 }
992 
993 static void nicvf_free_resources(struct nicvf *nic)
994 {
995 	int qidx;
996 	struct queue_set *qs = nic->qs;
997 
998 	/* Free receive buffer descriptor ring */
999 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1000 		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
1001 
1002 	/* Free completion queue */
1003 	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1004 		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
1005 
1006 	/* Free send queue */
1007 	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1008 		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
1009 }
1010 
1011 static int nicvf_alloc_resources(struct nicvf *nic)
1012 {
1013 	int qidx;
1014 	struct queue_set *qs = nic->qs;
1015 
1016 	/* Alloc receive buffer descriptor ring */
1017 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1018 		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
1019 				    DMA_BUFFER_LEN))
1020 			goto alloc_fail;
1021 	}
1022 
1023 	/* Alloc send queue */
1024 	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
1025 		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
1026 			goto alloc_fail;
1027 	}
1028 
1029 	/* Alloc completion queue */
1030 	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
1031 		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
1032 			goto alloc_fail;
1033 	}
1034 
1035 	return 0;
1036 alloc_fail:
1037 	nicvf_free_resources(nic);
1038 	return -ENOMEM;
1039 }
1040 
1041 int nicvf_set_qset_resources(struct nicvf *nic)
1042 {
1043 	struct queue_set *qs;
1044 
1045 	qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
1046 	if (!qs)
1047 		return -ENOMEM;
1048 	nic->qs = qs;
1049 
1050 	/* Set count of each queue */
1051 	qs->rbdr_cnt = DEFAULT_RBDR_CNT;
1052 	qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
1053 	qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
1054 	qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
1055 
1056 	/* Set queue lengths */
1057 	qs->rbdr_len = RCV_BUF_COUNT;
1058 	qs->sq_len = SND_QUEUE_LEN;
1059 	qs->cq_len = CMP_QUEUE_LEN;
1060 
1061 	nic->rx_queues = qs->rq_cnt;
1062 	nic->tx_queues = qs->sq_cnt;
1063 	nic->xdp_tx_queues = 0;
1064 
1065 	return 0;
1066 }
1067 
1068 int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
1069 {
1070 	bool disable = false;
1071 	struct queue_set *qs = nic->qs;
1072 	struct queue_set *pqs = nic->pnicvf->qs;
1073 	int qidx;
1074 
1075 	if (!qs)
1076 		return 0;
1077 
1078 	/* Take primary VF's queue lengths.
1079 	 * This is needed to take queue lengths set from ethtool
1080 	 * into consideration.
1081 	 */
1082 	if (nic->sqs_mode && pqs) {
1083 		qs->cq_len = pqs->cq_len;
1084 		qs->sq_len = pqs->sq_len;
1085 	}
1086 
1087 	if (enable) {
1088 		if (nicvf_alloc_resources(nic))
1089 			return -ENOMEM;
1090 
1091 		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1092 			nicvf_snd_queue_config(nic, qs, qidx, enable);
1093 		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1094 			nicvf_cmp_queue_config(nic, qs, qidx, enable);
1095 		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1096 			nicvf_rbdr_config(nic, qs, qidx, enable);
1097 		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1098 			nicvf_rcv_queue_config(nic, qs, qidx, enable);
1099 	} else {
1100 		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1101 			nicvf_rcv_queue_config(nic, qs, qidx, disable);
1102 		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1103 			nicvf_rbdr_config(nic, qs, qidx, disable);
1104 		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1105 			nicvf_snd_queue_config(nic, qs, qidx, disable);
1106 		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1107 			nicvf_cmp_queue_config(nic, qs, qidx, disable);
1108 
1109 		nicvf_free_resources(nic);
1110 	}
1111 
1112 	/* Reset RXQ's stats.
1113 	 * SQ's stats will get reset automatically once SQ is reset.
1114 	 */
1115 	nicvf_reset_rcv_queue_stats(nic);
1116 
1117 	return 0;
1118 }
1119 
1120 /* Get a free desc from SQ
1121  * returns descriptor ponter & descriptor number
1122  */
1123 static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1124 {
1125 	int qentry;
1126 
1127 	qentry = sq->tail;
1128 	if (!sq->is_xdp)
1129 		atomic_sub(desc_cnt, &sq->free_cnt);
1130 	else
1131 		sq->xdp_free_cnt -= desc_cnt;
1132 	sq->tail += desc_cnt;
1133 	sq->tail &= (sq->dmem.q_len - 1);
1134 
1135 	return qentry;
1136 }
1137 
1138 /* Rollback to previous tail pointer when descriptors not used */
1139 static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
1140 					  int qentry, int desc_cnt)
1141 {
1142 	sq->tail = qentry;
1143 	atomic_add(desc_cnt, &sq->free_cnt);
1144 }
1145 
1146 /* Free descriptor back to SQ for future use */
1147 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1148 {
1149 	if (!sq->is_xdp)
1150 		atomic_add(desc_cnt, &sq->free_cnt);
1151 	else
1152 		sq->xdp_free_cnt += desc_cnt;
1153 	sq->head += desc_cnt;
1154 	sq->head &= (sq->dmem.q_len - 1);
1155 }
1156 
1157 static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1158 {
1159 	qentry++;
1160 	qentry &= (sq->dmem.q_len - 1);
1161 	return qentry;
1162 }
1163 
1164 void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
1165 {
1166 	u64 sq_cfg;
1167 
1168 	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1169 	sq_cfg |= NICVF_SQ_EN;
1170 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1171 	/* Ring doorbell so that H/W restarts processing SQEs */
1172 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
1173 }
1174 
1175 void nicvf_sq_disable(struct nicvf *nic, int qidx)
1176 {
1177 	u64 sq_cfg;
1178 
1179 	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1180 	sq_cfg &= ~NICVF_SQ_EN;
1181 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1182 }
1183 
1184 void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
1185 			      int qidx)
1186 {
1187 	u64 head, tail;
1188 	struct sk_buff *skb;
1189 	struct nicvf *nic = netdev_priv(netdev);
1190 	struct sq_hdr_subdesc *hdr;
1191 
1192 	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
1193 	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
1194 	while (sq->head != head) {
1195 		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
1196 		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
1197 			nicvf_put_sq_desc(sq, 1);
1198 			continue;
1199 		}
1200 		skb = (struct sk_buff *)sq->skbuff[sq->head];
1201 		if (skb)
1202 			dev_kfree_skb_any(skb);
1203 		atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
1204 		atomic64_add(hdr->tot_len,
1205 			     (atomic64_t *)&netdev->stats.tx_bytes);
1206 		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
1207 	}
1208 }
1209 
1210 /* XDP Transmit APIs */
1211 void nicvf_xdp_sq_doorbell(struct nicvf *nic,
1212 			   struct snd_queue *sq, int sq_num)
1213 {
1214 	if (!sq->xdp_desc_cnt)
1215 		return;
1216 
1217 	/* make sure all memory stores are done before ringing doorbell */
1218 	wmb();
1219 
1220 	/* Inform HW to xmit all TSO segments */
1221 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
1222 			      sq_num, sq->xdp_desc_cnt);
1223 	sq->xdp_desc_cnt = 0;
1224 }
1225 
1226 static inline void
1227 nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1228 			     int subdesc_cnt, u64 data, int len)
1229 {
1230 	struct sq_hdr_subdesc *hdr;
1231 
1232 	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1233 	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1234 	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1235 	hdr->subdesc_cnt = subdesc_cnt;
1236 	hdr->tot_len = len;
1237 	hdr->post_cqe = 1;
1238 	sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
1239 }
1240 
1241 int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
1242 			    u64 bufaddr, u64 dma_addr, u16 len)
1243 {
1244 	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1245 	int qentry;
1246 
1247 	if (subdesc_cnt > sq->xdp_free_cnt)
1248 		return 0;
1249 
1250 	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1251 
1252 	nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);
1253 
1254 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
1255 	nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);
1256 
1257 	sq->xdp_desc_cnt += subdesc_cnt;
1258 
1259 	return 1;
1260 }
1261 
1262 /* Calculate no of SQ subdescriptors needed to transmit all
1263  * segments of this TSO packet.
1264  * Taken from 'Tilera network driver' with a minor modification.
1265  */
1266 static int nicvf_tso_count_subdescs(struct sk_buff *skb)
1267 {
1268 	struct skb_shared_info *sh = skb_shinfo(skb);
1269 	unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1270 	unsigned int data_len = skb->len - sh_len;
1271 	unsigned int p_len = sh->gso_size;
1272 	long f_id = -1;    /* id of the current fragment */
1273 	long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
1274 	long f_used = 0;  /* bytes used from the current fragment */
1275 	long n;            /* size of the current piece of payload */
1276 	int num_edescs = 0;
1277 	int segment;
1278 
1279 	for (segment = 0; segment < sh->gso_segs; segment++) {
1280 		unsigned int p_used = 0;
1281 
1282 		/* One edesc for header and for each piece of the payload. */
1283 		for (num_edescs++; p_used < p_len; num_edescs++) {
1284 			/* Advance as needed. */
1285 			while (f_used >= f_size) {
1286 				f_id++;
1287 				f_size = skb_frag_size(&sh->frags[f_id]);
1288 				f_used = 0;
1289 			}
1290 
1291 			/* Use bytes from the current fragment. */
1292 			n = p_len - p_used;
1293 			if (n > f_size - f_used)
1294 				n = f_size - f_used;
1295 			f_used += n;
1296 			p_used += n;
1297 		}
1298 
1299 		/* The last segment may be less than gso_size. */
1300 		data_len -= p_len;
1301 		if (data_len < p_len)
1302 			p_len = data_len;
1303 	}
1304 
1305 	/* '+ gso_segs' for SQ_HDR_SUDESCs for each segment */
1306 	return num_edescs + sh->gso_segs;
1307 }
1308 
1309 #define POST_CQE_DESC_COUNT 2
1310 
1311 /* Get the number of SQ descriptors needed to xmit this skb */
1312 static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
1313 {
1314 	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1315 
1316 	if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
1317 		subdesc_cnt = nicvf_tso_count_subdescs(skb);
1318 		return subdesc_cnt;
1319 	}
1320 
1321 	/* Dummy descriptors to get TSO pkt completion notification */
1322 	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
1323 		subdesc_cnt += POST_CQE_DESC_COUNT;
1324 
1325 	if (skb_shinfo(skb)->nr_frags)
1326 		subdesc_cnt += skb_shinfo(skb)->nr_frags;
1327 
1328 	return subdesc_cnt;
1329 }
1330 
1331 /* Add SQ HEADER subdescriptor.
1332  * First subdescriptor for every send descriptor.
1333  */
1334 static inline void
1335 nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
1336 			 int subdesc_cnt, struct sk_buff *skb, int len)
1337 {
1338 	int proto;
1339 	struct sq_hdr_subdesc *hdr;
1340 	union {
1341 		struct iphdr *v4;
1342 		struct ipv6hdr *v6;
1343 		unsigned char *hdr;
1344 	} ip;
1345 
1346 	ip.hdr = skb_network_header(skb);
1347 	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1348 	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1349 	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1350 
1351 	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
1352 		/* post_cqe = 0, to avoid HW posting a CQE for every TSO
1353 		 * segment transmitted on 88xx.
1354 		 */
1355 		hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
1356 	} else {
1357 		sq->skbuff[qentry] = (u64)skb;
1358 		/* Enable notification via CQE after processing SQE */
1359 		hdr->post_cqe = 1;
1360 		/* No of subdescriptors following this */
1361 		hdr->subdesc_cnt = subdesc_cnt;
1362 	}
1363 	hdr->tot_len = len;
1364 
1365 	/* Offload checksum calculation to HW */
1366 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1367 		if (ip.v4->version == 4)
1368 			hdr->csum_l3 = 1; /* Enable IP csum calculation */
1369 		hdr->l3_offset = skb_network_offset(skb);
1370 		hdr->l4_offset = skb_transport_offset(skb);
1371 
1372 		proto = (ip.v4->version == 4) ? ip.v4->protocol :
1373 			ip.v6->nexthdr;
1374 
1375 		switch (proto) {
1376 		case IPPROTO_TCP:
1377 			hdr->csum_l4 = SEND_L4_CSUM_TCP;
1378 			break;
1379 		case IPPROTO_UDP:
1380 			hdr->csum_l4 = SEND_L4_CSUM_UDP;
1381 			break;
1382 		case IPPROTO_SCTP:
1383 			hdr->csum_l4 = SEND_L4_CSUM_SCTP;
1384 			break;
1385 		}
1386 	}
1387 
1388 	if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
1389 		hdr->tso = 1;
1390 		hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb);
1391 		hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
1392 		/* For non-tunneled pkts, point this to L2 ethertype */
1393 		hdr->inner_l3_offset = skb_network_offset(skb) - 2;
1394 		this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
1395 	}
1396 
1397 	/* Check if timestamp is requested */
1398 	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1399 		skb_tx_timestamp(skb);
1400 		return;
1401 	}
1402 
1403 	/* Tx timestamping not supported along with TSO, so ignore request */
1404 	if (skb_shinfo(skb)->gso_size)
1405 		return;
1406 
1407 	/* HW supports only a single outstanding packet to timestamp */
1408 	if (!atomic_add_unless(&nic->pnicvf->tx_ptp_skbs, 1, 1))
1409 		return;
1410 
1411 	/* Mark the SKB for later reference */
1412 	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1413 
1414 	/* Finally enable timestamp generation
1415 	 * Since 'post_cqe' is also set, two CQEs will be posted
1416 	 * for this packet i.e CQE_TYPE_SEND and CQE_TYPE_SEND_PTP.
1417 	 */
1418 	hdr->tstmp = 1;
1419 }
1420 
1421 /* SQ GATHER subdescriptor
1422  * Must follow HDR descriptor
1423  */
1424 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1425 					       int size, u64 data)
1426 {
1427 	struct sq_gather_subdesc *gather;
1428 
1429 	qentry &= (sq->dmem.q_len - 1);
1430 	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1431 
1432 	memset(gather, 0, SND_QUEUE_DESC_SIZE);
1433 	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1434 	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1435 	gather->size = size;
1436 	gather->addr = data;
1437 }
1438 
1439 /* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
1440  * packet so that a CQE is posted as a notifation for transmission of
1441  * TSO packet.
1442  */
1443 static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
1444 					    int tso_sqe, struct sk_buff *skb)
1445 {
1446 	struct sq_imm_subdesc *imm;
1447 	struct sq_hdr_subdesc *hdr;
1448 
1449 	sq->skbuff[qentry] = (u64)skb;
1450 
1451 	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1452 	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1453 	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1454 	/* Enable notification via CQE after processing SQE */
1455 	hdr->post_cqe = 1;
1456 	/* There is no packet to transmit here */
1457 	hdr->dont_send = 1;
1458 	hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
1459 	hdr->tot_len = 1;
1460 	/* Actual TSO header SQE index, needed for cleanup */
1461 	hdr->rsvd2 = tso_sqe;
1462 
1463 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
1464 	imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
1465 	memset(imm, 0, SND_QUEUE_DESC_SIZE);
1466 	imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
1467 	imm->len = 1;
1468 }
1469 
1470 static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
1471 				     int sq_num, int desc_cnt)
1472 {
1473 	struct netdev_queue *txq;
1474 
1475 	txq = netdev_get_tx_queue(nic->pnicvf->netdev,
1476 				  skb_get_queue_mapping(skb));
1477 
1478 	netdev_tx_sent_queue(txq, skb->len);
1479 
1480 	/* make sure all memory stores are done before ringing doorbell */
1481 	smp_wmb();
1482 
1483 	/* Inform HW to xmit all TSO segments */
1484 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
1485 			      sq_num, desc_cnt);
1486 }
1487 
/* Segment a TSO packet into 'gso_size' segments and append
 * them to SQ for transfer.
 *
 * 'qentry' is the first of the descriptors already reserved for this skb.
 * Every segment occupies one HEADER subdescriptor, one GATHER for the
 * per-segment protocol header built in sq->tso_hdrs, and one GATHER per
 * payload piece.  The doorbell is rung once for all segments.
 * Always returns 1.
 */
static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
			       int sq_num, int qentry, struct sk_buff *skb)
{
	struct tso_t tso;
	int seg_subdescs = 0, desc_cnt = 0;
	int seg_len, total_len, data_left;
	int hdr_qentry = qentry;
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);

	tso_start(skb, &tso);
	/* Payload bytes still to be split into segments */
	total_len = skb->len - hdr_len;
	while (total_len > 0) {
		char *hdr;

		/* Save Qentry for adding HDR_SUBDESC at the end */
		hdr_qentry = qentry;

		/* Payload carried by this segment */
		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
		total_len -= data_left;

		/* Add segment's header; the header buffer slot is selected
		 * by the descriptor index.
		 */
		qentry = nicvf_get_nxt_sqentry(sq, qentry);
		hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE;
		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
		nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len,
					    sq->tso_hdrs_phys +
					    qentry * TSO_HEADER_SIZE);
		/* HDR_SUBDESC + GATHER */
		seg_subdescs = 2;
		seg_len = hdr_len;

		/* Add segment's payload fragments */
		while (data_left > 0) {
			int size;

			size = min_t(int, tso.size, data_left);

			qentry = nicvf_get_nxt_sqentry(sq, qentry);
			nicvf_sq_add_gather_subdesc(sq, qentry, size,
						    virt_to_phys(tso.data));
			seg_subdescs++;
			seg_len += size;

			data_left -= size;
			tso_build_data(skb, &tso, size);
		}
		/* Header is written last, once the segment's subdescriptor
		 * count and total length are known.
		 */
		nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
					 seg_subdescs - 1, skb, seg_len);
		/* Drop the skb reference the header helper stored for this
		 * segment; only the last segment keeps it (see below).
		 */
		sq->skbuff[hdr_qentry] = (u64)NULL;
		qentry = nicvf_get_nxt_sqentry(sq, qentry);

		desc_cnt += seg_subdescs;
	}
	/* Save SKB in the last segment for freeing */
	sq->skbuff[hdr_qentry] = (u64)skb;

	nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);

	this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
	return 1;
}
1552 
1553 /* Append an skb to a SQ for packet transfer. */
1554 int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
1555 			struct sk_buff *skb, u8 sq_num)
1556 {
1557 	int i, size;
1558 	int subdesc_cnt, hdr_sqe = 0;
1559 	int qentry;
1560 	u64 dma_addr;
1561 
1562 	subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
1563 	if (subdesc_cnt > atomic_read(&sq->free_cnt))
1564 		goto append_fail;
1565 
1566 	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1567 
1568 	/* Check if its a TSO packet */
1569 	if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
1570 		return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
1571 
1572 	/* Add SQ header subdesc */
1573 	nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
1574 				 skb, skb->len);
1575 	hdr_sqe = qentry;
1576 
1577 	/* Add SQ gather subdescs */
1578 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
1579 	size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
1580 	/* HW will ensure data coherency, CPU sync not required */
1581 	dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
1582 				      offset_in_page(skb->data), size,
1583 				      DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1584 	if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
1585 		nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
1586 		return 0;
1587 	}
1588 
1589 	nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
1590 
1591 	/* Check for scattered buffer */
1592 	if (!skb_is_nonlinear(skb))
1593 		goto doorbell;
1594 
1595 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1596 		const struct skb_frag_struct *frag;
1597 
1598 		frag = &skb_shinfo(skb)->frags[i];
1599 
1600 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1601 		size = skb_frag_size(frag);
1602 		dma_addr = dma_map_page_attrs(&nic->pdev->dev,
1603 					      skb_frag_page(frag),
1604 					      frag->page_offset, size,
1605 					      DMA_TO_DEVICE,
1606 					      DMA_ATTR_SKIP_CPU_SYNC);
1607 		if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
1608 			/* Free entire chain of mapped buffers
1609 			 * here 'i' = frags mapped + above mapped skb->data
1610 			 */
1611 			nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
1612 			nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
1613 			return 0;
1614 		}
1615 		nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
1616 	}
1617 
1618 doorbell:
1619 	if (nic->t88 && skb_shinfo(skb)->gso_size) {
1620 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1621 		nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
1622 	}
1623 
1624 	nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
1625 
1626 	return 1;
1627 
1628 append_fail:
1629 	/* Use original PCI dev for debug log */
1630 	nic = nic->pnicvf;
1631 	netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
1632 	return 0;
1633 }
1634 
/* Map a logical fragment number to its index in an array of 16-bit
 * lengths packed four per 64-bit word: unchanged on little-endian builds,
 * reversed within each group of four when __BIG_ENDIAN is defined.
 */
static inline unsigned frag_num(unsigned i)
{
#ifndef __BIG_ENDIAN
	return i;
#else
	/* Flipping the two low bits mirrors the position inside the group */
	return i ^ 3U;
#endif
}
1643 
1644 static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
1645 				   u64 buf_addr, bool xdp)
1646 {
1647 	struct page *page = NULL;
1648 	int len = RCV_FRAG_LEN;
1649 
1650 	if (xdp) {
1651 		page = virt_to_page(phys_to_virt(buf_addr));
1652 		/* Check if it's a recycled page, if not
1653 		 * unmap the DMA mapping.
1654 		 *
1655 		 * Recycled page holds an extra reference.
1656 		 */
1657 		if (page_ref_count(page) != 1)
1658 			return;
1659 
1660 		len += XDP_PACKET_HEADROOM;
1661 		/* Receive buffers in XDP mode are mapped from page start */
1662 		dma_addr &= PAGE_MASK;
1663 	}
1664 	dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len,
1665 			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1666 }
1667 
1668 /* Returns SKB for a received packet */
1669 struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
1670 				  struct cqe_rx_t *cqe_rx, bool xdp)
1671 {
1672 	int frag;
1673 	int payload_len = 0;
1674 	struct sk_buff *skb = NULL;
1675 	struct page *page;
1676 	int offset;
1677 	u16 *rb_lens = NULL;
1678 	u64 *rb_ptrs = NULL;
1679 	u64 phys_addr;
1680 
1681 	rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
1682 	/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
1683 	 * CQE_RX at word6, hence buffer pointers move by word
1684 	 *
1685 	 * Use existing 'hw_tso' flag which will be set for all chips
1686 	 * except 88xx pass1 instead of a additional cache line
1687 	 * access (or miss) by using pci dev's revision.
1688 	 */
1689 	if (!nic->hw_tso)
1690 		rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
1691 	else
1692 		rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
1693 
1694 	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
1695 		payload_len = rb_lens[frag_num(frag)];
1696 		phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
1697 		if (!phys_addr) {
1698 			if (skb)
1699 				dev_kfree_skb_any(skb);
1700 			return NULL;
1701 		}
1702 
1703 		if (!frag) {
1704 			/* First fragment */
1705 			nicvf_unmap_rcv_buffer(nic,
1706 					       *rb_ptrs - cqe_rx->align_pad,
1707 					       phys_addr, xdp);
1708 			skb = nicvf_rb_ptr_to_skb(nic,
1709 						  phys_addr - cqe_rx->align_pad,
1710 						  payload_len);
1711 			if (!skb)
1712 				return NULL;
1713 			skb_reserve(skb, cqe_rx->align_pad);
1714 			skb_put(skb, payload_len);
1715 		} else {
1716 			/* Add fragments */
1717 			nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp);
1718 			page = virt_to_page(phys_to_virt(phys_addr));
1719 			offset = phys_to_virt(phys_addr) - page_address(page);
1720 			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
1721 					offset, payload_len, RCV_FRAG_LEN);
1722 		}
1723 		/* Next buffer pointer */
1724 		rb_ptrs++;
1725 	}
1726 	return skb;
1727 }
1728 
1729 static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
1730 {
1731 	u64 reg_val;
1732 
1733 	switch (int_type) {
1734 	case NICVF_INTR_CQ:
1735 		reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
1736 		break;
1737 	case NICVF_INTR_SQ:
1738 		reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
1739 		break;
1740 	case NICVF_INTR_RBDR:
1741 		reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
1742 		break;
1743 	case NICVF_INTR_PKT_DROP:
1744 		reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
1745 		break;
1746 	case NICVF_INTR_TCP_TIMER:
1747 		reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
1748 		break;
1749 	case NICVF_INTR_MBOX:
1750 		reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
1751 		break;
1752 	case NICVF_INTR_QS_ERR:
1753 		reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
1754 		break;
1755 	default:
1756 		reg_val = 0;
1757 	}
1758 
1759 	return reg_val;
1760 }
1761 
1762 /* Enable interrupt */
1763 void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
1764 {
1765 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1766 
1767 	if (!mask) {
1768 		netdev_dbg(nic->netdev,
1769 			   "Failed to enable interrupt: unknown type\n");
1770 		return;
1771 	}
1772 	nicvf_reg_write(nic, NIC_VF_ENA_W1S,
1773 			nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
1774 }
1775 
1776 /* Disable interrupt */
1777 void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
1778 {
1779 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1780 
1781 	if (!mask) {
1782 		netdev_dbg(nic->netdev,
1783 			   "Failed to disable interrupt: unknown type\n");
1784 		return;
1785 	}
1786 
1787 	nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
1788 }
1789 
1790 /* Clear interrupt */
1791 void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
1792 {
1793 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1794 
1795 	if (!mask) {
1796 		netdev_dbg(nic->netdev,
1797 			   "Failed to clear interrupt: unknown type\n");
1798 		return;
1799 	}
1800 
1801 	nicvf_reg_write(nic, NIC_VF_INT, mask);
1802 }
1803 
1804 /* Check if interrupt is enabled */
1805 int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
1806 {
1807 	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1808 	/* If interrupt type is unknown, we treat it disabled. */
1809 	if (!mask) {
1810 		netdev_dbg(nic->netdev,
1811 			   "Failed to check interrupt enable: unknown type\n");
1812 		return 0;
1813 	}
1814 
1815 	return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
1816 }
1817 
1818 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
1819 {
1820 	struct rcv_queue *rq;
1821 
1822 #define GET_RQ_STATS(reg) \
1823 	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
1824 			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1825 
1826 	rq = &nic->qs->rq[rq_idx];
1827 	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
1828 	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
1829 }
1830 
1831 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
1832 {
1833 	struct snd_queue *sq;
1834 
1835 #define GET_SQ_STATS(reg) \
1836 	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
1837 			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1838 
1839 	sq = &nic->qs->sq[sq_idx];
1840 	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
1841 	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
1842 }
1843 
/* Account an error reported in a receive completion queue entry.
 *
 * Logs the CQE's error level and opcode, then increments the per-CPU
 * driver statistic that corresponds to the error opcode.  Opcodes without
 * a matching case are logged but not counted.
 *
 * Always returns 1.
 */
int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
	netif_err(nic, rx_err, nic->netdev,
		  "RX error CQE err_level 0x%x err_opcode 0x%x\n",
		  cqe_rx->err_level, cqe_rx->err_opcode);

	/* One counter per error opcode */
	switch (cqe_rx->err_opcode) {
	case CQ_RX_ERROP_RE_PARTIAL:
		this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
		break;
	case CQ_RX_ERROP_RE_JABBER:
		this_cpu_inc(nic->drv_stats->rx_jabber_errs);
		break;
	case CQ_RX_ERROP_RE_FCS:
		this_cpu_inc(nic->drv_stats->rx_fcs_errs);
		break;
	case CQ_RX_ERROP_RE_RX_CTL:
		this_cpu_inc(nic->drv_stats->rx_bgx_errs);
		break;
	case CQ_RX_ERROP_PREL2_ERR:
		this_cpu_inc(nic->drv_stats->rx_prel2_errs);
		break;
	case CQ_RX_ERROP_L2_MAL:
		this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
		break;
	case CQ_RX_ERROP_L2_OVERSIZE:
		this_cpu_inc(nic->drv_stats->rx_oversize);
		break;
	case CQ_RX_ERROP_L2_UNDERSIZE:
		this_cpu_inc(nic->drv_stats->rx_undersize);
		break;
	case CQ_RX_ERROP_L2_LENMISM:
		this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
		break;
	case CQ_RX_ERROP_L2_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l2_pclp);
		break;
	case CQ_RX_ERROP_IP_NOT:
		this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
		break;
	case CQ_RX_ERROP_IP_CSUM_ERR:
		this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
		break;
	case CQ_RX_ERROP_IP_MAL:
		this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
		break;
	case CQ_RX_ERROP_IP_MALD:
		this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
		break;
	case CQ_RX_ERROP_IP_HOP:
		this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
		break;
	case CQ_RX_ERROP_L3_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l3_pclp);
		break;
	case CQ_RX_ERROP_L4_MAL:
		this_cpu_inc(nic->drv_stats->rx_l4_malformed);
		break;
	case CQ_RX_ERROP_L4_CHK:
		this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
		break;
	case CQ_RX_ERROP_UDP_LEN:
		this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
		break;
	case CQ_RX_ERROP_L4_PORT:
		this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
		break;
	case CQ_RX_ERROP_TCP_FLAG:
		this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
		break;
	case CQ_RX_ERROP_TCP_OFFSET:
		this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
		break;
	case CQ_RX_ERROP_L4_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l4_pclp);
		break;
	case CQ_RX_ERROP_RBDR_TRUNC:
		this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
		break;
	}

	return 1;
}
1928 
/* Account an error reported in a send completion queue entry.
 *
 * Increments the per-CPU driver statistic that corresponds to the CQE's
 * send_status.  Status values without a matching case are not counted,
 * and nothing is logged here.
 *
 * Always returns 1.
 */
int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
{
	/* One counter per send status code */
	switch (cqe_tx->send_status) {
	case CQ_TX_ERROP_DESC_FAULT:
		this_cpu_inc(nic->drv_stats->tx_desc_fault);
		break;
	case CQ_TX_ERROP_HDR_CONS_ERR:
		this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
		break;
	case CQ_TX_ERROP_SUBDC_ERR:
		this_cpu_inc(nic->drv_stats->tx_subdesc_err);
		break;
	case CQ_TX_ERROP_MAX_SIZE_VIOL:
		this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
		break;
	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
		this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
		break;
	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
		this_cpu_inc(nic->drv_stats->tx_data_seq_err);
		break;
	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
		this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
		break;
	case CQ_TX_ERROP_LOCK_VIOL:
		this_cpu_inc(nic->drv_stats->tx_lock_viol);
		break;
	case CQ_TX_ERROP_DATA_FAULT:
		this_cpu_inc(nic->drv_stats->tx_data_fault);
		break;
	case CQ_TX_ERROP_TSTMP_CONFLICT:
		this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
		break;
	case CQ_TX_ERROP_TSTMP_TIMEOUT:
		this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
		break;
	case CQ_TX_ERROP_MEM_FAULT:
		this_cpu_inc(nic->drv_stats->tx_mem_fault);
		break;
	case CQ_TX_ERROP_CK_OVERLAP:
		this_cpu_inc(nic->drv_stats->tx_csum_overlap);
		break;
	case CQ_TX_ERROP_CK_OFLOW:
		this_cpu_inc(nic->drv_stats->tx_csum_overflow);
		break;
	}

	return 1;
}
1979