1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Huawei HiNIC PCI Express Linux driver
4  * Copyright(c) 2017 Huawei Technologies Co., Ltd
5  */
6 
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/pci.h>
10 #include <linux/device.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/vmalloc.h>
13 #include <linux/errno.h>
14 #include <linux/sizes.h>
15 #include <linux/atomic.h>
16 #include <linux/skbuff.h>
17 #include <linux/io.h>
18 #include <asm/barrier.h>
19 #include <asm/byteorder.h>
20 
21 #include "hinic_common.h"
22 #include "hinic_hw_if.h"
23 #include "hinic_hw_wqe.h"
24 #include "hinic_hw_wq.h"
25 #include "hinic_hw_qp_ctxt.h"
26 #include "hinic_hw_qp.h"
27 #include "hinic_hw_io.h"
28 
29 #define SQ_DB_OFF               SZ_2K
30 
/* The number of cache lines to prefetch until the threshold state */
32 #define WQ_PREFETCH_MAX         2
/* The number of cache lines to prefetch after the threshold state */
34 #define WQ_PREFETCH_MIN         1
35 /* Threshold state */
36 #define WQ_PREFETCH_THRESHOLD   256
37 
/* sizes of the SQ/RQ ctxt */
#define Q_CTXT_SIZE             48
#define CTXT_RSVD               240

/*
 * Byte offset of a queue context: a reserved area of CTXT_RSVD bytes per
 * queue (SQs and RQs together) comes first, followed by the SQ contexts and
 * then the RQ contexts, Q_CTXT_SIZE bytes each.
 *
 * Every macro argument is parenthesized so that callers may pass arbitrary
 * expressions.
 */
#define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
		(((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)

#define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
		(((max_rqs) + (max_sqs)) * CTXT_RSVD + \
		 ((max_sqs) + (q_id)) * Q_CTXT_SIZE)
48 
/* Size expressed in 16 byte / 8 byte units, rounded up to a whole unit */
#define SIZE_16BYTES(size)              (ALIGN(size, 16) >> 4)
#define SIZE_8BYTES(size)               (ALIGN(size, 8) >> 3)

/* Convert a section length given in 8 byte units back to bytes */
#define SECT_SIZE_FROM_8BYTES(size)     ((size) << 3)

/* The bits of the producer index above the low 8 bits */
#define SQ_DB_PI_HI_SHIFT       8
#define SQ_DB_PI_HI(prod_idx)   ((prod_idx) >> SQ_DB_PI_HI_SHIFT)

/* The low 8 bits of the producer index */
#define SQ_DB_PI_LOW_MASK       0xFF
#define SQ_DB_PI_LOW(prod_idx)  ((prod_idx) & SQ_DB_PI_LOW_MASK)

/* Doorbell address: db_base indexed in u64 steps by the low bits of the pi */
#define SQ_DB_ADDR(sq, pi)      ((u64 *)((sq)->db_base) + SQ_DB_PI_LOW(pi))

/* Wrap an index by the mask of the queue's wq */
#define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
#define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
63 
/* Value written to the DATA_FORMAT field of the SQ ctrl section */
enum sq_wqe_type {
	SQ_NORMAL_WQE           = 0,
};
67 
/* Value written to the COMPLETE_FORMAT field of the RQ ctrl section */
enum rq_completion_fmt {
	RQ_COMPLETE_SGE         = 1,
};
71 
72 void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
73 			     enum hinic_qp_ctxt_type ctxt_type,
74 			     u16 num_queues, u16 max_queues)
75 {
76 	u16 max_sqs = max_queues;
77 	u16 max_rqs = max_queues;
78 
79 	qp_ctxt_hdr->num_queues = num_queues;
80 	qp_ctxt_hdr->queue_type = ctxt_type;
81 
82 	if (ctxt_type == HINIC_QP_CTXT_TYPE_SQ)
83 		qp_ctxt_hdr->addr_offset = SQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
84 	else
85 		qp_ctxt_hdr->addr_offset = RQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
86 
87 	qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);
88 
89 	hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
90 }
91 
92 void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
93 			   struct hinic_sq *sq, u16 global_qid)
94 {
95 	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
96 	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
97 	u16 pi_start, ci_start;
98 	struct hinic_wq *wq;
99 
100 	wq = sq->wq;
101 	ci_start = atomic_read(&wq->cons_idx);
102 	pi_start = atomic_read(&wq->prod_idx);
103 
104 	/* Read the first page paddr from the WQ page paddr ptrs */
105 	wq_page_addr = be64_to_cpu(*wq->block_vaddr);
106 
107 	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
108 	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
109 	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
110 
111 	/* If only one page, use 0-level CLA */
112 	if (wq->num_q_pages == 1)
113 		wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq_page_addr);
114 	else
115 		wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
116 
117 	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
118 	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
119 
120 	sq_ctxt->ceq_attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid,
121 						       GLOBAL_SQ_ID) |
122 			    HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);
123 
124 	sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci_start, IDX) |
125 			      HINIC_SQ_CTXT_CI_SET(1, WRAPPED);
126 
127 	sq_ctxt->wq_hi_pfn_pi =
128 			HINIC_SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) |
129 			HINIC_SQ_CTXT_WQ_PAGE_SET(pi_start, PI);
130 
131 	sq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
132 
133 	sq_ctxt->pref_cache =
134 		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
135 		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
136 		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
137 
138 	sq_ctxt->pref_wrapped = 1;
139 
140 	sq_ctxt->pref_wq_hi_pfn_ci =
141 		HINIC_SQ_CTXT_PREF_SET(ci_start, CI) |
142 		HINIC_SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN);
143 
144 	sq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
145 
146 	sq_ctxt->wq_block_hi_pfn =
147 		HINIC_SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
148 
149 	sq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
150 
151 	hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
152 }
153 
154 void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
155 			   struct hinic_rq *rq, u16 global_qid)
156 {
157 	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
158 	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
159 	u16 pi_start, ci_start;
160 	struct hinic_wq *wq;
161 
162 	wq = rq->wq;
163 	ci_start = atomic_read(&wq->cons_idx);
164 	pi_start = atomic_read(&wq->prod_idx);
165 
166 	/* Read the first page paddr from the WQ page paddr ptrs */
167 	wq_page_addr = be64_to_cpu(*wq->block_vaddr);
168 
169 	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
170 	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
171 	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
172 
173 	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
174 	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
175 	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
176 
177 	rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
178 			    HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);
179 
180 	rq_ctxt->pi_intr_attr = HINIC_RQ_CTXT_PI_SET(pi_start, IDX) |
181 				HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);
182 
183 	rq_ctxt->wq_hi_pfn_ci = HINIC_RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi,
184 							  HI_PFN) |
185 				HINIC_RQ_CTXT_WQ_PAGE_SET(ci_start, CI);
186 
187 	rq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
188 
189 	rq_ctxt->pref_cache =
190 		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
191 		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
192 		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
193 
194 	rq_ctxt->pref_wrapped = 1;
195 
196 	rq_ctxt->pref_wq_hi_pfn_ci =
197 		HINIC_RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN) |
198 		HINIC_RQ_CTXT_PREF_SET(ci_start, CI);
199 
200 	rq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
201 
202 	rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);
203 	rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);
204 
205 	rq_ctxt->wq_block_hi_pfn =
206 		HINIC_RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
207 
208 	rq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
209 
210 	hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
211 }
212 
213 /**
214  * alloc_sq_skb_arr - allocate sq array for saved skb
215  * @sq: HW Send Queue
216  *
217  * Return 0 - Success, negative - Failure
218  **/
219 static int alloc_sq_skb_arr(struct hinic_sq *sq)
220 {
221 	struct hinic_wq *wq = sq->wq;
222 	size_t skb_arr_size;
223 
224 	skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
225 	sq->saved_skb = vzalloc(skb_arr_size);
226 	if (!sq->saved_skb)
227 		return -ENOMEM;
228 
229 	return 0;
230 }
231 
232 /**
233  * free_sq_skb_arr - free sq array for saved skb
234  * @sq: HW Send Queue
235  **/
236 static void free_sq_skb_arr(struct hinic_sq *sq)
237 {
238 	vfree(sq->saved_skb);
239 }
240 
241 /**
242  * alloc_rq_skb_arr - allocate rq array for saved skb
243  * @rq: HW Receive Queue
244  *
245  * Return 0 - Success, negative - Failure
246  **/
247 static int alloc_rq_skb_arr(struct hinic_rq *rq)
248 {
249 	struct hinic_wq *wq = rq->wq;
250 	size_t skb_arr_size;
251 
252 	skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
253 	rq->saved_skb = vzalloc(skb_arr_size);
254 	if (!rq->saved_skb)
255 		return -ENOMEM;
256 
257 	return 0;
258 }
259 
260 /**
261  * free_rq_skb_arr - free rq array for saved skb
262  * @rq: HW Receive Queue
263  **/
264 static void free_rq_skb_arr(struct hinic_rq *rq)
265 {
266 	vfree(rq->saved_skb);
267 }
268 
269 /**
270  * hinic_init_sq - Initialize HW Send Queue
271  * @sq: HW Send Queue
272  * @hwif: HW Interface for accessing HW
273  * @wq: Work Queue for the data of the SQ
274  * @entry: msix entry for sq
275  * @ci_addr: address for reading the current HW consumer index
276  * @ci_dma_addr: dma address for reading the current HW consumer index
277  * @db_base: doorbell base address
278  *
279  * Return 0 - Success, negative - Failure
280  **/
281 int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
282 		  struct hinic_wq *wq, struct msix_entry *entry,
283 		  void *ci_addr, dma_addr_t ci_dma_addr,
284 		  void __iomem *db_base)
285 {
286 	sq->hwif = hwif;
287 
288 	sq->wq = wq;
289 
290 	sq->irq = entry->vector;
291 	sq->msix_entry = entry->entry;
292 
293 	sq->hw_ci_addr = ci_addr;
294 	sq->hw_ci_dma_addr = ci_dma_addr;
295 
296 	sq->db_base = db_base + SQ_DB_OFF;
297 
298 	return alloc_sq_skb_arr(sq);
299 }
300 
/**
 * hinic_clean_sq - Clean HW Send Queue's Resources
 * @sq: Send Queue
 *
 * Releases the saved skb array that was allocated by hinic_init_sq().
 **/
void hinic_clean_sq(struct hinic_sq *sq)
{
	free_sq_skb_arr(sq);
}
309 
310 /**
311  * alloc_rq_cqe - allocate rq completion queue elements
312  * @rq: HW Receive Queue
313  *
314  * Return 0 - Success, negative - Failure
315  **/
316 static int alloc_rq_cqe(struct hinic_rq *rq)
317 {
318 	struct hinic_hwif *hwif = rq->hwif;
319 	struct pci_dev *pdev = hwif->pdev;
320 	size_t cqe_dma_size, cqe_size;
321 	struct hinic_wq *wq = rq->wq;
322 	int j, i;
323 
324 	cqe_size = wq->q_depth * sizeof(*rq->cqe);
325 	rq->cqe = vzalloc(cqe_size);
326 	if (!rq->cqe)
327 		return -ENOMEM;
328 
329 	cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
330 	rq->cqe_dma = vzalloc(cqe_dma_size);
331 	if (!rq->cqe_dma)
332 		goto err_cqe_dma_arr_alloc;
333 
334 	for (i = 0; i < wq->q_depth; i++) {
335 		rq->cqe[i] = dma_alloc_coherent(&pdev->dev,
336 						sizeof(*rq->cqe[i]),
337 						&rq->cqe_dma[i], GFP_KERNEL);
338 		if (!rq->cqe[i])
339 			goto err_cqe_alloc;
340 	}
341 
342 	return 0;
343 
344 err_cqe_alloc:
345 	for (j = 0; j < i; j++)
346 		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
347 				  rq->cqe_dma[j]);
348 
349 	vfree(rq->cqe_dma);
350 
351 err_cqe_dma_arr_alloc:
352 	vfree(rq->cqe);
353 	return -ENOMEM;
354 }
355 
356 /**
357  * free_rq_cqe - free rq completion queue elements
358  * @rq: HW Receive Queue
359  **/
360 static void free_rq_cqe(struct hinic_rq *rq)
361 {
362 	struct hinic_hwif *hwif = rq->hwif;
363 	struct pci_dev *pdev = hwif->pdev;
364 	struct hinic_wq *wq = rq->wq;
365 	int i;
366 
367 	for (i = 0; i < wq->q_depth; i++)
368 		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
369 				  rq->cqe_dma[i]);
370 
371 	vfree(rq->cqe_dma);
372 	vfree(rq->cqe);
373 }
374 
375 /**
376  * hinic_init_rq - Initialize HW Receive Queue
377  * @rq: HW Receive Queue
378  * @hwif: HW Interface for accessing HW
379  * @wq: Work Queue for the data of the RQ
380  * @entry: msix entry for rq
381  *
382  * Return 0 - Success, negative - Failure
383  **/
384 int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
385 		  struct hinic_wq *wq, struct msix_entry *entry)
386 {
387 	struct pci_dev *pdev = hwif->pdev;
388 	size_t pi_size;
389 	int err;
390 
391 	rq->hwif = hwif;
392 
393 	rq->wq = wq;
394 
395 	rq->irq = entry->vector;
396 	rq->msix_entry = entry->entry;
397 
398 	rq->buf_sz = HINIC_RX_BUF_SZ;
399 
400 	err = alloc_rq_skb_arr(rq);
401 	if (err) {
402 		dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
403 		return err;
404 	}
405 
406 	err = alloc_rq_cqe(rq);
407 	if (err) {
408 		dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
409 		goto err_alloc_rq_cqe;
410 	}
411 
412 	/* HW requirements: Must be at least 32 bit */
413 	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
414 	rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
415 					      &rq->pi_dma_addr, GFP_KERNEL);
416 	if (!rq->pi_virt_addr) {
417 		dev_err(&pdev->dev, "Failed to allocate PI address\n");
418 		err = -ENOMEM;
419 		goto err_pi_virt;
420 	}
421 
422 	return 0;
423 
424 err_pi_virt:
425 	free_rq_cqe(rq);
426 
427 err_alloc_rq_cqe:
428 	free_rq_skb_arr(rq);
429 	return err;
430 }
431 
432 /**
433  * hinic_clean_rq - Clean HW Receive Queue's Resources
434  * @rq: HW Receive Queue
435  **/
436 void hinic_clean_rq(struct hinic_rq *rq)
437 {
438 	struct hinic_hwif *hwif = rq->hwif;
439 	struct pci_dev *pdev = hwif->pdev;
440 	size_t pi_size;
441 
442 	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
443 	dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
444 			  rq->pi_dma_addr);
445 
446 	free_rq_cqe(rq);
447 	free_rq_skb_arr(rq);
448 }
449 
450 /**
451  * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
452  * @sq: send queue
453  *
454  * Return number of free wqebbs
455  **/
456 int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
457 {
458 	struct hinic_wq *wq = sq->wq;
459 
460 	return atomic_read(&wq->delta) - 1;
461 }
462 
463 /**
464  * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
465  * @rq: recv queue
466  *
467  * Return number of free wqebbs
468  **/
469 int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
470 {
471 	struct hinic_wq *wq = rq->wq;
472 
473 	return atomic_read(&wq->delta) - 1;
474 }
475 
476 static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
477 			    int nr_descs)
478 {
479 	u32 ctrl_size, task_size, bufdesc_size;
480 
481 	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
482 	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
483 	bufdesc_size = nr_descs * sizeof(struct hinic_sq_bufdesc);
484 	bufdesc_size = SIZE_8BYTES(bufdesc_size);
485 
486 	ctrl->ctrl_info = HINIC_SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
487 			  HINIC_SQ_CTRL_SET(task_size, TASKSECT_LEN)        |
488 			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
489 			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
490 
491 	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
492 					     QUEUE_INFO_MSS) |
493 			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
494 }
495 
496 static void sq_prepare_task(struct hinic_sq_task *task)
497 {
498 	task->pkt_info0 = 0;
499 	task->pkt_info1 = 0;
500 	task->pkt_info2 = 0;
501 
502 	task->ufo_v6_identify = 0;
503 
504 	task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);
505 
506 	task->zero_pad = 0;
507 }
508 
509 void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
510 {
511 	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
512 }
513 
514 void hinic_task_set_outter_l3(struct hinic_sq_task *task,
515 			      enum hinic_l3_offload_type l3_type,
516 			      u32 network_len)
517 {
518 	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
519 			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
520 }
521 
522 void hinic_task_set_inner_l3(struct hinic_sq_task *task,
523 			     enum hinic_l3_offload_type l3_type,
524 			     u32 network_len)
525 {
526 	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
527 	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
528 }
529 
530 void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
531 			      enum hinic_l4_tunnel_type l4_type,
532 			      u32 tunnel_len)
533 {
534 	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
535 			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
536 }
537 
538 void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
539 			   enum hinic_l4_offload_type l4_offload,
540 			   u32 l4_len, u32 offset)
541 {
542 	u32 tcp_udp_cs = 0, sctp = 0;
543 	u32 mss = HINIC_MSS_DEFAULT;
544 
545 	if (l4_offload == TCP_OFFLOAD_ENABLE ||
546 	    l4_offload == UDP_OFFLOAD_ENABLE)
547 		tcp_udp_cs = 1;
548 	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
549 		sctp = 1;
550 
551 	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
552 	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
553 
554 	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
555 		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
556 		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
557 
558 	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
559 	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
560 }
561 
562 void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
563 			    enum hinic_l4_offload_type l4_offload,
564 			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
565 {
566 	u32 tso = 0, ufo = 0;
567 
568 	if (l4_offload == TCP_OFFLOAD_ENABLE)
569 		tso = 1;
570 	else if (l4_offload == UDP_OFFLOAD_ENABLE)
571 		ufo = 1;
572 
573 	task->ufo_v6_identify = ip_ident;
574 
575 	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
576 	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
577 	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
578 
579 	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
580 		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
581 		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
582 		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
583 
584 	/* set MSS value */
585 	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
586 	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
587 }
588 
589 /**
590  * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
591  * @sq: send queue
592  * @prod_idx: pi value
593  * @sq_wqe: wqe to prepare
594  * @sges: sges for use by the wqe for send for buf addresses
595  * @nr_sges: number of sges
596  **/
597 void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
598 			  struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
599 			  int nr_sges)
600 {
601 	int i;
602 
603 	sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);
604 
605 	sq_prepare_task(&sq_wqe->task);
606 
607 	for (i = 0; i < nr_sges; i++)
608 		sq_wqe->buf_descs[i].sge = sges[i];
609 }
610 
611 /**
612  * sq_prepare_db - prepare doorbell to write
613  * @sq: send queue
614  * @prod_idx: pi value for the doorbell
615  * @cos: cos of the doorbell
616  *
617  * Return db value
618  **/
619 static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
620 {
621 	struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
622 	u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));
623 
624 	/* Data should be written to HW in Big Endian Format */
625 	return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI)     |
626 			   HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
627 			   HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH)  |
628 			   HINIC_SQ_DB_INFO_SET(cos, COS)               |
629 			   HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
630 }
631 
632 /**
633  * hinic_sq_write_db- write doorbell
634  * @sq: send queue
635  * @prod_idx: pi value for the doorbell
636  * @wqe_size: wqe size
637  * @cos: cos of the wqe
638  **/
639 void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
640 		       unsigned int cos)
641 {
642 	struct hinic_wq *wq = sq->wq;
643 
644 	/* increment prod_idx to the next */
645 	prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
646 	prod_idx = SQ_MASKED_IDX(sq, prod_idx);
647 
648 	wmb();  /* Write all before the doorbell */
649 
650 	writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
651 }
652 
653 /**
654  * hinic_sq_get_wqe - get wqe ptr in the current pi and update the pi
655  * @sq: sq to get wqe from
656  * @wqe_size: wqe size
657  * @prod_idx: returned pi
658  *
659  * Return wqe pointer
660  **/
661 struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
662 				      unsigned int wqe_size, u16 *prod_idx)
663 {
664 	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
665 						    prod_idx);
666 
667 	if (IS_ERR(hw_wqe))
668 		return NULL;
669 
670 	return &hw_wqe->sq_wqe;
671 }
672 
673 /**
674  * hinic_sq_return_wqe - return the wqe to the sq
675  * @sq: send queue
676  * @wqe_size: the size of the wqe
677  **/
678 void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
679 {
680 	hinic_return_wqe(sq->wq, wqe_size);
681 }
682 
683 /**
684  * hinic_sq_write_wqe - write the wqe to the sq
685  * @sq: send queue
686  * @prod_idx: pi of the wqe
687  * @sq_wqe: the wqe to write
688  * @skb: skb to save
689  * @wqe_size: the size of the wqe
690  **/
691 void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
692 			struct hinic_sq_wqe *sq_wqe,
693 			struct sk_buff *skb, unsigned int wqe_size)
694 {
695 	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;
696 
697 	sq->saved_skb[prod_idx] = skb;
698 
699 	/* The data in the HW should be in Big Endian Format */
700 	hinic_cpu_to_be32(sq_wqe, wqe_size);
701 
702 	hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
703 }
704 
705 /**
706  * hinic_sq_read_wqebb - read wqe ptr in the current ci and update the ci, the
707  * wqe only have one wqebb
708  * @sq: send queue
709  * @skb: return skb that was saved
710  * @wqe_size: the wqe size ptr
711  * @cons_idx: consumer index of the wqe
712  *
713  * Return wqe in ci position
714  **/
715 struct hinic_sq_wqe *hinic_sq_read_wqebb(struct hinic_sq *sq,
716 					 struct sk_buff **skb,
717 					 unsigned int *wqe_size, u16 *cons_idx)
718 {
719 	struct hinic_hw_wqe *hw_wqe;
720 	struct hinic_sq_wqe *sq_wqe;
721 	struct hinic_sq_ctrl *ctrl;
722 	unsigned int buf_sect_len;
723 	u32 ctrl_info;
724 
725 	/* read the ctrl section for getting wqe size */
726 	hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
727 	if (IS_ERR(hw_wqe))
728 		return NULL;
729 
730 	*skb = sq->saved_skb[*cons_idx];
731 
732 	sq_wqe = &hw_wqe->sq_wqe;
733 	ctrl = &sq_wqe->ctrl;
734 	ctrl_info = be32_to_cpu(ctrl->ctrl_info);
735 	buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);
736 
737 	*wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
738 	*wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
739 	*wqe_size = ALIGN(*wqe_size, sq->wq->wqebb_size);
740 
741 	return &hw_wqe->sq_wqe;
742 }
743 
744 /**
745  * hinic_sq_read_wqe - read wqe ptr in the current ci and update the ci
746  * @sq: send queue
747  * @skb: return skb that was saved
748  * @wqe_size: the size of the wqe
749  * @cons_idx: consumer index of the wqe
750  *
751  * Return wqe in ci position
752  **/
753 struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
754 				       struct sk_buff **skb,
755 				       unsigned int wqe_size, u16 *cons_idx)
756 {
757 	struct hinic_hw_wqe *hw_wqe;
758 
759 	hw_wqe = hinic_read_wqe(sq->wq, wqe_size, cons_idx);
760 	*skb = sq->saved_skb[*cons_idx];
761 
762 	return &hw_wqe->sq_wqe;
763 }
764 
765 /**
766  * hinic_sq_put_wqe - release the ci for new wqes
767  * @sq: send queue
768  * @wqe_size: the size of the wqe
769  **/
770 void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
771 {
772 	hinic_put_wqe(sq->wq, wqe_size);
773 }
774 
775 /**
776  * hinic_sq_get_sges - get sges from the wqe
777  * @sq_wqe: wqe to get the sges from its buffer addresses
778  * @sges: returned sges
779  * @nr_sges: number sges to return
780  **/
781 void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
782 		       int nr_sges)
783 {
784 	int i;
785 
786 	for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
787 		sges[i] = sq_wqe->buf_descs[i].sge;
788 		hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
789 	}
790 }
791 
792 /**
793  * hinic_rq_get_wqe - get wqe ptr in the current pi and update the pi
794  * @rq: rq to get wqe from
795  * @wqe_size: wqe size
796  * @prod_idx: returned pi
797  *
798  * Return wqe pointer
799  **/
800 struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
801 				      unsigned int wqe_size, u16 *prod_idx)
802 {
803 	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
804 						    prod_idx);
805 
806 	if (IS_ERR(hw_wqe))
807 		return NULL;
808 
809 	return &hw_wqe->rq_wqe;
810 }
811 
812 /**
813  * hinic_rq_write_wqe - write the wqe to the rq
814  * @rq: recv queue
815  * @prod_idx: pi of the wqe
816  * @rq_wqe: the wqe to write
817  * @skb: skb to save
818  **/
819 void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
820 			struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
821 {
822 	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;
823 
824 	rq->saved_skb[prod_idx] = skb;
825 
826 	/* The data in the HW should be in Big Endian Format */
827 	hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));
828 
829 	hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
830 }
831 
832 /**
833  * hinic_rq_read_wqe - read wqe ptr in the current ci and update the ci
834  * @rq: recv queue
835  * @wqe_size: the size of the wqe
836  * @skb: return saved skb
837  * @cons_idx: consumer index of the wqe
838  *
839  * Return wqe in ci position
840  **/
841 struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
842 				       unsigned int wqe_size,
843 				       struct sk_buff **skb, u16 *cons_idx)
844 {
845 	struct hinic_hw_wqe *hw_wqe;
846 	struct hinic_rq_cqe *cqe;
847 	int rx_done;
848 	u32 status;
849 
850 	hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
851 	if (IS_ERR(hw_wqe))
852 		return NULL;
853 
854 	cqe = rq->cqe[*cons_idx];
855 
856 	status = be32_to_cpu(cqe->status);
857 
858 	rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
859 	if (!rx_done)
860 		return NULL;
861 
862 	*skb = rq->saved_skb[*cons_idx];
863 
864 	return &hw_wqe->rq_wqe;
865 }
866 
867 /**
868  * hinic_rq_read_next_wqe - increment ci and read the wqe in ci position
869  * @rq: recv queue
870  * @wqe_size: the size of the wqe
871  * @skb: return saved skb
872  * @cons_idx: consumer index in the wq
873  *
874  * Return wqe in incremented ci position
875  **/
876 struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
877 					    unsigned int wqe_size,
878 					    struct sk_buff **skb,
879 					    u16 *cons_idx)
880 {
881 	struct hinic_wq *wq = rq->wq;
882 	struct hinic_hw_wqe *hw_wqe;
883 	unsigned int num_wqebbs;
884 
885 	wqe_size = ALIGN(wqe_size, wq->wqebb_size);
886 	num_wqebbs = wqe_size / wq->wqebb_size;
887 
888 	*cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);
889 
890 	*skb = rq->saved_skb[*cons_idx];
891 
892 	hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);
893 
894 	return &hw_wqe->rq_wqe;
895 }
896 
897 /**
898  * hinic_put_wqe - release the ci for new wqes
899  * @rq: recv queue
900  * @cons_idx: consumer index of the wqe
901  * @wqe_size: the size of the wqe
902  **/
903 void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
904 		      unsigned int wqe_size)
905 {
906 	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
907 	u32 status = be32_to_cpu(cqe->status);
908 
909 	status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);
910 
911 	/* Rx WQE size is 1 WQEBB, no wq shadow*/
912 	cqe->status = cpu_to_be32(status);
913 
914 	wmb();          /* clear done flag */
915 
916 	hinic_put_wqe(rq->wq, wqe_size);
917 }
918 
919 /**
920  * hinic_rq_get_sge - get sge from the wqe
921  * @rq: recv queue
922  * @rq_wqe: wqe to get the sge from its buf address
923  * @cons_idx: consumer index
924  * @sge: returned sge
925  **/
926 void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
927 		      u16 cons_idx, struct hinic_sge *sge)
928 {
929 	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
930 	u32 len = be32_to_cpu(cqe->len);
931 
932 	sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
933 	sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
934 	sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
935 }
936 
937 /**
938  * hinic_rq_prepare_wqe - prepare wqe before insert to the queue
939  * @rq: recv queue
940  * @prod_idx: pi value
941  * @rq_wqe: the wqe
942  * @sge: sge for use by the wqe for recv buf address
943  **/
944 void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
945 			  struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
946 {
947 	struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
948 	struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
949 	struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
950 	struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
951 	dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];
952 
953 	ctrl->ctrl_info =
954 		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
955 		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
956 				  COMPLETE_LEN)                    |
957 		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
958 				  BUFDESC_SECT_LEN)                |
959 		HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);
960 
961 	hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));
962 
963 	buf_desc->hi_addr = sge->hi_addr;
964 	buf_desc->lo_addr = sge->lo_addr;
965 }
966 
967 /**
968  * hinic_rq_update - update pi of the rq
969  * @rq: recv queue
970  * @prod_idx: pi value
971  **/
972 void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
973 {
974 	*rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
975 }
976