/*
 * Huawei HiNIC PCI Express Linux driver
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/atomic.h>
#include <linux/skbuff.h>
#include <linux/io.h>
#include <asm/barrier.h>
#include <asm/byteorder.h>

#include "hinic_common.h"
#include "hinic_hw_if.h"
#include "hinic_hw_wqe.h"
#include "hinic_hw_wq.h"
#include "hinic_hw_qp_ctxt.h"
#include "hinic_hw_qp.h"
#include "hinic_hw_io.h"

#define SQ_DB_OFF               SZ_2K

/* The number of cache lines to prefetch until the threshold state */
#define WQ_PREFETCH_MAX         2
/* The number of cache lines to prefetch after the threshold state */
#define WQ_PREFETCH_MIN         1
/* Threshold state */
#define WQ_PREFETCH_THRESHOLD   256

/* sizes of the SQ/RQ ctxt */
#define Q_CTXT_SIZE             48
#define CTXT_RSVD               240

#define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
		(((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)

#define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
		(((max_rqs) + (max_sqs)) * CTXT_RSVD + \
		 ((max_sqs) + (q_id)) * Q_CTXT_SIZE)
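
/*
 * Worked example (illustrative only): with max_sqs = max_rqs = 16,
 *	SQ_CTXT_OFFSET(16, 16, 2) = (16 + 16) * 240 + 2 * 48        = 7776
 *	RQ_CTXT_OFFSET(16, 16, 2) = (16 + 16) * 240 + (16 + 2) * 48 = 8544
 * i.e. after the reserved area all SQ contexts are laid out first,
 * followed by all RQ contexts, each Q_CTXT_SIZE bytes apart.
 */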

#define SIZE_16BYTES(size)              (ALIGN(size, 16) >> 4)
#define SIZE_8BYTES(size)               (ALIGN(size, 8) >> 3)
#define SECT_SIZE_FROM_8BYTES(size)     ((size) << 3)

#define SQ_DB_PI_HI_SHIFT       8
#define SQ_DB_PI_HI(prod_idx)   ((prod_idx) >> SQ_DB_PI_HI_SHIFT)

#define SQ_DB_PI_LOW_MASK       0xFF
#define SQ_DB_PI_LOW(prod_idx)  ((prod_idx) & SQ_DB_PI_LOW_MASK)

#define SQ_DB_ADDR(sq, pi)      ((u64 *)((sq)->db_base) + SQ_DB_PI_LOW(pi))
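
/*
 * Example (illustrative only): for a masked prod_idx of 0x1234,
 * SQ_DB_PI_LOW() = 0x34 selects the u64-sized doorbell slot relative to
 * sq->db_base, while SQ_DB_PI_HI() = 0x12 travels in the PI_HI field of
 * the doorbell data built by sq_prepare_db() below.
 */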

#define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
#define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)

enum sq_wqe_type {
	SQ_NORMAL_WQE = 0,
};

enum rq_completion_fmt {
	RQ_COMPLETE_SGE = 1
};

void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
			     enum hinic_qp_ctxt_type ctxt_type,
			     u16 num_queues, u16 max_queues)
{
	u16 max_sqs = max_queues;
	u16 max_rqs = max_queues;

	qp_ctxt_hdr->num_queues = num_queues;
	qp_ctxt_hdr->queue_type = ctxt_type;

	if (ctxt_type == HINIC_QP_CTXT_TYPE_SQ)
		qp_ctxt_hdr->addr_offset = SQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
	else
		qp_ctxt_hdr->addr_offset = RQ_CTXT_OFFSET(max_sqs, max_rqs, 0);

	qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);

	hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
}
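
/*
 * Usage sketch (illustrative, not a verbatim call site): a caller that
 * writes the SQ contexts for num_qps queue pairs would first prepare the
 * header of a hypothetical ctxt command block, sq_ctxt_block:
 *
 *	hinic_qp_prepare_header(&sq_ctxt_block->hdr, HINIC_QP_CTXT_TYPE_SQ,
 *				num_qps, max_qps);
 *
 * and then fill the per-queue contexts with hinic_sq_prepare_ctxt().
 */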

void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
			   struct hinic_sq *sq, u16 global_qid)
{
	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
	u16 pi_start, ci_start;
	struct hinic_wq *wq;

	wq = sq->wq;
	ci_start = atomic_read(&wq->cons_idx);
	pi_start = atomic_read(&wq->prod_idx);

	/* Read the first page paddr from the WQ page paddr ptrs */
	wq_page_addr = be64_to_cpu(*wq->block_vaddr);

	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);

	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);

	sq_ctxt->ceq_attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid,
						       GLOBAL_SQ_ID) |
			    HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);

	sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci_start, IDX) |
			      HINIC_SQ_CTXT_CI_SET(1, WRAPPED);

	sq_ctxt->wq_hi_pfn_pi =
			HINIC_SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) |
			HINIC_SQ_CTXT_WQ_PAGE_SET(pi_start, PI);

	sq_ctxt->wq_lo_pfn = wq_page_pfn_lo;

	sq_ctxt->pref_cache =
		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);

	sq_ctxt->pref_wrapped = 1;

	sq_ctxt->pref_wq_hi_pfn_ci =
		HINIC_SQ_CTXT_PREF_SET(ci_start, CI) |
		HINIC_SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN);

	sq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;

	sq_ctxt->wq_block_hi_pfn =
		HINIC_SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);

	sq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;

	hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
}

void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
			   struct hinic_rq *rq, u16 global_qid)
{
	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
	u16 pi_start, ci_start;
	struct hinic_wq *wq;

	wq = rq->wq;
	ci_start = atomic_read(&wq->cons_idx);
	pi_start = atomic_read(&wq->prod_idx);

	/* Read the first page paddr from the WQ page paddr ptrs */
	wq_page_addr = be64_to_cpu(*wq->block_vaddr);

	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);

	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);

	rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
			    HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);

	rq_ctxt->pi_intr_attr = HINIC_RQ_CTXT_PI_SET(pi_start, IDX) |
				HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);

	rq_ctxt->wq_hi_pfn_ci = HINIC_RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi,
							  HI_PFN) |
				HINIC_RQ_CTXT_WQ_PAGE_SET(ci_start, CI);

	rq_ctxt->wq_lo_pfn = wq_page_pfn_lo;

	rq_ctxt->pref_cache =
		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);

	rq_ctxt->pref_wrapped = 1;

	rq_ctxt->pref_wq_hi_pfn_ci =
		HINIC_RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN) |
		HINIC_RQ_CTXT_PREF_SET(ci_start, CI);

	rq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;

	rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);
	rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);

	rq_ctxt->wq_block_hi_pfn =
		HINIC_RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);

	rq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;

	hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
}

/**
 * alloc_sq_skb_arr - allocate sq array for saved skb
 * @sq: HW Send Queue
 *
 * Return 0 - Success, negative - Failure
 **/
static int alloc_sq_skb_arr(struct hinic_sq *sq)
{
	struct hinic_wq *wq = sq->wq;
	size_t skb_arr_size;

	skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
	sq->saved_skb = vzalloc(skb_arr_size);
	if (!sq->saved_skb)
		return -ENOMEM;

	return 0;
}

/**
 * free_sq_skb_arr - free sq array for saved skb
 * @sq: HW Send Queue
 **/
static void free_sq_skb_arr(struct hinic_sq *sq)
{
	vfree(sq->saved_skb);
}

/**
 * alloc_rq_skb_arr - allocate rq array for saved skb
 * @rq: HW Receive Queue
 *
 * Return 0 - Success, negative - Failure
 **/
static int alloc_rq_skb_arr(struct hinic_rq *rq)
{
	struct hinic_wq *wq = rq->wq;
	size_t skb_arr_size;

	skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
	rq->saved_skb = vzalloc(skb_arr_size);
	if (!rq->saved_skb)
		return -ENOMEM;

	return 0;
}

/**
 * free_rq_skb_arr - free rq array for saved skb
 * @rq: HW Receive Queue
 **/
static void free_rq_skb_arr(struct hinic_rq *rq)
{
	vfree(rq->saved_skb);
}

/**
 * hinic_init_sq - Initialize HW Send Queue
 * @sq: HW Send Queue
 * @hwif: HW Interface for accessing HW
 * @wq: Work Queue for the data of the SQ
 * @entry: msix entry for sq
 * @ci_addr: address for reading the current HW consumer index
 * @ci_dma_addr: dma address for reading the current HW consumer index
 * @db_base: doorbell base address
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
		  struct hinic_wq *wq, struct msix_entry *entry,
		  void *ci_addr, dma_addr_t ci_dma_addr,
		  void __iomem *db_base)
{
	sq->hwif = hwif;

	sq->wq = wq;

	sq->irq = entry->vector;
	sq->msix_entry = entry->entry;

	sq->hw_ci_addr = ci_addr;
	sq->hw_ci_dma_addr = ci_dma_addr;

	sq->db_base = db_base + SQ_DB_OFF;

	return alloc_sq_skb_arr(sq);
}

/**
 * hinic_clean_sq - Clean HW Send Queue's Resources
 * @sq: Send Queue
 **/
void hinic_clean_sq(struct hinic_sq *sq)
{
	free_sq_skb_arr(sq);
}
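
/*
 * Sketch of the expected init/clean pairing (illustrative only; error
 * handling and the surrounding IO setup are omitted):
 *
 *	err = hinic_init_sq(sq, hwif, wq, entry, ci_addr, ci_dma_addr,
 *			    db_base);
 *	if (err)
 *		return err;
 *	...
 *	hinic_clean_sq(sq);
 *
 * hinic_clean_sq() frees the saved-skb array that hinic_init_sq()
 * allocated, so the two must always be paired.
 */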

/**
 * alloc_rq_cqe - allocate rq completion queue elements
 * @rq: HW Receive Queue
 *
 * Return 0 - Success, negative - Failure
 **/
static int alloc_rq_cqe(struct hinic_rq *rq)
{
	struct hinic_hwif *hwif = rq->hwif;
	struct pci_dev *pdev = hwif->pdev;
	size_t cqe_dma_size, cqe_size;
	struct hinic_wq *wq = rq->wq;
	int j, i;

	cqe_size = wq->q_depth * sizeof(*rq->cqe);
	rq->cqe = vzalloc(cqe_size);
	if (!rq->cqe)
		return -ENOMEM;

	cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
	rq->cqe_dma = vzalloc(cqe_dma_size);
	if (!rq->cqe_dma)
		goto err_cqe_dma_arr_alloc;

	for (i = 0; i < wq->q_depth; i++) {
		rq->cqe[i] = dma_alloc_coherent(&pdev->dev,
						sizeof(*rq->cqe[i]),
						&rq->cqe_dma[i], GFP_KERNEL);
		if (!rq->cqe[i])
			goto err_cqe_alloc;
	}

	return 0;

err_cqe_alloc:
	for (j = 0; j < i; j++)
		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
				  rq->cqe_dma[j]);

	vfree(rq->cqe_dma);

err_cqe_dma_arr_alloc:
	vfree(rq->cqe);
	return -ENOMEM;
}

/**
 * free_rq_cqe - free rq completion queue elements
 * @rq: HW Receive Queue
 **/
static void free_rq_cqe(struct hinic_rq *rq)
{
	struct hinic_hwif *hwif = rq->hwif;
	struct pci_dev *pdev = hwif->pdev;
	struct hinic_wq *wq = rq->wq;
	int i;

	for (i = 0; i < wq->q_depth; i++)
		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
				  rq->cqe_dma[i]);

	vfree(rq->cqe_dma);
	vfree(rq->cqe);
}

/**
 * hinic_init_rq - Initialize HW Receive Queue
 * @rq: HW Receive Queue
 * @hwif: HW Interface for accessing HW
 * @wq: Work Queue for the data of the RQ
 * @entry: msix entry for rq
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
		  struct hinic_wq *wq, struct msix_entry *entry)
{
	struct pci_dev *pdev = hwif->pdev;
	size_t pi_size;
	int err;

	rq->hwif = hwif;

	rq->wq = wq;

	rq->irq = entry->vector;
	rq->msix_entry = entry->entry;

	rq->buf_sz = HINIC_RX_BUF_SZ;

	err = alloc_rq_skb_arr(rq);
	if (err) {
		dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
		return err;
	}

	err = alloc_rq_cqe(rq);
	if (err) {
		dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
		goto err_alloc_rq_cqe;
	}

	/* HW requirements: Must be at least 32 bits */
	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
	rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
					      &rq->pi_dma_addr, GFP_KERNEL);
	if (!rq->pi_virt_addr) {
		dev_err(&pdev->dev, "Failed to allocate PI address\n");
		err = -ENOMEM;
		goto err_pi_virt;
	}

	return 0;

err_pi_virt:
	free_rq_cqe(rq);

err_alloc_rq_cqe:
	free_rq_skb_arr(rq);
	return err;
}

/**
 * hinic_clean_rq - Clean HW Receive Queue's Resources
 * @rq: HW Receive Queue
 **/
void hinic_clean_rq(struct hinic_rq *rq)
{
	struct hinic_hwif *hwif = rq->hwif;
	struct pci_dev *pdev = hwif->pdev;
	size_t pi_size;

	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
	dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
			  rq->pi_dma_addr);

	free_rq_cqe(rq);
	free_rq_skb_arr(rq);
}

/**
 * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
 * @sq: send queue
 *
 * Return number of free wqebbs
 **/
int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
{
	struct hinic_wq *wq = sq->wq;

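	/*
	 * wq->delta keeps one wqebb in reserve to distinguish a full ring
	 * from an empty one, so the count usable by callers is delta - 1;
	 * the same applies to the RQ variant below.
	 */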
	return atomic_read(&wq->delta) - 1;
}

/**
 * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
 * @rq: recv queue
 *
 * Return number of free wqebbs
 **/
int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
{
	struct hinic_wq *wq = rq->wq;

	return atomic_read(&wq->delta) - 1;
}

static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
			    int nr_descs)
{
	u32 ctrl_size, task_size, bufdesc_size;

	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
	bufdesc_size = nr_descs * sizeof(struct hinic_sq_bufdesc);
	bufdesc_size = SIZE_8BYTES(bufdesc_size);

	ctrl->ctrl_info = HINIC_SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
			  HINIC_SQ_CTRL_SET(task_size, TASKSECT_LEN)        |
			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);

	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
					     QUEUE_INFO_MSS) |
			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
}

static void sq_prepare_task(struct hinic_sq_task *task)
{
	task->pkt_info0 = 0;
	task->pkt_info1 = 0;
	task->pkt_info2 = 0;

	task->ufo_v6_identify = 0;

	task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);

	task->zero_pad = 0;
}

void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
{
	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
}

void hinic_task_set_outter_l3(struct hinic_sq_task *task,
			      enum hinic_l3_offload_type l3_type,
			      u32 network_len)
{
	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
}

void hinic_task_set_inner_l3(struct hinic_sq_task *task,
			     enum hinic_l3_offload_type l3_type,
			     u32 network_len)
{
	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
}

void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
			      enum hinic_l4_tunnel_type l4_type,
			      u32 tunnel_len)
{
	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
}

void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
			   enum hinic_l4_offload_type l4_offload,
			   u32 l4_len, u32 offset)
{
	u32 tcp_udp_cs = 0, sctp = 0;
	u32 mss = HINIC_MSS_DEFAULT;

	if (l4_offload == TCP_OFFLOAD_ENABLE ||
	    l4_offload == UDP_OFFLOAD_ENABLE)
		tcp_udp_cs = 1;
	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
		sctp = 1;

	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);

	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);

	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
}

void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
			    enum hinic_l4_offload_type l4_offload,
			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
{
	u32 tso = 0, ufo = 0;

	if (l4_offload == TCP_OFFLOAD_ENABLE)
		tso = 1;
	else if (l4_offload == UDP_OFFLOAD_ENABLE)
		ufo = 1;

	task->ufo_v6_identify = ip_ident;

	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);

	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);

	/* set MSS value */
	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
}
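
/*
 * Usage sketch (illustrative only; offset and ip_ident come from the
 * caller's header parsing): a transmit path that detected a TSO-capable
 * TCP skb could program the task section roughly as
 *
 *	hinic_set_tso_inner_l4(&sq_wqe->task, &sq_wqe->ctrl.queue_info,
 *			       TCP_OFFLOAD_ENABLE, tcp_hdrlen(skb), offset,
 *			       ip_ident, skb_shinfo(skb)->gso_size);
 *
 * while plain checksum offload would call hinic_set_cs_inner_l4() instead
 * and keep the default MSS.
 */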

/**
 * hinic_sq_prepare_wqe - prepare a wqe before inserting it into the queue
 * @sq: send queue
 * @prod_idx: pi value
 * @sq_wqe: wqe to prepare
 * @sges: sges with the buffer addresses for the send
 * @nr_sges: number of sges
 **/
void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
			  struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
			  int nr_sges)
{
	int i;

	sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);

	sq_prepare_task(&sq_wqe->task);

	for (i = 0; i < nr_sges; i++)
		sq_wqe->buf_descs[i].sge = sges[i];
}

/**
 * sq_prepare_db - prepare doorbell to write
 * @sq: send queue
 * @prod_idx: pi value for the doorbell
 * @cos: cos of the doorbell
 *
 * Return db value
 **/
static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
{
	struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
	u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));

	/* Data should be written to HW in Big Endian Format */
	return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI)     |
			   HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
			   HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH)  |
			   HINIC_SQ_DB_INFO_SET(cos, COS)               |
			   HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
}

/**
 * hinic_sq_write_db - write doorbell
 * @sq: send queue
 * @prod_idx: pi value for the doorbell
 * @wqe_size: wqe size
 * @cos: cos of the wqe
 **/
void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
		       unsigned int cos)
{
	struct hinic_wq *wq = sq->wq;

	/* increment prod_idx to the next */
	prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;

	wmb();  /* Write all before the doorbell */

	writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
}

/**
 * hinic_sq_get_wqe - get the wqe ptr at the current pi and update the pi
 * @sq: sq to get wqe from
 * @wqe_size: wqe size
 * @prod_idx: returned pi
 *
 * Return wqe pointer
 **/
struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
				      unsigned int wqe_size, u16 *prod_idx)
{
	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
						    prod_idx);

	if (IS_ERR(hw_wqe))
		return NULL;

	return &hw_wqe->sq_wqe;
}

/**
 * hinic_sq_return_wqe - return the wqe to the sq
 * @sq: send queue
 * @wqe_size: the size of the wqe
 **/
void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
{
	hinic_return_wqe(sq->wq, wqe_size);
}

/**
 * hinic_sq_write_wqe - write the wqe to the sq
 * @sq: send queue
 * @prod_idx: pi of the wqe
 * @sq_wqe: the wqe to write
 * @skb: skb to save
 * @wqe_size: the size of the wqe
 **/
void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
			struct hinic_sq_wqe *sq_wqe,
			struct sk_buff *skb, unsigned int wqe_size)
{
	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;

	sq->saved_skb[prod_idx] = skb;

	/* The data in the HW should be in Big Endian Format */
	hinic_cpu_to_be32(sq_wqe, wqe_size);

	hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
}
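
/*
 * Typical transmit sequence (illustrative sketch; error handling and DMA
 * mapping of the sges are omitted):
 *
 *	sq_wqe = hinic_sq_get_wqe(sq, wqe_size, &prod_idx);
 *	hinic_sq_prepare_wqe(sq, prod_idx, sq_wqe, sges, nr_sges);
 *	hinic_sq_write_wqe(sq, prod_idx, sq_wqe, skb, wqe_size);
 *	hinic_sq_write_db(sq, prod_idx, wqe_size, cos);
 */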

/**
 * hinic_sq_read_wqebb - read the wqe ptr at the current ci and update the
 * ci; the wqe occupies only one wqebb
 * @sq: send queue
 * @skb: return skb that was saved
 * @wqe_size: returned size of the wqe
 * @cons_idx: consumer index of the wqe
 *
 * Return wqe in ci position
 **/
struct hinic_sq_wqe *hinic_sq_read_wqebb(struct hinic_sq *sq,
					 struct sk_buff **skb,
					 unsigned int *wqe_size, u16 *cons_idx)
{
	struct hinic_hw_wqe *hw_wqe;
	struct hinic_sq_wqe *sq_wqe;
	struct hinic_sq_ctrl *ctrl;
	unsigned int buf_sect_len;
	u32 ctrl_info;

	/* read the ctrl section for getting wqe size */
	hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
	if (IS_ERR(hw_wqe))
		return NULL;

	*skb = sq->saved_skb[*cons_idx];

	sq_wqe = &hw_wqe->sq_wqe;
	ctrl = &sq_wqe->ctrl;
	ctrl_info = be32_to_cpu(ctrl->ctrl_info);
	buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);

	*wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
	*wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
	*wqe_size = ALIGN(*wqe_size, sq->wq->wqebb_size);

	return &hw_wqe->sq_wqe;
}

/**
 * hinic_sq_read_wqe - read the wqe ptr at the current ci and update the ci
 * @sq: send queue
 * @skb: return skb that was saved
 * @wqe_size: the size of the wqe
 * @cons_idx: consumer index of the wqe
 *
 * Return wqe in ci position
 **/
struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
				       struct sk_buff **skb,
				       unsigned int wqe_size, u16 *cons_idx)
{
	struct hinic_hw_wqe *hw_wqe;

	hw_wqe = hinic_read_wqe(sq->wq, wqe_size, cons_idx);
	*skb = sq->saved_skb[*cons_idx];

	return &hw_wqe->sq_wqe;
}

/**
 * hinic_sq_put_wqe - release the ci for new wqes
 * @sq: send queue
 * @wqe_size: the size of the wqe
 **/
void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
{
	hinic_put_wqe(sq->wq, wqe_size);
}
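
/*
 * Typical completion sequence (illustrative sketch): once the HW ci
 * indicates progress,
 *
 *	sq_wqe = hinic_sq_read_wqebb(sq, &skb, &wqe_size, &ci);
 *	hinic_sq_get_sges(sq_wqe, sges, nr_sges);
 *	... unmap the buffers and free the skb ...
 *	hinic_sq_put_wqe(sq, wqe_size);
 */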

/**
 * hinic_sq_get_sges - get sges from the wqe
 * @sq_wqe: wqe to get the sges from (its buffer addresses)
 * @sges: returned sges
 * @nr_sges: number of sges to return
 **/
void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
		       int nr_sges)
{
	int i;

	for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
		sges[i] = sq_wqe->buf_descs[i].sge;
		hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
	}
}

/**
 * hinic_rq_get_wqe - get the wqe ptr at the current pi and update the pi
 * @rq: rq to get wqe from
 * @wqe_size: wqe size
 * @prod_idx: returned pi
 *
 * Return wqe pointer
 **/
struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
				      unsigned int wqe_size, u16 *prod_idx)
{
	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
						    prod_idx);

	if (IS_ERR(hw_wqe))
		return NULL;

	return &hw_wqe->rq_wqe;
}

/**
 * hinic_rq_write_wqe - write the wqe to the rq
 * @rq: recv queue
 * @prod_idx: pi of the wqe
 * @rq_wqe: the wqe to write
 * @skb: skb to save
 **/
void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
			struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
{
	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;

	rq->saved_skb[prod_idx] = skb;

	/* The data in the HW should be in Big Endian Format */
	hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));

	hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
}

/**
 * hinic_rq_read_wqe - read the wqe ptr at the current ci and update the ci
 * @rq: recv queue
 * @wqe_size: the size of the wqe
 * @skb: return saved skb
 * @cons_idx: consumer index of the wqe
 *
 * Return wqe in ci position
 **/
struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
				       unsigned int wqe_size,
				       struct sk_buff **skb, u16 *cons_idx)
{
	struct hinic_hw_wqe *hw_wqe;
	struct hinic_rq_cqe *cqe;
	int rx_done;
	u32 status;

	hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
	if (IS_ERR(hw_wqe))
		return NULL;

	cqe = rq->cqe[*cons_idx];

	status = be32_to_cpu(cqe->status);

	rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
	if (!rx_done)
		return NULL;

	*skb = rq->saved_skb[*cons_idx];

	return &hw_wqe->rq_wqe;
}

/**
 * hinic_rq_read_next_wqe - increment the ci and read the wqe at the new ci
 * @rq: recv queue
 * @wqe_size: the size of the wqe
 * @skb: return saved skb
 * @cons_idx: consumer index in the wq
 *
 * Return wqe in the incremented ci position
 **/
struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
					    unsigned int wqe_size,
					    struct sk_buff **skb,
					    u16 *cons_idx)
{
	struct hinic_wq *wq = rq->wq;
	struct hinic_hw_wqe *hw_wqe;
	unsigned int num_wqebbs;

	wqe_size = ALIGN(wqe_size, wq->wqebb_size);
	num_wqebbs = wqe_size / wq->wqebb_size;

	*cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);

	*skb = rq->saved_skb[*cons_idx];

	hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);

	return &hw_wqe->rq_wqe;
}

/**
 * hinic_rq_put_wqe - release the ci for new wqes
 * @rq: recv queue
 * @cons_idx: consumer index of the wqe
 * @wqe_size: the size of the wqe
 **/
void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
		      unsigned int wqe_size)
{
	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
	u32 status = be32_to_cpu(cqe->status);

	status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);

	/* Rx WQE size is 1 WQEBB, no wq shadow */
	cqe->status = cpu_to_be32(status);

	wmb();          /* ensure the cleared done flag is visible */

	hinic_put_wqe(rq->wq, wqe_size);
}
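
/*
 * Typical receive sequence (illustrative sketch): hinic_rq_read_wqe()
 * returns NULL while no completed wqe is ready; once it returns a wqe,
 *
 *	hinic_rq_get_sge(rq, rq_wqe, ci, &sge);
 *	... unmap the buffer and pass the skb up the stack ...
 *	hinic_rq_put_wqe(rq, ci, wqe_size);
 */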

/**
 * hinic_rq_get_sge - get the sge from the wqe
 * @rq: recv queue
 * @rq_wqe: wqe to take the sge buffer address from
 * @cons_idx: consumer index
 * @sge: returned sge
 **/
void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
		      u16 cons_idx, struct hinic_sge *sge)
{
	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
	u32 len = be32_to_cpu(cqe->len);

	sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
	sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
	sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
}

/**
 * hinic_rq_prepare_wqe - prepare a wqe before inserting it into the queue
 * @rq: recv queue
 * @prod_idx: pi value
 * @rq_wqe: the wqe
 * @sge: sge with the recv buffer address for use by the wqe
 **/
void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
			  struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
{
	struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
	struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
	struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
	struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
	dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];

	ctrl->ctrl_info =
		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
				  COMPLETE_LEN)                    |
		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
				  BUFDESC_SECT_LEN)                |
		HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);

	hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));

	buf_desc->hi_addr = sge->hi_addr;
	buf_desc->lo_addr = sge->lo_addr;
}

/**
 * hinic_rq_update - update pi of the rq
 * @rq: recv queue
 * @prod_idx: pi value
 **/
void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
{
	*rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
}
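
/*
 * Typical refill sequence (illustrative sketch): after posting new
 * receive buffers,
 *
 *	rq_wqe = hinic_rq_get_wqe(rq, wqe_size, &prod_idx);
 *	hinic_rq_prepare_wqe(rq, prod_idx, rq_wqe, &sge);
 *	hinic_rq_write_wqe(rq, prod_idx, rq_wqe, skb);
 *	hinic_rq_update(rq, prod_idx);
 *
 * publishes the final pi to HW through the coherent pi address.
 */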