1 /*
2  * Huawei HiNIC PCI Express Linux driver
3  * Copyright(c) 2017 Huawei Technologies Co., Ltd
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * for more details.
13  *
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/types.h>
18 #include <linux/pci.h>
19 #include <linux/device.h>
20 #include <linux/dma-mapping.h>
21 #include <linux/vmalloc.h>
22 #include <linux/errno.h>
23 #include <linux/sizes.h>
24 #include <linux/atomic.h>
25 #include <linux/skbuff.h>
26 #include <linux/io.h>
27 #include <asm/barrier.h>
28 #include <asm/byteorder.h>
29 
30 #include "hinic_common.h"
31 #include "hinic_hw_if.h"
32 #include "hinic_hw_wqe.h"
33 #include "hinic_hw_wq.h"
34 #include "hinic_hw_qp_ctxt.h"
35 #include "hinic_hw_qp.h"
36 #include "hinic_hw_io.h"
37 
38 #define SQ_DB_OFF               SZ_2K
39 
/* The number of cache lines to prefetch until reaching the threshold state */
41 #define WQ_PREFETCH_MAX         2
/* The number of cache lines to prefetch after the threshold state */
43 #define WQ_PREFETCH_MIN         1
44 /* Threshold state */
45 #define WQ_PREFETCH_THRESHOLD   256
46 
47 /* sizes of the SQ/RQ ctxt */
48 #define Q_CTXT_SIZE             48
49 #define CTXT_RSVD               240
50 
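/*
 * The SQ/RQ contexts share a single table: a reserved area of CTXT_RSVD
 * bytes per queue (counting both SQs and RQs) comes first, followed by
 * the SQ contexts and then the RQ contexts, Q_CTXT_SIZE bytes each.
 */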
51 #define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
52 		(((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)
53 
54 #define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
55 		(((max_rqs) + (max_sqs)) * CTXT_RSVD + \
		 ((max_sqs) + (q_id)) * Q_CTXT_SIZE)
57 
58 #define SIZE_16BYTES(size)              (ALIGN(size, 16) >> 4)
59 #define SIZE_8BYTES(size)               (ALIGN(size, 8) >> 3)
60 #define SECT_SIZE_FROM_8BYTES(size)     ((size) << 3)
61 
62 #define SQ_DB_PI_HI_SHIFT       8
63 #define SQ_DB_PI_HI(prod_idx)   ((prod_idx) >> SQ_DB_PI_HI_SHIFT)
64 
65 #define SQ_DB_PI_LOW_MASK       0xFF
66 #define SQ_DB_PI_LOW(prod_idx)  ((prod_idx) & SQ_DB_PI_LOW_MASK)
67 
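/*
 * The doorbell carries the producer index in two parts: the low 8 bits
 * select the 8-byte slot within the doorbell page that is written
 * (SQ_DB_ADDR), and the remaining high bits are placed in the doorbell
 * data itself (PI_HI, see sq_prepare_db()).
 */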
68 #define SQ_DB_ADDR(sq, pi)      ((u64 *)((sq)->db_base) + SQ_DB_PI_LOW(pi))
69 
70 #define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
71 #define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
72 
73 #define TX_MAX_MSS_DEFAULT      0x3E00
74 
75 enum sq_wqe_type {
76 	SQ_NORMAL_WQE = 0,
77 };
78 
79 enum rq_completion_fmt {
80 	RQ_COMPLETE_SGE = 1
81 };
82 
83 void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
84 			     enum hinic_qp_ctxt_type ctxt_type,
85 			     u16 num_queues, u16 max_queues)
86 {
87 	u16 max_sqs = max_queues;
88 	u16 max_rqs = max_queues;
89 
90 	qp_ctxt_hdr->num_queues = num_queues;
91 	qp_ctxt_hdr->queue_type = ctxt_type;
92 
93 	if (ctxt_type == HINIC_QP_CTXT_TYPE_SQ)
94 		qp_ctxt_hdr->addr_offset = SQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
95 	else
96 		qp_ctxt_hdr->addr_offset = RQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
97 
98 	qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);
99 
100 	hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
101 }
102 
103 void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
104 			   struct hinic_sq *sq, u16 global_qid)
105 {
106 	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
107 	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
108 	u16 pi_start, ci_start;
109 	struct hinic_wq *wq;
110 
111 	wq = sq->wq;
112 	ci_start = atomic_read(&wq->cons_idx);
113 	pi_start = atomic_read(&wq->prod_idx);
114 
115 	/* Read the first page paddr from the WQ page paddr ptrs */
116 	wq_page_addr = be64_to_cpu(*wq->block_vaddr);
117 
118 	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
119 	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
120 	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
121 
122 	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
123 	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
124 	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
125 
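	/* The context is given both the PFN of the first WQ page and the PFN
	 * of the WQ block (the table holding all of the WQ page addresses).
	 */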
126 	sq_ctxt->ceq_attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid,
127 						       GLOBAL_SQ_ID) |
128 			    HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);
129 
130 	sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci_start, IDX) |
131 			      HINIC_SQ_CTXT_CI_SET(1, WRAPPED);
132 
133 	sq_ctxt->wq_hi_pfn_pi =
134 			HINIC_SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) |
135 			HINIC_SQ_CTXT_WQ_PAGE_SET(pi_start, PI);
136 
137 	sq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
138 
139 	sq_ctxt->pref_cache =
140 		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
141 		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
142 		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
143 
144 	sq_ctxt->pref_wrapped = 1;
145 
146 	sq_ctxt->pref_wq_hi_pfn_ci =
147 		HINIC_SQ_CTXT_PREF_SET(ci_start, CI) |
148 		HINIC_SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN);
149 
150 	sq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
151 
152 	sq_ctxt->wq_block_hi_pfn =
153 		HINIC_SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
154 
155 	sq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
156 
157 	hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
158 }
159 
160 void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
161 			   struct hinic_rq *rq, u16 global_qid)
162 {
163 	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
164 	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
165 	u16 pi_start, ci_start;
166 	struct hinic_wq *wq;
167 
168 	wq = rq->wq;
169 	ci_start = atomic_read(&wq->cons_idx);
170 	pi_start = atomic_read(&wq->prod_idx);
171 
172 	/* Read the first page paddr from the WQ page paddr ptrs */
173 	wq_page_addr = be64_to_cpu(*wq->block_vaddr);
174 
175 	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
176 	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
177 	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
178 
179 	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
180 	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
181 	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
182 
183 	rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
184 			    HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);
185 
186 	rq_ctxt->pi_intr_attr = HINIC_RQ_CTXT_PI_SET(pi_start, IDX) |
187 				HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);
188 
189 	rq_ctxt->wq_hi_pfn_ci = HINIC_RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi,
190 							  HI_PFN) |
191 				HINIC_RQ_CTXT_WQ_PAGE_SET(ci_start, CI);
192 
193 	rq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
194 
195 	rq_ctxt->pref_cache =
196 		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
197 		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
198 		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
199 
200 	rq_ctxt->pref_wrapped = 1;
201 
202 	rq_ctxt->pref_wq_hi_pfn_ci =
203 		HINIC_RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN) |
204 		HINIC_RQ_CTXT_PREF_SET(ci_start, CI);
205 
206 	rq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
207 
208 	rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);
209 	rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);
210 
211 	rq_ctxt->wq_block_hi_pfn =
212 		HINIC_RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
213 
214 	rq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
215 
216 	hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
217 }
218 
219 /**
220  * alloc_sq_skb_arr - allocate sq array for saved skb
221  * @sq: HW Send Queue
222  *
223  * Return 0 - Success, negative - Failure
224  **/
225 static int alloc_sq_skb_arr(struct hinic_sq *sq)
226 {
227 	struct hinic_wq *wq = sq->wq;
228 	size_t skb_arr_size;
229 
230 	skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
231 	sq->saved_skb = vzalloc(skb_arr_size);
232 	if (!sq->saved_skb)
233 		return -ENOMEM;
234 
235 	return 0;
236 }
237 
238 /**
239  * free_sq_skb_arr - free sq array for saved skb
240  * @sq: HW Send Queue
241  **/
242 static void free_sq_skb_arr(struct hinic_sq *sq)
243 {
244 	vfree(sq->saved_skb);
245 }
246 
247 /**
248  * alloc_rq_skb_arr - allocate rq array for saved skb
249  * @rq: HW Receive Queue
250  *
251  * Return 0 - Success, negative - Failure
252  **/
253 static int alloc_rq_skb_arr(struct hinic_rq *rq)
254 {
255 	struct hinic_wq *wq = rq->wq;
256 	size_t skb_arr_size;
257 
258 	skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
259 	rq->saved_skb = vzalloc(skb_arr_size);
260 	if (!rq->saved_skb)
261 		return -ENOMEM;
262 
263 	return 0;
264 }
265 
266 /**
267  * free_rq_skb_arr - free rq array for saved skb
268  * @rq: HW Receive Queue
269  **/
270 static void free_rq_skb_arr(struct hinic_rq *rq)
271 {
272 	vfree(rq->saved_skb);
273 }
274 
275 /**
276  * hinic_init_sq - Initialize HW Send Queue
277  * @sq: HW Send Queue
278  * @hwif: HW Interface for accessing HW
279  * @wq: Work Queue for the data of the SQ
280  * @entry: msix entry for sq
281  * @ci_addr: address for reading the current HW consumer index
282  * @ci_dma_addr: dma address for reading the current HW consumer index
283  * @db_base: doorbell base address
284  *
285  * Return 0 - Success, negative - Failure
286  **/
287 int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
288 		  struct hinic_wq *wq, struct msix_entry *entry,
289 		  void *ci_addr, dma_addr_t ci_dma_addr,
290 		  void __iomem *db_base)
291 {
292 	sq->hwif = hwif;
293 
294 	sq->wq = wq;
295 
296 	sq->irq = entry->vector;
297 	sq->msix_entry = entry->entry;
298 
299 	sq->hw_ci_addr = ci_addr;
300 	sq->hw_ci_dma_addr = ci_dma_addr;
301 
302 	sq->db_base = db_base + SQ_DB_OFF;
303 
304 	return alloc_sq_skb_arr(sq);
305 }
306 
307 /**
308  * hinic_clean_sq - Clean HW Send Queue's Resources
309  * @sq: Send Queue
310  **/
311 void hinic_clean_sq(struct hinic_sq *sq)
312 {
313 	free_sq_skb_arr(sq);
314 }
315 
316 /**
317  * alloc_rq_cqe - allocate rq completion queue elements
318  * @rq: HW Receive Queue
319  *
320  * Return 0 - Success, negative - Failure
321  **/
322 static int alloc_rq_cqe(struct hinic_rq *rq)
323 {
324 	struct hinic_hwif *hwif = rq->hwif;
325 	struct pci_dev *pdev = hwif->pdev;
326 	size_t cqe_dma_size, cqe_size;
327 	struct hinic_wq *wq = rq->wq;
328 	int j, i;
329 
330 	cqe_size = wq->q_depth * sizeof(*rq->cqe);
331 	rq->cqe = vzalloc(cqe_size);
332 	if (!rq->cqe)
333 		return -ENOMEM;
334 
335 	cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
336 	rq->cqe_dma = vzalloc(cqe_dma_size);
337 	if (!rq->cqe_dma)
338 		goto err_cqe_dma_arr_alloc;
339 
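	/* Every RQ WQE gets its own DMA-coherent CQE buffer; HW writes the
	 * completion status/length there, and the buffer's DMA address is
	 * placed into the WQE's completion section by hinic_rq_prepare_wqe().
	 */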
340 	for (i = 0; i < wq->q_depth; i++) {
341 		rq->cqe[i] = dma_zalloc_coherent(&pdev->dev,
342 						 sizeof(*rq->cqe[i]),
343 						 &rq->cqe_dma[i], GFP_KERNEL);
344 		if (!rq->cqe[i])
345 			goto err_cqe_alloc;
346 	}
347 
348 	return 0;
349 
350 err_cqe_alloc:
351 	for (j = 0; j < i; j++)
352 		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
353 				  rq->cqe_dma[j]);
354 
355 	vfree(rq->cqe_dma);
356 
357 err_cqe_dma_arr_alloc:
358 	vfree(rq->cqe);
359 	return -ENOMEM;
360 }
361 
362 /**
363  * free_rq_cqe - free rq completion queue elements
364  * @rq: HW Receive Queue
365  **/
366 static void free_rq_cqe(struct hinic_rq *rq)
367 {
368 	struct hinic_hwif *hwif = rq->hwif;
369 	struct pci_dev *pdev = hwif->pdev;
370 	struct hinic_wq *wq = rq->wq;
371 	int i;
372 
373 	for (i = 0; i < wq->q_depth; i++)
374 		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
375 				  rq->cqe_dma[i]);
376 
377 	vfree(rq->cqe_dma);
378 	vfree(rq->cqe);
379 }
380 
381 /**
382  * hinic_init_rq - Initialize HW Receive Queue
383  * @rq: HW Receive Queue
384  * @hwif: HW Interface for accessing HW
385  * @wq: Work Queue for the data of the RQ
386  * @entry: msix entry for rq
387  *
388  * Return 0 - Success, negative - Failure
389  **/
390 int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
391 		  struct hinic_wq *wq, struct msix_entry *entry)
392 {
393 	struct pci_dev *pdev = hwif->pdev;
394 	size_t pi_size;
395 	int err;
396 
397 	rq->hwif = hwif;
398 
399 	rq->wq = wq;
400 
401 	rq->irq = entry->vector;
402 	rq->msix_entry = entry->entry;
403 
404 	rq->buf_sz = HINIC_RX_BUF_SZ;
405 
406 	err = alloc_rq_skb_arr(rq);
407 	if (err) {
408 		dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
409 		return err;
410 	}
411 
412 	err = alloc_rq_cqe(rq);
413 	if (err) {
414 		dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
415 		goto err_alloc_rq_cqe;
416 	}
417 
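	/* The RQ producer index is published through memory rather than a
	 * doorbell: hinic_rq_update() stores it here, and the DMA address of
	 * this location is programmed into the RQ context (pi_paddr_hi/lo).
	 */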
	/* HW requirement: the PI area must be at least 32 bits */
419 	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
420 	rq->pi_virt_addr = dma_zalloc_coherent(&pdev->dev, pi_size,
421 					       &rq->pi_dma_addr, GFP_KERNEL);
422 	if (!rq->pi_virt_addr) {
423 		dev_err(&pdev->dev, "Failed to allocate PI address\n");
424 		err = -ENOMEM;
425 		goto err_pi_virt;
426 	}
427 
428 	return 0;
429 
430 err_pi_virt:
431 	free_rq_cqe(rq);
432 
433 err_alloc_rq_cqe:
434 	free_rq_skb_arr(rq);
435 	return err;
436 }
437 
438 /**
439  * hinic_clean_rq - Clean HW Receive Queue's Resources
440  * @rq: HW Receive Queue
441  **/
442 void hinic_clean_rq(struct hinic_rq *rq)
443 {
444 	struct hinic_hwif *hwif = rq->hwif;
445 	struct pci_dev *pdev = hwif->pdev;
446 	size_t pi_size;
447 
448 	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
449 	dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
450 			  rq->pi_dma_addr);
451 
452 	free_rq_cqe(rq);
453 	free_rq_skb_arr(rq);
454 }
455 
456 /**
457  * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
458  * @sq: send queue
459  *
460  * Return number of free wqebbs
461  **/
462 int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
463 {
464 	struct hinic_wq *wq = sq->wq;
465 
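	/* one WQEBB is always kept in reserve so that a completely full ring
	 * can be distinguished from an empty one, hence the "- 1"
	 */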
466 	return atomic_read(&wq->delta) - 1;
467 }
468 
469 /**
470  * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
471  * @rq: recv queue
472  *
473  * Return number of free wqebbs
474  **/
475 int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
476 {
477 	struct hinic_wq *wq = rq->wq;
478 
479 	return atomic_read(&wq->delta) - 1;
480 }
481 
482 static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
483 			    int nr_descs)
484 {
485 	u32 ctrl_size, task_size, bufdesc_size;
486 
487 	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
488 	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
489 	bufdesc_size = nr_descs * sizeof(struct hinic_sq_bufdesc);
490 	bufdesc_size = SIZE_8BYTES(bufdesc_size);
491 
492 	ctrl->ctrl_info = HINIC_SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
493 			  HINIC_SQ_CTRL_SET(task_size, TASKSECT_LEN)        |
494 			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
495 			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
496 
497 	ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
498 					     QUEUE_INFO_MSS);
499 }
500 
501 static void sq_prepare_task(struct hinic_sq_task *task)
502 {
503 	task->pkt_info0 =
504 		HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
505 		HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
506 		HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
507 					INNER_L3TYPE) |
508 		HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
509 					VLAN_OFFLOAD) |
510 		HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
511 
512 	task->pkt_info1 =
513 		HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
514 		HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
515 		HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
516 
517 	task->pkt_info2 =
518 		HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
519 		HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN)  |
520 		HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
521 					TUNNEL_L4TYPE)    |
522 		HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
523 					OUTER_L3TYPE);
524 
525 	task->ufo_v6_identify = 0;
526 
527 	task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);
528 
529 	task->zero_pad = 0;
530 }
531 
532 /**
 * hinic_sq_prepare_wqe - prepare wqe before inserting it into the queue
534  * @sq: send queue
535  * @prod_idx: pi value
536  * @sq_wqe: wqe to prepare
537  * @sges: sges for use by the wqe for send for buf addresses
538  * @nr_sges: number of sges
539  **/
540 void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
541 			  struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
542 			  int nr_sges)
543 {
544 	int i;
545 
546 	sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);
547 
548 	sq_prepare_task(&sq_wqe->task);
549 
550 	for (i = 0; i < nr_sges; i++)
551 		sq_wqe->buf_descs[i].sge = sges[i];
552 }
553 
554 /**
555  * sq_prepare_db - prepare doorbell to write
556  * @sq: send queue
557  * @prod_idx: pi value for the doorbell
558  * @cos: cos of the doorbell
559  *
560  * Return db value
561  **/
562 static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
563 {
564 	struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
565 	u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));
566 
567 	/* Data should be written to HW in Big Endian Format */
568 	return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI)     |
569 			   HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
570 			   HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH)  |
571 			   HINIC_SQ_DB_INFO_SET(cos, COS)               |
572 			   HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
573 }
574 
575 /**
 * hinic_sq_write_db - write doorbell
577  * @sq: send queue
578  * @prod_idx: pi value for the doorbell
579  * @wqe_size: wqe size
580  * @cos: cos of the wqe
581  **/
582 void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
583 		       unsigned int cos)
584 {
585 	struct hinic_wq *wq = sq->wq;
586 
	/* advance prod_idx past the wqebbs used by this wqe */
588 	prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
589 
590 	wmb();  /* Write all before the doorbell */
591 
592 	writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
593 }
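
/*
 * Illustrative TX posting sequence (a sketch of how the helpers above are
 * combined by the transmit path; the local variable names are only for
 * illustration):
 *
 *	sq_wqe = hinic_sq_get_wqe(sq, wqe_size, &prod_idx);
 *	if (!sq_wqe)
 *		return;			(queue full - no free WQEBBs)
 *
 *	hinic_sq_prepare_wqe(sq, prod_idx, sq_wqe, sges, nr_sges);
 *	hinic_sq_write_wqe(sq, prod_idx, sq_wqe, skb, wqe_size);
 *	hinic_sq_write_db(sq, prod_idx, wqe_size, 0);
 */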
594 
595 /**
 * hinic_sq_get_wqe - get wqe ptr at the current pi and update the pi
597  * @sq: sq to get wqe from
598  * @wqe_size: wqe size
599  * @prod_idx: returned pi
600  *
601  * Return wqe pointer
602  **/
603 struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
604 				      unsigned int wqe_size, u16 *prod_idx)
605 {
606 	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
607 						    prod_idx);
608 
609 	if (IS_ERR(hw_wqe))
610 		return NULL;
611 
612 	return &hw_wqe->sq_wqe;
613 }
614 
615 /**
616  * hinic_sq_write_wqe - write the wqe to the sq
617  * @sq: send queue
618  * @prod_idx: pi of the wqe
619  * @sq_wqe: the wqe to write
620  * @skb: skb to save
621  * @wqe_size: the size of the wqe
622  **/
623 void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
624 			struct hinic_sq_wqe *sq_wqe,
625 			struct sk_buff *skb, unsigned int wqe_size)
626 {
627 	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;
628 
629 	sq->saved_skb[prod_idx] = skb;
630 
631 	/* The data in the HW should be in Big Endian Format */
632 	hinic_cpu_to_be32(sq_wqe, wqe_size);
633 
634 	hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
635 }
636 
637 /**
 * hinic_sq_read_wqe - read wqe ptr at the current ci and update the ci
639  * @sq: send queue
640  * @skb: return skb that was saved
641  * @wqe_size: the size of the wqe
642  * @cons_idx: consumer index of the wqe
643  *
 * Return wqe at the ci position
645  **/
646 struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
647 				       struct sk_buff **skb,
648 				       unsigned int *wqe_size, u16 *cons_idx)
649 {
650 	struct hinic_hw_wqe *hw_wqe;
651 	struct hinic_sq_wqe *sq_wqe;
652 	struct hinic_sq_ctrl *ctrl;
653 	unsigned int buf_sect_len;
654 	u32 ctrl_info;
655 
656 	/* read the ctrl section for getting wqe size */
657 	hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
658 	if (IS_ERR(hw_wqe))
659 		return NULL;
660 
661 	sq_wqe = &hw_wqe->sq_wqe;
662 	ctrl = &sq_wqe->ctrl;
663 	ctrl_info = be32_to_cpu(ctrl->ctrl_info);
664 	buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);
665 
666 	*wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
667 	*wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
668 
669 	*skb = sq->saved_skb[*cons_idx];
670 
671 	/* using the real wqe size to read wqe again */
672 	hw_wqe = hinic_read_wqe(sq->wq, *wqe_size, cons_idx);
673 
674 	return &hw_wqe->sq_wqe;
675 }
676 
677 /**
678  * hinic_sq_put_wqe - release the ci for new wqes
679  * @sq: send queue
680  * @wqe_size: the size of the wqe
681  **/
682 void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
683 {
684 	hinic_put_wqe(sq->wq, wqe_size);
685 }
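
/*
 * Illustrative TX completion sequence (sketch only): read the WQE at the
 * current CI, recover the saved skb and its sges, then release the WQEBBs:
 *
 *	sq_wqe = hinic_sq_read_wqe(sq, &skb, &wqe_size, &cons_idx);
 *	if (!sq_wqe)
 *		return;			(nothing to complete)
 *
 *	hinic_sq_get_sges(sq_wqe, sges, nr_sges);
 *	... unmap the sges and free the skb ...
 *	hinic_sq_put_wqe(sq, wqe_size);
 */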
686 
687 /**
688  * hinic_sq_get_sges - get sges from the wqe
689  * @sq_wqe: wqe to get the sges from its buffer addresses
690  * @sges: returned sges
691  * @nr_sges: number sges to return
692  **/
693 void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
694 		       int nr_sges)
695 {
696 	int i;
697 
698 	for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
699 		sges[i] = sq_wqe->buf_descs[i].sge;
700 		hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
701 	}
702 }
703 
704 /**
 * hinic_rq_get_wqe - get wqe ptr at the current pi and update the pi
706  * @rq: rq to get wqe from
707  * @wqe_size: wqe size
708  * @prod_idx: returned pi
709  *
710  * Return wqe pointer
711  **/
712 struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
713 				      unsigned int wqe_size, u16 *prod_idx)
714 {
715 	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
716 						    prod_idx);
717 
718 	if (IS_ERR(hw_wqe))
719 		return NULL;
720 
721 	return &hw_wqe->rq_wqe;
722 }
723 
724 /**
725  * hinic_rq_write_wqe - write the wqe to the rq
726  * @rq: recv queue
727  * @prod_idx: pi of the wqe
728  * @rq_wqe: the wqe to write
729  * @skb: skb to save
730  **/
731 void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
732 			struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
733 {
734 	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;
735 
736 	rq->saved_skb[prod_idx] = skb;
737 
738 	/* The data in the HW should be in Big Endian Format */
739 	hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));
740 
741 	hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
742 }
743 
744 /**
 * hinic_rq_read_wqe - read wqe ptr at the current ci and update the ci
746  * @rq: recv queue
747  * @wqe_size: the size of the wqe
748  * @skb: return saved skb
749  * @cons_idx: consumer index of the wqe
750  *
 * Return wqe at the ci position
752  **/
753 struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
754 				       unsigned int wqe_size,
755 				       struct sk_buff **skb, u16 *cons_idx)
756 {
757 	struct hinic_hw_wqe *hw_wqe;
758 	struct hinic_rq_cqe *cqe;
759 	int rx_done;
760 	u32 status;
761 
762 	hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
763 	if (IS_ERR(hw_wqe))
764 		return NULL;
765 
766 	cqe = rq->cqe[*cons_idx];
767 
768 	status = be32_to_cpu(cqe->status);
769 
770 	rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
771 	if (!rx_done)
772 		return NULL;
773 
774 	*skb = rq->saved_skb[*cons_idx];
775 
776 	return &hw_wqe->rq_wqe;
777 }
778 
779 /**
 * hinic_rq_read_next_wqe - increment the ci and read the wqe at the new ci position
781  * @rq: recv queue
782  * @wqe_size: the size of the wqe
783  * @skb: return saved skb
784  * @cons_idx: consumer index in the wq
785  *
 * Return wqe at the incremented ci position
787  **/
788 struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
789 					    unsigned int wqe_size,
790 					    struct sk_buff **skb,
791 					    u16 *cons_idx)
792 {
793 	struct hinic_wq *wq = rq->wq;
794 	struct hinic_hw_wqe *hw_wqe;
795 	unsigned int num_wqebbs;
796 
797 	wqe_size = ALIGN(wqe_size, wq->wqebb_size);
798 	num_wqebbs = wqe_size / wq->wqebb_size;
799 
800 	*cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);
801 
802 	*skb = rq->saved_skb[*cons_idx];
803 
804 	hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);
805 
806 	return &hw_wqe->rq_wqe;
807 }
808 
809 /**
 * hinic_rq_put_wqe - release the ci for new wqes
811  * @rq: recv queue
812  * @cons_idx: consumer index of the wqe
813  * @wqe_size: the size of the wqe
814  **/
815 void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
816 		      unsigned int wqe_size)
817 {
818 	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
819 	u32 status = be32_to_cpu(cqe->status);
820 
821 	status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);
822 
	/* Rx WQE size is 1 WQEBB, no wq shadow */
824 	cqe->status = cpu_to_be32(status);
825 
	wmb();          /* ensure the cleared RXDONE flag is visible before releasing the WQE */
827 
828 	hinic_put_wqe(rq->wq, wqe_size);
829 }
830 
831 /**
832  * hinic_rq_get_sge - get sge from the wqe
833  * @rq: recv queue
834  * @rq_wqe: wqe to get the sge from its buf address
835  * @cons_idx: consumer index
836  * @sge: returned sge
837  **/
838 void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
839 		      u16 cons_idx, struct hinic_sge *sge)
840 {
841 	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
842 	u32 len = be32_to_cpu(cqe->len);
843 
844 	sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
845 	sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
846 	sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
847 }
848 
849 /**
 * hinic_rq_prepare_wqe - prepare wqe before inserting it into the queue
851  * @rq: recv queue
852  * @prod_idx: pi value
853  * @rq_wqe: the wqe
854  * @sge: sge for use by the wqe for recv buf address
855  **/
856 void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
857 			  struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
858 {
859 	struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
860 	struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
861 	struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
862 	struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
863 	dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];
864 
865 	ctrl->ctrl_info =
866 		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
867 		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
868 				  COMPLETE_LEN)                    |
869 		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
870 				  BUFDESC_SECT_LEN)                |
871 		HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);
872 
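	/* Point the completion section at this index's DMA-coherent CQE so HW
	 * can write back the status/length that hinic_rq_read_wqe() and
	 * hinic_rq_get_sge() later read.
	 */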
873 	hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));
874 
875 	buf_desc->hi_addr = sge->hi_addr;
876 	buf_desc->lo_addr = sge->lo_addr;
877 }
878 
879 /**
880  * hinic_rq_update - update pi of the rq
881  * @rq: recv queue
882  * @prod_idx: pi value
883  **/
884 void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
885 {
886 	*rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
887 }
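
/*
 * Illustrative RX buffer posting sequence (a sketch of how the RQ helpers
 * are combined by the receive path; the variable names are only for
 * illustration):
 *
 *	rq_wqe = hinic_rq_get_wqe(rq, wqe_size, &prod_idx);
 *	if (!rq_wqe)
 *		break;			(no free WQEBBs)
 *
 *	hinic_set_sge(&sge, buf_dma_addr, HINIC_RX_BUF_SZ);
 *	hinic_rq_prepare_wqe(rq, prod_idx, rq_wqe, &sge);
 *	hinic_rq_write_wqe(rq, prod_idx, rq_wqe, skb);
 *	...
 *	hinic_rq_update(rq, prod_idx);
 */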
888