1 /*
2  * Huawei HiNIC PCI Express Linux driver
3  * Copyright(c) 2017 Huawei Technologies Co., Ltd
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * for more details.
13  *
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/types.h>
18 #include <linux/errno.h>
19 #include <linux/pci.h>
20 #include <linux/device.h>
21 #include <linux/netdevice.h>
22 #include <linux/etherdevice.h>
23 #include <linux/u64_stats_sync.h>
24 #include <linux/slab.h>
25 #include <linux/interrupt.h>
26 #include <linux/skbuff.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/prefetch.h>
29 #include <asm/barrier.h>
30 
31 #include "hinic_common.h"
32 #include "hinic_hw_if.h"
33 #include "hinic_hw_wqe.h"
34 #include "hinic_hw_wq.h"
35 #include "hinic_hw_qp.h"
36 #include "hinic_hw_dev.h"
37 #include "hinic_rx.h"
38 #include "hinic_dev.h"
39 
/* MSI-X settings handed to hinic_hwdev_msix_set() when the Rx IRQ is
 * requested; every one of them is zero.
 */
enum {
	RX_IRQ_NO_PENDING	= 0,
	RX_IRQ_NO_COALESC	= 0,
	RX_IRQ_NO_LLI_TIMER	= 0,
	RX_IRQ_NO_CREDIT	= 0,
	RX_IRQ_NO_RESEND_TIMER	= 0,
};
45 
46 /**
47  * hinic_rxq_clean_stats - Clean the statistics of specific queue
48  * @rxq: Logical Rx Queue
49  **/
50 void hinic_rxq_clean_stats(struct hinic_rxq *rxq)
51 {
52 	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
53 
54 	u64_stats_update_begin(&rxq_stats->syncp);
55 	rxq_stats->pkts  = 0;
56 	rxq_stats->bytes = 0;
57 	u64_stats_update_end(&rxq_stats->syncp);
58 }
59 
60 /**
61  * hinic_rxq_get_stats - get statistics of Rx Queue
62  * @rxq: Logical Rx Queue
63  * @stats: return updated stats here
64  **/
65 void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
66 {
67 	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
68 	unsigned int start;
69 
70 	u64_stats_update_begin(&stats->syncp);
71 	do {
72 		start = u64_stats_fetch_begin(&rxq_stats->syncp);
73 		stats->pkts = rxq_stats->pkts;
74 		stats->bytes = rxq_stats->bytes;
75 	} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
76 	u64_stats_update_end(&stats->syncp);
77 }
78 
79 /**
80  * rxq_stats_init - Initialize the statistics of specific queue
81  * @rxq: Logical Rx Queue
82  **/
83 static void rxq_stats_init(struct hinic_rxq *rxq)
84 {
85 	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
86 
87 	u64_stats_init(&rxq_stats->syncp);
88 	hinic_rxq_clean_stats(rxq);
89 }
90 
91 /**
92  * rx_alloc_skb - allocate skb and map it to dma address
93  * @rxq: rx queue
94  * @dma_addr: returned dma address for the skb
95  *
96  * Return skb
97  **/
98 static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq,
99 				    dma_addr_t *dma_addr)
100 {
101 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
102 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
103 	struct hinic_hwif *hwif = hwdev->hwif;
104 	struct pci_dev *pdev = hwif->pdev;
105 	struct sk_buff *skb;
106 	dma_addr_t addr;
107 	int err;
108 
109 	skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz);
110 	if (!skb) {
111 		netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n");
112 		return NULL;
113 	}
114 
115 	addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz,
116 			      DMA_FROM_DEVICE);
117 	err = dma_mapping_error(&pdev->dev, addr);
118 	if (err) {
119 		dev_err(&pdev->dev, "Failed to map Rx DMA, err = %d\n", err);
120 		goto err_rx_map;
121 	}
122 
123 	*dma_addr = addr;
124 	return skb;
125 
126 err_rx_map:
127 	dev_kfree_skb_any(skb);
128 	return NULL;
129 }
130 
131 /**
132  * rx_unmap_skb - unmap the dma address of the skb
133  * @rxq: rx queue
134  * @dma_addr: dma address of the skb
135  **/
136 static void rx_unmap_skb(struct hinic_rxq *rxq, dma_addr_t dma_addr)
137 {
138 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
139 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
140 	struct hinic_hwif *hwif = hwdev->hwif;
141 	struct pci_dev *pdev = hwif->pdev;
142 
143 	dma_unmap_single(&pdev->dev, dma_addr, rxq->rq->buf_sz,
144 			 DMA_FROM_DEVICE);
145 }
146 
147 /**
148  * rx_free_skb - unmap and free skb
149  * @rxq: rx queue
150  * @skb: skb to free
151  * @dma_addr: dma address of the skb
152  **/
153 static void rx_free_skb(struct hinic_rxq *rxq, struct sk_buff *skb,
154 			dma_addr_t dma_addr)
155 {
156 	rx_unmap_skb(rxq, dma_addr);
157 	dev_kfree_skb_any(skb);
158 }
159 
160 /**
161  * rx_alloc_pkts - allocate pkts in rx queue
162  * @rxq: rx queue
163  *
164  * Return number of skbs allocated
165  **/
166 static int rx_alloc_pkts(struct hinic_rxq *rxq)
167 {
168 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
169 	struct hinic_rq_wqe *rq_wqe;
170 	unsigned int free_wqebbs;
171 	struct hinic_sge sge;
172 	dma_addr_t dma_addr;
173 	struct sk_buff *skb;
174 	int i, alloc_more;
175 	u16 prod_idx;
176 
177 	free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);
178 	alloc_more = 0;
179 
180 	/* Limit the allocation chunks */
181 	if (free_wqebbs > nic_dev->rx_weight)
182 		free_wqebbs = nic_dev->rx_weight;
183 
184 	for (i = 0; i < free_wqebbs; i++) {
185 		skb = rx_alloc_skb(rxq, &dma_addr);
186 		if (!skb) {
187 			netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
188 			alloc_more = 1;
189 			goto skb_out;
190 		}
191 
192 		hinic_set_sge(&sge, dma_addr, skb->len);
193 
194 		rq_wqe = hinic_rq_get_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
195 					  &prod_idx);
196 		if (!rq_wqe) {
197 			rx_free_skb(rxq, skb, dma_addr);
198 			alloc_more = 1;
199 			goto skb_out;
200 		}
201 
202 		hinic_rq_prepare_wqe(rxq->rq, prod_idx, rq_wqe, &sge);
203 
204 		hinic_rq_write_wqe(rxq->rq, prod_idx, rq_wqe, skb);
205 	}
206 
207 skb_out:
208 	if (i) {
209 		wmb();  /* write all the wqes before update PI */
210 
211 		hinic_rq_update(rxq->rq, prod_idx);
212 	}
213 
214 	if (alloc_more)
215 		tasklet_schedule(&rxq->rx_task);
216 
217 	return i;
218 }
219 
220 /**
221  * free_all_rx_skbs - free all skbs in rx queue
222  * @rxq: rx queue
223  **/
224 static void free_all_rx_skbs(struct hinic_rxq *rxq)
225 {
226 	struct hinic_rq *rq = rxq->rq;
227 	struct hinic_hw_wqe *hw_wqe;
228 	struct hinic_sge sge;
229 	u16 ci;
230 
231 	while ((hw_wqe = hinic_read_wqe(rq->wq, HINIC_RQ_WQE_SIZE, &ci))) {
232 		if (IS_ERR(hw_wqe))
233 			break;
234 
235 		hinic_rq_get_sge(rq, &hw_wqe->rq_wqe, ci, &sge);
236 
237 		hinic_put_wqe(rq->wq, HINIC_RQ_WQE_SIZE);
238 
239 		rx_free_skb(rxq, rq->saved_skb[ci], hinic_sge_to_dma(&sge));
240 	}
241 }
242 
/**
 * rx_alloc_task - tasklet body that refills the rx queue
 * @data: the rx queue, passed as unsigned long at tasklet_init() time
 *
 * The number of allocated buffers is deliberately ignored.
 **/
static void rx_alloc_task(unsigned long data)
{
	(void)rx_alloc_pkts((struct hinic_rxq *)data);
}
253 
254 /**
255  * rx_recv_jumbo_pkt - Rx handler for jumbo pkt
256  * @rxq: rx queue
257  * @head_skb: the first skb in the list
258  * @left_pkt_len: left size of the pkt exclude head skb
259  * @ci: consumer index
260  *
261  * Return number of wqes that used for the left of the pkt
262  **/
263 static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb,
264 			     unsigned int left_pkt_len, u16 ci)
265 {
266 	struct sk_buff *skb, *curr_skb = head_skb;
267 	struct hinic_rq_wqe *rq_wqe;
268 	unsigned int curr_len;
269 	struct hinic_sge sge;
270 	int num_wqes = 0;
271 
272 	while (left_pkt_len > 0) {
273 		rq_wqe = hinic_rq_read_next_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
274 						&skb, &ci);
275 
276 		num_wqes++;
277 
278 		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
279 
280 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
281 
282 		prefetch(skb->data);
283 
284 		curr_len = (left_pkt_len > HINIC_RX_BUF_SZ) ? HINIC_RX_BUF_SZ :
285 			    left_pkt_len;
286 
287 		left_pkt_len -= curr_len;
288 
289 		__skb_put(skb, curr_len);
290 
291 		if (curr_skb == head_skb)
292 			skb_shinfo(head_skb)->frag_list = skb;
293 		else
294 			curr_skb->next = skb;
295 
296 		head_skb->len += skb->len;
297 		head_skb->data_len += skb->len;
298 		head_skb->truesize += skb->truesize;
299 
300 		curr_skb = skb;
301 	}
302 
303 	return num_wqes;
304 }
305 
306 /**
307  * rxq_recv - Rx handler
308  * @rxq: rx queue
309  * @budget: maximum pkts to process
310  *
311  * Return number of pkts received
312  **/
313 static int rxq_recv(struct hinic_rxq *rxq, int budget)
314 {
315 	struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
316 	u64 pkt_len = 0, rx_bytes = 0;
317 	struct hinic_rq_wqe *rq_wqe;
318 	int num_wqes, pkts = 0;
319 	struct hinic_sge sge;
320 	struct sk_buff *skb;
321 	u16 ci;
322 
323 	while (pkts < budget) {
324 		num_wqes = 0;
325 
326 		rq_wqe = hinic_rq_read_wqe(rxq->rq, HINIC_RQ_WQE_SIZE, &skb,
327 					   &ci);
328 		if (!rq_wqe)
329 			break;
330 
331 		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
332 
333 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
334 
335 		prefetch(skb->data);
336 
337 		pkt_len = sge.len;
338 
339 		if (pkt_len <= HINIC_RX_BUF_SZ) {
340 			__skb_put(skb, pkt_len);
341 		} else {
342 			__skb_put(skb, HINIC_RX_BUF_SZ);
343 			num_wqes = rx_recv_jumbo_pkt(rxq, skb, pkt_len -
344 						     HINIC_RX_BUF_SZ, ci);
345 		}
346 
347 		hinic_rq_put_wqe(rxq->rq, ci,
348 				 (num_wqes + 1) * HINIC_RQ_WQE_SIZE);
349 
350 		skb_record_rx_queue(skb, qp->q_id);
351 		skb->protocol = eth_type_trans(skb, rxq->netdev);
352 
353 		napi_gro_receive(&rxq->napi, skb);
354 
355 		pkts++;
356 		rx_bytes += pkt_len;
357 	}
358 
359 	if (pkts)
360 		tasklet_schedule(&rxq->rx_task); /* hinic_rx_alloc_pkts */
361 
362 	u64_stats_update_begin(&rxq->rxq_stats.syncp);
363 	rxq->rxq_stats.pkts += pkts;
364 	rxq->rxq_stats.bytes += rx_bytes;
365 	u64_stats_update_end(&rxq->rxq_stats.syncp);
366 
367 	return pkts;
368 }
369 
370 static int rx_poll(struct napi_struct *napi, int budget)
371 {
372 	struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi);
373 	struct hinic_rq *rq = rxq->rq;
374 	int pkts;
375 
376 	pkts = rxq_recv(rxq, budget);
377 	if (pkts >= budget)
378 		return budget;
379 
380 	napi_complete(napi);
381 	enable_irq(rq->irq);
382 	return pkts;
383 }
384 
385 static void rx_add_napi(struct hinic_rxq *rxq)
386 {
387 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
388 
389 	netif_napi_add(rxq->netdev, &rxq->napi, rx_poll, nic_dev->rx_weight);
390 	napi_enable(&rxq->napi);
391 }
392 
393 static void rx_del_napi(struct hinic_rxq *rxq)
394 {
395 	napi_disable(&rxq->napi);
396 	netif_napi_del(&rxq->napi);
397 }
398 
399 static irqreturn_t rx_irq(int irq, void *data)
400 {
401 	struct hinic_rxq *rxq = (struct hinic_rxq *)data;
402 	struct hinic_rq *rq = rxq->rq;
403 	struct hinic_dev *nic_dev;
404 
405 	/* Disable the interrupt until napi will be completed */
406 	disable_irq_nosync(rq->irq);
407 
408 	nic_dev = netdev_priv(rxq->netdev);
409 	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);
410 
411 	napi_schedule(&rxq->napi);
412 	return IRQ_HANDLED;
413 }
414 
415 static int rx_request_irq(struct hinic_rxq *rxq)
416 {
417 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
418 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
419 	struct hinic_rq *rq = rxq->rq;
420 	int err;
421 
422 	rx_add_napi(rxq);
423 
424 	hinic_hwdev_msix_set(hwdev, rq->msix_entry,
425 			     RX_IRQ_NO_PENDING, RX_IRQ_NO_COALESC,
426 			     RX_IRQ_NO_LLI_TIMER, RX_IRQ_NO_CREDIT,
427 			     RX_IRQ_NO_RESEND_TIMER);
428 
429 	err = request_irq(rq->irq, rx_irq, 0, rxq->irq_name, rxq);
430 	if (err) {
431 		rx_del_napi(rxq);
432 		return err;
433 	}
434 
435 	return 0;
436 }
437 
438 static void rx_free_irq(struct hinic_rxq *rxq)
439 {
440 	struct hinic_rq *rq = rxq->rq;
441 
442 	free_irq(rq->irq, rxq);
443 	rx_del_napi(rxq);
444 }
445 
446 /**
447  * hinic_init_rxq - Initialize the Rx Queue
448  * @rxq: Logical Rx Queue
449  * @rq: Hardware Rx Queue to connect the Logical queue with
450  * @netdev: network device to connect the Logical queue with
451  *
452  * Return 0 - Success, negative - Failure
453  **/
454 int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq,
455 		   struct net_device *netdev)
456 {
457 	struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq);
458 	int err, pkts, irqname_len;
459 
460 	rxq->netdev = netdev;
461 	rxq->rq = rq;
462 
463 	rxq_stats_init(rxq);
464 
465 	irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1;
466 	rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
467 	if (!rxq->irq_name)
468 		return -ENOMEM;
469 
470 	sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id);
471 
472 	tasklet_init(&rxq->rx_task, rx_alloc_task, (unsigned long)rxq);
473 
474 	pkts = rx_alloc_pkts(rxq);
475 	if (!pkts) {
476 		err = -ENOMEM;
477 		goto err_rx_pkts;
478 	}
479 
480 	err = rx_request_irq(rxq);
481 	if (err) {
482 		netdev_err(netdev, "Failed to request Rx irq\n");
483 		goto err_req_rx_irq;
484 	}
485 
486 	return 0;
487 
488 err_req_rx_irq:
489 err_rx_pkts:
490 	tasklet_kill(&rxq->rx_task);
491 	free_all_rx_skbs(rxq);
492 	devm_kfree(&netdev->dev, rxq->irq_name);
493 	return err;
494 }
495 
496 /**
497  * hinic_clean_rxq - Clean the Rx Queue
498  * @rxq: Logical Rx Queue
499  **/
500 void hinic_clean_rxq(struct hinic_rxq *rxq)
501 {
502 	struct net_device *netdev = rxq->netdev;
503 
504 	rx_free_irq(rxq);
505 
506 	tasklet_kill(&rxq->rx_task);
507 	free_all_rx_skbs(rxq);
508 	devm_kfree(&netdev->dev, rxq->irq_name);
509 }
510