1 /*
2  * Huawei HiNIC PCI Express Linux driver
3  * Copyright(c) 2017 Huawei Technologies Co., Ltd
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * for more details.
13  *
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/types.h>
18 #include <linux/errno.h>
19 #include <linux/pci.h>
20 #include <linux/device.h>
21 #include <linux/netdevice.h>
22 #include <linux/etherdevice.h>
23 #include <linux/u64_stats_sync.h>
24 #include <linux/slab.h>
25 #include <linux/interrupt.h>
26 #include <linux/skbuff.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/prefetch.h>
29 #include <linux/cpumask.h>
30 #include <asm/barrier.h>
31 
32 #include "hinic_common.h"
33 #include "hinic_hw_if.h"
34 #include "hinic_hw_wqe.h"
35 #include "hinic_hw_wq.h"
36 #include "hinic_hw_qp.h"
37 #include "hinic_hw_dev.h"
38 #include "hinic_rx.h"
39 #include "hinic_dev.h"
40 
/* MSI-X settings passed to hinic_hwdev_msix_set() when the Rx queue IRQ is
 * requested; every one of them is zero.
 */
#define RX_IRQ_NO_PENDING               0
#define RX_IRQ_NO_COALESC               0
#define RX_IRQ_NO_LLI_TIMER             0
#define RX_IRQ_NO_CREDIT                0
#define RX_IRQ_NO_RESEND_TIMER          0
46 
47 /**
48  * hinic_rxq_clean_stats - Clean the statistics of specific queue
49  * @rxq: Logical Rx Queue
50  **/
51 void hinic_rxq_clean_stats(struct hinic_rxq *rxq)
52 {
53 	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
54 
55 	u64_stats_update_begin(&rxq_stats->syncp);
56 	rxq_stats->pkts  = 0;
57 	rxq_stats->bytes = 0;
58 	u64_stats_update_end(&rxq_stats->syncp);
59 }
60 
61 /**
62  * hinic_rxq_get_stats - get statistics of Rx Queue
63  * @rxq: Logical Rx Queue
64  * @stats: return updated stats here
65  **/
66 void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
67 {
68 	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
69 	unsigned int start;
70 
71 	u64_stats_update_begin(&stats->syncp);
72 	do {
73 		start = u64_stats_fetch_begin(&rxq_stats->syncp);
74 		stats->pkts = rxq_stats->pkts;
75 		stats->bytes = rxq_stats->bytes;
76 	} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
77 	u64_stats_update_end(&stats->syncp);
78 }
79 
80 /**
81  * rxq_stats_init - Initialize the statistics of specific queue
82  * @rxq: Logical Rx Queue
83  **/
84 static void rxq_stats_init(struct hinic_rxq *rxq)
85 {
86 	struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
87 
88 	u64_stats_init(&rxq_stats->syncp);
89 	hinic_rxq_clean_stats(rxq);
90 }
91 
92 /**
93  * rx_alloc_skb - allocate skb and map it to dma address
94  * @rxq: rx queue
95  * @dma_addr: returned dma address for the skb
96  *
97  * Return skb
98  **/
99 static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq,
100 				    dma_addr_t *dma_addr)
101 {
102 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
103 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
104 	struct hinic_hwif *hwif = hwdev->hwif;
105 	struct pci_dev *pdev = hwif->pdev;
106 	struct sk_buff *skb;
107 	dma_addr_t addr;
108 	int err;
109 
110 	skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz);
111 	if (!skb) {
112 		netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n");
113 		return NULL;
114 	}
115 
116 	addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz,
117 			      DMA_FROM_DEVICE);
118 	err = dma_mapping_error(&pdev->dev, addr);
119 	if (err) {
120 		dev_err(&pdev->dev, "Failed to map Rx DMA, err = %d\n", err);
121 		goto err_rx_map;
122 	}
123 
124 	*dma_addr = addr;
125 	return skb;
126 
127 err_rx_map:
128 	dev_kfree_skb_any(skb);
129 	return NULL;
130 }
131 
132 /**
133  * rx_unmap_skb - unmap the dma address of the skb
134  * @rxq: rx queue
135  * @dma_addr: dma address of the skb
136  **/
137 static void rx_unmap_skb(struct hinic_rxq *rxq, dma_addr_t dma_addr)
138 {
139 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
140 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
141 	struct hinic_hwif *hwif = hwdev->hwif;
142 	struct pci_dev *pdev = hwif->pdev;
143 
144 	dma_unmap_single(&pdev->dev, dma_addr, rxq->rq->buf_sz,
145 			 DMA_FROM_DEVICE);
146 }
147 
148 /**
149  * rx_free_skb - unmap and free skb
150  * @rxq: rx queue
151  * @skb: skb to free
152  * @dma_addr: dma address of the skb
153  **/
154 static void rx_free_skb(struct hinic_rxq *rxq, struct sk_buff *skb,
155 			dma_addr_t dma_addr)
156 {
157 	rx_unmap_skb(rxq, dma_addr);
158 	dev_kfree_skb_any(skb);
159 }
160 
161 /**
162  * rx_alloc_pkts - allocate pkts in rx queue
163  * @rxq: rx queue
164  *
165  * Return number of skbs allocated
166  **/
167 static int rx_alloc_pkts(struct hinic_rxq *rxq)
168 {
169 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
170 	struct hinic_rq_wqe *rq_wqe;
171 	unsigned int free_wqebbs;
172 	struct hinic_sge sge;
173 	dma_addr_t dma_addr;
174 	struct sk_buff *skb;
175 	u16 prod_idx;
176 	int i;
177 
178 	free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);
179 
180 	/* Limit the allocation chunks */
181 	if (free_wqebbs > nic_dev->rx_weight)
182 		free_wqebbs = nic_dev->rx_weight;
183 
184 	for (i = 0; i < free_wqebbs; i++) {
185 		skb = rx_alloc_skb(rxq, &dma_addr);
186 		if (!skb) {
187 			netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
188 			goto skb_out;
189 		}
190 
191 		hinic_set_sge(&sge, dma_addr, skb->len);
192 
193 		rq_wqe = hinic_rq_get_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
194 					  &prod_idx);
195 		if (!rq_wqe) {
196 			rx_free_skb(rxq, skb, dma_addr);
197 			goto skb_out;
198 		}
199 
200 		hinic_rq_prepare_wqe(rxq->rq, prod_idx, rq_wqe, &sge);
201 
202 		hinic_rq_write_wqe(rxq->rq, prod_idx, rq_wqe, skb);
203 	}
204 
205 skb_out:
206 	if (i) {
207 		wmb();  /* write all the wqes before update PI */
208 
209 		hinic_rq_update(rxq->rq, prod_idx);
210 	}
211 
212 	tasklet_schedule(&rxq->rx_task);
213 	return i;
214 }
215 
216 /**
217  * free_all_rx_skbs - free all skbs in rx queue
218  * @rxq: rx queue
219  **/
220 static void free_all_rx_skbs(struct hinic_rxq *rxq)
221 {
222 	struct hinic_rq *rq = rxq->rq;
223 	struct hinic_hw_wqe *hw_wqe;
224 	struct hinic_sge sge;
225 	u16 ci;
226 
227 	while ((hw_wqe = hinic_read_wqe(rq->wq, HINIC_RQ_WQE_SIZE, &ci))) {
228 		if (IS_ERR(hw_wqe))
229 			break;
230 
231 		hinic_rq_get_sge(rq, &hw_wqe->rq_wqe, ci, &sge);
232 
233 		hinic_put_wqe(rq->wq, HINIC_RQ_WQE_SIZE);
234 
235 		rx_free_skb(rxq, rq->saved_skb[ci], hinic_sge_to_dma(&sge));
236 	}
237 }
238 
/**
 * rx_alloc_task - tasklet entry point that refills the rx queue
 * @data: the rx queue, passed as an unsigned long
 *
 * The number of buffers reported by rx_alloc_pkts() is ignored here.
 **/
static void rx_alloc_task(unsigned long data)
{
	(void)rx_alloc_pkts((struct hinic_rxq *)data);
}
249 
250 /**
251  * rx_recv_jumbo_pkt - Rx handler for jumbo pkt
252  * @rxq: rx queue
253  * @head_skb: the first skb in the list
254  * @left_pkt_len: left size of the pkt exclude head skb
255  * @ci: consumer index
256  *
257  * Return number of wqes that used for the left of the pkt
258  **/
259 static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb,
260 			     unsigned int left_pkt_len, u16 ci)
261 {
262 	struct sk_buff *skb, *curr_skb = head_skb;
263 	struct hinic_rq_wqe *rq_wqe;
264 	unsigned int curr_len;
265 	struct hinic_sge sge;
266 	int num_wqes = 0;
267 
268 	while (left_pkt_len > 0) {
269 		rq_wqe = hinic_rq_read_next_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
270 						&skb, &ci);
271 
272 		num_wqes++;
273 
274 		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
275 
276 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
277 
278 		prefetch(skb->data);
279 
280 		curr_len = (left_pkt_len > HINIC_RX_BUF_SZ) ? HINIC_RX_BUF_SZ :
281 			    left_pkt_len;
282 
283 		left_pkt_len -= curr_len;
284 
285 		__skb_put(skb, curr_len);
286 
287 		if (curr_skb == head_skb)
288 			skb_shinfo(head_skb)->frag_list = skb;
289 		else
290 			curr_skb->next = skb;
291 
292 		head_skb->len += skb->len;
293 		head_skb->data_len += skb->len;
294 		head_skb->truesize += skb->truesize;
295 
296 		curr_skb = skb;
297 	}
298 
299 	return num_wqes;
300 }
301 
302 /**
303  * rxq_recv - Rx handler
304  * @rxq: rx queue
305  * @budget: maximum pkts to process
306  *
307  * Return number of pkts received
308  **/
309 static int rxq_recv(struct hinic_rxq *rxq, int budget)
310 {
311 	struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
312 	u64 pkt_len = 0, rx_bytes = 0;
313 	struct hinic_rq_wqe *rq_wqe;
314 	int num_wqes, pkts = 0;
315 	struct hinic_sge sge;
316 	struct sk_buff *skb;
317 	u16 ci;
318 
319 	while (pkts < budget) {
320 		num_wqes = 0;
321 
322 		rq_wqe = hinic_rq_read_wqe(rxq->rq, HINIC_RQ_WQE_SIZE, &skb,
323 					   &ci);
324 		if (!rq_wqe)
325 			break;
326 
327 		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
328 
329 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
330 
331 		prefetch(skb->data);
332 
333 		pkt_len = sge.len;
334 
335 		if (pkt_len <= HINIC_RX_BUF_SZ) {
336 			__skb_put(skb, pkt_len);
337 		} else {
338 			__skb_put(skb, HINIC_RX_BUF_SZ);
339 			num_wqes = rx_recv_jumbo_pkt(rxq, skb, pkt_len -
340 						     HINIC_RX_BUF_SZ, ci);
341 		}
342 
343 		hinic_rq_put_wqe(rxq->rq, ci,
344 				 (num_wqes + 1) * HINIC_RQ_WQE_SIZE);
345 
346 		skb_record_rx_queue(skb, qp->q_id);
347 		skb->protocol = eth_type_trans(skb, rxq->netdev);
348 
349 		napi_gro_receive(&rxq->napi, skb);
350 
351 		pkts++;
352 		rx_bytes += pkt_len;
353 	}
354 
355 	if (pkts)
356 		tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */
357 
358 	u64_stats_update_begin(&rxq->rxq_stats.syncp);
359 	rxq->rxq_stats.pkts += pkts;
360 	rxq->rxq_stats.bytes += rx_bytes;
361 	u64_stats_update_end(&rxq->rxq_stats.syncp);
362 
363 	return pkts;
364 }
365 
366 static int rx_poll(struct napi_struct *napi, int budget)
367 {
368 	struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi);
369 	struct hinic_rq *rq = rxq->rq;
370 	int pkts;
371 
372 	pkts = rxq_recv(rxq, budget);
373 	if (pkts >= budget)
374 		return budget;
375 
376 	napi_complete(napi);
377 	enable_irq(rq->irq);
378 	return pkts;
379 }
380 
381 static void rx_add_napi(struct hinic_rxq *rxq)
382 {
383 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
384 
385 	netif_napi_add(rxq->netdev, &rxq->napi, rx_poll, nic_dev->rx_weight);
386 	napi_enable(&rxq->napi);
387 }
388 
389 static void rx_del_napi(struct hinic_rxq *rxq)
390 {
391 	napi_disable(&rxq->napi);
392 	netif_napi_del(&rxq->napi);
393 }
394 
395 static irqreturn_t rx_irq(int irq, void *data)
396 {
397 	struct hinic_rxq *rxq = (struct hinic_rxq *)data;
398 	struct hinic_rq *rq = rxq->rq;
399 	struct hinic_dev *nic_dev;
400 
401 	/* Disable the interrupt until napi will be completed */
402 	disable_irq_nosync(rq->irq);
403 
404 	nic_dev = netdev_priv(rxq->netdev);
405 	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);
406 
407 	napi_schedule(&rxq->napi);
408 	return IRQ_HANDLED;
409 }
410 
411 static int rx_request_irq(struct hinic_rxq *rxq)
412 {
413 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
414 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
415 	struct hinic_rq *rq = rxq->rq;
416 	struct hinic_qp *qp;
417 	struct cpumask mask;
418 	int err;
419 
420 	rx_add_napi(rxq);
421 
422 	hinic_hwdev_msix_set(hwdev, rq->msix_entry,
423 			     RX_IRQ_NO_PENDING, RX_IRQ_NO_COALESC,
424 			     RX_IRQ_NO_LLI_TIMER, RX_IRQ_NO_CREDIT,
425 			     RX_IRQ_NO_RESEND_TIMER);
426 
427 	err = request_irq(rq->irq, rx_irq, 0, rxq->irq_name, rxq);
428 	if (err) {
429 		rx_del_napi(rxq);
430 		return err;
431 	}
432 
433 	qp = container_of(rq, struct hinic_qp, rq);
434 	cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask);
435 	return irq_set_affinity_hint(rq->irq, &mask);
436 }
437 
438 static void rx_free_irq(struct hinic_rxq *rxq)
439 {
440 	struct hinic_rq *rq = rxq->rq;
441 
442 	free_irq(rq->irq, rxq);
443 	rx_del_napi(rxq);
444 }
445 
446 /**
447  * hinic_init_rxq - Initialize the Rx Queue
448  * @rxq: Logical Rx Queue
449  * @rq: Hardware Rx Queue to connect the Logical queue with
450  * @netdev: network device to connect the Logical queue with
451  *
452  * Return 0 - Success, negative - Failure
453  **/
454 int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq,
455 		   struct net_device *netdev)
456 {
457 	struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq);
458 	int err, pkts, irqname_len;
459 
460 	rxq->netdev = netdev;
461 	rxq->rq = rq;
462 
463 	rxq_stats_init(rxq);
464 
465 	irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1;
466 	rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
467 	if (!rxq->irq_name)
468 		return -ENOMEM;
469 
470 	sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id);
471 
472 	tasklet_init(&rxq->rx_task, rx_alloc_task, (unsigned long)rxq);
473 
474 	pkts = rx_alloc_pkts(rxq);
475 	if (!pkts) {
476 		err = -ENOMEM;
477 		goto err_rx_pkts;
478 	}
479 
480 	err = rx_request_irq(rxq);
481 	if (err) {
482 		netdev_err(netdev, "Failed to request Rx irq\n");
483 		goto err_req_rx_irq;
484 	}
485 
486 	return 0;
487 
488 err_req_rx_irq:
489 err_rx_pkts:
490 	tasklet_kill(&rxq->rx_task);
491 	free_all_rx_skbs(rxq);
492 	devm_kfree(&netdev->dev, rxq->irq_name);
493 	return err;
494 }
495 
496 /**
497  * hinic_clean_rxq - Clean the Rx Queue
498  * @rxq: Logical Rx Queue
499  **/
500 void hinic_clean_rxq(struct hinic_rxq *rxq)
501 {
502 	struct net_device *netdev = rxq->netdev;
503 
504 	rx_free_irq(rxq);
505 
506 	tasklet_kill(&rxq->rx_task);
507 	free_all_rx_skbs(rxq);
508 	devm_kfree(&netdev->dev, rxq->irq_name);
509 }
510