1 /*
2  * Copyright 2015 Amazon.com, Inc. or its affiliates.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #ifdef CONFIG_RFS_ACCEL
36 #include <linux/cpu_rmap.h>
37 #endif /* CONFIG_RFS_ACCEL */
38 #include <linux/ethtool.h>
39 #include <linux/if_vlan.h>
40 #include <linux/kernel.h>
41 #include <linux/module.h>
42 #include <linux/moduleparam.h>
43 #include <linux/numa.h>
44 #include <linux/pci.h>
45 #include <linux/utsname.h>
46 #include <linux/version.h>
47 #include <linux/vmalloc.h>
48 #include <net/ip.h>
49 
50 #include "ena_netdev.h"
51 #include "ena_pci_id_tbl.h"
52 
53 static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
54 
55 MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
56 MODULE_DESCRIPTION(DEVICE_NAME);
57 MODULE_LICENSE("GPL");
58 MODULE_VERSION(DRV_MODULE_VERSION);
59 
60 /* Time in jiffies before concluding the transmitter is hung. */
61 #define TX_TIMEOUT  (5 * HZ)
62 
63 #define ENA_NAPI_BUDGET 64
64 
65 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
66 		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
67 static int debug = -1;
68 module_param(debug, int, 0);
69 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
70 
71 static struct ena_aenq_handlers aenq_handlers;
72 
73 static struct workqueue_struct *ena_wq;
74 
75 MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
76 
77 static int ena_rss_init_default(struct ena_adapter *adapter);
78 
79 static void ena_tx_timeout(struct net_device *dev)
80 {
81 	struct ena_adapter *adapter = netdev_priv(dev);
82 
83 	/* Change the state of the device to trigger reset
84 	 * Check that we are not in the middle or a trigger already
85 	 */
86 
87 	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
88 		return;
89 
90 	adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
91 	u64_stats_update_begin(&adapter->syncp);
92 	adapter->dev_stats.tx_timeout++;
93 	u64_stats_update_end(&adapter->syncp);
94 
95 	netif_err(adapter, tx_err, dev, "Transmit time out\n");
96 }
97 
98 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
99 {
100 	int i;
101 
102 	for (i = 0; i < adapter->num_queues; i++)
103 		adapter->rx_ring[i].mtu = mtu;
104 }
105 
106 static int ena_change_mtu(struct net_device *dev, int new_mtu)
107 {
108 	struct ena_adapter *adapter = netdev_priv(dev);
109 	int ret;
110 
111 	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
112 	if (!ret) {
113 		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
114 		update_rx_ring_mtu(adapter, new_mtu);
115 		dev->mtu = new_mtu;
116 	} else {
117 		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
118 			  new_mtu);
119 	}
120 
121 	return ret;
122 }
123 
124 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
125 {
126 #ifdef CONFIG_RFS_ACCEL
127 	u32 i;
128 	int rc;
129 
130 	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
131 	if (!adapter->netdev->rx_cpu_rmap)
132 		return -ENOMEM;
133 	for (i = 0; i < adapter->num_queues; i++) {
134 		int irq_idx = ENA_IO_IRQ_IDX(i);
135 
136 		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
137 				      pci_irq_vector(adapter->pdev, irq_idx));
138 		if (rc) {
139 			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
140 			adapter->netdev->rx_cpu_rmap = NULL;
141 			return rc;
142 		}
143 	}
144 #endif /* CONFIG_RFS_ACCEL */
145 	return 0;
146 }
147 
148 static void ena_init_io_rings_common(struct ena_adapter *adapter,
149 				     struct ena_ring *ring, u16 qid)
150 {
151 	ring->qid = qid;
152 	ring->pdev = adapter->pdev;
153 	ring->dev = &adapter->pdev->dev;
154 	ring->netdev = adapter->netdev;
155 	ring->napi = &adapter->ena_napi[qid].napi;
156 	ring->adapter = adapter;
157 	ring->ena_dev = adapter->ena_dev;
158 	ring->per_napi_packets = 0;
159 	ring->per_napi_bytes = 0;
160 	ring->cpu = 0;
161 	u64_stats_init(&ring->syncp);
162 }
163 
164 static void ena_init_io_rings(struct ena_adapter *adapter)
165 {
166 	struct ena_com_dev *ena_dev;
167 	struct ena_ring *txr, *rxr;
168 	int i;
169 
170 	ena_dev = adapter->ena_dev;
171 
172 	for (i = 0; i < adapter->num_queues; i++) {
173 		txr = &adapter->tx_ring[i];
174 		rxr = &adapter->rx_ring[i];
175 
176 		/* TX/RX common ring state */
177 		ena_init_io_rings_common(adapter, txr, i);
178 		ena_init_io_rings_common(adapter, rxr, i);
179 
180 		/* TX specific ring state */
181 		txr->ring_size = adapter->tx_ring_size;
182 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
183 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
184 		txr->sgl_size = adapter->max_tx_sgl_size;
185 		txr->smoothed_interval =
186 			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
187 
188 		/* RX specific ring state */
189 		rxr->ring_size = adapter->rx_ring_size;
190 		rxr->rx_copybreak = adapter->rx_copybreak;
191 		rxr->sgl_size = adapter->max_rx_sgl_size;
192 		rxr->smoothed_interval =
193 			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
194 		rxr->empty_rx_queue = 0;
195 	}
196 }
197 
198 /* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
199  * @adapter: network interface device structure
200  * @qid: queue index
201  *
202  * Return 0 on success, negative on failure
203  */
204 static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
205 {
206 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
207 	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
208 	int size, i, node;
209 
210 	if (tx_ring->tx_buffer_info) {
211 		netif_err(adapter, ifup,
212 			  adapter->netdev, "tx_buffer_info info is not NULL");
213 		return -EEXIST;
214 	}
215 
216 	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
217 	node = cpu_to_node(ena_irq->cpu);
218 
219 	tx_ring->tx_buffer_info = vzalloc_node(size, node);
220 	if (!tx_ring->tx_buffer_info) {
221 		tx_ring->tx_buffer_info = vzalloc(size);
222 		if (!tx_ring->tx_buffer_info)
223 			return -ENOMEM;
224 	}
225 
226 	size = sizeof(u16) * tx_ring->ring_size;
227 	tx_ring->free_tx_ids = vzalloc_node(size, node);
228 	if (!tx_ring->free_tx_ids) {
229 		tx_ring->free_tx_ids = vzalloc(size);
230 		if (!tx_ring->free_tx_ids) {
231 			vfree(tx_ring->tx_buffer_info);
232 			return -ENOMEM;
233 		}
234 	}
235 
236 	/* Req id ring for TX out of order completions */
237 	for (i = 0; i < tx_ring->ring_size; i++)
238 		tx_ring->free_tx_ids[i] = i;
239 
240 	/* Reset tx statistics */
241 	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
242 
243 	tx_ring->next_to_use = 0;
244 	tx_ring->next_to_clean = 0;
245 	tx_ring->cpu = ena_irq->cpu;
246 	return 0;
247 }
248 
249 /* ena_free_tx_resources - Free I/O Tx Resources per Queue
250  * @adapter: network interface device structure
251  * @qid: queue index
252  *
253  * Free all transmit software resources
254  */
255 static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
256 {
257 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
258 
259 	vfree(tx_ring->tx_buffer_info);
260 	tx_ring->tx_buffer_info = NULL;
261 
262 	vfree(tx_ring->free_tx_ids);
263 	tx_ring->free_tx_ids = NULL;
264 }
265 
266 /* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
267  * @adapter: private structure
268  *
269  * Return 0 on success, negative on failure
270  */
271 static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
272 {
273 	int i, rc = 0;
274 
275 	for (i = 0; i < adapter->num_queues; i++) {
276 		rc = ena_setup_tx_resources(adapter, i);
277 		if (rc)
278 			goto err_setup_tx;
279 	}
280 
281 	return 0;
282 
283 err_setup_tx:
284 
285 	netif_err(adapter, ifup, adapter->netdev,
286 		  "Tx queue %d: allocation failed\n", i);
287 
288 	/* rewind the index freeing the rings as we go */
289 	while (i--)
290 		ena_free_tx_resources(adapter, i);
291 	return rc;
292 }
293 
294 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
295  * @adapter: board private structure
296  *
297  * Free all transmit software resources
298  */
299 static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
300 {
301 	int i;
302 
303 	for (i = 0; i < adapter->num_queues; i++)
304 		ena_free_tx_resources(adapter, i);
305 }
306 
307 static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
308 {
309 	if (likely(req_id < rx_ring->ring_size))
310 		return 0;
311 
312 	netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
313 		  "Invalid rx req_id: %hu\n", req_id);
314 
315 	u64_stats_update_begin(&rx_ring->syncp);
316 	rx_ring->rx_stats.bad_req_id++;
317 	u64_stats_update_end(&rx_ring->syncp);
318 
319 	/* Trigger device reset */
320 	rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
321 	set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
322 	return -EFAULT;
323 }
324 
325 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
326  * @adapter: network interface device structure
327  * @qid: queue index
328  *
329  * Returns 0 on success, negative on failure
330  */
331 static int ena_setup_rx_resources(struct ena_adapter *adapter,
332 				  u32 qid)
333 {
334 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
335 	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
336 	int size, node, i;
337 
338 	if (rx_ring->rx_buffer_info) {
339 		netif_err(adapter, ifup, adapter->netdev,
340 			  "rx_buffer_info is not NULL");
341 		return -EEXIST;
342 	}
343 
344 	/* alloc extra element so in rx path
345 	 * we can always prefetch rx_info + 1
346 	 */
347 	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
348 	node = cpu_to_node(ena_irq->cpu);
349 
350 	rx_ring->rx_buffer_info = vzalloc_node(size, node);
351 	if (!rx_ring->rx_buffer_info) {
352 		rx_ring->rx_buffer_info = vzalloc(size);
353 		if (!rx_ring->rx_buffer_info)
354 			return -ENOMEM;
355 	}
356 
357 	size = sizeof(u16) * rx_ring->ring_size;
358 	rx_ring->free_rx_ids = vzalloc_node(size, node);
359 	if (!rx_ring->free_rx_ids) {
360 		rx_ring->free_rx_ids = vzalloc(size);
361 		if (!rx_ring->free_rx_ids) {
362 			vfree(rx_ring->rx_buffer_info);
363 			return -ENOMEM;
364 		}
365 	}
366 
367 	/* Req id ring for receiving RX pkts out of order */
368 	for (i = 0; i < rx_ring->ring_size; i++)
369 		rx_ring->free_rx_ids[i] = i;
370 
371 	/* Reset rx statistics */
372 	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
373 
374 	rx_ring->next_to_clean = 0;
375 	rx_ring->next_to_use = 0;
376 	rx_ring->cpu = ena_irq->cpu;
377 
378 	return 0;
379 }
380 
381 /* ena_free_rx_resources - Free I/O Rx Resources
382  * @adapter: network interface device structure
383  * @qid: queue index
384  *
385  * Free all receive software resources
386  */
387 static void ena_free_rx_resources(struct ena_adapter *adapter,
388 				  u32 qid)
389 {
390 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
391 
392 	vfree(rx_ring->rx_buffer_info);
393 	rx_ring->rx_buffer_info = NULL;
394 
395 	vfree(rx_ring->free_rx_ids);
396 	rx_ring->free_rx_ids = NULL;
397 }
398 
399 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
400  * @adapter: board private structure
401  *
402  * Return 0 on success, negative on failure
403  */
404 static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
405 {
406 	int i, rc = 0;
407 
408 	for (i = 0; i < adapter->num_queues; i++) {
409 		rc = ena_setup_rx_resources(adapter, i);
410 		if (rc)
411 			goto err_setup_rx;
412 	}
413 
414 	return 0;
415 
416 err_setup_rx:
417 
418 	netif_err(adapter, ifup, adapter->netdev,
419 		  "Rx queue %d: allocation failed\n", i);
420 
421 	/* rewind the index freeing the rings as we go */
422 	while (i--)
423 		ena_free_rx_resources(adapter, i);
424 	return rc;
425 }
426 
427 /* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
428  * @adapter: board private structure
429  *
430  * Free all receive software resources
431  */
432 static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
433 {
434 	int i;
435 
436 	for (i = 0; i < adapter->num_queues; i++)
437 		ena_free_rx_resources(adapter, i);
438 }
439 
440 static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
441 				    struct ena_rx_buffer *rx_info, gfp_t gfp)
442 {
443 	struct ena_com_buf *ena_buf;
444 	struct page *page;
445 	dma_addr_t dma;
446 
447 	/* if previous allocated page is not used */
448 	if (unlikely(rx_info->page))
449 		return 0;
450 
451 	page = alloc_page(gfp);
452 	if (unlikely(!page)) {
453 		u64_stats_update_begin(&rx_ring->syncp);
454 		rx_ring->rx_stats.page_alloc_fail++;
455 		u64_stats_update_end(&rx_ring->syncp);
456 		return -ENOMEM;
457 	}
458 
459 	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
460 			   DMA_FROM_DEVICE);
461 	if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
462 		u64_stats_update_begin(&rx_ring->syncp);
463 		rx_ring->rx_stats.dma_mapping_err++;
464 		u64_stats_update_end(&rx_ring->syncp);
465 
466 		__free_page(page);
467 		return -EIO;
468 	}
469 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
470 		  "alloc page %p, rx_info %p\n", page, rx_info);
471 
472 	rx_info->page = page;
473 	rx_info->page_offset = 0;
474 	ena_buf = &rx_info->ena_buf;
475 	ena_buf->paddr = dma;
476 	ena_buf->len = PAGE_SIZE;
477 
478 	return 0;
479 }
480 
481 static void ena_free_rx_page(struct ena_ring *rx_ring,
482 			     struct ena_rx_buffer *rx_info)
483 {
484 	struct page *page = rx_info->page;
485 	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
486 
487 	if (unlikely(!page)) {
488 		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
489 			   "Trying to free unallocated buffer\n");
490 		return;
491 	}
492 
493 	dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE,
494 		       DMA_FROM_DEVICE);
495 
496 	__free_page(page);
497 	rx_info->page = NULL;
498 }
499 
500 static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
501 {
502 	u16 next_to_use, req_id;
503 	u32 i;
504 	int rc;
505 
506 	next_to_use = rx_ring->next_to_use;
507 
508 	for (i = 0; i < num; i++) {
509 		struct ena_rx_buffer *rx_info;
510 
511 		req_id = rx_ring->free_rx_ids[next_to_use];
512 		rc = validate_rx_req_id(rx_ring, req_id);
513 		if (unlikely(rc < 0))
514 			break;
515 
516 		rx_info = &rx_ring->rx_buffer_info[req_id];
517 
518 
519 		rc = ena_alloc_rx_page(rx_ring, rx_info,
520 				       __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
521 		if (unlikely(rc < 0)) {
522 			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
523 				   "failed to alloc buffer for rx queue %d\n",
524 				   rx_ring->qid);
525 			break;
526 		}
527 		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
528 						&rx_info->ena_buf,
529 						req_id);
530 		if (unlikely(rc)) {
531 			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
532 				   "failed to add buffer for rx queue %d\n",
533 				   rx_ring->qid);
534 			break;
535 		}
536 		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
537 						   rx_ring->ring_size);
538 	}
539 
540 	if (unlikely(i < num)) {
541 		u64_stats_update_begin(&rx_ring->syncp);
542 		rx_ring->rx_stats.refil_partial++;
543 		u64_stats_update_end(&rx_ring->syncp);
544 		netdev_warn(rx_ring->netdev,
545 			    "refilled rx qid %d with only %d buffers (from %d)\n",
546 			    rx_ring->qid, i, num);
547 	}
548 
549 	if (likely(i)) {
550 		/* Add memory barrier to make sure the desc were written before
551 		 * issue a doorbell
552 		 */
553 		wmb();
554 		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
555 	}
556 
557 	rx_ring->next_to_use = next_to_use;
558 
559 	return i;
560 }
561 
562 static void ena_free_rx_bufs(struct ena_adapter *adapter,
563 			     u32 qid)
564 {
565 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
566 	u32 i;
567 
568 	for (i = 0; i < rx_ring->ring_size; i++) {
569 		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
570 
571 		if (rx_info->page)
572 			ena_free_rx_page(rx_ring, rx_info);
573 	}
574 }
575 
576 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers
577  * @adapter: board private structure
578  *
579  */
580 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
581 {
582 	struct ena_ring *rx_ring;
583 	int i, rc, bufs_num;
584 
585 	for (i = 0; i < adapter->num_queues; i++) {
586 		rx_ring = &adapter->rx_ring[i];
587 		bufs_num = rx_ring->ring_size - 1;
588 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
589 
590 		if (unlikely(rc != bufs_num))
591 			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
592 				   "refilling Queue %d failed. allocated %d buffers from: %d\n",
593 				   i, rc, bufs_num);
594 	}
595 }
596 
597 static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
598 {
599 	int i;
600 
601 	for (i = 0; i < adapter->num_queues; i++)
602 		ena_free_rx_bufs(adapter, i);
603 }
604 
605 /* ena_free_tx_bufs - Free Tx Buffers per Queue
606  * @tx_ring: TX ring for which buffers be freed
607  */
608 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
609 {
610 	bool print_once = true;
611 	u32 i;
612 
613 	for (i = 0; i < tx_ring->ring_size; i++) {
614 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
615 		struct ena_com_buf *ena_buf;
616 		int nr_frags;
617 		int j;
618 
619 		if (!tx_info->skb)
620 			continue;
621 
622 		if (print_once) {
623 			netdev_notice(tx_ring->netdev,
624 				      "free uncompleted tx skb qid %d idx 0x%x\n",
625 				      tx_ring->qid, i);
626 			print_once = false;
627 		} else {
628 			netdev_dbg(tx_ring->netdev,
629 				   "free uncompleted tx skb qid %d idx 0x%x\n",
630 				   tx_ring->qid, i);
631 		}
632 
633 		ena_buf = tx_info->bufs;
634 		dma_unmap_single(tx_ring->dev,
635 				 ena_buf->paddr,
636 				 ena_buf->len,
637 				 DMA_TO_DEVICE);
638 
639 		/* unmap remaining mapped pages */
640 		nr_frags = tx_info->num_of_bufs - 1;
641 		for (j = 0; j < nr_frags; j++) {
642 			ena_buf++;
643 			dma_unmap_page(tx_ring->dev,
644 				       ena_buf->paddr,
645 				       ena_buf->len,
646 				       DMA_TO_DEVICE);
647 		}
648 
649 		dev_kfree_skb_any(tx_info->skb);
650 	}
651 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
652 						  tx_ring->qid));
653 }
654 
655 static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
656 {
657 	struct ena_ring *tx_ring;
658 	int i;
659 
660 	for (i = 0; i < adapter->num_queues; i++) {
661 		tx_ring = &adapter->tx_ring[i];
662 		ena_free_tx_bufs(tx_ring);
663 	}
664 }
665 
666 static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
667 {
668 	u16 ena_qid;
669 	int i;
670 
671 	for (i = 0; i < adapter->num_queues; i++) {
672 		ena_qid = ENA_IO_TXQ_IDX(i);
673 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
674 	}
675 }
676 
677 static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
678 {
679 	u16 ena_qid;
680 	int i;
681 
682 	for (i = 0; i < adapter->num_queues; i++) {
683 		ena_qid = ENA_IO_RXQ_IDX(i);
684 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
685 	}
686 }
687 
688 static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
689 {
690 	ena_destroy_all_tx_queues(adapter);
691 	ena_destroy_all_rx_queues(adapter);
692 }
693 
694 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
695 {
696 	struct ena_tx_buffer *tx_info = NULL;
697 
698 	if (likely(req_id < tx_ring->ring_size)) {
699 		tx_info = &tx_ring->tx_buffer_info[req_id];
700 		if (likely(tx_info->skb))
701 			return 0;
702 	}
703 
704 	if (tx_info)
705 		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
706 			  "tx_info doesn't have valid skb\n");
707 	else
708 		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
709 			  "Invalid req_id: %hu\n", req_id);
710 
711 	u64_stats_update_begin(&tx_ring->syncp);
712 	tx_ring->tx_stats.bad_req_id++;
713 	u64_stats_update_end(&tx_ring->syncp);
714 
715 	/* Trigger device reset */
716 	tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
717 	set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
718 	return -EFAULT;
719 }
720 
721 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
722 {
723 	struct netdev_queue *txq;
724 	bool above_thresh;
725 	u32 tx_bytes = 0;
726 	u32 total_done = 0;
727 	u16 next_to_clean;
728 	u16 req_id;
729 	int tx_pkts = 0;
730 	int rc;
731 
732 	next_to_clean = tx_ring->next_to_clean;
733 	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
734 
735 	while (tx_pkts < budget) {
736 		struct ena_tx_buffer *tx_info;
737 		struct sk_buff *skb;
738 		struct ena_com_buf *ena_buf;
739 		int i, nr_frags;
740 
741 		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
742 						&req_id);
743 		if (rc)
744 			break;
745 
746 		rc = validate_tx_req_id(tx_ring, req_id);
747 		if (rc)
748 			break;
749 
750 		tx_info = &tx_ring->tx_buffer_info[req_id];
751 		skb = tx_info->skb;
752 
753 		/* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */
754 		prefetch(&skb->end);
755 
756 		tx_info->skb = NULL;
757 		tx_info->last_jiffies = 0;
758 
759 		if (likely(tx_info->num_of_bufs != 0)) {
760 			ena_buf = tx_info->bufs;
761 
762 			dma_unmap_single(tx_ring->dev,
763 					 dma_unmap_addr(ena_buf, paddr),
764 					 dma_unmap_len(ena_buf, len),
765 					 DMA_TO_DEVICE);
766 
767 			/* unmap remaining mapped pages */
768 			nr_frags = tx_info->num_of_bufs - 1;
769 			for (i = 0; i < nr_frags; i++) {
770 				ena_buf++;
771 				dma_unmap_page(tx_ring->dev,
772 					       dma_unmap_addr(ena_buf, paddr),
773 					       dma_unmap_len(ena_buf, len),
774 					       DMA_TO_DEVICE);
775 			}
776 		}
777 
778 		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
779 			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
780 			  skb);
781 
782 		tx_bytes += skb->len;
783 		dev_kfree_skb(skb);
784 		tx_pkts++;
785 		total_done += tx_info->tx_descs;
786 
787 		tx_ring->free_tx_ids[next_to_clean] = req_id;
788 		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
789 						     tx_ring->ring_size);
790 	}
791 
792 	tx_ring->next_to_clean = next_to_clean;
793 	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
794 	ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
795 
796 	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
797 
798 	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
799 		  "tx_poll: q %d done. total pkts: %d\n",
800 		  tx_ring->qid, tx_pkts);
801 
802 	/* need to make the rings circular update visible to
803 	 * ena_start_xmit() before checking for netif_queue_stopped().
804 	 */
805 	smp_mb();
806 
807 	above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
808 		ENA_TX_WAKEUP_THRESH;
809 	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
810 		__netif_tx_lock(txq, smp_processor_id());
811 		above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
812 			ENA_TX_WAKEUP_THRESH;
813 		if (netif_tx_queue_stopped(txq) && above_thresh) {
814 			netif_tx_wake_queue(txq);
815 			u64_stats_update_begin(&tx_ring->syncp);
816 			tx_ring->tx_stats.queue_wakeup++;
817 			u64_stats_update_end(&tx_ring->syncp);
818 		}
819 		__netif_tx_unlock(txq);
820 	}
821 
822 	tx_ring->per_napi_bytes += tx_bytes;
823 	tx_ring->per_napi_packets += tx_pkts;
824 
825 	return tx_pkts;
826 }
827 
828 static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
829 {
830 	struct sk_buff *skb;
831 
832 	if (frags)
833 		skb = napi_get_frags(rx_ring->napi);
834 	else
835 		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
836 						rx_ring->rx_copybreak);
837 
838 	if (unlikely(!skb)) {
839 		u64_stats_update_begin(&rx_ring->syncp);
840 		rx_ring->rx_stats.skb_alloc_fail++;
841 		u64_stats_update_end(&rx_ring->syncp);
842 		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
843 			  "Failed to allocate skb. frags: %d\n", frags);
844 		return NULL;
845 	}
846 
847 	return skb;
848 }
849 
850 static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
851 				  struct ena_com_rx_buf_info *ena_bufs,
852 				  u32 descs,
853 				  u16 *next_to_clean)
854 {
855 	struct sk_buff *skb;
856 	struct ena_rx_buffer *rx_info;
857 	u16 len, req_id, buf = 0;
858 	void *va;
859 
860 	len = ena_bufs[buf].len;
861 	req_id = ena_bufs[buf].req_id;
862 	rx_info = &rx_ring->rx_buffer_info[req_id];
863 
864 	if (unlikely(!rx_info->page)) {
865 		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
866 			  "Page is NULL\n");
867 		return NULL;
868 	}
869 
870 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
871 		  "rx_info %p page %p\n",
872 		  rx_info, rx_info->page);
873 
874 	/* save virt address of first buffer */
875 	va = page_address(rx_info->page) + rx_info->page_offset;
876 	prefetch(va + NET_IP_ALIGN);
877 
878 	if (len <= rx_ring->rx_copybreak) {
879 		skb = ena_alloc_skb(rx_ring, false);
880 		if (unlikely(!skb))
881 			return NULL;
882 
883 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
884 			  "rx allocated small packet. len %d. data_len %d\n",
885 			  skb->len, skb->data_len);
886 
887 		/* sync this buffer for CPU use */
888 		dma_sync_single_for_cpu(rx_ring->dev,
889 					dma_unmap_addr(&rx_info->ena_buf, paddr),
890 					len,
891 					DMA_FROM_DEVICE);
892 		skb_copy_to_linear_data(skb, va, len);
893 		dma_sync_single_for_device(rx_ring->dev,
894 					   dma_unmap_addr(&rx_info->ena_buf, paddr),
895 					   len,
896 					   DMA_FROM_DEVICE);
897 
898 		skb_put(skb, len);
899 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
900 		rx_ring->free_rx_ids[*next_to_clean] = req_id;
901 		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
902 						     rx_ring->ring_size);
903 		return skb;
904 	}
905 
906 	skb = ena_alloc_skb(rx_ring, true);
907 	if (unlikely(!skb))
908 		return NULL;
909 
910 	do {
911 		dma_unmap_page(rx_ring->dev,
912 			       dma_unmap_addr(&rx_info->ena_buf, paddr),
913 			       PAGE_SIZE, DMA_FROM_DEVICE);
914 
915 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
916 				rx_info->page_offset, len, PAGE_SIZE);
917 
918 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
919 			  "rx skb updated. len %d. data_len %d\n",
920 			  skb->len, skb->data_len);
921 
922 		rx_info->page = NULL;
923 
924 		rx_ring->free_rx_ids[*next_to_clean] = req_id;
925 		*next_to_clean =
926 			ENA_RX_RING_IDX_NEXT(*next_to_clean,
927 					     rx_ring->ring_size);
928 		if (likely(--descs == 0))
929 			break;
930 
931 		buf++;
932 		len = ena_bufs[buf].len;
933 		req_id = ena_bufs[buf].req_id;
934 		rx_info = &rx_ring->rx_buffer_info[req_id];
935 	} while (1);
936 
937 	return skb;
938 }
939 
940 /* ena_rx_checksum - indicate in skb if hw indicated a good cksum
941  * @adapter: structure containing adapter specific data
942  * @ena_rx_ctx: received packet context/metadata
943  * @skb: skb currently being received and modified
944  */
945 static inline void ena_rx_checksum(struct ena_ring *rx_ring,
946 				   struct ena_com_rx_ctx *ena_rx_ctx,
947 				   struct sk_buff *skb)
948 {
949 	/* Rx csum disabled */
950 	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
951 		skb->ip_summed = CHECKSUM_NONE;
952 		return;
953 	}
954 
955 	/* For fragmented packets the checksum isn't valid */
956 	if (ena_rx_ctx->frag) {
957 		skb->ip_summed = CHECKSUM_NONE;
958 		return;
959 	}
960 
961 	/* if IP and error */
962 	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
963 		     (ena_rx_ctx->l3_csum_err))) {
964 		/* ipv4 checksum error */
965 		skb->ip_summed = CHECKSUM_NONE;
966 		u64_stats_update_begin(&rx_ring->syncp);
967 		rx_ring->rx_stats.bad_csum++;
968 		u64_stats_update_end(&rx_ring->syncp);
969 		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
970 			  "RX IPv4 header checksum error\n");
971 		return;
972 	}
973 
974 	/* if TCP/UDP */
975 	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
976 		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
977 		if (unlikely(ena_rx_ctx->l4_csum_err)) {
978 			/* TCP/UDP checksum error */
979 			u64_stats_update_begin(&rx_ring->syncp);
980 			rx_ring->rx_stats.bad_csum++;
981 			u64_stats_update_end(&rx_ring->syncp);
982 			netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
983 				  "RX L4 checksum error\n");
984 			skb->ip_summed = CHECKSUM_NONE;
985 			return;
986 		}
987 
988 		skb->ip_summed = CHECKSUM_UNNECESSARY;
989 	}
990 }
991 
992 static void ena_set_rx_hash(struct ena_ring *rx_ring,
993 			    struct ena_com_rx_ctx *ena_rx_ctx,
994 			    struct sk_buff *skb)
995 {
996 	enum pkt_hash_types hash_type;
997 
998 	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
999 		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1000 			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1001 
1002 			hash_type = PKT_HASH_TYPE_L4;
1003 		else
1004 			hash_type = PKT_HASH_TYPE_NONE;
1005 
1006 		/* Override hash type if the packet is fragmented */
1007 		if (ena_rx_ctx->frag)
1008 			hash_type = PKT_HASH_TYPE_NONE;
1009 
1010 		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1011 	}
1012 }
1013 
1014 /* ena_clean_rx_irq - Cleanup RX irq
1015  * @rx_ring: RX ring to clean
1016  * @napi: napi handler
1017  * @budget: how many packets driver is allowed to clean
1018  *
1019  * Returns the number of cleaned buffers.
1020  */
1021 static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1022 			    u32 budget)
1023 {
1024 	u16 next_to_clean = rx_ring->next_to_clean;
1025 	u32 res_budget, work_done;
1026 
1027 	struct ena_com_rx_ctx ena_rx_ctx;
1028 	struct ena_adapter *adapter;
1029 	struct sk_buff *skb;
1030 	int refill_required;
1031 	int refill_threshold;
1032 	int rc = 0;
1033 	int total_len = 0;
1034 	int rx_copybreak_pkt = 0;
1035 	int i;
1036 
1037 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1038 		  "%s qid %d\n", __func__, rx_ring->qid);
1039 	res_budget = budget;
1040 
1041 	do {
1042 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1043 		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1044 		ena_rx_ctx.descs = 0;
1045 		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1046 				    rx_ring->ena_com_io_sq,
1047 				    &ena_rx_ctx);
1048 		if (unlikely(rc))
1049 			goto error;
1050 
1051 		if (unlikely(ena_rx_ctx.descs == 0))
1052 			break;
1053 
1054 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1055 			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1056 			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1057 			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1058 
1059 		/* allocate skb and fill it */
1060 		skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
1061 				 &next_to_clean);
1062 
1063 		/* exit if we failed to retrieve a buffer */
1064 		if (unlikely(!skb)) {
1065 			for (i = 0; i < ena_rx_ctx.descs; i++) {
1066 				rx_ring->free_tx_ids[next_to_clean] =
1067 					rx_ring->ena_bufs[i].req_id;
1068 				next_to_clean =
1069 					ENA_RX_RING_IDX_NEXT(next_to_clean,
1070 							     rx_ring->ring_size);
1071 			}
1072 			break;
1073 		}
1074 
1075 		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1076 
1077 		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1078 
1079 		skb_record_rx_queue(skb, rx_ring->qid);
1080 
1081 		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
1082 			total_len += rx_ring->ena_bufs[0].len;
1083 			rx_copybreak_pkt++;
1084 			napi_gro_receive(napi, skb);
1085 		} else {
1086 			total_len += skb->len;
1087 			napi_gro_frags(napi);
1088 		}
1089 
1090 		res_budget--;
1091 	} while (likely(res_budget));
1092 
1093 	work_done = budget - res_budget;
1094 	rx_ring->per_napi_bytes += total_len;
1095 	rx_ring->per_napi_packets += work_done;
1096 	u64_stats_update_begin(&rx_ring->syncp);
1097 	rx_ring->rx_stats.bytes += total_len;
1098 	rx_ring->rx_stats.cnt += work_done;
1099 	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1100 	u64_stats_update_end(&rx_ring->syncp);
1101 
1102 	rx_ring->next_to_clean = next_to_clean;
1103 
1104 	refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
1105 	refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
1106 
1107 	/* Optimization, try to batch new rx buffers */
1108 	if (refill_required > refill_threshold) {
1109 		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1110 		ena_refill_rx_bufs(rx_ring, refill_required);
1111 	}
1112 
1113 	return work_done;
1114 
1115 error:
1116 	adapter = netdev_priv(rx_ring->netdev);
1117 
1118 	u64_stats_update_begin(&rx_ring->syncp);
1119 	rx_ring->rx_stats.bad_desc_num++;
1120 	u64_stats_update_end(&rx_ring->syncp);
1121 
1122 	/* Too many desc from the device. Trigger reset */
1123 	adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1124 	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
1125 
1126 	return 0;
1127 }
1128 
1129 inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
1130 				       struct ena_ring *tx_ring)
1131 {
1132 	/* We apply adaptive moderation on Rx path only.
1133 	 * Tx uses static interrupt moderation.
1134 	 */
1135 	ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
1136 					  rx_ring->per_napi_packets,
1137 					  rx_ring->per_napi_bytes,
1138 					  &rx_ring->smoothed_interval,
1139 					  &rx_ring->moder_tbl_idx);
1140 
1141 	/* Reset per napi packets/bytes */
1142 	tx_ring->per_napi_packets = 0;
1143 	tx_ring->per_napi_bytes = 0;
1144 	rx_ring->per_napi_packets = 0;
1145 	rx_ring->per_napi_bytes = 0;
1146 }
1147 
1148 static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
1149 					struct ena_ring *rx_ring)
1150 {
1151 	struct ena_eth_io_intr_reg intr_reg;
1152 
1153 	/* Update intr register: rx intr delay,
1154 	 * tx intr delay and interrupt unmask
1155 	 */
1156 	ena_com_update_intr_reg(&intr_reg,
1157 				rx_ring->smoothed_interval,
1158 				tx_ring->smoothed_interval,
1159 				true);
1160 
1161 	/* It is a shared MSI-X.
1162 	 * Tx and Rx CQ have pointer to it.
1163 	 * So we use one of them to reach the intr reg
1164 	 */
1165 	ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
1166 }
1167 
1168 static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1169 					     struct ena_ring *rx_ring)
1170 {
1171 	int cpu = get_cpu();
1172 	int numa_node;
1173 
1174 	/* Check only one ring since the 2 rings are running on the same cpu */
1175 	if (likely(tx_ring->cpu == cpu))
1176 		goto out;
1177 
1178 	numa_node = cpu_to_node(cpu);
1179 	put_cpu();
1180 
1181 	if (numa_node != NUMA_NO_NODE) {
1182 		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1183 		ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
1184 	}
1185 
1186 	tx_ring->cpu = cpu;
1187 	rx_ring->cpu = cpu;
1188 
1189 	return;
1190 out:
1191 	put_cpu();
1192 }
1193 
1194 static int ena_io_poll(struct napi_struct *napi, int budget)
1195 {
1196 	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1197 	struct ena_ring *tx_ring, *rx_ring;
1198 
1199 	u32 tx_work_done;
1200 	u32 rx_work_done;
1201 	int tx_budget;
1202 	int napi_comp_call = 0;
1203 	int ret;
1204 
1205 	tx_ring = ena_napi->tx_ring;
1206 	rx_ring = ena_napi->rx_ring;
1207 
1208 	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1209 
1210 	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1211 	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1212 		napi_complete_done(napi, 0);
1213 		return 0;
1214 	}
1215 
1216 	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1217 	rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1218 
1219 	/* If the device is about to reset or down, avoid unmask
1220 	 * the interrupt and return 0 so NAPI won't reschedule
1221 	 */
1222 	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1223 		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1224 		napi_complete_done(napi, 0);
1225 		ret = 0;
1226 
1227 	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1228 		napi_comp_call = 1;
1229 
1230 		/* Update numa and unmask the interrupt only when schedule
1231 		 * from the interrupt context (vs from sk_busy_loop)
1232 		 */
1233 		if (napi_complete_done(napi, rx_work_done)) {
1234 			/* Tx and Rx share the same interrupt vector */
1235 			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1236 				ena_adjust_intr_moderation(rx_ring, tx_ring);
1237 
1238 			ena_unmask_interrupt(tx_ring, rx_ring);
1239 		}
1240 
1241 		ena_update_ring_numa_node(tx_ring, rx_ring);
1242 
1243 		ret = rx_work_done;
1244 	} else {
1245 		ret = budget;
1246 	}
1247 
1248 	u64_stats_update_begin(&tx_ring->syncp);
1249 	tx_ring->tx_stats.napi_comp += napi_comp_call;
1250 	tx_ring->tx_stats.tx_poll++;
1251 	u64_stats_update_end(&tx_ring->syncp);
1252 
1253 	return ret;
1254 }
1255 
1256 static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1257 {
1258 	struct ena_adapter *adapter = (struct ena_adapter *)data;
1259 
1260 	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1261 
1262 	/* Don't call the aenq handler before probe is done */
1263 	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1264 		ena_com_aenq_intr_handler(adapter->ena_dev, data);
1265 
1266 	return IRQ_HANDLED;
1267 }
1268 
1269 /* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1270  * @irq: interrupt number
1271  * @data: pointer to a network interface private napi device structure
1272  */
1273 static irqreturn_t ena_intr_msix_io(int irq, void *data)
1274 {
1275 	struct ena_napi *ena_napi = data;
1276 
1277 	napi_schedule_irqoff(&ena_napi->napi);
1278 
1279 	return IRQ_HANDLED;
1280 }
1281 
1282 /* Reserve a single MSI-X vector for management (admin + aenq).
1283  * plus reserve one vector for each potential io queue.
1284  * the number of potential io queues is the minimum of what the device
1285  * supports and the number of vCPUs.
1286  */
1287 static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
1288 {
1289 	int msix_vecs, irq_cnt;
1290 
1291 	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1292 		netif_err(adapter, probe, adapter->netdev,
1293 			  "Error, MSI-X is already enabled\n");
1294 		return -EPERM;
1295 	}
1296 
1297 	/* Reserved the max msix vectors we might need */
1298 	msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
1299 
1300 	netif_dbg(adapter, probe, adapter->netdev,
1301 		  "trying to enable MSI-X, vectors %d\n", msix_vecs);
1302 
1303 	irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1304 					msix_vecs, PCI_IRQ_MSIX);
1305 
1306 	if (irq_cnt < 0) {
1307 		netif_err(adapter, probe, adapter->netdev,
1308 			  "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1309 		return -ENOSPC;
1310 	}
1311 
1312 	if (irq_cnt != msix_vecs) {
1313 		netif_notice(adapter, probe, adapter->netdev,
1314 			     "enable only %d MSI-X (out of %d), reduce the number of queues\n",
1315 			     irq_cnt, msix_vecs);
1316 		adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1317 	}
1318 
1319 	if (ena_init_rx_cpu_rmap(adapter))
1320 		netif_warn(adapter, probe, adapter->netdev,
1321 			   "Failed to map IRQs to CPUs\n");
1322 
1323 	adapter->msix_vecs = irq_cnt;
1324 	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1325 
1326 	return 0;
1327 }
1328 
1329 static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1330 {
1331 	u32 cpu;
1332 
1333 	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1334 		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1335 		 pci_name(adapter->pdev));
1336 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
1337 		ena_intr_msix_mgmnt;
1338 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1339 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1340 		pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
1341 	cpu = cpumask_first(cpu_online_mask);
1342 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
1343 	cpumask_set_cpu(cpu,
1344 			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
1345 }
1346 
1347 static void ena_setup_io_intr(struct ena_adapter *adapter)
1348 {
1349 	struct net_device *netdev;
1350 	int irq_idx, i, cpu;
1351 
1352 	netdev = adapter->netdev;
1353 
1354 	for (i = 0; i < adapter->num_queues; i++) {
1355 		irq_idx = ENA_IO_IRQ_IDX(i);
1356 		cpu = i % num_online_cpus();
1357 
1358 		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1359 			 "%s-Tx-Rx-%d", netdev->name, i);
1360 		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
1361 		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
1362 		adapter->irq_tbl[irq_idx].vector =
1363 			pci_irq_vector(adapter->pdev, irq_idx);
1364 		adapter->irq_tbl[irq_idx].cpu = cpu;
1365 
1366 		cpumask_set_cpu(cpu,
1367 				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
1368 	}
1369 }
1370 
1371 static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
1372 {
1373 	unsigned long flags = 0;
1374 	struct ena_irq *irq;
1375 	int rc;
1376 
1377 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1378 	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1379 			 irq->data);
1380 	if (rc) {
1381 		netif_err(adapter, probe, adapter->netdev,
1382 			  "failed to request admin irq\n");
1383 		return rc;
1384 	}
1385 
1386 	netif_dbg(adapter, probe, adapter->netdev,
1387 		  "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
1388 		  irq->affinity_hint_mask.bits[0], irq->vector);
1389 
1390 	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1391 
1392 	return rc;
1393 }
1394 
1395 static int ena_request_io_irq(struct ena_adapter *adapter)
1396 {
1397 	unsigned long flags = 0;
1398 	struct ena_irq *irq;
1399 	int rc = 0, i, k;
1400 
1401 	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1402 		netif_err(adapter, ifup, adapter->netdev,
1403 			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
1404 		return -EINVAL;
1405 	}
1406 
1407 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1408 		irq = &adapter->irq_tbl[i];
1409 		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1410 				 irq->data);
1411 		if (rc) {
1412 			netif_err(adapter, ifup, adapter->netdev,
1413 				  "Failed to request I/O IRQ. index %d rc %d\n",
1414 				   i, rc);
1415 			goto err;
1416 		}
1417 
1418 		netif_dbg(adapter, ifup, adapter->netdev,
1419 			  "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
1420 			  i, irq->affinity_hint_mask.bits[0], irq->vector);
1421 
1422 		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1423 	}
1424 
1425 	return rc;
1426 
1427 err:
1428 	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
1429 		irq = &adapter->irq_tbl[k];
1430 		free_irq(irq->vector, irq->data);
1431 	}
1432 
1433 	return rc;
1434 }
1435 
1436 static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
1437 {
1438 	struct ena_irq *irq;
1439 
1440 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1441 	synchronize_irq(irq->vector);
1442 	irq_set_affinity_hint(irq->vector, NULL);
1443 	free_irq(irq->vector, irq->data);
1444 }
1445 
1446 static void ena_free_io_irq(struct ena_adapter *adapter)
1447 {
1448 	struct ena_irq *irq;
1449 	int i;
1450 
1451 #ifdef CONFIG_RFS_ACCEL
1452 	if (adapter->msix_vecs >= 1) {
1453 		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
1454 		adapter->netdev->rx_cpu_rmap = NULL;
1455 	}
1456 #endif /* CONFIG_RFS_ACCEL */
1457 
1458 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1459 		irq = &adapter->irq_tbl[i];
1460 		irq_set_affinity_hint(irq->vector, NULL);
1461 		free_irq(irq->vector, irq->data);
1462 	}
1463 }
1464 
1465 static void ena_disable_msix(struct ena_adapter *adapter)
1466 {
1467 	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
1468 		pci_free_irq_vectors(adapter->pdev);
1469 }
1470 
1471 static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
1472 {
1473 	int i;
1474 
1475 	if (!netif_running(adapter->netdev))
1476 		return;
1477 
1478 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
1479 		synchronize_irq(adapter->irq_tbl[i].vector);
1480 }
1481 
1482 static void ena_del_napi(struct ena_adapter *adapter)
1483 {
1484 	int i;
1485 
1486 	for (i = 0; i < adapter->num_queues; i++)
1487 		netif_napi_del(&adapter->ena_napi[i].napi);
1488 }
1489 
1490 static void ena_init_napi(struct ena_adapter *adapter)
1491 {
1492 	struct ena_napi *napi;
1493 	int i;
1494 
1495 	for (i = 0; i < adapter->num_queues; i++) {
1496 		napi = &adapter->ena_napi[i];
1497 
1498 		netif_napi_add(adapter->netdev,
1499 			       &adapter->ena_napi[i].napi,
1500 			       ena_io_poll,
1501 			       ENA_NAPI_BUDGET);
1502 		napi->rx_ring = &adapter->rx_ring[i];
1503 		napi->tx_ring = &adapter->tx_ring[i];
1504 		napi->qid = i;
1505 	}
1506 }
1507 
1508 static void ena_napi_disable_all(struct ena_adapter *adapter)
1509 {
1510 	int i;
1511 
1512 	for (i = 0; i < adapter->num_queues; i++)
1513 		napi_disable(&adapter->ena_napi[i].napi);
1514 }
1515 
1516 static void ena_napi_enable_all(struct ena_adapter *adapter)
1517 {
1518 	int i;
1519 
1520 	for (i = 0; i < adapter->num_queues; i++)
1521 		napi_enable(&adapter->ena_napi[i].napi);
1522 }
1523 
1524 static void ena_restore_ethtool_params(struct ena_adapter *adapter)
1525 {
1526 	adapter->tx_usecs = 0;
1527 	adapter->rx_usecs = 0;
1528 	adapter->tx_frames = 1;
1529 	adapter->rx_frames = 1;
1530 }
1531 
1532 /* Configure the Rx forwarding */
1533 static int ena_rss_configure(struct ena_adapter *adapter)
1534 {
1535 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1536 	int rc;
1537 
1538 	/* In case the RSS table wasn't initialized by probe */
1539 	if (!ena_dev->rss.tbl_log_size) {
1540 		rc = ena_rss_init_default(adapter);
1541 		if (rc && (rc != -EOPNOTSUPP)) {
1542 			netif_err(adapter, ifup, adapter->netdev,
1543 				  "Failed to init RSS rc: %d\n", rc);
1544 			return rc;
1545 		}
1546 	}
1547 
1548 	/* Set indirect table */
1549 	rc = ena_com_indirect_table_set(ena_dev);
1550 	if (unlikely(rc && rc != -EOPNOTSUPP))
1551 		return rc;
1552 
1553 	/* Configure hash function (if supported) */
1554 	rc = ena_com_set_hash_function(ena_dev);
1555 	if (unlikely(rc && (rc != -EOPNOTSUPP)))
1556 		return rc;
1557 
1558 	/* Configure hash inputs (if supported) */
1559 	rc = ena_com_set_hash_ctrl(ena_dev);
1560 	if (unlikely(rc && (rc != -EOPNOTSUPP)))
1561 		return rc;
1562 
1563 	return 0;
1564 }
1565 
1566 static int ena_up_complete(struct ena_adapter *adapter)
1567 {
1568 	int rc, i;
1569 
1570 	rc = ena_rss_configure(adapter);
1571 	if (rc)
1572 		return rc;
1573 
1574 	ena_init_napi(adapter);
1575 
1576 	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
1577 
1578 	ena_refill_all_rx_bufs(adapter);
1579 
1580 	/* enable transmits */
1581 	netif_tx_start_all_queues(adapter->netdev);
1582 
1583 	ena_restore_ethtool_params(adapter);
1584 
1585 	ena_napi_enable_all(adapter);
1586 
1587 	/* Enable completion queues interrupt */
1588 	for (i = 0; i < adapter->num_queues; i++)
1589 		ena_unmask_interrupt(&adapter->tx_ring[i],
1590 				     &adapter->rx_ring[i]);
1591 
1592 	/* schedule napi in case we had pending packets
1593 	 * from the last time we disable napi
1594 	 */
1595 	for (i = 0; i < adapter->num_queues; i++)
1596 		napi_schedule(&adapter->ena_napi[i].napi);
1597 
1598 	return 0;
1599 }
1600 
1601 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
1602 {
1603 	struct ena_com_create_io_ctx ctx = { 0 };
1604 	struct ena_com_dev *ena_dev;
1605 	struct ena_ring *tx_ring;
1606 	u32 msix_vector;
1607 	u16 ena_qid;
1608 	int rc;
1609 
1610 	ena_dev = adapter->ena_dev;
1611 
1612 	tx_ring = &adapter->tx_ring[qid];
1613 	msix_vector = ENA_IO_IRQ_IDX(qid);
1614 	ena_qid = ENA_IO_TXQ_IDX(qid);
1615 
1616 	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1617 	ctx.qid = ena_qid;
1618 	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1619 	ctx.msix_vector = msix_vector;
1620 	ctx.queue_size = adapter->tx_ring_size;
1621 	ctx.numa_node = cpu_to_node(tx_ring->cpu);
1622 
1623 	rc = ena_com_create_io_queue(ena_dev, &ctx);
1624 	if (rc) {
1625 		netif_err(adapter, ifup, adapter->netdev,
1626 			  "Failed to create I/O TX queue num %d rc: %d\n",
1627 			  qid, rc);
1628 		return rc;
1629 	}
1630 
1631 	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1632 				     &tx_ring->ena_com_io_sq,
1633 				     &tx_ring->ena_com_io_cq);
1634 	if (rc) {
1635 		netif_err(adapter, ifup, adapter->netdev,
1636 			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
1637 			  qid, rc);
1638 		ena_com_destroy_io_queue(ena_dev, ena_qid);
1639 		return rc;
1640 	}
1641 
1642 	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
1643 	return rc;
1644 }
1645 
1646 static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
1647 {
1648 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1649 	int rc, i;
1650 
1651 	for (i = 0; i < adapter->num_queues; i++) {
1652 		rc = ena_create_io_tx_queue(adapter, i);
1653 		if (rc)
1654 			goto create_err;
1655 	}
1656 
1657 	return 0;
1658 
1659 create_err:
1660 	while (i--)
1661 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1662 
1663 	return rc;
1664 }
1665 
1666 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
1667 {
1668 	struct ena_com_dev *ena_dev;
1669 	struct ena_com_create_io_ctx ctx = { 0 };
1670 	struct ena_ring *rx_ring;
1671 	u32 msix_vector;
1672 	u16 ena_qid;
1673 	int rc;
1674 
1675 	ena_dev = adapter->ena_dev;
1676 
1677 	rx_ring = &adapter->rx_ring[qid];
1678 	msix_vector = ENA_IO_IRQ_IDX(qid);
1679 	ena_qid = ENA_IO_RXQ_IDX(qid);
1680 
1681 	ctx.qid = ena_qid;
1682 	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1683 	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1684 	ctx.msix_vector = msix_vector;
1685 	ctx.queue_size = adapter->rx_ring_size;
1686 	ctx.numa_node = cpu_to_node(rx_ring->cpu);
1687 
1688 	rc = ena_com_create_io_queue(ena_dev, &ctx);
1689 	if (rc) {
1690 		netif_err(adapter, ifup, adapter->netdev,
1691 			  "Failed to create I/O RX queue num %d rc: %d\n",
1692 			  qid, rc);
1693 		return rc;
1694 	}
1695 
1696 	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1697 				     &rx_ring->ena_com_io_sq,
1698 				     &rx_ring->ena_com_io_cq);
1699 	if (rc) {
1700 		netif_err(adapter, ifup, adapter->netdev,
1701 			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
1702 			  qid, rc);
1703 		ena_com_destroy_io_queue(ena_dev, ena_qid);
1704 		return rc;
1705 	}
1706 
1707 	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
1708 
1709 	return rc;
1710 }
1711 
1712 static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
1713 {
1714 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1715 	int rc, i;
1716 
1717 	for (i = 0; i < adapter->num_queues; i++) {
1718 		rc = ena_create_io_rx_queue(adapter, i);
1719 		if (rc)
1720 			goto create_err;
1721 	}
1722 
1723 	return 0;
1724 
1725 create_err:
1726 	while (i--)
1727 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1728 
1729 	return rc;
1730 }
1731 
1732 static int ena_up(struct ena_adapter *adapter)
1733 {
1734 	int rc;
1735 
1736 	netdev_dbg(adapter->netdev, "%s\n", __func__);
1737 
1738 	ena_setup_io_intr(adapter);
1739 
1740 	rc = ena_request_io_irq(adapter);
1741 	if (rc)
1742 		goto err_req_irq;
1743 
1744 	/* allocate transmit descriptors */
1745 	rc = ena_setup_all_tx_resources(adapter);
1746 	if (rc)
1747 		goto err_setup_tx;
1748 
1749 	/* allocate receive descriptors */
1750 	rc = ena_setup_all_rx_resources(adapter);
1751 	if (rc)
1752 		goto err_setup_rx;
1753 
1754 	/* Create TX queues */
1755 	rc = ena_create_all_io_tx_queues(adapter);
1756 	if (rc)
1757 		goto err_create_tx_queues;
1758 
1759 	/* Create RX queues */
1760 	rc = ena_create_all_io_rx_queues(adapter);
1761 	if (rc)
1762 		goto err_create_rx_queues;
1763 
1764 	rc = ena_up_complete(adapter);
1765 	if (rc)
1766 		goto err_up;
1767 
1768 	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
1769 		netif_carrier_on(adapter->netdev);
1770 
1771 	u64_stats_update_begin(&adapter->syncp);
1772 	adapter->dev_stats.interface_up++;
1773 	u64_stats_update_end(&adapter->syncp);
1774 
1775 	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1776 
1777 	return rc;
1778 
1779 err_up:
1780 	ena_destroy_all_rx_queues(adapter);
1781 err_create_rx_queues:
1782 	ena_destroy_all_tx_queues(adapter);
1783 err_create_tx_queues:
1784 	ena_free_all_io_rx_resources(adapter);
1785 err_setup_rx:
1786 	ena_free_all_io_tx_resources(adapter);
1787 err_setup_tx:
1788 	ena_free_io_irq(adapter);
1789 err_req_irq:
1790 
1791 	return rc;
1792 }
1793 
1794 static void ena_down(struct ena_adapter *adapter)
1795 {
1796 	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
1797 
1798 	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1799 
1800 	u64_stats_update_begin(&adapter->syncp);
1801 	adapter->dev_stats.interface_down++;
1802 	u64_stats_update_end(&adapter->syncp);
1803 
1804 	netif_carrier_off(adapter->netdev);
1805 	netif_tx_disable(adapter->netdev);
1806 
1807 	/* After this point the napi handler won't enable the tx queue */
1808 	ena_napi_disable_all(adapter);
1809 
1810 	/* After destroy the queue there won't be any new interrupts */
1811 
1812 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
1813 		int rc;
1814 
1815 		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
1816 		if (rc)
1817 			dev_err(&adapter->pdev->dev, "Device reset failed\n");
1818 	}
1819 
1820 	ena_destroy_all_io_queues(adapter);
1821 
1822 	ena_disable_io_intr_sync(adapter);
1823 	ena_free_io_irq(adapter);
1824 	ena_del_napi(adapter);
1825 
1826 	ena_free_all_tx_bufs(adapter);
1827 	ena_free_all_rx_bufs(adapter);
1828 	ena_free_all_io_tx_resources(adapter);
1829 	ena_free_all_io_rx_resources(adapter);
1830 }
1831 
1832 /* ena_open - Called when a network interface is made active
1833  * @netdev: network interface device structure
1834  *
1835  * Returns 0 on success, negative value on failure
1836  *
1837  * The open entry point is called when a network interface is made
1838  * active by the system (IFF_UP).  At this point all resources needed
1839  * for transmit and receive operations are allocated, the interrupt
1840  * handler is registered with the OS, the watchdog timer is started,
1841  * and the stack is notified that the interface is ready.
1842  */
1843 static int ena_open(struct net_device *netdev)
1844 {
1845 	struct ena_adapter *adapter = netdev_priv(netdev);
1846 	int rc;
1847 
1848 	/* Notify the stack of the actual queue counts. */
1849 	rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
1850 	if (rc) {
1851 		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
1852 		return rc;
1853 	}
1854 
1855 	rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
1856 	if (rc) {
1857 		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
1858 		return rc;
1859 	}
1860 
1861 	rc = ena_up(adapter);
1862 	if (rc)
1863 		return rc;
1864 
1865 	return rc;
1866 }
1867 
1868 /* ena_close - Disables a network interface
1869  * @netdev: network interface device structure
1870  *
1871  * Returns 0, this is not allowed to fail
1872  *
1873  * The close entry point is called when an interface is de-activated
1874  * by the OS.  The hardware is still under the drivers control, but
1875  * needs to be disabled.  A global MAC reset is issued to stop the
1876  * hardware, and all transmit and receive resources are freed.
1877  */
1878 static int ena_close(struct net_device *netdev)
1879 {
1880 	struct ena_adapter *adapter = netdev_priv(netdev);
1881 
1882 	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
1883 
1884 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
1885 		ena_down(adapter);
1886 
1887 	return 0;
1888 }
1889 
1890 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
1891 {
1892 	u32 mss = skb_shinfo(skb)->gso_size;
1893 	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
1894 	u8 l4_protocol = 0;
1895 
1896 	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
1897 		ena_tx_ctx->l4_csum_enable = 1;
1898 		if (mss) {
1899 			ena_tx_ctx->tso_enable = 1;
1900 			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
1901 			ena_tx_ctx->l4_csum_partial = 0;
1902 		} else {
1903 			ena_tx_ctx->tso_enable = 0;
1904 			ena_meta->l4_hdr_len = 0;
1905 			ena_tx_ctx->l4_csum_partial = 1;
1906 		}
1907 
1908 		switch (ip_hdr(skb)->version) {
1909 		case IPVERSION:
1910 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
1911 			if (ip_hdr(skb)->frag_off & htons(IP_DF))
1912 				ena_tx_ctx->df = 1;
1913 			if (mss)
1914 				ena_tx_ctx->l3_csum_enable = 1;
1915 			l4_protocol = ip_hdr(skb)->protocol;
1916 			break;
1917 		case 6:
1918 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
1919 			l4_protocol = ipv6_hdr(skb)->nexthdr;
1920 			break;
1921 		default:
1922 			break;
1923 		}
1924 
1925 		if (l4_protocol == IPPROTO_TCP)
1926 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
1927 		else
1928 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
1929 
1930 		ena_meta->mss = mss;
1931 		ena_meta->l3_hdr_len = skb_network_header_len(skb);
1932 		ena_meta->l3_hdr_offset = skb_network_offset(skb);
1933 		ena_tx_ctx->meta_valid = 1;
1934 
1935 	} else {
1936 		ena_tx_ctx->meta_valid = 0;
1937 	}
1938 }
1939 
1940 static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
1941 				       struct sk_buff *skb)
1942 {
1943 	int num_frags, header_len, rc;
1944 
1945 	num_frags = skb_shinfo(skb)->nr_frags;
1946 	header_len = skb_headlen(skb);
1947 
1948 	if (num_frags < tx_ring->sgl_size)
1949 		return 0;
1950 
1951 	if ((num_frags == tx_ring->sgl_size) &&
1952 	    (header_len < tx_ring->tx_max_header_size))
1953 		return 0;
1954 
1955 	u64_stats_update_begin(&tx_ring->syncp);
1956 	tx_ring->tx_stats.linearize++;
1957 	u64_stats_update_end(&tx_ring->syncp);
1958 
1959 	rc = skb_linearize(skb);
1960 	if (unlikely(rc)) {
1961 		u64_stats_update_begin(&tx_ring->syncp);
1962 		tx_ring->tx_stats.linearize_failed++;
1963 		u64_stats_update_end(&tx_ring->syncp);
1964 	}
1965 
1966 	return rc;
1967 }
1968 
1969 /* Called with netif_tx_lock. */
1970 static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
1971 {
1972 	struct ena_adapter *adapter = netdev_priv(dev);
1973 	struct ena_tx_buffer *tx_info;
1974 	struct ena_com_tx_ctx ena_tx_ctx;
1975 	struct ena_ring *tx_ring;
1976 	struct netdev_queue *txq;
1977 	struct ena_com_buf *ena_buf;
1978 	void *push_hdr;
1979 	u32 len, last_frag;
1980 	u16 next_to_use;
1981 	u16 req_id;
1982 	u16 push_len;
1983 	u16 header_len;
1984 	dma_addr_t dma;
1985 	int qid, rc, nb_hw_desc;
1986 	int i = -1;
1987 
1988 	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
1989 	/*  Determine which tx ring we will be placed on */
1990 	qid = skb_get_queue_mapping(skb);
1991 	tx_ring = &adapter->tx_ring[qid];
1992 	txq = netdev_get_tx_queue(dev, qid);
1993 
1994 	rc = ena_check_and_linearize_skb(tx_ring, skb);
1995 	if (unlikely(rc))
1996 		goto error_drop_packet;
1997 
1998 	skb_tx_timestamp(skb);
1999 	len = skb_headlen(skb);
2000 
2001 	next_to_use = tx_ring->next_to_use;
2002 	req_id = tx_ring->free_tx_ids[next_to_use];
2003 	tx_info = &tx_ring->tx_buffer_info[req_id];
2004 	tx_info->num_of_bufs = 0;
2005 
2006 	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2007 	ena_buf = tx_info->bufs;
2008 	tx_info->skb = skb;
2009 
2010 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2011 		/* prepared the push buffer */
2012 		push_len = min_t(u32, len, tx_ring->tx_max_header_size);
2013 		header_len = push_len;
2014 		push_hdr = skb->data;
2015 	} else {
2016 		push_len = 0;
2017 		header_len = min_t(u32, len, tx_ring->tx_max_header_size);
2018 		push_hdr = NULL;
2019 	}
2020 
2021 	netif_dbg(adapter, tx_queued, dev,
2022 		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2023 		  push_hdr, push_len);
2024 
2025 	if (len > push_len) {
2026 		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2027 				     len - push_len, DMA_TO_DEVICE);
2028 		if (dma_mapping_error(tx_ring->dev, dma))
2029 			goto error_report_dma_error;
2030 
2031 		ena_buf->paddr = dma;
2032 		ena_buf->len = len - push_len;
2033 
2034 		ena_buf++;
2035 		tx_info->num_of_bufs++;
2036 	}
2037 
2038 	last_frag = skb_shinfo(skb)->nr_frags;
2039 
2040 	for (i = 0; i < last_frag; i++) {
2041 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2042 
2043 		len = skb_frag_size(frag);
2044 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
2045 				       DMA_TO_DEVICE);
2046 		if (dma_mapping_error(tx_ring->dev, dma))
2047 			goto error_report_dma_error;
2048 
2049 		ena_buf->paddr = dma;
2050 		ena_buf->len = len;
2051 		ena_buf++;
2052 	}
2053 
2054 	tx_info->num_of_bufs += last_frag;
2055 
2056 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2057 	ena_tx_ctx.ena_bufs = tx_info->bufs;
2058 	ena_tx_ctx.push_header = push_hdr;
2059 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2060 	ena_tx_ctx.req_id = req_id;
2061 	ena_tx_ctx.header_len = header_len;
2062 
2063 	/* set flags and meta data */
2064 	ena_tx_csum(&ena_tx_ctx, skb);
2065 
2066 	/* prepare the packet's descriptors to dma engine */
2067 	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
2068 				&nb_hw_desc);
2069 
2070 	if (unlikely(rc)) {
2071 		netif_err(adapter, tx_queued, dev,
2072 			  "failed to prepare tx bufs\n");
2073 		u64_stats_update_begin(&tx_ring->syncp);
2074 		tx_ring->tx_stats.queue_stop++;
2075 		tx_ring->tx_stats.prepare_ctx_err++;
2076 		u64_stats_update_end(&tx_ring->syncp);
2077 		netif_tx_stop_queue(txq);
2078 		goto error_unmap_dma;
2079 	}
2080 
2081 	netdev_tx_sent_queue(txq, skb->len);
2082 
2083 	u64_stats_update_begin(&tx_ring->syncp);
2084 	tx_ring->tx_stats.cnt++;
2085 	tx_ring->tx_stats.bytes += skb->len;
2086 	u64_stats_update_end(&tx_ring->syncp);
2087 
2088 	tx_info->tx_descs = nb_hw_desc;
2089 	tx_info->last_jiffies = jiffies;
2090 	tx_info->print_once = 0;
2091 
2092 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2093 		tx_ring->ring_size);
2094 
2095 	/* This WMB is aimed to:
2096 	 * 1 - perform smp barrier before reading next_to_completion
2097 	 * 2 - make sure the desc were written before trigger DB
2098 	 */
2099 	wmb();
2100 
2101 	/* stop the queue when no more space available, the packet can have up
2102 	 * to sgl_size + 2. one for the meta descriptor and one for header
2103 	 * (if the header is larger than tx_max_header_size).
2104 	 */
2105 	if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
2106 		     (tx_ring->sgl_size + 2))) {
2107 		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
2108 			  __func__, qid);
2109 
2110 		netif_tx_stop_queue(txq);
2111 		u64_stats_update_begin(&tx_ring->syncp);
2112 		tx_ring->tx_stats.queue_stop++;
2113 		u64_stats_update_end(&tx_ring->syncp);
2114 
2115 		/* There is a rare condition where this function decide to
2116 		 * stop the queue but meanwhile clean_tx_irq updates
2117 		 * next_to_completion and terminates.
2118 		 * The queue will remain stopped forever.
2119 		 * To solve this issue this function perform rmb, check
2120 		 * the wakeup condition and wake up the queue if needed.
2121 		 */
2122 		smp_rmb();
2123 
2124 		if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
2125 				> ENA_TX_WAKEUP_THRESH) {
2126 			netif_tx_wake_queue(txq);
2127 			u64_stats_update_begin(&tx_ring->syncp);
2128 			tx_ring->tx_stats.queue_wakeup++;
2129 			u64_stats_update_end(&tx_ring->syncp);
2130 		}
2131 	}
2132 
2133 	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
2134 		/* trigger the dma engine */
2135 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
2136 		u64_stats_update_begin(&tx_ring->syncp);
2137 		tx_ring->tx_stats.doorbells++;
2138 		u64_stats_update_end(&tx_ring->syncp);
2139 	}
2140 
2141 	return NETDEV_TX_OK;
2142 
2143 error_report_dma_error:
2144 	u64_stats_update_begin(&tx_ring->syncp);
2145 	tx_ring->tx_stats.dma_mapping_err++;
2146 	u64_stats_update_end(&tx_ring->syncp);
2147 	netdev_warn(adapter->netdev, "failed to map skb\n");
2148 
2149 	tx_info->skb = NULL;
2150 
2151 error_unmap_dma:
2152 	if (i >= 0) {
2153 		/* save value of frag that failed */
2154 		last_frag = i;
2155 
2156 		/* start back at beginning and unmap skb */
2157 		tx_info->skb = NULL;
2158 		ena_buf = tx_info->bufs;
2159 		dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
2160 				 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
2161 
2162 		/* unmap remaining mapped pages */
2163 		for (i = 0; i < last_frag; i++) {
2164 			ena_buf++;
2165 			dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
2166 				       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
2167 		}
2168 	}
2169 
2170 error_drop_packet:
2171 
2172 	dev_kfree_skb(skb);
2173 	return NETDEV_TX_OK;
2174 }
2175 
2176 #ifdef CONFIG_NET_POLL_CONTROLLER
2177 static void ena_netpoll(struct net_device *netdev)
2178 {
2179 	struct ena_adapter *adapter = netdev_priv(netdev);
2180 	int i;
2181 
2182 	/* Dont schedule NAPI if the driver is in the middle of reset
2183 	 * or netdev is down.
2184 	 */
2185 
2186 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) ||
2187 	    test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2188 		return;
2189 
2190 	for (i = 0; i < adapter->num_queues; i++)
2191 		napi_schedule(&adapter->ena_napi[i].napi);
2192 }
2193 #endif /* CONFIG_NET_POLL_CONTROLLER */
2194 
2195 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
2196 			    void *accel_priv, select_queue_fallback_t fallback)
2197 {
2198 	u16 qid;
2199 	/* we suspect that this is good for in--kernel network services that
2200 	 * want to loop incoming skb rx to tx in normal user generated traffic,
2201 	 * most probably we will not get to this
2202 	 */
2203 	if (skb_rx_queue_recorded(skb))
2204 		qid = skb_get_rx_queue(skb);
2205 	else
2206 		qid = fallback(dev, skb);
2207 
2208 	return qid;
2209 }
2210 
2211 static void ena_config_host_info(struct ena_com_dev *ena_dev)
2212 {
2213 	struct ena_admin_host_info *host_info;
2214 	int rc;
2215 
2216 	/* Allocate only the host info */
2217 	rc = ena_com_allocate_host_info(ena_dev);
2218 	if (rc) {
2219 		pr_err("Cannot allocate host info\n");
2220 		return;
2221 	}
2222 
2223 	host_info = ena_dev->host_attr.host_info;
2224 
2225 	host_info->os_type = ENA_ADMIN_OS_LINUX;
2226 	host_info->kernel_ver = LINUX_VERSION_CODE;
2227 	strncpy(host_info->kernel_ver_str, utsname()->version,
2228 		sizeof(host_info->kernel_ver_str) - 1);
2229 	host_info->os_dist = 0;
2230 	strncpy(host_info->os_dist_str, utsname()->release,
2231 		sizeof(host_info->os_dist_str) - 1);
2232 	host_info->driver_version =
2233 		(DRV_MODULE_VER_MAJOR) |
2234 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2235 		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2236 
2237 	rc = ena_com_set_host_attributes(ena_dev);
2238 	if (rc) {
2239 		if (rc == -EOPNOTSUPP)
2240 			pr_warn("Cannot set host attributes\n");
2241 		else
2242 			pr_err("Cannot set host attributes\n");
2243 
2244 		goto err;
2245 	}
2246 
2247 	return;
2248 
2249 err:
2250 	ena_com_delete_host_info(ena_dev);
2251 }
2252 
2253 static void ena_config_debug_area(struct ena_adapter *adapter)
2254 {
2255 	u32 debug_area_size;
2256 	int rc, ss_count;
2257 
2258 	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
2259 	if (ss_count <= 0) {
2260 		netif_err(adapter, drv, adapter->netdev,
2261 			  "SS count is negative\n");
2262 		return;
2263 	}
2264 
2265 	/* allocate 32 bytes for each string and 64bit for the value */
2266 	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
2267 
2268 	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
2269 	if (rc) {
2270 		pr_err("Cannot allocate debug area\n");
2271 		return;
2272 	}
2273 
2274 	rc = ena_com_set_host_attributes(adapter->ena_dev);
2275 	if (rc) {
2276 		if (rc == -EOPNOTSUPP)
2277 			netif_warn(adapter, drv, adapter->netdev,
2278 				   "Cannot set host attributes\n");
2279 		else
2280 			netif_err(adapter, drv, adapter->netdev,
2281 				  "Cannot set host attributes\n");
2282 		goto err;
2283 	}
2284 
2285 	return;
2286 err:
2287 	ena_com_delete_debug_area(adapter->ena_dev);
2288 }
2289 
2290 static void ena_get_stats64(struct net_device *netdev,
2291 			    struct rtnl_link_stats64 *stats)
2292 {
2293 	struct ena_adapter *adapter = netdev_priv(netdev);
2294 	struct ena_ring *rx_ring, *tx_ring;
2295 	unsigned int start;
2296 	u64 rx_drops;
2297 	int i;
2298 
2299 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2300 		return;
2301 
2302 	for (i = 0; i < adapter->num_queues; i++) {
2303 		u64 bytes, packets;
2304 
2305 		tx_ring = &adapter->tx_ring[i];
2306 
2307 		do {
2308 			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
2309 			packets = tx_ring->tx_stats.cnt;
2310 			bytes = tx_ring->tx_stats.bytes;
2311 		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
2312 
2313 		stats->tx_packets += packets;
2314 		stats->tx_bytes += bytes;
2315 
2316 		rx_ring = &adapter->rx_ring[i];
2317 
2318 		do {
2319 			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
2320 			packets = rx_ring->rx_stats.cnt;
2321 			bytes = rx_ring->rx_stats.bytes;
2322 		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
2323 
2324 		stats->rx_packets += packets;
2325 		stats->rx_bytes += bytes;
2326 	}
2327 
2328 	do {
2329 		start = u64_stats_fetch_begin_irq(&adapter->syncp);
2330 		rx_drops = adapter->dev_stats.rx_drops;
2331 	} while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
2332 
2333 	stats->rx_dropped = rx_drops;
2334 
2335 	stats->multicast = 0;
2336 	stats->collisions = 0;
2337 
2338 	stats->rx_length_errors = 0;
2339 	stats->rx_crc_errors = 0;
2340 	stats->rx_frame_errors = 0;
2341 	stats->rx_fifo_errors = 0;
2342 	stats->rx_missed_errors = 0;
2343 	stats->tx_window_errors = 0;
2344 
2345 	stats->rx_errors = 0;
2346 	stats->tx_errors = 0;
2347 }
2348 
2349 static const struct net_device_ops ena_netdev_ops = {
2350 	.ndo_open		= ena_open,
2351 	.ndo_stop		= ena_close,
2352 	.ndo_start_xmit		= ena_start_xmit,
2353 	.ndo_select_queue	= ena_select_queue,
2354 	.ndo_get_stats64	= ena_get_stats64,
2355 	.ndo_tx_timeout		= ena_tx_timeout,
2356 	.ndo_change_mtu		= ena_change_mtu,
2357 	.ndo_set_mac_address	= NULL,
2358 	.ndo_validate_addr	= eth_validate_addr,
2359 #ifdef CONFIG_NET_POLL_CONTROLLER
2360 	.ndo_poll_controller	= ena_netpoll,
2361 #endif /* CONFIG_NET_POLL_CONTROLLER */
2362 };
2363 
2364 static void ena_device_io_suspend(struct work_struct *work)
2365 {
2366 	struct ena_adapter *adapter =
2367 		container_of(work, struct ena_adapter, suspend_io_task);
2368 	struct net_device *netdev = adapter->netdev;
2369 
2370 	/* ena_napi_disable_all disables only the IO handling.
2371 	 * We are still subject to AENQ keep alive watchdog.
2372 	 */
2373 	u64_stats_update_begin(&adapter->syncp);
2374 	adapter->dev_stats.io_suspend++;
2375 	u64_stats_update_begin(&adapter->syncp);
2376 	ena_napi_disable_all(adapter);
2377 	netif_tx_lock(netdev);
2378 	netif_device_detach(netdev);
2379 	netif_tx_unlock(netdev);
2380 }
2381 
2382 static void ena_device_io_resume(struct work_struct *work)
2383 {
2384 	struct ena_adapter *adapter =
2385 		container_of(work, struct ena_adapter, resume_io_task);
2386 	struct net_device *netdev = adapter->netdev;
2387 
2388 	u64_stats_update_begin(&adapter->syncp);
2389 	adapter->dev_stats.io_resume++;
2390 	u64_stats_update_end(&adapter->syncp);
2391 
2392 	netif_device_attach(netdev);
2393 	ena_napi_enable_all(adapter);
2394 }
2395 
2396 static int ena_device_validate_params(struct ena_adapter *adapter,
2397 				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
2398 {
2399 	struct net_device *netdev = adapter->netdev;
2400 	int rc;
2401 
2402 	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
2403 			      adapter->mac_addr);
2404 	if (!rc) {
2405 		netif_err(adapter, drv, netdev,
2406 			  "Error, mac address are different\n");
2407 		return -EINVAL;
2408 	}
2409 
2410 	if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
2411 	    (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
2412 		netif_err(adapter, drv, netdev,
2413 			  "Error, device doesn't support enough queues\n");
2414 		return -EINVAL;
2415 	}
2416 
2417 	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
2418 		netif_err(adapter, drv, netdev,
2419 			  "Error, device max mtu is smaller than netdev MTU\n");
2420 		return -EINVAL;
2421 	}
2422 
2423 	return 0;
2424 }
2425 
2426 static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
2427 			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
2428 			   bool *wd_state)
2429 {
2430 	struct device *dev = &pdev->dev;
2431 	bool readless_supported;
2432 	u32 aenq_groups;
2433 	int dma_width;
2434 	int rc;
2435 
2436 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
2437 	if (rc) {
2438 		dev_err(dev, "failed to init mmio read less\n");
2439 		return rc;
2440 	}
2441 
2442 	/* The PCIe configuration space revision id indicate if mmio reg
2443 	 * read is disabled
2444 	 */
2445 	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
2446 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2447 
2448 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2449 	if (rc) {
2450 		dev_err(dev, "Can not reset device\n");
2451 		goto err_mmio_read_less;
2452 	}
2453 
2454 	rc = ena_com_validate_version(ena_dev);
2455 	if (rc) {
2456 		dev_err(dev, "device version is too low\n");
2457 		goto err_mmio_read_less;
2458 	}
2459 
2460 	dma_width = ena_com_get_dma_width(ena_dev);
2461 	if (dma_width < 0) {
2462 		dev_err(dev, "Invalid dma width value %d", dma_width);
2463 		rc = dma_width;
2464 		goto err_mmio_read_less;
2465 	}
2466 
2467 	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
2468 	if (rc) {
2469 		dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
2470 		goto err_mmio_read_less;
2471 	}
2472 
2473 	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
2474 	if (rc) {
2475 		dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n",
2476 			rc);
2477 		goto err_mmio_read_less;
2478 	}
2479 
2480 	/* ENA admin level init */
2481 	rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
2482 	if (rc) {
2483 		dev_err(dev,
2484 			"Can not initialize ena admin queue with device\n");
2485 		goto err_mmio_read_less;
2486 	}
2487 
2488 	/* To enable the msix interrupts the driver needs to know the number
2489 	 * of queues. So the driver uses polling mode to retrieve this
2490 	 * information
2491 	 */
2492 	ena_com_set_admin_polling_mode(ena_dev, true);
2493 
2494 	ena_config_host_info(ena_dev);
2495 
2496 	/* Get Device Attributes*/
2497 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2498 	if (rc) {
2499 		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
2500 		goto err_admin_init;
2501 	}
2502 
2503 	/* Try to turn all the available aenq groups */
2504 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2505 		BIT(ENA_ADMIN_FATAL_ERROR) |
2506 		BIT(ENA_ADMIN_WARNING) |
2507 		BIT(ENA_ADMIN_NOTIFICATION) |
2508 		BIT(ENA_ADMIN_KEEP_ALIVE);
2509 
2510 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
2511 
2512 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2513 	if (rc) {
2514 		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
2515 		goto err_admin_init;
2516 	}
2517 
2518 	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2519 
2520 	return 0;
2521 
2522 err_admin_init:
2523 	ena_com_delete_host_info(ena_dev);
2524 	ena_com_admin_destroy(ena_dev);
2525 err_mmio_read_less:
2526 	ena_com_mmio_reg_read_request_destroy(ena_dev);
2527 
2528 	return rc;
2529 }
2530 
2531 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
2532 						    int io_vectors)
2533 {
2534 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2535 	struct device *dev = &adapter->pdev->dev;
2536 	int rc;
2537 
2538 	rc = ena_enable_msix(adapter, io_vectors);
2539 	if (rc) {
2540 		dev_err(dev, "Can not reserve msix vectors\n");
2541 		return rc;
2542 	}
2543 
2544 	ena_setup_mgmnt_intr(adapter);
2545 
2546 	rc = ena_request_mgmnt_irq(adapter);
2547 	if (rc) {
2548 		dev_err(dev, "Can not setup management interrupts\n");
2549 		goto err_disable_msix;
2550 	}
2551 
2552 	ena_com_set_admin_polling_mode(ena_dev, false);
2553 
2554 	ena_com_admin_aenq_enable(ena_dev);
2555 
2556 	return 0;
2557 
2558 err_disable_msix:
2559 	ena_disable_msix(adapter);
2560 
2561 	return rc;
2562 }
2563 
2564 static void ena_fw_reset_device(struct work_struct *work)
2565 {
2566 	struct ena_com_dev_get_features_ctx get_feat_ctx;
2567 	struct ena_adapter *adapter =
2568 		container_of(work, struct ena_adapter, reset_task);
2569 	struct net_device *netdev = adapter->netdev;
2570 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2571 	struct pci_dev *pdev = adapter->pdev;
2572 	bool dev_up, wd_state;
2573 	int rc;
2574 
2575 	if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2576 		dev_err(&pdev->dev,
2577 			"device reset schedule while reset bit is off\n");
2578 		return;
2579 	}
2580 
2581 	netif_carrier_off(netdev);
2582 
2583 	del_timer_sync(&adapter->timer_service);
2584 
2585 	rtnl_lock();
2586 
2587 	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2588 	ena_com_set_admin_running_state(ena_dev, false);
2589 
2590 	/* After calling ena_close the tx queues and the napi
2591 	 * are disabled so no one can interfere or touch the
2592 	 * data structures
2593 	 */
2594 	ena_close(netdev);
2595 
2596 	ena_free_mgmnt_irq(adapter);
2597 
2598 	ena_disable_msix(adapter);
2599 
2600 	ena_com_abort_admin_commands(ena_dev);
2601 
2602 	ena_com_wait_for_abort_completion(ena_dev);
2603 
2604 	ena_com_admin_destroy(ena_dev);
2605 
2606 	ena_com_mmio_reg_read_request_destroy(ena_dev);
2607 
2608 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
2609 	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2610 
2611 	/* Finish with the destroy part. Start the init part */
2612 
2613 	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
2614 	if (rc) {
2615 		dev_err(&pdev->dev, "Can not initialize device\n");
2616 		goto err;
2617 	}
2618 	adapter->wd_state = wd_state;
2619 
2620 	rc = ena_device_validate_params(adapter, &get_feat_ctx);
2621 	if (rc) {
2622 		dev_err(&pdev->dev, "Validation of device parameters failed\n");
2623 		goto err_device_destroy;
2624 	}
2625 
2626 	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
2627 						      adapter->num_queues);
2628 	if (rc) {
2629 		dev_err(&pdev->dev, "Enable MSI-X failed\n");
2630 		goto err_device_destroy;
2631 	}
2632 	/* If the interface was up before the reset bring it up */
2633 	if (dev_up) {
2634 		rc = ena_up(adapter);
2635 		if (rc) {
2636 			dev_err(&pdev->dev, "Failed to create I/O queues\n");
2637 			goto err_disable_msix;
2638 		}
2639 	}
2640 
2641 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
2642 
2643 	rtnl_unlock();
2644 
2645 	dev_err(&pdev->dev, "Device reset completed successfully\n");
2646 
2647 	return;
2648 err_disable_msix:
2649 	ena_free_mgmnt_irq(adapter);
2650 	ena_disable_msix(adapter);
2651 err_device_destroy:
2652 	ena_com_admin_destroy(ena_dev);
2653 err:
2654 	rtnl_unlock();
2655 
2656 	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
2657 
2658 	dev_err(&pdev->dev,
2659 		"Reset attempt failed. Can not reset the device\n");
2660 }
2661 
2662 static int check_missing_comp_in_queue(struct ena_adapter *adapter,
2663 				       struct ena_ring *tx_ring)
2664 {
2665 	struct ena_tx_buffer *tx_buf;
2666 	unsigned long last_jiffies;
2667 	u32 missed_tx = 0;
2668 	int i;
2669 
2670 	for (i = 0; i < tx_ring->ring_size; i++) {
2671 		tx_buf = &tx_ring->tx_buffer_info[i];
2672 		last_jiffies = tx_buf->last_jiffies;
2673 		if (unlikely(last_jiffies &&
2674 			     time_is_before_jiffies(last_jiffies + adapter->missing_tx_completion_to))) {
2675 			if (!tx_buf->print_once)
2676 				netif_notice(adapter, tx_err, adapter->netdev,
2677 					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
2678 					     tx_ring->qid, i);
2679 
2680 			tx_buf->print_once = 1;
2681 			missed_tx++;
2682 
2683 			if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
2684 				netif_err(adapter, tx_err, adapter->netdev,
2685 					  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
2686 					  missed_tx,
2687 					  adapter->missing_tx_completion_threshold);
2688 				adapter->reset_reason =
2689 					ENA_REGS_RESET_MISS_TX_CMPL;
2690 				set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2691 				return -EIO;
2692 			}
2693 		}
2694 	}
2695 
2696 	return 0;
2697 }
2698 
2699 static void check_for_missing_tx_completions(struct ena_adapter *adapter)
2700 {
2701 	struct ena_ring *tx_ring;
2702 	int i, budget, rc;
2703 
2704 	/* Make sure the driver doesn't turn the device in other process */
2705 	smp_rmb();
2706 
2707 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2708 		return;
2709 
2710 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2711 		return;
2712 
2713 	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
2714 		return;
2715 
2716 	budget = ENA_MONITORED_TX_QUEUES;
2717 
2718 	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
2719 		tx_ring = &adapter->tx_ring[i];
2720 
2721 		rc = check_missing_comp_in_queue(adapter, tx_ring);
2722 		if (unlikely(rc))
2723 			return;
2724 
2725 		budget--;
2726 		if (!budget)
2727 			break;
2728 	}
2729 
2730 	adapter->last_monitored_tx_qid = i % adapter->num_queues;
2731 }
2732 
2733 /* trigger napi schedule after 2 consecutive detections */
2734 #define EMPTY_RX_REFILL 2
2735 /* For the rare case where the device runs out of Rx descriptors and the
2736  * napi handler failed to refill new Rx descriptors (due to a lack of memory
2737  * for example).
2738  * This case will lead to a deadlock:
2739  * The device won't send interrupts since all the new Rx packets will be dropped
2740  * The napi handler won't allocate new Rx descriptors so the device will be
2741  * able to send new packets.
2742  *
2743  * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
2744  * It is recommended to have at least 512MB, with a minimum of 128MB for
2745  * constrained environment).
2746  *
2747  * When such a situation is detected - Reschedule napi
2748  */
2749 static void check_for_empty_rx_ring(struct ena_adapter *adapter)
2750 {
2751 	struct ena_ring *rx_ring;
2752 	int i, refill_required;
2753 
2754 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2755 		return;
2756 
2757 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2758 		return;
2759 
2760 	for (i = 0; i < adapter->num_queues; i++) {
2761 		rx_ring = &adapter->rx_ring[i];
2762 
2763 		refill_required =
2764 			ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
2765 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
2766 			rx_ring->empty_rx_queue++;
2767 
2768 			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
2769 				u64_stats_update_begin(&rx_ring->syncp);
2770 				rx_ring->rx_stats.empty_rx_ring++;
2771 				u64_stats_update_end(&rx_ring->syncp);
2772 
2773 				netif_err(adapter, drv, adapter->netdev,
2774 					  "trigger refill for ring %d\n", i);
2775 
2776 				napi_schedule(rx_ring->napi);
2777 				rx_ring->empty_rx_queue = 0;
2778 			}
2779 		} else {
2780 			rx_ring->empty_rx_queue = 0;
2781 		}
2782 	}
2783 }
2784 
2785 /* Check for keep alive expiration */
2786 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
2787 {
2788 	unsigned long keep_alive_expired;
2789 
2790 	if (!adapter->wd_state)
2791 		return;
2792 
2793 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2794 		return;
2795 
2796 	keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies +
2797 					   adapter->keep_alive_timeout);
2798 	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
2799 		netif_err(adapter, drv, adapter->netdev,
2800 			  "Keep alive watchdog timeout.\n");
2801 		u64_stats_update_begin(&adapter->syncp);
2802 		adapter->dev_stats.wd_expired++;
2803 		u64_stats_update_end(&adapter->syncp);
2804 		adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
2805 		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2806 	}
2807 }
2808 
2809 static void check_for_admin_com_state(struct ena_adapter *adapter)
2810 {
2811 	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
2812 		netif_err(adapter, drv, adapter->netdev,
2813 			  "ENA admin queue is not in running state!\n");
2814 		u64_stats_update_begin(&adapter->syncp);
2815 		adapter->dev_stats.admin_q_pause++;
2816 		u64_stats_update_end(&adapter->syncp);
2817 		adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
2818 		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2819 	}
2820 }
2821 
2822 static void ena_update_hints(struct ena_adapter *adapter,
2823 			     struct ena_admin_ena_hw_hints *hints)
2824 {
2825 	struct net_device *netdev = adapter->netdev;
2826 
2827 	if (hints->admin_completion_tx_timeout)
2828 		adapter->ena_dev->admin_queue.completion_timeout =
2829 			hints->admin_completion_tx_timeout * 1000;
2830 
2831 	if (hints->mmio_read_timeout)
2832 		/* convert to usec */
2833 		adapter->ena_dev->mmio_read.reg_read_to =
2834 			hints->mmio_read_timeout * 1000;
2835 
2836 	if (hints->missed_tx_completion_count_threshold_to_reset)
2837 		adapter->missing_tx_completion_threshold =
2838 			hints->missed_tx_completion_count_threshold_to_reset;
2839 
2840 	if (hints->missing_tx_completion_timeout) {
2841 		if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2842 			adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
2843 		else
2844 			adapter->missing_tx_completion_to =
2845 				msecs_to_jiffies(hints->missing_tx_completion_timeout);
2846 	}
2847 
2848 	if (hints->netdev_wd_timeout)
2849 		netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
2850 
2851 	if (hints->driver_watchdog_timeout) {
2852 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2853 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
2854 		else
2855 			adapter->keep_alive_timeout =
2856 				msecs_to_jiffies(hints->driver_watchdog_timeout);
2857 	}
2858 }
2859 
2860 static void ena_update_host_info(struct ena_admin_host_info *host_info,
2861 				 struct net_device *netdev)
2862 {
2863 	host_info->supported_network_features[0] =
2864 		netdev->features & GENMASK_ULL(31, 0);
2865 	host_info->supported_network_features[1] =
2866 		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
2867 }
2868 
2869 static void ena_timer_service(unsigned long data)
2870 {
2871 	struct ena_adapter *adapter = (struct ena_adapter *)data;
2872 	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
2873 	struct ena_admin_host_info *host_info =
2874 		adapter->ena_dev->host_attr.host_info;
2875 
2876 	check_for_missing_keep_alive(adapter);
2877 
2878 	check_for_admin_com_state(adapter);
2879 
2880 	check_for_missing_tx_completions(adapter);
2881 
2882 	check_for_empty_rx_ring(adapter);
2883 
2884 	if (debug_area)
2885 		ena_dump_stats_to_buf(adapter, debug_area);
2886 
2887 	if (host_info)
2888 		ena_update_host_info(host_info, adapter->netdev);
2889 
2890 	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2891 		netif_err(adapter, drv, adapter->netdev,
2892 			  "Trigger reset is on\n");
2893 		ena_dump_stats_to_dmesg(adapter);
2894 		queue_work(ena_wq, &adapter->reset_task);
2895 		return;
2896 	}
2897 
2898 	/* Reset the timer */
2899 	mod_timer(&adapter->timer_service, jiffies + HZ);
2900 }
2901 
2902 static int ena_calc_io_queue_num(struct pci_dev *pdev,
2903 				 struct ena_com_dev *ena_dev,
2904 				 struct ena_com_dev_get_features_ctx *get_feat_ctx)
2905 {
2906 	int io_sq_num, io_queue_num;
2907 
2908 	/* In case of LLQ use the llq number in the get feature cmd */
2909 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2910 		io_sq_num = get_feat_ctx->max_queues.max_llq_num;
2911 
2912 		if (io_sq_num == 0) {
2913 			dev_err(&pdev->dev,
2914 				"Trying to use LLQ but llq_num is 0. Fall back into regular queues\n");
2915 
2916 			ena_dev->tx_mem_queue_type =
2917 				ENA_ADMIN_PLACEMENT_POLICY_HOST;
2918 			io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2919 		}
2920 	} else {
2921 		io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2922 	}
2923 
2924 	io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
2925 	io_queue_num = min_t(int, io_queue_num, io_sq_num);
2926 	io_queue_num = min_t(int, io_queue_num,
2927 			     get_feat_ctx->max_queues.max_cq_num);
2928 	/* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */
2929 	io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
2930 	if (unlikely(!io_queue_num)) {
2931 		dev_err(&pdev->dev, "The device doesn't have io queues\n");
2932 		return -EFAULT;
2933 	}
2934 
2935 	return io_queue_num;
2936 }
2937 
2938 static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
2939 			      struct ena_com_dev_get_features_ctx *get_feat_ctx)
2940 {
2941 	bool has_mem_bar;
2942 
2943 	has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
2944 
2945 	/* Enable push mode if device supports LLQ */
2946 	if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
2947 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
2948 	else
2949 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2950 }
2951 
2952 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
2953 				 struct net_device *netdev)
2954 {
2955 	netdev_features_t dev_features = 0;
2956 
2957 	/* Set offload features */
2958 	if (feat->offload.tx &
2959 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2960 		dev_features |= NETIF_F_IP_CSUM;
2961 
2962 	if (feat->offload.tx &
2963 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2964 		dev_features |= NETIF_F_IPV6_CSUM;
2965 
2966 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2967 		dev_features |= NETIF_F_TSO;
2968 
2969 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
2970 		dev_features |= NETIF_F_TSO6;
2971 
2972 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
2973 		dev_features |= NETIF_F_TSO_ECN;
2974 
2975 	if (feat->offload.rx_supported &
2976 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2977 		dev_features |= NETIF_F_RXCSUM;
2978 
2979 	if (feat->offload.rx_supported &
2980 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2981 		dev_features |= NETIF_F_RXCSUM;
2982 
2983 	netdev->features =
2984 		dev_features |
2985 		NETIF_F_SG |
2986 		NETIF_F_RXHASH |
2987 		NETIF_F_HIGHDMA;
2988 
2989 	netdev->hw_features |= netdev->features;
2990 	netdev->vlan_features |= netdev->features;
2991 }
2992 
2993 static void ena_set_conf_feat_params(struct ena_adapter *adapter,
2994 				     struct ena_com_dev_get_features_ctx *feat)
2995 {
2996 	struct net_device *netdev = adapter->netdev;
2997 
2998 	/* Copy mac address */
2999 	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3000 		eth_hw_addr_random(netdev);
3001 		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3002 	} else {
3003 		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3004 		ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
3005 	}
3006 
3007 	/* Set offload features */
3008 	ena_set_dev_offloads(feat, netdev);
3009 
3010 	adapter->max_mtu = feat->dev_attr.max_mtu;
3011 	netdev->max_mtu = adapter->max_mtu;
3012 	netdev->min_mtu = ENA_MIN_MTU;
3013 }
3014 
3015 static int ena_rss_init_default(struct ena_adapter *adapter)
3016 {
3017 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3018 	struct device *dev = &adapter->pdev->dev;
3019 	int rc, i;
3020 	u32 val;
3021 
3022 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3023 	if (unlikely(rc)) {
3024 		dev_err(dev, "Cannot init indirect table\n");
3025 		goto err_rss_init;
3026 	}
3027 
3028 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3029 		val = ethtool_rxfh_indir_default(i, adapter->num_queues);
3030 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3031 						       ENA_IO_RXQ_IDX(val));
3032 		if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3033 			dev_err(dev, "Cannot fill indirect table\n");
3034 			goto err_fill_indir;
3035 		}
3036 	}
3037 
3038 	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3039 					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3040 	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3041 		dev_err(dev, "Cannot fill hash function\n");
3042 		goto err_fill_indir;
3043 	}
3044 
3045 	rc = ena_com_set_default_hash_ctrl(ena_dev);
3046 	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3047 		dev_err(dev, "Cannot fill hash control\n");
3048 		goto err_fill_indir;
3049 	}
3050 
3051 	return 0;
3052 
3053 err_fill_indir:
3054 	ena_com_rss_destroy(ena_dev);
3055 err_rss_init:
3056 
3057 	return rc;
3058 }
3059 
3060 static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3061 {
3062 	int release_bars;
3063 
3064 	if (ena_dev->mem_bar)
3065 		devm_iounmap(&pdev->dev, ena_dev->mem_bar);
3066 
3067 	devm_iounmap(&pdev->dev, ena_dev->reg_bar);
3068 
3069 	release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3070 	pci_release_selected_regions(pdev, release_bars);
3071 }
3072 
3073 static int ena_calc_queue_size(struct pci_dev *pdev,
3074 			       struct ena_com_dev *ena_dev,
3075 			       u16 *max_tx_sgl_size,
3076 			       u16 *max_rx_sgl_size,
3077 			       struct ena_com_dev_get_features_ctx *get_feat_ctx)
3078 {
3079 	u32 queue_size = ENA_DEFAULT_RING_SIZE;
3080 
3081 	queue_size = min_t(u32, queue_size,
3082 			   get_feat_ctx->max_queues.max_cq_depth);
3083 	queue_size = min_t(u32, queue_size,
3084 			   get_feat_ctx->max_queues.max_sq_depth);
3085 
3086 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3087 		queue_size = min_t(u32, queue_size,
3088 				   get_feat_ctx->max_queues.max_llq_depth);
3089 
3090 	queue_size = rounddown_pow_of_two(queue_size);
3091 
3092 	if (unlikely(!queue_size)) {
3093 		dev_err(&pdev->dev, "Invalid queue size\n");
3094 		return -EFAULT;
3095 	}
3096 
3097 	*max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3098 				 get_feat_ctx->max_queues.max_packet_tx_descs);
3099 	*max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3100 				 get_feat_ctx->max_queues.max_packet_rx_descs);
3101 
3102 	return queue_size;
3103 }
3104 
3105 /* ena_probe - Device Initialization Routine
3106  * @pdev: PCI device information struct
3107  * @ent: entry in ena_pci_tbl
3108  *
3109  * Returns 0 on success, negative on failure
3110  *
3111  * ena_probe initializes an adapter identified by a pci_dev structure.
3112  * The OS initialization, configuring of the adapter private structure,
3113  * and a hardware reset occur.
3114  */
3115 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3116 {
3117 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3118 	static int version_printed;
3119 	struct net_device *netdev;
3120 	struct ena_adapter *adapter;
3121 	struct ena_com_dev *ena_dev = NULL;
3122 	static int adapters_found;
3123 	int io_queue_num, bars, rc;
3124 	int queue_size;
3125 	u16 tx_sgl_size = 0;
3126 	u16 rx_sgl_size = 0;
3127 	bool wd_state;
3128 
3129 	dev_dbg(&pdev->dev, "%s\n", __func__);
3130 
3131 	if (version_printed++ == 0)
3132 		dev_info(&pdev->dev, "%s", version);
3133 
3134 	rc = pci_enable_device_mem(pdev);
3135 	if (rc) {
3136 		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
3137 		return rc;
3138 	}
3139 
3140 	pci_set_master(pdev);
3141 
3142 	ena_dev = vzalloc(sizeof(*ena_dev));
3143 	if (!ena_dev) {
3144 		rc = -ENOMEM;
3145 		goto err_disable_device;
3146 	}
3147 
3148 	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3149 	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
3150 	if (rc) {
3151 		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
3152 			rc);
3153 		goto err_free_ena_dev;
3154 	}
3155 
3156 	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
3157 					pci_resource_start(pdev, ENA_REG_BAR),
3158 					pci_resource_len(pdev, ENA_REG_BAR));
3159 	if (!ena_dev->reg_bar) {
3160 		dev_err(&pdev->dev, "failed to remap regs bar\n");
3161 		rc = -EFAULT;
3162 		goto err_free_region;
3163 	}
3164 
3165 	ena_dev->dmadev = &pdev->dev;
3166 
3167 	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
3168 	if (rc) {
3169 		dev_err(&pdev->dev, "ena device init failed\n");
3170 		if (rc == -ETIME)
3171 			rc = -EPROBE_DEFER;
3172 		goto err_free_region;
3173 	}
3174 
3175 	ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
3176 
3177 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3178 		ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3179 						   pci_resource_start(pdev, ENA_MEM_BAR),
3180 						   pci_resource_len(pdev, ENA_MEM_BAR));
3181 		if (!ena_dev->mem_bar) {
3182 			rc = -EFAULT;
3183 			goto err_device_destroy;
3184 		}
3185 	}
3186 
3187 	/* initial Tx interrupt delay, Assumes 1 usec granularity.
3188 	* Updated during device initialization with the real granularity
3189 	*/
3190 	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
3191 	io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
3192 	queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
3193 					 &rx_sgl_size, &get_feat_ctx);
3194 	if ((queue_size <= 0) || (io_queue_num <= 0)) {
3195 		rc = -EFAULT;
3196 		goto err_device_destroy;
3197 	}
3198 
3199 	dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
3200 		 io_queue_num, queue_size);
3201 
3202 	/* dev zeroed in init_etherdev */
3203 	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
3204 	if (!netdev) {
3205 		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
3206 		rc = -ENOMEM;
3207 		goto err_device_destroy;
3208 	}
3209 
3210 	SET_NETDEV_DEV(netdev, &pdev->dev);
3211 
3212 	adapter = netdev_priv(netdev);
3213 	pci_set_drvdata(pdev, adapter);
3214 
3215 	adapter->ena_dev = ena_dev;
3216 	adapter->netdev = netdev;
3217 	adapter->pdev = pdev;
3218 
3219 	ena_set_conf_feat_params(adapter, &get_feat_ctx);
3220 
3221 	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
3222 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3223 
3224 	adapter->tx_ring_size = queue_size;
3225 	adapter->rx_ring_size = queue_size;
3226 
3227 	adapter->max_tx_sgl_size = tx_sgl_size;
3228 	adapter->max_rx_sgl_size = rx_sgl_size;
3229 
3230 	adapter->num_queues = io_queue_num;
3231 	adapter->last_monitored_tx_qid = 0;
3232 
3233 	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
3234 	adapter->wd_state = wd_state;
3235 
3236 	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
3237 
3238 	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
3239 	if (rc) {
3240 		dev_err(&pdev->dev,
3241 			"Failed to query interrupt moderation feature\n");
3242 		goto err_netdev_destroy;
3243 	}
3244 	ena_init_io_rings(adapter);
3245 
3246 	netdev->netdev_ops = &ena_netdev_ops;
3247 	netdev->watchdog_timeo = TX_TIMEOUT;
3248 	ena_set_ethtool_ops(netdev);
3249 
3250 	netdev->priv_flags |= IFF_UNICAST_FLT;
3251 
3252 	u64_stats_init(&adapter->syncp);
3253 
3254 	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
3255 	if (rc) {
3256 		dev_err(&pdev->dev,
3257 			"Failed to enable and set the admin interrupts\n");
3258 		goto err_worker_destroy;
3259 	}
3260 	rc = ena_rss_init_default(adapter);
3261 	if (rc && (rc != -EOPNOTSUPP)) {
3262 		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
3263 		goto err_free_msix;
3264 	}
3265 
3266 	ena_config_debug_area(adapter);
3267 
3268 	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
3269 
3270 	netif_carrier_off(netdev);
3271 
3272 	rc = register_netdev(netdev);
3273 	if (rc) {
3274 		dev_err(&pdev->dev, "Cannot register net device\n");
3275 		goto err_rss;
3276 	}
3277 
3278 	INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
3279 	INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
3280 	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
3281 
3282 	adapter->last_keep_alive_jiffies = jiffies;
3283 	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
3284 	adapter->missing_tx_completion_to = TX_TIMEOUT;
3285 	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
3286 
3287 	ena_update_hints(adapter, &get_feat_ctx.hw_hints);
3288 
3289 	setup_timer(&adapter->timer_service, ena_timer_service,
3290 		    (unsigned long)adapter);
3291 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3292 
3293 	dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
3294 		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
3295 		 netdev->dev_addr, io_queue_num);
3296 
3297 	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3298 
3299 	adapters_found++;
3300 
3301 	return 0;
3302 
3303 err_rss:
3304 	ena_com_delete_debug_area(ena_dev);
3305 	ena_com_rss_destroy(ena_dev);
3306 err_free_msix:
3307 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
3308 	ena_free_mgmnt_irq(adapter);
3309 	ena_disable_msix(adapter);
3310 err_worker_destroy:
3311 	ena_com_destroy_interrupt_moderation(ena_dev);
3312 	del_timer(&adapter->timer_service);
3313 	cancel_work_sync(&adapter->suspend_io_task);
3314 	cancel_work_sync(&adapter->resume_io_task);
3315 err_netdev_destroy:
3316 	free_netdev(netdev);
3317 err_device_destroy:
3318 	ena_com_delete_host_info(ena_dev);
3319 	ena_com_admin_destroy(ena_dev);
3320 err_free_region:
3321 	ena_release_bars(ena_dev, pdev);
3322 err_free_ena_dev:
3323 	vfree(ena_dev);
3324 err_disable_device:
3325 	pci_disable_device(pdev);
3326 	return rc;
3327 }
3328 
3329 /*****************************************************************************/
3330 static int ena_sriov_configure(struct pci_dev *dev, int numvfs)
3331 {
3332 	int rc;
3333 
3334 	if (numvfs > 0) {
3335 		rc = pci_enable_sriov(dev, numvfs);
3336 		if (rc != 0) {
3337 			dev_err(&dev->dev,
3338 				"pci_enable_sriov failed to enable: %d vfs with the error: %d\n",
3339 				numvfs, rc);
3340 			return rc;
3341 		}
3342 
3343 		return numvfs;
3344 	}
3345 
3346 	if (numvfs == 0) {
3347 		pci_disable_sriov(dev);
3348 		return 0;
3349 	}
3350 
3351 	return -EINVAL;
3352 }
3353 
3354 /*****************************************************************************/
3355 /*****************************************************************************/
3356 
3357 /* ena_remove - Device Removal Routine
3358  * @pdev: PCI device information struct
3359  *
3360  * ena_remove is called by the PCI subsystem to alert the driver
3361  * that it should release a PCI device.
3362  */
3363 static void ena_remove(struct pci_dev *pdev)
3364 {
3365 	struct ena_adapter *adapter = pci_get_drvdata(pdev);
3366 	struct ena_com_dev *ena_dev;
3367 	struct net_device *netdev;
3368 
3369 	ena_dev = adapter->ena_dev;
3370 	netdev = adapter->netdev;
3371 
3372 #ifdef CONFIG_RFS_ACCEL
3373 	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
3374 		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
3375 		netdev->rx_cpu_rmap = NULL;
3376 	}
3377 #endif /* CONFIG_RFS_ACCEL */
3378 
3379 	unregister_netdev(netdev);
3380 	del_timer_sync(&adapter->timer_service);
3381 
3382 	cancel_work_sync(&adapter->reset_task);
3383 
3384 	cancel_work_sync(&adapter->suspend_io_task);
3385 
3386 	cancel_work_sync(&adapter->resume_io_task);
3387 
3388 	/* Reset the device only if the device is running. */
3389 	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3390 		ena_com_dev_reset(ena_dev, adapter->reset_reason);
3391 
3392 	ena_free_mgmnt_irq(adapter);
3393 
3394 	ena_disable_msix(adapter);
3395 
3396 	free_netdev(netdev);
3397 
3398 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3399 
3400 	ena_com_abort_admin_commands(ena_dev);
3401 
3402 	ena_com_wait_for_abort_completion(ena_dev);
3403 
3404 	ena_com_admin_destroy(ena_dev);
3405 
3406 	ena_com_rss_destroy(ena_dev);
3407 
3408 	ena_com_delete_debug_area(ena_dev);
3409 
3410 	ena_com_delete_host_info(ena_dev);
3411 
3412 	ena_release_bars(ena_dev, pdev);
3413 
3414 	pci_disable_device(pdev);
3415 
3416 	ena_com_destroy_interrupt_moderation(ena_dev);
3417 
3418 	vfree(ena_dev);
3419 }
3420 
3421 static struct pci_driver ena_pci_driver = {
3422 	.name		= DRV_MODULE_NAME,
3423 	.id_table	= ena_pci_tbl,
3424 	.probe		= ena_probe,
3425 	.remove		= ena_remove,
3426 	.sriov_configure = ena_sriov_configure,
3427 };
3428 
3429 static int __init ena_init(void)
3430 {
3431 	pr_info("%s", version);
3432 
3433 	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
3434 	if (!ena_wq) {
3435 		pr_err("Failed to create workqueue\n");
3436 		return -ENOMEM;
3437 	}
3438 
3439 	return pci_register_driver(&ena_pci_driver);
3440 }
3441 
3442 static void __exit ena_cleanup(void)
3443 {
3444 	pci_unregister_driver(&ena_pci_driver);
3445 
3446 	if (ena_wq) {
3447 		destroy_workqueue(ena_wq);
3448 		ena_wq = NULL;
3449 	}
3450 }
3451 
3452 /******************************************************************************
3453  ******************************** AENQ Handlers *******************************
3454  *****************************************************************************/
3455 /* ena_update_on_link_change:
3456  * Notify the network interface about the change in link status
3457  */
3458 static void ena_update_on_link_change(void *adapter_data,
3459 				      struct ena_admin_aenq_entry *aenq_e)
3460 {
3461 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3462 	struct ena_admin_aenq_link_change_desc *aenq_desc =
3463 		(struct ena_admin_aenq_link_change_desc *)aenq_e;
3464 	int status = aenq_desc->flags &
3465 		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3466 
3467 	if (status) {
3468 		netdev_dbg(adapter->netdev, "%s\n", __func__);
3469 		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
3470 		netif_carrier_on(adapter->netdev);
3471 	} else {
3472 		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
3473 		netif_carrier_off(adapter->netdev);
3474 	}
3475 }
3476 
3477 static void ena_keep_alive_wd(void *adapter_data,
3478 			      struct ena_admin_aenq_entry *aenq_e)
3479 {
3480 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3481 	struct ena_admin_aenq_keep_alive_desc *desc;
3482 	u64 rx_drops;
3483 
3484 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3485 	adapter->last_keep_alive_jiffies = jiffies;
3486 
3487 	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
3488 
3489 	u64_stats_update_begin(&adapter->syncp);
3490 	adapter->dev_stats.rx_drops = rx_drops;
3491 	u64_stats_update_end(&adapter->syncp);
3492 }
3493 
3494 static void ena_notification(void *adapter_data,
3495 			     struct ena_admin_aenq_entry *aenq_e)
3496 {
3497 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3498 	struct ena_admin_ena_hw_hints *hints;
3499 
3500 	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3501 	     "Invalid group(%x) expected %x\n",
3502 	     aenq_e->aenq_common_desc.group,
3503 	     ENA_ADMIN_NOTIFICATION);
3504 
3505 	switch (aenq_e->aenq_common_desc.syndrom) {
3506 	case ENA_ADMIN_SUSPEND:
3507 		/* Suspend just the IO queues.
3508 		 * We deliberately don't suspend admin so the timer and
3509 		 * the keep_alive events should remain.
3510 		 */
3511 		queue_work(ena_wq, &adapter->suspend_io_task);
3512 		break;
3513 	case ENA_ADMIN_RESUME:
3514 		queue_work(ena_wq, &adapter->resume_io_task);
3515 		break;
3516 	case ENA_ADMIN_UPDATE_HINTS:
3517 		hints = (struct ena_admin_ena_hw_hints *)
3518 			(&aenq_e->inline_data_w4);
3519 		ena_update_hints(adapter, hints);
3520 		break;
3521 	default:
3522 		netif_err(adapter, drv, adapter->netdev,
3523 			  "Invalid aenq notification link state %d\n",
3524 			  aenq_e->aenq_common_desc.syndrom);
3525 	}
3526 }
3527 
3528 /* This handler will called for unknown event group or unimplemented handlers*/
3529 static void unimplemented_aenq_handler(void *data,
3530 				       struct ena_admin_aenq_entry *aenq_e)
3531 {
3532 	struct ena_adapter *adapter = (struct ena_adapter *)data;
3533 
3534 	netif_err(adapter, drv, adapter->netdev,
3535 		  "Unknown event was received or event with unimplemented handler\n");
3536 }
3537 
3538 static struct ena_aenq_handlers aenq_handlers = {
3539 	.handlers = {
3540 		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3541 		[ENA_ADMIN_NOTIFICATION] = ena_notification,
3542 		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3543 	},
3544 	.unimplemented_handler = unimplemented_aenq_handler
3545 };
3546 
3547 module_init(ena_init);
3548 module_exit(ena_cleanup);
3549