1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds received data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66 	{ 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69 
70 /* Workqueue used by all functions for deferring cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72 
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75 	"CEV",
76 	"CTX",
77 	"DBUF",
78 	"ERX",
79 	"Host",
80 	"MPU",
81 	"NDMA",
82 	"PTC ",
83 	"RDMA ",
84 	"RXF ",
85 	"RXIPS ",
86 	"RXULP0 ",
87 	"RXULP1 ",
88 	"RXULP2 ",
89 	"TIM ",
90 	"TPOST ",
91 	"TPRE ",
92 	"TXIPS ",
93 	"TXULP0 ",
94 	"TXULP1 ",
95 	"UC ",
96 	"WDMA ",
97 	"TXULP2 ",
98 	"HOST1 ",
99 	"P0_OB_LINK ",
100 	"P1_OB_LINK ",
101 	"HOST_GPIO ",
102 	"MBOX ",
103 	"ERX2 ",
104 	"SPARE ",
105 	"JTAG ",
106 	"MPU_INTPEND "
107 };
108 
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111 	"LPCMEMHOST",
112 	"MGMT_MAC",
113 	"PCS0ONLINE",
114 	"MPU_IRAM",
115 	"PCS1ONLINE",
116 	"PCTL0",
117 	"PCTL1",
118 	"PMEM",
119 	"RR",
120 	"TXPB",
121 	"RXPP",
122 	"XAUI",
123 	"TXP",
124 	"ARM",
125 	"IPC",
126 	"HOST2",
127 	"HOST3",
128 	"HOST4",
129 	"HOST5",
130 	"HOST6",
131 	"HOST7",
132 	"ECRC",
133 	"Poison TLP",
134 	"NETC",
135 	"PERIPH",
136 	"LLTXULP",
137 	"D2P",
138 	"RCON",
139 	"LDMA",
140 	"LLTXP",
141 	"LLTXPB",
142 	"Unknown"
143 };
144 
145 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
146 				 BE_IF_FLAGS_BROADCAST | \
147 				 BE_IF_FLAGS_MULTICAST | \
148 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
149 
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152 	struct be_dma_mem *mem = &q->dma_mem;
153 
154 	if (mem->va) {
155 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156 				  mem->dma);
157 		mem->va = NULL;
158 	}
159 }
160 
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162 			  u16 len, u16 entry_size)
163 {
164 	struct be_dma_mem *mem = &q->dma_mem;
165 
166 	memset(q, 0, sizeof(*q));
167 	q->len = len;
168 	q->entry_size = entry_size;
169 	mem->size = len * entry_size;
170 	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
171 				     &mem->dma, GFP_KERNEL);
172 	if (!mem->va)
173 		return -ENOMEM;
174 	return 0;
175 }
176 
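/* Toggle host interrupt delivery via the MEMBAR interrupt-control field in
 * PCI config space; used as a fallback when the FW cmd fails.
 */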
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179 	u32 reg, enabled;
180 
181 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182 			      &reg);
183 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184 
185 	if (!enabled && enable)
186 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187 	else if (enabled && !enable)
188 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189 	else
190 		return;
191 
192 	pci_write_config_dword(adapter->pdev,
193 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195 
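/* Enable/disable interrupts for this function; tries the FW cmd first and
 * falls back to the PCI config-space register on failure.
 */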
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198 	int status = 0;
199 
200 	/* On Lancer, interrupts can't be controlled via this register */
201 	if (lancer_chip(adapter))
202 		return;
203 
204 	if (be_check_error(adapter, BE_ERROR_EEH))
205 		return;
206 
207 	status = be_cmd_intr_set(adapter, enable);
208 	if (status)
209 		be_reg_intr_set(adapter, enable);
210 }
211 
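/* Ring the RX queue doorbell to notify HW of newly posted RX buffers */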
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214 	u32 val = 0;
215 
216 	if (be_check_error(adapter, BE_ERROR_HW))
217 		return;
218 
219 	val |= qid & DB_RQ_RING_ID_MASK;
220 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221 
222 	wmb();
223 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225 
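/* Ring the TX queue doorbell to notify HW of newly posted TX WRBs */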
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227 			  u16 posted)
228 {
229 	u32 val = 0;
230 
231 	if (be_check_error(adapter, BE_ERROR_HW))
232 		return;
233 
234 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236 
237 	wmb();
238 	iowrite32(val, adapter->db + txo->db_offset);
239 }
240 
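/* Ring the EQ doorbell: optionally re-arm the EQ, clear the interrupt and
 * acknowledge the number of event entries processed.
 */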
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242 			 bool arm, bool clear_int, u16 num_popped,
243 			 u32 eq_delay_mult_enc)
244 {
245 	u32 val = 0;
246 
247 	val |= qid & DB_EQ_RING_ID_MASK;
248 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249 
250 	if (be_check_error(adapter, BE_ERROR_HW))
251 		return;
252 
253 	if (arm)
254 		val |= 1 << DB_EQ_REARM_SHIFT;
255 	if (clear_int)
256 		val |= 1 << DB_EQ_CLR_SHIFT;
257 	val |= 1 << DB_EQ_EVNT_SHIFT;
258 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262 
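/* Ring the CQ doorbell: optionally re-arm the CQ and acknowledge the number
 * of completion entries processed.
 */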
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265 	u32 val = 0;
266 
267 	val |= qid & DB_CQ_RING_ID_MASK;
268 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
270 
271 	if (be_check_error(adapter, BE_ERROR_HW))
272 		return;
273 
274 	if (arm)
275 		val |= 1 << DB_CQ_REARM_SHIFT;
276 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279 
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282 	int i;
283 
284 	/* Check if mac has already been added as part of uc-list */
285 	for (i = 0; i < adapter->uc_macs; i++) {
286 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287 			/* mac already added, skip addition */
288 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289 			return 0;
290 		}
291 	}
292 
293 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294 			       &adapter->pmac_id[0], 0);
295 }
296 
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299 	int i;
300 
301 	/* Skip deletion if the programmed mac is
302 	 * being used in uc-list
303 	 */
304 	for (i = 0; i < adapter->uc_macs; i++) {
305 		if (adapter->pmac_id[i + 1] == pmac_id)
306 			return;
307 	}
308 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310 
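/* ndo_set_mac_address handler: changes the MAC address of the interface */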
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313 	struct be_adapter *adapter = netdev_priv(netdev);
314 	struct device *dev = &adapter->pdev->dev;
315 	struct sockaddr *addr = p;
316 	int status;
317 	u8 mac[ETH_ALEN];
318 	u32 old_pmac_id = adapter->pmac_id[0];
319 
320 	if (!is_valid_ether_addr(addr->sa_data))
321 		return -EADDRNOTAVAIL;
322 
323 	/* Proceed further only if the user-provided MAC is different
324 	 * from the active MAC
325 	 */
326 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327 		return 0;
328 
329 	/* BE3 VFs without FILTMGMT privilege are not allowed to change their
330 	 * MAC address
331 	 */
332 	if (BEx_chip(adapter) && be_virtfn(adapter) &&
333 	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
334 		return -EPERM;
335 
336 	/* if device is not running, copy MAC to netdev->dev_addr */
337 	if (!netif_running(netdev))
338 		goto done;
339 
340 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
341 	 * privilege or if the PF did not provision the new MAC address.
342 	 * On BE3, this cmd will always fail if the VF doesn't have the
343 	 * FILTMGMT privilege. This failure is OK only if the PF has
344 	 * programmed the MAC for the VF.
345 	 */
346 	mutex_lock(&adapter->rx_filter_lock);
347 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348 	if (!status) {
350 		/* Delete the old programmed MAC. This call may fail if the
351 		 * old MAC was already deleted by the PF driver.
352 		 */
353 		if (adapter->pmac_id[0] != old_pmac_id)
354 			be_dev_mac_del(adapter, old_pmac_id);
355 	}
356 
357 	mutex_unlock(&adapter->rx_filter_lock);
358 	/* Decide if the new MAC is successfully activated only after
359 	 * querying the FW
360 	 */
361 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362 				       adapter->if_handle, true, 0);
363 	if (status)
364 		goto err;
365 
366 	/* The MAC change did not happen, either due to lack of privilege
367 	 * or because the PF didn't pre-provision the MAC.
368 	 */
369 	if (!ether_addr_equal(addr->sa_data, mac)) {
370 		status = -EPERM;
371 		goto err;
372 	}
373 
374 	/* Remember currently programmed MAC */
375 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
378 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379 	return 0;
380 err:
381 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382 	return status;
383 }
384 
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388 	if (BE2_chip(adapter)) {
389 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390 
391 		return &cmd->hw_stats;
392 	} else if (BE3_chip(adapter)) {
393 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394 
395 		return &cmd->hw_stats;
396 	} else {
397 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398 
399 		return &cmd->hw_stats;
400 	}
401 }
402 
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406 	if (BE2_chip(adapter)) {
407 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408 
409 		return &hw_stats->erx;
410 	} else if (BE3_chip(adapter)) {
411 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412 
413 		return &hw_stats->erx;
414 	} else {
415 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416 
417 		return &hw_stats->erx;
418 	}
419 }
420 
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426 	struct be_port_rxf_stats_v0 *port_stats =
427 					&rxf_stats->port[adapter->port_num];
428 	struct be_drv_stats *drvs = &adapter->drv_stats;
429 
430 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
432 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
433 	drvs->rx_control_frames = port_stats->rx_control_frames;
434 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446 	drvs->rx_dropped_header_too_small =
447 		port_stats->rx_dropped_header_too_small;
448 	drvs->rx_address_filtered =
449 					port_stats->rx_address_filtered +
450 					port_stats->rx_vlan_filtered;
451 	drvs->rx_alignment_symbol_errors =
452 		port_stats->rx_alignment_symbol_errors;
453 
454 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
455 	drvs->tx_controlframes = port_stats->tx_controlframes;
456 
457 	if (adapter->port_num)
458 		drvs->jabber_events = rxf_stats->port1_jabber_events;
459 	else
460 		drvs->jabber_events = rxf_stats->port0_jabber_events;
461 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
464 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469 
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475 	struct be_port_rxf_stats_v1 *port_stats =
476 					&rxf_stats->port[adapter->port_num];
477 	struct be_drv_stats *drvs = &adapter->drv_stats;
478 
479 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
483 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
484 	drvs->rx_control_frames = port_stats->rx_control_frames;
485 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495 	drvs->rx_dropped_header_too_small =
496 		port_stats->rx_dropped_header_too_small;
497 	drvs->rx_input_fifo_overflow_drop =
498 		port_stats->rx_input_fifo_overflow_drop;
499 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
500 	drvs->rx_alignment_symbol_errors =
501 		port_stats->rx_alignment_symbol_errors;
502 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
504 	drvs->tx_controlframes = port_stats->tx_controlframes;
505 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506 	drvs->jabber_events = port_stats->jabber_events;
507 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
510 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515 
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521 	struct be_port_rxf_stats_v2 *port_stats =
522 					&rxf_stats->port[adapter->port_num];
523 	struct be_drv_stats *drvs = &adapter->drv_stats;
524 
525 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
529 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
530 	drvs->rx_control_frames = port_stats->rx_control_frames;
531 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541 	drvs->rx_dropped_header_too_small =
542 		port_stats->rx_dropped_header_too_small;
543 	drvs->rx_input_fifo_overflow_drop =
544 		port_stats->rx_input_fifo_overflow_drop;
545 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
546 	drvs->rx_alignment_symbol_errors =
547 		port_stats->rx_alignment_symbol_errors;
548 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
550 	drvs->tx_controlframes = port_stats->tx_controlframes;
551 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552 	drvs->jabber_events = port_stats->jabber_events;
553 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
556 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560 	if (be_roce_supported(adapter)) {
561 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563 		drvs->rx_roce_frames = port_stats->roce_frames_received;
564 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
565 		drvs->roce_drops_payload_len =
566 			port_stats->roce_drops_payload_len;
567 	}
568 }
569 
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572 	struct be_drv_stats *drvs = &adapter->drv_stats;
573 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574 
575 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585 	drvs->rx_dropped_tcp_length =
586 				pport_stats->rx_dropped_invalid_tcp_length;
587 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590 	drvs->rx_dropped_header_too_small =
591 				pport_stats->rx_dropped_header_too_small;
592 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593 	drvs->rx_address_filtered =
594 					pport_stats->rx_address_filtered +
595 					pport_stats->rx_vlan_filtered;
596 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600 	drvs->jabber_events = pport_stats->rx_jabbers;
601 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
602 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603 	drvs->rx_drops_too_many_frags =
604 				pport_stats->rx_drops_too_many_frags_lo;
605 }
606 
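/* Accumulate a 16-bit HW counter that can wrap into a 32-bit SW counter */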
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x)			(x & 0xFFFF)
610 #define hi(x)			(x & 0xFFFF0000)
611 	bool wrapped = val < lo(*acc);
612 	u32 newacc = hi(*acc) + val;
613 
614 	if (wrapped)
615 		newacc += 65536;
616 	WRITE_ONCE(*acc, newacc);
617 }
618 
619 static void populate_erx_stats(struct be_adapter *adapter,
620 			       struct be_rx_obj *rxo, u32 erx_stat)
621 {
622 	if (!BEx_chip(adapter))
623 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624 	else
625 		/* This erx HW counter can wrap around after 65535.
626 		 * The driver accumulates it into a 32-bit value.
627 		 */
628 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629 				     (u16)erx_stat);
630 }
631 
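/* Copy the HW stats (fetched via FW cmd) into the driver's stats structures */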
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635 	struct be_rx_obj *rxo;
636 	int i;
637 	u32 erx_stat;
638 
639 	if (lancer_chip(adapter)) {
640 		populate_lancer_stats(adapter);
641 	} else {
642 		if (BE2_chip(adapter))
643 			populate_be_v0_stats(adapter);
644 		else if (BE3_chip(adapter))
645 			/* for BE3 */
646 			populate_be_v1_stats(adapter);
647 		else
648 			populate_be_v2_stats(adapter);
649 
650 		/* erx_v2 is a superset of v0/v1; use the v2 layout for v0/v1 access too */
651 		for_all_rx_queues(adapter, rxo, i) {
652 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653 			populate_erx_stats(adapter, rxo, erx_stat);
654 		}
655 	}
656 }
657 
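/* ndo_get_stats64 handler: aggregate per-queue SW counters and HW error stats */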
658 static void be_get_stats64(struct net_device *netdev,
659 			   struct rtnl_link_stats64 *stats)
660 {
661 	struct be_adapter *adapter = netdev_priv(netdev);
662 	struct be_drv_stats *drvs = &adapter->drv_stats;
663 	struct be_rx_obj *rxo;
664 	struct be_tx_obj *txo;
665 	u64 pkts, bytes;
666 	unsigned int start;
667 	int i;
668 
669 	for_all_rx_queues(adapter, rxo, i) {
670 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
671 
672 		do {
673 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674 			pkts = rx_stats(rxo)->rx_pkts;
675 			bytes = rx_stats(rxo)->rx_bytes;
676 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677 		stats->rx_packets += pkts;
678 		stats->rx_bytes += bytes;
679 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681 					rx_stats(rxo)->rx_drops_no_frags;
682 	}
683 
684 	for_all_tx_queues(adapter, txo, i) {
685 		const struct be_tx_stats *tx_stats = tx_stats(txo);
686 
687 		do {
688 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689 			pkts = tx_stats(txo)->tx_pkts;
690 			bytes = tx_stats(txo)->tx_bytes;
691 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692 		stats->tx_packets += pkts;
693 		stats->tx_bytes += bytes;
694 	}
695 
696 	/* bad pkts received */
697 	stats->rx_errors = drvs->rx_crc_errors +
698 		drvs->rx_alignment_symbol_errors +
699 		drvs->rx_in_range_errors +
700 		drvs->rx_out_range_errors +
701 		drvs->rx_frame_too_long +
702 		drvs->rx_dropped_too_small +
703 		drvs->rx_dropped_too_short +
704 		drvs->rx_dropped_header_too_small +
705 		drvs->rx_dropped_tcp_length +
706 		drvs->rx_dropped_runt;
707 
708 	/* detailed rx errors */
709 	stats->rx_length_errors = drvs->rx_in_range_errors +
710 		drvs->rx_out_range_errors +
711 		drvs->rx_frame_too_long;
712 
713 	stats->rx_crc_errors = drvs->rx_crc_errors;
714 
715 	/* frame alignment errors */
716 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717 
718 	/* receiver fifo overrun */
719 	/* drops_no_pbuf is not per-interface; it's per BE card */
720 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721 				drvs->rx_input_fifo_overflow_drop +
722 				drvs->rx_drops_no_pbuf;
723 }
724 
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727 	struct net_device *netdev = adapter->netdev;
728 
729 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730 		netif_carrier_off(netdev);
731 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732 	}
733 
734 	if (link_status)
735 		netif_carrier_on(netdev);
736 	else
737 		netif_carrier_off(netdev);
738 
739 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741 
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744 	if (skb->encapsulation)
745 		return skb_inner_transport_offset(skb) +
746 		       inner_tcp_hdrlen(skb);
747 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749 
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752 	struct be_tx_stats *stats = tx_stats(txo);
753 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754 	/* Account for headers which get duplicated in TSO pkt */
755 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756 
757 	u64_stats_update_begin(&stats->sync);
758 	stats->tx_reqs++;
759 	stats->tx_bytes += skb->len + dup_hdr_len;
760 	stats->tx_pkts += tx_pkts;
761 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762 		stats->tx_vxlan_offload_pkts += tx_pkts;
763 	u64_stats_update_end(&stats->sync);
764 }
765 
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769 	/* +1 for the header wrb */
770 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772 
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778 	wrb->rsvd0 = 0;
779 }
780 
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782  * to avoid the swap and shift/mask operations in wrb_fill().
783  */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786 	wrb->frag_pa_hi = 0;
787 	wrb->frag_pa_lo = 0;
788 	wrb->frag_len = 0;
789 	wrb->rsvd0 = 0;
790 }
791 
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793 				     struct sk_buff *skb)
794 {
795 	u8 vlan_prio;
796 	u16 vlan_tag;
797 
798 	vlan_tag = skb_vlan_tag_get(skb);
799 	vlan_prio = skb_vlan_tag_get_prio(skb);
800 	/* If the vlan priority provided by the OS is NOT in the available bitmap */
801 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803 				adapter->recommended_prio_bits;
804 
805 	return vlan_tag;
806 }
807 
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811 	return (inner_ip_hdr(skb)->version == 4) ?
812 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814 
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817 	return (ip_hdr(skb)->version == 4) ?
818 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820 
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825 
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828 	return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830 
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835 
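/* Derive the TX WRB header params (LSO, csum offload, vlan) from the skb */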
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837 				       struct sk_buff *skb,
838 				       struct be_wrb_params *wrb_params)
839 {
840 	u16 proto;
841 
842 	if (skb_is_gso(skb)) {
843 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
844 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848 		if (skb->encapsulation) {
849 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850 			proto = skb_inner_ip_proto(skb);
851 		} else {
852 			proto = skb_ip_proto(skb);
853 		}
854 		if (proto == IPPROTO_TCP)
855 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856 		else if (proto == IPPROTO_UDP)
857 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858 	}
859 
860 	if (skb_vlan_tag_present(skb)) {
861 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863 	}
864 
865 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867 
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869 			 struct be_eth_hdr_wrb *hdr,
870 			 struct be_wrb_params *wrb_params,
871 			 struct sk_buff *skb)
872 {
873 	memset(hdr, 0, sizeof(*hdr));
874 
875 	SET_TX_WRB_HDR_BITS(crc, hdr,
876 			    BE_WRB_F_GET(wrb_params->features, CRC));
877 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
878 			    BE_WRB_F_GET(wrb_params->features, IPCS));
879 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
881 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
882 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
883 
884 	SET_TX_WRB_HDR_BITS(lso, hdr,
885 			    BE_WRB_F_GET(wrb_params->features, LSO));
886 	SET_TX_WRB_HDR_BITS(lso6, hdr,
887 			    BE_WRB_F_GET(wrb_params->features, LSO6));
888 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889 
890 	/* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891 	 * hack is not needed, the evt bit is set while ringing the DB.
892 	 */
893 	SET_TX_WRB_HDR_BITS(event, hdr,
894 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895 	SET_TX_WRB_HDR_BITS(vlan, hdr,
896 			    BE_WRB_F_GET(wrb_params->features, VLAN));
897 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898 
899 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
902 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904 
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906 			  bool unmap_single)
907 {
908 	dma_addr_t dma;
909 	u32 frag_len = le32_to_cpu(wrb->frag_len);
910 
912 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913 		(u64)le32_to_cpu(wrb->frag_pa_lo);
914 	if (frag_len) {
915 		if (unmap_single)
916 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917 		else
918 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919 	}
920 }
921 
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925 	u32 head = txo->q.head;
926 
927 	queue_head_inc(&txo->q);
928 	return head;
929 }
930 
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933 				struct be_tx_obj *txo,
934 				struct be_wrb_params *wrb_params,
935 				struct sk_buff *skb, u16 head)
936 {
937 	u32 num_frags = skb_wrb_cnt(skb);
938 	struct be_queue_info *txq = &txo->q;
939 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940 
941 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
943 
944 	BUG_ON(txo->sent_skb_list[head]);
945 	txo->sent_skb_list[head] = skb;
946 	txo->last_req_hdr = head;
947 	atomic_add(num_frags, &txq->used);
948 	txo->last_req_wrb_cnt = num_frags;
949 	txo->pend_wrb_cnt += num_frags;
950 }
951 
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954 				 int len)
955 {
956 	struct be_eth_wrb *wrb;
957 	struct be_queue_info *txq = &txo->q;
958 
959 	wrb = queue_head_node(txq);
960 	wrb_fill(wrb, busaddr, len);
961 	queue_head_inc(txq);
962 }
963 
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965  * was invoked. The producer index is restored to the previous packet and the
966  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967  */
968 static void be_xmit_restore(struct be_adapter *adapter,
969 			    struct be_tx_obj *txo, u32 head, bool map_single,
970 			    u32 copied)
971 {
972 	struct device *dev;
973 	struct be_eth_wrb *wrb;
974 	struct be_queue_info *txq = &txo->q;
975 
976 	dev = &adapter->pdev->dev;
977 	txq->head = head;
978 
979 	/* skip the first wrb (hdr); it's not mapped */
980 	queue_head_inc(txq);
981 	while (copied) {
982 		wrb = queue_head_node(txq);
983 		unmap_tx_frag(dev, wrb, map_single);
984 		map_single = false;
985 		copied -= le32_to_cpu(wrb->frag_len);
986 		queue_head_inc(txq);
987 	}
988 
989 	txq->head = head;
990 }
991 
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994  * of WRBs used up by the packet.
995  */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997 			   struct sk_buff *skb,
998 			   struct be_wrb_params *wrb_params)
999 {
1000 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001 	struct device *dev = &adapter->pdev->dev;
1002 	bool map_single = false;
1003 	u32 head;
1004 	dma_addr_t busaddr;
1005 	int len;
1006 
1007 	head = be_tx_get_wrb_hdr(txo);
1008 
1009 	if (skb->len > skb->data_len) {
1010 		len = skb_headlen(skb);
1011 
1012 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013 		if (dma_mapping_error(dev, busaddr))
1014 			goto dma_err;
1015 		map_single = true;
1016 		be_tx_setup_wrb_frag(txo, busaddr, len);
1017 		copied += len;
1018 	}
1019 
1020 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022 		len = skb_frag_size(frag);
1023 
1024 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025 		if (dma_mapping_error(dev, busaddr))
1026 			goto dma_err;
1027 		be_tx_setup_wrb_frag(txo, busaddr, len);
1028 		copied += len;
1029 	}
1030 
1031 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032 
1033 	be_tx_stats_update(txo, skb);
1034 	return wrb_cnt;
1035 
1036 dma_err:
1037 	adapter->drv_stats.dma_map_errors++;
1038 	be_xmit_restore(adapter, txo, head, map_single, copied);
1039 	return 0;
1040 }
1041 
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046 
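/* Insert the vlan tag (and the outer qnq vlan, if any) into the packet in SW */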
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048 					     struct sk_buff *skb,
1049 					     struct be_wrb_params
1050 					     *wrb_params)
1051 {
1052 	bool insert_vlan = false;
1053 	u16 vlan_tag = 0;
1054 
1055 	skb = skb_share_check(skb, GFP_ATOMIC);
1056 	if (unlikely(!skb))
1057 		return skb;
1058 
1059 	if (skb_vlan_tag_present(skb)) {
1060 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1061 		insert_vlan = true;
1062 	}
1063 
1064 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1065 		if (!insert_vlan) {
1066 			vlan_tag = adapter->pvid;
1067 			insert_vlan = true;
1068 		}
1069 		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1070 		 * skip VLAN insertion
1071 		 */
1072 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1073 	}
1074 
1075 	if (insert_vlan) {
1076 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1077 						vlan_tag);
1078 		if (unlikely(!skb))
1079 			return skb;
1080 		__vlan_hwaccel_clear_tag(skb);
1081 	}
1082 
1083 	/* Insert the outer VLAN, if any */
1084 	if (adapter->qnq_vid) {
1085 		vlan_tag = adapter->qnq_vid;
1086 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1087 						vlan_tag);
1088 		if (unlikely(!skb))
1089 			return skb;
1090 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1091 	}
1092 
1093 	return skb;
1094 }
1095 
1096 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1097 {
1098 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1099 	u16 offset = ETH_HLEN;
1100 
1101 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1102 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1103 
1104 		offset += sizeof(struct ipv6hdr);
1105 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1106 		    ip6h->nexthdr != NEXTHDR_UDP) {
1107 			struct ipv6_opt_hdr *ehdr =
1108 				(struct ipv6_opt_hdr *)(skb->data + offset);
1109 
1110 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1111 			if (ehdr->hdrlen == 0xff)
1112 				return true;
1113 		}
1114 	}
1115 	return false;
1116 }
1117 
1118 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1121 }
1122 
1123 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1124 {
1125 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1126 }
1127 
1128 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1129 						  struct sk_buff *skb,
1130 						  struct be_wrb_params
1131 						  *wrb_params)
1132 {
1133 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1134 	unsigned int eth_hdr_len;
1135 	struct iphdr *ip;
1136 
1137 	/* For padded packets, BE HW modifies the tot_len field in the IP header
1138 	 * incorrectly when a VLAN tag is inserted by HW.
1139 	 * For padded packets, Lancer computes an incorrect checksum.
1140 	 */
1141 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1142 						VLAN_ETH_HLEN : ETH_HLEN;
1143 	if (skb->len <= 60 &&
1144 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1145 	    is_ipv4_pkt(skb)) {
1146 		ip = (struct iphdr *)ip_hdr(skb);
1147 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1148 	}
1149 
1150 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1151 	 * tagging in pvid-tagging mode
1152 	 */
1153 	if (be_pvid_tagging_enabled(adapter) &&
1154 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1155 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1156 
1157 	/* HW has a bug wherein it will calculate CSUM for VLAN
1158 	 * pkts even though CSUM offload is disabled.
1159 	 * Manually insert the VLAN tag in such pkts.
1160 	 */
1161 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1162 	    skb_vlan_tag_present(skb)) {
1163 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1164 		if (unlikely(!skb))
1165 			goto err;
1166 	}
1167 
1168 	/* HW may lockup when VLAN HW tagging is requested on
1169 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1170 	 * skip HW tagging is not enabled by FW.
1171 	 */
1172 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1173 		     (adapter->pvid || adapter->qnq_vid) &&
1174 		     !qnq_async_evt_rcvd(adapter)))
1175 		goto tx_drop;
1176 
1177 	/* Manual VLAN tag insertion to prevent:
1178 	 * ASIC lockup when the ASIC inserts VLAN tag into
1179 	 * certain ipv6 packets. Insert VLAN tags in driver,
1180 	 * and set event, completion, vlan bits accordingly
1181 	 * in the Tx WRB.
1182 	 */
1183 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1184 	    be_vlan_tag_tx_chk(adapter, skb)) {
1185 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1186 		if (unlikely(!skb))
1187 			goto err;
1188 	}
1189 
1190 	return skb;
1191 tx_drop:
1192 	dev_kfree_skb_any(skb);
1193 err:
1194 	return NULL;
1195 }
1196 
1197 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1198 					   struct sk_buff *skb,
1199 					   struct be_wrb_params *wrb_params)
1200 {
1201 	int err;
1202 
1203 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1204 	 * packets that are 32 bytes or less may cause a transmit stall
1205 	 * on that port. The workaround is to pad such packets
1206 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1207 	 */
1208 	if (skb->len <= 32) {
1209 		if (skb_put_padto(skb, 36))
1210 			return NULL;
1211 	}
1212 
1213 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1214 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1215 		if (!skb)
1216 			return NULL;
1217 	}
1218 
1219 	/* The stack can send us skbs with length greater than
1220 	 * what the HW can handle. Trim the extra bytes.
1221 	 */
1222 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1223 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1224 	WARN_ON(err);
1225 
1226 	return skb;
1227 }
1228 
1229 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1230 {
1231 	struct be_queue_info *txq = &txo->q;
1232 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1233 
1234 	/* Mark the last request eventable if it hasn't been marked already */
1235 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1236 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1237 
1238 	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1239 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1240 		wrb_fill_dummy(queue_head_node(txq));
1241 		queue_head_inc(txq);
1242 		atomic_inc(&txq->used);
1243 		txo->pend_wrb_cnt++;
1244 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1245 					   TX_HDR_WRB_NUM_SHIFT);
1246 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1247 					  TX_HDR_WRB_NUM_SHIFT);
1248 	}
1249 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1250 	txo->pend_wrb_cnt = 0;
1251 }
1252 
1253 /* OS2BMC related */
1254 
1255 #define DHCP_CLIENT_PORT	68
1256 #define DHCP_SERVER_PORT	67
1257 #define NET_BIOS_PORT1		137
1258 #define NET_BIOS_PORT2		138
1259 #define DHCPV6_RAS_PORT		547
1260 
1261 #define is_mc_allowed_on_bmc(adapter, eh)	\
1262 	(!is_multicast_filt_enabled(adapter) &&	\
1263 	 is_multicast_ether_addr(eh->h_dest) &&	\
1264 	 !is_broadcast_ether_addr(eh->h_dest))
1265 
1266 #define is_bc_allowed_on_bmc(adapter, eh)	\
1267 	(!is_broadcast_filt_enabled(adapter) &&	\
1268 	 is_broadcast_ether_addr(eh->h_dest))
1269 
1270 #define is_arp_allowed_on_bmc(adapter, skb)	\
1271 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1272 
1273 #define is_broadcast_packet(eh, adapter)	\
1274 		(is_multicast_ether_addr(eh->h_dest) && \
1275 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1276 
1277 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1278 
1279 #define is_arp_filt_enabled(adapter)	\
1280 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1281 
1282 #define is_dhcp_client_filt_enabled(adapter)	\
1283 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1284 
1285 #define is_dhcp_srvr_filt_enabled(adapter)	\
1286 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1287 
1288 #define is_nbios_filt_enabled(adapter)	\
1289 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1290 
1291 #define is_ipv6_na_filt_enabled(adapter)	\
1292 		(adapter->bmc_filt_mask &	\
1293 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1294 
1295 #define is_ipv6_ra_filt_enabled(adapter)	\
1296 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1297 
1298 #define is_ipv6_ras_filt_enabled(adapter)	\
1299 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1300 
1301 #define is_broadcast_filt_enabled(adapter)	\
1302 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1303 
1304 #define is_multicast_filt_enabled(adapter)	\
1305 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1306 
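/* Returns true if the packet must also be sent to the BMC (OS2BMC path),
 * based on the adapter's BMC filtering configuration.
 */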
1307 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1308 			       struct sk_buff **skb)
1309 {
1310 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1311 	bool os2bmc = false;
1312 
1313 	if (!be_is_os2bmc_enabled(adapter))
1314 		goto done;
1315 
1316 	if (!is_multicast_ether_addr(eh->h_dest))
1317 		goto done;
1318 
1319 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1320 	    is_bc_allowed_on_bmc(adapter, eh) ||
1321 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1322 		os2bmc = true;
1323 		goto done;
1324 	}
1325 
1326 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1327 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1328 		u8 nexthdr = hdr->nexthdr;
1329 
1330 		if (nexthdr == IPPROTO_ICMPV6) {
1331 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1332 
1333 			switch (icmp6->icmp6_type) {
1334 			case NDISC_ROUTER_ADVERTISEMENT:
1335 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1336 				goto done;
1337 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1338 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1339 				goto done;
1340 			default:
1341 				break;
1342 			}
1343 		}
1344 	}
1345 
1346 	if (is_udp_pkt((*skb))) {
1347 		struct udphdr *udp = udp_hdr((*skb));
1348 
1349 		switch (ntohs(udp->dest)) {
1350 		case DHCP_CLIENT_PORT:
1351 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1352 			goto done;
1353 		case DHCP_SERVER_PORT:
1354 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1355 			goto done;
1356 		case NET_BIOS_PORT1:
1357 		case NET_BIOS_PORT2:
1358 			os2bmc = is_nbios_filt_enabled(adapter);
1359 			goto done;
1360 		case DHCPV6_RAS_PORT:
1361 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1362 			goto done;
1363 		default:
1364 			break;
1365 		}
1366 	}
1367 done:
1368 	/* For packets sent over a vlan and destined to the BMC,
1369 	 * the ASIC expects the vlan tag to be inline in the packet.
1370 	 */
1371 	if (os2bmc)
1372 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1373 
1374 	return os2bmc;
1375 }
1376 
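/* ndo_start_xmit handler */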
1377 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1378 {
1379 	struct be_adapter *adapter = netdev_priv(netdev);
1380 	u16 q_idx = skb_get_queue_mapping(skb);
1381 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1382 	struct be_wrb_params wrb_params = { 0 };
1383 	bool flush = !skb->xmit_more;
1384 	u16 wrb_cnt;
1385 
1386 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1387 	if (unlikely(!skb))
1388 		goto drop;
1389 
1390 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1391 
1392 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393 	if (unlikely(!wrb_cnt)) {
1394 		dev_kfree_skb_any(skb);
1395 		goto drop;
1396 	}
1397 
1398 	/* If OS2BMC is enabled and the pkt is destined to the BMC,
1399 	 * enqueue the pkt a 2nd time with the mgmt bit set.
1400 	 */
1401 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1402 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1403 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1404 		if (unlikely(!wrb_cnt))
1405 			goto drop;
1406 		else
1407 			skb_get(skb);
1408 	}
1409 
1410 	if (be_is_txq_full(txo)) {
1411 		netif_stop_subqueue(netdev, q_idx);
1412 		tx_stats(txo)->tx_stops++;
1413 	}
1414 
1415 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1416 		be_xmit_flush(adapter, txo);
1417 
1418 	return NETDEV_TX_OK;
1419 drop:
1420 	tx_stats(txo)->tx_drv_drops++;
1421 	/* Flush the already enqueued tx requests */
1422 	if (flush && txo->pend_wrb_cnt)
1423 		be_xmit_flush(adapter, txo);
1424 
1425 	return NETDEV_TX_OK;
1426 }
1427 
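/* ndo_tx_timeout handler: dump TX queue/CQ state and pending skbs for debug;
 * on Lancer, also trigger a FW reset.
 */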
1428 static void be_tx_timeout(struct net_device *netdev)
1429 {
1430 	struct be_adapter *adapter = netdev_priv(netdev);
1431 	struct device *dev = &adapter->pdev->dev;
1432 	struct be_tx_obj *txo;
1433 	struct sk_buff *skb;
1434 	struct tcphdr *tcphdr;
1435 	struct udphdr *udphdr;
1436 	u32 *entry;
1437 	int status;
1438 	int i, j;
1439 
1440 	for_all_tx_queues(adapter, txo, i) {
1441 		dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1442 			 i, txo->q.head, txo->q.tail,
1443 			 atomic_read(&txo->q.used), txo->q.id);
1444 
1445 		entry = txo->q.dma_mem.va;
1446 		for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1447 			if (entry[j] != 0 || entry[j + 1] != 0 ||
1448 			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1449 				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1450 					 j, entry[j], entry[j + 1],
1451 					 entry[j + 2], entry[j + 3]);
1452 			}
1453 		}
1454 
1455 		entry = txo->cq.dma_mem.va;
1456 		dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1457 			 i, txo->cq.head, txo->cq.tail,
1458 			 atomic_read(&txo->cq.used));
1459 		for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1460 			if (entry[j] != 0 || entry[j + 1] != 0 ||
1461 			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1462 				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1463 					 j, entry[j], entry[j + 1],
1464 					 entry[j + 2], entry[j + 3]);
1465 			}
1466 		}
1467 
1468 		for (j = 0; j < TX_Q_LEN; j++) {
1469 			if (txo->sent_skb_list[j]) {
1470 				skb = txo->sent_skb_list[j];
1471 				if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1472 					tcphdr = tcp_hdr(skb);
1473 					dev_info(dev, "TCP source port %d\n",
1474 						 ntohs(tcphdr->source));
1475 					dev_info(dev, "TCP dest port %d\n",
1476 						 ntohs(tcphdr->dest));
1477 					dev_info(dev, "TCP sequence num %u\n",
1478 						 ntohl(tcphdr->seq));
1479 					dev_info(dev, "TCP ack_seq %u\n",
1480 						 ntohl(tcphdr->ack_seq));
1481 				} else if (ip_hdr(skb)->protocol ==
1482 					   IPPROTO_UDP) {
1483 					udphdr = udp_hdr(skb);
1484 					dev_info(dev, "UDP source port %d\n",
1485 						 ntohs(udphdr->source));
1486 					dev_info(dev, "UDP dest port %d\n",
1487 						 ntohs(udphdr->dest));
1488 				}
1489 				dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1490 					 j, skb, skb->len, skb->protocol);
1491 			}
1492 		}
1493 	}
1494 
1495 	if (lancer_chip(adapter)) {
1496 		dev_info(dev, "Initiating reset due to tx timeout\n");
1497 		dev_info(dev, "Resetting adapter\n");
1498 		status = lancer_physdev_ctrl(adapter,
1499 					     PHYSDEV_CONTROL_FW_RESET_MASK);
1500 		if (status)
1501 			dev_err(dev, "Reset failed .. Reboot server\n");
1502 	}
1503 }
1504 
1505 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1506 {
1507 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1508 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1509 }
1510 
1511 static int be_set_vlan_promisc(struct be_adapter *adapter)
1512 {
1513 	struct device *dev = &adapter->pdev->dev;
1514 	int status;
1515 
1516 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1517 		return 0;
1518 
1519 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1520 	if (!status) {
1521 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1522 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1523 	} else {
1524 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1525 	}
1526 	return status;
1527 }
1528 
1529 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1530 {
1531 	struct device *dev = &adapter->pdev->dev;
1532 	int status;
1533 
1534 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1535 	if (!status) {
1536 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1537 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1538 	}
1539 	return status;
1540 }
1541 
1542 /*
1543  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1544  * If the user configures more, place BE in vlan promiscuous mode.
1545  */
1546 static int be_vid_config(struct be_adapter *adapter)
1547 {
1548 	struct device *dev = &adapter->pdev->dev;
1549 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1550 	u16 num = 0, i = 0;
1551 	int status = 0;
1552 
1553 	/* No need to change the VLAN state if the I/F is in promiscuous mode */
1554 	if (adapter->netdev->flags & IFF_PROMISC)
1555 		return 0;
1556 
1557 	if (adapter->vlans_added > be_max_vlans(adapter))
1558 		return be_set_vlan_promisc(adapter);
1559 
1560 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1561 		status = be_clear_vlan_promisc(adapter);
1562 		if (status)
1563 			return status;
1564 	}
1565 	/* Construct VLAN Table to give to HW */
1566 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1567 		vids[num++] = cpu_to_le16(i);
1568 
1569 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1570 	if (status) {
1571 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1572 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1573 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1574 		    addl_status(status) ==
1575 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1576 			return be_set_vlan_promisc(adapter);
1577 	}
1578 	return status;
1579 }
1580 
1581 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1582 {
1583 	struct be_adapter *adapter = netdev_priv(netdev);
1584 	int status = 0;
1585 
1586 	mutex_lock(&adapter->rx_filter_lock);
1587 
1588 	/* Packets with VID 0 are always received by Lancer by default */
1589 	if (lancer_chip(adapter) && vid == 0)
1590 		goto done;
1591 
1592 	if (test_bit(vid, adapter->vids))
1593 		goto done;
1594 
1595 	set_bit(vid, adapter->vids);
1596 	adapter->vlans_added++;
1597 
1598 	status = be_vid_config(adapter);
1599 done:
1600 	mutex_unlock(&adapter->rx_filter_lock);
1601 	return status;
1602 }
1603 
1604 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1605 {
1606 	struct be_adapter *adapter = netdev_priv(netdev);
1607 	int status = 0;
1608 
1609 	mutex_lock(&adapter->rx_filter_lock);
1610 
1611 	/* Packets with VID 0 are always received by Lancer by default */
1612 	if (lancer_chip(adapter) && vid == 0)
1613 		goto done;
1614 
1615 	if (!test_bit(vid, adapter->vids))
1616 		goto done;
1617 
1618 	clear_bit(vid, adapter->vids);
1619 	adapter->vlans_added--;
1620 
1621 	status = be_vid_config(adapter);
1622 done:
1623 	mutex_unlock(&adapter->rx_filter_lock);
1624 	return status;
1625 }
1626 
1627 static void be_set_all_promisc(struct be_adapter *adapter)
1628 {
1629 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1630 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1631 }
1632 
1633 static void be_set_mc_promisc(struct be_adapter *adapter)
1634 {
1635 	int status;
1636 
1637 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1638 		return;
1639 
1640 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1641 	if (!status)
1642 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1643 }
1644 
1645 static void be_set_uc_promisc(struct be_adapter *adapter)
1646 {
1647 	int status;
1648 
1649 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1650 		return;
1651 
1652 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1653 	if (!status)
1654 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1655 }
1656 
1657 static void be_clear_uc_promisc(struct be_adapter *adapter)
1658 {
1659 	int status;
1660 
1661 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1662 		return;
1663 
1664 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1665 	if (!status)
1666 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1667 }
1668 
1669 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1670  * We use a single callback function for both sync and unsync. We really don't
1671  * add/remove addresses through this callback; we only use it to detect changes
1672  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1673  */
1674 static int be_uc_list_update(struct net_device *netdev,
1675 			     const unsigned char *addr)
1676 {
1677 	struct be_adapter *adapter = netdev_priv(netdev);
1678 
1679 	adapter->update_uc_list = true;
1680 	return 0;
1681 }
1682 
1683 static int be_mc_list_update(struct net_device *netdev,
1684 			     const unsigned char *addr)
1685 {
1686 	struct be_adapter *adapter = netdev_priv(netdev);
1687 
1688 	adapter->update_mc_list = true;
1689 	return 0;
1690 }
1691 
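/* Program the HW multicast filter from the netdev mc-list, falling back to
 * multicast promiscuous mode when the list is too long.
 */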
1692 static void be_set_mc_list(struct be_adapter *adapter)
1693 {
1694 	struct net_device *netdev = adapter->netdev;
1695 	struct netdev_hw_addr *ha;
1696 	bool mc_promisc = false;
1697 	int status;
1698 
1699 	netif_addr_lock_bh(netdev);
1700 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1701 
1702 	if (netdev->flags & IFF_PROMISC) {
1703 		adapter->update_mc_list = false;
1704 	} else if (netdev->flags & IFF_ALLMULTI ||
1705 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1706 		/* Enable multicast promisc if num configured exceeds
1707 		 * what we support
1708 		 */
1709 		mc_promisc = true;
1710 		adapter->update_mc_list = false;
1711 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1712 		/* Update mc-list unconditionally if the iface was previously
1713 		 * in mc-promisc mode and now is out of that mode.
1714 		 */
1715 		adapter->update_mc_list = true;
1716 	}
1717 
1718 	if (adapter->update_mc_list) {
1719 		int i = 0;
1720 
1721 		/* cache the mc-list in adapter */
1722 		netdev_for_each_mc_addr(ha, netdev) {
1723 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1724 			i++;
1725 		}
1726 		adapter->mc_count = netdev_mc_count(netdev);
1727 	}
1728 	netif_addr_unlock_bh(netdev);
1729 
1730 	if (mc_promisc) {
1731 		be_set_mc_promisc(adapter);
1732 	} else if (adapter->update_mc_list) {
1733 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1734 		if (!status)
1735 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1736 		else
1737 			be_set_mc_promisc(adapter);
1738 
1739 		adapter->update_mc_list = false;
1740 	}
1741 }
1742 
1743 static void be_clear_mc_list(struct be_adapter *adapter)
1744 {
1745 	struct net_device *netdev = adapter->netdev;
1746 
1747 	__dev_mc_unsync(netdev, NULL);
1748 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1749 	adapter->mc_count = 0;
1750 }
1751 
1752 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1753 {
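	/* If this entry is the primary MAC (dev_mac) that is already
	 * programmed at pmac_id[0], reuse that pmac_id instead of
	 * programming a duplicate MAC filter.
	 */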
1754 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1755 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1756 		return 0;
1757 	}
1758 
1759 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1760 			       adapter->if_handle,
1761 			       &adapter->pmac_id[uc_idx + 1], 0);
1762 }
1763 
1764 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1765 {
1766 	if (pmac_id == adapter->pmac_id[0])
1767 		return;
1768 
1769 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1770 }
1771 
1772 static void be_set_uc_list(struct be_adapter *adapter)
1773 {
1774 	struct net_device *netdev = adapter->netdev;
1775 	struct netdev_hw_addr *ha;
1776 	bool uc_promisc = false;
1777 	int curr_uc_macs = 0, i;
1778 
1779 	netif_addr_lock_bh(netdev);
1780 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1781 
1782 	if (netdev->flags & IFF_PROMISC) {
1783 		adapter->update_uc_list = false;
1784 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
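		/* pmac_id[0] is used by the primary MAC, so only
		 * be_max_uc() - 1 slots are available for the uc-list;
		 * fall back to uc-promisc when the list does not fit.
		 */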
1785 		uc_promisc = true;
1786 		adapter->update_uc_list = false;
1787 	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1788 		/* Update uc-list unconditionally if the iface was previously
1789 		 * in uc-promisc mode and now is out of that mode.
1790 		 */
1791 		adapter->update_uc_list = true;
1792 	}
1793 
1794 	if (adapter->update_uc_list) {
1795 		/* cache the uc-list in adapter array */
1796 		i = 0;
1797 		netdev_for_each_uc_addr(ha, netdev) {
1798 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1799 			i++;
1800 		}
1801 		curr_uc_macs = netdev_uc_count(netdev);
1802 	}
1803 	netif_addr_unlock_bh(netdev);
1804 
1805 	if (uc_promisc) {
1806 		be_set_uc_promisc(adapter);
1807 	} else if (adapter->update_uc_list) {
1808 		be_clear_uc_promisc(adapter);
1809 
1810 		for (i = 0; i < adapter->uc_macs; i++)
1811 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1812 
1813 		for (i = 0; i < curr_uc_macs; i++)
1814 			be_uc_mac_add(adapter, i);
1815 		adapter->uc_macs = curr_uc_macs;
1816 		adapter->update_uc_list = false;
1817 	}
1818 }
1819 
1820 static void be_clear_uc_list(struct be_adapter *adapter)
1821 {
1822 	struct net_device *netdev = adapter->netdev;
1823 	int i;
1824 
1825 	__dev_uc_unsync(netdev, NULL);
1826 	for (i = 0; i < adapter->uc_macs; i++)
1827 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1828 
1829 	adapter->uc_macs = 0;
1830 }
1831 
1832 static void __be_set_rx_mode(struct be_adapter *adapter)
1833 {
1834 	struct net_device *netdev = adapter->netdev;
1835 
1836 	mutex_lock(&adapter->rx_filter_lock);
1837 
1838 	if (netdev->flags & IFF_PROMISC) {
1839 		if (!be_in_all_promisc(adapter))
1840 			be_set_all_promisc(adapter);
1841 	} else if (be_in_all_promisc(adapter)) {
1842 		/* We need to re-program the vlan-list or clear
1843 		 * vlan-promisc mode (if needed) when the interface
1844 		 * comes out of promisc mode.
1845 		 */
1846 		be_vid_config(adapter);
1847 	}
1848 
1849 	be_set_uc_list(adapter);
1850 	be_set_mc_list(adapter);
1851 
1852 	mutex_unlock(&adapter->rx_filter_lock);
1853 }
1854 
1855 static void be_work_set_rx_mode(struct work_struct *work)
1856 {
1857 	struct be_cmd_work *cmd_work =
1858 				container_of(work, struct be_cmd_work, work);
1859 
1860 	__be_set_rx_mode(cmd_work->adapter);
1861 	kfree(cmd_work);
1862 }
1863 
1864 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1865 {
1866 	struct be_adapter *adapter = netdev_priv(netdev);
1867 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1868 	int status;
1869 
1870 	if (!sriov_enabled(adapter))
1871 		return -EPERM;
1872 
1873 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1874 		return -EINVAL;
1875 
1876 	/* Proceed further only if user provided MAC is different
1877 	 * from active MAC
1878 	 */
1879 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1880 		return 0;
1881 
1882 	if (BEx_chip(adapter)) {
1883 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1884 				vf + 1);
1885 
1886 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1887 					 &vf_cfg->pmac_id, vf + 1);
1888 	} else {
1889 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1890 					vf + 1);
1891 	}
1892 
1893 	if (status) {
1894 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1895 			mac, vf, status);
1896 		return be_cmd_status(status);
1897 	}
1898 
1899 	ether_addr_copy(vf_cfg->mac_addr, mac);
1900 
1901 	return 0;
1902 }
1903 
1904 static int be_get_vf_config(struct net_device *netdev, int vf,
1905 			    struct ifla_vf_info *vi)
1906 {
1907 	struct be_adapter *adapter = netdev_priv(netdev);
1908 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1909 
1910 	if (!sriov_enabled(adapter))
1911 		return -EPERM;
1912 
1913 	if (vf >= adapter->num_vfs)
1914 		return -EINVAL;
1915 
1916 	vi->vf = vf;
1917 	vi->max_tx_rate = vf_cfg->tx_rate;
1918 	vi->min_tx_rate = 0;
1919 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1920 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1921 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1922 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1923 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1924 
1925 	return 0;
1926 }
1927 
1928 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1929 {
1930 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1931 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1932 	int vf_if_id = vf_cfg->if_handle;
1933 	int status;
1934 
1935 	/* Enable Transparent VLAN Tagging */
1936 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1937 	if (status)
1938 		return status;
1939 
1940 	/* Clear any pre-programmed VLAN filters on the VF now that TVT is enabled */
1941 	vids[0] = 0;
1942 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1943 	if (!status)
1944 		dev_info(&adapter->pdev->dev,
1945 			 "Cleared guest VLANs on VF%d", vf);
1946 
1947 	/* After TVT is enabled, disallow VFs from programming VLAN filters */
1948 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1949 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1950 						  ~BE_PRIV_FILTMGMT, vf + 1);
1951 		if (!status)
1952 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1953 	}
1954 	return 0;
1955 }
1956 
1957 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1958 {
1959 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1960 	struct device *dev = &adapter->pdev->dev;
1961 	int status;
1962 
1963 	/* Reset Transparent VLAN Tagging. */
1964 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1965 				       vf_cfg->if_handle, 0, 0);
1966 	if (status)
1967 		return status;
1968 
1969 	/* Allow VFs to program VLAN filtering */
1970 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1971 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1972 						  BE_PRIV_FILTMGMT, vf + 1);
1973 		if (!status) {
1974 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1975 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1976 		}
1977 	}
1978 
1979 	dev_info(dev,
1980 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1981 	return 0;
1982 }
1983 
1984 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1985 			  __be16 vlan_proto)
1986 {
1987 	struct be_adapter *adapter = netdev_priv(netdev);
1988 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1989 	int status;
1990 
1991 	if (!sriov_enabled(adapter))
1992 		return -EPERM;
1993 
1994 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1995 		return -EINVAL;
1996 
1997 	if (vlan_proto != htons(ETH_P_8021Q))
1998 		return -EPROTONOSUPPORT;
1999 
2000 	if (vlan || qos) {
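		/* Fold the 802.1p priority (qos) bits into the VLAN tag
		 * before enabling transparent VLAN tagging for this VF.
		 */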
2001 		vlan |= qos << VLAN_PRIO_SHIFT;
2002 		status = be_set_vf_tvt(adapter, vf, vlan);
2003 	} else {
2004 		status = be_clear_vf_tvt(adapter, vf);
2005 	}
2006 
2007 	if (status) {
2008 		dev_err(&adapter->pdev->dev,
2009 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2010 			status);
2011 		return be_cmd_status(status);
2012 	}
2013 
2014 	vf_cfg->vlan_tag = vlan;
2015 	return 0;
2016 }
2017 
2018 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2019 			     int min_tx_rate, int max_tx_rate)
2020 {
2021 	struct be_adapter *adapter = netdev_priv(netdev);
2022 	struct device *dev = &adapter->pdev->dev;
2023 	int percent_rate, status = 0;
2024 	u16 link_speed = 0;
2025 	u8 link_status;
2026 
2027 	if (!sriov_enabled(adapter))
2028 		return -EPERM;
2029 
2030 	if (vf >= adapter->num_vfs)
2031 		return -EINVAL;
2032 
2033 	if (min_tx_rate)
2034 		return -EINVAL;
2035 
2036 	if (!max_tx_rate)
2037 		goto config_qos;
2038 
2039 	status = be_cmd_link_status_query(adapter, &link_speed,
2040 					  &link_status, 0);
2041 	if (status)
2042 		goto err;
2043 
2044 	if (!link_status) {
2045 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
2046 		status = -ENETDOWN;
2047 		goto err;
2048 	}
2049 
2050 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2051 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2052 			link_speed);
2053 		status = -EINVAL;
2054 		goto err;
2055 	}
2056 
2057 	/* On Skyhawk the QOS setting must be done only as a % value */
2058 	percent_rate = link_speed / 100;
2059 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2060 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2061 			percent_rate);
2062 		status = -EINVAL;
2063 		goto err;
2064 	}
2065 
2066 config_qos:
2067 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2068 	if (status)
2069 		goto err;
2070 
2071 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2072 	return 0;
2073 
2074 err:
2075 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2076 		max_tx_rate, vf);
2077 	return be_cmd_status(status);
2078 }
2079 
2080 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2081 				int link_state)
2082 {
2083 	struct be_adapter *adapter = netdev_priv(netdev);
2084 	int status;
2085 
2086 	if (!sriov_enabled(adapter))
2087 		return -EPERM;
2088 
2089 	if (vf >= adapter->num_vfs)
2090 		return -EINVAL;
2091 
2092 	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2093 	if (status) {
2094 		dev_err(&adapter->pdev->dev,
2095 			"Link state change on VF %d failed: %#x\n", vf, status);
2096 		return be_cmd_status(status);
2097 	}
2098 
2099 	adapter->vf_cfg[vf].plink_tracking = link_state;
2100 
2101 	return 0;
2102 }
2103 
2104 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2105 {
2106 	struct be_adapter *adapter = netdev_priv(netdev);
2107 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2108 	u8 spoofchk;
2109 	int status;
2110 
2111 	if (!sriov_enabled(adapter))
2112 		return -EPERM;
2113 
2114 	if (vf >= adapter->num_vfs)
2115 		return -EINVAL;
2116 
2117 	if (BEx_chip(adapter))
2118 		return -EOPNOTSUPP;
2119 
2120 	if (enable == vf_cfg->spoofchk)
2121 		return 0;
2122 
2123 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2124 
2125 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2126 				       0, spoofchk);
2127 	if (status) {
2128 		dev_err(&adapter->pdev->dev,
2129 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2130 		return be_cmd_status(status);
2131 	}
2132 
2133 	vf_cfg->spoofchk = enable;
2134 	return 0;
2135 }
2136 
2137 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2138 			  ulong now)
2139 {
2140 	aic->rx_pkts_prev = rx_pkts;
2141 	aic->tx_reqs_prev = tx_pkts;
2142 	aic->jiffies = now;
2143 }
2144 
2145 static int be_get_new_eqd(struct be_eq_obj *eqo)
2146 {
2147 	struct be_adapter *adapter = eqo->adapter;
2148 	int eqd, start;
2149 	struct be_aic_obj *aic;
2150 	struct be_rx_obj *rxo;
2151 	struct be_tx_obj *txo;
2152 	u64 rx_pkts = 0, tx_pkts = 0;
2153 	ulong now;
2154 	u32 pps, delta;
2155 	int i;
2156 
2157 	aic = &adapter->aic_obj[eqo->idx];
2158 	if (!aic->enable) {
2159 		if (aic->jiffies)
2160 			aic->jiffies = 0;
2161 		eqd = aic->et_eqd;
2162 		return eqd;
2163 	}
2164 
2165 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2166 		do {
2167 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2168 			rx_pkts += rxo->stats.rx_pkts;
2169 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2170 	}
2171 
2172 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2173 		do {
2174 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2175 			tx_pkts += txo->stats.tx_reqs;
2176 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2177 	}
2178 
2179 	/* Skip, if wrapped around or first calculation */
2180 	now = jiffies;
2181 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2182 	    rx_pkts < aic->rx_pkts_prev ||
2183 	    tx_pkts < aic->tx_reqs_prev) {
2184 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2185 		return aic->prev_eqd;
2186 	}
2187 
2188 	delta = jiffies_to_msecs(now - aic->jiffies);
2189 	if (delta == 0)
2190 		return aic->prev_eqd;
2191 
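	/* Derive a new delay from the aggregate rx+tx pkt rate; higher
	 * rates yield a larger eqd, which is then bounded by the
	 * min/max eqd limits below.
	 */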
2192 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2193 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2194 	eqd = (pps / 15000) << 2;
2195 
2196 	if (eqd < 8)
2197 		eqd = 0;
2198 	eqd = min_t(u32, eqd, aic->max_eqd);
2199 	eqd = max_t(u32, eqd, aic->min_eqd);
2200 
2201 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2202 
2203 	return eqd;
2204 }
2205 
2206 /* For Skyhawk-R only */
2207 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2208 {
2209 	struct be_adapter *adapter = eqo->adapter;
2210 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2211 	ulong now = jiffies;
2212 	int eqd;
2213 	u32 mult_enc;
2214 
2215 	if (!aic->enable)
2216 		return 0;
2217 
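	/* Reuse the previous eqd if less than a millisecond has elapsed
	 * since the last AIC update.
	 */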
2218 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2219 		eqd = aic->prev_eqd;
2220 	else
2221 		eqd = be_get_new_eqd(eqo);
2222 
2223 	if (eqd > 100)
2224 		mult_enc = R2I_DLY_ENC_1;
2225 	else if (eqd > 60)
2226 		mult_enc = R2I_DLY_ENC_2;
2227 	else if (eqd > 20)
2228 		mult_enc = R2I_DLY_ENC_3;
2229 	else
2230 		mult_enc = R2I_DLY_ENC_0;
2231 
2232 	aic->prev_eqd = eqd;
2233 
2234 	return mult_enc;
2235 }
2236 
2237 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2238 {
2239 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2240 	struct be_aic_obj *aic;
2241 	struct be_eq_obj *eqo;
2242 	int i, num = 0, eqd;
2243 
2244 	for_all_evt_queues(adapter, eqo, i) {
2245 		aic = &adapter->aic_obj[eqo->idx];
2246 		eqd = be_get_new_eqd(eqo);
2247 		if (force_update || eqd != aic->prev_eqd) {
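			/* be_cmd_modify_eqd() consumes a delay multiplier
			 * rather than the eqd value itself; scale accordingly.
			 */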
2248 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2249 			set_eqd[num].eq_id = eqo->q.id;
2250 			aic->prev_eqd = eqd;
2251 			num++;
2252 		}
2253 	}
2254 
2255 	if (num)
2256 		be_cmd_modify_eqd(adapter, set_eqd, num);
2257 }
2258 
2259 static void be_rx_stats_update(struct be_rx_obj *rxo,
2260 			       struct be_rx_compl_info *rxcp)
2261 {
2262 	struct be_rx_stats *stats = rx_stats(rxo);
2263 
2264 	u64_stats_update_begin(&stats->sync);
2265 	stats->rx_compl++;
2266 	stats->rx_bytes += rxcp->pkt_size;
2267 	stats->rx_pkts++;
2268 	if (rxcp->tunneled)
2269 		stats->rx_vxlan_offload_pkts++;
2270 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2271 		stats->rx_mcast_pkts++;
2272 	if (rxcp->err)
2273 		stats->rx_compl_err++;
2274 	u64_stats_update_end(&stats->sync);
2275 }
2276 
2277 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2278 {
2279 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2280 	 * Also ignore ipcksm for ipv6 pkts.
2281 	 */
2282 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2283 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2284 }
2285 
2286 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2287 {
2288 	struct be_adapter *adapter = rxo->adapter;
2289 	struct be_rx_page_info *rx_page_info;
2290 	struct be_queue_info *rxq = &rxo->q;
2291 	u32 frag_idx = rxq->tail;
2292 
2293 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2294 	BUG_ON(!rx_page_info->page);
2295 
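	/* The big page is unmapped only when its last fragment is consumed;
	 * for all other fragments just sync the frag for CPU access.
	 */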
2296 	if (rx_page_info->last_frag) {
2297 		dma_unmap_page(&adapter->pdev->dev,
2298 			       dma_unmap_addr(rx_page_info, bus),
2299 			       adapter->big_page_size, DMA_FROM_DEVICE);
2300 		rx_page_info->last_frag = false;
2301 	} else {
2302 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2303 					dma_unmap_addr(rx_page_info, bus),
2304 					rx_frag_size, DMA_FROM_DEVICE);
2305 	}
2306 
2307 	queue_tail_inc(rxq);
2308 	atomic_dec(&rxq->used);
2309 	return rx_page_info;
2310 }
2311 
2312 /* Throw away the data in the Rx completion */
2313 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2314 				struct be_rx_compl_info *rxcp)
2315 {
2316 	struct be_rx_page_info *page_info;
2317 	u16 i, num_rcvd = rxcp->num_rcvd;
2318 
2319 	for (i = 0; i < num_rcvd; i++) {
2320 		page_info = get_rx_page_info(rxo);
2321 		put_page(page_info->page);
2322 		memset(page_info, 0, sizeof(*page_info));
2323 	}
2324 }
2325 
2326 /*
2327  * skb_fill_rx_data forms a complete skb for an ether frame
2328  * indicated by rxcp.
2329  */
2330 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2331 			     struct be_rx_compl_info *rxcp)
2332 {
2333 	struct be_rx_page_info *page_info;
2334 	u16 i, j;
2335 	u16 hdr_len, curr_frag_len, remaining;
2336 	u8 *start;
2337 
2338 	page_info = get_rx_page_info(rxo);
2339 	start = page_address(page_info->page) + page_info->page_offset;
2340 	prefetch(start);
2341 
2342 	/* Copy data in the first descriptor of this completion */
2343 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2344 
2345 	skb->len = curr_frag_len;
2346 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2347 		memcpy(skb->data, start, curr_frag_len);
2348 		/* Complete packet has now been moved to data */
2349 		put_page(page_info->page);
2350 		skb->data_len = 0;
2351 		skb->tail += curr_frag_len;
2352 	} else {
2353 		hdr_len = ETH_HLEN;
2354 		memcpy(skb->data, start, hdr_len);
2355 		skb_shinfo(skb)->nr_frags = 1;
2356 		skb_frag_set_page(skb, 0, page_info->page);
2357 		skb_shinfo(skb)->frags[0].page_offset =
2358 					page_info->page_offset + hdr_len;
2359 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2360 				  curr_frag_len - hdr_len);
2361 		skb->data_len = curr_frag_len - hdr_len;
2362 		skb->truesize += rx_frag_size;
2363 		skb->tail += hdr_len;
2364 	}
2365 	page_info->page = NULL;
2366 
2367 	if (rxcp->pkt_size <= rx_frag_size) {
2368 		BUG_ON(rxcp->num_rcvd != 1);
2369 		return;
2370 	}
2371 
2372 	/* More frags present for this completion */
2373 	remaining = rxcp->pkt_size - curr_frag_len;
2374 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2375 		page_info = get_rx_page_info(rxo);
2376 		curr_frag_len = min(remaining, rx_frag_size);
2377 
2378 		/* Coalesce all frags from the same physical page in one slot */
2379 		if (page_info->page_offset == 0) {
2380 			/* Fresh page */
2381 			j++;
2382 			skb_frag_set_page(skb, j, page_info->page);
2383 			skb_shinfo(skb)->frags[j].page_offset =
2384 							page_info->page_offset;
2385 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2386 			skb_shinfo(skb)->nr_frags++;
2387 		} else {
2388 			put_page(page_info->page);
2389 		}
2390 
2391 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2392 		skb->len += curr_frag_len;
2393 		skb->data_len += curr_frag_len;
2394 		skb->truesize += rx_frag_size;
2395 		remaining -= curr_frag_len;
2396 		page_info->page = NULL;
2397 	}
2398 	BUG_ON(j > MAX_SKB_FRAGS);
2399 }
2400 
2401 /* Process the RX completion indicated by rxcp when GRO is disabled */
2402 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2403 				struct be_rx_compl_info *rxcp)
2404 {
2405 	struct be_adapter *adapter = rxo->adapter;
2406 	struct net_device *netdev = adapter->netdev;
2407 	struct sk_buff *skb;
2408 
2409 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2410 	if (unlikely(!skb)) {
2411 		rx_stats(rxo)->rx_drops_no_skbs++;
2412 		be_rx_compl_discard(rxo, rxcp);
2413 		return;
2414 	}
2415 
2416 	skb_fill_rx_data(rxo, skb, rxcp);
2417 
2418 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2419 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2420 	else
2421 		skb_checksum_none_assert(skb);
2422 
2423 	skb->protocol = eth_type_trans(skb, netdev);
2424 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2425 	if (netdev->features & NETIF_F_RXHASH)
2426 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2427 
2428 	skb->csum_level = rxcp->tunneled;
2429 	skb_mark_napi_id(skb, napi);
2430 
2431 	if (rxcp->vlanf)
2432 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2433 
2434 	netif_receive_skb(skb);
2435 }
2436 
2437 /* Process the RX completion indicated by rxcp when GRO is enabled */
2438 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2439 				    struct napi_struct *napi,
2440 				    struct be_rx_compl_info *rxcp)
2441 {
2442 	struct be_adapter *adapter = rxo->adapter;
2443 	struct be_rx_page_info *page_info;
2444 	struct sk_buff *skb = NULL;
2445 	u16 remaining, curr_frag_len;
2446 	u16 i, j;
2447 
2448 	skb = napi_get_frags(napi);
2449 	if (!skb) {
2450 		be_rx_compl_discard(rxo, rxcp);
2451 		return;
2452 	}
2453 
2454 	remaining = rxcp->pkt_size;
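	/* j starts at -1 so that the first fragment lands in frags[0] */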
2455 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2456 		page_info = get_rx_page_info(rxo);
2457 
2458 		curr_frag_len = min(remaining, rx_frag_size);
2459 
2460 		/* Coalesce all frags from the same physical page in one slot */
2461 		if (i == 0 || page_info->page_offset == 0) {
2462 			/* First frag or Fresh page */
2463 			j++;
2464 			skb_frag_set_page(skb, j, page_info->page);
2465 			skb_shinfo(skb)->frags[j].page_offset =
2466 							page_info->page_offset;
2467 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2468 		} else {
2469 			put_page(page_info->page);
2470 		}
2471 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2472 		skb->truesize += rx_frag_size;
2473 		remaining -= curr_frag_len;
2474 		memset(page_info, 0, sizeof(*page_info));
2475 	}
2476 	BUG_ON(j > MAX_SKB_FRAGS);
2477 
2478 	skb_shinfo(skb)->nr_frags = j + 1;
2479 	skb->len = rxcp->pkt_size;
2480 	skb->data_len = rxcp->pkt_size;
2481 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2482 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2483 	if (adapter->netdev->features & NETIF_F_RXHASH)
2484 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2485 
2486 	skb->csum_level = rxcp->tunneled;
2487 
2488 	if (rxcp->vlanf)
2489 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2490 
2491 	napi_gro_frags(napi);
2492 }
2493 
2494 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2495 				 struct be_rx_compl_info *rxcp)
2496 {
2497 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2498 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2499 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2500 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2501 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2502 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2503 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2504 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2505 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2506 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2507 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2508 	if (rxcp->vlanf) {
2509 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2510 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2511 	}
2512 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2513 	rxcp->tunneled =
2514 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2515 }
2516 
2517 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2518 				 struct be_rx_compl_info *rxcp)
2519 {
2520 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2521 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2522 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2523 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2524 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2525 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2526 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2527 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2528 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2529 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2530 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2531 	if (rxcp->vlanf) {
2532 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2533 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2534 	}
2535 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2536 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2537 }
2538 
2539 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2540 {
2541 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2542 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2543 	struct be_adapter *adapter = rxo->adapter;
2544 
2545 	/* For checking the valid bit it is Ok to use either definition as the
2546 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2547 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2548 		return NULL;
2549 
2550 	rmb();
2551 	be_dws_le_to_cpu(compl, sizeof(*compl));
2552 
2553 	if (adapter->be3_native)
2554 		be_parse_rx_compl_v1(compl, rxcp);
2555 	else
2556 		be_parse_rx_compl_v0(compl, rxcp);
2557 
2558 	if (rxcp->ip_frag)
2559 		rxcp->l4_csum = 0;
2560 
2561 	if (rxcp->vlanf) {
2562 		/* In QNQ modes, if qnq bit is not set, then the packet was
2563 		 * tagged only with the transparent outer vlan-tag and must
2564 		 * not be treated as a vlan packet by host
2565 		 */
2566 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2567 			rxcp->vlanf = 0;
2568 
2569 		if (!lancer_chip(adapter))
2570 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2571 
2572 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2573 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2574 			rxcp->vlanf = 0;
2575 	}
2576 
2577 	/* As the compl has been parsed, reset it; we won't touch it again */
2578 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2579 
2580 	queue_tail_inc(&rxo->cq);
2581 	return rxcp;
2582 }
2583 
2584 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2585 {
2586 	u32 order = get_order(size);
2587 
2588 	if (order > 0)
2589 		gfp |= __GFP_COMP;
2590 	return alloc_pages(gfp, order);
2591 }
2592 
2593 /*
2594  * Allocate a page, split it to fragments of size rx_frag_size and post as
2595  * receive buffers to BE
2596  */
2597 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2598 {
2599 	struct be_adapter *adapter = rxo->adapter;
2600 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2601 	struct be_queue_info *rxq = &rxo->q;
2602 	struct page *pagep = NULL;
2603 	struct device *dev = &adapter->pdev->dev;
2604 	struct be_eth_rx_d *rxd;
2605 	u64 page_dmaaddr = 0, frag_dmaaddr;
2606 	u32 posted, page_offset = 0, notify = 0;
2607 
2608 	page_info = &rxo->page_info_tbl[rxq->head];
2609 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2610 		if (!pagep) {
2611 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2612 			if (unlikely(!pagep)) {
2613 				rx_stats(rxo)->rx_post_fail++;
2614 				break;
2615 			}
2616 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2617 						    adapter->big_page_size,
2618 						    DMA_FROM_DEVICE);
2619 			if (dma_mapping_error(dev, page_dmaaddr)) {
2620 				put_page(pagep);
2621 				pagep = NULL;
2622 				adapter->drv_stats.dma_map_errors++;
2623 				break;
2624 			}
2625 			page_offset = 0;
2626 		} else {
2627 			get_page(pagep);
2628 			page_offset += rx_frag_size;
2629 		}
2630 		page_info->page_offset = page_offset;
2631 		page_info->page = pagep;
2632 
2633 		rxd = queue_head_node(rxq);
2634 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2635 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2636 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2637 
2638 		/* Any space left in the current big page for another frag? */
2639 		if ((page_offset + rx_frag_size + rx_frag_size) >
2640 					adapter->big_page_size) {
2641 			pagep = NULL;
2642 			page_info->last_frag = true;
2643 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2644 		} else {
2645 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2646 		}
2647 
2648 		prev_page_info = page_info;
2649 		queue_head_inc(rxq);
2650 		page_info = &rxo->page_info_tbl[rxq->head];
2651 	}
2652 
2653 	/* Mark the last frag of a page when we break out of the above loop
2654 	 * with no more slots available in the RXQ
2655 	 */
2656 	if (pagep) {
2657 		prev_page_info->last_frag = true;
2658 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2659 	}
2660 
2661 	if (posted) {
2662 		atomic_add(posted, &rxq->used);
2663 		if (rxo->rx_post_starved)
2664 			rxo->rx_post_starved = false;
2665 		do {
2666 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2667 			be_rxq_notify(adapter, rxq->id, notify);
2668 			posted -= notify;
2669 		} while (posted);
2670 	} else if (atomic_read(&rxq->used) == 0) {
2671 		/* Let be_worker replenish when memory is available */
2672 		rxo->rx_post_starved = true;
2673 	}
2674 }
2675 
2676 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2677 {
2678 	switch (status) {
2679 	case BE_TX_COMP_HDR_PARSE_ERR:
2680 		tx_stats(txo)->tx_hdr_parse_err++;
2681 		break;
2682 	case BE_TX_COMP_NDMA_ERR:
2683 		tx_stats(txo)->tx_dma_err++;
2684 		break;
2685 	case BE_TX_COMP_ACL_ERR:
2686 		tx_stats(txo)->tx_spoof_check_err++;
2687 		break;
2688 	}
2689 }
2690 
2691 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2692 {
2693 	switch (status) {
2694 	case LANCER_TX_COMP_LSO_ERR:
2695 		tx_stats(txo)->tx_tso_err++;
2696 		break;
2697 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2698 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2699 		tx_stats(txo)->tx_spoof_check_err++;
2700 		break;
2701 	case LANCER_TX_COMP_QINQ_ERR:
2702 		tx_stats(txo)->tx_qinq_err++;
2703 		break;
2704 	case LANCER_TX_COMP_PARITY_ERR:
2705 		tx_stats(txo)->tx_internal_parity_err++;
2706 		break;
2707 	case LANCER_TX_COMP_DMA_ERR:
2708 		tx_stats(txo)->tx_dma_err++;
2709 		break;
2710 	case LANCER_TX_COMP_SGE_ERR:
2711 		tx_stats(txo)->tx_sge_err++;
2712 		break;
2713 	}
2714 }
2715 
2716 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2717 						struct be_tx_obj *txo)
2718 {
2719 	struct be_queue_info *tx_cq = &txo->cq;
2720 	struct be_tx_compl_info *txcp = &txo->txcp;
2721 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2722 
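	/* A zeroed 'valid' dword means there is no new completion to process */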
2723 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2724 		return NULL;
2725 
2726 	/* Ensure load ordering of valid bit dword and other dwords below */
2727 	rmb();
2728 	be_dws_le_to_cpu(compl, sizeof(*compl));
2729 
2730 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2731 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2732 
2733 	if (txcp->status) {
2734 		if (lancer_chip(adapter)) {
2735 			lancer_update_tx_err(txo, txcp->status);
2736 			/* Reset the adapter in case of TSO,
2737 			 * SGE or Parity error
2738 			 */
2739 			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2740 			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2741 			    txcp->status == LANCER_TX_COMP_SGE_ERR)
2742 				be_set_error(adapter, BE_ERROR_TX);
2743 		} else {
2744 			be_update_tx_err(txo, txcp->status);
2745 		}
2746 	}
2747 
2748 	if (be_check_error(adapter, BE_ERROR_TX))
2749 		return NULL;
2750 
2751 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2752 	queue_tail_inc(tx_cq);
2753 	return txcp;
2754 }
2755 
2756 static u16 be_tx_compl_process(struct be_adapter *adapter,
2757 			       struct be_tx_obj *txo, u16 last_index)
2758 {
2759 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2760 	struct be_queue_info *txq = &txo->q;
2761 	struct sk_buff *skb = NULL;
2762 	bool unmap_skb_hdr = false;
2763 	struct be_eth_wrb *wrb;
2764 	u16 num_wrbs = 0;
2765 	u32 frag_index;
2766 
2767 	do {
2768 		if (sent_skbs[txq->tail]) {
2769 			/* Free skb from prev req */
2770 			if (skb)
2771 				dev_consume_skb_any(skb);
2772 			skb = sent_skbs[txq->tail];
2773 			sent_skbs[txq->tail] = NULL;
2774 			queue_tail_inc(txq);  /* skip hdr wrb */
2775 			num_wrbs++;
2776 			unmap_skb_hdr = true;
2777 		}
2778 		wrb = queue_tail_node(txq);
2779 		frag_index = txq->tail;
2780 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2781 			      (unmap_skb_hdr && skb_headlen(skb)));
2782 		unmap_skb_hdr = false;
2783 		queue_tail_inc(txq);
2784 		num_wrbs++;
2785 	} while (frag_index != last_index);
2786 	dev_consume_skb_any(skb);
2787 
2788 	return num_wrbs;
2789 }
2790 
2791 /* Return the number of events in the event queue */
2792 static inline int events_get(struct be_eq_obj *eqo)
2793 {
2794 	struct be_eq_entry *eqe;
2795 	int num = 0;
2796 
2797 	do {
2798 		eqe = queue_tail_node(&eqo->q);
2799 		if (eqe->evt == 0)
2800 			break;
2801 
2802 		rmb();
2803 		eqe->evt = 0;
2804 		num++;
2805 		queue_tail_inc(&eqo->q);
2806 	} while (true);
2807 
2808 	return num;
2809 }
2810 
2811 /* Leaves the EQ in disarmed state */
2812 static void be_eq_clean(struct be_eq_obj *eqo)
2813 {
2814 	int num = events_get(eqo);
2815 
2816 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2817 }
2818 
2819 /* Free posted rx buffers that were not used */
2820 static void be_rxq_clean(struct be_rx_obj *rxo)
2821 {
2822 	struct be_queue_info *rxq = &rxo->q;
2823 	struct be_rx_page_info *page_info;
2824 
2825 	while (atomic_read(&rxq->used) > 0) {
2826 		page_info = get_rx_page_info(rxo);
2827 		put_page(page_info->page);
2828 		memset(page_info, 0, sizeof(*page_info));
2829 	}
2830 	BUG_ON(atomic_read(&rxq->used));
2831 	rxq->tail = 0;
2832 	rxq->head = 0;
2833 }
2834 
2835 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2836 {
2837 	struct be_queue_info *rx_cq = &rxo->cq;
2838 	struct be_rx_compl_info *rxcp;
2839 	struct be_adapter *adapter = rxo->adapter;
2840 	int flush_wait = 0;
2841 
2842 	/* Consume pending rx completions.
2843 	 * Wait for the flush completion (identified by zero num_rcvd)
2844 	 * to arrive. Notify CQ even when there are no more CQ entries
2845 	 * for HW to flush partially coalesced CQ entries.
2846 	 * In Lancer, there is no need to wait for flush compl.
2847 	 */
2848 	for (;;) {
2849 		rxcp = be_rx_compl_get(rxo);
2850 		if (!rxcp) {
2851 			if (lancer_chip(adapter))
2852 				break;
2853 
2854 			if (flush_wait++ > 50 ||
2855 			    be_check_error(adapter,
2856 					   BE_ERROR_HW)) {
2857 				dev_warn(&adapter->pdev->dev,
2858 					 "did not receive flush compl\n");
2859 				break;
2860 			}
2861 			be_cq_notify(adapter, rx_cq->id, true, 0);
2862 			mdelay(1);
2863 		} else {
2864 			be_rx_compl_discard(rxo, rxcp);
2865 			be_cq_notify(adapter, rx_cq->id, false, 1);
2866 			if (rxcp->num_rcvd == 0)
2867 				break;
2868 		}
2869 	}
2870 
2871 	/* After cleanup, leave the CQ in unarmed state */
2872 	be_cq_notify(adapter, rx_cq->id, false, 0);
2873 }
2874 
2875 static void be_tx_compl_clean(struct be_adapter *adapter)
2876 {
2877 	struct device *dev = &adapter->pdev->dev;
2878 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2879 	struct be_tx_compl_info *txcp;
2880 	struct be_queue_info *txq;
2881 	u32 end_idx, notified_idx;
2882 	struct be_tx_obj *txo;
2883 	int i, pending_txqs;
2884 
2885 	/* Stop polling for compls when HW has been silent for 10ms */
2886 	do {
2887 		pending_txqs = adapter->num_tx_qs;
2888 
2889 		for_all_tx_queues(adapter, txo, i) {
2890 			cmpl = 0;
2891 			num_wrbs = 0;
2892 			txq = &txo->q;
2893 			while ((txcp = be_tx_compl_get(adapter, txo))) {
2894 				num_wrbs +=
2895 					be_tx_compl_process(adapter, txo,
2896 							    txcp->end_index);
2897 				cmpl++;
2898 			}
2899 			if (cmpl) {
2900 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2901 				atomic_sub(num_wrbs, &txq->used);
2902 				timeo = 0;
2903 			}
2904 			if (!be_is_tx_compl_pending(txo))
2905 				pending_txqs--;
2906 		}
2907 
2908 		if (pending_txqs == 0 || ++timeo > 10 ||
2909 		    be_check_error(adapter, BE_ERROR_HW))
2910 			break;
2911 
2912 		mdelay(1);
2913 	} while (true);
2914 
2915 	/* Free enqueued TX that was never notified to HW */
2916 	for_all_tx_queues(adapter, txo, i) {
2917 		txq = &txo->q;
2918 
2919 		if (atomic_read(&txq->used)) {
2920 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2921 				 i, atomic_read(&txq->used));
2922 			notified_idx = txq->tail;
2923 			end_idx = txq->tail;
2924 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2925 				  txq->len);
2926 			/* Use the tx-compl process logic to handle requests
2927 			 * that were not sent to the HW.
2928 			 */
2929 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2930 			atomic_sub(num_wrbs, &txq->used);
2931 			BUG_ON(atomic_read(&txq->used));
2932 			txo->pend_wrb_cnt = 0;
2933 			/* Since hw was never notified of these requests,
2934 			 * reset TXQ indices
2935 			 */
2936 			txq->head = notified_idx;
2937 			txq->tail = notified_idx;
2938 		}
2939 	}
2940 }
2941 
2942 static void be_evt_queues_destroy(struct be_adapter *adapter)
2943 {
2944 	struct be_eq_obj *eqo;
2945 	int i;
2946 
2947 	for_all_evt_queues(adapter, eqo, i) {
2948 		if (eqo->q.created) {
2949 			be_eq_clean(eqo);
2950 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2951 			netif_napi_del(&eqo->napi);
2952 			free_cpumask_var(eqo->affinity_mask);
2953 		}
2954 		be_queue_free(adapter, &eqo->q);
2955 	}
2956 }
2957 
2958 static int be_evt_queues_create(struct be_adapter *adapter)
2959 {
2960 	struct be_queue_info *eq;
2961 	struct be_eq_obj *eqo;
2962 	struct be_aic_obj *aic;
2963 	int i, rc;
2964 
2965 	/* need enough EQs to service both RX and TX queues */
2966 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2967 				    max(adapter->cfg_num_rx_irqs,
2968 					adapter->cfg_num_tx_irqs));
2969 
2970 	for_all_evt_queues(adapter, eqo, i) {
2971 		int numa_node = dev_to_node(&adapter->pdev->dev);
2972 
2973 		aic = &adapter->aic_obj[i];
2974 		eqo->adapter = adapter;
2975 		eqo->idx = i;
2976 		aic->max_eqd = BE_MAX_EQD;
2977 		aic->enable = true;
2978 
2979 		eq = &eqo->q;
2980 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2981 				    sizeof(struct be_eq_entry));
2982 		if (rc)
2983 			return rc;
2984 
2985 		rc = be_cmd_eq_create(adapter, eqo);
2986 		if (rc)
2987 			return rc;
2988 
2989 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2990 			return -ENOMEM;
2991 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2992 				eqo->affinity_mask);
2993 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2994 			       BE_NAPI_WEIGHT);
2995 	}
2996 	return 0;
2997 }
2998 
2999 static void be_mcc_queues_destroy(struct be_adapter *adapter)
3000 {
3001 	struct be_queue_info *q;
3002 
3003 	q = &adapter->mcc_obj.q;
3004 	if (q->created)
3005 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3006 	be_queue_free(adapter, q);
3007 
3008 	q = &adapter->mcc_obj.cq;
3009 	if (q->created)
3010 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3011 	be_queue_free(adapter, q);
3012 }
3013 
3014 /* Must be called only after TX qs are created as MCC shares TX EQ */
3015 static int be_mcc_queues_create(struct be_adapter *adapter)
3016 {
3017 	struct be_queue_info *q, *cq;
3018 
3019 	cq = &adapter->mcc_obj.cq;
3020 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3021 			   sizeof(struct be_mcc_compl)))
3022 		goto err;
3023 
3024 	/* Use the default EQ for MCC completions */
3025 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3026 		goto mcc_cq_free;
3027 
3028 	q = &adapter->mcc_obj.q;
3029 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3030 		goto mcc_cq_destroy;
3031 
3032 	if (be_cmd_mccq_create(adapter, q, cq))
3033 		goto mcc_q_free;
3034 
3035 	return 0;
3036 
3037 mcc_q_free:
3038 	be_queue_free(adapter, q);
3039 mcc_cq_destroy:
3040 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3041 mcc_cq_free:
3042 	be_queue_free(adapter, cq);
3043 err:
3044 	return -1;
3045 }
3046 
3047 static void be_tx_queues_destroy(struct be_adapter *adapter)
3048 {
3049 	struct be_queue_info *q;
3050 	struct be_tx_obj *txo;
3051 	u8 i;
3052 
3053 	for_all_tx_queues(adapter, txo, i) {
3054 		q = &txo->q;
3055 		if (q->created)
3056 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3057 		be_queue_free(adapter, q);
3058 
3059 		q = &txo->cq;
3060 		if (q->created)
3061 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3062 		be_queue_free(adapter, q);
3063 	}
3064 }
3065 
3066 static int be_tx_qs_create(struct be_adapter *adapter)
3067 {
3068 	struct be_queue_info *cq;
3069 	struct be_tx_obj *txo;
3070 	struct be_eq_obj *eqo;
3071 	int status, i;
3072 
3073 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3074 
3075 	for_all_tx_queues(adapter, txo, i) {
3076 		cq = &txo->cq;
3077 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3078 					sizeof(struct be_eth_tx_compl));
3079 		if (status)
3080 			return status;
3081 
3082 		u64_stats_init(&txo->stats.sync);
3083 		u64_stats_init(&txo->stats.sync_compl);
3084 
3085 		/* If num_evt_qs is less than num_tx_qs, then more than
3086 		 * one txq shares an eq
3087 		 */
3088 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3089 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3090 		if (status)
3091 			return status;
3092 
3093 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3094 					sizeof(struct be_eth_wrb));
3095 		if (status)
3096 			return status;
3097 
3098 		status = be_cmd_txq_create(adapter, txo);
3099 		if (status)
3100 			return status;
3101 
3102 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3103 				    eqo->idx);
3104 	}
3105 
3106 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3107 		 adapter->num_tx_qs);
3108 	return 0;
3109 }
3110 
3111 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3112 {
3113 	struct be_queue_info *q;
3114 	struct be_rx_obj *rxo;
3115 	int i;
3116 
3117 	for_all_rx_queues(adapter, rxo, i) {
3118 		q = &rxo->cq;
3119 		if (q->created)
3120 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3121 		be_queue_free(adapter, q);
3122 	}
3123 }
3124 
3125 static int be_rx_cqs_create(struct be_adapter *adapter)
3126 {
3127 	struct be_queue_info *eq, *cq;
3128 	struct be_rx_obj *rxo;
3129 	int rc, i;
3130 
3131 	adapter->num_rss_qs =
3132 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3133 
3134 	/* We'll use RSS only if at least 2 RSS rings are supported. */
3135 	if (adapter->num_rss_qs < 2)
3136 		adapter->num_rss_qs = 0;
3137 
3138 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3139 
3140 	/* Even when the interface is not capable of RSS rings (and there is no
3141 	 * need to create a default RXQ), we still need one RXQ
3142 	 */
3143 	if (adapter->num_rx_qs == 0)
3144 		adapter->num_rx_qs = 1;
3145 
3146 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3147 	for_all_rx_queues(adapter, rxo, i) {
3148 		rxo->adapter = adapter;
3149 		cq = &rxo->cq;
3150 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3151 				    sizeof(struct be_eth_rx_compl));
3152 		if (rc)
3153 			return rc;
3154 
3155 		u64_stats_init(&rxo->stats.sync);
3156 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3157 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3158 		if (rc)
3159 			return rc;
3160 	}
3161 
3162 	dev_info(&adapter->pdev->dev,
3163 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3164 	return 0;
3165 }
3166 
3167 static irqreturn_t be_intx(int irq, void *dev)
3168 {
3169 	struct be_eq_obj *eqo = dev;
3170 	struct be_adapter *adapter = eqo->adapter;
3171 	int num_evts = 0;
3172 
3173 	/* IRQ is not expected when NAPI is scheduled as the EQ
3174 	 * will not be armed.
3175 	 * But, this can happen on Lancer INTx where it takes
3176 	 * a while to de-assert INTx or in BE2 where occasionally
3177 	 * an interrupt may be raised even when EQ is unarmed.
3178 	 * If NAPI is already scheduled, then counting & notifying
3179 	 * events will orphan them.
3180 	 */
3181 	if (napi_schedule_prep(&eqo->napi)) {
3182 		num_evts = events_get(eqo);
3183 		__napi_schedule(&eqo->napi);
3184 		if (num_evts)
3185 			eqo->spurious_intr = 0;
3186 	}
3187 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3188 
3189 	/* Return IRQ_HANDLED only for the first spurious intr
3190 	 * after a valid intr to stop the kernel from branding
3191 	 * this irq as a bad one!
3192 	 */
3193 	if (num_evts || eqo->spurious_intr++ == 0)
3194 		return IRQ_HANDLED;
3195 	else
3196 		return IRQ_NONE;
3197 }
3198 
3199 static irqreturn_t be_msix(int irq, void *dev)
3200 {
3201 	struct be_eq_obj *eqo = dev;
3202 
3203 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3204 	napi_schedule(&eqo->napi);
3205 	return IRQ_HANDLED;
3206 }
3207 
3208 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3209 {
3210 	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3211 }
3212 
3213 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3214 			 int budget)
3215 {
3216 	struct be_adapter *adapter = rxo->adapter;
3217 	struct be_queue_info *rx_cq = &rxo->cq;
3218 	struct be_rx_compl_info *rxcp;
3219 	u32 work_done;
3220 	u32 frags_consumed = 0;
3221 
3222 	for (work_done = 0; work_done < budget; work_done++) {
3223 		rxcp = be_rx_compl_get(rxo);
3224 		if (!rxcp)
3225 			break;
3226 
3227 		/* Is it a flush compl that has no data */
3228 		if (unlikely(rxcp->num_rcvd == 0))
3229 			goto loop_continue;
3230 
3231 		/* Discard compl with partial DMA Lancer B0 */
3232 		if (unlikely(!rxcp->pkt_size)) {
3233 			be_rx_compl_discard(rxo, rxcp);
3234 			goto loop_continue;
3235 		}
3236 
3237 		/* On BE drop pkts that arrive due to imperfect filtering in
3238 		 * promiscuous mode on some SKUs
3239 		 */
3240 		if (unlikely(rxcp->port != adapter->port_num &&
3241 			     !lancer_chip(adapter))) {
3242 			be_rx_compl_discard(rxo, rxcp);
3243 			goto loop_continue;
3244 		}
3245 
3246 		if (do_gro(rxcp))
3247 			be_rx_compl_process_gro(rxo, napi, rxcp);
3248 		else
3249 			be_rx_compl_process(rxo, napi, rxcp);
3250 
3251 loop_continue:
3252 		frags_consumed += rxcp->num_rcvd;
3253 		be_rx_stats_update(rxo, rxcp);
3254 	}
3255 
3256 	if (work_done) {
3257 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3258 
3259 		/* When an rx-obj gets into post_starved state, just
3260 		 * let be_worker do the posting.
3261 		 */
3262 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3263 		    !rxo->rx_post_starved)
3264 			be_post_rx_frags(rxo, GFP_ATOMIC,
3265 					 max_t(u32, MAX_RX_POST,
3266 					       frags_consumed));
3267 	}
3268 
3269 	return work_done;
3270 }
3271 
3273 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3274 			  int idx)
3275 {
3276 	int num_wrbs = 0, work_done = 0;
3277 	struct be_tx_compl_info *txcp;
3278 
3279 	while ((txcp = be_tx_compl_get(adapter, txo))) {
3280 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3281 		work_done++;
3282 	}
3283 
3284 	if (work_done) {
3285 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3286 		atomic_sub(num_wrbs, &txo->q.used);
3287 
3288 		/* As Tx wrbs have been freed up, wake up netdev queue
3289 		 * if it was stopped due to lack of tx wrbs.  */
3290 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3291 		    be_can_txq_wake(txo)) {
3292 			netif_wake_subqueue(adapter->netdev, idx);
3293 		}
3294 
3295 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3296 		tx_stats(txo)->tx_compl += work_done;
3297 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3298 	}
3299 }
3300 
3301 int be_poll(struct napi_struct *napi, int budget)
3302 {
3303 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3304 	struct be_adapter *adapter = eqo->adapter;
3305 	int max_work = 0, work, i, num_evts;
3306 	struct be_rx_obj *rxo;
3307 	struct be_tx_obj *txo;
3308 	u32 mult_enc = 0;
3309 
3310 	num_evts = events_get(eqo);
3311 
3312 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3313 		be_process_tx(adapter, txo, i);
3314 
3315 	/* This loop will iterate twice for EQ0 in which
3316 	 * completions of the last RXQ (default one) are also processed.
3317 	 * For other EQs the loop iterates only once
3318 	 */
3319 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3320 		work = be_process_rx(rxo, napi, budget);
3321 		max_work = max(work, max_work);
3322 	}
3323 
3324 	if (is_mcc_eqo(eqo))
3325 		be_process_mcc(adapter);
3326 
3327 	if (max_work < budget) {
3328 		napi_complete_done(napi, max_work);
3329 
3330 		/* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3331 		 * delay via a delay multiplier encoding value
3332 		 */
3333 		if (skyhawk_chip(adapter))
3334 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3335 
3336 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3337 			     mult_enc);
3338 	} else {
3339 		/* As we'll continue in polling mode, count and clear events */
3340 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3341 	}
3342 	return max_work;
3343 }
3344 
3345 void be_detect_error(struct be_adapter *adapter)
3346 {
3347 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3348 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3349 	struct device *dev = &adapter->pdev->dev;
3350 	u16 val;
3351 	u32 i;
3352 
3353 	if (be_check_error(adapter, BE_ERROR_HW))
3354 		return;
3355 
3356 	if (lancer_chip(adapter)) {
3357 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3358 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3359 			be_set_error(adapter, BE_ERROR_UE);
3360 			sliport_err1 = ioread32(adapter->db +
3361 						SLIPORT_ERROR1_OFFSET);
3362 			sliport_err2 = ioread32(adapter->db +
3363 						SLIPORT_ERROR2_OFFSET);
3364 			/* Do not log error messages if it's a FW reset */
3365 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3366 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3367 				dev_info(dev, "Reset is in progress\n");
3368 			} else {
3369 				dev_err(dev, "Error detected in the card\n");
3370 				dev_err(dev, "ERR: sliport status 0x%x\n",
3371 					sliport_status);
3372 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3373 					sliport_err1);
3374 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3375 					sliport_err2);
3376 			}
3377 		}
3378 	} else {
3379 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3380 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3381 		ue_lo_mask = ioread32(adapter->pcicfg +
3382 				      PCICFG_UE_STATUS_LOW_MASK);
3383 		ue_hi_mask = ioread32(adapter->pcicfg +
3384 				      PCICFG_UE_STATUS_HI_MASK);
3385 
3386 		ue_lo = (ue_lo & ~ue_lo_mask);
3387 		ue_hi = (ue_hi & ~ue_hi_mask);
3388 
3389 		if (ue_lo || ue_hi) {
3390 			/* On certain platforms BE3 hardware can indicate
3391 			 * spurious UEs. In case of a UE in the chip,
3392 			 * the POST register correctly reports either a
3393 			 * FAT_LOG_START state (FW is currently dumping
3394 			 * FAT log data) or an ARMFW_UE state. Check for the
3395 			 * above states to ascertain if the UE is valid or not.
3396 			 */
3397 			if (BE3_chip(adapter)) {
3398 				val = be_POST_stage_get(adapter);
3399 				if ((val & POST_STAGE_FAT_LOG_START)
3400 				     != POST_STAGE_FAT_LOG_START &&
3401 				    (val & POST_STAGE_ARMFW_UE)
3402 				     != POST_STAGE_ARMFW_UE &&
3403 				    (val & POST_STAGE_RECOVERABLE_ERR)
3404 				     != POST_STAGE_RECOVERABLE_ERR)
3405 					return;
3406 			}
3407 
3408 			dev_err(dev, "Error detected in the adapter");
3409 			be_set_error(adapter, BE_ERROR_UE);
3410 
3411 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3412 				if (ue_lo & 1)
3413 					dev_err(dev, "UE: %s bit set\n",
3414 						ue_status_low_desc[i]);
3415 			}
3416 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3417 				if (ue_hi & 1)
3418 					dev_err(dev, "UE: %s bit set\n",
3419 						ue_status_hi_desc[i]);
3420 			}
3421 		}
3422 	}
3423 }
3424 
3425 static void be_msix_disable(struct be_adapter *adapter)
3426 {
3427 	if (msix_enabled(adapter)) {
3428 		pci_disable_msix(adapter->pdev);
3429 		adapter->num_msix_vec = 0;
3430 		adapter->num_msix_roce_vec = 0;
3431 	}
3432 }
3433 
3434 static int be_msix_enable(struct be_adapter *adapter)
3435 {
3436 	unsigned int i, max_roce_eqs;
3437 	struct device *dev = &adapter->pdev->dev;
3438 	int num_vec;
3439 
3440 	/* If RoCE is supported, program the max number of vectors that
3441 	 * could be used for NIC and RoCE; otherwise, just program the number
3442 	 * we'll use initially.
3443 	 */
3444 	if (be_roce_supported(adapter)) {
3445 		max_roce_eqs =
3446 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3447 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3448 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3449 	} else {
3450 		num_vec = max(adapter->cfg_num_rx_irqs,
3451 			      adapter->cfg_num_tx_irqs);
3452 	}
3453 
3454 	for (i = 0; i < num_vec; i++)
3455 		adapter->msix_entries[i].entry = i;
3456 
3457 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3458 					MIN_MSIX_VECTORS, num_vec);
3459 	if (num_vec < 0)
3460 		goto fail;
3461 
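	/* When RoCE is supported, split the granted vectors between RoCE and
	 * the NIC: RoCE gets half and the NIC gets the remainder.
	 */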
3462 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3463 		adapter->num_msix_roce_vec = num_vec / 2;
3464 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3465 			 adapter->num_msix_roce_vec);
3466 	}
3467 
3468 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3469 
3470 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3471 		 adapter->num_msix_vec);
3472 	return 0;
3473 
3474 fail:
3475 	dev_warn(dev, "MSIx enable failed\n");
3476 
3477 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3478 	if (be_virtfn(adapter))
3479 		return num_vec;
3480 	return 0;
3481 }
3482 
3483 static inline int be_msix_vec_get(struct be_adapter *adapter,
3484 				  struct be_eq_obj *eqo)
3485 {
3486 	return adapter->msix_entries[eqo->msix_idx].vector;
3487 }
3488 
3489 static int be_msix_register(struct be_adapter *adapter)
3490 {
3491 	struct net_device *netdev = adapter->netdev;
3492 	struct be_eq_obj *eqo;
3493 	int status, i, vec;
3494 
3495 	for_all_evt_queues(adapter, eqo, i) {
3496 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3497 		vec = be_msix_vec_get(adapter, eqo);
3498 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3499 		if (status)
3500 			goto err_msix;
3501 
3502 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3503 	}
3504 
3505 	return 0;
3506 err_msix:
3507 	for (i--; i >= 0; i--) {
3508 		eqo = &adapter->eq_obj[i];
3509 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3510 	}
3511 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3512 		 status);
3513 	be_msix_disable(adapter);
3514 	return status;
3515 }
3516 
3517 static int be_irq_register(struct be_adapter *adapter)
3518 {
3519 	struct net_device *netdev = adapter->netdev;
3520 	int status;
3521 
3522 	if (msix_enabled(adapter)) {
3523 		status = be_msix_register(adapter);
3524 		if (status == 0)
3525 			goto done;
3526 		/* INTx is not supported for VF */
3527 		if (be_virtfn(adapter))
3528 			return status;
3529 	}
3530 
3531 	/* INTx: only the first EQ is used */
3532 	netdev->irq = adapter->pdev->irq;
3533 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3534 			     &adapter->eq_obj[0]);
3535 	if (status) {
3536 		dev_err(&adapter->pdev->dev,
3537 			"INTx request IRQ failed - err %d\n", status);
3538 		return status;
3539 	}
3540 done:
3541 	adapter->isr_registered = true;
3542 	return 0;
3543 }
3544 
3545 static void be_irq_unregister(struct be_adapter *adapter)
3546 {
3547 	struct net_device *netdev = adapter->netdev;
3548 	struct be_eq_obj *eqo;
3549 	int i, vec;
3550 
3551 	if (!adapter->isr_registered)
3552 		return;
3553 
3554 	/* INTx */
3555 	if (!msix_enabled(adapter)) {
3556 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3557 		goto done;
3558 	}
3559 
3560 	/* MSIx */
3561 	for_all_evt_queues(adapter, eqo, i) {
3562 		vec = be_msix_vec_get(adapter, eqo);
3563 		irq_set_affinity_hint(vec, NULL);
3564 		free_irq(vec, eqo);
3565 	}
3566 
3567 done:
3568 	adapter->isr_registered = false;
3569 }
3570 
3571 static void be_rx_qs_destroy(struct be_adapter *adapter)
3572 {
3573 	struct rss_info *rss = &adapter->rss_info;
3574 	struct be_queue_info *q;
3575 	struct be_rx_obj *rxo;
3576 	int i;
3577 
3578 	for_all_rx_queues(adapter, rxo, i) {
3579 		q = &rxo->q;
3580 		if (q->created) {
3581 			/* If RXQs are destroyed while in an "out of buffer"
3582 			 * state, there is a possibility of an HW stall on
3583 			 * Lancer. So, post 64 buffers to each queue to relieve
3584 			 * the "out of buffer" condition.
3585 			 * Make sure there's space in the RXQ before posting.
3586 			 */
3587 			if (lancer_chip(adapter)) {
3588 				be_rx_cq_clean(rxo);
3589 				if (atomic_read(&q->used) == 0)
3590 					be_post_rx_frags(rxo, GFP_KERNEL,
3591 							 MAX_RX_POST);
3592 			}
3593 
3594 			be_cmd_rxq_destroy(adapter, q);
3595 			be_rx_cq_clean(rxo);
3596 			be_rxq_clean(rxo);
3597 		}
3598 		be_queue_free(adapter, q);
3599 	}
3600 
3601 	if (rss->rss_flags) {
3602 		rss->rss_flags = RSS_ENABLE_NONE;
3603 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3604 				  128, rss->rss_hkey);
3605 	}
3606 }
3607 
3608 static void be_disable_if_filters(struct be_adapter *adapter)
3609 {
3610 	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3611 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3612 	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3613 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3614 		eth_zero_addr(adapter->dev_mac);
3615 	}
3616 
3617 	be_clear_uc_list(adapter);
3618 	be_clear_mc_list(adapter);
3619 
3620 	/* The IFACE flags are enabled in the open path and cleared
3621 	 * in the close path. When a VF gets detached from the host and
3622 	 * assigned to a VM the following happens:
3623 	 *	- VF's IFACE flags get cleared in the detach path
3624 	 *	- IFACE create is issued by the VF in the attach path
3625 	 * Due to a bug in the BE3/Skyhawk-R FW
3626 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3627 	 * specified along with the IFACE create cmd issued by a VF are not
3628 	 * honoured by FW.  As a consequence, if a *new* driver
3629 	 * (that enables/disables IFACE flags in open/close)
3630 	 * is loaded in the host and an *old* driver is used by a VM/VF,
3631 	 * the IFACE gets created *without* the needed flags.
3632 	 * To avoid this, disable RX-filter flags only for Lancer.
3633 	 */
3634 	if (lancer_chip(adapter)) {
3635 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3636 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3637 	}
3638 }
3639 
3640 static int be_close(struct net_device *netdev)
3641 {
3642 	struct be_adapter *adapter = netdev_priv(netdev);
3643 	struct be_eq_obj *eqo;
3644 	int i;
3645 
3646 	/* This protection is needed as be_close() may be called even when the
3647 	 * adapter is in a cleared state (after an EEH perm failure)
3648 	 */
3649 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3650 		return 0;
3651 
3652 	/* Before attempting cleanup ensure all the pending cmds in the
3653 	 * config_wq have finished execution
3654 	 */
3655 	flush_workqueue(be_wq);
3656 
3657 	be_disable_if_filters(adapter);
3658 
3659 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3660 		for_all_evt_queues(adapter, eqo, i) {
3661 			napi_disable(&eqo->napi);
3662 		}
3663 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3664 	}
3665 
3666 	be_async_mcc_disable(adapter);
3667 
3668 	/* Wait for all pending tx completions to arrive so that
3669 	 * all tx skbs are freed.
3670 	 */
3671 	netif_tx_disable(netdev);
3672 	be_tx_compl_clean(adapter);
3673 
3674 	be_rx_qs_destroy(adapter);
3675 
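	/* Ensure no interrupt handler is still running on any EQ's vector
	 * before draining the EQs.
	 */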
3676 	for_all_evt_queues(adapter, eqo, i) {
3677 		if (msix_enabled(adapter))
3678 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3679 		else
3680 			synchronize_irq(netdev->irq);
3681 		be_eq_clean(eqo);
3682 	}
3683 
3684 	be_irq_unregister(adapter);
3685 
3686 	return 0;
3687 }
3688 
3689 static int be_rx_qs_create(struct be_adapter *adapter)
3690 {
3691 	struct rss_info *rss = &adapter->rss_info;
3692 	u8 rss_key[RSS_HASH_KEY_LEN];
3693 	struct be_rx_obj *rxo;
3694 	int rc, i, j;
3695 
3696 	for_all_rx_queues(adapter, rxo, i) {
3697 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3698 				    sizeof(struct be_eth_rx_d));
3699 		if (rc)
3700 			return rc;
3701 	}
3702 
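	/* Create the default (non-RSS) RXQ if FW doesn't support an RSS
	 * default queue, or if no RSS queues are being used.
	 */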
3703 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3704 		rxo = default_rxo(adapter);
3705 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706 				       rx_frag_size, adapter->if_handle,
3707 				       false, &rxo->rss_id);
3708 		if (rc)
3709 			return rc;
3710 	}
3711 
3712 	for_all_rss_queues(adapter, rxo, i) {
3713 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3714 				       rx_frag_size, adapter->if_handle,
3715 				       true, &rxo->rss_id);
3716 		if (rc)
3717 			return rc;
3718 	}
3719 
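	/* Fill the RSS indirection table round-robin with the RSS IDs of
	 * the created RX queues, then program the RSS hash types and key.
	 */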
3720 	if (be_multi_rxq(adapter)) {
3721 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3722 			for_all_rss_queues(adapter, rxo, i) {
3723 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3724 					break;
3725 				rss->rsstable[j + i] = rxo->rss_id;
3726 				rss->rss_queue[j + i] = i;
3727 			}
3728 		}
3729 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3730 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3731 
3732 		if (!BEx_chip(adapter))
3733 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3734 				RSS_ENABLE_UDP_IPV6;
3735 
3736 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3737 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3738 				       RSS_INDIR_TABLE_LEN, rss_key);
3739 		if (rc) {
3740 			rss->rss_flags = RSS_ENABLE_NONE;
3741 			return rc;
3742 		}
3743 
3744 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3745 	} else {
3746 		/* Disable RSS, if only default RX Q is created */
3747 		rss->rss_flags = RSS_ENABLE_NONE;
3748 	}
3749 
3750 
3751 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3752 	 * which is a queue empty condition
3753 	 */
3754 	for_all_rx_queues(adapter, rxo, i)
3755 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3756 
3757 	return 0;
3758 }
3759 
3760 static int be_enable_if_filters(struct be_adapter *adapter)
3761 {
3762 	int status;
3763 
3764 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3765 	if (status)
3766 		return status;
3767 
3768 	/* Normally this condition is true, as ->dev_mac is zeroed.
3769 	 * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3770 	 * a subsequent be_dev_mac_add() can fail (after a fresh boot)
3771 	 */
3772 	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3773 		int old_pmac_id = -1;
3774 
3775 		/* Remember old programmed MAC if any - can happen on BE3 VF */
3776 		if (!is_zero_ether_addr(adapter->dev_mac))
3777 			old_pmac_id = adapter->pmac_id[0];
3778 
3779 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780 		if (status)
3781 			return status;
3782 
3783 		/* Delete the old programmed MAC as we successfully programmed
3784 		 * a new MAC
3785 		 */
3786 		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3787 			be_dev_mac_del(adapter, old_pmac_id);
3788 
3789 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3790 	}
3791 
3792 	if (adapter->vlans_added)
3793 		be_vid_config(adapter);
3794 
3795 	__be_set_rx_mode(adapter);
3796 
3797 	return 0;
3798 }
3799 
3800 static int be_open(struct net_device *netdev)
3801 {
3802 	struct be_adapter *adapter = netdev_priv(netdev);
3803 	struct be_eq_obj *eqo;
3804 	struct be_rx_obj *rxo;
3805 	struct be_tx_obj *txo;
3806 	u8 link_status;
3807 	int status, i;
3808 
3809 	status = be_rx_qs_create(adapter);
3810 	if (status)
3811 		goto err;
3812 
3813 	status = be_enable_if_filters(adapter);
3814 	if (status)
3815 		goto err;
3816 
3817 	status = be_irq_register(adapter);
3818 	if (status)
3819 		goto err;
3820 
3821 	for_all_rx_queues(adapter, rxo, i)
3822 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3823 
3824 	for_all_tx_queues(adapter, txo, i)
3825 		be_cq_notify(adapter, txo->cq.id, true, 0);
3826 
3827 	be_async_mcc_enable(adapter);
3828 
3829 	for_all_evt_queues(adapter, eqo, i) {
3830 		napi_enable(&eqo->napi);
3831 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3832 	}
3833 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3834 
3835 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3836 	if (!status)
3837 		be_link_status_update(adapter, link_status);
3838 
3839 	netif_tx_start_all_queues(netdev);
3840 	if (skyhawk_chip(adapter))
3841 		udp_tunnel_get_rx_info(netdev);
3842 
3843 	return 0;
3844 err:
3845 	be_close(adapter->netdev);
3846 	return -EIO;
3847 }
3848 
3849 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3850 {
3851 	u32 addr;
3852 
3853 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3854 
3855 	mac[5] = (u8)(addr & 0xFF);
3856 	mac[4] = (u8)((addr >> 8) & 0xFF);
3857 	mac[3] = (u8)((addr >> 16) & 0xFF);
3858 	/* Use the OUI from the current MAC address */
3859 	memcpy(mac, adapter->netdev->dev_addr, 3);
3860 }
3861 
3862 /*
3863  * Generate a seed MAC address from the PF MAC Address using jhash.
3864  * MAC addresses for VFs are assigned incrementally, starting from the seed.
3865  * These addresses are programmed in the ASIC by the PF and the VF driver
3866  * queries for the MAC address during its probe.
3867  */
3868 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3869 {
3870 	u32 vf;
3871 	int status = 0;
3872 	u8 mac[ETH_ALEN];
3873 	struct be_vf_cfg *vf_cfg;
3874 
3875 	be_vf_eth_addr_generate(adapter, mac);
3876 
3877 	for_all_vfs(adapter, vf_cfg, vf) {
3878 		if (BEx_chip(adapter))
3879 			status = be_cmd_pmac_add(adapter, mac,
3880 						 vf_cfg->if_handle,
3881 						 &vf_cfg->pmac_id, vf + 1);
3882 		else
3883 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3884 						vf + 1);
3885 
3886 		if (status)
3887 			dev_err(&adapter->pdev->dev,
3888 				"Mac address assignment failed for VF %d\n",
3889 				vf);
3890 		else
3891 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3892 
3893 		mac[5] += 1;
3894 	}
3895 	return status;
3896 }
3897 
3898 static int be_vfs_mac_query(struct be_adapter *adapter)
3899 {
3900 	int status, vf;
3901 	u8 mac[ETH_ALEN];
3902 	struct be_vf_cfg *vf_cfg;
3903 
3904 	for_all_vfs(adapter, vf_cfg, vf) {
3905 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3906 					       mac, vf_cfg->if_handle,
3907 					       false, vf+1);
3908 		if (status)
3909 			return status;
3910 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3911 	}
3912 	return 0;
3913 }
3914 
3915 static void be_vf_clear(struct be_adapter *adapter)
3916 {
3917 	struct be_vf_cfg *vf_cfg;
3918 	u32 vf;
3919 
3920 	if (pci_vfs_assigned(adapter->pdev)) {
3921 		dev_warn(&adapter->pdev->dev,
3922 			 "VFs are assigned to VMs: not disabling VFs\n");
3923 		goto done;
3924 	}
3925 
3926 	pci_disable_sriov(adapter->pdev);
3927 
3928 	for_all_vfs(adapter, vf_cfg, vf) {
3929 		if (BEx_chip(adapter))
3930 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3931 					vf_cfg->pmac_id, vf + 1);
3932 		else
3933 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3934 				       vf + 1);
3935 
3936 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3937 	}
3938 
3939 	if (BE3_chip(adapter))
3940 		be_cmd_set_hsw_config(adapter, 0, 0,
3941 				      adapter->if_handle,
3942 				      PORT_FWD_TYPE_PASSTHRU, 0);
3943 done:
3944 	kfree(adapter->vf_cfg);
3945 	adapter->num_vfs = 0;
3946 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3947 }
3948 
3949 static void be_clear_queues(struct be_adapter *adapter)
3950 {
3951 	be_mcc_queues_destroy(adapter);
3952 	be_rx_cqs_destroy(adapter);
3953 	be_tx_queues_destroy(adapter);
3954 	be_evt_queues_destroy(adapter);
3955 }
3956 
3957 static void be_cancel_worker(struct be_adapter *adapter)
3958 {
3959 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3960 		cancel_delayed_work_sync(&adapter->work);
3961 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3962 	}
3963 }
3964 
3965 static void be_cancel_err_detection(struct be_adapter *adapter)
3966 {
3967 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3968 
3969 	if (!be_err_recovery_workq)
3970 		return;
3971 
3972 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3973 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3974 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3975 	}
3976 }
3977 
3978 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3979 {
3980 	struct net_device *netdev = adapter->netdev;
3981 	struct device *dev = &adapter->pdev->dev;
3982 	struct be_vxlan_port *vxlan_port;
3983 	__be16 port;
3984 	int status;
3985 
3986 	vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3987 				      struct be_vxlan_port, list);
3988 	port = vxlan_port->port;
3989 
3990 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3991 				     OP_CONVERT_NORMAL_TO_TUNNEL);
3992 	if (status) {
3993 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3994 		return status;
3995 	}
3996 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3997 
3998 	status = be_cmd_set_vxlan_port(adapter, port);
3999 	if (status) {
4000 		dev_warn(dev, "Failed to add VxLAN port\n");
4001 		return status;
4002 	}
4003 	adapter->vxlan_port = port;
4004 
4005 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4006 				   NETIF_F_TSO | NETIF_F_TSO6 |
4007 				   NETIF_F_GSO_UDP_TUNNEL;
4008 
4009 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4010 		 be16_to_cpu(port));
4011 	return 0;
4012 }
4013 
4014 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4015 {
4016 	struct net_device *netdev = adapter->netdev;
4017 
4018 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4019 		be_cmd_manage_iface(adapter, adapter->if_handle,
4020 				    OP_CONVERT_TUNNEL_TO_NORMAL);
4021 
4022 	if (adapter->vxlan_port)
4023 		be_cmd_set_vxlan_port(adapter, 0);
4024 
4025 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4026 	adapter->vxlan_port = 0;
4027 
4028 	netdev->hw_enc_features = 0;
4029 }
4030 
4031 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4032 				struct be_resources *vft_res)
4033 {
4034 	struct be_resources res = adapter->pool_res;
4035 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
4036 	struct be_resources res_mod = {0};
4037 	u16 num_vf_qs = 1;
4038 
4039 	/* Distribute the queue resources among the PF and its VFs */
4040 	if (num_vfs) {
4041 		/* Divide the rx queues evenly among the VFs and the PF, capped
4042 		 * at VF-EQ-count. Any remainder queues belong to the PF.
4043 		 */
4044 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4045 				res.max_rss_qs / (num_vfs + 1));
4046 
4047 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4048 		 * RSS Tables per port. Provide RSS on VFs, only if number of
4049 		 * VFs requested is less than its PF Pool's RSS Tables limit.
4050 		 */
4051 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4052 			num_vf_qs = 1;
4053 	}
4054 
4055 	/* Resource fields that are set to all '1's by the GET_PROFILE_CONFIG
4056 	 * cmd are the ones modifiable using the SET_PROFILE_CONFIG cmd.
4057 	 */
4058 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4059 				  RESOURCE_MODIFIABLE, 0);
4060 
4061 	/* If RSS IFACE capability flags are modifiable for a VF, set the
4062 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4063 	 * more than 1 RSSQ is available for a VF.
4064 	 * Otherwise, provision only 1 queue pair for VF.
4065 	 */
4066 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4067 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4068 		if (num_vf_qs > 1) {
4069 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4070 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4071 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4072 		} else {
4073 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4074 					     BE_IF_FLAGS_DEFQ_RSS);
4075 		}
4076 	} else {
4077 		num_vf_qs = 1;
4078 	}
4079 
4080 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4081 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4082 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4083 	}
4084 
4085 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4086 	vft_res->max_rx_qs = num_vf_qs;
4087 	vft_res->max_rss_qs = num_vf_qs;
4088 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4089 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4090 
4091 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4092 	 * among the PF and its VFs, if the fields are changeable
4093 	 */
4094 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4095 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4096 
4097 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4098 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4099 
4100 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4101 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4102 
4103 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4104 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4105 }
4106 
4107 static void be_if_destroy(struct be_adapter *adapter)
4108 {
4109 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4110 
4111 	kfree(adapter->pmac_id);
4112 	adapter->pmac_id = NULL;
4113 
4114 	kfree(adapter->mc_list);
4115 	adapter->mc_list = NULL;
4116 
4117 	kfree(adapter->uc_list);
4118 	adapter->uc_list = NULL;
4119 }
4120 
4121 static int be_clear(struct be_adapter *adapter)
4122 {
4123 	struct pci_dev *pdev = adapter->pdev;
4124 	struct be_resources vft_res = {0};
4125 
4126 	be_cancel_worker(adapter);
4127 
4128 	flush_workqueue(be_wq);
4129 
4130 	if (sriov_enabled(adapter))
4131 		be_vf_clear(adapter);
4132 
4133 	/* Re-configure FW to distribute resources evenly across max-supported
4134 	 * number of VFs, only when VFs are not already enabled.
4135 	 */
4136 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4137 	    !pci_vfs_assigned(pdev)) {
4138 		be_calculate_vf_res(adapter,
4139 				    pci_sriov_get_totalvfs(pdev),
4140 				    &vft_res);
4141 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4142 					pci_sriov_get_totalvfs(pdev),
4143 					&vft_res);
4144 	}
4145 
4146 	be_disable_vxlan_offloads(adapter);
4147 
4148 	be_if_destroy(adapter);
4149 
4150 	be_clear_queues(adapter);
4151 
4152 	be_msix_disable(adapter);
4153 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4154 	return 0;
4155 }
4156 
4157 static int be_vfs_if_create(struct be_adapter *adapter)
4158 {
4159 	struct be_resources res = {0};
4160 	u32 cap_flags, en_flags, vf;
4161 	struct be_vf_cfg *vf_cfg;
4162 	int status;
4163 
4164 	/* If a FW profile exists, then cap_flags are updated */
4165 	cap_flags = BE_VF_IF_EN_FLAGS;
4166 
4167 	for_all_vfs(adapter, vf_cfg, vf) {
4168 		if (!BE3_chip(adapter)) {
4169 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4170 							   ACTIVE_PROFILE_TYPE,
4171 							   RESOURCE_LIMITS,
4172 							   vf + 1);
4173 			if (!status) {
4174 				cap_flags = res.if_cap_flags;
4175 				/* Prevent VFs from enabling VLAN promiscuous
4176 				 * mode
4177 				 */
4178 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4179 			}
4180 		}
4181 
4182 		/* PF should enable IF flags during proxy if_create call */
4183 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4184 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4185 					  &vf_cfg->if_handle, vf + 1);
4186 		if (status)
4187 			return status;
4188 	}
4189 
4190 	return 0;
4191 }
4192 
4193 static int be_vf_setup_init(struct be_adapter *adapter)
4194 {
4195 	struct be_vf_cfg *vf_cfg;
4196 	int vf;
4197 
4198 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4199 				  GFP_KERNEL);
4200 	if (!adapter->vf_cfg)
4201 		return -ENOMEM;
4202 
4203 	for_all_vfs(adapter, vf_cfg, vf) {
4204 		vf_cfg->if_handle = -1;
4205 		vf_cfg->pmac_id = -1;
4206 	}
4207 	return 0;
4208 }
4209 
4210 static int be_vf_setup(struct be_adapter *adapter)
4211 {
4212 	struct device *dev = &adapter->pdev->dev;
4213 	struct be_vf_cfg *vf_cfg;
4214 	int status, old_vfs, vf;
4215 	bool spoofchk;
4216 
4217 	old_vfs = pci_num_vf(adapter->pdev);
4218 
4219 	status = be_vf_setup_init(adapter);
4220 	if (status)
4221 		goto err;
4222 
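	/* If VFs already exist (e.g. from a previous driver load), reuse
	 * their interfaces and query their MACs; otherwise create the VF
	 * interfaces and assign MAC addresses.
	 */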
4223 	if (old_vfs) {
4224 		for_all_vfs(adapter, vf_cfg, vf) {
4225 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4226 			if (status)
4227 				goto err;
4228 		}
4229 
4230 		status = be_vfs_mac_query(adapter);
4231 		if (status)
4232 			goto err;
4233 	} else {
4234 		status = be_vfs_if_create(adapter);
4235 		if (status)
4236 			goto err;
4237 
4238 		status = be_vf_eth_addr_config(adapter);
4239 		if (status)
4240 			goto err;
4241 	}
4242 
4243 	for_all_vfs(adapter, vf_cfg, vf) {
4244 		/* Allow VFs to program MAC/VLAN filters */
4245 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4246 						  vf + 1);
4247 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4248 			status = be_cmd_set_fn_privileges(adapter,
4249 							  vf_cfg->privileges |
4250 							  BE_PRIV_FILTMGMT,
4251 							  vf + 1);
4252 			if (!status) {
4253 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4254 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4255 					 vf);
4256 			}
4257 		}
4258 
4259 		/* Allow full available bandwidth */
4260 		if (!old_vfs)
4261 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4262 
4263 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4264 					       vf_cfg->if_handle, NULL,
4265 					       &spoofchk);
4266 		if (!status)
4267 			vf_cfg->spoofchk = spoofchk;
4268 
4269 		if (!old_vfs) {
4270 			be_cmd_enable_vf(adapter, vf + 1);
4271 			be_cmd_set_logical_link_config(adapter,
4272 						       IFLA_VF_LINK_STATE_AUTO,
4273 						       vf+1);
4274 		}
4275 	}
4276 
4277 	if (!old_vfs) {
4278 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4279 		if (status) {
4280 			dev_err(dev, "SRIOV enable failed\n");
4281 			adapter->num_vfs = 0;
4282 			goto err;
4283 		}
4284 	}
4285 
4286 	if (BE3_chip(adapter)) {
4287 		/* On BE3, enable VEB only when SRIOV is enabled */
4288 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4289 					       adapter->if_handle,
4290 					       PORT_FWD_TYPE_VEB, 0);
4291 		if (status)
4292 			goto err;
4293 	}
4294 
4295 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4296 	return 0;
4297 err:
4298 	dev_err(dev, "VF setup failed\n");
4299 	be_vf_clear(adapter);
4300 	return status;
4301 }
4302 
4303 /* Converting function_mode bits on BE3 to SH mc_type enums */
4304 
4305 static u8 be_convert_mc_type(u32 function_mode)
4306 {
4307 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4308 		return vNIC1;
4309 	else if (function_mode & QNQ_MODE)
4310 		return FLEX10;
4311 	else if (function_mode & VNIC_MODE)
4312 		return vNIC2;
4313 	else if (function_mode & UMC_ENABLED)
4314 		return UMC;
4315 	else
4316 		return MC_NONE;
4317 }
4318 
4319 /* On BE2/BE3, FW does not report the supported resource limits */
4320 static void BEx_get_resources(struct be_adapter *adapter,
4321 			      struct be_resources *res)
4322 {
4323 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4324 
4325 	if (be_physfn(adapter))
4326 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4327 	else
4328 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4329 
4330 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4331 
4332 	if (be_is_mc(adapter)) {
4333 		/* Assuming that there are 4 channels per port,
4334 		 * when multi-channel is enabled
4335 		 */
4336 		if (be_is_qnq_mode(adapter))
4337 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4338 		else
4339 			/* In a non-qnq multichannel mode, the pvid
4340 			 * takes up one vlan entry
4341 			 */
4342 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4343 	} else {
4344 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4345 	}
4346 
4347 	res->max_mcast_mac = BE_MAX_MC;
4348 
4349 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4350 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4351 	 *    *only* if it is RSS-capable.
4352 	 */
4353 	if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4354 	    be_virtfn(adapter) ||
4355 	    (be_is_mc(adapter) &&
4356 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4357 		res->max_tx_qs = 1;
4358 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4359 		struct be_resources super_nic_res = {0};
4360 
4361 		/* On a SuperNIC profile, the driver needs to use the
4362 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4363 		 */
4364 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4365 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4366 					  0);
4367 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4368 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4369 	} else {
4370 		res->max_tx_qs = BE3_MAX_TX_QS;
4371 	}
4372 
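	/* RSS queues are available only to an RSS-capable PF with SR-IOV
	 * disabled; the count depends on BE3 native mode support.
	 */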
4373 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4374 	    !use_sriov && be_physfn(adapter))
4375 		res->max_rss_qs = (adapter->be3_native) ?
4376 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4377 	res->max_rx_qs = res->max_rss_qs + 1;
4378 
4379 	if (be_physfn(adapter))
4380 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4381 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4382 	else
4383 		res->max_evt_qs = 1;
4384 
4385 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4386 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4387 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4388 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4389 }
4390 
4391 static void be_setup_init(struct be_adapter *adapter)
4392 {
4393 	adapter->vlan_prio_bmap = 0xff;
4394 	adapter->phy.link_speed = -1;
4395 	adapter->if_handle = -1;
4396 	adapter->be3_native = false;
4397 	adapter->if_flags = 0;
4398 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4399 	if (be_physfn(adapter))
4400 		adapter->cmd_privileges = MAX_PRIVILEGES;
4401 	else
4402 		adapter->cmd_privileges = MIN_PRIVILEGES;
4403 }
4404 
4405 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4406  * However, this HW limitation is not exposed to the host via any SLI cmd.
4407  * As a result, in the case of SRIOV and in particular multi-partition configs
4408  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4409  * for distribution between the VFs. This self-imposed limit will determine the
4410  * number of VFs for which RSS can be enabled.
4411  */
4412 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4413 {
4414 	struct be_port_resources port_res = {0};
4415 	u8 rss_tables_on_port;
4416 	u16 max_vfs = be_max_vfs(adapter);
4417 
4418 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4419 				  RESOURCE_LIMITS, 0);
4420 
4421 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4422 
4423 	/* Each PF Pool's RSS Tables limit =
4424 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4425 	 */
4426 	adapter->pool_res.max_rss_tables =
4427 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4428 }
4429 
4430 static int be_get_sriov_config(struct be_adapter *adapter)
4431 {
4432 	struct be_resources res = {0};
4433 	int max_vfs, old_vfs;
4434 
4435 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4436 				  RESOURCE_LIMITS, 0);
4437 
4438 	/* Some old versions of BE3 FW don't report max_vfs value */
4439 	if (BE3_chip(adapter) && !res.max_vfs) {
4440 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4441 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4442 	}
4443 
4444 	adapter->pool_res = res;
4445 
4446 	/* If during previous unload of the driver, the VFs were not disabled,
4447 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4448 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4449 	 */
4450 	old_vfs = pci_num_vf(adapter->pdev);
4451 	if (old_vfs) {
4452 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4453 			 old_vfs);
4454 
4455 		adapter->pool_res.max_vfs =
4456 			pci_sriov_get_totalvfs(adapter->pdev);
4457 		adapter->num_vfs = old_vfs;
4458 	}
4459 
4460 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4461 		be_calculate_pf_pool_rss_tables(adapter);
4462 		dev_info(&adapter->pdev->dev,
4463 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4464 			 be_max_pf_pool_rss_tables(adapter));
4465 	}
4466 	return 0;
4467 }
4468 
4469 static void be_alloc_sriov_res(struct be_adapter *adapter)
4470 {
4471 	int old_vfs = pci_num_vf(adapter->pdev);
4472 	struct be_resources vft_res = {0};
4473 	int status;
4474 
4475 	be_get_sriov_config(adapter);
4476 
4477 	if (!old_vfs)
4478 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4479 
4480 	/* When the HW is in SRIOV capable configuration, the PF-pool
4481 	 * resources are given to PF during driver load, if there are no
4482 	 * old VFs. This facility is not available in BE3 FW.
4483 	 * Also, this is done by FW in Lancer chip.
4484 	 */
4485 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4486 		be_calculate_vf_res(adapter, 0, &vft_res);
4487 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4488 						 &vft_res);
4489 		if (status)
4490 			dev_err(&adapter->pdev->dev,
4491 				"Failed to optimize SRIOV resources\n");
4492 	}
4493 }
4494 
4495 static int be_get_resources(struct be_adapter *adapter)
4496 {
4497 	struct device *dev = &adapter->pdev->dev;
4498 	struct be_resources res = {0};
4499 	int status;
4500 
4501 	/* For Lancer, SH, etc. read per-function resource limits from FW.
4502 	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4503 	 * GET_PROFILE_CONFIG returns PCI-E related (PF-pool) limits.
4504 	 */
4505 	if (BEx_chip(adapter)) {
4506 		BEx_get_resources(adapter, &res);
4507 	} else {
4508 		status = be_cmd_get_func_config(adapter, &res);
4509 		if (status)
4510 			return status;
4511 
4512 		/* If a default RXQ must be created, we'll use up one RSSQ */
4513 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4514 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4515 			res.max_rss_qs -= 1;
4516 	}
4517 
4518 	/* If RoCE is supported stash away half the EQs for RoCE */
4519 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4520 				res.max_evt_qs / 2 : res.max_evt_qs;
4521 	adapter->res = res;
4522 
4523 	/* If FW supports RSS default queue, then skip creating non-RSS
4524 	 * queue for non-IP traffic.
4525 	 */
4526 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4527 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4528 
4529 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4530 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4531 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4532 		 be_max_vfs(adapter));
4533 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4534 		 be_max_uc(adapter), be_max_mc(adapter),
4535 		 be_max_vlans(adapter));
4536 
4537 	/* Ensure RX and TX queues are created in pairs at init time */
4538 	adapter->cfg_num_rx_irqs =
4539 				min_t(u16, netif_get_num_default_rss_queues(),
4540 				      be_max_qp_irqs(adapter));
4541 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4542 	return 0;
4543 }
4544 
4545 static int be_get_config(struct be_adapter *adapter)
4546 {
4547 	int status, level;
4548 	u16 profile_id;
4549 
4550 	status = be_cmd_get_cntl_attributes(adapter);
4551 	if (status)
4552 		return status;
4553 
4554 	status = be_cmd_query_fw_cfg(adapter);
4555 	if (status)
4556 		return status;
4557 
4558 	if (!lancer_chip(adapter) && be_physfn(adapter))
4559 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4560 
4561 	if (BEx_chip(adapter)) {
4562 		level = be_cmd_get_fw_log_level(adapter);
4563 		adapter->msg_enable =
4564 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4565 	}
4566 
4567 	be_cmd_get_acpi_wol_cap(adapter);
4568 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4569 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4570 
4571 	be_cmd_query_port_name(adapter);
4572 
4573 	if (be_physfn(adapter)) {
4574 		status = be_cmd_get_active_profile(adapter, &profile_id);
4575 		if (!status)
4576 			dev_info(&adapter->pdev->dev,
4577 				 "Using profile 0x%x\n", profile_id);
4578 	}
4579 
4580 	return 0;
4581 }
4582 
4583 static int be_mac_setup(struct be_adapter *adapter)
4584 {
4585 	u8 mac[ETH_ALEN];
4586 	int status;
4587 
4588 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4589 		status = be_cmd_get_perm_mac(adapter, mac);
4590 		if (status)
4591 			return status;
4592 
4593 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4594 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4595 
4596 		/* Initial MAC for BE3 VFs is already programmed by PF */
4597 		if (BEx_chip(adapter) && be_virtfn(adapter))
4598 			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4599 	}
4600 
4601 	return 0;
4602 }
4603 
4604 static void be_schedule_worker(struct be_adapter *adapter)
4605 {
4606 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4607 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4608 }
4609 
4610 static void be_destroy_err_recovery_workq(void)
4611 {
4612 	if (!be_err_recovery_workq)
4613 		return;
4614 
4615 	flush_workqueue(be_err_recovery_workq);
4616 	destroy_workqueue(be_err_recovery_workq);
4617 	be_err_recovery_workq = NULL;
4618 }
4619 
4620 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4621 {
4622 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4623 
4624 	if (!be_err_recovery_workq)
4625 		return;
4626 
4627 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4628 			   msecs_to_jiffies(delay));
4629 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4630 }
4631 
4632 static int be_setup_queues(struct be_adapter *adapter)
4633 {
4634 	struct net_device *netdev = adapter->netdev;
4635 	int status;
4636 
4637 	status = be_evt_queues_create(adapter);
4638 	if (status)
4639 		goto err;
4640 
4641 	status = be_tx_qs_create(adapter);
4642 	if (status)
4643 		goto err;
4644 
4645 	status = be_rx_cqs_create(adapter);
4646 	if (status)
4647 		goto err;
4648 
4649 	status = be_mcc_queues_create(adapter);
4650 	if (status)
4651 		goto err;
4652 
4653 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4654 	if (status)
4655 		goto err;
4656 
4657 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4658 	if (status)
4659 		goto err;
4660 
4661 	return 0;
4662 err:
4663 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4664 	return status;
4665 }
4666 
4667 static int be_if_create(struct be_adapter *adapter)
4668 {
4669 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4670 	u32 cap_flags = be_if_cap_flags(adapter);
4671 	int status;
4672 
4673 	/* alloc required memory for other filtering fields */
4674 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4675 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4676 	if (!adapter->pmac_id)
4677 		return -ENOMEM;
4678 
4679 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4680 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4681 	if (!adapter->mc_list)
4682 		return -ENOMEM;
4683 
4684 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4685 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4686 	if (!adapter->uc_list)
4687 		return -ENOMEM;
4688 
4689 	if (adapter->cfg_num_rx_irqs == 1)
4690 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4691 
4692 	en_flags &= cap_flags;
4693 	/* will enable all the needed filter flags in be_open() */
4694 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4695 				  &adapter->if_handle, 0);
4696 
4697 	if (status)
4698 		return status;
4699 
4700 	return 0;
4701 }
4702 
4703 int be_update_queues(struct be_adapter *adapter)
4704 {
4705 	struct net_device *netdev = adapter->netdev;
4706 	int status;
4707 
4708 	if (netif_running(netdev))
4709 		be_close(netdev);
4710 
4711 	be_cancel_worker(adapter);
4712 
4713 	/* If any vectors have been shared with RoCE we cannot re-program
4714 	 * the MSIx table.
4715 	 */
4716 	if (!adapter->num_msix_roce_vec)
4717 		be_msix_disable(adapter);
4718 
4719 	be_clear_queues(adapter);
4720 	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4721 	if (status)
4722 		return status;
4723 
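	/* Re-enable MSI-X only if it was disabled above (i.e. no vectors
	 * are shared with RoCE).
	 */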
4724 	if (!msix_enabled(adapter)) {
4725 		status = be_msix_enable(adapter);
4726 		if (status)
4727 			return status;
4728 	}
4729 
4730 	status = be_if_create(adapter);
4731 	if (status)
4732 		return status;
4733 
4734 	status = be_setup_queues(adapter);
4735 	if (status)
4736 		return status;
4737 
4738 	be_schedule_worker(adapter);
4739 
4740 	/* The IF was destroyed and re-created. We need to clear
4741 	 * all promiscuous flags valid for the destroyed IF.
4742 	 * Without this, promisc mode is not restored during
4743 	 * be_open() because the driver thinks that it is
4744 	 * already enabled in HW.
4745 	 */
4746 	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4747 
4748 	if (netif_running(netdev))
4749 		status = be_open(netdev);
4750 
4751 	return status;
4752 }
4753 
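/* Parse the major number from a "major.minor..." FW version string;
 * returns 0 if the string cannot be parsed.
 */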
4754 static inline int fw_major_num(const char *fw_ver)
4755 {
4756 	int fw_major = 0, i;
4757 
4758 	i = sscanf(fw_ver, "%d.", &fw_major);
4759 	if (i != 1)
4760 		return 0;
4761 
4762 	return fw_major;
4763 }
4764 
4765 /* If we are in error recovery, FLR the PF.
4766  * Else, if any VFs are already enabled, don't FLR the PF.
4767  */
4768 static bool be_reset_required(struct be_adapter *adapter)
4769 {
4770 	if (be_error_recovering(adapter))
4771 		return true;
4772 	else
4773 		return pci_num_vf(adapter->pdev) == 0;
4774 }
4775 
4776 /* Wait for the FW to be ready and perform the required initialization */
4777 static int be_func_init(struct be_adapter *adapter)
4778 {
4779 	int status;
4780 
4781 	status = be_fw_wait_ready(adapter);
4782 	if (status)
4783 		return status;
4784 
4785 	/* FW is now ready; clear errors to allow cmds/doorbell */
4786 	be_clear_error(adapter, BE_CLEAR_ALL);
4787 
4788 	if (be_reset_required(adapter)) {
4789 		status = be_cmd_reset_function(adapter);
4790 		if (status)
4791 			return status;
4792 
4793 		/* Wait for interrupts to quiesce after an FLR */
4794 		msleep(100);
4795 	}
4796 
4797 	/* Tell FW we're ready to fire cmds */
4798 	status = be_cmd_fw_init(adapter);
4799 	if (status)
4800 		return status;
4801 
4802 	/* Allow interrupts for other ULPs running on NIC function */
4803 	be_intr_set(adapter, true);
4804 
4805 	return 0;
4806 }
4807 
4808 static int be_setup(struct be_adapter *adapter)
4809 {
4810 	struct device *dev = &adapter->pdev->dev;
4811 	int status;
4812 
4813 	status = be_func_init(adapter);
4814 	if (status)
4815 		return status;
4816 
4817 	be_setup_init(adapter);
4818 
4819 	if (!lancer_chip(adapter))
4820 		be_cmd_req_native_mode(adapter);
4821 
4822 	/* invoke this cmd first to get pf_num and vf_num which are needed
4823 	 * for issuing profile related cmds
4824 	 */
4825 	if (!BEx_chip(adapter)) {
4826 		status = be_cmd_get_func_config(adapter, NULL);
4827 		if (status)
4828 			return status;
4829 	}
4830 
4831 	status = be_get_config(adapter);
4832 	if (status)
4833 		goto err;
4834 
4835 	if (!BE2_chip(adapter) && be_physfn(adapter))
4836 		be_alloc_sriov_res(adapter);
4837 
4838 	status = be_get_resources(adapter);
4839 	if (status)
4840 		goto err;
4841 
4842 	status = be_msix_enable(adapter);
4843 	if (status)
4844 		goto err;
4845 
4846 	/* will enable all the needed filter flags in be_open() */
4847 	status = be_if_create(adapter);
4848 	if (status)
4849 		goto err;
4850 
4851 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4852 	rtnl_lock();
4853 	status = be_setup_queues(adapter);
4854 	rtnl_unlock();
4855 	if (status)
4856 		goto err;
4857 
4858 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4859 
4860 	status = be_mac_setup(adapter);
4861 	if (status)
4862 		goto err;
4863 
4864 	be_cmd_get_fw_ver(adapter);
4865 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4866 
4867 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4868 		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4869 			adapter->fw_ver);
4870 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4871 	}
4872 
4873 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4874 					 adapter->rx_fc);
4875 	if (status)
4876 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4877 					&adapter->rx_fc);
4878 
4879 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4880 		 adapter->tx_fc, adapter->rx_fc);
4881 
4882 	if (be_physfn(adapter))
4883 		be_cmd_set_logical_link_config(adapter,
4884 					       IFLA_VF_LINK_STATE_AUTO, 0);
4885 
4886 	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4887 	 * confusing a Linux bridge or OVS that it might be connected to.
4888 	 * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4889 	 * when SRIOV is not enabled.
4890 	 */
4891 	if (BE3_chip(adapter))
4892 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4893 				      PORT_FWD_TYPE_PASSTHRU, 0);
4894 
4895 	if (adapter->num_vfs)
4896 		be_vf_setup(adapter);
4897 
4898 	status = be_cmd_get_phy_info(adapter);
4899 	if (!status && be_pause_supported(adapter))
4900 		adapter->phy.fc_autoneg = 1;
4901 
4902 	if (be_physfn(adapter) && !lancer_chip(adapter))
4903 		be_cmd_set_features(adapter);
4904 
4905 	be_schedule_worker(adapter);
4906 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4907 	return 0;
4908 err:
4909 	be_clear(adapter);
4910 	return status;
4911 }
4912 
4913 #ifdef CONFIG_NET_POLL_CONTROLLER
4914 static void be_netpoll(struct net_device *netdev)
4915 {
4916 	struct be_adapter *adapter = netdev_priv(netdev);
4917 	struct be_eq_obj *eqo;
4918 	int i;
4919 
4920 	for_all_evt_queues(adapter, eqo, i) {
4921 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4922 		napi_schedule(&eqo->napi);
4923 	}
4924 }
4925 #endif
4926 
4927 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4928 {
4929 	const struct firmware *fw;
4930 	int status;
4931 
4932 	if (!netif_running(adapter->netdev)) {
4933 		dev_err(&adapter->pdev->dev,
4934 			"Firmware load not allowed (interface is down)\n");
4935 		return -ENETDOWN;
4936 	}
4937 
4938 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4939 	if (status)
4940 		goto fw_exit;
4941 
4942 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4943 
4944 	if (lancer_chip(adapter))
4945 		status = lancer_fw_download(adapter, fw);
4946 	else
4947 		status = be_fw_download(adapter, fw);
4948 
4949 	if (!status)
4950 		be_cmd_get_fw_ver(adapter);
4951 
4952 fw_exit:
4953 	release_firmware(fw);
4954 	return status;
4955 }
4956 
4957 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4958 				 u16 flags, struct netlink_ext_ack *extack)
4959 {
4960 	struct be_adapter *adapter = netdev_priv(dev);
4961 	struct nlattr *attr, *br_spec;
4962 	int rem;
4963 	int status = 0;
4964 	u16 mode = 0;
4965 
4966 	if (!sriov_enabled(adapter))
4967 		return -EOPNOTSUPP;
4968 
4969 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4970 	if (!br_spec)
4971 		return -EINVAL;
4972 
4973 	nla_for_each_nested(attr, br_spec, rem) {
4974 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4975 			continue;
4976 
4977 		if (nla_len(attr) < sizeof(mode))
4978 			return -EINVAL;
4979 
4980 		mode = nla_get_u16(attr);
4981 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4982 			return -EOPNOTSUPP;
4983 
4984 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4985 			return -EINVAL;
4986 
4987 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4988 					       adapter->if_handle,
4989 					       mode == BRIDGE_MODE_VEPA ?
4990 					       PORT_FWD_TYPE_VEPA :
4991 					       PORT_FWD_TYPE_VEB, 0);
4992 		if (status)
4993 			goto err;
4994 
4995 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4996 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4997 
4998 		return status;
4999 	}
5000 err:
5001 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5002 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5003 
5004 	return status;
5005 }
5006 
5007 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5008 				 struct net_device *dev, u32 filter_mask,
5009 				 int nlflags)
5010 {
5011 	struct be_adapter *adapter = netdev_priv(dev);
5012 	int status = 0;
5013 	u8 hsw_mode;
5014 
5015 	/* BE and Lancer chips support VEB mode only */
5016 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
5017 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5018 		if (!pci_sriov_get_totalvfs(adapter->pdev))
5019 			return 0;
5020 		hsw_mode = PORT_FWD_TYPE_VEB;
5021 	} else {
5022 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
5023 					       adapter->if_handle, &hsw_mode,
5024 					       NULL);
5025 		if (status)
5026 			return 0;
5027 
5028 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5029 			return 0;
5030 	}
5031 
5032 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5033 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
5034 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5035 				       0, 0, nlflags, filter_mask, NULL);
5036 }
5037 
5038 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5039 					 void (*func)(struct work_struct *))
5040 {
5041 	struct be_cmd_work *work;
5042 
5043 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
5044 	if (!work) {
5045 		dev_err(&adapter->pdev->dev,
5046 			"be_work memory allocation failed\n");
5047 		return NULL;
5048 	}
5049 
5050 	INIT_WORK(&work->work, func);
5051 	work->adapter = adapter;
5052 	return work;
5053 }
5054 
5055 /* VxLAN offload Notes:
5056  *
5057  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5058  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5059  * is expected to work across all types of IP tunnels once exported. Skyhawk
5060  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5061  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5062  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5063  * those other tunnels are unexported on the fly through ndo_features_check().
5064  *
5065  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5066  * adds more than one port, disable offloads and re-enable them again when
5067  * there's only one port left. We maintain a list of ports for this purpose.
5068  */
5069 static void be_work_add_vxlan_port(struct work_struct *work)
5070 {
5071 	struct be_cmd_work *cmd_work =
5072 				container_of(work, struct be_cmd_work, work);
5073 	struct be_adapter *adapter = cmd_work->adapter;
5074 	struct device *dev = &adapter->pdev->dev;
5075 	__be16 port = cmd_work->info.vxlan_port;
5076 	struct be_vxlan_port *vxlan_port;
5077 	int status;
5078 
5079 	/* Bump up the alias count if it is an existing port */
5080 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5081 		if (vxlan_port->port == port) {
5082 			vxlan_port->port_aliases++;
5083 			goto done;
5084 		}
5085 	}
5086 
5087 	/* Add a new port to our list. We don't need a lock here since port
5088 	 * add/delete are done only in the context of a single-threaded work
5089 	 * queue (be_wq).
5090 	 */
5091 	vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5092 	if (!vxlan_port)
5093 		goto done;
5094 
5095 	vxlan_port->port = port;
5096 	INIT_LIST_HEAD(&vxlan_port->list);
5097 	list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5098 	adapter->vxlan_port_count++;
5099 
5100 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5101 		dev_info(dev,
5102 			 "Only one UDP port supported for VxLAN offloads\n");
5103 		dev_info(dev, "Disabling VxLAN offloads\n");
5104 		goto err;
5105 	}
5106 
5107 	if (adapter->vxlan_port_count > 1)
5108 		goto done;
5109 
5110 	status = be_enable_vxlan_offloads(adapter);
5111 	if (!status)
5112 		goto done;
5113 
5114 err:
5115 	be_disable_vxlan_offloads(adapter);
5116 done:
5117 	kfree(cmd_work);
5118 	return;
5119 }
5120 
5121 static void be_work_del_vxlan_port(struct work_struct *work)
5122 {
5123 	struct be_cmd_work *cmd_work =
5124 				container_of(work, struct be_cmd_work, work);
5125 	struct be_adapter *adapter = cmd_work->adapter;
5126 	__be16 port = cmd_work->info.vxlan_port;
5127 	struct be_vxlan_port *vxlan_port;
5128 
5129 	/* Nothing to be done if a port alias is being deleted */
5130 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5131 		if (vxlan_port->port == port) {
5132 			if (vxlan_port->port_aliases) {
5133 				vxlan_port->port_aliases--;
5134 				goto done;
5135 			}
5136 			break;
5137 		}
5138 	}
5139 
5140 	/* No port aliases left; delete the port from the list */
5141 	list_del(&vxlan_port->list);
5142 	adapter->vxlan_port_count--;
5143 
5144 	/* Disable VxLAN offload if this is the offloaded port */
5145 	if (adapter->vxlan_port == vxlan_port->port) {
5146 		WARN_ON(adapter->vxlan_port_count);
5147 		be_disable_vxlan_offloads(adapter);
5148 		dev_info(&adapter->pdev->dev,
5149 			 "Disabled VxLAN offloads for UDP port %d\n",
5150 			 be16_to_cpu(port));
5151 		goto out;
5152 	}
5153 
5154 	/* If only 1 port is left, re-enable VxLAN offload */
5155 	if (adapter->vxlan_port_count == 1)
5156 		be_enable_vxlan_offloads(adapter);
5157 
5158 out:
5159 	kfree(vxlan_port);
5160 done:
5161 	kfree(cmd_work);
5162 }
5163 
5164 static void be_cfg_vxlan_port(struct net_device *netdev,
5165 			      struct udp_tunnel_info *ti,
5166 			      void (*func)(struct work_struct *))
5167 {
5168 	struct be_adapter *adapter = netdev_priv(netdev);
5169 	struct be_cmd_work *cmd_work;
5170 
5171 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5172 		return;
5173 
5174 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5175 		return;
5176 
5177 	cmd_work = be_alloc_work(adapter, func);
5178 	if (cmd_work) {
5179 		cmd_work->info.vxlan_port = ti->port;
5180 		queue_work(be_wq, &cmd_work->work);
5181 	}
5182 }
5183 
5184 static void be_del_vxlan_port(struct net_device *netdev,
5185 			      struct udp_tunnel_info *ti)
5186 {
5187 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5188 }
5189 
5190 static void be_add_vxlan_port(struct net_device *netdev,
5191 			      struct udp_tunnel_info *ti)
5192 {
5193 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5194 }
5195 
5196 static netdev_features_t be_features_check(struct sk_buff *skb,
5197 					   struct net_device *dev,
5198 					   netdev_features_t features)
5199 {
5200 	struct be_adapter *adapter = netdev_priv(dev);
5201 	u8 l4_hdr = 0;
5202 
5203 	if (skb_is_gso(skb)) {
5204 		/* IPv6 TSO requests with extension hdrs are a problem
5205 		 * for Lancer and BE3 HW. Disable the TSO6 feature.
5206 		 */
5207 		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5208 			features &= ~NETIF_F_TSO6;
5209 
5210 		/* Lancer cannot handle packets with an MSS less than 256.
5211 		 * It also can't handle a TSO packet with a single segment.
5212 		 * Disable GSO support in such cases.
5213 		 */
5214 		if (lancer_chip(adapter) &&
5215 		    (skb_shinfo(skb)->gso_size < 256 ||
5216 		     skb_shinfo(skb)->gso_segs == 1))
5217 			features &= ~NETIF_F_GSO_MASK;
5218 	}
5219 
5220 	/* The code below restricts offload features for some tunneled and
5221 	 * Q-in-Q packets.
5222 	 * Offload features for normal (non tunnel) packets are unchanged.
5223 	 */
5224 	features = vlan_features_check(skb, features);
5225 	if (!skb->encapsulation ||
5226 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5227 		return features;
5228 
5229 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5230 	 * should disable tunnel offload features if it's not a VxLAN packet,
5231 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5232 	 * allow other tunneled traffic like GRE to work fine while VxLAN
5233 	 * offloads are configured in Skyhawk-R.
5234 	 */
5235 	switch (vlan_get_protocol(skb)) {
5236 	case htons(ETH_P_IP):
5237 		l4_hdr = ip_hdr(skb)->protocol;
5238 		break;
5239 	case htons(ETH_P_IPV6):
5240 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5241 		break;
5242 	default:
5243 		return features;
5244 	}
5245 
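	/* Keep tunnel offloads only for a VxLAN frame: outer UDP, inner
	 * Ethernet (TEB) payload, a VxLAN-sized tunnel header, and a
	 * destination port matching the offloaded VxLAN port.
	 */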
5246 	if (l4_hdr != IPPROTO_UDP ||
5247 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5248 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5249 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5250 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5251 	    !adapter->vxlan_port ||
5252 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5253 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5254 
5255 	return features;
5256 }
5257 
5258 static int be_get_phys_port_id(struct net_device *dev,
5259 			       struct netdev_phys_item_id *ppid)
5260 {
5261 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5262 	struct be_adapter *adapter = netdev_priv(dev);
5263 	u8 *id;
5264 
5265 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5266 		return -ENOSPC;
5267 
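	/* The ID is the 1-based HBA port number followed by the controller
	 * serial number words copied in reverse order.
	 */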
5268 	ppid->id[0] = adapter->hba_port_num + 1;
5269 	id = &ppid->id[1];
5270 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5271 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5272 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5273 
5274 	ppid->id_len = id_len;
5275 
5276 	return 0;
5277 }
5278 
5279 static void be_set_rx_mode(struct net_device *dev)
5280 {
5281 	struct be_adapter *adapter = netdev_priv(dev);
5282 	struct be_cmd_work *work;
5283 
5284 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5285 	if (work)
5286 		queue_work(be_wq, &work->work);
5287 }
5288 
5289 static const struct net_device_ops be_netdev_ops = {
5290 	.ndo_open		= be_open,
5291 	.ndo_stop		= be_close,
5292 	.ndo_start_xmit		= be_xmit,
5293 	.ndo_set_rx_mode	= be_set_rx_mode,
5294 	.ndo_set_mac_address	= be_mac_addr_set,
5295 	.ndo_get_stats64	= be_get_stats64,
5296 	.ndo_validate_addr	= eth_validate_addr,
5297 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5298 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5299 	.ndo_set_vf_mac		= be_set_vf_mac,
5300 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5301 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5302 	.ndo_get_vf_config	= be_get_vf_config,
5303 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5304 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5305 	.ndo_tx_timeout		= be_tx_timeout,
5306 #ifdef CONFIG_NET_POLL_CONTROLLER
5307 	.ndo_poll_controller	= be_netpoll,
5308 #endif
5309 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5310 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5311 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5312 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5313 	.ndo_features_check	= be_features_check,
5314 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5315 };
5316 
5317 static void be_netdev_init(struct net_device *netdev)
5318 {
5319 	struct be_adapter *adapter = netdev_priv(netdev);
5320 
5321 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5322 		NETIF_F_GSO_UDP_TUNNEL |
5323 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5324 		NETIF_F_HW_VLAN_CTAG_TX;
5325 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5326 		netdev->hw_features |= NETIF_F_RXHASH;
5327 
5328 	netdev->features |= netdev->hw_features |
5329 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5330 
5331 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5332 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5333 
5334 	netdev->priv_flags |= IFF_UNICAST_FLT;
5335 
5336 	netdev->flags |= IFF_MULTICAST;
5337 
5338 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5339 
5340 	netdev->netdev_ops = &be_netdev_ops;
5341 
5342 	netdev->ethtool_ops = &be_ethtool_ops;
5343 
5344 	/* MTU range: 256 - 9000 */
5345 	netdev->min_mtu = BE_MIN_MTU;
5346 	netdev->max_mtu = BE_MAX_MTU;
5347 }
5348 
5349 static void be_cleanup(struct be_adapter *adapter)
5350 {
5351 	struct net_device *netdev = adapter->netdev;
5352 
5353 	rtnl_lock();
5354 	netif_device_detach(netdev);
5355 	if (netif_running(netdev))
5356 		be_close(netdev);
5357 	rtnl_unlock();
5358 
5359 	be_clear(adapter);
5360 }
5361 
5362 static int be_resume(struct be_adapter *adapter)
5363 {
5364 	struct net_device *netdev = adapter->netdev;
5365 	int status;
5366 
5367 	status = be_setup(adapter);
5368 	if (status)
5369 		return status;
5370 
5371 	rtnl_lock();
5372 	if (netif_running(netdev))
5373 		status = be_open(netdev);
5374 	rtnl_unlock();
5375 
5376 	if (status)
5377 		return status;
5378 
5379 	netif_device_attach(netdev);
5380 
5381 	return 0;
5382 }
5383 
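/* Initiate a chip soft reset by setting the SR bit in the
 * SLIPORT_SOFTRESET register.
 */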
5384 static void be_soft_reset(struct be_adapter *adapter)
5385 {
5386 	u32 val;
5387 
5388 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5389 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5390 	val |= SLIPORT_SOFTRESET_SR_MASK;
5391 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5392 }
5393 
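/* A HW error is considered recoverable only if the POST stage reports a
 * recoverable error code, enough time has elapsed since driver load and
 * since the last recovery attempt, and the error code differs from the
 * previous one (consecutive identical TPE errors are not recovered).
 */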
5394 static bool be_err_is_recoverable(struct be_adapter *adapter)
5395 {
5396 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5397 	unsigned long initial_idle_time =
5398 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5399 	unsigned long recovery_interval =
5400 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5401 	u16 ue_err_code;
5402 	u32 val;
5403 
5404 	val = be_POST_stage_get(adapter);
5405 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5406 		return false;
5407 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5408 	if (ue_err_code == 0)
5409 		return false;
5410 
5411 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5412 		ue_err_code);
5413 
5414 	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5415 		dev_err(&adapter->pdev->dev,
5416 			"Cannot recover within %lu sec from driver load\n",
5417 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5418 		return false;
5419 	}
5420 
5421 	if (err_rec->last_recovery_time && time_before_eq(
5422 		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5423 		dev_err(&adapter->pdev->dev,
5424 			"Cannot recover within %lu sec from last recovery\n",
5425 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5426 		return false;
5427 	}
5428 
5429 	if (ue_err_code == err_rec->last_err_code) {
5430 		dev_err(&adapter->pdev->dev,
5431 			"Cannot recover from a consecutive TPE error\n");
5432 		return false;
5433 	}
5434 
5435 	err_rec->last_recovery_time = jiffies;
5436 	err_rec->last_err_code = ue_err_code;
5437 	return true;
5438 }
5439 
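/* TPE recovery state machine: NONE -> DETECT -> RESET (PF0 only) ->
 * PRE_POLL -> REINIT; non-PF0 functions skip the RESET state. Returns
 * -EAGAIN with a non-zero resched_delay while recovery is still in
 * progress, 0 once the adapter is ready to be re-initialized, and a
 * negative error code if recovery is not possible.
 */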
5440 static int be_tpe_recover(struct be_adapter *adapter)
5441 {
5442 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5443 	int status = -EAGAIN;
5444 	u32 val;
5445 
5446 	switch (err_rec->recovery_state) {
5447 	case ERR_RECOVERY_ST_NONE:
5448 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5449 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5450 		break;
5451 
5452 	case ERR_RECOVERY_ST_DETECT:
5453 		val = be_POST_stage_get(adapter);
5454 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5455 		    POST_STAGE_RECOVERABLE_ERR) {
5456 			dev_err(&adapter->pdev->dev,
5457 				"Unrecoverable HW error detected: 0x%x\n", val);
5458 			status = -EINVAL;
5459 			err_rec->resched_delay = 0;
5460 			break;
5461 		}
5462 
5463 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5464 
5465 		/* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
5466 		 * milliseconds before it checks the final error status in
5467 		 * SLIPORT_SEMAPHORE to determine whether the recovery criteria
5468 		 * are met. If so, PF0 initiates the Soft Reset.
5469 		 */
5470 		if (adapter->pf_num == 0) {
5471 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5472 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5473 					ERR_RECOVERY_UE_DETECT_DURATION;
5474 			break;
5475 		}
5476 
5477 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5478 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5479 					ERR_RECOVERY_UE_DETECT_DURATION;
5480 		break;
5481 
5482 	case ERR_RECOVERY_ST_RESET:
5483 		if (!be_err_is_recoverable(adapter)) {
5484 			dev_err(&adapter->pdev->dev,
5485 				"Failed to meet recovery criteria\n");
5486 			status = -EIO;
5487 			err_rec->resched_delay = 0;
5488 			break;
5489 		}
5490 		be_soft_reset(adapter);
5491 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5492 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5493 					err_rec->ue_to_reset_time;
5494 		break;
5495 
5496 	case ERR_RECOVERY_ST_PRE_POLL:
5497 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5498 		err_rec->resched_delay = 0;
5499 		status = 0;			/* done */
5500 		break;
5501 
5502 	default:
5503 		status = -EINVAL;
5504 		err_rec->resched_delay = 0;
5505 		break;
5506 	}
5507 
5508 	return status;
5509 }
5510 
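/* Recover the adapter from a HW error: non-Lancer chips first step through
 * the TPE recovery state machine (if supported and not disabled), then the
 * driver waits for the FW to become ready and re-initializes itself via
 * be_cleanup()/be_resume().
 */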
5511 static int be_err_recover(struct be_adapter *adapter)
5512 {
5513 	int status;
5514 
5515 	if (!lancer_chip(adapter)) {
5516 		if (!adapter->error_recovery.recovery_supported ||
5517 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5518 			return -EIO;
5519 		status = be_tpe_recover(adapter);
5520 		if (status)
5521 			goto err;
5522 	}
5523 
5524 	/* Wait for adapter to reach quiescent state before
5525 	 * destroying queues
5526 	 */
5527 	status = be_fw_wait_ready(adapter);
5528 	if (status)
5529 		goto err;
5530 
5531 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5532 
5533 	be_cleanup(adapter);
5534 
5535 	status = be_resume(adapter);
5536 	if (status)
5537 		goto err;
5538 
5539 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5540 
5541 err:
5542 	return status;
5543 }
5544 
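/* Periodic error-detection work: polls for HW errors and, when one is
 * detected, drives be_err_recover(), rescheduling itself with a delay that
 * depends on the chip type and the recovery state.
 */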
5545 static void be_err_detection_task(struct work_struct *work)
5546 {
5547 	struct be_error_recovery *err_rec =
5548 			container_of(work, struct be_error_recovery,
5549 				     err_detection_work.work);
5550 	struct be_adapter *adapter =
5551 			container_of(err_rec, struct be_adapter,
5552 				     error_recovery);
5553 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5554 	struct device *dev = &adapter->pdev->dev;
5555 	int recovery_status;
5556 
5557 	be_detect_error(adapter);
5558 	if (!be_check_error(adapter, BE_ERROR_HW))
5559 		goto reschedule_task;
5560 
5561 	recovery_status = be_err_recover(adapter);
5562 	if (!recovery_status) {
5563 		err_rec->recovery_retries = 0;
5564 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5565 		dev_info(dev, "Adapter recovery successful\n");
5566 		goto reschedule_task;
5567 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5568 		/* BEx/SH recovery state machine */
5569 		if (adapter->pf_num == 0 &&
5570 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5571 			dev_err(&adapter->pdev->dev,
5572 				"Adapter recovery in progress\n");
5573 		resched_delay = err_rec->resched_delay;
5574 		goto reschedule_task;
5575 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5576 		/* For VFs, check every second whether the PF has
5577 		 * allocated resources.
5578 		 */
5579 		dev_err(dev, "Re-trying adapter recovery\n");
5580 		goto reschedule_task;
5581 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5582 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5583 		/* If another error occurs during recovery, the adapter takes
5584 		 * 30 seconds to come out of the error state. Retry error
5585 		 * recovery after this interval.
5586 		 */
5587 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5588 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5589 		goto reschedule_task;
5590 	} else {
5591 		dev_err(dev, "Adapter recovery failed\n");
5592 		dev_err(dev, "Please reboot server to recover\n");
5593 	}
5594 
5595 	return;
5596 
5597 reschedule_task:
5598 	be_schedule_err_detection(adapter, resched_delay);
5599 }
5600 
5601 static void be_log_sfp_info(struct be_adapter *adapter)
5602 {
5603 	int status;
5604 
5605 	status = be_cmd_query_sfp_info(adapter);
5606 	if (!status) {
5607 		dev_err(&adapter->pdev->dev,
5608 			"Port %c: %s Vendor: %s part no: %s\n",
5609 			adapter->port_name,
5610 			be_misconfig_evt_port_state[adapter->phy_state],
5611 			adapter->phy.vendor_name,
5612 			adapter->phy.vendor_pn);
5613 	}
5614 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5615 }
5616 
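/* Periodic (1 second) housekeeping: query the die temperature (PF only),
 * issue stats requests, replenish starved RX queues, update EQ delays and
 * log SFP info after a PHY misconfiguration event.
 */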
5617 static void be_worker(struct work_struct *work)
5618 {
5619 	struct be_adapter *adapter =
5620 		container_of(work, struct be_adapter, work.work);
5621 	struct be_rx_obj *rxo;
5622 	int i;
5623 
5624 	if (be_physfn(adapter) &&
5625 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5626 		be_cmd_get_die_temperature(adapter);
5627 
5628 	/* when interrupts are not yet enabled, just reap any pending
5629 	 * mcc completions
5630 	 */
5631 	if (!netif_running(adapter->netdev)) {
5632 		local_bh_disable();
5633 		be_process_mcc(adapter);
5634 		local_bh_enable();
5635 		goto reschedule;
5636 	}
5637 
5638 	if (!adapter->stats_cmd_sent) {
5639 		if (lancer_chip(adapter))
5640 			lancer_cmd_get_pport_stats(adapter,
5641 						   &adapter->stats_cmd);
5642 		else
5643 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5644 	}
5645 
5646 	for_all_rx_queues(adapter, rxo, i) {
5647 		/* Replenish RX-queues starved due to memory
5648 		 * allocation failures.
5649 		 */
5650 		if (rxo->rx_post_starved)
5651 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5652 	}
5653 
5654 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5655 	if (!skyhawk_chip(adapter))
5656 		be_eqd_update(adapter, false);
5657 
5658 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5659 		be_log_sfp_info(adapter);
5660 
5661 reschedule:
5662 	adapter->work_counter++;
5663 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5664 }
5665 
5666 static void be_unmap_pci_bars(struct be_adapter *adapter)
5667 {
5668 	if (adapter->csr)
5669 		pci_iounmap(adapter->pdev, adapter->csr);
5670 	if (adapter->db)
5671 		pci_iounmap(adapter->pdev, adapter->db);
5672 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5673 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5674 }
5675 
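/* The doorbell BAR is BAR 0 on Lancer and for VFs, and BAR 4 otherwise. */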
5676 static int db_bar(struct be_adapter *adapter)
5677 {
5678 	if (lancer_chip(adapter) || be_virtfn(adapter))
5679 		return 0;
5680 	else
5681 		return 4;
5682 }
5683 
5684 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5685 {
5686 	if (skyhawk_chip(adapter)) {
5687 		adapter->roce_db.size = 4096;
5688 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5689 							      db_bar(adapter));
5690 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5691 							       db_bar(adapter));
5692 	}
5693 	return 0;
5694 }
5695 
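/* Map the CSR (BEx PF only), doorbell and PCICFG BARs; the SLI_INTF
 * register is read first to determine the SLI family and whether this
 * function is a VF.
 */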
5696 static int be_map_pci_bars(struct be_adapter *adapter)
5697 {
5698 	struct pci_dev *pdev = adapter->pdev;
5699 	u8 __iomem *addr;
5700 	u32 sli_intf;
5701 
5702 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5703 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5704 				SLI_INTF_FAMILY_SHIFT;
5705 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5706 
5707 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5708 		adapter->csr = pci_iomap(pdev, 2, 0);
5709 		if (!adapter->csr)
5710 			return -ENOMEM;
5711 	}
5712 
5713 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5714 	if (!addr)
5715 		goto pci_map_err;
5716 	adapter->db = addr;
5717 
5718 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5719 		if (be_physfn(adapter)) {
5720 			/* PCICFG is the 2nd BAR in BE2 */
5721 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5722 			if (!addr)
5723 				goto pci_map_err;
5724 			adapter->pcicfg = addr;
5725 			adapter->pcicfg_mapped = true;
5726 		} else {
5727 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5728 			adapter->pcicfg_mapped = false;
5729 		}
5730 	}
5731 
5732 	be_roce_map_pci_bars(adapter);
5733 	return 0;
5734 
5735 pci_map_err:
5736 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5737 	be_unmap_pci_bars(adapter);
5738 	return -ENOMEM;
5739 }
5740 
5741 static void be_drv_cleanup(struct be_adapter *adapter)
5742 {
5743 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5744 	struct device *dev = &adapter->pdev->dev;
5745 
5746 	if (mem->va)
5747 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5748 
5749 	mem = &adapter->rx_filter;
5750 	if (mem->va)
5751 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5752 
5753 	mem = &adapter->stats_cmd;
5754 	if (mem->va)
5755 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5756 }
5757 
5758 /* Allocate and initialize various fields in be_adapter struct */
5759 static int be_drv_init(struct be_adapter *adapter)
5760 {
5761 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5762 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5763 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5764 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5765 	struct device *dev = &adapter->pdev->dev;
5766 	int status = 0;
5767 
5768 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5769 	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5770 						&mbox_mem_alloc->dma,
5771 						GFP_KERNEL);
5772 	if (!mbox_mem_alloc->va)
5773 		return -ENOMEM;
5774 
5775 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5776 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5777 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5778 
5779 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5780 	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5781 					   &rx_filter->dma, GFP_KERNEL);
5782 	if (!rx_filter->va) {
5783 		status = -ENOMEM;
5784 		goto free_mbox;
5785 	}
5786 
5787 	if (lancer_chip(adapter))
5788 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5789 	else if (BE2_chip(adapter))
5790 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5791 	else if (BE3_chip(adapter))
5792 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5793 	else
5794 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5795 	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5796 					   &stats_cmd->dma, GFP_KERNEL);
5797 	if (!stats_cmd->va) {
5798 		status = -ENOMEM;
5799 		goto free_rx_filter;
5800 	}
5801 
5802 	mutex_init(&adapter->mbox_lock);
5803 	mutex_init(&adapter->mcc_lock);
5804 	mutex_init(&adapter->rx_filter_lock);
5805 	spin_lock_init(&adapter->mcc_cq_lock);
5806 	init_completion(&adapter->et_cmd_compl);
5807 
5808 	pci_save_state(adapter->pdev);
5809 
5810 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5811 
5812 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5813 	adapter->error_recovery.resched_delay = 0;
5814 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5815 			  be_err_detection_task);
5816 
5817 	adapter->rx_fc = true;
5818 	adapter->tx_fc = true;
5819 
5820 	/* Must be a power of 2 or else MODULO will BUG_ON */
5821 	adapter->be_get_temp_freq = 64;
5822 
5823 	INIT_LIST_HEAD(&adapter->vxlan_port_list);
5824 	return 0;
5825 
5826 free_rx_filter:
5827 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5828 free_mbox:
5829 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5830 			  mbox_mem_alloc->dma);
5831 	return status;
5832 }
5833 
5834 static void be_remove(struct pci_dev *pdev)
5835 {
5836 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5837 
5838 	if (!adapter)
5839 		return;
5840 
5841 	be_roce_dev_remove(adapter);
5842 	be_intr_set(adapter, false);
5843 
5844 	be_cancel_err_detection(adapter);
5845 
5846 	unregister_netdev(adapter->netdev);
5847 
5848 	be_clear(adapter);
5849 
5850 	if (!pci_vfs_assigned(adapter->pdev))
5851 		be_cmd_reset_function(adapter);
5852 
5853 	/* Tell FW we're done firing cmds */
5854 	be_cmd_fw_clean(adapter);
5855 
5856 	be_unmap_pci_bars(adapter);
5857 	be_drv_cleanup(adapter);
5858 
5859 	pci_disable_pcie_error_reporting(pdev);
5860 
5861 	pci_release_regions(pdev);
5862 	pci_disable_device(pdev);
5863 
5864 	free_netdev(adapter->netdev);
5865 }
5866 
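/* hwmon temp1_input: report the last on-die temperature reading in
 * millidegrees Celsius, or -EIO if no valid reading is available.
 */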
5867 static ssize_t be_hwmon_show_temp(struct device *dev,
5868 				  struct device_attribute *dev_attr,
5869 				  char *buf)
5870 {
5871 	struct be_adapter *adapter = dev_get_drvdata(dev);
5872 
5873 	/* Unit: millidegree Celsius */
5874 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5875 		return -EIO;
5876 	else
5877 		return sprintf(buf, "%u\n",
5878 			       adapter->hwmon_info.be_on_die_temp * 1000);
5879 }
5880 
5881 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5882 			  be_hwmon_show_temp, NULL, 1);
5883 
5884 static struct attribute *be_hwmon_attrs[] = {
5885 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5886 	NULL
5887 };
5888 
5889 ATTRIBUTE_GROUPS(be_hwmon);
5890 
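/* Return a printable name for the adapter's multi-channel mode; an empty
 * string is returned for the default/unknown case.
 */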
5891 static char *mc_name(struct be_adapter *adapter)
5892 {
5893 	char *str = "";	/* default */
5894 
5895 	switch (adapter->mc_type) {
5896 	case UMC:
5897 		str = "UMC";
5898 		break;
5899 	case FLEX10:
5900 		str = "FLEX10";
5901 		break;
5902 	case vNIC1:
5903 		str = "vNIC-1";
5904 		break;
5905 	case nPAR:
5906 		str = "nPAR";
5907 		break;
5908 	case UFP:
5909 		str = "UFP";
5910 		break;
5911 	case vNIC2:
5912 		str = "vNIC-2";
5913 		break;
5914 	default:
5915 		str = "";
5916 	}
5917 
5918 	return str;
5919 }
5920 
5921 static inline char *func_name(struct be_adapter *adapter)
5922 {
5923 	return be_physfn(adapter) ? "PF" : "VF";
5924 }
5925 
5926 static inline char *nic_name(struct pci_dev *pdev)
5927 {
5928 	switch (pdev->device) {
5929 	case OC_DEVICE_ID1:
5930 		return OC_NAME;
5931 	case OC_DEVICE_ID2:
5932 		return OC_NAME_BE;
5933 	case OC_DEVICE_ID3:
5934 	case OC_DEVICE_ID4:
5935 		return OC_NAME_LANCER;
5936 	case BE_DEVICE_ID2:
5937 		return BE3_NAME;
5938 	case OC_DEVICE_ID5:
5939 	case OC_DEVICE_ID6:
5940 		return OC_NAME_SH;
5941 	default:
5942 		return BE_NAME;
5943 	}
5944 }
5945 
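/* PCI probe: enable the device, map the BARs, allocate the netdev and
 * driver state, bring the adapter up via be_setup(), register the netdev
 * and start the error-detection worker.
 */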
5946 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5947 {
5948 	struct be_adapter *adapter;
5949 	struct net_device *netdev;
5950 	int status = 0;
5951 
5952 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5953 
5954 	status = pci_enable_device(pdev);
5955 	if (status)
5956 		goto do_none;
5957 
5958 	status = pci_request_regions(pdev, DRV_NAME);
5959 	if (status)
5960 		goto disable_dev;
5961 	pci_set_master(pdev);
5962 
5963 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5964 	if (!netdev) {
5965 		status = -ENOMEM;
5966 		goto rel_reg;
5967 	}
5968 	adapter = netdev_priv(netdev);
5969 	adapter->pdev = pdev;
5970 	pci_set_drvdata(pdev, adapter);
5971 	adapter->netdev = netdev;
5972 	SET_NETDEV_DEV(netdev, &pdev->dev);
5973 
5974 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5975 	if (!status) {
5976 		netdev->features |= NETIF_F_HIGHDMA;
5977 	} else {
5978 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5979 		if (status) {
5980 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5981 			goto free_netdev;
5982 		}
5983 	}
5984 
5985 	status = pci_enable_pcie_error_reporting(pdev);
5986 	if (!status)
5987 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5988 
5989 	status = be_map_pci_bars(adapter);
5990 	if (status)
5991 		goto free_netdev;
5992 
5993 	status = be_drv_init(adapter);
5994 	if (status)
5995 		goto unmap_bars;
5996 
5997 	status = be_setup(adapter);
5998 	if (status)
5999 		goto drv_cleanup;
6000 
6001 	be_netdev_init(netdev);
6002 	status = register_netdev(netdev);
6003 	if (status != 0)
6004 		goto unsetup;
6005 
6006 	be_roce_dev_add(adapter);
6007 
6008 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6009 	adapter->error_recovery.probe_time = jiffies;
6010 
6011 	/* On-die temperature is not supported for VFs. */
6012 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6013 		adapter->hwmon_info.hwmon_dev =
6014 			devm_hwmon_device_register_with_groups(&pdev->dev,
6015 							       DRV_NAME,
6016 							       adapter,
6017 							       be_hwmon_groups);
6018 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6019 	}
6020 
6021 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6022 		 func_name(adapter), mc_name(adapter), adapter->port_name);
6023 
6024 	return 0;
6025 
6026 unsetup:
6027 	be_clear(adapter);
6028 drv_cleanup:
6029 	be_drv_cleanup(adapter);
6030 unmap_bars:
6031 	be_unmap_pci_bars(adapter);
6032 free_netdev:
6033 	free_netdev(netdev);
6034 rel_reg:
6035 	pci_release_regions(pdev);
6036 disable_dev:
6037 	pci_disable_device(pdev);
6038 do_none:
6039 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6040 	return status;
6041 }
6042 
6043 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6044 {
6045 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6046 
6047 	be_intr_set(adapter, false);
6048 	be_cancel_err_detection(adapter);
6049 
6050 	be_cleanup(adapter);
6051 
6052 	pci_save_state(pdev);
6053 	pci_disable_device(pdev);
6054 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
6055 	return 0;
6056 }
6057 
6058 static int be_pci_resume(struct pci_dev *pdev)
6059 {
6060 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6061 	int status = 0;
6062 
6063 	status = pci_enable_device(pdev);
6064 	if (status)
6065 		return status;
6066 
6067 	pci_restore_state(pdev);
6068 
6069 	status = be_resume(adapter);
6070 	if (status)
6071 		return status;
6072 
6073 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6074 
6075 	return 0;
6076 }
6077 
6078 /*
6079  * An FLR will stop BE from DMAing any data.
6080  */
6081 static void be_shutdown(struct pci_dev *pdev)
6082 {
6083 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6084 
6085 	if (!adapter)
6086 		return;
6087 
6088 	be_roce_dev_shutdown(adapter);
6089 	cancel_delayed_work_sync(&adapter->work);
6090 	be_cancel_err_detection(adapter);
6091 
6092 	netif_device_detach(adapter->netdev);
6093 
6094 	be_cmd_reset_function(adapter);
6095 
6096 	pci_disable_device(pdev);
6097 }
6098 
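/* EEH error detected: mark the EEH error state, stop the error-detection
 * worker, tear down the adapter and request a slot reset (or a disconnect
 * on a permanent failure).
 */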
6099 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6100 					    pci_channel_state_t state)
6101 {
6102 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6103 
6104 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6105 
6106 	be_roce_dev_remove(adapter);
6107 
6108 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6109 		be_set_error(adapter, BE_ERROR_EEH);
6110 
6111 		be_cancel_err_detection(adapter);
6112 
6113 		be_cleanup(adapter);
6114 	}
6115 
6116 	if (state == pci_channel_io_perm_failure)
6117 		return PCI_ERS_RESULT_DISCONNECT;
6118 
6119 	pci_disable_device(pdev);
6120 
6121 	/* The error could cause the FW to trigger a flash debug dump.
6122 	 * Resetting the card while flash dump is in progress
6123 	 * can cause it not to recover; wait for it to finish.
6124 	 * Wait only for first function as it is needed only once per
6125 	 * adapter.
6126 	 */
6127 	if (pdev->devfn == 0)
6128 		ssleep(30);
6129 
6130 	return PCI_ERS_RESULT_NEED_RESET;
6131 }
6132 
6133 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6134 {
6135 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6136 	int status;
6137 
6138 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6139 
6140 	status = pci_enable_device(pdev);
6141 	if (status)
6142 		return PCI_ERS_RESULT_DISCONNECT;
6143 
6144 	pci_set_master(pdev);
6145 	pci_restore_state(pdev);
6146 
6147 	/* Check if card is ok and fw is ready */
6148 	dev_info(&adapter->pdev->dev,
6149 		 "Waiting for FW to be ready after EEH reset\n");
6150 	status = be_fw_wait_ready(adapter);
6151 	if (status)
6152 		return PCI_ERS_RESULT_DISCONNECT;
6153 
6154 	be_clear_error(adapter, BE_CLEAR_ALL);
6155 	return PCI_ERS_RESULT_RECOVERED;
6156 }
6157 
6158 static void be_eeh_resume(struct pci_dev *pdev)
6159 {
6160 	int status = 0;
6161 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6162 
6163 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6164 
6165 	pci_save_state(pdev);
6166 
6167 	status = be_resume(adapter);
6168 	if (status)
6169 		goto err;
6170 
6171 	be_roce_dev_add(adapter);
6172 
6173 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6174 	return;
6175 err:
6176 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6177 }
6178 
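/* sriov_configure hook: enable or disable num_vfs VFs. On Skyhawk the
 * PF-pool resources are redistributed across the requested number of VFs
 * before the VFs are set up.
 */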
6179 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6180 {
6181 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6182 	struct be_resources vft_res = {0};
6183 	int status;
6184 
6185 	if (!num_vfs)
6186 		be_vf_clear(adapter);
6187 
6188 	adapter->num_vfs = num_vfs;
6189 
6190 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6191 		dev_warn(&pdev->dev,
6192 			 "Cannot disable VFs while they are assigned\n");
6193 		return -EBUSY;
6194 	}
6195 
6196 	/* When the HW is in an SR-IOV capable configuration, the PF-pool
6197 	 * resources are distributed equally across the maximum number of VFs.
6198 	 * The user may request that only a subset of the max VFs be enabled.
6199 	 * Based on num_vfs, redistribute the resources across num_vfs so that
6200 	 * each VF gets a larger share of the resources.
6201 	 * This facility is not available in BE3 FW.
6202 	 * On Lancer chips, this redistribution is done by the FW.
6203 	 */
6204 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6205 		be_calculate_vf_res(adapter, adapter->num_vfs,
6206 				    &vft_res);
6207 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6208 						 adapter->num_vfs, &vft_res);
6209 		if (status)
6210 			dev_err(&pdev->dev,
6211 				"Failed to optimize SR-IOV resources\n");
6212 	}
6213 
6214 	status = be_get_resources(adapter);
6215 	if (status)
6216 		return be_cmd_status(status);
6217 
6218 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6219 	rtnl_lock();
6220 	status = be_update_queues(adapter);
6221 	rtnl_unlock();
6222 	if (status)
6223 		return be_cmd_status(status);
6224 
6225 	if (adapter->num_vfs)
6226 		status = be_vf_setup(adapter);
6227 
6228 	if (!status)
6229 		return adapter->num_vfs;
6230 
6231 	return 0;
6232 }
6233 
6234 static const struct pci_error_handlers be_eeh_handlers = {
6235 	.error_detected = be_eeh_err_detected,
6236 	.slot_reset = be_eeh_reset,
6237 	.resume = be_eeh_resume,
6238 };
6239 
6240 static struct pci_driver be_driver = {
6241 	.name = DRV_NAME,
6242 	.id_table = be_dev_ids,
6243 	.probe = be_probe,
6244 	.remove = be_remove,
6245 	.suspend = be_suspend,
6246 	.resume = be_pci_resume,
6247 	.shutdown = be_shutdown,
6248 	.sriov_configure = be_pci_sriov_configure,
6249 	.err_handler = &be_eeh_handlers
6250 };
6251 
6252 static int __init be_init_module(void)
6253 {
6254 	int status;
6255 
6256 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6257 	    rx_frag_size != 2048) {
6258 		printk(KERN_WARNING DRV_NAME
6259 			" : Module param rx_frag_size must be 2048/4096/8192."
6260 			" Using 2048\n");
6261 		rx_frag_size = 2048;
6262 	}
6263 
6264 	if (num_vfs > 0) {
6265 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6266 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6267 	}
6268 
6269 	be_wq = create_singlethread_workqueue("be_wq");
6270 	if (!be_wq) {
6271 		pr_warn(DRV_NAME " : workqueue creation failed\n");
6272 		return -ENOMEM;
6273 	}
6274 
6275 	be_err_recovery_workq =
6276 		create_singlethread_workqueue("be_err_recover");
6277 	if (!be_err_recovery_workq)
6278 		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6279 
6280 	status = pci_register_driver(&be_driver);
6281 	if (status) {
6282 		destroy_workqueue(be_wq);
6283 		be_destroy_err_recovery_workq();
6284 	}
6285 	return status;
6286 }
6287 module_init(be_init_module);
6288 
6289 static void __exit be_exit_module(void)
6290 {
6291 	pci_unregister_driver(&be_driver);
6292 
6293 	be_destroy_err_recovery_workq();
6294 
6295 	if (be_wq)
6296 		destroy_workqueue(be_wq);
6297 }
6298 module_exit(be_exit_module);
6299