1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for defering cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On lancer interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279 			/* mac already added, skip addition */
280 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281 			return 0;
282 		}
283 	}
284 
285 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286 			       &adapter->pmac_id[0], 0);
287 }
288 
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291 	int i;
292 
293 	/* Skip deletion if the programmed mac is
294 	 * being used in uc-list
295 	 */
296 	for (i = 0; i < adapter->uc_macs; i++) {
297 		if (adapter->pmac_id[i + 1] == pmac_id)
298 			return;
299 	}
300 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302 
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305 	struct be_adapter *adapter = netdev_priv(netdev);
306 	struct device *dev = &adapter->pdev->dev;
307 	struct sockaddr *addr = p;
308 	int status;
309 	u8 mac[ETH_ALEN];
310 	u32 old_pmac_id = adapter->pmac_id[0];
311 
312 	if (!is_valid_ether_addr(addr->sa_data))
313 		return -EADDRNOTAVAIL;
314 
315 	/* Proceed further only if, User provided MAC is different
316 	 * from active MAC
317 	 */
318 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319 		return 0;
320 
321 	/* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC
322 	 * address
323 	 */
324 	if (BEx_chip(adapter) && be_virtfn(adapter) &&
325 	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
326 		return -EPERM;
327 
328 	/* if device is not running, copy MAC to netdev->dev_addr */
329 	if (!netif_running(netdev))
330 		goto done;
331 
332 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333 	 * privilege or if PF did not provision the new MAC address.
334 	 * On BE3, this cmd will always fail if the VF doesn't have the
335 	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
336 	 * the MAC for the VF.
337 	 */
338 	mutex_lock(&adapter->rx_filter_lock);
339 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340 	if (!status) {
341 
342 		/* Delete the old programmed MAC. This call may fail if the
343 		 * old MAC was already deleted by the PF driver.
344 		 */
345 		if (adapter->pmac_id[0] != old_pmac_id)
346 			be_dev_mac_del(adapter, old_pmac_id);
347 	}
348 
349 	mutex_unlock(&adapter->rx_filter_lock);
350 	/* Decide if the new MAC is successfully activated only after
351 	 * querying the FW
352 	 */
353 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354 				       adapter->if_handle, true, 0);
355 	if (status)
356 		goto err;
357 
358 	/* The MAC change did not happen, either due to lack of privilege
359 	 * or PF didn't pre-provision.
360 	 */
361 	if (!ether_addr_equal(addr->sa_data, mac)) {
362 		status = -EPERM;
363 		goto err;
364 	}
365 
366 	/* Remember currently programmed MAC */
367 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
370 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371 	return 0;
372 err:
373 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374 	return status;
375 }
376 
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380 	if (BE2_chip(adapter)) {
381 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	} else if (BE3_chip(adapter)) {
385 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386 
387 		return &cmd->hw_stats;
388 	} else {
389 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390 
391 		return &cmd->hw_stats;
392 	}
393 }
394 
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398 	if (BE2_chip(adapter)) {
399 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	} else if (BE3_chip(adapter)) {
403 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404 
405 		return &hw_stats->erx;
406 	} else {
407 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408 
409 		return &hw_stats->erx;
410 	}
411 }
412 
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418 	struct be_port_rxf_stats_v0 *port_stats =
419 					&rxf_stats->port[adapter->port_num];
420 	struct be_drv_stats *drvs = &adapter->drv_stats;
421 
422 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
424 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
425 	drvs->rx_control_frames = port_stats->rx_control_frames;
426 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438 	drvs->rx_dropped_header_too_small =
439 		port_stats->rx_dropped_header_too_small;
440 	drvs->rx_address_filtered =
441 					port_stats->rx_address_filtered +
442 					port_stats->rx_vlan_filtered;
443 	drvs->rx_alignment_symbol_errors =
444 		port_stats->rx_alignment_symbol_errors;
445 
446 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
447 	drvs->tx_controlframes = port_stats->tx_controlframes;
448 
449 	if (adapter->port_num)
450 		drvs->jabber_events = rxf_stats->port1_jabber_events;
451 	else
452 		drvs->jabber_events = rxf_stats->port0_jabber_events;
453 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
456 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461 
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467 	struct be_port_rxf_stats_v1 *port_stats =
468 					&rxf_stats->port[adapter->port_num];
469 	struct be_drv_stats *drvs = &adapter->drv_stats;
470 
471 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
475 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
476 	drvs->rx_control_frames = port_stats->rx_control_frames;
477 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487 	drvs->rx_dropped_header_too_small =
488 		port_stats->rx_dropped_header_too_small;
489 	drvs->rx_input_fifo_overflow_drop =
490 		port_stats->rx_input_fifo_overflow_drop;
491 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
492 	drvs->rx_alignment_symbol_errors =
493 		port_stats->rx_alignment_symbol_errors;
494 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
496 	drvs->tx_controlframes = port_stats->tx_controlframes;
497 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498 	drvs->jabber_events = port_stats->jabber_events;
499 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
502 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507 
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513 	struct be_port_rxf_stats_v2 *port_stats =
514 					&rxf_stats->port[adapter->port_num];
515 	struct be_drv_stats *drvs = &adapter->drv_stats;
516 
517 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
521 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
522 	drvs->rx_control_frames = port_stats->rx_control_frames;
523 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533 	drvs->rx_dropped_header_too_small =
534 		port_stats->rx_dropped_header_too_small;
535 	drvs->rx_input_fifo_overflow_drop =
536 		port_stats->rx_input_fifo_overflow_drop;
537 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
538 	drvs->rx_alignment_symbol_errors =
539 		port_stats->rx_alignment_symbol_errors;
540 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
542 	drvs->tx_controlframes = port_stats->tx_controlframes;
543 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544 	drvs->jabber_events = port_stats->jabber_events;
545 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
548 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552 	if (be_roce_supported(adapter)) {
553 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555 		drvs->rx_roce_frames = port_stats->roce_frames_received;
556 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
557 		drvs->roce_drops_payload_len =
558 			port_stats->roce_drops_payload_len;
559 	}
560 }
561 
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564 	struct be_drv_stats *drvs = &adapter->drv_stats;
565 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566 
567 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577 	drvs->rx_dropped_tcp_length =
578 				pport_stats->rx_dropped_invalid_tcp_length;
579 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582 	drvs->rx_dropped_header_too_small =
583 				pport_stats->rx_dropped_header_too_small;
584 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585 	drvs->rx_address_filtered =
586 					pport_stats->rx_address_filtered +
587 					pport_stats->rx_vlan_filtered;
588 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592 	drvs->jabber_events = pport_stats->rx_jabbers;
593 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
594 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595 	drvs->rx_drops_too_many_frags =
596 				pport_stats->rx_drops_too_many_frags_lo;
597 }
598 
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)			(x & 0xFFFF)
602 #define hi(x)			(x & 0xFFFF0000)
603 	bool wrapped = val < lo(*acc);
604 	u32 newacc = hi(*acc) + val;
605 
606 	if (wrapped)
607 		newacc += 65536;
608 	ACCESS_ONCE(*acc) = newacc;
609 }
610 
611 static void populate_erx_stats(struct be_adapter *adapter,
612 			       struct be_rx_obj *rxo, u32 erx_stat)
613 {
614 	if (!BEx_chip(adapter))
615 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616 	else
617 		/* below erx HW counter can actually wrap around after
618 		 * 65535. Driver accumulates a 32-bit value
619 		 */
620 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621 				     (u16)erx_stat);
622 }
623 
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627 	struct be_rx_obj *rxo;
628 	int i;
629 	u32 erx_stat;
630 
631 	if (lancer_chip(adapter)) {
632 		populate_lancer_stats(adapter);
633 	} else {
634 		if (BE2_chip(adapter))
635 			populate_be_v0_stats(adapter);
636 		else if (BE3_chip(adapter))
637 			/* for BE3 */
638 			populate_be_v1_stats(adapter);
639 		else
640 			populate_be_v2_stats(adapter);
641 
642 		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643 		for_all_rx_queues(adapter, rxo, i) {
644 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645 			populate_erx_stats(adapter, rxo, erx_stat);
646 		}
647 	}
648 }
649 
650 static void be_get_stats64(struct net_device *netdev,
651 			   struct rtnl_link_stats64 *stats)
652 {
653 	struct be_adapter *adapter = netdev_priv(netdev);
654 	struct be_drv_stats *drvs = &adapter->drv_stats;
655 	struct be_rx_obj *rxo;
656 	struct be_tx_obj *txo;
657 	u64 pkts, bytes;
658 	unsigned int start;
659 	int i;
660 
661 	for_all_rx_queues(adapter, rxo, i) {
662 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
663 
664 		do {
665 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666 			pkts = rx_stats(rxo)->rx_pkts;
667 			bytes = rx_stats(rxo)->rx_bytes;
668 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669 		stats->rx_packets += pkts;
670 		stats->rx_bytes += bytes;
671 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673 					rx_stats(rxo)->rx_drops_no_frags;
674 	}
675 
676 	for_all_tx_queues(adapter, txo, i) {
677 		const struct be_tx_stats *tx_stats = tx_stats(txo);
678 
679 		do {
680 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681 			pkts = tx_stats(txo)->tx_pkts;
682 			bytes = tx_stats(txo)->tx_bytes;
683 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684 		stats->tx_packets += pkts;
685 		stats->tx_bytes += bytes;
686 	}
687 
688 	/* bad pkts received */
689 	stats->rx_errors = drvs->rx_crc_errors +
690 		drvs->rx_alignment_symbol_errors +
691 		drvs->rx_in_range_errors +
692 		drvs->rx_out_range_errors +
693 		drvs->rx_frame_too_long +
694 		drvs->rx_dropped_too_small +
695 		drvs->rx_dropped_too_short +
696 		drvs->rx_dropped_header_too_small +
697 		drvs->rx_dropped_tcp_length +
698 		drvs->rx_dropped_runt;
699 
700 	/* detailed rx errors */
701 	stats->rx_length_errors = drvs->rx_in_range_errors +
702 		drvs->rx_out_range_errors +
703 		drvs->rx_frame_too_long;
704 
705 	stats->rx_crc_errors = drvs->rx_crc_errors;
706 
707 	/* frame alignment errors */
708 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709 
710 	/* receiver fifo overrun */
711 	/* drops_no_pbuf is no per i/f, it's per BE card */
712 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713 				drvs->rx_input_fifo_overflow_drop +
714 				drvs->rx_drops_no_pbuf;
715 }
716 
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719 	struct net_device *netdev = adapter->netdev;
720 
721 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722 		netif_carrier_off(netdev);
723 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724 	}
725 
726 	if (link_status)
727 		netif_carrier_on(netdev);
728 	else
729 		netif_carrier_off(netdev);
730 
731 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733 
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736 	if (skb->encapsulation)
737 		return skb_inner_transport_offset(skb) +
738 		       inner_tcp_hdrlen(skb);
739 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741 
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744 	struct be_tx_stats *stats = tx_stats(txo);
745 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746 	/* Account for headers which get duplicated in TSO pkt */
747 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748 
749 	u64_stats_update_begin(&stats->sync);
750 	stats->tx_reqs++;
751 	stats->tx_bytes += skb->len + dup_hdr_len;
752 	stats->tx_pkts += tx_pkts;
753 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754 		stats->tx_vxlan_offload_pkts += tx_pkts;
755 	u64_stats_update_end(&stats->sync);
756 }
757 
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761 	/* +1 for the header wrb */
762 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764 
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770 	wrb->rsvd0 = 0;
771 }
772 
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778 	wrb->frag_pa_hi = 0;
779 	wrb->frag_pa_lo = 0;
780 	wrb->frag_len = 0;
781 	wrb->rsvd0 = 0;
782 }
783 
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785 				     struct sk_buff *skb)
786 {
787 	u8 vlan_prio;
788 	u16 vlan_tag;
789 
790 	vlan_tag = skb_vlan_tag_get(skb);
791 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792 	/* If vlan priority provided by OS is NOT in available bmap */
793 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795 				adapter->recommended_prio_bits;
796 
797 	return vlan_tag;
798 }
799 
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803 	return (inner_ip_hdr(skb)->version == 4) ?
804 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806 
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809 	return (ip_hdr(skb)->version == 4) ?
810 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812 
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817 
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820 	return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822 
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827 
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829 				       struct sk_buff *skb,
830 				       struct be_wrb_params *wrb_params)
831 {
832 	u16 proto;
833 
834 	if (skb_is_gso(skb)) {
835 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
836 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840 		if (skb->encapsulation) {
841 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842 			proto = skb_inner_ip_proto(skb);
843 		} else {
844 			proto = skb_ip_proto(skb);
845 		}
846 		if (proto == IPPROTO_TCP)
847 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848 		else if (proto == IPPROTO_UDP)
849 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850 	}
851 
852 	if (skb_vlan_tag_present(skb)) {
853 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855 	}
856 
857 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859 
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861 			 struct be_eth_hdr_wrb *hdr,
862 			 struct be_wrb_params *wrb_params,
863 			 struct sk_buff *skb)
864 {
865 	memset(hdr, 0, sizeof(*hdr));
866 
867 	SET_TX_WRB_HDR_BITS(crc, hdr,
868 			    BE_WRB_F_GET(wrb_params->features, CRC));
869 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
870 			    BE_WRB_F_GET(wrb_params->features, IPCS));
871 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
873 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
874 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
875 
876 	SET_TX_WRB_HDR_BITS(lso, hdr,
877 			    BE_WRB_F_GET(wrb_params->features, LSO));
878 	SET_TX_WRB_HDR_BITS(lso6, hdr,
879 			    BE_WRB_F_GET(wrb_params->features, LSO6));
880 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881 
882 	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883 	 * hack is not needed, the evt bit is set while ringing DB.
884 	 */
885 	SET_TX_WRB_HDR_BITS(event, hdr,
886 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887 	SET_TX_WRB_HDR_BITS(vlan, hdr,
888 			    BE_WRB_F_GET(wrb_params->features, VLAN));
889 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890 
891 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
894 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896 
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898 			  bool unmap_single)
899 {
900 	dma_addr_t dma;
901 	u32 frag_len = le32_to_cpu(wrb->frag_len);
902 
903 
904 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905 		(u64)le32_to_cpu(wrb->frag_pa_lo);
906 	if (frag_len) {
907 		if (unmap_single)
908 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909 		else
910 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911 	}
912 }
913 
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917 	u32 head = txo->q.head;
918 
919 	queue_head_inc(&txo->q);
920 	return head;
921 }
922 
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925 				struct be_tx_obj *txo,
926 				struct be_wrb_params *wrb_params,
927 				struct sk_buff *skb, u16 head)
928 {
929 	u32 num_frags = skb_wrb_cnt(skb);
930 	struct be_queue_info *txq = &txo->q;
931 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932 
933 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
935 
936 	BUG_ON(txo->sent_skb_list[head]);
937 	txo->sent_skb_list[head] = skb;
938 	txo->last_req_hdr = head;
939 	atomic_add(num_frags, &txq->used);
940 	txo->last_req_wrb_cnt = num_frags;
941 	txo->pend_wrb_cnt += num_frags;
942 }
943 
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946 				 int len)
947 {
948 	struct be_eth_wrb *wrb;
949 	struct be_queue_info *txq = &txo->q;
950 
951 	wrb = queue_head_node(txq);
952 	wrb_fill(wrb, busaddr, len);
953 	queue_head_inc(txq);
954 }
955 
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961 			    struct be_tx_obj *txo, u32 head, bool map_single,
962 			    u32 copied)
963 {
964 	struct device *dev;
965 	struct be_eth_wrb *wrb;
966 	struct be_queue_info *txq = &txo->q;
967 
968 	dev = &adapter->pdev->dev;
969 	txq->head = head;
970 
971 	/* skip the first wrb (hdr); it's not mapped */
972 	queue_head_inc(txq);
973 	while (copied) {
974 		wrb = queue_head_node(txq);
975 		unmap_tx_frag(dev, wrb, map_single);
976 		map_single = false;
977 		copied -= le32_to_cpu(wrb->frag_len);
978 		queue_head_inc(txq);
979 	}
980 
981 	txq->head = head;
982 }
983 
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989 			   struct sk_buff *skb,
990 			   struct be_wrb_params *wrb_params)
991 {
992 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993 	struct device *dev = &adapter->pdev->dev;
994 	struct be_queue_info *txq = &txo->q;
995 	bool map_single = false;
996 	u32 head = txq->head;
997 	dma_addr_t busaddr;
998 	int len;
999 
1000 	head = be_tx_get_wrb_hdr(txo);
1001 
1002 	if (skb->len > skb->data_len) {
1003 		len = skb_headlen(skb);
1004 
1005 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006 		if (dma_mapping_error(dev, busaddr))
1007 			goto dma_err;
1008 		map_single = true;
1009 		be_tx_setup_wrb_frag(txo, busaddr, len);
1010 		copied += len;
1011 	}
1012 
1013 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015 		len = skb_frag_size(frag);
1016 
1017 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018 		if (dma_mapping_error(dev, busaddr))
1019 			goto dma_err;
1020 		be_tx_setup_wrb_frag(txo, busaddr, len);
1021 		copied += len;
1022 	}
1023 
1024 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025 
1026 	be_tx_stats_update(txo, skb);
1027 	return wrb_cnt;
1028 
1029 dma_err:
1030 	adapter->drv_stats.dma_map_errors++;
1031 	be_xmit_restore(adapter, txo, head, map_single, copied);
1032 	return 0;
1033 }
1034 
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039 
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041 					     struct sk_buff *skb,
1042 					     struct be_wrb_params
1043 					     *wrb_params)
1044 {
1045 	u16 vlan_tag = 0;
1046 
1047 	skb = skb_share_check(skb, GFP_ATOMIC);
1048 	if (unlikely(!skb))
1049 		return skb;
1050 
1051 	if (skb_vlan_tag_present(skb))
1052 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053 
1054 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055 		if (!vlan_tag)
1056 			vlan_tag = adapter->pvid;
1057 		/* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1058 		 * skip VLAN insertion
1059 		 */
1060 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061 	}
1062 
1063 	if (vlan_tag) {
1064 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065 						vlan_tag);
1066 		if (unlikely(!skb))
1067 			return skb;
1068 		skb->vlan_tci = 0;
1069 	}
1070 
1071 	/* Insert the outer VLAN, if any */
1072 	if (adapter->qnq_vid) {
1073 		vlan_tag = adapter->qnq_vid;
1074 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075 						vlan_tag);
1076 		if (unlikely(!skb))
1077 			return skb;
1078 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079 	}
1080 
1081 	return skb;
1082 }
1083 
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1087 	u16 offset = ETH_HLEN;
1088 
1089 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1090 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091 
1092 		offset += sizeof(struct ipv6hdr);
1093 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1094 		    ip6h->nexthdr != NEXTHDR_UDP) {
1095 			struct ipv6_opt_hdr *ehdr =
1096 				(struct ipv6_opt_hdr *)(skb->data + offset);
1097 
1098 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099 			if (ehdr->hdrlen == 0xff)
1100 				return true;
1101 		}
1102 	}
1103 	return false;
1104 }
1105 
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110 
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115 
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117 						  struct sk_buff *skb,
1118 						  struct be_wrb_params
1119 						  *wrb_params)
1120 {
1121 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122 	unsigned int eth_hdr_len;
1123 	struct iphdr *ip;
1124 
1125 	/* For padded packets, BE HW modifies tot_len field in IP header
1126 	 * incorrecly when VLAN tag is inserted by HW.
1127 	 * For padded packets, Lancer computes incorrect checksum.
1128 	 */
1129 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130 						VLAN_ETH_HLEN : ETH_HLEN;
1131 	if (skb->len <= 60 &&
1132 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133 	    is_ipv4_pkt(skb)) {
1134 		ip = (struct iphdr *)ip_hdr(skb);
1135 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136 	}
1137 
1138 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1139 	 * tagging in pvid-tagging mode
1140 	 */
1141 	if (be_pvid_tagging_enabled(adapter) &&
1142 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1143 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144 
1145 	/* HW has a bug wherein it will calculate CSUM for VLAN
1146 	 * pkts even though it is disabled.
1147 	 * Manually insert VLAN in pkt.
1148 	 */
1149 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150 	    skb_vlan_tag_present(skb)) {
1151 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152 		if (unlikely(!skb))
1153 			goto err;
1154 	}
1155 
1156 	/* HW may lockup when VLAN HW tagging is requested on
1157 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1158 	 * skip HW tagging is not enabled by FW.
1159 	 */
1160 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161 		     (adapter->pvid || adapter->qnq_vid) &&
1162 		     !qnq_async_evt_rcvd(adapter)))
1163 		goto tx_drop;
1164 
1165 	/* Manual VLAN tag insertion to prevent:
1166 	 * ASIC lockup when the ASIC inserts VLAN tag into
1167 	 * certain ipv6 packets. Insert VLAN tags in driver,
1168 	 * and set event, completion, vlan bits accordingly
1169 	 * in the Tx WRB.
1170 	 */
1171 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172 	    be_vlan_tag_tx_chk(adapter, skb)) {
1173 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174 		if (unlikely(!skb))
1175 			goto err;
1176 	}
1177 
1178 	return skb;
1179 tx_drop:
1180 	dev_kfree_skb_any(skb);
1181 err:
1182 	return NULL;
1183 }
1184 
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186 					   struct sk_buff *skb,
1187 					   struct be_wrb_params *wrb_params)
1188 {
1189 	int err;
1190 
1191 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192 	 * packets that are 32b or less may cause a transmit stall
1193 	 * on that port. The workaround is to pad such packets
1194 	 * (len <= 32 bytes) to a minimum length of 36b.
1195 	 */
1196 	if (skb->len <= 32) {
1197 		if (skb_put_padto(skb, 36))
1198 			return NULL;
1199 	}
1200 
1201 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203 		if (!skb)
1204 			return NULL;
1205 	}
1206 
1207 	/* The stack can send us skbs with length greater than
1208 	 * what the HW can handle. Trim the extra bytes.
1209 	 */
1210 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212 	WARN_ON(err);
1213 
1214 	return skb;
1215 }
1216 
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219 	struct be_queue_info *txq = &txo->q;
1220 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221 
1222 	/* Mark the last request eventable if it hasn't been marked already */
1223 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225 
1226 	/* compose a dummy wrb if there are odd set of wrbs to notify */
1227 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228 		wrb_fill_dummy(queue_head_node(txq));
1229 		queue_head_inc(txq);
1230 		atomic_inc(&txq->used);
1231 		txo->pend_wrb_cnt++;
1232 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233 					   TX_HDR_WRB_NUM_SHIFT);
1234 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235 					  TX_HDR_WRB_NUM_SHIFT);
1236 	}
1237 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238 	txo->pend_wrb_cnt = 0;
1239 }
1240 
1241 /* OS2BMC related */
1242 
1243 #define DHCP_CLIENT_PORT	68
1244 #define DHCP_SERVER_PORT	67
1245 #define NET_BIOS_PORT1		137
1246 #define NET_BIOS_PORT2		138
1247 #define DHCPV6_RAS_PORT		547
1248 
1249 #define is_mc_allowed_on_bmc(adapter, eh)	\
1250 	(!is_multicast_filt_enabled(adapter) &&	\
1251 	 is_multicast_ether_addr(eh->h_dest) &&	\
1252 	 !is_broadcast_ether_addr(eh->h_dest))
1253 
1254 #define is_bc_allowed_on_bmc(adapter, eh)	\
1255 	(!is_broadcast_filt_enabled(adapter) &&	\
1256 	 is_broadcast_ether_addr(eh->h_dest))
1257 
1258 #define is_arp_allowed_on_bmc(adapter, skb)	\
1259 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1260 
1261 #define is_broadcast_packet(eh, adapter)	\
1262 		(is_multicast_ether_addr(eh->h_dest) && \
1263 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264 
1265 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1266 
1267 #define is_arp_filt_enabled(adapter)	\
1268 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269 
1270 #define is_dhcp_client_filt_enabled(adapter)	\
1271 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272 
1273 #define is_dhcp_srvr_filt_enabled(adapter)	\
1274 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275 
1276 #define is_nbios_filt_enabled(adapter)	\
1277 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278 
1279 #define is_ipv6_na_filt_enabled(adapter)	\
1280 		(adapter->bmc_filt_mask &	\
1281 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282 
1283 #define is_ipv6_ra_filt_enabled(adapter)	\
1284 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285 
1286 #define is_ipv6_ras_filt_enabled(adapter)	\
1287 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288 
1289 #define is_broadcast_filt_enabled(adapter)	\
1290 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291 
1292 #define is_multicast_filt_enabled(adapter)	\
1293 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294 
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296 			       struct sk_buff **skb)
1297 {
1298 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299 	bool os2bmc = false;
1300 
1301 	if (!be_is_os2bmc_enabled(adapter))
1302 		goto done;
1303 
1304 	if (!is_multicast_ether_addr(eh->h_dest))
1305 		goto done;
1306 
1307 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1308 	    is_bc_allowed_on_bmc(adapter, eh) ||
1309 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1310 		os2bmc = true;
1311 		goto done;
1312 	}
1313 
1314 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316 		u8 nexthdr = hdr->nexthdr;
1317 
1318 		if (nexthdr == IPPROTO_ICMPV6) {
1319 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320 
1321 			switch (icmp6->icmp6_type) {
1322 			case NDISC_ROUTER_ADVERTISEMENT:
1323 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324 				goto done;
1325 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1327 				goto done;
1328 			default:
1329 				break;
1330 			}
1331 		}
1332 	}
1333 
1334 	if (is_udp_pkt((*skb))) {
1335 		struct udphdr *udp = udp_hdr((*skb));
1336 
1337 		switch (ntohs(udp->dest)) {
1338 		case DHCP_CLIENT_PORT:
1339 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1340 			goto done;
1341 		case DHCP_SERVER_PORT:
1342 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343 			goto done;
1344 		case NET_BIOS_PORT1:
1345 		case NET_BIOS_PORT2:
1346 			os2bmc = is_nbios_filt_enabled(adapter);
1347 			goto done;
1348 		case DHCPV6_RAS_PORT:
1349 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350 			goto done;
1351 		default:
1352 			break;
1353 		}
1354 	}
1355 done:
1356 	/* For packets over a vlan, which are destined
1357 	 * to BMC, asic expects the vlan to be inline in the packet.
1358 	 */
1359 	if (os2bmc)
1360 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361 
1362 	return os2bmc;
1363 }
1364 
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367 	struct be_adapter *adapter = netdev_priv(netdev);
1368 	u16 q_idx = skb_get_queue_mapping(skb);
1369 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370 	struct be_wrb_params wrb_params = { 0 };
1371 	bool flush = !skb->xmit_more;
1372 	u16 wrb_cnt;
1373 
1374 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375 	if (unlikely(!skb))
1376 		goto drop;
1377 
1378 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379 
1380 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381 	if (unlikely(!wrb_cnt)) {
1382 		dev_kfree_skb_any(skb);
1383 		goto drop;
1384 	}
1385 
1386 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1387 	 * enqueue the pkt a 2nd time with mgmt bit set.
1388 	 */
1389 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1390 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392 		if (unlikely(!wrb_cnt))
1393 			goto drop;
1394 		else
1395 			skb_get(skb);
1396 	}
1397 
1398 	if (be_is_txq_full(txo)) {
1399 		netif_stop_subqueue(netdev, q_idx);
1400 		tx_stats(txo)->tx_stops++;
1401 	}
1402 
1403 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404 		be_xmit_flush(adapter, txo);
1405 
1406 	return NETDEV_TX_OK;
1407 drop:
1408 	tx_stats(txo)->tx_drv_drops++;
1409 	/* Flush the already enqueued tx requests */
1410 	if (flush && txo->pend_wrb_cnt)
1411 		be_xmit_flush(adapter, txo);
1412 
1413 	return NETDEV_TX_OK;
1414 }
1415 
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421 
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424 	struct device *dev = &adapter->pdev->dev;
1425 	int status;
1426 
1427 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428 		return 0;
1429 
1430 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431 	if (!status) {
1432 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434 	} else {
1435 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436 	}
1437 	return status;
1438 }
1439 
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442 	struct device *dev = &adapter->pdev->dev;
1443 	int status;
1444 
1445 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446 	if (!status) {
1447 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449 	}
1450 	return status;
1451 }
1452 
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459 	struct device *dev = &adapter->pdev->dev;
1460 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1461 	u16 num = 0, i = 0;
1462 	int status = 0;
1463 
1464 	/* No need to change the VLAN state if the I/F is in promiscuous */
1465 	if (adapter->netdev->flags & IFF_PROMISC)
1466 		return 0;
1467 
1468 	if (adapter->vlans_added > be_max_vlans(adapter))
1469 		return be_set_vlan_promisc(adapter);
1470 
1471 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472 		status = be_clear_vlan_promisc(adapter);
1473 		if (status)
1474 			return status;
1475 	}
1476 	/* Construct VLAN Table to give to HW */
1477 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478 		vids[num++] = cpu_to_le16(i);
1479 
1480 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481 	if (status) {
1482 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1483 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1484 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485 		    addl_status(status) ==
1486 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487 			return be_set_vlan_promisc(adapter);
1488 	}
1489 	return status;
1490 }
1491 
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494 	struct be_adapter *adapter = netdev_priv(netdev);
1495 	int status = 0;
1496 
1497 	mutex_lock(&adapter->rx_filter_lock);
1498 
1499 	/* Packets with VID 0 are always received by Lancer by default */
1500 	if (lancer_chip(adapter) && vid == 0)
1501 		goto done;
1502 
1503 	if (test_bit(vid, adapter->vids))
1504 		goto done;
1505 
1506 	set_bit(vid, adapter->vids);
1507 	adapter->vlans_added++;
1508 
1509 	status = be_vid_config(adapter);
1510 done:
1511 	mutex_unlock(&adapter->rx_filter_lock);
1512 	return status;
1513 }
1514 
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517 	struct be_adapter *adapter = netdev_priv(netdev);
1518 	int status = 0;
1519 
1520 	mutex_lock(&adapter->rx_filter_lock);
1521 
1522 	/* Packets with VID 0 are always received by Lancer by default */
1523 	if (lancer_chip(adapter) && vid == 0)
1524 		goto done;
1525 
1526 	if (!test_bit(vid, adapter->vids))
1527 		goto done;
1528 
1529 	clear_bit(vid, adapter->vids);
1530 	adapter->vlans_added--;
1531 
1532 	status = be_vid_config(adapter);
1533 done:
1534 	mutex_unlock(&adapter->rx_filter_lock);
1535 	return status;
1536 }
1537 
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543 
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546 	int status;
1547 
1548 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549 		return;
1550 
1551 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552 	if (!status)
1553 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555 
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558 	int status;
1559 
1560 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561 		return;
1562 
1563 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564 	if (!status)
1565 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567 
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570 	int status;
1571 
1572 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573 		return;
1574 
1575 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576 	if (!status)
1577 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579 
1580 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1581  * We use a single callback function for both sync and unsync. We really don't
1582  * add/remove addresses through this callback. But, we use it to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586 			     const unsigned char *addr)
1587 {
1588 	struct be_adapter *adapter = netdev_priv(netdev);
1589 
1590 	adapter->update_uc_list = true;
1591 	return 0;
1592 }
1593 
1594 static int be_mc_list_update(struct net_device *netdev,
1595 			     const unsigned char *addr)
1596 {
1597 	struct be_adapter *adapter = netdev_priv(netdev);
1598 
1599 	adapter->update_mc_list = true;
1600 	return 0;
1601 }
1602 
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605 	struct net_device *netdev = adapter->netdev;
1606 	struct netdev_hw_addr *ha;
1607 	bool mc_promisc = false;
1608 	int status;
1609 
1610 	netif_addr_lock_bh(netdev);
1611 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612 
1613 	if (netdev->flags & IFF_PROMISC) {
1614 		adapter->update_mc_list = false;
1615 	} else if (netdev->flags & IFF_ALLMULTI ||
1616 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617 		/* Enable multicast promisc if num configured exceeds
1618 		 * what we support
1619 		 */
1620 		mc_promisc = true;
1621 		adapter->update_mc_list = false;
1622 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623 		/* Update mc-list unconditionally if the iface was previously
1624 		 * in mc-promisc mode and now is out of that mode.
1625 		 */
1626 		adapter->update_mc_list = true;
1627 	}
1628 
1629 	if (adapter->update_mc_list) {
1630 		int i = 0;
1631 
1632 		/* cache the mc-list in adapter */
1633 		netdev_for_each_mc_addr(ha, netdev) {
1634 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635 			i++;
1636 		}
1637 		adapter->mc_count = netdev_mc_count(netdev);
1638 	}
1639 	netif_addr_unlock_bh(netdev);
1640 
1641 	if (mc_promisc) {
1642 		be_set_mc_promisc(adapter);
1643 	} else if (adapter->update_mc_list) {
1644 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645 		if (!status)
1646 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647 		else
1648 			be_set_mc_promisc(adapter);
1649 
1650 		adapter->update_mc_list = false;
1651 	}
1652 }
1653 
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656 	struct net_device *netdev = adapter->netdev;
1657 
1658 	__dev_mc_unsync(netdev, NULL);
1659 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660 	adapter->mc_count = 0;
1661 }
1662 
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667 		return 0;
1668 	}
1669 
1670 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671 			       adapter->if_handle,
1672 			       &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674 
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677 	if (pmac_id == adapter->pmac_id[0])
1678 		return;
1679 
1680 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682 
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685 	struct net_device *netdev = adapter->netdev;
1686 	struct netdev_hw_addr *ha;
1687 	bool uc_promisc = false;
1688 	int curr_uc_macs = 0, i;
1689 
1690 	netif_addr_lock_bh(netdev);
1691 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692 
1693 	if (netdev->flags & IFF_PROMISC) {
1694 		adapter->update_uc_list = false;
1695 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696 		uc_promisc = true;
1697 		adapter->update_uc_list = false;
1698 	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699 		/* Update uc-list unconditionally if the iface was previously
1700 		 * in uc-promisc mode and now is out of that mode.
1701 		 */
1702 		adapter->update_uc_list = true;
1703 	}
1704 
1705 	if (adapter->update_uc_list) {
1706 		/* cache the uc-list in adapter array */
1707 		i = 0;
1708 		netdev_for_each_uc_addr(ha, netdev) {
1709 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710 			i++;
1711 		}
1712 		curr_uc_macs = netdev_uc_count(netdev);
1713 	}
1714 	netif_addr_unlock_bh(netdev);
1715 
1716 	if (uc_promisc) {
1717 		be_set_uc_promisc(adapter);
1718 	} else if (adapter->update_uc_list) {
1719 		be_clear_uc_promisc(adapter);
1720 
1721 		for (i = 0; i < adapter->uc_macs; i++)
1722 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723 
1724 		for (i = 0; i < curr_uc_macs; i++)
1725 			be_uc_mac_add(adapter, i);
1726 		adapter->uc_macs = curr_uc_macs;
1727 		adapter->update_uc_list = false;
1728 	}
1729 }
1730 
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733 	struct net_device *netdev = adapter->netdev;
1734 	int i;
1735 
1736 	__dev_uc_unsync(netdev, NULL);
1737 	for (i = 0; i < adapter->uc_macs; i++)
1738 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739 
1740 	adapter->uc_macs = 0;
1741 }
1742 
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745 	struct net_device *netdev = adapter->netdev;
1746 
1747 	mutex_lock(&adapter->rx_filter_lock);
1748 
1749 	if (netdev->flags & IFF_PROMISC) {
1750 		if (!be_in_all_promisc(adapter))
1751 			be_set_all_promisc(adapter);
1752 	} else if (be_in_all_promisc(adapter)) {
1753 		/* We need to re-program the vlan-list or clear
1754 		 * vlan-promisc mode (if needed) when the interface
1755 		 * comes out of promisc mode.
1756 		 */
1757 		be_vid_config(adapter);
1758 	}
1759 
1760 	be_set_uc_list(adapter);
1761 	be_set_mc_list(adapter);
1762 
1763 	mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765 
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768 	struct be_cmd_work *cmd_work =
1769 				container_of(work, struct be_cmd_work, work);
1770 
1771 	__be_set_rx_mode(cmd_work->adapter);
1772 	kfree(cmd_work);
1773 }
1774 
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777 	struct be_adapter *adapter = netdev_priv(netdev);
1778 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779 	int status;
1780 
1781 	if (!sriov_enabled(adapter))
1782 		return -EPERM;
1783 
1784 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785 		return -EINVAL;
1786 
1787 	/* Proceed further only if user provided MAC is different
1788 	 * from active MAC
1789 	 */
1790 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791 		return 0;
1792 
1793 	if (BEx_chip(adapter)) {
1794 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795 				vf + 1);
1796 
1797 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798 					 &vf_cfg->pmac_id, vf + 1);
1799 	} else {
1800 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801 					vf + 1);
1802 	}
1803 
1804 	if (status) {
1805 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1806 			mac, vf, status);
1807 		return be_cmd_status(status);
1808 	}
1809 
1810 	ether_addr_copy(vf_cfg->mac_addr, mac);
1811 
1812 	return 0;
1813 }
1814 
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816 			    struct ifla_vf_info *vi)
1817 {
1818 	struct be_adapter *adapter = netdev_priv(netdev);
1819 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820 
1821 	if (!sriov_enabled(adapter))
1822 		return -EPERM;
1823 
1824 	if (vf >= adapter->num_vfs)
1825 		return -EINVAL;
1826 
1827 	vi->vf = vf;
1828 	vi->max_tx_rate = vf_cfg->tx_rate;
1829 	vi->min_tx_rate = 0;
1830 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835 
1836 	return 0;
1837 }
1838 
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1843 	int vf_if_id = vf_cfg->if_handle;
1844 	int status;
1845 
1846 	/* Enable Transparent VLAN Tagging */
1847 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848 	if (status)
1849 		return status;
1850 
1851 	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1852 	vids[0] = 0;
1853 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854 	if (!status)
1855 		dev_info(&adapter->pdev->dev,
1856 			 "Cleared guest VLANs on VF%d", vf);
1857 
1858 	/* After TVT is enabled, disallow VFs to program VLAN filters */
1859 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861 						  ~BE_PRIV_FILTMGMT, vf + 1);
1862 		if (!status)
1863 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864 	}
1865 	return 0;
1866 }
1867 
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871 	struct device *dev = &adapter->pdev->dev;
1872 	int status;
1873 
1874 	/* Reset Transparent VLAN Tagging. */
1875 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876 				       vf_cfg->if_handle, 0, 0);
1877 	if (status)
1878 		return status;
1879 
1880 	/* Allow VFs to program VLAN filtering */
1881 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883 						  BE_PRIV_FILTMGMT, vf + 1);
1884 		if (!status) {
1885 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1887 		}
1888 	}
1889 
1890 	dev_info(dev,
1891 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1892 	return 0;
1893 }
1894 
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896 			  __be16 vlan_proto)
1897 {
1898 	struct be_adapter *adapter = netdev_priv(netdev);
1899 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900 	int status;
1901 
1902 	if (!sriov_enabled(adapter))
1903 		return -EPERM;
1904 
1905 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906 		return -EINVAL;
1907 
1908 	if (vlan_proto != htons(ETH_P_8021Q))
1909 		return -EPROTONOSUPPORT;
1910 
1911 	if (vlan || qos) {
1912 		vlan |= qos << VLAN_PRIO_SHIFT;
1913 		status = be_set_vf_tvt(adapter, vf, vlan);
1914 	} else {
1915 		status = be_clear_vf_tvt(adapter, vf);
1916 	}
1917 
1918 	if (status) {
1919 		dev_err(&adapter->pdev->dev,
1920 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1921 			status);
1922 		return be_cmd_status(status);
1923 	}
1924 
1925 	vf_cfg->vlan_tag = vlan;
1926 	return 0;
1927 }
1928 
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930 			     int min_tx_rate, int max_tx_rate)
1931 {
1932 	struct be_adapter *adapter = netdev_priv(netdev);
1933 	struct device *dev = &adapter->pdev->dev;
1934 	int percent_rate, status = 0;
1935 	u16 link_speed = 0;
1936 	u8 link_status;
1937 
1938 	if (!sriov_enabled(adapter))
1939 		return -EPERM;
1940 
1941 	if (vf >= adapter->num_vfs)
1942 		return -EINVAL;
1943 
1944 	if (min_tx_rate)
1945 		return -EINVAL;
1946 
1947 	if (!max_tx_rate)
1948 		goto config_qos;
1949 
1950 	status = be_cmd_link_status_query(adapter, &link_speed,
1951 					  &link_status, 0);
1952 	if (status)
1953 		goto err;
1954 
1955 	if (!link_status) {
1956 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957 		status = -ENETDOWN;
1958 		goto err;
1959 	}
1960 
1961 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963 			link_speed);
1964 		status = -EINVAL;
1965 		goto err;
1966 	}
1967 
1968 	/* On Skyhawk the QOS setting must be done only as a % value */
1969 	percent_rate = link_speed / 100;
1970 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972 			percent_rate);
1973 		status = -EINVAL;
1974 		goto err;
1975 	}
1976 
1977 config_qos:
1978 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979 	if (status)
1980 		goto err;
1981 
1982 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983 	return 0;
1984 
1985 err:
1986 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987 		max_tx_rate, vf);
1988 	return be_cmd_status(status);
1989 }
1990 
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992 				int link_state)
1993 {
1994 	struct be_adapter *adapter = netdev_priv(netdev);
1995 	int status;
1996 
1997 	if (!sriov_enabled(adapter))
1998 		return -EPERM;
1999 
2000 	if (vf >= adapter->num_vfs)
2001 		return -EINVAL;
2002 
2003 	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2004 	if (status) {
2005 		dev_err(&adapter->pdev->dev,
2006 			"Link state change on VF %d failed: %#x\n", vf, status);
2007 		return be_cmd_status(status);
2008 	}
2009 
2010 	adapter->vf_cfg[vf].plink_tracking = link_state;
2011 
2012 	return 0;
2013 }
2014 
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017 	struct be_adapter *adapter = netdev_priv(netdev);
2018 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019 	u8 spoofchk;
2020 	int status;
2021 
2022 	if (!sriov_enabled(adapter))
2023 		return -EPERM;
2024 
2025 	if (vf >= adapter->num_vfs)
2026 		return -EINVAL;
2027 
2028 	if (BEx_chip(adapter))
2029 		return -EOPNOTSUPP;
2030 
2031 	if (enable == vf_cfg->spoofchk)
2032 		return 0;
2033 
2034 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035 
2036 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037 				       0, spoofchk);
2038 	if (status) {
2039 		dev_err(&adapter->pdev->dev,
2040 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2041 		return be_cmd_status(status);
2042 	}
2043 
2044 	vf_cfg->spoofchk = enable;
2045 	return 0;
2046 }
2047 
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049 			  ulong now)
2050 {
2051 	aic->rx_pkts_prev = rx_pkts;
2052 	aic->tx_reqs_prev = tx_pkts;
2053 	aic->jiffies = now;
2054 }
2055 
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058 	struct be_adapter *adapter = eqo->adapter;
2059 	int eqd, start;
2060 	struct be_aic_obj *aic;
2061 	struct be_rx_obj *rxo;
2062 	struct be_tx_obj *txo;
2063 	u64 rx_pkts = 0, tx_pkts = 0;
2064 	ulong now;
2065 	u32 pps, delta;
2066 	int i;
2067 
2068 	aic = &adapter->aic_obj[eqo->idx];
2069 	if (!aic->enable) {
2070 		if (aic->jiffies)
2071 			aic->jiffies = 0;
2072 		eqd = aic->et_eqd;
2073 		return eqd;
2074 	}
2075 
2076 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077 		do {
2078 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079 			rx_pkts += rxo->stats.rx_pkts;
2080 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081 	}
2082 
2083 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084 		do {
2085 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086 			tx_pkts += txo->stats.tx_reqs;
2087 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088 	}
2089 
2090 	/* Skip, if wrapped around or first calculation */
2091 	now = jiffies;
2092 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093 	    rx_pkts < aic->rx_pkts_prev ||
2094 	    tx_pkts < aic->tx_reqs_prev) {
2095 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2096 		return aic->prev_eqd;
2097 	}
2098 
2099 	delta = jiffies_to_msecs(now - aic->jiffies);
2100 	if (delta == 0)
2101 		return aic->prev_eqd;
2102 
2103 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105 	eqd = (pps / 15000) << 2;
2106 
2107 	if (eqd < 8)
2108 		eqd = 0;
2109 	eqd = min_t(u32, eqd, aic->max_eqd);
2110 	eqd = max_t(u32, eqd, aic->min_eqd);
2111 
2112 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2113 
2114 	return eqd;
2115 }
2116 
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120 	struct be_adapter *adapter = eqo->adapter;
2121 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122 	ulong now = jiffies;
2123 	int eqd;
2124 	u32 mult_enc;
2125 
2126 	if (!aic->enable)
2127 		return 0;
2128 
2129 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130 		eqd = aic->prev_eqd;
2131 	else
2132 		eqd = be_get_new_eqd(eqo);
2133 
2134 	if (eqd > 100)
2135 		mult_enc = R2I_DLY_ENC_1;
2136 	else if (eqd > 60)
2137 		mult_enc = R2I_DLY_ENC_2;
2138 	else if (eqd > 20)
2139 		mult_enc = R2I_DLY_ENC_3;
2140 	else
2141 		mult_enc = R2I_DLY_ENC_0;
2142 
2143 	aic->prev_eqd = eqd;
2144 
2145 	return mult_enc;
2146 }
2147 
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2151 	struct be_aic_obj *aic;
2152 	struct be_eq_obj *eqo;
2153 	int i, num = 0, eqd;
2154 
2155 	for_all_evt_queues(adapter, eqo, i) {
2156 		aic = &adapter->aic_obj[eqo->idx];
2157 		eqd = be_get_new_eqd(eqo);
2158 		if (force_update || eqd != aic->prev_eqd) {
2159 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160 			set_eqd[num].eq_id = eqo->q.id;
2161 			aic->prev_eqd = eqd;
2162 			num++;
2163 		}
2164 	}
2165 
2166 	if (num)
2167 		be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169 
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171 			       struct be_rx_compl_info *rxcp)
2172 {
2173 	struct be_rx_stats *stats = rx_stats(rxo);
2174 
2175 	u64_stats_update_begin(&stats->sync);
2176 	stats->rx_compl++;
2177 	stats->rx_bytes += rxcp->pkt_size;
2178 	stats->rx_pkts++;
2179 	if (rxcp->tunneled)
2180 		stats->rx_vxlan_offload_pkts++;
2181 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182 		stats->rx_mcast_pkts++;
2183 	if (rxcp->err)
2184 		stats->rx_compl_err++;
2185 	u64_stats_update_end(&stats->sync);
2186 }
2187 
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
2190 	/* L4 checksum is not reliable for non TCP/UDP packets.
2191 	 * Also ignore ipcksm for ipv6 pkts
2192 	 */
2193 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196 
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199 	struct be_adapter *adapter = rxo->adapter;
2200 	struct be_rx_page_info *rx_page_info;
2201 	struct be_queue_info *rxq = &rxo->q;
2202 	u32 frag_idx = rxq->tail;
2203 
2204 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2205 	BUG_ON(!rx_page_info->page);
2206 
2207 	if (rx_page_info->last_frag) {
2208 		dma_unmap_page(&adapter->pdev->dev,
2209 			       dma_unmap_addr(rx_page_info, bus),
2210 			       adapter->big_page_size, DMA_FROM_DEVICE);
2211 		rx_page_info->last_frag = false;
2212 	} else {
2213 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2214 					dma_unmap_addr(rx_page_info, bus),
2215 					rx_frag_size, DMA_FROM_DEVICE);
2216 	}
2217 
2218 	queue_tail_inc(rxq);
2219 	atomic_dec(&rxq->used);
2220 	return rx_page_info;
2221 }
2222 
2223 /* Throwaway the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225 				struct be_rx_compl_info *rxcp)
2226 {
2227 	struct be_rx_page_info *page_info;
2228 	u16 i, num_rcvd = rxcp->num_rcvd;
2229 
2230 	for (i = 0; i < num_rcvd; i++) {
2231 		page_info = get_rx_page_info(rxo);
2232 		put_page(page_info->page);
2233 		memset(page_info, 0, sizeof(*page_info));
2234 	}
2235 }
2236 
2237 /*
2238  * skb_fill_rx_data forms a complete skb for an ether frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242 			     struct be_rx_compl_info *rxcp)
2243 {
2244 	struct be_rx_page_info *page_info;
2245 	u16 i, j;
2246 	u16 hdr_len, curr_frag_len, remaining;
2247 	u8 *start;
2248 
2249 	page_info = get_rx_page_info(rxo);
2250 	start = page_address(page_info->page) + page_info->page_offset;
2251 	prefetch(start);
2252 
2253 	/* Copy data in the first descriptor of this completion */
2254 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255 
2256 	skb->len = curr_frag_len;
2257 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258 		memcpy(skb->data, start, curr_frag_len);
2259 		/* Complete packet has now been moved to data */
2260 		put_page(page_info->page);
2261 		skb->data_len = 0;
2262 		skb->tail += curr_frag_len;
2263 	} else {
2264 		hdr_len = ETH_HLEN;
2265 		memcpy(skb->data, start, hdr_len);
2266 		skb_shinfo(skb)->nr_frags = 1;
2267 		skb_frag_set_page(skb, 0, page_info->page);
2268 		skb_shinfo(skb)->frags[0].page_offset =
2269 					page_info->page_offset + hdr_len;
2270 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271 				  curr_frag_len - hdr_len);
2272 		skb->data_len = curr_frag_len - hdr_len;
2273 		skb->truesize += rx_frag_size;
2274 		skb->tail += hdr_len;
2275 	}
2276 	page_info->page = NULL;
2277 
2278 	if (rxcp->pkt_size <= rx_frag_size) {
2279 		BUG_ON(rxcp->num_rcvd != 1);
2280 		return;
2281 	}
2282 
2283 	/* More frags present for this completion */
2284 	remaining = rxcp->pkt_size - curr_frag_len;
2285 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286 		page_info = get_rx_page_info(rxo);
2287 		curr_frag_len = min(remaining, rx_frag_size);
2288 
2289 		/* Coalesce all frags from the same physical page in one slot */
2290 		if (page_info->page_offset == 0) {
2291 			/* Fresh page */
2292 			j++;
2293 			skb_frag_set_page(skb, j, page_info->page);
2294 			skb_shinfo(skb)->frags[j].page_offset =
2295 							page_info->page_offset;
2296 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297 			skb_shinfo(skb)->nr_frags++;
2298 		} else {
2299 			put_page(page_info->page);
2300 		}
2301 
2302 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303 		skb->len += curr_frag_len;
2304 		skb->data_len += curr_frag_len;
2305 		skb->truesize += rx_frag_size;
2306 		remaining -= curr_frag_len;
2307 		page_info->page = NULL;
2308 	}
2309 	BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311 
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314 				struct be_rx_compl_info *rxcp)
2315 {
2316 	struct be_adapter *adapter = rxo->adapter;
2317 	struct net_device *netdev = adapter->netdev;
2318 	struct sk_buff *skb;
2319 
2320 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321 	if (unlikely(!skb)) {
2322 		rx_stats(rxo)->rx_drops_no_skbs++;
2323 		be_rx_compl_discard(rxo, rxcp);
2324 		return;
2325 	}
2326 
2327 	skb_fill_rx_data(rxo, skb, rxcp);
2328 
2329 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2331 	else
2332 		skb_checksum_none_assert(skb);
2333 
2334 	skb->protocol = eth_type_trans(skb, netdev);
2335 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336 	if (netdev->features & NETIF_F_RXHASH)
2337 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338 
2339 	skb->csum_level = rxcp->tunneled;
2340 	skb_mark_napi_id(skb, napi);
2341 
2342 	if (rxcp->vlanf)
2343 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344 
2345 	netif_receive_skb(skb);
2346 }
2347 
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350 				    struct napi_struct *napi,
2351 				    struct be_rx_compl_info *rxcp)
2352 {
2353 	struct be_adapter *adapter = rxo->adapter;
2354 	struct be_rx_page_info *page_info;
2355 	struct sk_buff *skb = NULL;
2356 	u16 remaining, curr_frag_len;
2357 	u16 i, j;
2358 
2359 	skb = napi_get_frags(napi);
2360 	if (!skb) {
2361 		be_rx_compl_discard(rxo, rxcp);
2362 		return;
2363 	}
2364 
2365 	remaining = rxcp->pkt_size;
2366 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367 		page_info = get_rx_page_info(rxo);
2368 
2369 		curr_frag_len = min(remaining, rx_frag_size);
2370 
2371 		/* Coalesce all frags from the same physical page in one slot */
2372 		if (i == 0 || page_info->page_offset == 0) {
2373 			/* First frag or Fresh page */
2374 			j++;
2375 			skb_frag_set_page(skb, j, page_info->page);
2376 			skb_shinfo(skb)->frags[j].page_offset =
2377 							page_info->page_offset;
2378 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379 		} else {
2380 			put_page(page_info->page);
2381 		}
2382 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383 		skb->truesize += rx_frag_size;
2384 		remaining -= curr_frag_len;
2385 		memset(page_info, 0, sizeof(*page_info));
2386 	}
2387 	BUG_ON(j > MAX_SKB_FRAGS);
2388 
2389 	skb_shinfo(skb)->nr_frags = j + 1;
2390 	skb->len = rxcp->pkt_size;
2391 	skb->data_len = rxcp->pkt_size;
2392 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2393 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394 	if (adapter->netdev->features & NETIF_F_RXHASH)
2395 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396 
2397 	skb->csum_level = rxcp->tunneled;
2398 
2399 	if (rxcp->vlanf)
2400 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401 
2402 	napi_gro_frags(napi);
2403 }
2404 
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406 				 struct be_rx_compl_info *rxcp)
2407 {
2408 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419 	if (rxcp->vlanf) {
2420 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422 	}
2423 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424 	rxcp->tunneled =
2425 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427 
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429 				 struct be_rx_compl_info *rxcp)
2430 {
2431 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442 	if (rxcp->vlanf) {
2443 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445 	}
2446 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449 
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454 	struct be_adapter *adapter = rxo->adapter;
2455 
2456 	/* For checking the valid bit it is Ok to use either definition as the
2457 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2458 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459 		return NULL;
2460 
2461 	rmb();
2462 	be_dws_le_to_cpu(compl, sizeof(*compl));
2463 
2464 	if (adapter->be3_native)
2465 		be_parse_rx_compl_v1(compl, rxcp);
2466 	else
2467 		be_parse_rx_compl_v0(compl, rxcp);
2468 
2469 	if (rxcp->ip_frag)
2470 		rxcp->l4_csum = 0;
2471 
2472 	if (rxcp->vlanf) {
2473 		/* In QNQ modes, if qnq bit is not set, then the packet was
2474 		 * tagged only with the transparent outer vlan-tag and must
2475 		 * not be treated as a vlan packet by host
2476 		 */
2477 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478 			rxcp->vlanf = 0;
2479 
2480 		if (!lancer_chip(adapter))
2481 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482 
2483 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2485 			rxcp->vlanf = 0;
2486 	}
2487 
2488 	/* As the compl has been parsed, reset it; we wont touch it again */
2489 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490 
2491 	queue_tail_inc(&rxo->cq);
2492 	return rxcp;
2493 }
2494 
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497 	u32 order = get_order(size);
2498 
2499 	if (order > 0)
2500 		gfp |= __GFP_COMP;
2501 	return  alloc_pages(gfp, order);
2502 }
2503 
2504 /*
2505  * Allocate a page, split it to fragments of size rx_frag_size and post as
2506  * receive buffers to BE
2507  */
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510 	struct be_adapter *adapter = rxo->adapter;
2511 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512 	struct be_queue_info *rxq = &rxo->q;
2513 	struct page *pagep = NULL;
2514 	struct device *dev = &adapter->pdev->dev;
2515 	struct be_eth_rx_d *rxd;
2516 	u64 page_dmaaddr = 0, frag_dmaaddr;
2517 	u32 posted, page_offset = 0, notify = 0;
2518 
2519 	page_info = &rxo->page_info_tbl[rxq->head];
2520 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521 		if (!pagep) {
2522 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523 			if (unlikely(!pagep)) {
2524 				rx_stats(rxo)->rx_post_fail++;
2525 				break;
2526 			}
2527 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2528 						    adapter->big_page_size,
2529 						    DMA_FROM_DEVICE);
2530 			if (dma_mapping_error(dev, page_dmaaddr)) {
2531 				put_page(pagep);
2532 				pagep = NULL;
2533 				adapter->drv_stats.dma_map_errors++;
2534 				break;
2535 			}
2536 			page_offset = 0;
2537 		} else {
2538 			get_page(pagep);
2539 			page_offset += rx_frag_size;
2540 		}
2541 		page_info->page_offset = page_offset;
2542 		page_info->page = pagep;
2543 
2544 		rxd = queue_head_node(rxq);
2545 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548 
2549 		/* Any space left in the current big page for another frag? */
2550 		if ((page_offset + rx_frag_size + rx_frag_size) >
2551 					adapter->big_page_size) {
2552 			pagep = NULL;
2553 			page_info->last_frag = true;
2554 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555 		} else {
2556 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557 		}
2558 
2559 		prev_page_info = page_info;
2560 		queue_head_inc(rxq);
2561 		page_info = &rxo->page_info_tbl[rxq->head];
2562 	}
2563 
2564 	/* Mark the last frag of a page when we break out of the above loop
2565 	 * with no more slots available in the RXQ
2566 	 */
2567 	if (pagep) {
2568 		prev_page_info->last_frag = true;
2569 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570 	}
2571 
2572 	if (posted) {
2573 		atomic_add(posted, &rxq->used);
2574 		if (rxo->rx_post_starved)
2575 			rxo->rx_post_starved = false;
2576 		do {
2577 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2578 			be_rxq_notify(adapter, rxq->id, notify);
2579 			posted -= notify;
2580 		} while (posted);
2581 	} else if (atomic_read(&rxq->used) == 0) {
2582 		/* Let be_worker replenish when memory is available */
2583 		rxo->rx_post_starved = true;
2584 	}
2585 }
2586 
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589 	struct be_queue_info *tx_cq = &txo->cq;
2590 	struct be_tx_compl_info *txcp = &txo->txcp;
2591 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592 
2593 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594 		return NULL;
2595 
2596 	/* Ensure load ordering of valid bit dword and other dwords below */
2597 	rmb();
2598 	be_dws_le_to_cpu(compl, sizeof(*compl));
2599 
2600 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2601 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602 
2603 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604 	queue_tail_inc(tx_cq);
2605 	return txcp;
2606 }
2607 
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609 			       struct be_tx_obj *txo, u16 last_index)
2610 {
2611 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2612 	struct be_queue_info *txq = &txo->q;
2613 	struct sk_buff *skb = NULL;
2614 	bool unmap_skb_hdr = false;
2615 	struct be_eth_wrb *wrb;
2616 	u16 num_wrbs = 0;
2617 	u32 frag_index;
2618 
2619 	do {
2620 		if (sent_skbs[txq->tail]) {
2621 			/* Free skb from prev req */
2622 			if (skb)
2623 				dev_consume_skb_any(skb);
2624 			skb = sent_skbs[txq->tail];
2625 			sent_skbs[txq->tail] = NULL;
2626 			queue_tail_inc(txq);  /* skip hdr wrb */
2627 			num_wrbs++;
2628 			unmap_skb_hdr = true;
2629 		}
2630 		wrb = queue_tail_node(txq);
2631 		frag_index = txq->tail;
2632 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2633 			      (unmap_skb_hdr && skb_headlen(skb)));
2634 		unmap_skb_hdr = false;
2635 		queue_tail_inc(txq);
2636 		num_wrbs++;
2637 	} while (frag_index != last_index);
2638 	dev_consume_skb_any(skb);
2639 
2640 	return num_wrbs;
2641 }
2642 
2643 /* Return the number of events in the event queue */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646 	struct be_eq_entry *eqe;
2647 	int num = 0;
2648 
2649 	do {
2650 		eqe = queue_tail_node(&eqo->q);
2651 		if (eqe->evt == 0)
2652 			break;
2653 
2654 		rmb();
2655 		eqe->evt = 0;
2656 		num++;
2657 		queue_tail_inc(&eqo->q);
2658 	} while (true);
2659 
2660 	return num;
2661 }
2662 
2663 /* Leaves the EQ is disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666 	int num = events_get(eqo);
2667 
2668 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670 
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674 	struct be_queue_info *rxq = &rxo->q;
2675 	struct be_rx_page_info *page_info;
2676 
2677 	while (atomic_read(&rxq->used) > 0) {
2678 		page_info = get_rx_page_info(rxo);
2679 		put_page(page_info->page);
2680 		memset(page_info, 0, sizeof(*page_info));
2681 	}
2682 	BUG_ON(atomic_read(&rxq->used));
2683 	rxq->tail = 0;
2684 	rxq->head = 0;
2685 }
2686 
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689 	struct be_queue_info *rx_cq = &rxo->cq;
2690 	struct be_rx_compl_info *rxcp;
2691 	struct be_adapter *adapter = rxo->adapter;
2692 	int flush_wait = 0;
2693 
2694 	/* Consume pending rx completions.
2695 	 * Wait for the flush completion (identified by zero num_rcvd)
2696 	 * to arrive. Notify CQ even when there are no more CQ entries
2697 	 * for HW to flush partially coalesced CQ entries.
2698 	 * In Lancer, there is no need to wait for flush compl.
2699 	 */
2700 	for (;;) {
2701 		rxcp = be_rx_compl_get(rxo);
2702 		if (!rxcp) {
2703 			if (lancer_chip(adapter))
2704 				break;
2705 
2706 			if (flush_wait++ > 50 ||
2707 			    be_check_error(adapter,
2708 					   BE_ERROR_HW)) {
2709 				dev_warn(&adapter->pdev->dev,
2710 					 "did not receive flush compl\n");
2711 				break;
2712 			}
2713 			be_cq_notify(adapter, rx_cq->id, true, 0);
2714 			mdelay(1);
2715 		} else {
2716 			be_rx_compl_discard(rxo, rxcp);
2717 			be_cq_notify(adapter, rx_cq->id, false, 1);
2718 			if (rxcp->num_rcvd == 0)
2719 				break;
2720 		}
2721 	}
2722 
2723 	/* After cleanup, leave the CQ in unarmed state */
2724 	be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726 
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729 	struct device *dev = &adapter->pdev->dev;
2730 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731 	struct be_tx_compl_info *txcp;
2732 	struct be_queue_info *txq;
2733 	u32 end_idx, notified_idx;
2734 	struct be_tx_obj *txo;
2735 	int i, pending_txqs;
2736 
2737 	/* Stop polling for compls when HW has been silent for 10ms */
2738 	do {
2739 		pending_txqs = adapter->num_tx_qs;
2740 
2741 		for_all_tx_queues(adapter, txo, i) {
2742 			cmpl = 0;
2743 			num_wrbs = 0;
2744 			txq = &txo->q;
2745 			while ((txcp = be_tx_compl_get(txo))) {
2746 				num_wrbs +=
2747 					be_tx_compl_process(adapter, txo,
2748 							    txcp->end_index);
2749 				cmpl++;
2750 			}
2751 			if (cmpl) {
2752 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753 				atomic_sub(num_wrbs, &txq->used);
2754 				timeo = 0;
2755 			}
2756 			if (!be_is_tx_compl_pending(txo))
2757 				pending_txqs--;
2758 		}
2759 
2760 		if (pending_txqs == 0 || ++timeo > 10 ||
2761 		    be_check_error(adapter, BE_ERROR_HW))
2762 			break;
2763 
2764 		mdelay(1);
2765 	} while (true);
2766 
2767 	/* Free enqueued TX that was never notified to HW */
2768 	for_all_tx_queues(adapter, txo, i) {
2769 		txq = &txo->q;
2770 
2771 		if (atomic_read(&txq->used)) {
2772 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773 				 i, atomic_read(&txq->used));
2774 			notified_idx = txq->tail;
2775 			end_idx = txq->tail;
2776 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777 				  txq->len);
2778 			/* Use the tx-compl process logic to handle requests
2779 			 * that were not sent to the HW.
2780 			 */
2781 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782 			atomic_sub(num_wrbs, &txq->used);
2783 			BUG_ON(atomic_read(&txq->used));
2784 			txo->pend_wrb_cnt = 0;
2785 			/* Since hw was never notified of these requests,
2786 			 * reset TXQ indices
2787 			 */
2788 			txq->head = notified_idx;
2789 			txq->tail = notified_idx;
2790 		}
2791 	}
2792 }
2793 
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796 	struct be_eq_obj *eqo;
2797 	int i;
2798 
2799 	for_all_evt_queues(adapter, eqo, i) {
2800 		if (eqo->q.created) {
2801 			be_eq_clean(eqo);
2802 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803 			netif_napi_del(&eqo->napi);
2804 			free_cpumask_var(eqo->affinity_mask);
2805 		}
2806 		be_queue_free(adapter, &eqo->q);
2807 	}
2808 }
2809 
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812 	struct be_queue_info *eq;
2813 	struct be_eq_obj *eqo;
2814 	struct be_aic_obj *aic;
2815 	int i, rc;
2816 
2817 	/* need enough EQs to service both RX and TX queues */
2818 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819 				    max(adapter->cfg_num_rx_irqs,
2820 					adapter->cfg_num_tx_irqs));
2821 
2822 	for_all_evt_queues(adapter, eqo, i) {
2823 		int numa_node = dev_to_node(&adapter->pdev->dev);
2824 
2825 		aic = &adapter->aic_obj[i];
2826 		eqo->adapter = adapter;
2827 		eqo->idx = i;
2828 		aic->max_eqd = BE_MAX_EQD;
2829 		aic->enable = true;
2830 
2831 		eq = &eqo->q;
2832 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833 				    sizeof(struct be_eq_entry));
2834 		if (rc)
2835 			return rc;
2836 
2837 		rc = be_cmd_eq_create(adapter, eqo);
2838 		if (rc)
2839 			return rc;
2840 
2841 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842 			return -ENOMEM;
2843 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844 				eqo->affinity_mask);
2845 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846 			       BE_NAPI_WEIGHT);
2847 	}
2848 	return 0;
2849 }
2850 
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853 	struct be_queue_info *q;
2854 
2855 	q = &adapter->mcc_obj.q;
2856 	if (q->created)
2857 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858 	be_queue_free(adapter, q);
2859 
2860 	q = &adapter->mcc_obj.cq;
2861 	if (q->created)
2862 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863 	be_queue_free(adapter, q);
2864 }
2865 
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869 	struct be_queue_info *q, *cq;
2870 
2871 	cq = &adapter->mcc_obj.cq;
2872 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873 			   sizeof(struct be_mcc_compl)))
2874 		goto err;
2875 
2876 	/* Use the default EQ for MCC completions */
2877 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878 		goto mcc_cq_free;
2879 
2880 	q = &adapter->mcc_obj.q;
2881 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882 		goto mcc_cq_destroy;
2883 
2884 	if (be_cmd_mccq_create(adapter, q, cq))
2885 		goto mcc_q_free;
2886 
2887 	return 0;
2888 
2889 mcc_q_free:
2890 	be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894 	be_queue_free(adapter, cq);
2895 err:
2896 	return -1;
2897 }
2898 
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901 	struct be_queue_info *q;
2902 	struct be_tx_obj *txo;
2903 	u8 i;
2904 
2905 	for_all_tx_queues(adapter, txo, i) {
2906 		q = &txo->q;
2907 		if (q->created)
2908 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909 		be_queue_free(adapter, q);
2910 
2911 		q = &txo->cq;
2912 		if (q->created)
2913 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914 		be_queue_free(adapter, q);
2915 	}
2916 }
2917 
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920 	struct be_queue_info *cq;
2921 	struct be_tx_obj *txo;
2922 	struct be_eq_obj *eqo;
2923 	int status, i;
2924 
2925 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926 
2927 	for_all_tx_queues(adapter, txo, i) {
2928 		cq = &txo->cq;
2929 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930 					sizeof(struct be_eth_tx_compl));
2931 		if (status)
2932 			return status;
2933 
2934 		u64_stats_init(&txo->stats.sync);
2935 		u64_stats_init(&txo->stats.sync_compl);
2936 
2937 		/* If num_evt_qs is less than num_tx_qs, then more than
2938 		 * one txq share an eq
2939 		 */
2940 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942 		if (status)
2943 			return status;
2944 
2945 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946 					sizeof(struct be_eth_wrb));
2947 		if (status)
2948 			return status;
2949 
2950 		status = be_cmd_txq_create(adapter, txo);
2951 		if (status)
2952 			return status;
2953 
2954 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955 				    eqo->idx);
2956 	}
2957 
2958 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959 		 adapter->num_tx_qs);
2960 	return 0;
2961 }
2962 
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965 	struct be_queue_info *q;
2966 	struct be_rx_obj *rxo;
2967 	int i;
2968 
2969 	for_all_rx_queues(adapter, rxo, i) {
2970 		q = &rxo->cq;
2971 		if (q->created)
2972 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973 		be_queue_free(adapter, q);
2974 	}
2975 }
2976 
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979 	struct be_queue_info *eq, *cq;
2980 	struct be_rx_obj *rxo;
2981 	int rc, i;
2982 
2983 	adapter->num_rss_qs =
2984 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985 
2986 	/* We'll use RSS only if atleast 2 RSS rings are supported. */
2987 	if (adapter->num_rss_qs < 2)
2988 		adapter->num_rss_qs = 0;
2989 
2990 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991 
2992 	/* When the interface is not capable of RSS rings (and there is no
2993 	 * need to create a default RXQ) we'll still need one RXQ
2994 	 */
2995 	if (adapter->num_rx_qs == 0)
2996 		adapter->num_rx_qs = 1;
2997 
2998 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999 	for_all_rx_queues(adapter, rxo, i) {
3000 		rxo->adapter = adapter;
3001 		cq = &rxo->cq;
3002 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003 				    sizeof(struct be_eth_rx_compl));
3004 		if (rc)
3005 			return rc;
3006 
3007 		u64_stats_init(&rxo->stats.sync);
3008 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010 		if (rc)
3011 			return rc;
3012 	}
3013 
3014 	dev_info(&adapter->pdev->dev,
3015 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3016 	return 0;
3017 }
3018 
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021 	struct be_eq_obj *eqo = dev;
3022 	struct be_adapter *adapter = eqo->adapter;
3023 	int num_evts = 0;
3024 
3025 	/* IRQ is not expected when NAPI is scheduled as the EQ
3026 	 * will not be armed.
3027 	 * But, this can happen on Lancer INTx where it takes
3028 	 * a while to de-assert INTx or in BE2 where occasionaly
3029 	 * an interrupt may be raised even when EQ is unarmed.
3030 	 * If NAPI is already scheduled, then counting & notifying
3031 	 * events will orphan them.
3032 	 */
3033 	if (napi_schedule_prep(&eqo->napi)) {
3034 		num_evts = events_get(eqo);
3035 		__napi_schedule(&eqo->napi);
3036 		if (num_evts)
3037 			eqo->spurious_intr = 0;
3038 	}
3039 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040 
3041 	/* Return IRQ_HANDLED only for the the first spurious intr
3042 	 * after a valid intr to stop the kernel from branding
3043 	 * this irq as a bad one!
3044 	 */
3045 	if (num_evts || eqo->spurious_intr++ == 0)
3046 		return IRQ_HANDLED;
3047 	else
3048 		return IRQ_NONE;
3049 }
3050 
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053 	struct be_eq_obj *eqo = dev;
3054 
3055 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056 	napi_schedule(&eqo->napi);
3057 	return IRQ_HANDLED;
3058 }
3059 
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062 	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3063 }
3064 
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066 			 int budget)
3067 {
3068 	struct be_adapter *adapter = rxo->adapter;
3069 	struct be_queue_info *rx_cq = &rxo->cq;
3070 	struct be_rx_compl_info *rxcp;
3071 	u32 work_done;
3072 	u32 frags_consumed = 0;
3073 
3074 	for (work_done = 0; work_done < budget; work_done++) {
3075 		rxcp = be_rx_compl_get(rxo);
3076 		if (!rxcp)
3077 			break;
3078 
3079 		/* Is it a flush compl that has no data */
3080 		if (unlikely(rxcp->num_rcvd == 0))
3081 			goto loop_continue;
3082 
3083 		/* Discard compl with partial DMA Lancer B0 */
3084 		if (unlikely(!rxcp->pkt_size)) {
3085 			be_rx_compl_discard(rxo, rxcp);
3086 			goto loop_continue;
3087 		}
3088 
3089 		/* On BE drop pkts that arrive due to imperfect filtering in
3090 		 * promiscuous mode on some skews
3091 		 */
3092 		if (unlikely(rxcp->port != adapter->port_num &&
3093 			     !lancer_chip(adapter))) {
3094 			be_rx_compl_discard(rxo, rxcp);
3095 			goto loop_continue;
3096 		}
3097 
3098 		if (do_gro(rxcp))
3099 			be_rx_compl_process_gro(rxo, napi, rxcp);
3100 		else
3101 			be_rx_compl_process(rxo, napi, rxcp);
3102 
3103 loop_continue:
3104 		frags_consumed += rxcp->num_rcvd;
3105 		be_rx_stats_update(rxo, rxcp);
3106 	}
3107 
3108 	if (work_done) {
3109 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3110 
3111 		/* When an rx-obj gets into post_starved state, just
3112 		 * let be_worker do the posting.
3113 		 */
3114 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115 		    !rxo->rx_post_starved)
3116 			be_post_rx_frags(rxo, GFP_ATOMIC,
3117 					 max_t(u32, MAX_RX_POST,
3118 					       frags_consumed));
3119 	}
3120 
3121 	return work_done;
3122 }
3123 
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126 	switch (status) {
3127 	case BE_TX_COMP_HDR_PARSE_ERR:
3128 		tx_stats(txo)->tx_hdr_parse_err++;
3129 		break;
3130 	case BE_TX_COMP_NDMA_ERR:
3131 		tx_stats(txo)->tx_dma_err++;
3132 		break;
3133 	case BE_TX_COMP_ACL_ERR:
3134 		tx_stats(txo)->tx_spoof_check_err++;
3135 		break;
3136 	}
3137 }
3138 
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141 	switch (status) {
3142 	case LANCER_TX_COMP_LSO_ERR:
3143 		tx_stats(txo)->tx_tso_err++;
3144 		break;
3145 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147 		tx_stats(txo)->tx_spoof_check_err++;
3148 		break;
3149 	case LANCER_TX_COMP_QINQ_ERR:
3150 		tx_stats(txo)->tx_qinq_err++;
3151 		break;
3152 	case LANCER_TX_COMP_PARITY_ERR:
3153 		tx_stats(txo)->tx_internal_parity_err++;
3154 		break;
3155 	case LANCER_TX_COMP_DMA_ERR:
3156 		tx_stats(txo)->tx_dma_err++;
3157 		break;
3158 	}
3159 }
3160 
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162 			  int idx)
3163 {
3164 	int num_wrbs = 0, work_done = 0;
3165 	struct be_tx_compl_info *txcp;
3166 
3167 	while ((txcp = be_tx_compl_get(txo))) {
3168 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169 		work_done++;
3170 
3171 		if (txcp->status) {
3172 			if (lancer_chip(adapter))
3173 				lancer_update_tx_err(txo, txcp->status);
3174 			else
3175 				be_update_tx_err(txo, txcp->status);
3176 		}
3177 	}
3178 
3179 	if (work_done) {
3180 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3181 		atomic_sub(num_wrbs, &txo->q.used);
3182 
3183 		/* As Tx wrbs have been freed up, wake up netdev queue
3184 		 * if it was stopped due to lack of tx wrbs.  */
3185 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186 		    be_can_txq_wake(txo)) {
3187 			netif_wake_subqueue(adapter->netdev, idx);
3188 		}
3189 
3190 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191 		tx_stats(txo)->tx_compl += work_done;
3192 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193 	}
3194 }
3195 
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199 	struct be_adapter *adapter = eqo->adapter;
3200 	int max_work = 0, work, i, num_evts;
3201 	struct be_rx_obj *rxo;
3202 	struct be_tx_obj *txo;
3203 	u32 mult_enc = 0;
3204 
3205 	num_evts = events_get(eqo);
3206 
3207 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208 		be_process_tx(adapter, txo, i);
3209 
3210 	/* This loop will iterate twice for EQ0 in which
3211 	 * completions of the last RXQ (default one) are also processed
3212 	 * For other EQs the loop iterates only once
3213 	 */
3214 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215 		work = be_process_rx(rxo, napi, budget);
3216 		max_work = max(work, max_work);
3217 	}
3218 
3219 	if (is_mcc_eqo(eqo))
3220 		be_process_mcc(adapter);
3221 
3222 	if (max_work < budget) {
3223 		napi_complete_done(napi, max_work);
3224 
3225 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3226 		 * delay via a delay multiplier encoding value
3227 		 */
3228 		if (skyhawk_chip(adapter))
3229 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3230 
3231 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232 			     mult_enc);
3233 	} else {
3234 		/* As we'll continue in polling mode, count and clear events */
3235 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236 	}
3237 	return max_work;
3238 }
3239 
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244 	struct device *dev = &adapter->pdev->dev;
3245 	u16 val;
3246 	u32 i;
3247 
3248 	if (be_check_error(adapter, BE_ERROR_HW))
3249 		return;
3250 
3251 	if (lancer_chip(adapter)) {
3252 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3253 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3254 			be_set_error(adapter, BE_ERROR_UE);
3255 			sliport_err1 = ioread32(adapter->db +
3256 						SLIPORT_ERROR1_OFFSET);
3257 			sliport_err2 = ioread32(adapter->db +
3258 						SLIPORT_ERROR2_OFFSET);
3259 			/* Do not log error messages if its a FW reset */
3260 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3261 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3262 				dev_info(dev, "Firmware update in progress\n");
3263 			} else {
3264 				dev_err(dev, "Error detected in the card\n");
3265 				dev_err(dev, "ERR: sliport status 0x%x\n",
3266 					sliport_status);
3267 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3268 					sliport_err1);
3269 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3270 					sliport_err2);
3271 			}
3272 		}
3273 	} else {
3274 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3275 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3276 		ue_lo_mask = ioread32(adapter->pcicfg +
3277 				      PCICFG_UE_STATUS_LOW_MASK);
3278 		ue_hi_mask = ioread32(adapter->pcicfg +
3279 				      PCICFG_UE_STATUS_HI_MASK);
3280 
3281 		ue_lo = (ue_lo & ~ue_lo_mask);
3282 		ue_hi = (ue_hi & ~ue_hi_mask);
3283 
3284 		if (ue_lo || ue_hi) {
3285 			/* On certain platforms BE3 hardware can indicate
3286 			 * spurious UEs. In case of a UE in the chip,
3287 			 * the POST register correctly reports either a
3288 			 * FAT_LOG_START state (FW is currently dumping
3289 			 * FAT log data) or a ARMFW_UE state. Check for the
3290 			 * above states to ascertain if the UE is valid or not.
3291 			 */
3292 			if (BE3_chip(adapter)) {
3293 				val = be_POST_stage_get(adapter);
3294 				if ((val & POST_STAGE_FAT_LOG_START)
3295 				     != POST_STAGE_FAT_LOG_START &&
3296 				    (val & POST_STAGE_ARMFW_UE)
3297 				     != POST_STAGE_ARMFW_UE)
3298 					return;
3299 			}
3300 
3301 			dev_err(dev, "Error detected in the adapter");
3302 			be_set_error(adapter, BE_ERROR_UE);
3303 
3304 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3305 				if (ue_lo & 1)
3306 					dev_err(dev, "UE: %s bit set\n",
3307 						ue_status_low_desc[i]);
3308 			}
3309 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3310 				if (ue_hi & 1)
3311 					dev_err(dev, "UE: %s bit set\n",
3312 						ue_status_hi_desc[i]);
3313 			}
3314 		}
3315 	}
3316 }
3317 
3318 static void be_msix_disable(struct be_adapter *adapter)
3319 {
3320 	if (msix_enabled(adapter)) {
3321 		pci_disable_msix(adapter->pdev);
3322 		adapter->num_msix_vec = 0;
3323 		adapter->num_msix_roce_vec = 0;
3324 	}
3325 }
3326 
3327 static int be_msix_enable(struct be_adapter *adapter)
3328 {
3329 	unsigned int i, max_roce_eqs;
3330 	struct device *dev = &adapter->pdev->dev;
3331 	int num_vec;
3332 
3333 	/* If RoCE is supported, program the max number of vectors that
3334 	 * could be used for NIC and RoCE, else, just program the number
3335 	 * we'll use initially.
3336 	 */
3337 	if (be_roce_supported(adapter)) {
3338 		max_roce_eqs =
3339 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3340 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3341 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3342 	} else {
3343 		num_vec = max(adapter->cfg_num_rx_irqs,
3344 			      adapter->cfg_num_tx_irqs);
3345 	}
3346 
3347 	for (i = 0; i < num_vec; i++)
3348 		adapter->msix_entries[i].entry = i;
3349 
3350 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3351 					MIN_MSIX_VECTORS, num_vec);
3352 	if (num_vec < 0)
3353 		goto fail;
3354 
3355 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3356 		adapter->num_msix_roce_vec = num_vec / 2;
3357 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3358 			 adapter->num_msix_roce_vec);
3359 	}
3360 
3361 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3362 
3363 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3364 		 adapter->num_msix_vec);
3365 	return 0;
3366 
3367 fail:
3368 	dev_warn(dev, "MSIx enable failed\n");
3369 
3370 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3371 	if (be_virtfn(adapter))
3372 		return num_vec;
3373 	return 0;
3374 }
3375 
3376 static inline int be_msix_vec_get(struct be_adapter *adapter,
3377 				  struct be_eq_obj *eqo)
3378 {
3379 	return adapter->msix_entries[eqo->msix_idx].vector;
3380 }
3381 
3382 static int be_msix_register(struct be_adapter *adapter)
3383 {
3384 	struct net_device *netdev = adapter->netdev;
3385 	struct be_eq_obj *eqo;
3386 	int status, i, vec;
3387 
3388 	for_all_evt_queues(adapter, eqo, i) {
3389 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3390 		vec = be_msix_vec_get(adapter, eqo);
3391 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3392 		if (status)
3393 			goto err_msix;
3394 
3395 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3396 	}
3397 
3398 	return 0;
3399 err_msix:
3400 	for (i--; i >= 0; i--) {
3401 		eqo = &adapter->eq_obj[i];
3402 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3403 	}
3404 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3405 		 status);
3406 	be_msix_disable(adapter);
3407 	return status;
3408 }
3409 
3410 static int be_irq_register(struct be_adapter *adapter)
3411 {
3412 	struct net_device *netdev = adapter->netdev;
3413 	int status;
3414 
3415 	if (msix_enabled(adapter)) {
3416 		status = be_msix_register(adapter);
3417 		if (status == 0)
3418 			goto done;
3419 		/* INTx is not supported for VF */
3420 		if (be_virtfn(adapter))
3421 			return status;
3422 	}
3423 
3424 	/* INTx: only the first EQ is used */
3425 	netdev->irq = adapter->pdev->irq;
3426 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3427 			     &adapter->eq_obj[0]);
3428 	if (status) {
3429 		dev_err(&adapter->pdev->dev,
3430 			"INTx request IRQ failed - err %d\n", status);
3431 		return status;
3432 	}
3433 done:
3434 	adapter->isr_registered = true;
3435 	return 0;
3436 }
3437 
3438 static void be_irq_unregister(struct be_adapter *adapter)
3439 {
3440 	struct net_device *netdev = adapter->netdev;
3441 	struct be_eq_obj *eqo;
3442 	int i, vec;
3443 
3444 	if (!adapter->isr_registered)
3445 		return;
3446 
3447 	/* INTx */
3448 	if (!msix_enabled(adapter)) {
3449 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3450 		goto done;
3451 	}
3452 
3453 	/* MSIx */
3454 	for_all_evt_queues(adapter, eqo, i) {
3455 		vec = be_msix_vec_get(adapter, eqo);
3456 		irq_set_affinity_hint(vec, NULL);
3457 		free_irq(vec, eqo);
3458 	}
3459 
3460 done:
3461 	adapter->isr_registered = false;
3462 }
3463 
3464 static void be_rx_qs_destroy(struct be_adapter *adapter)
3465 {
3466 	struct rss_info *rss = &adapter->rss_info;
3467 	struct be_queue_info *q;
3468 	struct be_rx_obj *rxo;
3469 	int i;
3470 
3471 	for_all_rx_queues(adapter, rxo, i) {
3472 		q = &rxo->q;
3473 		if (q->created) {
3474 			/* If RXQs are destroyed while in an "out of buffer"
3475 			 * state, there is a possibility of an HW stall on
3476 			 * Lancer. So, post 64 buffers to each queue to relieve
3477 			 * the "out of buffer" condition.
3478 			 * Make sure there's space in the RXQ before posting.
3479 			 */
3480 			if (lancer_chip(adapter)) {
3481 				be_rx_cq_clean(rxo);
3482 				if (atomic_read(&q->used) == 0)
3483 					be_post_rx_frags(rxo, GFP_KERNEL,
3484 							 MAX_RX_POST);
3485 			}
3486 
3487 			be_cmd_rxq_destroy(adapter, q);
3488 			be_rx_cq_clean(rxo);
3489 			be_rxq_clean(rxo);
3490 		}
3491 		be_queue_free(adapter, q);
3492 	}
3493 
3494 	if (rss->rss_flags) {
3495 		rss->rss_flags = RSS_ENABLE_NONE;
3496 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3497 				  128, rss->rss_hkey);
3498 	}
3499 }
3500 
3501 static void be_disable_if_filters(struct be_adapter *adapter)
3502 {
3503 	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3504 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3505 	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3506 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3507 		eth_zero_addr(adapter->dev_mac);
3508 	}
3509 
3510 	be_clear_uc_list(adapter);
3511 	be_clear_mc_list(adapter);
3512 
3513 	/* The IFACE flags are enabled in the open path and cleared
3514 	 * in the close path. When a VF gets detached from the host and
3515 	 * assigned to a VM the following happens:
3516 	 *	- VF's IFACE flags get cleared in the detach path
3517 	 *	- IFACE create is issued by the VF in the attach path
3518 	 * Due to a bug in the BE3/Skyhawk-R FW
3519 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3520 	 * specified along with the IFACE create cmd issued by a VF are not
3521 	 * honoured by FW.  As a consequence, if a *new* driver
3522 	 * (that enables/disables IFACE flags in open/close)
3523 	 * is loaded in the host and an *old* driver is * used by a VM/VF,
3524 	 * the IFACE gets created *without* the needed flags.
3525 	 * To avoid this, disable RX-filter flags only for Lancer.
3526 	 */
3527 	if (lancer_chip(adapter)) {
3528 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3529 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3530 	}
3531 }
3532 
3533 static int be_close(struct net_device *netdev)
3534 {
3535 	struct be_adapter *adapter = netdev_priv(netdev);
3536 	struct be_eq_obj *eqo;
3537 	int i;
3538 
3539 	/* This protection is needed as be_close() may be called even when the
3540 	 * adapter is in cleared state (after eeh perm failure)
3541 	 */
3542 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3543 		return 0;
3544 
3545 	/* Before attempting cleanup ensure all the pending cmds in the
3546 	 * config_wq have finished execution
3547 	 */
3548 	flush_workqueue(be_wq);
3549 
3550 	be_disable_if_filters(adapter);
3551 
3552 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3553 		for_all_evt_queues(adapter, eqo, i) {
3554 			napi_disable(&eqo->napi);
3555 		}
3556 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3557 	}
3558 
3559 	be_async_mcc_disable(adapter);
3560 
3561 	/* Wait for all pending tx completions to arrive so that
3562 	 * all tx skbs are freed.
3563 	 */
3564 	netif_tx_disable(netdev);
3565 	be_tx_compl_clean(adapter);
3566 
3567 	be_rx_qs_destroy(adapter);
3568 
3569 	for_all_evt_queues(adapter, eqo, i) {
3570 		if (msix_enabled(adapter))
3571 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3572 		else
3573 			synchronize_irq(netdev->irq);
3574 		be_eq_clean(eqo);
3575 	}
3576 
3577 	be_irq_unregister(adapter);
3578 
3579 	return 0;
3580 }
3581 
3582 static int be_rx_qs_create(struct be_adapter *adapter)
3583 {
3584 	struct rss_info *rss = &adapter->rss_info;
3585 	u8 rss_key[RSS_HASH_KEY_LEN];
3586 	struct be_rx_obj *rxo;
3587 	int rc, i, j;
3588 
3589 	for_all_rx_queues(adapter, rxo, i) {
3590 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3591 				    sizeof(struct be_eth_rx_d));
3592 		if (rc)
3593 			return rc;
3594 	}
3595 
3596 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3597 		rxo = default_rxo(adapter);
3598 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3599 				       rx_frag_size, adapter->if_handle,
3600 				       false, &rxo->rss_id);
3601 		if (rc)
3602 			return rc;
3603 	}
3604 
3605 	for_all_rss_queues(adapter, rxo, i) {
3606 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3607 				       rx_frag_size, adapter->if_handle,
3608 				       true, &rxo->rss_id);
3609 		if (rc)
3610 			return rc;
3611 	}
3612 
3613 	if (be_multi_rxq(adapter)) {
3614 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3615 			for_all_rss_queues(adapter, rxo, i) {
3616 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3617 					break;
3618 				rss->rsstable[j + i] = rxo->rss_id;
3619 				rss->rss_queue[j + i] = i;
3620 			}
3621 		}
3622 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3623 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3624 
3625 		if (!BEx_chip(adapter))
3626 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3627 				RSS_ENABLE_UDP_IPV6;
3628 
3629 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3630 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3631 				       RSS_INDIR_TABLE_LEN, rss_key);
3632 		if (rc) {
3633 			rss->rss_flags = RSS_ENABLE_NONE;
3634 			return rc;
3635 		}
3636 
3637 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3638 	} else {
3639 		/* Disable RSS, if only default RX Q is created */
3640 		rss->rss_flags = RSS_ENABLE_NONE;
3641 	}
3642 
3643 
3644 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3645 	 * which is a queue empty condition
3646 	 */
3647 	for_all_rx_queues(adapter, rxo, i)
3648 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3649 
3650 	return 0;
3651 }
3652 
3653 static int be_enable_if_filters(struct be_adapter *adapter)
3654 {
3655 	int status;
3656 
3657 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3658 	if (status)
3659 		return status;
3660 
3661 	/* Normally this condition usually true as the ->dev_mac is zeroed.
3662 	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3663 	 * subsequent be_dev_mac_add() can fail (after fresh boot)
3664 	 */
3665 	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3666 		int old_pmac_id = -1;
3667 
3668 		/* Remember old programmed MAC if any - can happen on BE3 VF */
3669 		if (!is_zero_ether_addr(adapter->dev_mac))
3670 			old_pmac_id = adapter->pmac_id[0];
3671 
3672 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3673 		if (status)
3674 			return status;
3675 
3676 		/* Delete the old programmed MAC as we successfully programmed
3677 		 * a new MAC
3678 		 */
3679 		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3680 			be_dev_mac_del(adapter, old_pmac_id);
3681 
3682 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3683 	}
3684 
3685 	if (adapter->vlans_added)
3686 		be_vid_config(adapter);
3687 
3688 	__be_set_rx_mode(adapter);
3689 
3690 	return 0;
3691 }
3692 
3693 static int be_open(struct net_device *netdev)
3694 {
3695 	struct be_adapter *adapter = netdev_priv(netdev);
3696 	struct be_eq_obj *eqo;
3697 	struct be_rx_obj *rxo;
3698 	struct be_tx_obj *txo;
3699 	u8 link_status;
3700 	int status, i;
3701 
3702 	status = be_rx_qs_create(adapter);
3703 	if (status)
3704 		goto err;
3705 
3706 	status = be_enable_if_filters(adapter);
3707 	if (status)
3708 		goto err;
3709 
3710 	status = be_irq_register(adapter);
3711 	if (status)
3712 		goto err;
3713 
3714 	for_all_rx_queues(adapter, rxo, i)
3715 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3716 
3717 	for_all_tx_queues(adapter, txo, i)
3718 		be_cq_notify(adapter, txo->cq.id, true, 0);
3719 
3720 	be_async_mcc_enable(adapter);
3721 
3722 	for_all_evt_queues(adapter, eqo, i) {
3723 		napi_enable(&eqo->napi);
3724 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3725 	}
3726 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3727 
3728 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3729 	if (!status)
3730 		be_link_status_update(adapter, link_status);
3731 
3732 	netif_tx_start_all_queues(netdev);
3733 	if (skyhawk_chip(adapter))
3734 		udp_tunnel_get_rx_info(netdev);
3735 
3736 	return 0;
3737 err:
3738 	be_close(adapter->netdev);
3739 	return -EIO;
3740 }
3741 
3742 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3743 {
3744 	u32 addr;
3745 
3746 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3747 
3748 	mac[5] = (u8)(addr & 0xFF);
3749 	mac[4] = (u8)((addr >> 8) & 0xFF);
3750 	mac[3] = (u8)((addr >> 16) & 0xFF);
3751 	/* Use the OUI from the current MAC address */
3752 	memcpy(mac, adapter->netdev->dev_addr, 3);
3753 }
3754 
3755 /*
3756  * Generate a seed MAC address from the PF MAC Address using jhash.
3757  * MAC Address for VFs are assigned incrementally starting from the seed.
3758  * These addresses are programmed in the ASIC by the PF and the VF driver
3759  * queries for the MAC address during its probe.
3760  */
3761 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3762 {
3763 	u32 vf;
3764 	int status = 0;
3765 	u8 mac[ETH_ALEN];
3766 	struct be_vf_cfg *vf_cfg;
3767 
3768 	be_vf_eth_addr_generate(adapter, mac);
3769 
3770 	for_all_vfs(adapter, vf_cfg, vf) {
3771 		if (BEx_chip(adapter))
3772 			status = be_cmd_pmac_add(adapter, mac,
3773 						 vf_cfg->if_handle,
3774 						 &vf_cfg->pmac_id, vf + 1);
3775 		else
3776 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3777 						vf + 1);
3778 
3779 		if (status)
3780 			dev_err(&adapter->pdev->dev,
3781 				"Mac address assignment failed for VF %d\n",
3782 				vf);
3783 		else
3784 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3785 
3786 		mac[5] += 1;
3787 	}
3788 	return status;
3789 }
3790 
3791 static int be_vfs_mac_query(struct be_adapter *adapter)
3792 {
3793 	int status, vf;
3794 	u8 mac[ETH_ALEN];
3795 	struct be_vf_cfg *vf_cfg;
3796 
3797 	for_all_vfs(adapter, vf_cfg, vf) {
3798 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3799 					       mac, vf_cfg->if_handle,
3800 					       false, vf+1);
3801 		if (status)
3802 			return status;
3803 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3804 	}
3805 	return 0;
3806 }
3807 
3808 static void be_vf_clear(struct be_adapter *adapter)
3809 {
3810 	struct be_vf_cfg *vf_cfg;
3811 	u32 vf;
3812 
3813 	if (pci_vfs_assigned(adapter->pdev)) {
3814 		dev_warn(&adapter->pdev->dev,
3815 			 "VFs are assigned to VMs: not disabling VFs\n");
3816 		goto done;
3817 	}
3818 
3819 	pci_disable_sriov(adapter->pdev);
3820 
3821 	for_all_vfs(adapter, vf_cfg, vf) {
3822 		if (BEx_chip(adapter))
3823 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3824 					vf_cfg->pmac_id, vf + 1);
3825 		else
3826 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3827 				       vf + 1);
3828 
3829 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3830 	}
3831 
3832 	if (BE3_chip(adapter))
3833 		be_cmd_set_hsw_config(adapter, 0, 0,
3834 				      adapter->if_handle,
3835 				      PORT_FWD_TYPE_PASSTHRU, 0);
3836 done:
3837 	kfree(adapter->vf_cfg);
3838 	adapter->num_vfs = 0;
3839 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3840 }
3841 
3842 static void be_clear_queues(struct be_adapter *adapter)
3843 {
3844 	be_mcc_queues_destroy(adapter);
3845 	be_rx_cqs_destroy(adapter);
3846 	be_tx_queues_destroy(adapter);
3847 	be_evt_queues_destroy(adapter);
3848 }
3849 
3850 static void be_cancel_worker(struct be_adapter *adapter)
3851 {
3852 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3853 		cancel_delayed_work_sync(&adapter->work);
3854 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3855 	}
3856 }
3857 
3858 static void be_cancel_err_detection(struct be_adapter *adapter)
3859 {
3860 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3861 
3862 	if (!be_err_recovery_workq)
3863 		return;
3864 
3865 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3866 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3867 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3868 	}
3869 }
3870 
3871 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3872 {
3873 	struct net_device *netdev = adapter->netdev;
3874 	struct device *dev = &adapter->pdev->dev;
3875 	struct be_vxlan_port *vxlan_port;
3876 	__be16 port;
3877 	int status;
3878 
3879 	vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3880 				      struct be_vxlan_port, list);
3881 	port = vxlan_port->port;
3882 
3883 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3884 				     OP_CONVERT_NORMAL_TO_TUNNEL);
3885 	if (status) {
3886 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3887 		return status;
3888 	}
3889 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3890 
3891 	status = be_cmd_set_vxlan_port(adapter, port);
3892 	if (status) {
3893 		dev_warn(dev, "Failed to add VxLAN port\n");
3894 		return status;
3895 	}
3896 	adapter->vxlan_port = port;
3897 
3898 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3899 				   NETIF_F_TSO | NETIF_F_TSO6 |
3900 				   NETIF_F_GSO_UDP_TUNNEL;
3901 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3902 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
3903 
3904 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3905 		 be16_to_cpu(port));
3906 	return 0;
3907 }
3908 
3909 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3910 {
3911 	struct net_device *netdev = adapter->netdev;
3912 
3913 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3914 		be_cmd_manage_iface(adapter, adapter->if_handle,
3915 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3916 
3917 	if (adapter->vxlan_port)
3918 		be_cmd_set_vxlan_port(adapter, 0);
3919 
3920 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3921 	adapter->vxlan_port = 0;
3922 
3923 	netdev->hw_enc_features = 0;
3924 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3925 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3926 }
3927 
3928 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3929 				struct be_resources *vft_res)
3930 {
3931 	struct be_resources res = adapter->pool_res;
3932 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3933 	struct be_resources res_mod = {0};
3934 	u16 num_vf_qs = 1;
3935 
3936 	/* Distribute the queue resources among the PF and it's VFs */
3937 	if (num_vfs) {
3938 		/* Divide the rx queues evenly among the VFs and the PF, capped
3939 		 * at VF-EQ-count. Any remainder queues belong to the PF.
3940 		 */
3941 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3942 				res.max_rss_qs / (num_vfs + 1));
3943 
3944 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3945 		 * RSS Tables per port. Provide RSS on VFs, only if number of
3946 		 * VFs requested is less than it's PF Pool's RSS Tables limit.
3947 		 */
3948 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3949 			num_vf_qs = 1;
3950 	}
3951 
3952 	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3953 	 * which are modifiable using SET_PROFILE_CONFIG cmd.
3954 	 */
3955 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3956 				  RESOURCE_MODIFIABLE, 0);
3957 
3958 	/* If RSS IFACE capability flags are modifiable for a VF, set the
3959 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3960 	 * more than 1 RSSQ is available for a VF.
3961 	 * Otherwise, provision only 1 queue pair for VF.
3962 	 */
3963 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3964 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3965 		if (num_vf_qs > 1) {
3966 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3967 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3968 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3969 		} else {
3970 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3971 					     BE_IF_FLAGS_DEFQ_RSS);
3972 		}
3973 	} else {
3974 		num_vf_qs = 1;
3975 	}
3976 
3977 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3978 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3979 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3980 	}
3981 
3982 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
3983 	vft_res->max_rx_qs = num_vf_qs;
3984 	vft_res->max_rss_qs = num_vf_qs;
3985 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3986 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3987 
3988 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3989 	 * among the PF and it's VFs, if the fields are changeable
3990 	 */
3991 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3992 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3993 
3994 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
3995 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3996 
3997 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3998 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3999 
4000 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4001 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4002 }
4003 
4004 static void be_if_destroy(struct be_adapter *adapter)
4005 {
4006 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4007 
4008 	kfree(adapter->pmac_id);
4009 	adapter->pmac_id = NULL;
4010 
4011 	kfree(adapter->mc_list);
4012 	adapter->mc_list = NULL;
4013 
4014 	kfree(adapter->uc_list);
4015 	adapter->uc_list = NULL;
4016 }
4017 
4018 static int be_clear(struct be_adapter *adapter)
4019 {
4020 	struct pci_dev *pdev = adapter->pdev;
4021 	struct  be_resources vft_res = {0};
4022 
4023 	be_cancel_worker(adapter);
4024 
4025 	flush_workqueue(be_wq);
4026 
4027 	if (sriov_enabled(adapter))
4028 		be_vf_clear(adapter);
4029 
4030 	/* Re-configure FW to distribute resources evenly across max-supported
4031 	 * number of VFs, only when VFs are not already enabled.
4032 	 */
4033 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4034 	    !pci_vfs_assigned(pdev)) {
4035 		be_calculate_vf_res(adapter,
4036 				    pci_sriov_get_totalvfs(pdev),
4037 				    &vft_res);
4038 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4039 					pci_sriov_get_totalvfs(pdev),
4040 					&vft_res);
4041 	}
4042 
4043 	be_disable_vxlan_offloads(adapter);
4044 
4045 	be_if_destroy(adapter);
4046 
4047 	be_clear_queues(adapter);
4048 
4049 	be_msix_disable(adapter);
4050 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4051 	return 0;
4052 }
4053 
4054 static int be_vfs_if_create(struct be_adapter *adapter)
4055 {
4056 	struct be_resources res = {0};
4057 	u32 cap_flags, en_flags, vf;
4058 	struct be_vf_cfg *vf_cfg;
4059 	int status;
4060 
4061 	/* If a FW profile exists, then cap_flags are updated */
4062 	cap_flags = BE_VF_IF_EN_FLAGS;
4063 
4064 	for_all_vfs(adapter, vf_cfg, vf) {
4065 		if (!BE3_chip(adapter)) {
4066 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4067 							   ACTIVE_PROFILE_TYPE,
4068 							   RESOURCE_LIMITS,
4069 							   vf + 1);
4070 			if (!status) {
4071 				cap_flags = res.if_cap_flags;
4072 				/* Prevent VFs from enabling VLAN promiscuous
4073 				 * mode
4074 				 */
4075 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4076 			}
4077 		}
4078 
4079 		/* PF should enable IF flags during proxy if_create call */
4080 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4081 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4082 					  &vf_cfg->if_handle, vf + 1);
4083 		if (status)
4084 			return status;
4085 	}
4086 
4087 	return 0;
4088 }
4089 
4090 static int be_vf_setup_init(struct be_adapter *adapter)
4091 {
4092 	struct be_vf_cfg *vf_cfg;
4093 	int vf;
4094 
4095 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4096 				  GFP_KERNEL);
4097 	if (!adapter->vf_cfg)
4098 		return -ENOMEM;
4099 
4100 	for_all_vfs(adapter, vf_cfg, vf) {
4101 		vf_cfg->if_handle = -1;
4102 		vf_cfg->pmac_id = -1;
4103 	}
4104 	return 0;
4105 }
4106 
4107 static int be_vf_setup(struct be_adapter *adapter)
4108 {
4109 	struct device *dev = &adapter->pdev->dev;
4110 	struct be_vf_cfg *vf_cfg;
4111 	int status, old_vfs, vf;
4112 	bool spoofchk;
4113 
4114 	old_vfs = pci_num_vf(adapter->pdev);
4115 
4116 	status = be_vf_setup_init(adapter);
4117 	if (status)
4118 		goto err;
4119 
4120 	if (old_vfs) {
4121 		for_all_vfs(adapter, vf_cfg, vf) {
4122 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4123 			if (status)
4124 				goto err;
4125 		}
4126 
4127 		status = be_vfs_mac_query(adapter);
4128 		if (status)
4129 			goto err;
4130 	} else {
4131 		status = be_vfs_if_create(adapter);
4132 		if (status)
4133 			goto err;
4134 
4135 		status = be_vf_eth_addr_config(adapter);
4136 		if (status)
4137 			goto err;
4138 	}
4139 
4140 	for_all_vfs(adapter, vf_cfg, vf) {
4141 		/* Allow VFs to programs MAC/VLAN filters */
4142 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4143 						  vf + 1);
4144 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4145 			status = be_cmd_set_fn_privileges(adapter,
4146 							  vf_cfg->privileges |
4147 							  BE_PRIV_FILTMGMT,
4148 							  vf + 1);
4149 			if (!status) {
4150 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4151 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4152 					 vf);
4153 			}
4154 		}
4155 
4156 		/* Allow full available bandwidth */
4157 		if (!old_vfs)
4158 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4159 
4160 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4161 					       vf_cfg->if_handle, NULL,
4162 					       &spoofchk);
4163 		if (!status)
4164 			vf_cfg->spoofchk = spoofchk;
4165 
4166 		if (!old_vfs) {
4167 			be_cmd_enable_vf(adapter, vf + 1);
4168 			be_cmd_set_logical_link_config(adapter,
4169 						       IFLA_VF_LINK_STATE_AUTO,
4170 						       vf+1);
4171 		}
4172 	}
4173 
4174 	if (!old_vfs) {
4175 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4176 		if (status) {
4177 			dev_err(dev, "SRIOV enable failed\n");
4178 			adapter->num_vfs = 0;
4179 			goto err;
4180 		}
4181 	}
4182 
4183 	if (BE3_chip(adapter)) {
4184 		/* On BE3, enable VEB only when SRIOV is enabled */
4185 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4186 					       adapter->if_handle,
4187 					       PORT_FWD_TYPE_VEB, 0);
4188 		if (status)
4189 			goto err;
4190 	}
4191 
4192 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4193 	return 0;
4194 err:
4195 	dev_err(dev, "VF setup failed\n");
4196 	be_vf_clear(adapter);
4197 	return status;
4198 }
4199 
4200 /* Converting function_mode bits on BE3 to SH mc_type enums */
4201 
4202 static u8 be_convert_mc_type(u32 function_mode)
4203 {
4204 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4205 		return vNIC1;
4206 	else if (function_mode & QNQ_MODE)
4207 		return FLEX10;
4208 	else if (function_mode & VNIC_MODE)
4209 		return vNIC2;
4210 	else if (function_mode & UMC_ENABLED)
4211 		return UMC;
4212 	else
4213 		return MC_NONE;
4214 }
4215 
4216 /* On BE2/BE3 FW does not suggest the supported limits */
4217 static void BEx_get_resources(struct be_adapter *adapter,
4218 			      struct be_resources *res)
4219 {
4220 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4221 
4222 	if (be_physfn(adapter))
4223 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4224 	else
4225 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4226 
4227 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4228 
4229 	if (be_is_mc(adapter)) {
4230 		/* Assuming that there are 4 channels per port,
4231 		 * when multi-channel is enabled
4232 		 */
4233 		if (be_is_qnq_mode(adapter))
4234 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4235 		else
4236 			/* In a non-qnq multichannel mode, the pvid
4237 			 * takes up one vlan entry
4238 			 */
4239 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4240 	} else {
4241 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4242 	}
4243 
4244 	res->max_mcast_mac = BE_MAX_MC;
4245 
4246 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4247 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4248 	 *    *only* if it is RSS-capable.
4249 	 */
4250 	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4251 	    be_virtfn(adapter) ||
4252 	    (be_is_mc(adapter) &&
4253 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4254 		res->max_tx_qs = 1;
4255 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4256 		struct be_resources super_nic_res = {0};
4257 
4258 		/* On a SuperNIC profile, the driver needs to use the
4259 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4260 		 */
4261 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4262 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4263 					  0);
4264 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4265 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4266 	} else {
4267 		res->max_tx_qs = BE3_MAX_TX_QS;
4268 	}
4269 
4270 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4271 	    !use_sriov && be_physfn(adapter))
4272 		res->max_rss_qs = (adapter->be3_native) ?
4273 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4274 	res->max_rx_qs = res->max_rss_qs + 1;
4275 
4276 	if (be_physfn(adapter))
4277 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4278 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4279 	else
4280 		res->max_evt_qs = 1;
4281 
4282 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4283 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4284 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4285 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4286 }
4287 
4288 static void be_setup_init(struct be_adapter *adapter)
4289 {
4290 	adapter->vlan_prio_bmap = 0xff;
4291 	adapter->phy.link_speed = -1;
4292 	adapter->if_handle = -1;
4293 	adapter->be3_native = false;
4294 	adapter->if_flags = 0;
4295 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4296 	if (be_physfn(adapter))
4297 		adapter->cmd_privileges = MAX_PRIVILEGES;
4298 	else
4299 		adapter->cmd_privileges = MIN_PRIVILEGES;
4300 }
4301 
4302 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4303  * However, this HW limitation is not exposed to the host via any SLI cmd.
4304  * As a result, in the case of SRIOV and in particular multi-partition configs
4305  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4306  * for distribution between the VFs. This self-imposed limit will determine the
4307  * no: of VFs for which RSS can be enabled.
4308  */
4309 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4310 {
4311 	struct be_port_resources port_res = {0};
4312 	u8 rss_tables_on_port;
4313 	u16 max_vfs = be_max_vfs(adapter);
4314 
4315 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4316 				  RESOURCE_LIMITS, 0);
4317 
4318 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4319 
4320 	/* Each PF Pool's RSS Tables limit =
4321 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4322 	 */
4323 	adapter->pool_res.max_rss_tables =
4324 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4325 }
4326 
4327 static int be_get_sriov_config(struct be_adapter *adapter)
4328 {
4329 	struct be_resources res = {0};
4330 	int max_vfs, old_vfs;
4331 
4332 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4333 				  RESOURCE_LIMITS, 0);
4334 
4335 	/* Some old versions of BE3 FW don't report max_vfs value */
4336 	if (BE3_chip(adapter) && !res.max_vfs) {
4337 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4338 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4339 	}
4340 
4341 	adapter->pool_res = res;
4342 
4343 	/* If during previous unload of the driver, the VFs were not disabled,
4344 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4345 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4346 	 */
4347 	old_vfs = pci_num_vf(adapter->pdev);
4348 	if (old_vfs) {
4349 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4350 			 old_vfs);
4351 
4352 		adapter->pool_res.max_vfs =
4353 			pci_sriov_get_totalvfs(adapter->pdev);
4354 		adapter->num_vfs = old_vfs;
4355 	}
4356 
4357 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4358 		be_calculate_pf_pool_rss_tables(adapter);
4359 		dev_info(&adapter->pdev->dev,
4360 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4361 			 be_max_pf_pool_rss_tables(adapter));
4362 	}
4363 	return 0;
4364 }
4365 
4366 static void be_alloc_sriov_res(struct be_adapter *adapter)
4367 {
4368 	int old_vfs = pci_num_vf(adapter->pdev);
4369 	struct  be_resources vft_res = {0};
4370 	int status;
4371 
4372 	be_get_sriov_config(adapter);
4373 
4374 	if (!old_vfs)
4375 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4376 
4377 	/* When the HW is in SRIOV capable configuration, the PF-pool
4378 	 * resources are given to PF during driver load, if there are no
4379 	 * old VFs. This facility is not available in BE3 FW.
4380 	 * Also, this is done by FW in Lancer chip.
4381 	 */
4382 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4383 		be_calculate_vf_res(adapter, 0, &vft_res);
4384 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4385 						 &vft_res);
4386 		if (status)
4387 			dev_err(&adapter->pdev->dev,
4388 				"Failed to optimize SRIOV resources\n");
4389 	}
4390 }
4391 
4392 static int be_get_resources(struct be_adapter *adapter)
4393 {
4394 	struct device *dev = &adapter->pdev->dev;
4395 	struct be_resources res = {0};
4396 	int status;
4397 
4398 	/* For Lancer, SH etc read per-function resource limits from FW.
4399 	 * GET_FUNC_CONFIG returns per function guaranteed limits.
4400 	 * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4401 	 */
4402 	if (BEx_chip(adapter)) {
4403 		BEx_get_resources(adapter, &res);
4404 	} else {
4405 		status = be_cmd_get_func_config(adapter, &res);
4406 		if (status)
4407 			return status;
4408 
4409 		/* If a deafault RXQ must be created, we'll use up one RSSQ*/
4410 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4411 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4412 			res.max_rss_qs -= 1;
4413 	}
4414 
4415 	/* If RoCE is supported stash away half the EQs for RoCE */
4416 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4417 				res.max_evt_qs / 2 : res.max_evt_qs;
4418 	adapter->res = res;
4419 
4420 	/* If FW supports RSS default queue, then skip creating non-RSS
4421 	 * queue for non-IP traffic.
4422 	 */
4423 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4424 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4425 
4426 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4427 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4428 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4429 		 be_max_vfs(adapter));
4430 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4431 		 be_max_uc(adapter), be_max_mc(adapter),
4432 		 be_max_vlans(adapter));
4433 
4434 	/* Ensure RX and TX queues are created in pairs at init time */
4435 	adapter->cfg_num_rx_irqs =
4436 				min_t(u16, netif_get_num_default_rss_queues(),
4437 				      be_max_qp_irqs(adapter));
4438 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4439 	return 0;
4440 }
4441 
4442 static int be_get_config(struct be_adapter *adapter)
4443 {
4444 	int status, level;
4445 	u16 profile_id;
4446 
4447 	status = be_cmd_get_cntl_attributes(adapter);
4448 	if (status)
4449 		return status;
4450 
4451 	status = be_cmd_query_fw_cfg(adapter);
4452 	if (status)
4453 		return status;
4454 
4455 	if (!lancer_chip(adapter) && be_physfn(adapter))
4456 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4457 
4458 	if (BEx_chip(adapter)) {
4459 		level = be_cmd_get_fw_log_level(adapter);
4460 		adapter->msg_enable =
4461 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4462 	}
4463 
4464 	be_cmd_get_acpi_wol_cap(adapter);
4465 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4466 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4467 
4468 	be_cmd_query_port_name(adapter);
4469 
4470 	if (be_physfn(adapter)) {
4471 		status = be_cmd_get_active_profile(adapter, &profile_id);
4472 		if (!status)
4473 			dev_info(&adapter->pdev->dev,
4474 				 "Using profile 0x%x\n", profile_id);
4475 	}
4476 
4477 	return 0;
4478 }
4479 
4480 static int be_mac_setup(struct be_adapter *adapter)
4481 {
4482 	u8 mac[ETH_ALEN];
4483 	int status;
4484 
4485 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4486 		status = be_cmd_get_perm_mac(adapter, mac);
4487 		if (status)
4488 			return status;
4489 
4490 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4491 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4492 
4493 		/* Initial MAC for BE3 VFs is already programmed by PF */
4494 		if (BEx_chip(adapter) && be_virtfn(adapter))
4495 			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4496 	}
4497 
4498 	return 0;
4499 }
4500 
4501 static void be_schedule_worker(struct be_adapter *adapter)
4502 {
4503 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4504 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4505 }
4506 
4507 static void be_destroy_err_recovery_workq(void)
4508 {
4509 	if (!be_err_recovery_workq)
4510 		return;
4511 
4512 	flush_workqueue(be_err_recovery_workq);
4513 	destroy_workqueue(be_err_recovery_workq);
4514 	be_err_recovery_workq = NULL;
4515 }
4516 
4517 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4518 {
4519 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4520 
4521 	if (!be_err_recovery_workq)
4522 		return;
4523 
4524 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4525 			   msecs_to_jiffies(delay));
4526 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4527 }
4528 
4529 static int be_setup_queues(struct be_adapter *adapter)
4530 {
4531 	struct net_device *netdev = adapter->netdev;
4532 	int status;
4533 
4534 	status = be_evt_queues_create(adapter);
4535 	if (status)
4536 		goto err;
4537 
4538 	status = be_tx_qs_create(adapter);
4539 	if (status)
4540 		goto err;
4541 
4542 	status = be_rx_cqs_create(adapter);
4543 	if (status)
4544 		goto err;
4545 
4546 	status = be_mcc_queues_create(adapter);
4547 	if (status)
4548 		goto err;
4549 
4550 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4551 	if (status)
4552 		goto err;
4553 
4554 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4555 	if (status)
4556 		goto err;
4557 
4558 	return 0;
4559 err:
4560 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4561 	return status;
4562 }
4563 
4564 static int be_if_create(struct be_adapter *adapter)
4565 {
4566 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4567 	u32 cap_flags = be_if_cap_flags(adapter);
4568 	int status;
4569 
4570 	/* alloc required memory for other filtering fields */
4571 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4572 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4573 	if (!adapter->pmac_id)
4574 		return -ENOMEM;
4575 
4576 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4577 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4578 	if (!adapter->mc_list)
4579 		return -ENOMEM;
4580 
4581 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4582 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4583 	if (!adapter->uc_list)
4584 		return -ENOMEM;
4585 
4586 	if (adapter->cfg_num_rx_irqs == 1)
4587 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4588 
4589 	en_flags &= cap_flags;
4590 	/* will enable all the needed filter flags in be_open() */
4591 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4592 				  &adapter->if_handle, 0);
4593 
4594 	if (status)
4595 		return status;
4596 
4597 	return 0;
4598 }
4599 
4600 int be_update_queues(struct be_adapter *adapter)
4601 {
4602 	struct net_device *netdev = adapter->netdev;
4603 	int status;
4604 
4605 	if (netif_running(netdev))
4606 		be_close(netdev);
4607 
4608 	be_cancel_worker(adapter);
4609 
4610 	/* If any vectors have been shared with RoCE we cannot re-program
4611 	 * the MSIx table.
4612 	 */
4613 	if (!adapter->num_msix_roce_vec)
4614 		be_msix_disable(adapter);
4615 
4616 	be_clear_queues(adapter);
4617 	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4618 	if (status)
4619 		return status;
4620 
4621 	if (!msix_enabled(adapter)) {
4622 		status = be_msix_enable(adapter);
4623 		if (status)
4624 			return status;
4625 	}
4626 
4627 	status = be_if_create(adapter);
4628 	if (status)
4629 		return status;
4630 
4631 	status = be_setup_queues(adapter);
4632 	if (status)
4633 		return status;
4634 
4635 	be_schedule_worker(adapter);
4636 
4637 	if (netif_running(netdev))
4638 		status = be_open(netdev);
4639 
4640 	return status;
4641 }
4642 
4643 static inline int fw_major_num(const char *fw_ver)
4644 {
4645 	int fw_major = 0, i;
4646 
4647 	i = sscanf(fw_ver, "%d.", &fw_major);
4648 	if (i != 1)
4649 		return 0;
4650 
4651 	return fw_major;
4652 }
4653 
4654 /* If it is error recovery, FLR the PF
4655  * Else if any VFs are already enabled don't FLR the PF
4656  */
4657 static bool be_reset_required(struct be_adapter *adapter)
4658 {
4659 	if (be_error_recovering(adapter))
4660 		return true;
4661 	else
4662 		return pci_num_vf(adapter->pdev) == 0;
4663 }
4664 
4665 /* Wait for the FW to be ready and perform the required initialization */
4666 static int be_func_init(struct be_adapter *adapter)
4667 {
4668 	int status;
4669 
4670 	status = be_fw_wait_ready(adapter);
4671 	if (status)
4672 		return status;
4673 
4674 	/* FW is now ready; clear errors to allow cmds/doorbell */
4675 	be_clear_error(adapter, BE_CLEAR_ALL);
4676 
4677 	if (be_reset_required(adapter)) {
4678 		status = be_cmd_reset_function(adapter);
4679 		if (status)
4680 			return status;
4681 
4682 		/* Wait for interrupts to quiesce after an FLR */
4683 		msleep(100);
4684 	}
4685 
4686 	/* Tell FW we're ready to fire cmds */
4687 	status = be_cmd_fw_init(adapter);
4688 	if (status)
4689 		return status;
4690 
4691 	/* Allow interrupts for other ULPs running on NIC function */
4692 	be_intr_set(adapter, true);
4693 
4694 	return 0;
4695 }
4696 
4697 static int be_setup(struct be_adapter *adapter)
4698 {
4699 	struct device *dev = &adapter->pdev->dev;
4700 	int status;
4701 
4702 	status = be_func_init(adapter);
4703 	if (status)
4704 		return status;
4705 
4706 	be_setup_init(adapter);
4707 
4708 	if (!lancer_chip(adapter))
4709 		be_cmd_req_native_mode(adapter);
4710 
4711 	/* invoke this cmd first to get pf_num and vf_num which are needed
4712 	 * for issuing profile related cmds
4713 	 */
4714 	if (!BEx_chip(adapter)) {
4715 		status = be_cmd_get_func_config(adapter, NULL);
4716 		if (status)
4717 			return status;
4718 	}
4719 
4720 	status = be_get_config(adapter);
4721 	if (status)
4722 		goto err;
4723 
4724 	if (!BE2_chip(adapter) && be_physfn(adapter))
4725 		be_alloc_sriov_res(adapter);
4726 
4727 	status = be_get_resources(adapter);
4728 	if (status)
4729 		goto err;
4730 
4731 	status = be_msix_enable(adapter);
4732 	if (status)
4733 		goto err;
4734 
4735 	/* will enable all the needed filter flags in be_open() */
4736 	status = be_if_create(adapter);
4737 	if (status)
4738 		goto err;
4739 
4740 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4741 	rtnl_lock();
4742 	status = be_setup_queues(adapter);
4743 	rtnl_unlock();
4744 	if (status)
4745 		goto err;
4746 
4747 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4748 
4749 	status = be_mac_setup(adapter);
4750 	if (status)
4751 		goto err;
4752 
4753 	be_cmd_get_fw_ver(adapter);
4754 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4755 
4756 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4757 		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4758 			adapter->fw_ver);
4759 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4760 	}
4761 
4762 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4763 					 adapter->rx_fc);
4764 	if (status)
4765 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4766 					&adapter->rx_fc);
4767 
4768 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4769 		 adapter->tx_fc, adapter->rx_fc);
4770 
4771 	if (be_physfn(adapter))
4772 		be_cmd_set_logical_link_config(adapter,
4773 					       IFLA_VF_LINK_STATE_AUTO, 0);
4774 
4775 	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4776 	 * confusing a linux bridge or OVS that it might be connected to.
4777 	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4778 	 * when SRIOV is not enabled.
4779 	 */
4780 	if (BE3_chip(adapter))
4781 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4782 				      PORT_FWD_TYPE_PASSTHRU, 0);
4783 
4784 	if (adapter->num_vfs)
4785 		be_vf_setup(adapter);
4786 
4787 	status = be_cmd_get_phy_info(adapter);
4788 	if (!status && be_pause_supported(adapter))
4789 		adapter->phy.fc_autoneg = 1;
4790 
4791 	if (be_physfn(adapter) && !lancer_chip(adapter))
4792 		be_cmd_set_features(adapter);
4793 
4794 	be_schedule_worker(adapter);
4795 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4796 	return 0;
4797 err:
4798 	be_clear(adapter);
4799 	return status;
4800 }
4801 
4802 #ifdef CONFIG_NET_POLL_CONTROLLER
4803 static void be_netpoll(struct net_device *netdev)
4804 {
4805 	struct be_adapter *adapter = netdev_priv(netdev);
4806 	struct be_eq_obj *eqo;
4807 	int i;
4808 
4809 	for_all_evt_queues(adapter, eqo, i) {
4810 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4811 		napi_schedule(&eqo->napi);
4812 	}
4813 }
4814 #endif
4815 
4816 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4817 {
4818 	const struct firmware *fw;
4819 	int status;
4820 
4821 	if (!netif_running(adapter->netdev)) {
4822 		dev_err(&adapter->pdev->dev,
4823 			"Firmware load not allowed (interface is down)\n");
4824 		return -ENETDOWN;
4825 	}
4826 
4827 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4828 	if (status)
4829 		goto fw_exit;
4830 
4831 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4832 
4833 	if (lancer_chip(adapter))
4834 		status = lancer_fw_download(adapter, fw);
4835 	else
4836 		status = be_fw_download(adapter, fw);
4837 
4838 	if (!status)
4839 		be_cmd_get_fw_ver(adapter);
4840 
4841 fw_exit:
4842 	release_firmware(fw);
4843 	return status;
4844 }
4845 
4846 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4847 				 u16 flags)
4848 {
4849 	struct be_adapter *adapter = netdev_priv(dev);
4850 	struct nlattr *attr, *br_spec;
4851 	int rem;
4852 	int status = 0;
4853 	u16 mode = 0;
4854 
4855 	if (!sriov_enabled(adapter))
4856 		return -EOPNOTSUPP;
4857 
4858 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4859 	if (!br_spec)
4860 		return -EINVAL;
4861 
4862 	nla_for_each_nested(attr, br_spec, rem) {
4863 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4864 			continue;
4865 
4866 		if (nla_len(attr) < sizeof(mode))
4867 			return -EINVAL;
4868 
4869 		mode = nla_get_u16(attr);
4870 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4871 			return -EOPNOTSUPP;
4872 
4873 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4874 			return -EINVAL;
4875 
4876 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4877 					       adapter->if_handle,
4878 					       mode == BRIDGE_MODE_VEPA ?
4879 					       PORT_FWD_TYPE_VEPA :
4880 					       PORT_FWD_TYPE_VEB, 0);
4881 		if (status)
4882 			goto err;
4883 
4884 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4885 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4886 
4887 		return status;
4888 	}
4889 err:
4890 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4891 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4892 
4893 	return status;
4894 }
4895 
4896 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4897 				 struct net_device *dev, u32 filter_mask,
4898 				 int nlflags)
4899 {
4900 	struct be_adapter *adapter = netdev_priv(dev);
4901 	int status = 0;
4902 	u8 hsw_mode;
4903 
4904 	/* BE and Lancer chips support VEB mode only */
4905 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4906 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4907 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4908 			return 0;
4909 		hsw_mode = PORT_FWD_TYPE_VEB;
4910 	} else {
4911 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4912 					       adapter->if_handle, &hsw_mode,
4913 					       NULL);
4914 		if (status)
4915 			return 0;
4916 
4917 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4918 			return 0;
4919 	}
4920 
4921 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4922 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4923 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4924 				       0, 0, nlflags, filter_mask, NULL);
4925 }
4926 
4927 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4928 					 void (*func)(struct work_struct *))
4929 {
4930 	struct be_cmd_work *work;
4931 
4932 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4933 	if (!work) {
4934 		dev_err(&adapter->pdev->dev,
4935 			"be_work memory allocation failed\n");
4936 		return NULL;
4937 	}
4938 
4939 	INIT_WORK(&work->work, func);
4940 	work->adapter = adapter;
4941 	return work;
4942 }
4943 
4944 /* VxLAN offload Notes:
4945  *
4946  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4947  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4948  * is expected to work across all types of IP tunnels once exported. Skyhawk
4949  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4950  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4951  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4952  * those other tunnels are unexported on the fly through ndo_features_check().
4953  *
4954  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4955  * adds more than one port, disable offloads and re-enable them again when
4956  * there's only one port left. We maintain a list of ports for this purpose.
4957  */
4958 static void be_work_add_vxlan_port(struct work_struct *work)
4959 {
4960 	struct be_cmd_work *cmd_work =
4961 				container_of(work, struct be_cmd_work, work);
4962 	struct be_adapter *adapter = cmd_work->adapter;
4963 	struct device *dev = &adapter->pdev->dev;
4964 	__be16 port = cmd_work->info.vxlan_port;
4965 	struct be_vxlan_port *vxlan_port;
4966 	int status;
4967 
4968 	/* Bump up the alias count if it is an existing port */
4969 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4970 		if (vxlan_port->port == port) {
4971 			vxlan_port->port_aliases++;
4972 			goto done;
4973 		}
4974 	}
4975 
4976 	/* Add a new port to our list. We don't need a lock here since port
4977 	 * add/delete are done only in the context of a single-threaded work
4978 	 * queue (be_wq).
4979 	 */
4980 	vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
4981 	if (!vxlan_port)
4982 		goto done;
4983 
4984 	vxlan_port->port = port;
4985 	INIT_LIST_HEAD(&vxlan_port->list);
4986 	list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
4987 	adapter->vxlan_port_count++;
4988 
4989 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4990 		dev_info(dev,
4991 			 "Only one UDP port supported for VxLAN offloads\n");
4992 		dev_info(dev, "Disabling VxLAN offloads\n");
4993 		goto err;
4994 	}
4995 
4996 	if (adapter->vxlan_port_count > 1)
4997 		goto done;
4998 
4999 	status = be_enable_vxlan_offloads(adapter);
5000 	if (!status)
5001 		goto done;
5002 
5003 err:
5004 	be_disable_vxlan_offloads(adapter);
5005 done:
5006 	kfree(cmd_work);
5007 	return;
5008 }
5009 
5010 static void be_work_del_vxlan_port(struct work_struct *work)
5011 {
5012 	struct be_cmd_work *cmd_work =
5013 				container_of(work, struct be_cmd_work, work);
5014 	struct be_adapter *adapter = cmd_work->adapter;
5015 	__be16 port = cmd_work->info.vxlan_port;
5016 	struct be_vxlan_port *vxlan_port;
5017 
5018 	/* Nothing to be done if a port alias is being deleted */
5019 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5020 		if (vxlan_port->port == port) {
5021 			if (vxlan_port->port_aliases) {
5022 				vxlan_port->port_aliases--;
5023 				goto done;
5024 			}
5025 			break;
5026 		}
5027 	}
5028 
5029 	/* No port aliases left; delete the port from the list */
5030 	list_del(&vxlan_port->list);
5031 	adapter->vxlan_port_count--;
5032 
5033 	/* Disable VxLAN offload if this is the offloaded port */
5034 	if (adapter->vxlan_port == vxlan_port->port) {
5035 		WARN_ON(adapter->vxlan_port_count);
5036 		be_disable_vxlan_offloads(adapter);
5037 		dev_info(&adapter->pdev->dev,
5038 			 "Disabled VxLAN offloads for UDP port %d\n",
5039 			 be16_to_cpu(port));
5040 		goto out;
5041 	}
5042 
5043 	/* If only 1 port is left, re-enable VxLAN offload */
5044 	if (adapter->vxlan_port_count == 1)
5045 		be_enable_vxlan_offloads(adapter);
5046 
5047 out:
5048 	kfree(vxlan_port);
5049 done:
5050 	kfree(cmd_work);
5051 }
5052 
5053 static void be_cfg_vxlan_port(struct net_device *netdev,
5054 			      struct udp_tunnel_info *ti,
5055 			      void (*func)(struct work_struct *))
5056 {
5057 	struct be_adapter *adapter = netdev_priv(netdev);
5058 	struct be_cmd_work *cmd_work;
5059 
5060 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5061 		return;
5062 
5063 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5064 		return;
5065 
5066 	cmd_work = be_alloc_work(adapter, func);
5067 	if (cmd_work) {
5068 		cmd_work->info.vxlan_port = ti->port;
5069 		queue_work(be_wq, &cmd_work->work);
5070 	}
5071 }
5072 
5073 static void be_del_vxlan_port(struct net_device *netdev,
5074 			      struct udp_tunnel_info *ti)
5075 {
5076 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5077 }
5078 
5079 static void be_add_vxlan_port(struct net_device *netdev,
5080 			      struct udp_tunnel_info *ti)
5081 {
5082 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5083 }
5084 
5085 static netdev_features_t be_features_check(struct sk_buff *skb,
5086 					   struct net_device *dev,
5087 					   netdev_features_t features)
5088 {
5089 	struct be_adapter *adapter = netdev_priv(dev);
5090 	u8 l4_hdr = 0;
5091 
5092 	/* The code below restricts offload features for some tunneled and
5093 	 * Q-in-Q packets.
5094 	 * Offload features for normal (non tunnel) packets are unchanged.
5095 	 */
5096 	features = vlan_features_check(skb, features);
5097 	if (!skb->encapsulation ||
5098 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5099 		return features;
5100 
5101 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5102 	 * should disable tunnel offload features if it's not a VxLAN packet,
5103 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5104 	 * allow other tunneled traffic like GRE work fine while VxLAN
5105 	 * offloads are configured in Skyhawk-R.
5106 	 */
5107 	switch (vlan_get_protocol(skb)) {
5108 	case htons(ETH_P_IP):
5109 		l4_hdr = ip_hdr(skb)->protocol;
5110 		break;
5111 	case htons(ETH_P_IPV6):
5112 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5113 		break;
5114 	default:
5115 		return features;
5116 	}
5117 
5118 	if (l4_hdr != IPPROTO_UDP ||
5119 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5120 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5121 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5122 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5123 	    !adapter->vxlan_port ||
5124 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5125 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5126 
5127 	return features;
5128 }
5129 
5130 static int be_get_phys_port_id(struct net_device *dev,
5131 			       struct netdev_phys_item_id *ppid)
5132 {
5133 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5134 	struct be_adapter *adapter = netdev_priv(dev);
5135 	u8 *id;
5136 
5137 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5138 		return -ENOSPC;
5139 
5140 	ppid->id[0] = adapter->hba_port_num + 1;
5141 	id = &ppid->id[1];
5142 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5143 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5144 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5145 
5146 	ppid->id_len = id_len;
5147 
5148 	return 0;
5149 }
5150 
5151 static void be_set_rx_mode(struct net_device *dev)
5152 {
5153 	struct be_adapter *adapter = netdev_priv(dev);
5154 	struct be_cmd_work *work;
5155 
5156 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5157 	if (work)
5158 		queue_work(be_wq, &work->work);
5159 }
5160 
5161 static const struct net_device_ops be_netdev_ops = {
5162 	.ndo_open		= be_open,
5163 	.ndo_stop		= be_close,
5164 	.ndo_start_xmit		= be_xmit,
5165 	.ndo_set_rx_mode	= be_set_rx_mode,
5166 	.ndo_set_mac_address	= be_mac_addr_set,
5167 	.ndo_get_stats64	= be_get_stats64,
5168 	.ndo_validate_addr	= eth_validate_addr,
5169 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5170 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5171 	.ndo_set_vf_mac		= be_set_vf_mac,
5172 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5173 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5174 	.ndo_get_vf_config	= be_get_vf_config,
5175 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5176 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5177 #ifdef CONFIG_NET_POLL_CONTROLLER
5178 	.ndo_poll_controller	= be_netpoll,
5179 #endif
5180 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5181 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5182 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5183 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5184 	.ndo_features_check	= be_features_check,
5185 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5186 };
5187 
5188 static void be_netdev_init(struct net_device *netdev)
5189 {
5190 	struct be_adapter *adapter = netdev_priv(netdev);
5191 
5192 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5193 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5194 		NETIF_F_HW_VLAN_CTAG_TX;
5195 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5196 		netdev->hw_features |= NETIF_F_RXHASH;
5197 
5198 	netdev->features |= netdev->hw_features |
5199 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5200 
5201 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5202 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5203 
5204 	netdev->priv_flags |= IFF_UNICAST_FLT;
5205 
5206 	netdev->flags |= IFF_MULTICAST;
5207 
5208 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5209 
5210 	netdev->netdev_ops = &be_netdev_ops;
5211 
5212 	netdev->ethtool_ops = &be_ethtool_ops;
5213 
5214 	/* MTU range: 256 - 9000 */
5215 	netdev->min_mtu = BE_MIN_MTU;
5216 	netdev->max_mtu = BE_MAX_MTU;
5217 }
5218 
5219 static void be_cleanup(struct be_adapter *adapter)
5220 {
5221 	struct net_device *netdev = adapter->netdev;
5222 
5223 	rtnl_lock();
5224 	netif_device_detach(netdev);
5225 	if (netif_running(netdev))
5226 		be_close(netdev);
5227 	rtnl_unlock();
5228 
5229 	be_clear(adapter);
5230 }
5231 
5232 static int be_resume(struct be_adapter *adapter)
5233 {
5234 	struct net_device *netdev = adapter->netdev;
5235 	int status;
5236 
5237 	status = be_setup(adapter);
5238 	if (status)
5239 		return status;
5240 
5241 	rtnl_lock();
5242 	if (netif_running(netdev))
5243 		status = be_open(netdev);
5244 	rtnl_unlock();
5245 
5246 	if (status)
5247 		return status;
5248 
5249 	netif_device_attach(netdev);
5250 
5251 	return 0;
5252 }
5253 
5254 static void be_soft_reset(struct be_adapter *adapter)
5255 {
5256 	u32 val;
5257 
5258 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5259 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5260 	val |= SLIPORT_SOFTRESET_SR_MASK;
5261 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5262 }
5263 
5264 static bool be_err_is_recoverable(struct be_adapter *adapter)
5265 {
5266 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5267 	unsigned long initial_idle_time =
5268 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5269 	unsigned long recovery_interval =
5270 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5271 	u16 ue_err_code;
5272 	u32 val;
5273 
5274 	val = be_POST_stage_get(adapter);
5275 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5276 		return false;
5277 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5278 	if (ue_err_code == 0)
5279 		return false;
5280 
5281 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5282 		ue_err_code);
5283 
5284 	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5285 		dev_err(&adapter->pdev->dev,
5286 			"Cannot recover within %lu sec from driver load\n",
5287 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5288 		return false;
5289 	}
5290 
5291 	if (err_rec->last_recovery_time && time_before_eq(
5292 		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5293 		dev_err(&adapter->pdev->dev,
5294 			"Cannot recover within %lu sec from last recovery\n",
5295 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5296 		return false;
5297 	}
5298 
5299 	if (ue_err_code == err_rec->last_err_code) {
5300 		dev_err(&adapter->pdev->dev,
5301 			"Cannot recover from a consecutive TPE error\n");
5302 		return false;
5303 	}
5304 
5305 	err_rec->last_recovery_time = jiffies;
5306 	err_rec->last_err_code = ue_err_code;
5307 	return true;
5308 }
5309 
5310 static int be_tpe_recover(struct be_adapter *adapter)
5311 {
5312 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5313 	int status = -EAGAIN;
5314 	u32 val;
5315 
5316 	switch (err_rec->recovery_state) {
5317 	case ERR_RECOVERY_ST_NONE:
5318 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5319 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5320 		break;
5321 
5322 	case ERR_RECOVERY_ST_DETECT:
5323 		val = be_POST_stage_get(adapter);
5324 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5325 		    POST_STAGE_RECOVERABLE_ERR) {
5326 			dev_err(&adapter->pdev->dev,
5327 				"Unrecoverable HW error detected: 0x%x\n", val);
5328 			status = -EINVAL;
5329 			err_rec->resched_delay = 0;
5330 			break;
5331 		}
5332 
5333 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5334 
5335 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5336 		 * milliseconds before it checks for final error status in
5337 		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5338 		 * If it does, then PF0 initiates a Soft Reset.
5339 		 */
5340 		if (adapter->pf_num == 0) {
5341 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5342 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5343 					ERR_RECOVERY_UE_DETECT_DURATION;
5344 			break;
5345 		}
5346 
5347 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5348 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5349 					ERR_RECOVERY_UE_DETECT_DURATION;
5350 		break;
5351 
5352 	case ERR_RECOVERY_ST_RESET:
5353 		if (!be_err_is_recoverable(adapter)) {
5354 			dev_err(&adapter->pdev->dev,
5355 				"Failed to meet recovery criteria\n");
5356 			status = -EIO;
5357 			err_rec->resched_delay = 0;
5358 			break;
5359 		}
5360 		be_soft_reset(adapter);
5361 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5362 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5363 					err_rec->ue_to_reset_time;
5364 		break;
5365 
5366 	case ERR_RECOVERY_ST_PRE_POLL:
5367 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5368 		err_rec->resched_delay = 0;
5369 		status = 0;			/* done */
5370 		break;
5371 
5372 	default:
5373 		status = -EINVAL;
5374 		err_rec->resched_delay = 0;
5375 		break;
5376 	}
5377 
5378 	return status;
5379 }
5380 
5381 static int be_err_recover(struct be_adapter *adapter)
5382 {
5383 	int status;
5384 
5385 	if (!lancer_chip(adapter)) {
5386 		if (!adapter->error_recovery.recovery_supported ||
5387 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5388 			return -EIO;
5389 		status = be_tpe_recover(adapter);
5390 		if (status)
5391 			goto err;
5392 	}
5393 
5394 	/* Wait for adapter to reach quiescent state before
5395 	 * destroying queues
5396 	 */
5397 	status = be_fw_wait_ready(adapter);
5398 	if (status)
5399 		goto err;
5400 
5401 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5402 
5403 	be_cleanup(adapter);
5404 
5405 	status = be_resume(adapter);
5406 	if (status)
5407 		goto err;
5408 
5409 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5410 
5411 err:
5412 	return status;
5413 }
5414 
5415 static void be_err_detection_task(struct work_struct *work)
5416 {
5417 	struct be_error_recovery *err_rec =
5418 			container_of(work, struct be_error_recovery,
5419 				     err_detection_work.work);
5420 	struct be_adapter *adapter =
5421 			container_of(err_rec, struct be_adapter,
5422 				     error_recovery);
5423 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5424 	struct device *dev = &adapter->pdev->dev;
5425 	int recovery_status;
5426 
5427 	be_detect_error(adapter);
5428 	if (!be_check_error(adapter, BE_ERROR_HW))
5429 		goto reschedule_task;
5430 
5431 	recovery_status = be_err_recover(adapter);
5432 	if (!recovery_status) {
5433 		err_rec->recovery_retries = 0;
5434 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5435 		dev_info(dev, "Adapter recovery successful\n");
5436 		goto reschedule_task;
5437 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5438 		/* BEx/SH recovery state machine */
5439 		if (adapter->pf_num == 0 &&
5440 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5441 			dev_err(&adapter->pdev->dev,
5442 				"Adapter recovery in progress\n");
5443 		resched_delay = err_rec->resched_delay;
5444 		goto reschedule_task;
5445 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5446 		/* For VFs, check if PF have allocated resources
5447 		 * every second.
5448 		 */
5449 		dev_err(dev, "Re-trying adapter recovery\n");
5450 		goto reschedule_task;
5451 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5452 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5453 		/* In case of another error during recovery, it takes 30 sec
5454 		 * for adapter to come out of error. Retry error recovery after
5455 		 * this time interval.
5456 		 */
5457 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5458 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5459 		goto reschedule_task;
5460 	} else {
5461 		dev_err(dev, "Adapter recovery failed\n");
5462 		dev_err(dev, "Please reboot server to recover\n");
5463 	}
5464 
5465 	return;
5466 
5467 reschedule_task:
5468 	be_schedule_err_detection(adapter, resched_delay);
5469 }
5470 
5471 static void be_log_sfp_info(struct be_adapter *adapter)
5472 {
5473 	int status;
5474 
5475 	status = be_cmd_query_sfp_info(adapter);
5476 	if (!status) {
5477 		dev_err(&adapter->pdev->dev,
5478 			"Port %c: %s Vendor: %s part no: %s",
5479 			adapter->port_name,
5480 			be_misconfig_evt_port_state[adapter->phy_state],
5481 			adapter->phy.vendor_name,
5482 			adapter->phy.vendor_pn);
5483 	}
5484 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5485 }
5486 
5487 static void be_worker(struct work_struct *work)
5488 {
5489 	struct be_adapter *adapter =
5490 		container_of(work, struct be_adapter, work.work);
5491 	struct be_rx_obj *rxo;
5492 	int i;
5493 
5494 	if (be_physfn(adapter) &&
5495 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5496 		be_cmd_get_die_temperature(adapter);
5497 
5498 	/* when interrupts are not yet enabled, just reap any pending
5499 	 * mcc completions
5500 	 */
5501 	if (!netif_running(adapter->netdev)) {
5502 		local_bh_disable();
5503 		be_process_mcc(adapter);
5504 		local_bh_enable();
5505 		goto reschedule;
5506 	}
5507 
5508 	if (!adapter->stats_cmd_sent) {
5509 		if (lancer_chip(adapter))
5510 			lancer_cmd_get_pport_stats(adapter,
5511 						   &adapter->stats_cmd);
5512 		else
5513 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5514 	}
5515 
5516 	for_all_rx_queues(adapter, rxo, i) {
5517 		/* Replenish RX-queues starved due to memory
5518 		 * allocation failures.
5519 		 */
5520 		if (rxo->rx_post_starved)
5521 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5522 	}
5523 
5524 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5525 	if (!skyhawk_chip(adapter))
5526 		be_eqd_update(adapter, false);
5527 
5528 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5529 		be_log_sfp_info(adapter);
5530 
5531 reschedule:
5532 	adapter->work_counter++;
5533 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5534 }
5535 
5536 static void be_unmap_pci_bars(struct be_adapter *adapter)
5537 {
5538 	if (adapter->csr)
5539 		pci_iounmap(adapter->pdev, adapter->csr);
5540 	if (adapter->db)
5541 		pci_iounmap(adapter->pdev, adapter->db);
5542 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5543 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5544 }
5545 
5546 static int db_bar(struct be_adapter *adapter)
5547 {
5548 	if (lancer_chip(adapter) || be_virtfn(adapter))
5549 		return 0;
5550 	else
5551 		return 4;
5552 }
5553 
5554 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5555 {
5556 	if (skyhawk_chip(adapter)) {
5557 		adapter->roce_db.size = 4096;
5558 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5559 							      db_bar(adapter));
5560 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5561 							       db_bar(adapter));
5562 	}
5563 	return 0;
5564 }
5565 
5566 static int be_map_pci_bars(struct be_adapter *adapter)
5567 {
5568 	struct pci_dev *pdev = adapter->pdev;
5569 	u8 __iomem *addr;
5570 	u32 sli_intf;
5571 
5572 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5573 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5574 				SLI_INTF_FAMILY_SHIFT;
5575 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5576 
5577 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5578 		adapter->csr = pci_iomap(pdev, 2, 0);
5579 		if (!adapter->csr)
5580 			return -ENOMEM;
5581 	}
5582 
5583 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5584 	if (!addr)
5585 		goto pci_map_err;
5586 	adapter->db = addr;
5587 
5588 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5589 		if (be_physfn(adapter)) {
5590 			/* PCICFG is the 2nd BAR in BE2 */
5591 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5592 			if (!addr)
5593 				goto pci_map_err;
5594 			adapter->pcicfg = addr;
5595 			adapter->pcicfg_mapped = true;
5596 		} else {
5597 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5598 			adapter->pcicfg_mapped = false;
5599 		}
5600 	}
5601 
5602 	be_roce_map_pci_bars(adapter);
5603 	return 0;
5604 
5605 pci_map_err:
5606 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5607 	be_unmap_pci_bars(adapter);
5608 	return -ENOMEM;
5609 }
5610 
5611 static void be_drv_cleanup(struct be_adapter *adapter)
5612 {
5613 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5614 	struct device *dev = &adapter->pdev->dev;
5615 
5616 	if (mem->va)
5617 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5618 
5619 	mem = &adapter->rx_filter;
5620 	if (mem->va)
5621 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5622 
5623 	mem = &adapter->stats_cmd;
5624 	if (mem->va)
5625 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5626 }
5627 
5628 /* Allocate and initialize various fields in be_adapter struct */
5629 static int be_drv_init(struct be_adapter *adapter)
5630 {
5631 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5632 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5633 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5634 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5635 	struct device *dev = &adapter->pdev->dev;
5636 	int status = 0;
5637 
5638 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5639 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5640 						 &mbox_mem_alloc->dma,
5641 						 GFP_KERNEL);
5642 	if (!mbox_mem_alloc->va)
5643 		return -ENOMEM;
5644 
5645 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5646 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5647 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5648 
5649 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5650 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5651 					    &rx_filter->dma, GFP_KERNEL);
5652 	if (!rx_filter->va) {
5653 		status = -ENOMEM;
5654 		goto free_mbox;
5655 	}
5656 
5657 	if (lancer_chip(adapter))
5658 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5659 	else if (BE2_chip(adapter))
5660 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5661 	else if (BE3_chip(adapter))
5662 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5663 	else
5664 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5665 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5666 					    &stats_cmd->dma, GFP_KERNEL);
5667 	if (!stats_cmd->va) {
5668 		status = -ENOMEM;
5669 		goto free_rx_filter;
5670 	}
5671 
5672 	mutex_init(&adapter->mbox_lock);
5673 	mutex_init(&adapter->mcc_lock);
5674 	mutex_init(&adapter->rx_filter_lock);
5675 	spin_lock_init(&adapter->mcc_cq_lock);
5676 	init_completion(&adapter->et_cmd_compl);
5677 
5678 	pci_save_state(adapter->pdev);
5679 
5680 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5681 
5682 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5683 	adapter->error_recovery.resched_delay = 0;
5684 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5685 			  be_err_detection_task);
5686 
5687 	adapter->rx_fc = true;
5688 	adapter->tx_fc = true;
5689 
5690 	/* Must be a power of 2 or else MODULO will BUG_ON */
5691 	adapter->be_get_temp_freq = 64;
5692 
5693 	INIT_LIST_HEAD(&adapter->vxlan_port_list);
5694 	return 0;
5695 
5696 free_rx_filter:
5697 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5698 free_mbox:
5699 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5700 			  mbox_mem_alloc->dma);
5701 	return status;
5702 }
5703 
5704 static void be_remove(struct pci_dev *pdev)
5705 {
5706 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5707 
5708 	if (!adapter)
5709 		return;
5710 
5711 	be_roce_dev_remove(adapter);
5712 	be_intr_set(adapter, false);
5713 
5714 	be_cancel_err_detection(adapter);
5715 
5716 	unregister_netdev(adapter->netdev);
5717 
5718 	be_clear(adapter);
5719 
5720 	if (!pci_vfs_assigned(adapter->pdev))
5721 		be_cmd_reset_function(adapter);
5722 
5723 	/* tell fw we're done with firing cmds */
5724 	be_cmd_fw_clean(adapter);
5725 
5726 	be_unmap_pci_bars(adapter);
5727 	be_drv_cleanup(adapter);
5728 
5729 	pci_disable_pcie_error_reporting(pdev);
5730 
5731 	pci_release_regions(pdev);
5732 	pci_disable_device(pdev);
5733 
5734 	free_netdev(adapter->netdev);
5735 }
5736 
5737 static ssize_t be_hwmon_show_temp(struct device *dev,
5738 				  struct device_attribute *dev_attr,
5739 				  char *buf)
5740 {
5741 	struct be_adapter *adapter = dev_get_drvdata(dev);
5742 
5743 	/* Unit: millidegree Celsius */
5744 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5745 		return -EIO;
5746 	else
5747 		return sprintf(buf, "%u\n",
5748 			       adapter->hwmon_info.be_on_die_temp * 1000);
5749 }
5750 
5751 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5752 			  be_hwmon_show_temp, NULL, 1);
5753 
5754 static struct attribute *be_hwmon_attrs[] = {
5755 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5756 	NULL
5757 };
5758 
5759 ATTRIBUTE_GROUPS(be_hwmon);
5760 
5761 static char *mc_name(struct be_adapter *adapter)
5762 {
5763 	char *str = "";	/* default */
5764 
5765 	switch (adapter->mc_type) {
5766 	case UMC:
5767 		str = "UMC";
5768 		break;
5769 	case FLEX10:
5770 		str = "FLEX10";
5771 		break;
5772 	case vNIC1:
5773 		str = "vNIC-1";
5774 		break;
5775 	case nPAR:
5776 		str = "nPAR";
5777 		break;
5778 	case UFP:
5779 		str = "UFP";
5780 		break;
5781 	case vNIC2:
5782 		str = "vNIC-2";
5783 		break;
5784 	default:
5785 		str = "";
5786 	}
5787 
5788 	return str;
5789 }
5790 
5791 static inline char *func_name(struct be_adapter *adapter)
5792 {
5793 	return be_physfn(adapter) ? "PF" : "VF";
5794 }
5795 
5796 static inline char *nic_name(struct pci_dev *pdev)
5797 {
5798 	switch (pdev->device) {
5799 	case OC_DEVICE_ID1:
5800 		return OC_NAME;
5801 	case OC_DEVICE_ID2:
5802 		return OC_NAME_BE;
5803 	case OC_DEVICE_ID3:
5804 	case OC_DEVICE_ID4:
5805 		return OC_NAME_LANCER;
5806 	case BE_DEVICE_ID2:
5807 		return BE3_NAME;
5808 	case OC_DEVICE_ID5:
5809 	case OC_DEVICE_ID6:
5810 		return OC_NAME_SH;
5811 	default:
5812 		return BE_NAME;
5813 	}
5814 }
5815 
5816 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5817 {
5818 	struct be_adapter *adapter;
5819 	struct net_device *netdev;
5820 	int status = 0;
5821 
5822 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5823 
5824 	status = pci_enable_device(pdev);
5825 	if (status)
5826 		goto do_none;
5827 
5828 	status = pci_request_regions(pdev, DRV_NAME);
5829 	if (status)
5830 		goto disable_dev;
5831 	pci_set_master(pdev);
5832 
5833 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5834 	if (!netdev) {
5835 		status = -ENOMEM;
5836 		goto rel_reg;
5837 	}
5838 	adapter = netdev_priv(netdev);
5839 	adapter->pdev = pdev;
5840 	pci_set_drvdata(pdev, adapter);
5841 	adapter->netdev = netdev;
5842 	SET_NETDEV_DEV(netdev, &pdev->dev);
5843 
5844 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5845 	if (!status) {
5846 		netdev->features |= NETIF_F_HIGHDMA;
5847 	} else {
5848 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5849 		if (status) {
5850 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5851 			goto free_netdev;
5852 		}
5853 	}
5854 
5855 	status = pci_enable_pcie_error_reporting(pdev);
5856 	if (!status)
5857 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5858 
5859 	status = be_map_pci_bars(adapter);
5860 	if (status)
5861 		goto free_netdev;
5862 
5863 	status = be_drv_init(adapter);
5864 	if (status)
5865 		goto unmap_bars;
5866 
5867 	status = be_setup(adapter);
5868 	if (status)
5869 		goto drv_cleanup;
5870 
5871 	be_netdev_init(netdev);
5872 	status = register_netdev(netdev);
5873 	if (status != 0)
5874 		goto unsetup;
5875 
5876 	be_roce_dev_add(adapter);
5877 
5878 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5879 	adapter->error_recovery.probe_time = jiffies;
5880 
5881 	/* On Die temperature not supported for VF. */
5882 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5883 		adapter->hwmon_info.hwmon_dev =
5884 			devm_hwmon_device_register_with_groups(&pdev->dev,
5885 							       DRV_NAME,
5886 							       adapter,
5887 							       be_hwmon_groups);
5888 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5889 	}
5890 
5891 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5892 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5893 
5894 	return 0;
5895 
5896 unsetup:
5897 	be_clear(adapter);
5898 drv_cleanup:
5899 	be_drv_cleanup(adapter);
5900 unmap_bars:
5901 	be_unmap_pci_bars(adapter);
5902 free_netdev:
5903 	free_netdev(netdev);
5904 rel_reg:
5905 	pci_release_regions(pdev);
5906 disable_dev:
5907 	pci_disable_device(pdev);
5908 do_none:
5909 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5910 	return status;
5911 }
5912 
5913 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5914 {
5915 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5916 
5917 	be_intr_set(adapter, false);
5918 	be_cancel_err_detection(adapter);
5919 
5920 	be_cleanup(adapter);
5921 
5922 	pci_save_state(pdev);
5923 	pci_disable_device(pdev);
5924 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5925 	return 0;
5926 }
5927 
5928 static int be_pci_resume(struct pci_dev *pdev)
5929 {
5930 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5931 	int status = 0;
5932 
5933 	status = pci_enable_device(pdev);
5934 	if (status)
5935 		return status;
5936 
5937 	pci_restore_state(pdev);
5938 
5939 	status = be_resume(adapter);
5940 	if (status)
5941 		return status;
5942 
5943 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5944 
5945 	return 0;
5946 }
5947 
5948 /*
5949  * An FLR will stop BE from DMAing any data.
5950  */
5951 static void be_shutdown(struct pci_dev *pdev)
5952 {
5953 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5954 
5955 	if (!adapter)
5956 		return;
5957 
5958 	be_roce_dev_shutdown(adapter);
5959 	cancel_delayed_work_sync(&adapter->work);
5960 	be_cancel_err_detection(adapter);
5961 
5962 	netif_device_detach(adapter->netdev);
5963 
5964 	be_cmd_reset_function(adapter);
5965 
5966 	pci_disable_device(pdev);
5967 }
5968 
5969 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5970 					    pci_channel_state_t state)
5971 {
5972 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5973 
5974 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5975 
5976 	be_roce_dev_remove(adapter);
5977 
5978 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5979 		be_set_error(adapter, BE_ERROR_EEH);
5980 
5981 		be_cancel_err_detection(adapter);
5982 
5983 		be_cleanup(adapter);
5984 	}
5985 
5986 	if (state == pci_channel_io_perm_failure)
5987 		return PCI_ERS_RESULT_DISCONNECT;
5988 
5989 	pci_disable_device(pdev);
5990 
5991 	/* The error could cause the FW to trigger a flash debug dump.
5992 	 * Resetting the card while flash dump is in progress
5993 	 * can cause it not to recover; wait for it to finish.
5994 	 * Wait only for first function as it is needed only once per
5995 	 * adapter.
5996 	 */
5997 	if (pdev->devfn == 0)
5998 		ssleep(30);
5999 
6000 	return PCI_ERS_RESULT_NEED_RESET;
6001 }
6002 
6003 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6004 {
6005 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6006 	int status;
6007 
6008 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6009 
6010 	status = pci_enable_device(pdev);
6011 	if (status)
6012 		return PCI_ERS_RESULT_DISCONNECT;
6013 
6014 	pci_set_master(pdev);
6015 	pci_restore_state(pdev);
6016 
6017 	/* Check if card is ok and fw is ready */
6018 	dev_info(&adapter->pdev->dev,
6019 		 "Waiting for FW to be ready after EEH reset\n");
6020 	status = be_fw_wait_ready(adapter);
6021 	if (status)
6022 		return PCI_ERS_RESULT_DISCONNECT;
6023 
6024 	pci_cleanup_aer_uncorrect_error_status(pdev);
6025 	be_clear_error(adapter, BE_CLEAR_ALL);
6026 	return PCI_ERS_RESULT_RECOVERED;
6027 }
6028 
6029 static void be_eeh_resume(struct pci_dev *pdev)
6030 {
6031 	int status = 0;
6032 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6033 
6034 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6035 
6036 	pci_save_state(pdev);
6037 
6038 	status = be_resume(adapter);
6039 	if (status)
6040 		goto err;
6041 
6042 	be_roce_dev_add(adapter);
6043 
6044 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6045 	return;
6046 err:
6047 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6048 }
6049 
6050 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6051 {
6052 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6053 	struct be_resources vft_res = {0};
6054 	int status;
6055 
6056 	if (!num_vfs)
6057 		be_vf_clear(adapter);
6058 
6059 	adapter->num_vfs = num_vfs;
6060 
6061 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6062 		dev_warn(&pdev->dev,
6063 			 "Cannot disable VFs while they are assigned\n");
6064 		return -EBUSY;
6065 	}
6066 
6067 	/* When the HW is in SRIOV capable configuration, the PF-pool resources
6068 	 * are equally distributed across the max-number of VFs. The user may
6069 	 * request only a subset of the max-vfs to be enabled.
6070 	 * Based on num_vfs, redistribute the resources across num_vfs so that
6071 	 * each VF will have access to more number of resources.
6072 	 * This facility is not available in BE3 FW.
6073 	 * Also, this is done by FW in Lancer chip.
6074 	 */
6075 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6076 		be_calculate_vf_res(adapter, adapter->num_vfs,
6077 				    &vft_res);
6078 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6079 						 adapter->num_vfs, &vft_res);
6080 		if (status)
6081 			dev_err(&pdev->dev,
6082 				"Failed to optimize SR-IOV resources\n");
6083 	}
6084 
6085 	status = be_get_resources(adapter);
6086 	if (status)
6087 		return be_cmd_status(status);
6088 
6089 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6090 	rtnl_lock();
6091 	status = be_update_queues(adapter);
6092 	rtnl_unlock();
6093 	if (status)
6094 		return be_cmd_status(status);
6095 
6096 	if (adapter->num_vfs)
6097 		status = be_vf_setup(adapter);
6098 
6099 	if (!status)
6100 		return adapter->num_vfs;
6101 
6102 	return 0;
6103 }
6104 
6105 static const struct pci_error_handlers be_eeh_handlers = {
6106 	.error_detected = be_eeh_err_detected,
6107 	.slot_reset = be_eeh_reset,
6108 	.resume = be_eeh_resume,
6109 };
6110 
6111 static struct pci_driver be_driver = {
6112 	.name = DRV_NAME,
6113 	.id_table = be_dev_ids,
6114 	.probe = be_probe,
6115 	.remove = be_remove,
6116 	.suspend = be_suspend,
6117 	.resume = be_pci_resume,
6118 	.shutdown = be_shutdown,
6119 	.sriov_configure = be_pci_sriov_configure,
6120 	.err_handler = &be_eeh_handlers
6121 };
6122 
6123 static int __init be_init_module(void)
6124 {
6125 	int status;
6126 
6127 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6128 	    rx_frag_size != 2048) {
6129 		printk(KERN_WARNING DRV_NAME
6130 			" : Module param rx_frag_size must be 2048/4096/8192."
6131 			" Using 2048\n");
6132 		rx_frag_size = 2048;
6133 	}
6134 
6135 	if (num_vfs > 0) {
6136 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6137 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6138 	}
6139 
6140 	be_wq = create_singlethread_workqueue("be_wq");
6141 	if (!be_wq) {
6142 		pr_warn(DRV_NAME "workqueue creation failed\n");
6143 		return -1;
6144 	}
6145 
6146 	be_err_recovery_workq =
6147 		create_singlethread_workqueue("be_err_recover");
6148 	if (!be_err_recovery_workq)
6149 		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6150 
6151 	status = pci_register_driver(&be_driver);
6152 	if (status) {
6153 		destroy_workqueue(be_wq);
6154 		be_destroy_err_recovery_workq();
6155 	}
6156 	return status;
6157 }
6158 module_init(be_init_module);
6159 
6160 static void __exit be_exit_module(void)
6161 {
6162 	pci_unregister_driver(&be_driver);
6163 
6164 	be_destroy_err_recovery_workq();
6165 
6166 	if (be_wq)
6167 		destroy_workqueue(be_wq);
6168 }
6169 module_exit(be_exit_module);
6170