1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds received data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
/* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
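/* Allocate the coherent DMA memory backing a queue of 'len' entries,
 * each 'entry_size' bytes long
 */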
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
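/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR interrupt-control register via PCI config space
 */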
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
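/* Enable/disable host interrupts via a FW cmd; fall back to the
 * config-space register if the cmd fails
 */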
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
	/* On Lancer chips, interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
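/* Ring the RX queue doorbell to post 'posted' buffers to the HW */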
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
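/* Ring the TX doorbell to notify the HW of 'posted' WRBs */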
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
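/* Ring the EQ doorbell: optionally re-arm the EQ and clear the interrupt,
 * ack 'num_popped' events and program the interrupt delay multiplier
 */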
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
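/* Ring the CQ doorbell: optionally re-arm the CQ and ack 'num_popped'
 * completions
 */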
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
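/* Program 'mac' as the primary MAC (pmac_id[0]) of the interface, reusing
 * an existing uc-list entry if the MAC is already programmed
 */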
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279 			/* mac already added, skip addition */
280 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281 			return 0;
282 		}
283 	}
284 
285 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286 			       &adapter->pmac_id[0], 0);
287 }
288 
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291 	int i;
292 
293 	/* Skip deletion if the programmed mac is
294 	 * being used in uc-list
295 	 */
296 	for (i = 0; i < adapter->uc_macs; i++) {
297 		if (adapter->pmac_id[i + 1] == pmac_id)
298 			return;
299 	}
300 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302 
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305 	struct be_adapter *adapter = netdev_priv(netdev);
306 	struct device *dev = &adapter->pdev->dev;
307 	struct sockaddr *addr = p;
308 	int status;
309 	u8 mac[ETH_ALEN];
310 	u32 old_pmac_id = adapter->pmac_id[0];
311 
312 	if (!is_valid_ether_addr(addr->sa_data))
313 		return -EADDRNOTAVAIL;
314 
	/* Proceed further only if the user-provided MAC is different
	 * from the active MAC
	 */
318 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319 		return 0;
320 
	/* BE3 VFs without FILTMGMT privilege are not allowed to set their
	 * MAC address
	 */
324 	if (BEx_chip(adapter) && be_virtfn(adapter) &&
325 	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
326 		return -EPERM;
327 
328 	/* if device is not running, copy MAC to netdev->dev_addr */
329 	if (!netif_running(netdev))
330 		goto done;
331 
	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
	 * privilege or if the PF did not provision the new MAC address.
	 * On BE3, this cmd always fails if the VF doesn't have the
	 * FILTMGMT privilege. That failure is OK only if the PF has already
	 * programmed the MAC for the VF.
	 */
338 	mutex_lock(&adapter->rx_filter_lock);
339 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
	if (!status) {
342 		/* Delete the old programmed MAC. This call may fail if the
343 		 * old MAC was already deleted by the PF driver.
344 		 */
345 		if (adapter->pmac_id[0] != old_pmac_id)
346 			be_dev_mac_del(adapter, old_pmac_id);
347 	}
348 
349 	mutex_unlock(&adapter->rx_filter_lock);
350 	/* Decide if the new MAC is successfully activated only after
351 	 * querying the FW
352 	 */
353 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354 				       adapter->if_handle, true, 0);
355 	if (status)
356 		goto err;
357 
	/* The MAC change did not happen, either due to lack of privilege
	 * or because the PF didn't pre-provision the MAC.
	 */
361 	if (!ether_addr_equal(addr->sa_data, mac)) {
362 		status = -EPERM;
363 		goto err;
364 	}
365 done:
366 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
367 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
368 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
369 	return 0;
370 err:
371 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
372 	return status;
373 }
374 
375 /* BE2 supports only v0 cmd */
376 static void *hw_stats_from_cmd(struct be_adapter *adapter)
377 {
378 	if (BE2_chip(adapter)) {
379 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
380 
381 		return &cmd->hw_stats;
382 	} else if (BE3_chip(adapter)) {
383 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
384 
385 		return &cmd->hw_stats;
386 	} else {
387 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
388 
389 		return &cmd->hw_stats;
390 	}
391 }
392 
393 /* BE2 supports only v0 cmd */
394 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
395 {
396 	if (BE2_chip(adapter)) {
397 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
398 
399 		return &hw_stats->erx;
400 	} else if (BE3_chip(adapter)) {
401 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
402 
403 		return &hw_stats->erx;
404 	} else {
405 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
406 
407 		return &hw_stats->erx;
408 	}
409 }
410 
411 static void populate_be_v0_stats(struct be_adapter *adapter)
412 {
413 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
414 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
415 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
416 	struct be_port_rxf_stats_v0 *port_stats =
417 					&rxf_stats->port[adapter->port_num];
418 	struct be_drv_stats *drvs = &adapter->drv_stats;
419 
420 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
421 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
422 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
423 	drvs->rx_control_frames = port_stats->rx_control_frames;
424 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
425 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
426 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
427 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
428 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
429 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
430 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
431 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
432 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
433 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
434 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
435 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
436 	drvs->rx_dropped_header_too_small =
437 		port_stats->rx_dropped_header_too_small;
438 	drvs->rx_address_filtered =
439 					port_stats->rx_address_filtered +
440 					port_stats->rx_vlan_filtered;
441 	drvs->rx_alignment_symbol_errors =
442 		port_stats->rx_alignment_symbol_errors;
443 
444 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
445 	drvs->tx_controlframes = port_stats->tx_controlframes;
446 
447 	if (adapter->port_num)
448 		drvs->jabber_events = rxf_stats->port1_jabber_events;
449 	else
450 		drvs->jabber_events = rxf_stats->port0_jabber_events;
451 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
452 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
453 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
454 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
455 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
456 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
457 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
458 }
459 
460 static void populate_be_v1_stats(struct be_adapter *adapter)
461 {
462 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
463 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
464 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
465 	struct be_port_rxf_stats_v1 *port_stats =
466 					&rxf_stats->port[adapter->port_num];
467 	struct be_drv_stats *drvs = &adapter->drv_stats;
468 
469 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
470 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
471 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
472 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
473 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
474 	drvs->rx_control_frames = port_stats->rx_control_frames;
475 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
476 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
477 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
478 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
479 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
480 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
481 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
482 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
483 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
484 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
485 	drvs->rx_dropped_header_too_small =
486 		port_stats->rx_dropped_header_too_small;
487 	drvs->rx_input_fifo_overflow_drop =
488 		port_stats->rx_input_fifo_overflow_drop;
489 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
490 	drvs->rx_alignment_symbol_errors =
491 		port_stats->rx_alignment_symbol_errors;
492 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
493 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
494 	drvs->tx_controlframes = port_stats->tx_controlframes;
495 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
496 	drvs->jabber_events = port_stats->jabber_events;
497 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
498 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
499 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
500 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
501 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
502 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
503 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
504 }
505 
506 static void populate_be_v2_stats(struct be_adapter *adapter)
507 {
508 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
509 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
510 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
511 	struct be_port_rxf_stats_v2 *port_stats =
512 					&rxf_stats->port[adapter->port_num];
513 	struct be_drv_stats *drvs = &adapter->drv_stats;
514 
515 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
516 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
517 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
518 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
519 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
520 	drvs->rx_control_frames = port_stats->rx_control_frames;
521 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
522 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
523 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
524 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
525 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
526 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
527 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
528 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
529 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
530 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
531 	drvs->rx_dropped_header_too_small =
532 		port_stats->rx_dropped_header_too_small;
533 	drvs->rx_input_fifo_overflow_drop =
534 		port_stats->rx_input_fifo_overflow_drop;
535 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
536 	drvs->rx_alignment_symbol_errors =
537 		port_stats->rx_alignment_symbol_errors;
538 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
539 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
540 	drvs->tx_controlframes = port_stats->tx_controlframes;
541 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
542 	drvs->jabber_events = port_stats->jabber_events;
543 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
544 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
545 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
546 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
547 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
548 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
549 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
550 	if (be_roce_supported(adapter)) {
551 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
552 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
553 		drvs->rx_roce_frames = port_stats->roce_frames_received;
554 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
555 		drvs->roce_drops_payload_len =
556 			port_stats->roce_drops_payload_len;
557 	}
558 }
559 
560 static void populate_lancer_stats(struct be_adapter *adapter)
561 {
562 	struct be_drv_stats *drvs = &adapter->drv_stats;
563 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
564 
565 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
566 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
567 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
568 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
569 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
570 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
571 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
572 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
573 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
574 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
575 	drvs->rx_dropped_tcp_length =
576 				pport_stats->rx_dropped_invalid_tcp_length;
577 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
578 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
579 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
580 	drvs->rx_dropped_header_too_small =
581 				pport_stats->rx_dropped_header_too_small;
582 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
583 	drvs->rx_address_filtered =
584 					pport_stats->rx_address_filtered +
585 					pport_stats->rx_vlan_filtered;
586 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
587 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
589 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
590 	drvs->jabber_events = pport_stats->rx_jabbers;
591 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
592 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
593 	drvs->rx_drops_too_many_frags =
594 				pport_stats->rx_drops_too_many_frags_lo;
595 }
596 
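/* Accumulate a 16-bit HW counter, which may wrap around, into a 32-bit
 * driver counter
 */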
597 static void accumulate_16bit_val(u32 *acc, u16 val)
598 {
#define lo(x)			((x) & 0xFFFF)
#define hi(x)			((x) & 0xFFFF0000)
601 	bool wrapped = val < lo(*acc);
602 	u32 newacc = hi(*acc) + val;
603 
604 	if (wrapped)
605 		newacc += 65536;
606 	ACCESS_ONCE(*acc) = newacc;
607 }
608 
609 static void populate_erx_stats(struct be_adapter *adapter,
610 			       struct be_rx_obj *rxo, u32 erx_stat)
611 {
612 	if (!BEx_chip(adapter))
613 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
614 	else
		/* the erx HW counter below can wrap around after 65535;
		 * the driver accumulates it into a 32-bit value
		 */
618 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
619 				     (u16)erx_stat);
620 }
621 
622 void be_parse_stats(struct be_adapter *adapter)
623 {
624 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
625 	struct be_rx_obj *rxo;
626 	int i;
627 	u32 erx_stat;
628 
629 	if (lancer_chip(adapter)) {
630 		populate_lancer_stats(adapter);
631 	} else {
632 		if (BE2_chip(adapter))
633 			populate_be_v0_stats(adapter);
634 		else if (BE3_chip(adapter))
635 			/* for BE3 */
636 			populate_be_v1_stats(adapter);
637 		else
638 			populate_be_v2_stats(adapter);
639 
		/* erx_v2 is a superset of v0/v1; use v2 for all accesses */
641 		for_all_rx_queues(adapter, rxo, i) {
642 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
643 			populate_erx_stats(adapter, rxo, erx_stat);
644 		}
645 	}
646 }
647 
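/* Aggregate the per-queue SW counters and the HW error counters into
 * the rtnl_link_stats64 struct
 */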
648 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
649 						struct rtnl_link_stats64 *stats)
650 {
651 	struct be_adapter *adapter = netdev_priv(netdev);
652 	struct be_drv_stats *drvs = &adapter->drv_stats;
653 	struct be_rx_obj *rxo;
654 	struct be_tx_obj *txo;
655 	u64 pkts, bytes;
656 	unsigned int start;
657 	int i;
658 
659 	for_all_rx_queues(adapter, rxo, i) {
660 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
661 
662 		do {
663 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
664 			pkts = rx_stats(rxo)->rx_pkts;
665 			bytes = rx_stats(rxo)->rx_bytes;
666 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
667 		stats->rx_packets += pkts;
668 		stats->rx_bytes += bytes;
669 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
670 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
671 					rx_stats(rxo)->rx_drops_no_frags;
672 	}
673 
674 	for_all_tx_queues(adapter, txo, i) {
675 		const struct be_tx_stats *tx_stats = tx_stats(txo);
676 
677 		do {
678 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
679 			pkts = tx_stats(txo)->tx_pkts;
680 			bytes = tx_stats(txo)->tx_bytes;
681 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
682 		stats->tx_packets += pkts;
683 		stats->tx_bytes += bytes;
684 	}
685 
686 	/* bad pkts received */
687 	stats->rx_errors = drvs->rx_crc_errors +
688 		drvs->rx_alignment_symbol_errors +
689 		drvs->rx_in_range_errors +
690 		drvs->rx_out_range_errors +
691 		drvs->rx_frame_too_long +
692 		drvs->rx_dropped_too_small +
693 		drvs->rx_dropped_too_short +
694 		drvs->rx_dropped_header_too_small +
695 		drvs->rx_dropped_tcp_length +
696 		drvs->rx_dropped_runt;
697 
698 	/* detailed rx errors */
699 	stats->rx_length_errors = drvs->rx_in_range_errors +
700 		drvs->rx_out_range_errors +
701 		drvs->rx_frame_too_long;
702 
703 	stats->rx_crc_errors = drvs->rx_crc_errors;
704 
705 	/* frame alignment errors */
706 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
707 
708 	/* receiver fifo overrun */
	/* drops_no_pbuf is not per i/f, it's per BE card */
710 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
711 				drvs->rx_input_fifo_overflow_drop +
712 				drvs->rx_drops_no_pbuf;
713 	return stats;
714 }
715 
716 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
717 {
718 	struct net_device *netdev = adapter->netdev;
719 
720 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
721 		netif_carrier_off(netdev);
722 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
723 	}
724 
725 	if (link_status)
726 		netif_carrier_on(netdev);
727 	else
728 		netif_carrier_off(netdev);
729 
730 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
731 }
732 
733 static int be_gso_hdr_len(struct sk_buff *skb)
734 {
735 	if (skb->encapsulation)
736 		return skb_inner_transport_offset(skb) +
737 		       inner_tcp_hdrlen(skb);
738 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
739 }
740 
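/* Update the per-TXQ SW stats for an skb being transmitted; header bytes
 * duplicated by TSO are accounted for as well
 */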
741 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
742 {
743 	struct be_tx_stats *stats = tx_stats(txo);
744 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
745 	/* Account for headers which get duplicated in TSO pkt */
746 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
747 
748 	u64_stats_update_begin(&stats->sync);
749 	stats->tx_reqs++;
750 	stats->tx_bytes += skb->len + dup_hdr_len;
751 	stats->tx_pkts += tx_pkts;
752 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
753 		stats->tx_vxlan_offload_pkts += tx_pkts;
754 	u64_stats_update_end(&stats->sync);
755 }
756 
757 /* Returns number of WRBs needed for the skb */
758 static u32 skb_wrb_cnt(struct sk_buff *skb)
759 {
760 	/* +1 for the header wrb */
761 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
762 }
763 
764 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
765 {
766 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
767 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
768 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
769 	wrb->rsvd0 = 0;
770 }
771 
772 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
773  * to avoid the swap and shift/mask operations in wrb_fill().
774  */
775 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
776 {
777 	wrb->frag_pa_hi = 0;
778 	wrb->frag_pa_lo = 0;
779 	wrb->frag_len = 0;
780 	wrb->rsvd0 = 0;
781 }
782 
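/* Return the vlan tag to be used in the TX WRB; if the skb's priority is
 * not in the available priority bmap, use the recommended priority bits
 */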
783 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
784 				     struct sk_buff *skb)
785 {
786 	u8 vlan_prio;
787 	u16 vlan_tag;
788 
789 	vlan_tag = skb_vlan_tag_get(skb);
790 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
	/* If the vlan priority provided by OS is NOT in the available bmap */
792 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
793 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
794 				adapter->recommended_prio_bits;
795 
796 	return vlan_tag;
797 }
798 
799 /* Used only for IP tunnel packets */
800 static u16 skb_inner_ip_proto(struct sk_buff *skb)
801 {
802 	return (inner_ip_hdr(skb)->version == 4) ?
803 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
804 }
805 
806 static u16 skb_ip_proto(struct sk_buff *skb)
807 {
808 	return (ip_hdr(skb)->version == 4) ?
809 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
810 }
811 
812 static inline bool be_is_txq_full(struct be_tx_obj *txo)
813 {
814 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
815 }
816 
817 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
818 {
819 	return atomic_read(&txo->q.used) < txo->q.len / 2;
820 }
821 
822 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
823 {
824 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
825 }
826 
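/* Derive the TX WRB offload flags (LSO, checksum, vlan) from the skb */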
827 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
828 				       struct sk_buff *skb,
829 				       struct be_wrb_params *wrb_params)
830 {
831 	u16 proto;
832 
833 	if (skb_is_gso(skb)) {
834 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
835 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
836 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
837 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
838 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
839 		if (skb->encapsulation) {
840 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
841 			proto = skb_inner_ip_proto(skb);
842 		} else {
843 			proto = skb_ip_proto(skb);
844 		}
845 		if (proto == IPPROTO_TCP)
846 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
847 		else if (proto == IPPROTO_UDP)
848 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
849 	}
850 
851 	if (skb_vlan_tag_present(skb)) {
852 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
853 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
854 	}
855 
856 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
857 }
858 
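/* Fill the header WRB using the offload flags collected in wrb_params */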
859 static void wrb_fill_hdr(struct be_adapter *adapter,
860 			 struct be_eth_hdr_wrb *hdr,
861 			 struct be_wrb_params *wrb_params,
862 			 struct sk_buff *skb)
863 {
864 	memset(hdr, 0, sizeof(*hdr));
865 
866 	SET_TX_WRB_HDR_BITS(crc, hdr,
867 			    BE_WRB_F_GET(wrb_params->features, CRC));
868 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
869 			    BE_WRB_F_GET(wrb_params->features, IPCS));
870 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
871 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
872 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
873 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
874 
875 	SET_TX_WRB_HDR_BITS(lso, hdr,
876 			    BE_WRB_F_GET(wrb_params->features, LSO));
877 	SET_TX_WRB_HDR_BITS(lso6, hdr,
878 			    BE_WRB_F_GET(wrb_params->features, LSO6));
879 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
880 
	/* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
	 * this hack is not needed, the evt bit is set while ringing the DB.
	 */
884 	SET_TX_WRB_HDR_BITS(event, hdr,
885 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
886 	SET_TX_WRB_HDR_BITS(vlan, hdr,
887 			    BE_WRB_F_GET(wrb_params->features, VLAN));
888 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
889 
890 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
891 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
892 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
893 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
894 }
895 
896 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
897 			  bool unmap_single)
898 {
899 	dma_addr_t dma;
900 	u32 frag_len = le32_to_cpu(wrb->frag_len);
901 
903 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
904 		(u64)le32_to_cpu(wrb->frag_pa_lo);
905 	if (frag_len) {
906 		if (unmap_single)
907 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
908 		else
909 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
910 	}
911 }
912 
913 /* Grab a WRB header for xmit */
914 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
915 {
916 	u32 head = txo->q.head;
917 
918 	queue_head_inc(&txo->q);
919 	return head;
920 }
921 
922 /* Set up the WRB header for xmit */
923 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
924 				struct be_tx_obj *txo,
925 				struct be_wrb_params *wrb_params,
926 				struct sk_buff *skb, u16 head)
927 {
928 	u32 num_frags = skb_wrb_cnt(skb);
929 	struct be_queue_info *txq = &txo->q;
930 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
931 
932 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
933 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
934 
935 	BUG_ON(txo->sent_skb_list[head]);
936 	txo->sent_skb_list[head] = skb;
937 	txo->last_req_hdr = head;
938 	atomic_add(num_frags, &txq->used);
939 	txo->last_req_wrb_cnt = num_frags;
940 	txo->pend_wrb_cnt += num_frags;
941 }
942 
943 /* Setup a WRB fragment (buffer descriptor) for xmit */
944 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
945 				 int len)
946 {
947 	struct be_eth_wrb *wrb;
948 	struct be_queue_info *txq = &txo->q;
949 
950 	wrb = queue_head_node(txq);
951 	wrb_fill(wrb, busaddr, len);
952 	queue_head_inc(txq);
953 }
954 
955 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
956  * was invoked. The producer index is restored to the previous packet and the
957  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
958  */
959 static void be_xmit_restore(struct be_adapter *adapter,
960 			    struct be_tx_obj *txo, u32 head, bool map_single,
961 			    u32 copied)
962 {
963 	struct device *dev;
964 	struct be_eth_wrb *wrb;
965 	struct be_queue_info *txq = &txo->q;
966 
967 	dev = &adapter->pdev->dev;
968 	txq->head = head;
969 
970 	/* skip the first wrb (hdr); it's not mapped */
971 	queue_head_inc(txq);
972 	while (copied) {
973 		wrb = queue_head_node(txq);
974 		unmap_tx_frag(dev, wrb, map_single);
975 		map_single = false;
976 		copied -= le32_to_cpu(wrb->frag_len);
977 		queue_head_inc(txq);
978 	}
979 
980 	txq->head = head;
981 }
982 
983 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
984  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
985  * of WRBs used up by the packet.
986  */
987 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
988 			   struct sk_buff *skb,
989 			   struct be_wrb_params *wrb_params)
990 {
991 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
992 	struct device *dev = &adapter->pdev->dev;
993 	struct be_queue_info *txq = &txo->q;
994 	bool map_single = false;
995 	u32 head = txq->head;
996 	dma_addr_t busaddr;
997 	int len;
998 
999 	head = be_tx_get_wrb_hdr(txo);
1000 
1001 	if (skb->len > skb->data_len) {
1002 		len = skb_headlen(skb);
1003 
1004 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1005 		if (dma_mapping_error(dev, busaddr))
1006 			goto dma_err;
1007 		map_single = true;
1008 		be_tx_setup_wrb_frag(txo, busaddr, len);
1009 		copied += len;
1010 	}
1011 
1012 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1013 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1014 		len = skb_frag_size(frag);
1015 
1016 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1017 		if (dma_mapping_error(dev, busaddr))
1018 			goto dma_err;
1019 		be_tx_setup_wrb_frag(txo, busaddr, len);
1020 		copied += len;
1021 	}
1022 
1023 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1024 
1025 	be_tx_stats_update(txo, skb);
1026 	return wrb_cnt;
1027 
1028 dma_err:
1029 	adapter->drv_stats.dma_map_errors++;
1030 	be_xmit_restore(adapter, txo, head, map_single, copied);
1031 	return 0;
1032 }
1033 
1034 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1035 {
1036 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1037 }
1038 
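/* Insert the vlan tag (and the outer QnQ vlan, if configured) into the
 * packet data itself, instead of relying on HW vlan tagging
 */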
1039 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1040 					     struct sk_buff *skb,
1041 					     struct be_wrb_params
1042 					     *wrb_params)
1043 {
1044 	u16 vlan_tag = 0;
1045 
1046 	skb = skb_share_check(skb, GFP_ATOMIC);
1047 	if (unlikely(!skb))
1048 		return skb;
1049 
1050 	if (skb_vlan_tag_present(skb))
1051 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1052 
1053 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1054 		if (!vlan_tag)
1055 			vlan_tag = adapter->pvid;
		/* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
		 * to skip VLAN insertion
		 */
1059 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1060 	}
1061 
1062 	if (vlan_tag) {
1063 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1064 						vlan_tag);
1065 		if (unlikely(!skb))
1066 			return skb;
1067 		skb->vlan_tci = 0;
1068 	}
1069 
1070 	/* Insert the outer VLAN, if any */
1071 	if (adapter->qnq_vid) {
1072 		vlan_tag = adapter->qnq_vid;
1073 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1074 						vlan_tag);
1075 		if (unlikely(!skb))
1076 			return skb;
1077 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1078 	}
1079 
1080 	return skb;
1081 }
1082 
1083 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1084 {
1085 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1086 	u16 offset = ETH_HLEN;
1087 
1088 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1089 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1090 
1091 		offset += sizeof(struct ipv6hdr);
1092 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1093 		    ip6h->nexthdr != NEXTHDR_UDP) {
1094 			struct ipv6_opt_hdr *ehdr =
1095 				(struct ipv6_opt_hdr *)(skb->data + offset);
1096 
1097 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1098 			if (ehdr->hdrlen == 0xff)
1099 				return true;
1100 		}
1101 	}
1102 	return false;
1103 }
1104 
1105 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1106 {
1107 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1108 }
1109 
1110 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1111 {
1112 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1113 }
1114 
1115 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1116 						  struct sk_buff *skb,
1117 						  struct be_wrb_params
1118 						  *wrb_params)
1119 {
1120 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1121 	unsigned int eth_hdr_len;
1122 	struct iphdr *ip;
1123 
	/* For padded packets, BE HW modifies the tot_len field in the IP
	 * header incorrectly when a VLAN tag is inserted by HW.
	 * For padded packets, Lancer computes an incorrect checksum.
	 */
1128 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1129 						VLAN_ETH_HLEN : ETH_HLEN;
1130 	if (skb->len <= 60 &&
1131 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1132 	    is_ipv4_pkt(skb)) {
1133 		ip = (struct iphdr *)ip_hdr(skb);
1134 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1135 	}
1136 
1137 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1138 	 * tagging in pvid-tagging mode
1139 	 */
1140 	if (be_pvid_tagging_enabled(adapter) &&
1141 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1142 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1143 
	/* HW has a bug wherein it will calculate CSUM for VLAN
	 * pkts even when CSUM offload is disabled.
	 * Manually insert the VLAN in the pkt.
	 */
1148 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1149 	    skb_vlan_tag_present(skb)) {
1150 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1151 		if (unlikely(!skb))
1152 			goto err;
1153 	}
1154 
	/* HW may lock up when VLAN HW tagging is requested on
	 * certain ipv6 packets. Drop such pkts if the HW workaround to
	 * skip HW tagging is not enabled by FW.
	 */
1159 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1160 		     (adapter->pvid || adapter->qnq_vid) &&
1161 		     !qnq_async_evt_rcvd(adapter)))
1162 		goto tx_drop;
1163 
	/* Manual VLAN tag insertion to prevent an ASIC lockup that can occur
	 * when the ASIC inserts a VLAN tag into certain ipv6 packets.
	 * Insert VLAN tags in the driver, and set the event, completion and
	 * vlan bits accordingly in the Tx WRB.
	 */
1170 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1171 	    be_vlan_tag_tx_chk(adapter, skb)) {
1172 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1173 		if (unlikely(!skb))
1174 			goto err;
1175 	}
1176 
1177 	return skb;
1178 tx_drop:
1179 	dev_kfree_skb_any(skb);
1180 err:
1181 	return NULL;
1182 }
1183 
1184 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1185 					   struct sk_buff *skb,
1186 					   struct be_wrb_params *wrb_params)
1187 {
1188 	int err;
1189 
	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
	 * packets that are 32 bytes or less may cause a transmit stall
	 * on that port. The workaround is to pad such packets
	 * (len <= 32 bytes) to a minimum length of 36 bytes.
	 */
1195 	if (skb->len <= 32) {
1196 		if (skb_put_padto(skb, 36))
1197 			return NULL;
1198 	}
1199 
1200 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1201 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1202 		if (!skb)
1203 			return NULL;
1204 	}
1205 
1206 	/* The stack can send us skbs with length greater than
1207 	 * what the HW can handle. Trim the extra bytes.
1208 	 */
1209 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1210 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1211 	WARN_ON(err);
1212 
1213 	return skb;
1214 }
1215 
1216 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1217 {
1218 	struct be_queue_info *txq = &txo->q;
1219 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1220 
1221 	/* Mark the last request eventable if it hasn't been marked already */
1222 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1223 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1224 
	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1226 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1227 		wrb_fill_dummy(queue_head_node(txq));
1228 		queue_head_inc(txq);
1229 		atomic_inc(&txq->used);
1230 		txo->pend_wrb_cnt++;
1231 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1232 					   TX_HDR_WRB_NUM_SHIFT);
1233 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1234 					  TX_HDR_WRB_NUM_SHIFT);
1235 	}
1236 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1237 	txo->pend_wrb_cnt = 0;
1238 }
1239 
1240 /* OS2BMC related */
1241 
1242 #define DHCP_CLIENT_PORT	68
1243 #define DHCP_SERVER_PORT	67
1244 #define NET_BIOS_PORT1		137
1245 #define NET_BIOS_PORT2		138
1246 #define DHCPV6_RAS_PORT		547
1247 
1248 #define is_mc_allowed_on_bmc(adapter, eh)	\
1249 	(!is_multicast_filt_enabled(adapter) &&	\
1250 	 is_multicast_ether_addr(eh->h_dest) &&	\
1251 	 !is_broadcast_ether_addr(eh->h_dest))
1252 
1253 #define is_bc_allowed_on_bmc(adapter, eh)	\
1254 	(!is_broadcast_filt_enabled(adapter) &&	\
1255 	 is_broadcast_ether_addr(eh->h_dest))
1256 
1257 #define is_arp_allowed_on_bmc(adapter, skb)	\
1258 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1259 
1260 #define is_broadcast_packet(eh, adapter)	\
1261 		(is_multicast_ether_addr(eh->h_dest) && \
1262 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1263 
1264 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1265 
1266 #define is_arp_filt_enabled(adapter)	\
1267 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1268 
1269 #define is_dhcp_client_filt_enabled(adapter)	\
1270 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1271 
1272 #define is_dhcp_srvr_filt_enabled(adapter)	\
1273 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1274 
1275 #define is_nbios_filt_enabled(adapter)	\
1276 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1277 
1278 #define is_ipv6_na_filt_enabled(adapter)	\
1279 		(adapter->bmc_filt_mask &	\
1280 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1281 
1282 #define is_ipv6_ra_filt_enabled(adapter)	\
1283 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1284 
1285 #define is_ipv6_ras_filt_enabled(adapter)	\
1286 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1287 
1288 #define is_broadcast_filt_enabled(adapter)	\
1289 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1290 
1291 #define is_multicast_filt_enabled(adapter)	\
1292 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1293 
1294 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1295 			       struct sk_buff **skb)
1296 {
1297 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1298 	bool os2bmc = false;
1299 
1300 	if (!be_is_os2bmc_enabled(adapter))
1301 		goto done;
1302 
1303 	if (!is_multicast_ether_addr(eh->h_dest))
1304 		goto done;
1305 
1306 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1307 	    is_bc_allowed_on_bmc(adapter, eh) ||
1308 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1309 		os2bmc = true;
1310 		goto done;
1311 	}
1312 
1313 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1314 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1315 		u8 nexthdr = hdr->nexthdr;
1316 
1317 		if (nexthdr == IPPROTO_ICMPV6) {
1318 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1319 
1320 			switch (icmp6->icmp6_type) {
1321 			case NDISC_ROUTER_ADVERTISEMENT:
1322 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1323 				goto done;
1324 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1325 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1326 				goto done;
1327 			default:
1328 				break;
1329 			}
1330 		}
1331 	}
1332 
1333 	if (is_udp_pkt((*skb))) {
1334 		struct udphdr *udp = udp_hdr((*skb));
1335 
1336 		switch (ntohs(udp->dest)) {
1337 		case DHCP_CLIENT_PORT:
1338 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1339 			goto done;
1340 		case DHCP_SERVER_PORT:
1341 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1342 			goto done;
1343 		case NET_BIOS_PORT1:
1344 		case NET_BIOS_PORT2:
1345 			os2bmc = is_nbios_filt_enabled(adapter);
1346 			goto done;
1347 		case DHCPV6_RAS_PORT:
1348 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1349 			goto done;
1350 		default:
1351 			break;
1352 		}
1353 	}
1354 done:
	/* For vlan packets destined to the BMC, the ASIC expects the
	 * vlan tag to be inline in the packet.
	 */
1358 	if (os2bmc)
1359 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1360 
1361 	return os2bmc;
1362 }
1363 
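/* Transmit entry point: apply HW workarounds, enqueue the skb on the TX
 * queue (a 2nd time with the mgmt bit set, if it must also reach the BMC)
 * and ring the TX doorbell when flushing
 */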
1364 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1365 {
1366 	struct be_adapter *adapter = netdev_priv(netdev);
1367 	u16 q_idx = skb_get_queue_mapping(skb);
1368 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1369 	struct be_wrb_params wrb_params = { 0 };
1370 	bool flush = !skb->xmit_more;
1371 	u16 wrb_cnt;
1372 
1373 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1374 	if (unlikely(!skb))
1375 		goto drop;
1376 
1377 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1378 
1379 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1380 	if (unlikely(!wrb_cnt)) {
1381 		dev_kfree_skb_any(skb);
1382 		goto drop;
1383 	}
1384 
	/* If os2bmc is enabled and the pkt is destined to the BMC,
	 * enqueue the pkt a 2nd time with the mgmt bit set.
	 */
1388 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1389 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1390 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1391 		if (unlikely(!wrb_cnt))
1392 			goto drop;
1393 		else
1394 			skb_get(skb);
1395 	}
1396 
1397 	if (be_is_txq_full(txo)) {
1398 		netif_stop_subqueue(netdev, q_idx);
1399 		tx_stats(txo)->tx_stops++;
1400 	}
1401 
1402 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1403 		be_xmit_flush(adapter, txo);
1404 
1405 	return NETDEV_TX_OK;
1406 drop:
1407 	tx_stats(txo)->tx_drv_drops++;
1408 	/* Flush the already enqueued tx requests */
1409 	if (flush && txo->pend_wrb_cnt)
1410 		be_xmit_flush(adapter, txo);
1411 
1412 	return NETDEV_TX_OK;
1413 }
1414 
1415 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1416 {
1417 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1418 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1419 }
1420 
1421 static int be_set_vlan_promisc(struct be_adapter *adapter)
1422 {
1423 	struct device *dev = &adapter->pdev->dev;
1424 	int status;
1425 
1426 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1427 		return 0;
1428 
1429 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1430 	if (!status) {
1431 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1432 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1433 	} else {
1434 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1435 	}
1436 	return status;
1437 }
1438 
1439 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1440 {
1441 	struct device *dev = &adapter->pdev->dev;
1442 	int status;
1443 
1444 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1445 	if (!status) {
1446 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1447 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1448 	}
1449 	return status;
1450 }
1451 
1452 /*
1453  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1454  * If the user configures more, place BE in vlan promiscuous mode.
1455  */
1456 static int be_vid_config(struct be_adapter *adapter)
1457 {
1458 	struct device *dev = &adapter->pdev->dev;
1459 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1460 	u16 num = 0, i = 0;
1461 	int status = 0;
1462 
	/* No need to change the VLAN state if the I/F is in promiscuous mode */
1464 	if (adapter->netdev->flags & IFF_PROMISC)
1465 		return 0;
1466 
1467 	if (adapter->vlans_added > be_max_vlans(adapter))
1468 		return be_set_vlan_promisc(adapter);
1469 
1470 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1471 		status = be_clear_vlan_promisc(adapter);
1472 		if (status)
1473 			return status;
1474 	}
1475 	/* Construct VLAN Table to give to HW */
1476 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1477 		vids[num++] = cpu_to_le16(i);
1478 
1479 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1480 	if (status) {
1481 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1482 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1483 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1484 		    addl_status(status) ==
1485 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1486 			return be_set_vlan_promisc(adapter);
1487 	}
1488 	return status;
1489 }
1490 
1491 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1492 {
1493 	struct be_adapter *adapter = netdev_priv(netdev);
1494 	int status = 0;
1495 
1496 	mutex_lock(&adapter->rx_filter_lock);
1497 
1498 	/* Packets with VID 0 are always received by Lancer by default */
1499 	if (lancer_chip(adapter) && vid == 0)
1500 		goto done;
1501 
1502 	if (test_bit(vid, adapter->vids))
1503 		goto done;
1504 
1505 	set_bit(vid, adapter->vids);
1506 	adapter->vlans_added++;
1507 
1508 	status = be_vid_config(adapter);
1509 done:
1510 	mutex_unlock(&adapter->rx_filter_lock);
1511 	return status;
1512 }
1513 
1514 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1515 {
1516 	struct be_adapter *adapter = netdev_priv(netdev);
1517 	int status = 0;
1518 
1519 	mutex_lock(&adapter->rx_filter_lock);
1520 
1521 	/* Packets with VID 0 are always received by Lancer by default */
1522 	if (lancer_chip(adapter) && vid == 0)
1523 		goto done;
1524 
1525 	if (!test_bit(vid, adapter->vids))
1526 		goto done;
1527 
1528 	clear_bit(vid, adapter->vids);
1529 	adapter->vlans_added--;
1530 
1531 	status = be_vid_config(adapter);
1532 done:
1533 	mutex_unlock(&adapter->rx_filter_lock);
1534 	return status;
1535 }
1536 
1537 static void be_set_all_promisc(struct be_adapter *adapter)
1538 {
1539 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1540 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1541 }
1542 
1543 static void be_set_mc_promisc(struct be_adapter *adapter)
1544 {
1545 	int status;
1546 
1547 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1548 		return;
1549 
1550 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1551 	if (!status)
1552 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1553 }
1554 
1555 static void be_set_uc_promisc(struct be_adapter *adapter)
1556 {
1557 	int status;
1558 
1559 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1560 		return;
1561 
1562 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1563 	if (!status)
1564 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1565 }
1566 
1567 static void be_clear_uc_promisc(struct be_adapter *adapter)
1568 {
1569 	int status;
1570 
1571 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1572 		return;
1573 
1574 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1575 	if (!status)
1576 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1577 }
1578 
/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
 * We use a single callback function for both sync and unsync. We really don't
 * add/remove addresses through this callback, but use it only to detect
 * changes to the uc/mc lists. The entire uc/mc list is programmed in
 * be_set_rx_mode().
 */
1584 static int be_uc_list_update(struct net_device *netdev,
1585 			     const unsigned char *addr)
1586 {
1587 	struct be_adapter *adapter = netdev_priv(netdev);
1588 
1589 	adapter->update_uc_list = true;
1590 	return 0;
1591 }
1592 
1593 static int be_mc_list_update(struct net_device *netdev,
1594 			     const unsigned char *addr)
1595 {
1596 	struct be_adapter *adapter = netdev_priv(netdev);
1597 
1598 	adapter->update_mc_list = true;
1599 	return 0;
1600 }
1601 
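/* Program the netdev mc-list into the HW multicast filter; switch to
 * mc-promisc mode if ALLMULTI is set or the list exceeds what the HW
 * supports
 */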
1602 static void be_set_mc_list(struct be_adapter *adapter)
1603 {
1604 	struct net_device *netdev = adapter->netdev;
1605 	struct netdev_hw_addr *ha;
1606 	bool mc_promisc = false;
1607 	int status;
1608 
1609 	netif_addr_lock_bh(netdev);
1610 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1611 
1612 	if (netdev->flags & IFF_PROMISC) {
1613 		adapter->update_mc_list = false;
1614 	} else if (netdev->flags & IFF_ALLMULTI ||
1615 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1616 		/* Enable multicast promisc if num configured exceeds
1617 		 * what we support
1618 		 */
1619 		mc_promisc = true;
1620 		adapter->update_mc_list = false;
1621 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1622 		/* Update mc-list unconditionally if the iface was previously
1623 		 * in mc-promisc mode and now is out of that mode.
1624 		 */
1625 		adapter->update_mc_list = true;
1626 	}
1627 
1628 	if (adapter->update_mc_list) {
1629 		int i = 0;
1630 
1631 		/* cache the mc-list in adapter */
1632 		netdev_for_each_mc_addr(ha, netdev) {
1633 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1634 			i++;
1635 		}
1636 		adapter->mc_count = netdev_mc_count(netdev);
1637 	}
1638 	netif_addr_unlock_bh(netdev);
1639 
1640 	if (mc_promisc) {
1641 		be_set_mc_promisc(adapter);
1642 	} else if (adapter->update_mc_list) {
1643 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1644 		if (!status)
1645 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1646 		else
1647 			be_set_mc_promisc(adapter);
1648 
1649 		adapter->update_mc_list = false;
1650 	}
1651 }
1652 
1653 static void be_clear_mc_list(struct be_adapter *adapter)
1654 {
1655 	struct net_device *netdev = adapter->netdev;
1656 
1657 	__dev_mc_unsync(netdev, NULL);
1658 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1659 	adapter->mc_count = 0;
1660 }
1661 
1662 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1663 {
1664 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1665 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1666 		return 0;
1667 	}
1668 
1669 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1670 			       adapter->if_handle,
1671 			       &adapter->pmac_id[uc_idx + 1], 0);
1672 }
1673 
1674 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1675 {
1676 	if (pmac_id == adapter->pmac_id[0])
1677 		return;
1678 
1679 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1680 }
1681 
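/* Program the netdev uc-list MACs into the HW; switch to uc-promisc mode
 * if the list exceeds what the HW supports
 */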
1682 static void be_set_uc_list(struct be_adapter *adapter)
1683 {
1684 	struct net_device *netdev = adapter->netdev;
1685 	struct netdev_hw_addr *ha;
1686 	bool uc_promisc = false;
1687 	int curr_uc_macs = 0, i;
1688 
1689 	netif_addr_lock_bh(netdev);
1690 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1691 
1692 	if (netdev->flags & IFF_PROMISC) {
1693 		adapter->update_uc_list = false;
1694 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1695 		uc_promisc = true;
1696 		adapter->update_uc_list = false;
	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1698 		/* Update uc-list unconditionally if the iface was previously
1699 		 * in uc-promisc mode and now is out of that mode.
1700 		 */
1701 		adapter->update_uc_list = true;
1702 	}
1703 
1704 	if (adapter->update_uc_list) {
1705 		/* cache the uc-list in adapter array */
1706 		i = 0;
1707 		netdev_for_each_uc_addr(ha, netdev) {
1708 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1709 			i++;
1710 		}
1711 		curr_uc_macs = netdev_uc_count(netdev);
1712 	}
1713 	netif_addr_unlock_bh(netdev);
1714 
1715 	if (uc_promisc) {
1716 		be_set_uc_promisc(adapter);
1717 	} else if (adapter->update_uc_list) {
1718 		be_clear_uc_promisc(adapter);
1719 
1720 		for (i = 0; i < adapter->uc_macs; i++)
1721 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1722 
1723 		for (i = 0; i < curr_uc_macs; i++)
1724 			be_uc_mac_add(adapter, i);
1725 		adapter->uc_macs = curr_uc_macs;
1726 		adapter->update_uc_list = false;
1727 	}
1728 }
1729 
1730 static void be_clear_uc_list(struct be_adapter *adapter)
1731 {
1732 	struct net_device *netdev = adapter->netdev;
1733 	int i;
1734 
1735 	__dev_uc_unsync(netdev, NULL);
1736 	for (i = 0; i < adapter->uc_macs; i++)
1737 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1738 
1739 	adapter->uc_macs = 0;
1740 }
1741 
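/* Apply the current netdev RX-mode (promisc and the UC/MC address lists)
 * to the adapter, under rx_filter_lock.
 */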
1742 static void __be_set_rx_mode(struct be_adapter *adapter)
1743 {
1744 	struct net_device *netdev = adapter->netdev;
1745 
1746 	mutex_lock(&adapter->rx_filter_lock);
1747 
1748 	if (netdev->flags & IFF_PROMISC) {
1749 		if (!be_in_all_promisc(adapter))
1750 			be_set_all_promisc(adapter);
1751 	} else if (be_in_all_promisc(adapter)) {
1752 		/* We need to re-program the vlan-list or clear
1753 		 * vlan-promisc mode (if needed) when the interface
1754 		 * comes out of promisc mode.
1755 		 */
1756 		be_vid_config(adapter);
1757 	}
1758 
1759 	be_set_uc_list(adapter);
1760 	be_set_mc_list(adapter);
1761 
1762 	mutex_unlock(&adapter->rx_filter_lock);
1763 }
1764 
1765 static void be_work_set_rx_mode(struct work_struct *work)
1766 {
1767 	struct be_cmd_work *cmd_work =
1768 				container_of(work, struct be_cmd_work, work);
1769 
1770 	__be_set_rx_mode(cmd_work->adapter);
1771 	kfree(cmd_work);
1772 }
1773 
1774 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1775 {
1776 	struct be_adapter *adapter = netdev_priv(netdev);
1777 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1778 	int status;
1779 
1780 	if (!sriov_enabled(adapter))
1781 		return -EPERM;
1782 
1783 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1784 		return -EINVAL;
1785 
1786 	/* Proceed further only if the user-provided MAC is different
1787 	 * from the active MAC
1788 	 */
1789 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1790 		return 0;
1791 
1792 	if (BEx_chip(adapter)) {
1793 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1794 				vf + 1);
1795 
1796 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1797 					 &vf_cfg->pmac_id, vf + 1);
1798 	} else {
1799 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1800 					vf + 1);
1801 	}
1802 
1803 	if (status) {
1804 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1805 			mac, vf, status);
1806 		return be_cmd_status(status);
1807 	}
1808 
1809 	ether_addr_copy(vf_cfg->mac_addr, mac);
1810 
1811 	return 0;
1812 }
1813 
1814 static int be_get_vf_config(struct net_device *netdev, int vf,
1815 			    struct ifla_vf_info *vi)
1816 {
1817 	struct be_adapter *adapter = netdev_priv(netdev);
1818 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1819 
1820 	if (!sriov_enabled(adapter))
1821 		return -EPERM;
1822 
1823 	if (vf >= adapter->num_vfs)
1824 		return -EINVAL;
1825 
1826 	vi->vf = vf;
1827 	vi->max_tx_rate = vf_cfg->tx_rate;
1828 	vi->min_tx_rate = 0;
1829 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1830 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1831 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1832 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1833 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1834 
1835 	return 0;
1836 }
1837 
1838 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1839 {
1840 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1841 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1842 	int vf_if_id = vf_cfg->if_handle;
1843 	int status;
1844 
1845 	/* Enable Transparent VLAN Tagging */
1846 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1847 	if (status)
1848 		return status;
1849 
1850 	/* With TVT enabled, clear any VLAN filters programmed on the VF */
1851 	vids[0] = 0;
1852 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1853 	if (!status)
1854 		dev_info(&adapter->pdev->dev,
1855 			 "Cleared guest VLANs on VF%d", vf);
1856 
1857 	/* After TVT is enabled, disallow VFs from programming VLAN filters */
1858 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1859 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1860 						  ~BE_PRIV_FILTMGMT, vf + 1);
1861 		if (!status)
1862 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1863 	}
1864 	return 0;
1865 }
1866 
1867 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1868 {
1869 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1870 	struct device *dev = &adapter->pdev->dev;
1871 	int status;
1872 
1873 	/* Reset Transparent VLAN Tagging. */
1874 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1875 				       vf_cfg->if_handle, 0, 0);
1876 	if (status)
1877 		return status;
1878 
1879 	/* Allow VFs to program VLAN filtering */
1880 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1881 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1882 						  BE_PRIV_FILTMGMT, vf + 1);
1883 		if (!status) {
1884 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1885 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1886 		}
1887 	}
1888 
1889 	dev_info(dev,
1890 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1891 	return 0;
1892 }
1893 
1894 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1895 			  __be16 vlan_proto)
1896 {
1897 	struct be_adapter *adapter = netdev_priv(netdev);
1898 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1899 	int status;
1900 
1901 	if (!sriov_enabled(adapter))
1902 		return -EPERM;
1903 
1904 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1905 		return -EINVAL;
1906 
1907 	if (vlan_proto != htons(ETH_P_8021Q))
1908 		return -EPROTONOSUPPORT;
1909 
1910 	if (vlan || qos) {
1911 		vlan |= qos << VLAN_PRIO_SHIFT;
1912 		status = be_set_vf_tvt(adapter, vf, vlan);
1913 	} else {
1914 		status = be_clear_vf_tvt(adapter, vf);
1915 	}
1916 
1917 	if (status) {
1918 		dev_err(&adapter->pdev->dev,
1919 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1920 			status);
1921 		return be_cmd_status(status);
1922 	}
1923 
1924 	vf_cfg->vlan_tag = vlan;
1925 	return 0;
1926 }
1927 
1928 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1929 			     int min_tx_rate, int max_tx_rate)
1930 {
1931 	struct be_adapter *adapter = netdev_priv(netdev);
1932 	struct device *dev = &adapter->pdev->dev;
1933 	int percent_rate, status = 0;
1934 	u16 link_speed = 0;
1935 	u8 link_status;
1936 
1937 	if (!sriov_enabled(adapter))
1938 		return -EPERM;
1939 
1940 	if (vf >= adapter->num_vfs)
1941 		return -EINVAL;
1942 
1943 	if (min_tx_rate)
1944 		return -EINVAL;
1945 
1946 	if (!max_tx_rate)
1947 		goto config_qos;
1948 
1949 	status = be_cmd_link_status_query(adapter, &link_speed,
1950 					  &link_status, 0);
1951 	if (status)
1952 		goto err;
1953 
1954 	if (!link_status) {
1955 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1956 		status = -ENETDOWN;
1957 		goto err;
1958 	}
1959 
1960 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1961 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1962 			link_speed);
1963 		status = -EINVAL;
1964 		goto err;
1965 	}
1966 
1967 	/* On Skyhawk the QoS setting must be specified as a % of link speed */
1968 	percent_rate = link_speed / 100;
1969 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1970 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1971 			percent_rate);
1972 		status = -EINVAL;
1973 		goto err;
1974 	}
1975 
1976 config_qos:
1977 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1978 	if (status)
1979 		goto err;
1980 
1981 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1982 	return 0;
1983 
1984 err:
1985 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1986 		max_tx_rate, vf);
1987 	return be_cmd_status(status);
1988 }
1989 
1990 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1991 				int link_state)
1992 {
1993 	struct be_adapter *adapter = netdev_priv(netdev);
1994 	int status;
1995 
1996 	if (!sriov_enabled(adapter))
1997 		return -EPERM;
1998 
1999 	if (vf >= adapter->num_vfs)
2000 		return -EINVAL;
2001 
2002 	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2003 	if (status) {
2004 		dev_err(&adapter->pdev->dev,
2005 			"Link state change on VF %d failed: %#x\n", vf, status);
2006 		return be_cmd_status(status);
2007 	}
2008 
2009 	adapter->vf_cfg[vf].plink_tracking = link_state;
2010 
2011 	return 0;
2012 }
2013 
2014 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2015 {
2016 	struct be_adapter *adapter = netdev_priv(netdev);
2017 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2018 	u8 spoofchk;
2019 	int status;
2020 
2021 	if (!sriov_enabled(adapter))
2022 		return -EPERM;
2023 
2024 	if (vf >= adapter->num_vfs)
2025 		return -EINVAL;
2026 
2027 	if (BEx_chip(adapter))
2028 		return -EOPNOTSUPP;
2029 
2030 	if (enable == vf_cfg->spoofchk)
2031 		return 0;
2032 
2033 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2034 
2035 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2036 				       0, spoofchk);
2037 	if (status) {
2038 		dev_err(&adapter->pdev->dev,
2039 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2040 		return be_cmd_status(status);
2041 	}
2042 
2043 	vf_cfg->spoofchk = enable;
2044 	return 0;
2045 }
2046 
2047 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2048 			  ulong now)
2049 {
2050 	aic->rx_pkts_prev = rx_pkts;
2051 	aic->tx_reqs_prev = tx_pkts;
2052 	aic->jiffies = now;
2053 }
2054 
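/* Compute a new EQ-delay value for adaptive interrupt coalescing based on
 * the combined RX/TX packet rate seen since the last sample. Returns the
 * static et_eqd value when adaptive mode is disabled.
 */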
2055 static int be_get_new_eqd(struct be_eq_obj *eqo)
2056 {
2057 	struct be_adapter *adapter = eqo->adapter;
2058 	int eqd, start;
2059 	struct be_aic_obj *aic;
2060 	struct be_rx_obj *rxo;
2061 	struct be_tx_obj *txo;
2062 	u64 rx_pkts = 0, tx_pkts = 0;
2063 	ulong now;
2064 	u32 pps, delta;
2065 	int i;
2066 
2067 	aic = &adapter->aic_obj[eqo->idx];
2068 	if (!aic->enable) {
2069 		if (aic->jiffies)
2070 			aic->jiffies = 0;
2071 		eqd = aic->et_eqd;
2072 		return eqd;
2073 	}
2074 
2075 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2076 		do {
2077 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2078 			rx_pkts += rxo->stats.rx_pkts;
2079 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2080 	}
2081 
2082 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2083 		do {
2084 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2085 			tx_pkts += txo->stats.tx_reqs;
2086 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2087 	}
2088 
2089 	/* Skip if the counters wrapped around or this is the first sample */
2090 	now = jiffies;
2091 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2092 	    rx_pkts < aic->rx_pkts_prev ||
2093 	    tx_pkts < aic->tx_reqs_prev) {
2094 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2095 		return aic->prev_eqd;
2096 	}
2097 
2098 	delta = jiffies_to_msecs(now - aic->jiffies);
2099 	if (delta == 0)
2100 		return aic->prev_eqd;
2101 
2102 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2103 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2104 	eqd = (pps / 15000) << 2;
2105 
2106 	if (eqd < 8)
2107 		eqd = 0;
2108 	eqd = min_t(u32, eqd, aic->max_eqd);
2109 	eqd = max_t(u32, eqd, aic->min_eqd);
2110 
2111 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2112 
2113 	return eqd;
2114 }
2115 
2116 /* For Skyhawk-R only */
2117 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2118 {
2119 	struct be_adapter *adapter = eqo->adapter;
2120 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2121 	ulong now = jiffies;
2122 	int eqd;
2123 	u32 mult_enc;
2124 
2125 	if (!aic->enable)
2126 		return 0;
2127 
2128 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2129 		eqd = aic->prev_eqd;
2130 	else
2131 		eqd = be_get_new_eqd(eqo);
2132 
2133 	if (eqd > 100)
2134 		mult_enc = R2I_DLY_ENC_1;
2135 	else if (eqd > 60)
2136 		mult_enc = R2I_DLY_ENC_2;
2137 	else if (eqd > 20)
2138 		mult_enc = R2I_DLY_ENC_3;
2139 	else
2140 		mult_enc = R2I_DLY_ENC_0;
2141 
2142 	aic->prev_eqd = eqd;
2143 
2144 	return mult_enc;
2145 }
2146 
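/* Re-program the delay of each EQ whose adaptive EQ-delay value has changed
 * (or of all EQs when force_update is set).
 */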
2147 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2148 {
2149 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2150 	struct be_aic_obj *aic;
2151 	struct be_eq_obj *eqo;
2152 	int i, num = 0, eqd;
2153 
2154 	for_all_evt_queues(adapter, eqo, i) {
2155 		aic = &adapter->aic_obj[eqo->idx];
2156 		eqd = be_get_new_eqd(eqo);
2157 		if (force_update || eqd != aic->prev_eqd) {
2158 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2159 			set_eqd[num].eq_id = eqo->q.id;
2160 			aic->prev_eqd = eqd;
2161 			num++;
2162 		}
2163 	}
2164 
2165 	if (num)
2166 		be_cmd_modify_eqd(adapter, set_eqd, num);
2167 }
2168 
2169 static void be_rx_stats_update(struct be_rx_obj *rxo,
2170 			       struct be_rx_compl_info *rxcp)
2171 {
2172 	struct be_rx_stats *stats = rx_stats(rxo);
2173 
2174 	u64_stats_update_begin(&stats->sync);
2175 	stats->rx_compl++;
2176 	stats->rx_bytes += rxcp->pkt_size;
2177 	stats->rx_pkts++;
2178 	if (rxcp->tunneled)
2179 		stats->rx_vxlan_offload_pkts++;
2180 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2181 		stats->rx_mcast_pkts++;
2182 	if (rxcp->err)
2183 		stats->rx_compl_err++;
2184 	u64_stats_update_end(&stats->sync);
2185 }
2186 
2187 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2188 {
2189 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2190 	 * Also ignore ipcksm for IPv6 packets
2191 	 */
2192 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2193 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2194 }
2195 
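/* Pop the page-info entry at the RXQ tail. The page is DMA-unmapped only on
 * its last fragment; otherwise the fragment is just synced for the CPU.
 */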
2196 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2197 {
2198 	struct be_adapter *adapter = rxo->adapter;
2199 	struct be_rx_page_info *rx_page_info;
2200 	struct be_queue_info *rxq = &rxo->q;
2201 	u32 frag_idx = rxq->tail;
2202 
2203 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2204 	BUG_ON(!rx_page_info->page);
2205 
2206 	if (rx_page_info->last_frag) {
2207 		dma_unmap_page(&adapter->pdev->dev,
2208 			       dma_unmap_addr(rx_page_info, bus),
2209 			       adapter->big_page_size, DMA_FROM_DEVICE);
2210 		rx_page_info->last_frag = false;
2211 	} else {
2212 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2213 					dma_unmap_addr(rx_page_info, bus),
2214 					rx_frag_size, DMA_FROM_DEVICE);
2215 	}
2216 
2217 	queue_tail_inc(rxq);
2218 	atomic_dec(&rxq->used);
2219 	return rx_page_info;
2220 }
2221 
2222 /* Throw away the data in the Rx completion */
2223 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2224 				struct be_rx_compl_info *rxcp)
2225 {
2226 	struct be_rx_page_info *page_info;
2227 	u16 i, num_rcvd = rxcp->num_rcvd;
2228 
2229 	for (i = 0; i < num_rcvd; i++) {
2230 		page_info = get_rx_page_info(rxo);
2231 		put_page(page_info->page);
2232 		memset(page_info, 0, sizeof(*page_info));
2233 	}
2234 }
2235 
2236 /*
2237  * skb_fill_rx_data forms a complete skb for an ether frame
2238  * indicated by rxcp.
2239  */
2240 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2241 			     struct be_rx_compl_info *rxcp)
2242 {
2243 	struct be_rx_page_info *page_info;
2244 	u16 i, j;
2245 	u16 hdr_len, curr_frag_len, remaining;
2246 	u8 *start;
2247 
2248 	page_info = get_rx_page_info(rxo);
2249 	start = page_address(page_info->page) + page_info->page_offset;
2250 	prefetch(start);
2251 
2252 	/* Copy data from the first descriptor of this completion */
2253 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2254 
2255 	skb->len = curr_frag_len;
2256 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2257 		memcpy(skb->data, start, curr_frag_len);
2258 		/* Complete packet has now been moved to data */
2259 		put_page(page_info->page);
2260 		skb->data_len = 0;
2261 		skb->tail += curr_frag_len;
2262 	} else {
2263 		hdr_len = ETH_HLEN;
2264 		memcpy(skb->data, start, hdr_len);
2265 		skb_shinfo(skb)->nr_frags = 1;
2266 		skb_frag_set_page(skb, 0, page_info->page);
2267 		skb_shinfo(skb)->frags[0].page_offset =
2268 					page_info->page_offset + hdr_len;
2269 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2270 				  curr_frag_len - hdr_len);
2271 		skb->data_len = curr_frag_len - hdr_len;
2272 		skb->truesize += rx_frag_size;
2273 		skb->tail += hdr_len;
2274 	}
2275 	page_info->page = NULL;
2276 
2277 	if (rxcp->pkt_size <= rx_frag_size) {
2278 		BUG_ON(rxcp->num_rcvd != 1);
2279 		return;
2280 	}
2281 
2282 	/* More frags present for this completion */
2283 	remaining = rxcp->pkt_size - curr_frag_len;
2284 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2285 		page_info = get_rx_page_info(rxo);
2286 		curr_frag_len = min(remaining, rx_frag_size);
2287 
2288 		/* Coalesce all frags from the same physical page in one slot */
2289 		if (page_info->page_offset == 0) {
2290 			/* Fresh page */
2291 			j++;
2292 			skb_frag_set_page(skb, j, page_info->page);
2293 			skb_shinfo(skb)->frags[j].page_offset =
2294 							page_info->page_offset;
2295 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2296 			skb_shinfo(skb)->nr_frags++;
2297 		} else {
2298 			put_page(page_info->page);
2299 		}
2300 
2301 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2302 		skb->len += curr_frag_len;
2303 		skb->data_len += curr_frag_len;
2304 		skb->truesize += rx_frag_size;
2305 		remaining -= curr_frag_len;
2306 		page_info->page = NULL;
2307 	}
2308 	BUG_ON(j > MAX_SKB_FRAGS);
2309 }
2310 
2311 /* Process the RX completion indicated by rxcp when GRO is disabled */
2312 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2313 				struct be_rx_compl_info *rxcp)
2314 {
2315 	struct be_adapter *adapter = rxo->adapter;
2316 	struct net_device *netdev = adapter->netdev;
2317 	struct sk_buff *skb;
2318 
2319 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2320 	if (unlikely(!skb)) {
2321 		rx_stats(rxo)->rx_drops_no_skbs++;
2322 		be_rx_compl_discard(rxo, rxcp);
2323 		return;
2324 	}
2325 
2326 	skb_fill_rx_data(rxo, skb, rxcp);
2327 
2328 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2329 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2330 	else
2331 		skb_checksum_none_assert(skb);
2332 
2333 	skb->protocol = eth_type_trans(skb, netdev);
2334 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2335 	if (netdev->features & NETIF_F_RXHASH)
2336 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2337 
2338 	skb->csum_level = rxcp->tunneled;
2339 	skb_mark_napi_id(skb, napi);
2340 
2341 	if (rxcp->vlanf)
2342 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2343 
2344 	netif_receive_skb(skb);
2345 }
2346 
2347 /* Process the RX completion indicated by rxcp when GRO is enabled */
2348 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2349 				    struct napi_struct *napi,
2350 				    struct be_rx_compl_info *rxcp)
2351 {
2352 	struct be_adapter *adapter = rxo->adapter;
2353 	struct be_rx_page_info *page_info;
2354 	struct sk_buff *skb = NULL;
2355 	u16 remaining, curr_frag_len;
2356 	u16 i, j;
2357 
2358 	skb = napi_get_frags(napi);
2359 	if (!skb) {
2360 		be_rx_compl_discard(rxo, rxcp);
2361 		return;
2362 	}
2363 
2364 	remaining = rxcp->pkt_size;
2365 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2366 		page_info = get_rx_page_info(rxo);
2367 
2368 		curr_frag_len = min(remaining, rx_frag_size);
2369 
2370 		/* Coalesce all frags from the same physical page in one slot */
2371 		if (i == 0 || page_info->page_offset == 0) {
2372 			/* First frag or Fresh page */
2373 			j++;
2374 			skb_frag_set_page(skb, j, page_info->page);
2375 			skb_shinfo(skb)->frags[j].page_offset =
2376 							page_info->page_offset;
2377 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378 		} else {
2379 			put_page(page_info->page);
2380 		}
2381 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2382 		skb->truesize += rx_frag_size;
2383 		remaining -= curr_frag_len;
2384 		memset(page_info, 0, sizeof(*page_info));
2385 	}
2386 	BUG_ON(j > MAX_SKB_FRAGS);
2387 
2388 	skb_shinfo(skb)->nr_frags = j + 1;
2389 	skb->len = rxcp->pkt_size;
2390 	skb->data_len = rxcp->pkt_size;
2391 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2392 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2393 	if (adapter->netdev->features & NETIF_F_RXHASH)
2394 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2395 
2396 	skb->csum_level = rxcp->tunneled;
2397 
2398 	if (rxcp->vlanf)
2399 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2400 
2401 	napi_gro_frags(napi);
2402 }
2403 
2404 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2405 				 struct be_rx_compl_info *rxcp)
2406 {
2407 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2408 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2409 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2410 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2411 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2412 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2413 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2414 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2415 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2416 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2417 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2418 	if (rxcp->vlanf) {
2419 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2420 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2421 	}
2422 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2423 	rxcp->tunneled =
2424 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2425 }
2426 
2427 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2428 				 struct be_rx_compl_info *rxcp)
2429 {
2430 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2431 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2432 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2433 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2434 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2435 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2436 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2437 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2438 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2439 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2440 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2441 	if (rxcp->vlanf) {
2442 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2443 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2444 	}
2445 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2446 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2447 }
2448 
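/* Fetch and parse the next valid RX completion from the CQ into rxo->rxcp.
 * Returns NULL when no completion is pending.
 */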
2449 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2450 {
2451 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2452 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2453 	struct be_adapter *adapter = rxo->adapter;
2454 
2455 	/* For checking the valid bit it is Ok to use either definition as the
2456 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2457 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2458 		return NULL;
2459 
2460 	rmb();
2461 	be_dws_le_to_cpu(compl, sizeof(*compl));
2462 
2463 	if (adapter->be3_native)
2464 		be_parse_rx_compl_v1(compl, rxcp);
2465 	else
2466 		be_parse_rx_compl_v0(compl, rxcp);
2467 
2468 	if (rxcp->ip_frag)
2469 		rxcp->l4_csum = 0;
2470 
2471 	if (rxcp->vlanf) {
2472 		/* In QNQ modes, if qnq bit is not set, then the packet was
2473 		 * tagged only with the transparent outer vlan-tag and must
2474 		 * not be treated as a vlan packet by host
2475 		 */
2476 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2477 			rxcp->vlanf = 0;
2478 
2479 		if (!lancer_chip(adapter))
2480 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2481 
2482 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2483 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2484 			rxcp->vlanf = 0;
2485 	}
2486 
2487 	/* As the compl has been parsed, reset it; we won't touch it again */
2488 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2489 
2490 	queue_tail_inc(&rxo->cq);
2491 	return rxcp;
2492 }
2493 
2494 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2495 {
2496 	u32 order = get_order(size);
2497 
2498 	if (order > 0)
2499 		gfp |= __GFP_COMP;
2500 	return alloc_pages(gfp, order);
2501 }
2502 
2503 /*
2504  * Allocate a page, split it into fragments of size rx_frag_size and post
2505  * them as receive buffers to BE
2506  */
2507 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2508 {
2509 	struct be_adapter *adapter = rxo->adapter;
2510 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2511 	struct be_queue_info *rxq = &rxo->q;
2512 	struct page *pagep = NULL;
2513 	struct device *dev = &adapter->pdev->dev;
2514 	struct be_eth_rx_d *rxd;
2515 	u64 page_dmaaddr = 0, frag_dmaaddr;
2516 	u32 posted, page_offset = 0, notify = 0;
2517 
2518 	page_info = &rxo->page_info_tbl[rxq->head];
2519 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2520 		if (!pagep) {
2521 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2522 			if (unlikely(!pagep)) {
2523 				rx_stats(rxo)->rx_post_fail++;
2524 				break;
2525 			}
2526 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2527 						    adapter->big_page_size,
2528 						    DMA_FROM_DEVICE);
2529 			if (dma_mapping_error(dev, page_dmaaddr)) {
2530 				put_page(pagep);
2531 				pagep = NULL;
2532 				adapter->drv_stats.dma_map_errors++;
2533 				break;
2534 			}
2535 			page_offset = 0;
2536 		} else {
2537 			get_page(pagep);
2538 			page_offset += rx_frag_size;
2539 		}
2540 		page_info->page_offset = page_offset;
2541 		page_info->page = pagep;
2542 
2543 		rxd = queue_head_node(rxq);
2544 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2545 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2546 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2547 
2548 		/* Any space left in the current big page for another frag? */
2549 		if ((page_offset + rx_frag_size + rx_frag_size) >
2550 					adapter->big_page_size) {
2551 			pagep = NULL;
2552 			page_info->last_frag = true;
2553 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2554 		} else {
2555 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2556 		}
2557 
2558 		prev_page_info = page_info;
2559 		queue_head_inc(rxq);
2560 		page_info = &rxo->page_info_tbl[rxq->head];
2561 	}
2562 
2563 	/* Mark the last frag of a page when we break out of the above loop
2564 	 * with no more slots available in the RXQ
2565 	 */
2566 	if (pagep) {
2567 		prev_page_info->last_frag = true;
2568 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2569 	}
2570 
2571 	if (posted) {
2572 		atomic_add(posted, &rxq->used);
2573 		if (rxo->rx_post_starved)
2574 			rxo->rx_post_starved = false;
2575 		do {
2576 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2577 			be_rxq_notify(adapter, rxq->id, notify);
2578 			posted -= notify;
2579 		} while (posted);
2580 	} else if (atomic_read(&rxq->used) == 0) {
2581 		/* Let be_worker replenish when memory is available */
2582 		rxo->rx_post_starved = true;
2583 	}
2584 }
2585 
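/* Fetch the next valid TX completion from the CQ. Returns NULL when no
 * completion is pending.
 */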
2586 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2587 {
2588 	struct be_queue_info *tx_cq = &txo->cq;
2589 	struct be_tx_compl_info *txcp = &txo->txcp;
2590 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2591 
2592 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2593 		return NULL;
2594 
2595 	/* Ensure load ordering of valid bit dword and other dwords below */
2596 	rmb();
2597 	be_dws_le_to_cpu(compl, sizeof(*compl));
2598 
2599 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2600 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2601 
2602 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2603 	queue_tail_inc(tx_cq);
2604 	return txcp;
2605 }
2606 
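/* Unmap the wrbs and free the skbs of the TX request(s) completed up to
 * last_index. Returns the number of wrbs processed.
 */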
2607 static u16 be_tx_compl_process(struct be_adapter *adapter,
2608 			       struct be_tx_obj *txo, u16 last_index)
2609 {
2610 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2611 	struct be_queue_info *txq = &txo->q;
2612 	struct sk_buff *skb = NULL;
2613 	bool unmap_skb_hdr = false;
2614 	struct be_eth_wrb *wrb;
2615 	u16 num_wrbs = 0;
2616 	u32 frag_index;
2617 
2618 	do {
2619 		if (sent_skbs[txq->tail]) {
2620 			/* Free skb from prev req */
2621 			if (skb)
2622 				dev_consume_skb_any(skb);
2623 			skb = sent_skbs[txq->tail];
2624 			sent_skbs[txq->tail] = NULL;
2625 			queue_tail_inc(txq);  /* skip hdr wrb */
2626 			num_wrbs++;
2627 			unmap_skb_hdr = true;
2628 		}
2629 		wrb = queue_tail_node(txq);
2630 		frag_index = txq->tail;
2631 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2632 			      (unmap_skb_hdr && skb_headlen(skb)));
2633 		unmap_skb_hdr = false;
2634 		queue_tail_inc(txq);
2635 		num_wrbs++;
2636 	} while (frag_index != last_index);
2637 	dev_consume_skb_any(skb);
2638 
2639 	return num_wrbs;
2640 }
2641 
2642 /* Return the number of events in the event queue */
2643 static inline int events_get(struct be_eq_obj *eqo)
2644 {
2645 	struct be_eq_entry *eqe;
2646 	int num = 0;
2647 
2648 	do {
2649 		eqe = queue_tail_node(&eqo->q);
2650 		if (eqe->evt == 0)
2651 			break;
2652 
2653 		rmb();
2654 		eqe->evt = 0;
2655 		num++;
2656 		queue_tail_inc(&eqo->q);
2657 	} while (true);
2658 
2659 	return num;
2660 }
2661 
2662 /* Leaves the EQ in a disarmed state */
2663 static void be_eq_clean(struct be_eq_obj *eqo)
2664 {
2665 	int num = events_get(eqo);
2666 
2667 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2668 }
2669 
2670 /* Free posted rx buffers that were not used */
2671 static void be_rxq_clean(struct be_rx_obj *rxo)
2672 {
2673 	struct be_queue_info *rxq = &rxo->q;
2674 	struct be_rx_page_info *page_info;
2675 
2676 	while (atomic_read(&rxq->used) > 0) {
2677 		page_info = get_rx_page_info(rxo);
2678 		put_page(page_info->page);
2679 		memset(page_info, 0, sizeof(*page_info));
2680 	}
2681 	BUG_ON(atomic_read(&rxq->used));
2682 	rxq->tail = 0;
2683 	rxq->head = 0;
2684 }
2685 
2686 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2687 {
2688 	struct be_queue_info *rx_cq = &rxo->cq;
2689 	struct be_rx_compl_info *rxcp;
2690 	struct be_adapter *adapter = rxo->adapter;
2691 	int flush_wait = 0;
2692 
2693 	/* Consume pending rx completions.
2694 	 * Wait for the flush completion (identified by zero num_rcvd)
2695 	 * to arrive. Notify CQ even when there are no more CQ entries
2696 	 * for HW to flush partially coalesced CQ entries.
2697 	 * In Lancer, there is no need to wait for flush compl.
2698 	 */
2699 	for (;;) {
2700 		rxcp = be_rx_compl_get(rxo);
2701 		if (!rxcp) {
2702 			if (lancer_chip(adapter))
2703 				break;
2704 
2705 			if (flush_wait++ > 50 ||
2706 			    be_check_error(adapter,
2707 					   BE_ERROR_HW)) {
2708 				dev_warn(&adapter->pdev->dev,
2709 					 "did not receive flush compl\n");
2710 				break;
2711 			}
2712 			be_cq_notify(adapter, rx_cq->id, true, 0);
2713 			mdelay(1);
2714 		} else {
2715 			be_rx_compl_discard(rxo, rxcp);
2716 			be_cq_notify(adapter, rx_cq->id, false, 1);
2717 			if (rxcp->num_rcvd == 0)
2718 				break;
2719 		}
2720 	}
2721 
2722 	/* After cleanup, leave the CQ in unarmed state */
2723 	be_cq_notify(adapter, rx_cq->id, false, 0);
2724 }
2725 
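/* Drain pending TX completions on all TX queues and free any TX requests
 * that were queued but never notified to the HW.
 */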
2726 static void be_tx_compl_clean(struct be_adapter *adapter)
2727 {
2728 	struct device *dev = &adapter->pdev->dev;
2729 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2730 	struct be_tx_compl_info *txcp;
2731 	struct be_queue_info *txq;
2732 	u32 end_idx, notified_idx;
2733 	struct be_tx_obj *txo;
2734 	int i, pending_txqs;
2735 
2736 	/* Stop polling for compls when HW has been silent for 10ms */
2737 	do {
2738 		pending_txqs = adapter->num_tx_qs;
2739 
2740 		for_all_tx_queues(adapter, txo, i) {
2741 			cmpl = 0;
2742 			num_wrbs = 0;
2743 			txq = &txo->q;
2744 			while ((txcp = be_tx_compl_get(txo))) {
2745 				num_wrbs +=
2746 					be_tx_compl_process(adapter, txo,
2747 							    txcp->end_index);
2748 				cmpl++;
2749 			}
2750 			if (cmpl) {
2751 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2752 				atomic_sub(num_wrbs, &txq->used);
2753 				timeo = 0;
2754 			}
2755 			if (!be_is_tx_compl_pending(txo))
2756 				pending_txqs--;
2757 		}
2758 
2759 		if (pending_txqs == 0 || ++timeo > 10 ||
2760 		    be_check_error(adapter, BE_ERROR_HW))
2761 			break;
2762 
2763 		mdelay(1);
2764 	} while (true);
2765 
2766 	/* Free enqueued TX requests that were never notified to HW */
2767 	for_all_tx_queues(adapter, txo, i) {
2768 		txq = &txo->q;
2769 
2770 		if (atomic_read(&txq->used)) {
2771 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2772 				 i, atomic_read(&txq->used));
2773 			notified_idx = txq->tail;
2774 			end_idx = txq->tail;
2775 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2776 				  txq->len);
2777 			/* Use the tx-compl process logic to handle requests
2778 			 * that were not sent to the HW.
2779 			 */
2780 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2781 			atomic_sub(num_wrbs, &txq->used);
2782 			BUG_ON(atomic_read(&txq->used));
2783 			txo->pend_wrb_cnt = 0;
2784 			/* Since hw was never notified of these requests,
2785 			 * reset TXQ indices
2786 			 */
2787 			txq->head = notified_idx;
2788 			txq->tail = notified_idx;
2789 		}
2790 	}
2791 }
2792 
2793 static void be_evt_queues_destroy(struct be_adapter *adapter)
2794 {
2795 	struct be_eq_obj *eqo;
2796 	int i;
2797 
2798 	for_all_evt_queues(adapter, eqo, i) {
2799 		if (eqo->q.created) {
2800 			be_eq_clean(eqo);
2801 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2802 			netif_napi_del(&eqo->napi);
2803 			free_cpumask_var(eqo->affinity_mask);
2804 		}
2805 		be_queue_free(adapter, &eqo->q);
2806 	}
2807 }
2808 
2809 static int be_evt_queues_create(struct be_adapter *adapter)
2810 {
2811 	struct be_queue_info *eq;
2812 	struct be_eq_obj *eqo;
2813 	struct be_aic_obj *aic;
2814 	int i, rc;
2815 
2816 	/* need enough EQs to service both RX and TX queues */
2817 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2818 				    max(adapter->cfg_num_rx_irqs,
2819 					adapter->cfg_num_tx_irqs));
2820 
2821 	for_all_evt_queues(adapter, eqo, i) {
2822 		int numa_node = dev_to_node(&adapter->pdev->dev);
2823 
2824 		aic = &adapter->aic_obj[i];
2825 		eqo->adapter = adapter;
2826 		eqo->idx = i;
2827 		aic->max_eqd = BE_MAX_EQD;
2828 		aic->enable = true;
2829 
2830 		eq = &eqo->q;
2831 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2832 				    sizeof(struct be_eq_entry));
2833 		if (rc)
2834 			return rc;
2835 
2836 		rc = be_cmd_eq_create(adapter, eqo);
2837 		if (rc)
2838 			return rc;
2839 
2840 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2841 			return -ENOMEM;
2842 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2843 				eqo->affinity_mask);
2844 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2845 			       BE_NAPI_WEIGHT);
2846 	}
2847 	return 0;
2848 }
2849 
2850 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2851 {
2852 	struct be_queue_info *q;
2853 
2854 	q = &adapter->mcc_obj.q;
2855 	if (q->created)
2856 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2857 	be_queue_free(adapter, q);
2858 
2859 	q = &adapter->mcc_obj.cq;
2860 	if (q->created)
2861 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2862 	be_queue_free(adapter, q);
2863 }
2864 
2865 /* Must be called only after TX qs are created as MCC shares TX EQ */
2866 static int be_mcc_queues_create(struct be_adapter *adapter)
2867 {
2868 	struct be_queue_info *q, *cq;
2869 
2870 	cq = &adapter->mcc_obj.cq;
2871 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2872 			   sizeof(struct be_mcc_compl)))
2873 		goto err;
2874 
2875 	/* Use the default EQ for MCC completions */
2876 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2877 		goto mcc_cq_free;
2878 
2879 	q = &adapter->mcc_obj.q;
2880 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2881 		goto mcc_cq_destroy;
2882 
2883 	if (be_cmd_mccq_create(adapter, q, cq))
2884 		goto mcc_q_free;
2885 
2886 	return 0;
2887 
2888 mcc_q_free:
2889 	be_queue_free(adapter, q);
2890 mcc_cq_destroy:
2891 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2892 mcc_cq_free:
2893 	be_queue_free(adapter, cq);
2894 err:
2895 	return -1;
2896 }
2897 
2898 static void be_tx_queues_destroy(struct be_adapter *adapter)
2899 {
2900 	struct be_queue_info *q;
2901 	struct be_tx_obj *txo;
2902 	u8 i;
2903 
2904 	for_all_tx_queues(adapter, txo, i) {
2905 		q = &txo->q;
2906 		if (q->created)
2907 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2908 		be_queue_free(adapter, q);
2909 
2910 		q = &txo->cq;
2911 		if (q->created)
2912 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2913 		be_queue_free(adapter, q);
2914 	}
2915 }
2916 
2917 static int be_tx_qs_create(struct be_adapter *adapter)
2918 {
2919 	struct be_queue_info *cq;
2920 	struct be_tx_obj *txo;
2921 	struct be_eq_obj *eqo;
2922 	int status, i;
2923 
2924 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2925 
2926 	for_all_tx_queues(adapter, txo, i) {
2927 		cq = &txo->cq;
2928 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2929 					sizeof(struct be_eth_tx_compl));
2930 		if (status)
2931 			return status;
2932 
2933 		u64_stats_init(&txo->stats.sync);
2934 		u64_stats_init(&txo->stats.sync_compl);
2935 
2936 		/* If num_evt_qs is less than num_tx_qs, then more than
2937 		 * one txq shares an eq
2938 		 */
2939 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2940 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2941 		if (status)
2942 			return status;
2943 
2944 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2945 					sizeof(struct be_eth_wrb));
2946 		if (status)
2947 			return status;
2948 
2949 		status = be_cmd_txq_create(adapter, txo);
2950 		if (status)
2951 			return status;
2952 
2953 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2954 				    eqo->idx);
2955 	}
2956 
2957 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2958 		 adapter->num_tx_qs);
2959 	return 0;
2960 }
2961 
2962 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2963 {
2964 	struct be_queue_info *q;
2965 	struct be_rx_obj *rxo;
2966 	int i;
2967 
2968 	for_all_rx_queues(adapter, rxo, i) {
2969 		q = &rxo->cq;
2970 		if (q->created)
2971 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2972 		be_queue_free(adapter, q);
2973 	}
2974 }
2975 
2976 static int be_rx_cqs_create(struct be_adapter *adapter)
2977 {
2978 	struct be_queue_info *eq, *cq;
2979 	struct be_rx_obj *rxo;
2980 	int rc, i;
2981 
2982 	adapter->num_rss_qs =
2983 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2984 
2985 	/* We'll use RSS only if at least 2 RSS rings are supported. */
2986 	if (adapter->num_rss_qs < 2)
2987 		adapter->num_rss_qs = 0;
2988 
2989 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2990 
2991 	/* When the interface is not capable of RSS rings (and there is no
2992 	 * need to create a default RXQ) we'll still need one RXQ
2993 	 */
2994 	if (adapter->num_rx_qs == 0)
2995 		adapter->num_rx_qs = 1;
2996 
2997 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2998 	for_all_rx_queues(adapter, rxo, i) {
2999 		rxo->adapter = adapter;
3000 		cq = &rxo->cq;
3001 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3002 				    sizeof(struct be_eth_rx_compl));
3003 		if (rc)
3004 			return rc;
3005 
3006 		u64_stats_init(&rxo->stats.sync);
3007 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3008 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3009 		if (rc)
3010 			return rc;
3011 	}
3012 
3013 	dev_info(&adapter->pdev->dev,
3014 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3015 	return 0;
3016 }
3017 
3018 static irqreturn_t be_intx(int irq, void *dev)
3019 {
3020 	struct be_eq_obj *eqo = dev;
3021 	struct be_adapter *adapter = eqo->adapter;
3022 	int num_evts = 0;
3023 
3024 	/* IRQ is not expected when NAPI is scheduled as the EQ
3025 	 * will not be armed.
3026 	 * But, this can happen on Lancer INTx where it takes
3027 	 * a while to de-assert INTx, or in BE2 where occasionally
3028 	 * an interrupt may be raised even when the EQ is unarmed.
3029 	 * If NAPI is already scheduled, then counting & notifying
3030 	 * events will orphan them.
3031 	 */
3032 	if (napi_schedule_prep(&eqo->napi)) {
3033 		num_evts = events_get(eqo);
3034 		__napi_schedule(&eqo->napi);
3035 		if (num_evts)
3036 			eqo->spurious_intr = 0;
3037 	}
3038 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3039 
3040 	/* Return IRQ_HANDLED only for the first spurious intr
3041 	 * after a valid intr to stop the kernel from branding
3042 	 * this irq as a bad one!
3043 	 */
3044 	if (num_evts || eqo->spurious_intr++ == 0)
3045 		return IRQ_HANDLED;
3046 	else
3047 		return IRQ_NONE;
3048 }
3049 
3050 static irqreturn_t be_msix(int irq, void *dev)
3051 {
3052 	struct be_eq_obj *eqo = dev;
3053 
3054 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3055 	napi_schedule(&eqo->napi);
3056 	return IRQ_HANDLED;
3057 }
3058 
3059 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3060 {
3061 	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3062 }
3063 
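/* Process up to 'budget' RX completions on this RX queue, replenishing RX
 * fragments as they are consumed. Returns the number of packets processed.
 */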
3064 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3065 			 int budget, int polling)
3066 {
3067 	struct be_adapter *adapter = rxo->adapter;
3068 	struct be_queue_info *rx_cq = &rxo->cq;
3069 	struct be_rx_compl_info *rxcp;
3070 	u32 work_done;
3071 	u32 frags_consumed = 0;
3072 
3073 	for (work_done = 0; work_done < budget; work_done++) {
3074 		rxcp = be_rx_compl_get(rxo);
3075 		if (!rxcp)
3076 			break;
3077 
3078 		/* Is it a flush compl that has no data? */
3079 		if (unlikely(rxcp->num_rcvd == 0))
3080 			goto loop_continue;
3081 
3082 		/* Discard compls with partial DMA (Lancer B0) */
3083 		if (unlikely(!rxcp->pkt_size)) {
3084 			be_rx_compl_discard(rxo, rxcp);
3085 			goto loop_continue;
3086 		}
3087 
3088 		/* On BE drop pkts that arrive due to imperfect filtering in
3089 		 * promiscuous mode on some SKUs
3090 		 */
3091 		if (unlikely(rxcp->port != adapter->port_num &&
3092 			     !lancer_chip(adapter))) {
3093 			be_rx_compl_discard(rxo, rxcp);
3094 			goto loop_continue;
3095 		}
3096 
3097 		/* Don't do gro when we're busy_polling */
3098 		if (do_gro(rxcp) && polling != BUSY_POLLING)
3099 			be_rx_compl_process_gro(rxo, napi, rxcp);
3100 		else
3101 			be_rx_compl_process(rxo, napi, rxcp);
3102 
3103 loop_continue:
3104 		frags_consumed += rxcp->num_rcvd;
3105 		be_rx_stats_update(rxo, rxcp);
3106 	}
3107 
3108 	if (work_done) {
3109 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3110 
3111 		/* When an rx-obj gets into post_starved state, just
3112 		 * let be_worker do the posting.
3113 		 */
3114 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115 		    !rxo->rx_post_starved)
3116 			be_post_rx_frags(rxo, GFP_ATOMIC,
3117 					 max_t(u32, MAX_RX_POST,
3118 					       frags_consumed));
3119 	}
3120 
3121 	return work_done;
3122 }
3123 
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126 	switch (status) {
3127 	case BE_TX_COMP_HDR_PARSE_ERR:
3128 		tx_stats(txo)->tx_hdr_parse_err++;
3129 		break;
3130 	case BE_TX_COMP_NDMA_ERR:
3131 		tx_stats(txo)->tx_dma_err++;
3132 		break;
3133 	case BE_TX_COMP_ACL_ERR:
3134 		tx_stats(txo)->tx_spoof_check_err++;
3135 		break;
3136 	}
3137 }
3138 
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141 	switch (status) {
3142 	case LANCER_TX_COMP_LSO_ERR:
3143 		tx_stats(txo)->tx_tso_err++;
3144 		break;
3145 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147 		tx_stats(txo)->tx_spoof_check_err++;
3148 		break;
3149 	case LANCER_TX_COMP_QINQ_ERR:
3150 		tx_stats(txo)->tx_qinq_err++;
3151 		break;
3152 	case LANCER_TX_COMP_PARITY_ERR:
3153 		tx_stats(txo)->tx_internal_parity_err++;
3154 		break;
3155 	case LANCER_TX_COMP_DMA_ERR:
3156 		tx_stats(txo)->tx_dma_err++;
3157 		break;
3158 	}
3159 }
3160 
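/* Reap TX completions on this TX queue, update error stats and wake the
 * netdev sub-queue if it was stopped for lack of wrbs.
 */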
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162 			  int idx)
3163 {
3164 	int num_wrbs = 0, work_done = 0;
3165 	struct be_tx_compl_info *txcp;
3166 
3167 	while ((txcp = be_tx_compl_get(txo))) {
3168 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169 		work_done++;
3170 
3171 		if (txcp->status) {
3172 			if (lancer_chip(adapter))
3173 				lancer_update_tx_err(txo, txcp->status);
3174 			else
3175 				be_update_tx_err(txo, txcp->status);
3176 		}
3177 	}
3178 
3179 	if (work_done) {
3180 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3181 		atomic_sub(num_wrbs, &txo->q.used);
3182 
3183 		/* As Tx wrbs have been freed up, wake up the netdev queue
3184 		 * if it was stopped due to lack of tx wrbs. */
3185 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186 		    be_can_txq_wake(txo)) {
3187 			netif_wake_subqueue(adapter->netdev, idx);
3188 		}
3189 
3190 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191 		tx_stats(txo)->tx_compl += work_done;
3192 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193 	}
3194 }
3195 
3196 #ifdef CONFIG_NET_RX_BUSY_POLL
3197 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3198 {
3199 	bool status = true;
3200 
3201 	spin_lock(&eqo->lock); /* BH is already disabled */
3202 	if (eqo->state & BE_EQ_LOCKED) {
3203 		WARN_ON(eqo->state & BE_EQ_NAPI);
3204 		eqo->state |= BE_EQ_NAPI_YIELD;
3205 		status = false;
3206 	} else {
3207 		eqo->state = BE_EQ_NAPI;
3208 	}
3209 	spin_unlock(&eqo->lock);
3210 	return status;
3211 }
3212 
3213 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3214 {
3215 	spin_lock(&eqo->lock); /* BH is already disabled */
3216 
3217 	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3218 	eqo->state = BE_EQ_IDLE;
3219 
3220 	spin_unlock(&eqo->lock);
3221 }
3222 
3223 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3224 {
3225 	bool status = true;
3226 
3227 	spin_lock_bh(&eqo->lock);
3228 	if (eqo->state & BE_EQ_LOCKED) {
3229 		eqo->state |= BE_EQ_POLL_YIELD;
3230 		status = false;
3231 	} else {
3232 		eqo->state |= BE_EQ_POLL;
3233 	}
3234 	spin_unlock_bh(&eqo->lock);
3235 	return status;
3236 }
3237 
3238 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3239 {
3240 	spin_lock_bh(&eqo->lock);
3241 
3242 	WARN_ON(eqo->state & (BE_EQ_NAPI));
3243 	eqo->state = BE_EQ_IDLE;
3244 
3245 	spin_unlock_bh(&eqo->lock);
3246 }
3247 
3248 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3249 {
3250 	spin_lock_init(&eqo->lock);
3251 	eqo->state = BE_EQ_IDLE;
3252 }
3253 
3254 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3255 {
3256 	local_bh_disable();
3257 
3258 	/* It's enough to just acquire the napi lock on the eqo to stop
3259 	 * be_busy_poll() from processing any queues.
3260 	 */
3261 	while (!be_lock_napi(eqo))
3262 		mdelay(1);
3263 
3264 	local_bh_enable();
3265 }
3266 
3267 #else /* CONFIG_NET_RX_BUSY_POLL */
3268 
3269 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3270 {
3271 	return true;
3272 }
3273 
3274 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3275 {
3276 }
3277 
3278 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3279 {
3280 	return false;
3281 }
3282 
3283 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3284 {
3285 }
3286 
3287 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3288 {
3289 }
3290 
3291 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3292 {
3293 }
3294 #endif /* CONFIG_NET_RX_BUSY_POLL */
3295 
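/* NAPI poll handler: processes TX, RX (and MCC, for the MCC EQ) completions
 * of all queues on this EQ and re-arms the EQ when under budget.
 */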
3296 int be_poll(struct napi_struct *napi, int budget)
3297 {
3298 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3299 	struct be_adapter *adapter = eqo->adapter;
3300 	int max_work = 0, work, i, num_evts;
3301 	struct be_rx_obj *rxo;
3302 	struct be_tx_obj *txo;
3303 	u32 mult_enc = 0;
3304 
3305 	num_evts = events_get(eqo);
3306 
3307 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3308 		be_process_tx(adapter, txo, i);
3309 
3310 	if (be_lock_napi(eqo)) {
3311 		/* This loop will iterate twice for EQ0 in which
3312 		 * completions of the last RXQ (default one) are also processed.
3313 		 * For other EQs the loop iterates only once.
3314 		 */
3315 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3316 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3317 			max_work = max(work, max_work);
3318 		}
3319 		be_unlock_napi(eqo);
3320 	} else {
3321 		max_work = budget;
3322 	}
3323 
3324 	if (is_mcc_eqo(eqo))
3325 		be_process_mcc(adapter);
3326 
3327 	if (max_work < budget) {
3328 		napi_complete(napi);
3329 
3330 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3331 		 * delay via a delay multiplier encoding value
3332 		 */
3333 		if (skyhawk_chip(adapter))
3334 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3335 
3336 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3337 			     mult_enc);
3338 	} else {
3339 		/* As we'll continue in polling mode, count and clear events */
3340 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3341 	}
3342 	return max_work;
3343 }
3344 
3345 #ifdef CONFIG_NET_RX_BUSY_POLL
3346 static int be_busy_poll(struct napi_struct *napi)
3347 {
3348 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3349 	struct be_adapter *adapter = eqo->adapter;
3350 	struct be_rx_obj *rxo;
3351 	int i, work = 0;
3352 
3353 	if (!be_lock_busy_poll(eqo))
3354 		return LL_FLUSH_BUSY;
3355 
3356 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3357 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3358 		if (work)
3359 			break;
3360 	}
3361 
3362 	be_unlock_busy_poll(eqo);
3363 	return work;
3364 }
3365 #endif
3366 
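/* Check for unrecoverable HW errors: the SLIPORT status registers on Lancer,
 * or the UE status registers on BE/Skyhawk; log and flag any error found.
 */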
3367 void be_detect_error(struct be_adapter *adapter)
3368 {
3369 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3370 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3371 	u32 i;
3372 	struct device *dev = &adapter->pdev->dev;
3373 
3374 	if (be_check_error(adapter, BE_ERROR_HW))
3375 		return;
3376 
3377 	if (lancer_chip(adapter)) {
3378 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3379 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3380 			be_set_error(adapter, BE_ERROR_UE);
3381 			sliport_err1 = ioread32(adapter->db +
3382 						SLIPORT_ERROR1_OFFSET);
3383 			sliport_err2 = ioread32(adapter->db +
3384 						SLIPORT_ERROR2_OFFSET);
3385 			/* Do not log error messages if it's a FW reset */
3386 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3387 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3388 				dev_info(dev, "Firmware update in progress\n");
3389 			} else {
3390 				dev_err(dev, "Error detected in the card\n");
3391 				dev_err(dev, "ERR: sliport status 0x%x\n",
3392 					sliport_status);
3393 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3394 					sliport_err1);
3395 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3396 					sliport_err2);
3397 			}
3398 		}
3399 	} else {
3400 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3401 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3402 		ue_lo_mask = ioread32(adapter->pcicfg +
3403 				      PCICFG_UE_STATUS_LOW_MASK);
3404 		ue_hi_mask = ioread32(adapter->pcicfg +
3405 				      PCICFG_UE_STATUS_HI_MASK);
3406 
3407 		ue_lo = (ue_lo & ~ue_lo_mask);
3408 		ue_hi = (ue_hi & ~ue_hi_mask);
3409 
3410 		/* On certain platforms BE hardware can indicate spurious UEs.
3411 		 * Allow HW to stop working completely in case of a real UE.
3412 		 * Hence we don't set hw_error merely on UE detection.
3413 		 */
3414 
3415 		if (ue_lo || ue_hi) {
3416 			dev_err(dev, "Error detected in the adapter");
3417 			if (skyhawk_chip(adapter))
3418 				be_set_error(adapter, BE_ERROR_UE);
3419 
3420 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3421 				if (ue_lo & 1)
3422 					dev_err(dev, "UE: %s bit set\n",
3423 						ue_status_low_desc[i]);
3424 			}
3425 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3426 				if (ue_hi & 1)
3427 					dev_err(dev, "UE: %s bit set\n",
3428 						ue_status_hi_desc[i]);
3429 			}
3430 		}
3431 	}
3432 }
3433 
3434 static void be_msix_disable(struct be_adapter *adapter)
3435 {
3436 	if (msix_enabled(adapter)) {
3437 		pci_disable_msix(adapter->pdev);
3438 		adapter->num_msix_vec = 0;
3439 		adapter->num_msix_roce_vec = 0;
3440 	}
3441 }
3442 
3443 static int be_msix_enable(struct be_adapter *adapter)
3444 {
3445 	unsigned int i, max_roce_eqs;
3446 	struct device *dev = &adapter->pdev->dev;
3447 	int num_vec;
3448 
3449 	/* If RoCE is supported, program the max number of vectors that
3450 	 * could be used for NIC and RoCE; otherwise, just program the number
3451 	 * we'll use initially.
3452 	 */
3453 	if (be_roce_supported(adapter)) {
3454 		max_roce_eqs =
3455 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3456 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3457 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3458 	} else {
3459 		num_vec = max(adapter->cfg_num_rx_irqs,
3460 			      adapter->cfg_num_tx_irqs);
3461 	}
3462 
3463 	for (i = 0; i < num_vec; i++)
3464 		adapter->msix_entries[i].entry = i;
3465 
3466 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3467 					MIN_MSIX_VECTORS, num_vec);
3468 	if (num_vec < 0)
3469 		goto fail;
3470 
3471 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3472 		adapter->num_msix_roce_vec = num_vec / 2;
3473 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3474 			 adapter->num_msix_roce_vec);
3475 	}
3476 
3477 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3478 
3479 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3480 		 adapter->num_msix_vec);
3481 	return 0;
3482 
3483 fail:
3484 	dev_warn(dev, "MSIx enable failed\n");
3485 
3486 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3487 	if (be_virtfn(adapter))
3488 		return num_vec;
3489 	return 0;
3490 }
3491 
3492 static inline int be_msix_vec_get(struct be_adapter *adapter,
3493 				  struct be_eq_obj *eqo)
3494 {
3495 	return adapter->msix_entries[eqo->msix_idx].vector;
3496 }
3497 
3498 static int be_msix_register(struct be_adapter *adapter)
3499 {
3500 	struct net_device *netdev = adapter->netdev;
3501 	struct be_eq_obj *eqo;
3502 	int status, i, vec;
3503 
3504 	for_all_evt_queues(adapter, eqo, i) {
3505 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3506 		vec = be_msix_vec_get(adapter, eqo);
3507 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3508 		if (status)
3509 			goto err_msix;
3510 
3511 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3512 	}
3513 
3514 	return 0;
3515 err_msix:
3516 	for (i--; i >= 0; i--) {
3517 		eqo = &adapter->eq_obj[i];
3518 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3519 	}
3520 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3521 		 status);
3522 	be_msix_disable(adapter);
3523 	return status;
3524 }
3525 
3526 static int be_irq_register(struct be_adapter *adapter)
3527 {
3528 	struct net_device *netdev = adapter->netdev;
3529 	int status;
3530 
3531 	if (msix_enabled(adapter)) {
3532 		status = be_msix_register(adapter);
3533 		if (status == 0)
3534 			goto done;
3535 		/* INTx is not supported for VF */
3536 		if (be_virtfn(adapter))
3537 			return status;
3538 	}
3539 
3540 	/* INTx: only the first EQ is used */
3541 	netdev->irq = adapter->pdev->irq;
3542 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3543 			     &adapter->eq_obj[0]);
3544 	if (status) {
3545 		dev_err(&adapter->pdev->dev,
3546 			"INTx request IRQ failed - err %d\n", status);
3547 		return status;
3548 	}
3549 done:
3550 	adapter->isr_registered = true;
3551 	return 0;
3552 }
3553 
3554 static void be_irq_unregister(struct be_adapter *adapter)
3555 {
3556 	struct net_device *netdev = adapter->netdev;
3557 	struct be_eq_obj *eqo;
3558 	int i, vec;
3559 
3560 	if (!adapter->isr_registered)
3561 		return;
3562 
3563 	/* INTx */
3564 	if (!msix_enabled(adapter)) {
3565 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3566 		goto done;
3567 	}
3568 
3569 	/* MSIx */
3570 	for_all_evt_queues(adapter, eqo, i) {
3571 		vec = be_msix_vec_get(adapter, eqo);
3572 		irq_set_affinity_hint(vec, NULL);
3573 		free_irq(vec, eqo);
3574 	}
3575 
3576 done:
3577 	adapter->isr_registered = false;
3578 }
3579 
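/* Destroy all RX queues: drain their CQs, free posted RX buffers and
 * disable RSS.
 */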
3580 static void be_rx_qs_destroy(struct be_adapter *adapter)
3581 {
3582 	struct rss_info *rss = &adapter->rss_info;
3583 	struct be_queue_info *q;
3584 	struct be_rx_obj *rxo;
3585 	int i;
3586 
3587 	for_all_rx_queues(adapter, rxo, i) {
3588 		q = &rxo->q;
3589 		if (q->created) {
3590 			/* If RXQs are destroyed while in an "out of buffer"
3591 			 * state, there is a possibility of an HW stall on
3592 			 * Lancer. So, post 64 buffers to each queue to relieve
3593 			 * the "out of buffer" condition.
3594 			 * Make sure there's space in the RXQ before posting.
3595 			 */
3596 			if (lancer_chip(adapter)) {
3597 				be_rx_cq_clean(rxo);
3598 				if (atomic_read(&q->used) == 0)
3599 					be_post_rx_frags(rxo, GFP_KERNEL,
3600 							 MAX_RX_POST);
3601 			}
3602 
3603 			be_cmd_rxq_destroy(adapter, q);
3604 			be_rx_cq_clean(rxo);
3605 			be_rxq_clean(rxo);
3606 		}
3607 		be_queue_free(adapter, q);
3608 	}
3609 
3610 	if (rss->rss_flags) {
3611 		rss->rss_flags = RSS_ENABLE_NONE;
		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
				  RSS_INDIR_TABLE_LEN, rss->rss_hkey);
3614 	}
3615 }
3616 
3617 static void be_disable_if_filters(struct be_adapter *adapter)
3618 {
	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3620 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3621 	    check_privilege(adapter, BE_PRIV_FILTMGMT))
3622 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3623 
3624 	be_clear_uc_list(adapter);
3625 	be_clear_mc_list(adapter);
3626 
3627 	/* The IFACE flags are enabled in the open path and cleared
3628 	 * in the close path. When a VF gets detached from the host and
3629 	 * assigned to a VM the following happens:
3630 	 *	- VF's IFACE flags get cleared in the detach path
3631 	 *	- IFACE create is issued by the VF in the attach path
3632 	 * Due to a bug in the BE3/Skyhawk-R FW
3633 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3634 	 * specified along with the IFACE create cmd issued by a VF are not
3635 	 * honoured by FW.  As a consequence, if a *new* driver
3636 	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is used by a VM/VF,
3638 	 * the IFACE gets created *without* the needed flags.
3639 	 * To avoid this, disable RX-filter flags only for Lancer.
3640 	 */
3641 	if (lancer_chip(adapter)) {
3642 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3643 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3644 	}
3645 }
3646 
3647 static int be_close(struct net_device *netdev)
3648 {
3649 	struct be_adapter *adapter = netdev_priv(netdev);
3650 	struct be_eq_obj *eqo;
3651 	int i;
3652 
3653 	/* This protection is needed as be_close() may be called even when the
	 * adapter is in a cleared state (after an EEH permanent failure)
3655 	 */
3656 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3657 		return 0;
3658 
3659 	/* Before attempting cleanup ensure all the pending cmds in the
3660 	 * config_wq have finished execution
3661 	 */
3662 	flush_workqueue(be_wq);
3663 
3664 	be_disable_if_filters(adapter);
3665 
3666 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3667 		for_all_evt_queues(adapter, eqo, i) {
3668 			napi_disable(&eqo->napi);
3669 			be_disable_busy_poll(eqo);
3670 		}
3671 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3672 	}
3673 
3674 	be_async_mcc_disable(adapter);
3675 
3676 	/* Wait for all pending tx completions to arrive so that
3677 	 * all tx skbs are freed.
3678 	 */
3679 	netif_tx_disable(netdev);
3680 	be_tx_compl_clean(adapter);
3681 
3682 	be_rx_qs_destroy(adapter);
3683 
3684 	for_all_evt_queues(adapter, eqo, i) {
3685 		if (msix_enabled(adapter))
3686 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3687 		else
3688 			synchronize_irq(netdev->irq);
3689 		be_eq_clean(eqo);
3690 	}
3691 
3692 	be_irq_unregister(adapter);
3693 
3694 	return 0;
3695 }
3696 
3697 static int be_rx_qs_create(struct be_adapter *adapter)
3698 {
3699 	struct rss_info *rss = &adapter->rss_info;
3700 	u8 rss_key[RSS_HASH_KEY_LEN];
3701 	struct be_rx_obj *rxo;
3702 	int rc, i, j;
3703 
3704 	for_all_rx_queues(adapter, rxo, i) {
3705 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3706 				    sizeof(struct be_eth_rx_d));
3707 		if (rc)
3708 			return rc;
3709 	}
3710 
3711 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3712 		rxo = default_rxo(adapter);
3713 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3714 				       rx_frag_size, adapter->if_handle,
3715 				       false, &rxo->rss_id);
3716 		if (rc)
3717 			return rc;
3718 	}
3719 
3720 	for_all_rss_queues(adapter, rxo, i) {
3721 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3722 				       rx_frag_size, adapter->if_handle,
3723 				       true, &rxo->rss_id);
3724 		if (rc)
3725 			return rc;
3726 	}
3727 
3728 	if (be_multi_rxq(adapter)) {
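		/* Fill the RSS indirection table by cycling through the
		 * rss_ids of the created RSS queues until all
		 * RSS_INDIR_TABLE_LEN entries are populated.
		 */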
3729 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3730 			for_all_rss_queues(adapter, rxo, i) {
3731 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3732 					break;
3733 				rss->rsstable[j + i] = rxo->rss_id;
3734 				rss->rss_queue[j + i] = i;
3735 			}
3736 		}
3737 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3738 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3739 
3740 		if (!BEx_chip(adapter))
3741 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3742 				RSS_ENABLE_UDP_IPV6;
3743 
3744 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3745 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3746 				       RSS_INDIR_TABLE_LEN, rss_key);
3747 		if (rc) {
3748 			rss->rss_flags = RSS_ENABLE_NONE;
3749 			return rc;
3750 		}
3751 
3752 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3753 	} else {
3754 		/* Disable RSS, if only default RX Q is created */
3755 		rss->rss_flags = RSS_ENABLE_NONE;
3756 	}
3757 
3758 
3759 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3760 	 * which is a queue empty condition
3761 	 */
3762 	for_all_rx_queues(adapter, rxo, i)
3763 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3764 
3765 	return 0;
3766 }
3767 
3768 static int be_enable_if_filters(struct be_adapter *adapter)
3769 {
3770 	int status;
3771 
3772 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3773 	if (status)
3774 		return status;
3775 
3776 	/* Don't add MAC on BE3 VFs without FILTMGMT privilege */
3777 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3778 	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3779 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780 		if (status)
3781 			return status;
3782 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783 	}
3784 
3785 	if (adapter->vlans_added)
3786 		be_vid_config(adapter);
3787 
3788 	__be_set_rx_mode(adapter);
3789 
3790 	return 0;
3791 }
3792 
3793 static int be_open(struct net_device *netdev)
3794 {
3795 	struct be_adapter *adapter = netdev_priv(netdev);
3796 	struct be_eq_obj *eqo;
3797 	struct be_rx_obj *rxo;
3798 	struct be_tx_obj *txo;
3799 	u8 link_status;
3800 	int status, i;
3801 
3802 	status = be_rx_qs_create(adapter);
3803 	if (status)
3804 		goto err;
3805 
3806 	status = be_enable_if_filters(adapter);
3807 	if (status)
3808 		goto err;
3809 
3810 	status = be_irq_register(adapter);
3811 	if (status)
3812 		goto err;
3813 
3814 	for_all_rx_queues(adapter, rxo, i)
3815 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3816 
3817 	for_all_tx_queues(adapter, txo, i)
3818 		be_cq_notify(adapter, txo->cq.id, true, 0);
3819 
3820 	be_async_mcc_enable(adapter);
3821 
3822 	for_all_evt_queues(adapter, eqo, i) {
3823 		napi_enable(&eqo->napi);
3824 		be_enable_busy_poll(eqo);
3825 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3826 	}
3827 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3828 
3829 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3830 	if (!status)
3831 		be_link_status_update(adapter, link_status);
3832 
3833 	netif_tx_start_all_queues(netdev);
3834 	if (skyhawk_chip(adapter))
3835 		udp_tunnel_get_rx_info(netdev);
3836 
3837 	return 0;
3838 err:
3839 	be_close(adapter->netdev);
3840 	return -EIO;
3841 }
3842 
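/* Build the seed MAC for VFs: keep the OUI (first 3 bytes) of the PF's MAC
 * and derive the remaining 3 bytes from a jhash of the full PF MAC.
 */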
3843 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3844 {
3845 	u32 addr;
3846 
3847 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3848 
3849 	mac[5] = (u8)(addr & 0xFF);
3850 	mac[4] = (u8)((addr >> 8) & 0xFF);
3851 	mac[3] = (u8)((addr >> 16) & 0xFF);
3852 	/* Use the OUI from the current MAC address */
3853 	memcpy(mac, adapter->netdev->dev_addr, 3);
3854 }
3855 
3856 /*
3857  * Generate a seed MAC address from the PF MAC Address using jhash.
 * MAC addresses for VFs are assigned incrementally starting from the seed.
3859  * These addresses are programmed in the ASIC by the PF and the VF driver
3860  * queries for the MAC address during its probe.
3861  */
3862 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3863 {
3864 	u32 vf;
3865 	int status = 0;
3866 	u8 mac[ETH_ALEN];
3867 	struct be_vf_cfg *vf_cfg;
3868 
3869 	be_vf_eth_addr_generate(adapter, mac);
3870 
3871 	for_all_vfs(adapter, vf_cfg, vf) {
3872 		if (BEx_chip(adapter))
3873 			status = be_cmd_pmac_add(adapter, mac,
3874 						 vf_cfg->if_handle,
3875 						 &vf_cfg->pmac_id, vf + 1);
3876 		else
3877 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3878 						vf + 1);
3879 
3880 		if (status)
3881 			dev_err(&adapter->pdev->dev,
3882 				"Mac address assignment failed for VF %d\n",
3883 				vf);
3884 		else
3885 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3886 
3887 		mac[5] += 1;
3888 	}
3889 	return status;
3890 }
3891 
3892 static int be_vfs_mac_query(struct be_adapter *adapter)
3893 {
3894 	int status, vf;
3895 	u8 mac[ETH_ALEN];
3896 	struct be_vf_cfg *vf_cfg;
3897 
3898 	for_all_vfs(adapter, vf_cfg, vf) {
3899 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3900 					       mac, vf_cfg->if_handle,
					       false, vf + 1);
3902 		if (status)
3903 			return status;
3904 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905 	}
3906 	return 0;
3907 }
3908 
3909 static void be_vf_clear(struct be_adapter *adapter)
3910 {
3911 	struct be_vf_cfg *vf_cfg;
3912 	u32 vf;
3913 
3914 	if (pci_vfs_assigned(adapter->pdev)) {
3915 		dev_warn(&adapter->pdev->dev,
3916 			 "VFs are assigned to VMs: not disabling VFs\n");
3917 		goto done;
3918 	}
3919 
3920 	pci_disable_sriov(adapter->pdev);
3921 
3922 	for_all_vfs(adapter, vf_cfg, vf) {
3923 		if (BEx_chip(adapter))
3924 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3925 					vf_cfg->pmac_id, vf + 1);
3926 		else
3927 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3928 				       vf + 1);
3929 
3930 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3931 	}
3932 
3933 	if (BE3_chip(adapter))
3934 		be_cmd_set_hsw_config(adapter, 0, 0,
3935 				      adapter->if_handle,
3936 				      PORT_FWD_TYPE_PASSTHRU, 0);
3937 done:
3938 	kfree(adapter->vf_cfg);
3939 	adapter->num_vfs = 0;
3940 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3941 }
3942 
3943 static void be_clear_queues(struct be_adapter *adapter)
3944 {
3945 	be_mcc_queues_destroy(adapter);
3946 	be_rx_cqs_destroy(adapter);
3947 	be_tx_queues_destroy(adapter);
3948 	be_evt_queues_destroy(adapter);
3949 }
3950 
3951 static void be_cancel_worker(struct be_adapter *adapter)
3952 {
3953 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3954 		cancel_delayed_work_sync(&adapter->work);
3955 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3956 	}
3957 }
3958 
3959 static void be_cancel_err_detection(struct be_adapter *adapter)
3960 {
3961 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3962 
3963 	if (!be_err_recovery_workq)
3964 		return;
3965 
3966 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3967 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3968 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3969 	}
3970 }
3971 
3972 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3973 {
3974 	struct net_device *netdev = adapter->netdev;
3975 
3976 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3977 		be_cmd_manage_iface(adapter, adapter->if_handle,
3978 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3979 
3980 	if (adapter->vxlan_port)
3981 		be_cmd_set_vxlan_port(adapter, 0);
3982 
3983 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3984 	adapter->vxlan_port = 0;
3985 
3986 	netdev->hw_enc_features = 0;
3987 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3988 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3989 }
3990 
3991 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3992 				struct be_resources *vft_res)
3993 {
3994 	struct be_resources res = adapter->pool_res;
3995 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3996 	struct be_resources res_mod = {0};
3997 	u16 num_vf_qs = 1;
3998 
	/* Distribute the queue resources among the PF and its VFs */
4000 	if (num_vfs) {
4001 		/* Divide the rx queues evenly among the VFs and the PF, capped
4002 		 * at VF-EQ-count. Any remainder queues belong to the PF.
4003 		 */
4004 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4005 				res.max_rss_qs / (num_vfs + 1));
4006 
4007 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
		 * RSS Tables per port. Provide RSS on VFs only if the number of
		 * VFs requested is less than its PF pool's RSS Tables limit.
4010 		 */
4011 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4012 			num_vf_qs = 1;
4013 	}
4014 
	/* Fields set to all '1's by the GET_PROFILE_CONFIG cmd are the ones
	 * that are modifiable using the SET_PROFILE_CONFIG cmd.
4017 	 */
4018 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4019 				  RESOURCE_MODIFIABLE, 0);
4020 
4021 	/* If RSS IFACE capability flags are modifiable for a VF, set the
4022 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4023 	 * more than 1 RSSQ is available for a VF.
4024 	 * Otherwise, provision only 1 queue pair for VF.
4025 	 */
4026 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4027 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4028 		if (num_vf_qs > 1) {
4029 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4030 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4031 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4032 		} else {
4033 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4034 					     BE_IF_FLAGS_DEFQ_RSS);
4035 		}
4036 	} else {
4037 		num_vf_qs = 1;
4038 	}
4039 
4040 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4041 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4042 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4043 	}
4044 
4045 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4046 	vft_res->max_rx_qs = num_vf_qs;
4047 	vft_res->max_rss_qs = num_vf_qs;
4048 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4049 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4050 
4051 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
	 * among the PF and its VFs, if the fields are changeable
4053 	 */
4054 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4055 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4056 
4057 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4058 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4059 
4060 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4061 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4062 
4063 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4064 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4065 }
4066 
4067 static void be_if_destroy(struct be_adapter *adapter)
4068 {
	be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4070 
4071 	kfree(adapter->pmac_id);
4072 	adapter->pmac_id = NULL;
4073 
4074 	kfree(adapter->mc_list);
4075 	adapter->mc_list = NULL;
4076 
4077 	kfree(adapter->uc_list);
4078 	adapter->uc_list = NULL;
4079 }
4080 
4081 static int be_clear(struct be_adapter *adapter)
4082 {
4083 	struct pci_dev *pdev = adapter->pdev;
	struct be_resources vft_res = {0};
4085 
4086 	be_cancel_worker(adapter);
4087 
4088 	flush_workqueue(be_wq);
4089 
4090 	if (sriov_enabled(adapter))
4091 		be_vf_clear(adapter);
4092 
4093 	/* Re-configure FW to distribute resources evenly across max-supported
4094 	 * number of VFs, only when VFs are not already enabled.
4095 	 */
4096 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4097 	    !pci_vfs_assigned(pdev)) {
4098 		be_calculate_vf_res(adapter,
4099 				    pci_sriov_get_totalvfs(pdev),
4100 				    &vft_res);
4101 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4102 					pci_sriov_get_totalvfs(pdev),
4103 					&vft_res);
4104 	}
4105 
4106 	be_disable_vxlan_offloads(adapter);
4107 
4108 	be_if_destroy(adapter);
4109 
4110 	be_clear_queues(adapter);
4111 
4112 	be_msix_disable(adapter);
4113 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4114 	return 0;
4115 }
4116 
4117 static int be_vfs_if_create(struct be_adapter *adapter)
4118 {
4119 	struct be_resources res = {0};
4120 	u32 cap_flags, en_flags, vf;
4121 	struct be_vf_cfg *vf_cfg;
4122 	int status;
4123 
4124 	/* If a FW profile exists, then cap_flags are updated */
4125 	cap_flags = BE_VF_IF_EN_FLAGS;
4126 
4127 	for_all_vfs(adapter, vf_cfg, vf) {
4128 		if (!BE3_chip(adapter)) {
4129 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4130 							   ACTIVE_PROFILE_TYPE,
4131 							   RESOURCE_LIMITS,
4132 							   vf + 1);
4133 			if (!status) {
4134 				cap_flags = res.if_cap_flags;
4135 				/* Prevent VFs from enabling VLAN promiscuous
4136 				 * mode
4137 				 */
4138 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4139 			}
4140 		}
4141 
4142 		/* PF should enable IF flags during proxy if_create call */
4143 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4144 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4145 					  &vf_cfg->if_handle, vf + 1);
4146 		if (status)
4147 			return status;
4148 	}
4149 
4150 	return 0;
4151 }
4152 
4153 static int be_vf_setup_init(struct be_adapter *adapter)
4154 {
4155 	struct be_vf_cfg *vf_cfg;
4156 	int vf;
4157 
4158 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4159 				  GFP_KERNEL);
4160 	if (!adapter->vf_cfg)
4161 		return -ENOMEM;
4162 
4163 	for_all_vfs(adapter, vf_cfg, vf) {
4164 		vf_cfg->if_handle = -1;
4165 		vf_cfg->pmac_id = -1;
4166 	}
4167 	return 0;
4168 }
4169 
4170 static int be_vf_setup(struct be_adapter *adapter)
4171 {
4172 	struct device *dev = &adapter->pdev->dev;
4173 	struct be_vf_cfg *vf_cfg;
4174 	int status, old_vfs, vf;
4175 	bool spoofchk;
4176 
4177 	old_vfs = pci_num_vf(adapter->pdev);
4178 
4179 	status = be_vf_setup_init(adapter);
4180 	if (status)
4181 		goto err;
4182 
4183 	if (old_vfs) {
4184 		for_all_vfs(adapter, vf_cfg, vf) {
4185 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4186 			if (status)
4187 				goto err;
4188 		}
4189 
4190 		status = be_vfs_mac_query(adapter);
4191 		if (status)
4192 			goto err;
4193 	} else {
4194 		status = be_vfs_if_create(adapter);
4195 		if (status)
4196 			goto err;
4197 
4198 		status = be_vf_eth_addr_config(adapter);
4199 		if (status)
4200 			goto err;
4201 	}
4202 
4203 	for_all_vfs(adapter, vf_cfg, vf) {
		/* Allow VFs to program MAC/VLAN filters */
4205 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4206 						  vf + 1);
4207 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4208 			status = be_cmd_set_fn_privileges(adapter,
4209 							  vf_cfg->privileges |
4210 							  BE_PRIV_FILTMGMT,
4211 							  vf + 1);
4212 			if (!status) {
4213 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4214 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4215 					 vf);
4216 			}
4217 		}
4218 
4219 		/* Allow full available bandwidth */
4220 		if (!old_vfs)
4221 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4222 
4223 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4224 					       vf_cfg->if_handle, NULL,
4225 					       &spoofchk);
4226 		if (!status)
4227 			vf_cfg->spoofchk = spoofchk;
4228 
4229 		if (!old_vfs) {
4230 			be_cmd_enable_vf(adapter, vf + 1);
4231 			be_cmd_set_logical_link_config(adapter,
4232 						       IFLA_VF_LINK_STATE_AUTO,
						       vf + 1);
4234 		}
4235 	}
4236 
4237 	if (!old_vfs) {
4238 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4239 		if (status) {
4240 			dev_err(dev, "SRIOV enable failed\n");
4241 			adapter->num_vfs = 0;
4242 			goto err;
4243 		}
4244 	}
4245 
4246 	if (BE3_chip(adapter)) {
4247 		/* On BE3, enable VEB only when SRIOV is enabled */
4248 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4249 					       adapter->if_handle,
4250 					       PORT_FWD_TYPE_VEB, 0);
4251 		if (status)
4252 			goto err;
4253 	}
4254 
4255 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4256 	return 0;
4257 err:
4258 	dev_err(dev, "VF setup failed\n");
4259 	be_vf_clear(adapter);
4260 	return status;
4261 }
4262 
/* Converting function_mode bits on BE3 to SH mc_type enums */
4265 static u8 be_convert_mc_type(u32 function_mode)
4266 {
4267 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4268 		return vNIC1;
4269 	else if (function_mode & QNQ_MODE)
4270 		return FLEX10;
4271 	else if (function_mode & VNIC_MODE)
4272 		return vNIC2;
4273 	else if (function_mode & UMC_ENABLED)
4274 		return UMC;
4275 	else
4276 		return MC_NONE;
4277 }
4278 
/* On BE2/BE3, the FW does not report the supported limits */
4280 static void BEx_get_resources(struct be_adapter *adapter,
4281 			      struct be_resources *res)
4282 {
4283 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4284 
4285 	if (be_physfn(adapter))
4286 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4287 	else
4288 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4289 
4290 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4291 
4292 	if (be_is_mc(adapter)) {
4293 		/* Assuming that there are 4 channels per port,
4294 		 * when multi-channel is enabled
4295 		 */
4296 		if (be_is_qnq_mode(adapter))
4297 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4298 		else
4299 			/* In a non-qnq multichannel mode, the pvid
4300 			 * takes up one vlan entry
4301 			 */
4302 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4303 	} else {
4304 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4305 	}
4306 
4307 	res->max_mcast_mac = BE_MAX_MC;
4308 
4309 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4310 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4311 	 *    *only* if it is RSS-capable.
4312 	 */
	if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4314 	    be_virtfn(adapter) ||
4315 	    (be_is_mc(adapter) &&
4316 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4317 		res->max_tx_qs = 1;
4318 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4319 		struct be_resources super_nic_res = {0};
4320 
4321 		/* On a SuperNIC profile, the driver needs to use the
4322 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4323 		 */
4324 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4325 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4326 					  0);
4327 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4328 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4329 	} else {
4330 		res->max_tx_qs = BE3_MAX_TX_QS;
4331 	}
4332 
4333 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4334 	    !use_sriov && be_physfn(adapter))
4335 		res->max_rss_qs = (adapter->be3_native) ?
4336 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4337 	res->max_rx_qs = res->max_rss_qs + 1;
4338 
4339 	if (be_physfn(adapter))
4340 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4341 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4342 	else
4343 		res->max_evt_qs = 1;
4344 
4345 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4346 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4347 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4348 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4349 }
4350 
4351 static void be_setup_init(struct be_adapter *adapter)
4352 {
4353 	adapter->vlan_prio_bmap = 0xff;
4354 	adapter->phy.link_speed = -1;
4355 	adapter->if_handle = -1;
4356 	adapter->be3_native = false;
4357 	adapter->if_flags = 0;
4358 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4359 	if (be_physfn(adapter))
4360 		adapter->cmd_privileges = MAX_PRIVILEGES;
4361 	else
4362 		adapter->cmd_privileges = MIN_PRIVILEGES;
4363 }
4364 
4365 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4366  * However, this HW limitation is not exposed to the host via any SLI cmd.
4367  * As a result, in the case of SRIOV and in particular multi-partition configs
 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
 * for distribution between the VFs. This self-imposed limit will determine the
 * number of VFs for which RSS can be enabled.
4371  */
4372 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4373 {
4374 	struct be_port_resources port_res = {0};
4375 	u8 rss_tables_on_port;
4376 	u16 max_vfs = be_max_vfs(adapter);
4377 
4378 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4379 				  RESOURCE_LIMITS, 0);
4380 
4381 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4382 
4383 	/* Each PF Pool's RSS Tables limit =
4384 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4385 	 */
4386 	adapter->pool_res.max_rss_tables =
4387 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4388 }
4389 
4390 static int be_get_sriov_config(struct be_adapter *adapter)
4391 {
4392 	struct be_resources res = {0};
4393 	int max_vfs, old_vfs;
4394 
4395 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4396 				  RESOURCE_LIMITS, 0);
4397 
4398 	/* Some old versions of BE3 FW don't report max_vfs value */
4399 	if (BE3_chip(adapter) && !res.max_vfs) {
4400 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4401 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4402 	}
4403 
4404 	adapter->pool_res = res;
4405 
4406 	/* If during previous unload of the driver, the VFs were not disabled,
4407 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4408 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4409 	 */
4410 	old_vfs = pci_num_vf(adapter->pdev);
4411 	if (old_vfs) {
4412 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4413 			 old_vfs);
4414 
4415 		adapter->pool_res.max_vfs =
4416 			pci_sriov_get_totalvfs(adapter->pdev);
4417 		adapter->num_vfs = old_vfs;
4418 	}
4419 
4420 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4421 		be_calculate_pf_pool_rss_tables(adapter);
4422 		dev_info(&adapter->pdev->dev,
4423 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4424 			 be_max_pf_pool_rss_tables(adapter));
4425 	}
4426 	return 0;
4427 }
4428 
4429 static void be_alloc_sriov_res(struct be_adapter *adapter)
4430 {
4431 	int old_vfs = pci_num_vf(adapter->pdev);
	struct be_resources vft_res = {0};
4433 	int status;
4434 
4435 	be_get_sriov_config(adapter);
4436 
4437 	if (!old_vfs)
4438 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4439 
4440 	/* When the HW is in SRIOV capable configuration, the PF-pool
	 * resources are given to the PF during driver load, if there are no
4442 	 * old VFs. This facility is not available in BE3 FW.
4443 	 * Also, this is done by FW in Lancer chip.
4444 	 */
4445 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4446 		be_calculate_vf_res(adapter, 0, &vft_res);
4447 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4448 						 &vft_res);
4449 		if (status)
4450 			dev_err(&adapter->pdev->dev,
4451 				"Failed to optimize SRIOV resources\n");
4452 	}
4453 }
4454 
4455 static int be_get_resources(struct be_adapter *adapter)
4456 {
4457 	struct device *dev = &adapter->pdev->dev;
4458 	struct be_resources res = {0};
4459 	int status;
4460 
	/* For Lancer, SH etc. read per-function resource limits from FW.
	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4464 	 */
4465 	if (BEx_chip(adapter)) {
4466 		BEx_get_resources(adapter, &res);
4467 	} else {
4468 		status = be_cmd_get_func_config(adapter, &res);
4469 		if (status)
4470 			return status;
4471 
		/* If a default RXQ must be created, we'll use up one RSSQ */
4473 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4474 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4475 			res.max_rss_qs -= 1;
4476 	}
4477 
	/* If RoCE is supported, stash away half the EQs for RoCE */
4479 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4480 				res.max_evt_qs / 2 : res.max_evt_qs;
4481 	adapter->res = res;
4482 
4483 	/* If FW supports RSS default queue, then skip creating non-RSS
4484 	 * queue for non-IP traffic.
4485 	 */
4486 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4487 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4488 
4489 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4490 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4491 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4492 		 be_max_vfs(adapter));
4493 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4494 		 be_max_uc(adapter), be_max_mc(adapter),
4495 		 be_max_vlans(adapter));
4496 
4497 	/* Ensure RX and TX queues are created in pairs at init time */
4498 	adapter->cfg_num_rx_irqs =
4499 				min_t(u16, netif_get_num_default_rss_queues(),
4500 				      be_max_qp_irqs(adapter));
4501 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4502 	return 0;
4503 }
4504 
4505 static int be_get_config(struct be_adapter *adapter)
4506 {
4507 	int status, level;
4508 	u16 profile_id;
4509 
4510 	status = be_cmd_get_cntl_attributes(adapter);
4511 	if (status)
4512 		return status;
4513 
4514 	status = be_cmd_query_fw_cfg(adapter);
4515 	if (status)
4516 		return status;
4517 
4518 	if (!lancer_chip(adapter) && be_physfn(adapter))
4519 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4520 
4521 	if (BEx_chip(adapter)) {
4522 		level = be_cmd_get_fw_log_level(adapter);
4523 		adapter->msg_enable =
4524 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4525 	}
4526 
4527 	be_cmd_get_acpi_wol_cap(adapter);
4528 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4529 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4530 
4531 	be_cmd_query_port_name(adapter);
4532 
4533 	if (be_physfn(adapter)) {
4534 		status = be_cmd_get_active_profile(adapter, &profile_id);
4535 		if (!status)
4536 			dev_info(&adapter->pdev->dev,
4537 				 "Using profile 0x%x\n", profile_id);
4538 	}
4539 
4540 	return 0;
4541 }
4542 
4543 static int be_mac_setup(struct be_adapter *adapter)
4544 {
4545 	u8 mac[ETH_ALEN];
4546 	int status;
4547 
4548 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4549 		status = be_cmd_get_perm_mac(adapter, mac);
4550 		if (status)
4551 			return status;
4552 
4553 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4554 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4555 	}
4556 
4557 	return 0;
4558 }
4559 
4560 static void be_schedule_worker(struct be_adapter *adapter)
4561 {
4562 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4563 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4564 }
4565 
4566 static void be_destroy_err_recovery_workq(void)
4567 {
4568 	if (!be_err_recovery_workq)
4569 		return;
4570 
4571 	flush_workqueue(be_err_recovery_workq);
4572 	destroy_workqueue(be_err_recovery_workq);
4573 	be_err_recovery_workq = NULL;
4574 }
4575 
4576 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4577 {
4578 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4579 
4580 	if (!be_err_recovery_workq)
4581 		return;
4582 
4583 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4584 			   msecs_to_jiffies(delay));
4585 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4586 }
4587 
4588 static int be_setup_queues(struct be_adapter *adapter)
4589 {
4590 	struct net_device *netdev = adapter->netdev;
4591 	int status;
4592 
4593 	status = be_evt_queues_create(adapter);
4594 	if (status)
4595 		goto err;
4596 
4597 	status = be_tx_qs_create(adapter);
4598 	if (status)
4599 		goto err;
4600 
4601 	status = be_rx_cqs_create(adapter);
4602 	if (status)
4603 		goto err;
4604 
4605 	status = be_mcc_queues_create(adapter);
4606 	if (status)
4607 		goto err;
4608 
4609 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4610 	if (status)
4611 		goto err;
4612 
4613 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4614 	if (status)
4615 		goto err;
4616 
4617 	return 0;
4618 err:
4619 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4620 	return status;
4621 }
4622 
4623 static int be_if_create(struct be_adapter *adapter)
4624 {
4625 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4626 	u32 cap_flags = be_if_cap_flags(adapter);
4627 	int status;
4628 
4629 	/* alloc required memory for other filtering fields */
4630 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4631 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4632 	if (!adapter->pmac_id)
4633 		return -ENOMEM;
4634 
4635 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4636 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4637 	if (!adapter->mc_list)
4638 		return -ENOMEM;
4639 
4640 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4641 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4642 	if (!adapter->uc_list)
4643 		return -ENOMEM;
4644 
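	/* With a single RX IRQ there is nothing for RSS to spread traffic
	 * across, so drop the RSS capability flags before creating the IFACE.
	 */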
4645 	if (adapter->cfg_num_rx_irqs == 1)
4646 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4647 
4648 	en_flags &= cap_flags;
4649 	/* will enable all the needed filter flags in be_open() */
4650 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4651 				  &adapter->if_handle, 0);
4652 
4653 	if (status)
4654 		return status;
4655 
4656 	return 0;
4657 }
4658 
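/* Tear down the IFACE and all queues and re-create them with the current
 * configuration; the netdev is closed and re-opened around this if it was
 * running. MSI-X is re-programmed only when no vectors are shared with RoCE.
 */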
4659 int be_update_queues(struct be_adapter *adapter)
4660 {
4661 	struct net_device *netdev = adapter->netdev;
4662 	int status;
4663 
4664 	if (netif_running(netdev))
4665 		be_close(netdev);
4666 
4667 	be_cancel_worker(adapter);
4668 
	/* If any vectors have been shared with RoCE, we cannot re-program
4670 	 * the MSIx table.
4671 	 */
4672 	if (!adapter->num_msix_roce_vec)
4673 		be_msix_disable(adapter);
4674 
4675 	be_clear_queues(adapter);
	status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4677 	if (status)
4678 		return status;
4679 
4680 	if (!msix_enabled(adapter)) {
4681 		status = be_msix_enable(adapter);
4682 		if (status)
4683 			return status;
4684 	}
4685 
4686 	status = be_if_create(adapter);
4687 	if (status)
4688 		return status;
4689 
4690 	status = be_setup_queues(adapter);
4691 	if (status)
4692 		return status;
4693 
4694 	be_schedule_worker(adapter);
4695 
4696 	if (netif_running(netdev))
4697 		status = be_open(netdev);
4698 
4699 	return status;
4700 }
4701 
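/* Parse the leading major number from a FW version string (e.g. "4.0" -> 4).
 * Returns 0 if the string does not begin with a number.
 */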
4702 static inline int fw_major_num(const char *fw_ver)
4703 {
4704 	int fw_major = 0, i;
4705 
4706 	i = sscanf(fw_ver, "%d.", &fw_major);
4707 	if (i != 1)
4708 		return 0;
4709 
4710 	return fw_major;
4711 }
4712 
4713 /* If it is error recovery, FLR the PF
 * Else, if any VFs are already enabled, don't FLR the PF
4715  */
4716 static bool be_reset_required(struct be_adapter *adapter)
4717 {
4718 	if (be_error_recovering(adapter))
4719 		return true;
4720 	else
4721 		return pci_num_vf(adapter->pdev) == 0;
4722 }
4723 
4724 /* Wait for the FW to be ready and perform the required initialization */
4725 static int be_func_init(struct be_adapter *adapter)
4726 {
4727 	int status;
4728 
4729 	status = be_fw_wait_ready(adapter);
4730 	if (status)
4731 		return status;
4732 
4733 	/* FW is now ready; clear errors to allow cmds/doorbell */
4734 	be_clear_error(adapter, BE_CLEAR_ALL);
4735 
4736 	if (be_reset_required(adapter)) {
4737 		status = be_cmd_reset_function(adapter);
4738 		if (status)
4739 			return status;
4740 
4741 		/* Wait for interrupts to quiesce after an FLR */
4742 		msleep(100);
4743 	}
4744 
4745 	/* Tell FW we're ready to fire cmds */
4746 	status = be_cmd_fw_init(adapter);
4747 	if (status)
4748 		return status;
4749 
4750 	/* Allow interrupts for other ULPs running on NIC function */
4751 	be_intr_set(adapter, true);
4752 
4753 	return 0;
4754 }
4755 
4756 static int be_setup(struct be_adapter *adapter)
4757 {
4758 	struct device *dev = &adapter->pdev->dev;
4759 	int status;
4760 
4761 	status = be_func_init(adapter);
4762 	if (status)
4763 		return status;
4764 
4765 	be_setup_init(adapter);
4766 
4767 	if (!lancer_chip(adapter))
4768 		be_cmd_req_native_mode(adapter);
4769 
	/* Invoke this cmd first to get pf_num and vf_num, which are needed
	 * for issuing profile-related cmds
4772 	 */
4773 	if (!BEx_chip(adapter)) {
4774 		status = be_cmd_get_func_config(adapter, NULL);
4775 		if (status)
4776 			return status;
4777 	}
4778 
4779 	status = be_get_config(adapter);
4780 	if (status)
4781 		goto err;
4782 
4783 	if (!BE2_chip(adapter) && be_physfn(adapter))
4784 		be_alloc_sriov_res(adapter);
4785 
4786 	status = be_get_resources(adapter);
4787 	if (status)
4788 		goto err;
4789 
4790 	status = be_msix_enable(adapter);
4791 	if (status)
4792 		goto err;
4793 
4794 	/* will enable all the needed filter flags in be_open() */
4795 	status = be_if_create(adapter);
4796 	if (status)
4797 		goto err;
4798 
4799 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4800 	rtnl_lock();
4801 	status = be_setup_queues(adapter);
4802 	rtnl_unlock();
4803 	if (status)
4804 		goto err;
4805 
4806 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4807 
4808 	status = be_mac_setup(adapter);
4809 	if (status)
4810 		goto err;
4811 
4812 	be_cmd_get_fw_ver(adapter);
4813 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4814 
4815 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
		dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4817 			adapter->fw_ver);
4818 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4819 	}
4820 
4821 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4822 					 adapter->rx_fc);
4823 	if (status)
4824 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4825 					&adapter->rx_fc);
4826 
4827 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4828 		 adapter->tx_fc, adapter->rx_fc);
4829 
4830 	if (be_physfn(adapter))
4831 		be_cmd_set_logical_link_config(adapter,
4832 					       IFLA_VF_LINK_STATE_AUTO, 0);
4833 
	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
	 * confusing a Linux bridge or OVS that it might be connected to.
	 * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
	 * when SRIOV is not enabled.
4838 	 */
4839 	if (BE3_chip(adapter))
4840 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4841 				      PORT_FWD_TYPE_PASSTHRU, 0);
4842 
4843 	if (adapter->num_vfs)
4844 		be_vf_setup(adapter);
4845 
4846 	status = be_cmd_get_phy_info(adapter);
4847 	if (!status && be_pause_supported(adapter))
4848 		adapter->phy.fc_autoneg = 1;
4849 
4850 	if (be_physfn(adapter) && !lancer_chip(adapter))
4851 		be_cmd_set_features(adapter);
4852 
4853 	be_schedule_worker(adapter);
4854 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4855 	return 0;
4856 err:
4857 	be_clear(adapter);
4858 	return status;
4859 }
4860 
4861 #ifdef CONFIG_NET_POLL_CONTROLLER
4862 static void be_netpoll(struct net_device *netdev)
4863 {
4864 	struct be_adapter *adapter = netdev_priv(netdev);
4865 	struct be_eq_obj *eqo;
4866 	int i;
4867 
4868 	for_all_evt_queues(adapter, eqo, i) {
4869 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4870 		napi_schedule(&eqo->napi);
4871 	}
4872 }
4873 #endif
4874 
4875 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4876 {
4877 	const struct firmware *fw;
4878 	int status;
4879 
4880 	if (!netif_running(adapter->netdev)) {
4881 		dev_err(&adapter->pdev->dev,
4882 			"Firmware load not allowed (interface is down)\n");
4883 		return -ENETDOWN;
4884 	}
4885 
4886 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4887 	if (status)
4888 		goto fw_exit;
4889 
4890 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4891 
4892 	if (lancer_chip(adapter))
4893 		status = lancer_fw_download(adapter, fw);
4894 	else
4895 		status = be_fw_download(adapter, fw);
4896 
4897 	if (!status)
4898 		be_cmd_get_fw_ver(adapter);
4899 
4900 fw_exit:
4901 	release_firmware(fw);
4902 	return status;
4903 }
4904 
4905 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4906 				 u16 flags)
4907 {
4908 	struct be_adapter *adapter = netdev_priv(dev);
4909 	struct nlattr *attr, *br_spec;
4910 	int rem;
4911 	int status = 0;
4912 	u16 mode = 0;
4913 
4914 	if (!sriov_enabled(adapter))
4915 		return -EOPNOTSUPP;
4916 
4917 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4918 	if (!br_spec)
4919 		return -EINVAL;
4920 
4921 	nla_for_each_nested(attr, br_spec, rem) {
4922 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4923 			continue;
4924 
4925 		if (nla_len(attr) < sizeof(mode))
4926 			return -EINVAL;
4927 
4928 		mode = nla_get_u16(attr);
4929 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4930 			return -EOPNOTSUPP;
4931 
4932 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4933 			return -EINVAL;
4934 
4935 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4936 					       adapter->if_handle,
4937 					       mode == BRIDGE_MODE_VEPA ?
4938 					       PORT_FWD_TYPE_VEPA :
4939 					       PORT_FWD_TYPE_VEB, 0);
4940 		if (status)
4941 			goto err;
4942 
4943 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4944 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4945 
4946 		return status;
4947 	}
4948 err:
4949 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4950 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4951 
4952 	return status;
4953 }
4954 
4955 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4956 				 struct net_device *dev, u32 filter_mask,
4957 				 int nlflags)
4958 {
4959 	struct be_adapter *adapter = netdev_priv(dev);
4960 	int status = 0;
4961 	u8 hsw_mode;
4962 
4963 	/* BE and Lancer chips support VEB mode only */
4964 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4965 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4966 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4967 			return 0;
4968 		hsw_mode = PORT_FWD_TYPE_VEB;
4969 	} else {
4970 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4971 					       adapter->if_handle, &hsw_mode,
4972 					       NULL);
4973 		if (status)
4974 			return 0;
4975 
4976 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4977 			return 0;
4978 	}
4979 
4980 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4981 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4982 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4983 				       0, 0, nlflags, filter_mask, NULL);
4984 }
4985 
4986 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4987 					 void (*func)(struct work_struct *))
4988 {
4989 	struct be_cmd_work *work;
4990 
4991 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4992 	if (!work) {
4993 		dev_err(&adapter->pdev->dev,
4994 			"be_work memory allocation failed\n");
4995 		return NULL;
4996 	}
4997 
4998 	INIT_WORK(&work->work, func);
4999 	work->adapter = adapter;
5000 	return work;
5001 }
5002 
5003 /* VxLAN offload Notes:
5004  *
5005  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5006  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5007  * is expected to work across all types of IP tunnels once exported. Skyhawk
5008  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5009  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5010  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5011  * those other tunnels are unexported on the fly through ndo_features_check().
5012  *
5013  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
 * adds more than one port, disable offloads and don't re-enable them
 * until all the tunnels are removed.
5016  */
5017 static void be_work_add_vxlan_port(struct work_struct *work)
5018 {
5019 	struct be_cmd_work *cmd_work =
5020 				container_of(work, struct be_cmd_work, work);
5021 	struct be_adapter *adapter = cmd_work->adapter;
5022 	struct net_device *netdev = adapter->netdev;
5023 	struct device *dev = &adapter->pdev->dev;
5024 	__be16 port = cmd_work->info.vxlan_port;
5025 	int status;
5026 
5027 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5028 		adapter->vxlan_port_aliases++;
5029 		goto done;
5030 	}
5031 
5032 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5033 		dev_info(dev,
5034 			 "Only one UDP port supported for VxLAN offloads\n");
5035 		dev_info(dev, "Disabling VxLAN offloads\n");
5036 		adapter->vxlan_port_count++;
5037 		goto err;
5038 	}
5039 
5040 	if (adapter->vxlan_port_count++ >= 1)
5041 		goto done;
5042 
5043 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
5044 				     OP_CONVERT_NORMAL_TO_TUNNEL);
5045 	if (status) {
5046 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5047 		goto err;
5048 	}
5049 
5050 	status = be_cmd_set_vxlan_port(adapter, port);
5051 	if (status) {
5052 		dev_warn(dev, "Failed to add VxLAN port\n");
5053 		goto err;
5054 	}
5055 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5056 	adapter->vxlan_port = port;
5057 
5058 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5059 				   NETIF_F_TSO | NETIF_F_TSO6 |
5060 				   NETIF_F_GSO_UDP_TUNNEL;
5061 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5062 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5063 
5064 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5065 		 be16_to_cpu(port));
5066 	goto done;
5067 err:
5068 	be_disable_vxlan_offloads(adapter);
5069 done:
5070 	kfree(cmd_work);
5071 }
5072 
5073 static void be_work_del_vxlan_port(struct work_struct *work)
5074 {
5075 	struct be_cmd_work *cmd_work =
5076 				container_of(work, struct be_cmd_work, work);
5077 	struct be_adapter *adapter = cmd_work->adapter;
5078 	__be16 port = cmd_work->info.vxlan_port;
5079 
5080 	if (adapter->vxlan_port != port)
5081 		goto done;
5082 
5083 	if (adapter->vxlan_port_aliases) {
5084 		adapter->vxlan_port_aliases--;
5085 		goto out;
5086 	}
5087 
5088 	be_disable_vxlan_offloads(adapter);
5089 
5090 	dev_info(&adapter->pdev->dev,
5091 		 "Disabled VxLAN offloads for UDP port %d\n",
5092 		 be16_to_cpu(port));
5093 done:
5094 	adapter->vxlan_port_count--;
5095 out:
5096 	kfree(cmd_work);
5097 }
5098 
5099 static void be_cfg_vxlan_port(struct net_device *netdev,
5100 			      struct udp_tunnel_info *ti,
5101 			      void (*func)(struct work_struct *))
5102 {
5103 	struct be_adapter *adapter = netdev_priv(netdev);
5104 	struct be_cmd_work *cmd_work;
5105 
5106 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5107 		return;
5108 
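	/* VxLAN offloads are implemented only for Skyhawk-R; skip BEx,
	 * Lancer and multi-channel configurations.
	 */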
5109 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5110 		return;
5111 
5112 	cmd_work = be_alloc_work(adapter, func);
5113 	if (cmd_work) {
5114 		cmd_work->info.vxlan_port = ti->port;
5115 		queue_work(be_wq, &cmd_work->work);
5116 	}
5117 }
5118 
5119 static void be_del_vxlan_port(struct net_device *netdev,
5120 			      struct udp_tunnel_info *ti)
5121 {
5122 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5123 }
5124 
5125 static void be_add_vxlan_port(struct net_device *netdev,
5126 			      struct udp_tunnel_info *ti)
5127 {
5128 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5129 }
5130 
5131 static netdev_features_t be_features_check(struct sk_buff *skb,
5132 					   struct net_device *dev,
5133 					   netdev_features_t features)
5134 {
5135 	struct be_adapter *adapter = netdev_priv(dev);
5136 	u8 l4_hdr = 0;
5137 
5138 	/* The code below restricts offload features for some tunneled packets.
5139 	 * Offload features for normal (non tunnel) packets are unchanged.
5140 	 */
5141 	if (!skb->encapsulation ||
5142 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5143 		return features;
5144 
5145 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5146 	 * should disable tunnel offload features if it's not a VxLAN packet,
5147 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
	 * allow other tunneled traffic like GRE to work fine while VxLAN
5149 	 * offloads are configured in Skyhawk-R.
5150 	 */
5151 	switch (vlan_get_protocol(skb)) {
5152 	case htons(ETH_P_IP):
5153 		l4_hdr = ip_hdr(skb)->protocol;
5154 		break;
5155 	case htons(ETH_P_IPV6):
5156 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5157 		break;
5158 	default:
5159 		return features;
5160 	}
5161 
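	/* Keep the offloads only for a well-formed VxLAN packet destined to
	 * the UDP port configured on the adapter; otherwise strip the
	 * checksum and GSO features for this skb.
	 */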
5162 	if (l4_hdr != IPPROTO_UDP ||
5163 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5164 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5165 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5166 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5167 	    !adapter->vxlan_port ||
5168 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5169 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5170 
5171 	return features;
5172 }
5173 
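/* Report a physical port id built from the HBA port number followed by the
 * controller's serial number words, copied in reverse word order.
 */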
5174 static int be_get_phys_port_id(struct net_device *dev,
5175 			       struct netdev_phys_item_id *ppid)
5176 {
5177 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5178 	struct be_adapter *adapter = netdev_priv(dev);
5179 	u8 *id;
5180 
5181 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5182 		return -ENOSPC;
5183 
5184 	ppid->id[0] = adapter->hba_port_num + 1;
5185 	id = &ppid->id[1];
5186 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5187 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5188 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5189 
5190 	ppid->id_len = id_len;
5191 
5192 	return 0;
5193 }
5194 
5195 static void be_set_rx_mode(struct net_device *dev)
5196 {
5197 	struct be_adapter *adapter = netdev_priv(dev);
5198 	struct be_cmd_work *work;
5199 
5200 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5201 	if (work)
5202 		queue_work(be_wq, &work->work);
5203 }
5204 
5205 static const struct net_device_ops be_netdev_ops = {
5206 	.ndo_open		= be_open,
5207 	.ndo_stop		= be_close,
5208 	.ndo_start_xmit		= be_xmit,
5209 	.ndo_set_rx_mode	= be_set_rx_mode,
5210 	.ndo_set_mac_address	= be_mac_addr_set,
5211 	.ndo_get_stats64	= be_get_stats64,
5212 	.ndo_validate_addr	= eth_validate_addr,
5213 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5214 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5215 	.ndo_set_vf_mac		= be_set_vf_mac,
5216 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5217 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5218 	.ndo_get_vf_config	= be_get_vf_config,
5219 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5220 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5221 #ifdef CONFIG_NET_POLL_CONTROLLER
5222 	.ndo_poll_controller	= be_netpoll,
5223 #endif
5224 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5225 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5226 #ifdef CONFIG_NET_RX_BUSY_POLL
5227 	.ndo_busy_poll		= be_busy_poll,
5228 #endif
5229 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5230 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5231 	.ndo_features_check	= be_features_check,
5232 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5233 };
5234 
5235 static void be_netdev_init(struct net_device *netdev)
5236 {
5237 	struct be_adapter *adapter = netdev_priv(netdev);
5238 
5239 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5240 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5241 		NETIF_F_HW_VLAN_CTAG_TX;
5242 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5243 		netdev->hw_features |= NETIF_F_RXHASH;
5244 
5245 	netdev->features |= netdev->hw_features |
5246 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5247 
5248 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5249 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5250 
5251 	netdev->priv_flags |= IFF_UNICAST_FLT;
5252 
5253 	netdev->flags |= IFF_MULTICAST;
5254 
5255 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5256 
5257 	netdev->netdev_ops = &be_netdev_ops;
5258 
5259 	netdev->ethtool_ops = &be_ethtool_ops;
5260 
5261 	/* MTU range: 256 - 9000 */
5262 	netdev->min_mtu = BE_MIN_MTU;
5263 	netdev->max_mtu = BE_MAX_MTU;
5264 }
5265 
5266 static void be_cleanup(struct be_adapter *adapter)
5267 {
5268 	struct net_device *netdev = adapter->netdev;
5269 
5270 	rtnl_lock();
5271 	netif_device_detach(netdev);
5272 	if (netif_running(netdev))
5273 		be_close(netdev);
5274 	rtnl_unlock();
5275 
5276 	be_clear(adapter);
5277 }
5278 
5279 static int be_resume(struct be_adapter *adapter)
5280 {
5281 	struct net_device *netdev = adapter->netdev;
5282 	int status;
5283 
5284 	status = be_setup(adapter);
5285 	if (status)
5286 		return status;
5287 
5288 	rtnl_lock();
5289 	if (netif_running(netdev))
5290 		status = be_open(netdev);
5291 	rtnl_unlock();
5292 
5293 	if (status)
5294 		return status;
5295 
5296 	netif_device_attach(netdev);
5297 
5298 	return 0;
5299 }
5300 
5301 static void be_soft_reset(struct be_adapter *adapter)
5302 {
5303 	u32 val;
5304 
5305 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5306 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5307 	val |= SLIPORT_SOFTRESET_SR_MASK;
5308 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5309 }
5310 
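/* A UE is treated as recoverable only when the POST stage reports a
 * recoverable error with a non-zero error code, enough time has elapsed
 * since driver load and since the last recovery, and the error code is not
 * a repeat of the previous one.
 */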
5311 static bool be_err_is_recoverable(struct be_adapter *adapter)
5312 {
5313 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5314 	unsigned long initial_idle_time =
5315 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5316 	unsigned long recovery_interval =
5317 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5318 	u16 ue_err_code;
5319 	u32 val;
5320 
5321 	val = be_POST_stage_get(adapter);
5322 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5323 		return false;
5324 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5325 	if (ue_err_code == 0)
5326 		return false;
5327 
5328 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5329 		ue_err_code);
5330 
5331 	if (jiffies - err_rec->probe_time <= initial_idle_time) {
5332 		dev_err(&adapter->pdev->dev,
5333 			"Cannot recover within %lu sec from driver load\n",
5334 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5335 		return false;
5336 	}
5337 
5338 	if (err_rec->last_recovery_time &&
5339 	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5340 		dev_err(&adapter->pdev->dev,
5341 			"Cannot recover within %lu sec from last recovery\n",
5342 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5343 		return false;
5344 	}
5345 
5346 	if (ue_err_code == err_rec->last_err_code) {
5347 		dev_err(&adapter->pdev->dev,
5348 			"Cannot recover from a consecutive TPE error\n");
5349 		return false;
5350 	}
5351 
5352 	err_rec->last_recovery_time = jiffies;
5353 	err_rec->last_err_code = ue_err_code;
5354 	return true;
5355 }
5356 
5357 static int be_tpe_recover(struct be_adapter *adapter)
5358 {
5359 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5360 	int status = -EAGAIN;
5361 	u32 val;
5362 
5363 	switch (err_rec->recovery_state) {
5364 	case ERR_RECOVERY_ST_NONE:
5365 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5366 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5367 		break;
5368 
5369 	case ERR_RECOVERY_ST_DETECT:
5370 		val = be_POST_stage_get(adapter);
5371 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5372 		    POST_STAGE_RECOVERABLE_ERR) {
5373 			dev_err(&adapter->pdev->dev,
5374 				"Unrecoverable HW error detected: 0x%x\n", val);
5375 			status = -EINVAL;
5376 			err_rec->resched_delay = 0;
5377 			break;
5378 		}
5379 
5380 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5381 
5382 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5383 		 * milliseconds before it checks for final error status in
		 * SLIPORT_SEMAPHORE to determine whether the recovery criteria
		 * are met. If they are, PF0 initiates a Soft Reset.
5386 		 */
5387 		if (adapter->pf_num == 0) {
5388 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5389 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5390 					ERR_RECOVERY_UE_DETECT_DURATION;
5391 			break;
5392 		}
5393 
5394 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5395 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5396 					ERR_RECOVERY_UE_DETECT_DURATION;
5397 		break;
5398 
5399 	case ERR_RECOVERY_ST_RESET:
5400 		if (!be_err_is_recoverable(adapter)) {
5401 			dev_err(&adapter->pdev->dev,
5402 				"Failed to meet recovery criteria\n");
5403 			status = -EIO;
5404 			err_rec->resched_delay = 0;
5405 			break;
5406 		}
5407 		be_soft_reset(adapter);
5408 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5409 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5410 					err_rec->ue_to_reset_time;
5411 		break;
5412 
5413 	case ERR_RECOVERY_ST_PRE_POLL:
5414 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5415 		err_rec->resched_delay = 0;
5416 		status = 0;			/* done */
5417 		break;
5418 
5419 	default:
5420 		status = -EINVAL;
5421 		err_rec->resched_delay = 0;
5422 		break;
5423 	}
5424 
5425 	return status;
5426 }
5427 
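/* Recovers the adapter from a HW error: runs the TPE state machine on
 * non-Lancer chips (if supported and not disabled), waits for the FW to
 * become ready, then tears down and re-creates the queues via
 * be_cleanup()/be_resume().
 */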
5428 static int be_err_recover(struct be_adapter *adapter)
5429 {
5430 	int status;
5431 
5432 	if (!lancer_chip(adapter)) {
5433 		if (!adapter->error_recovery.recovery_supported ||
5434 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5435 			return -EIO;
5436 		status = be_tpe_recover(adapter);
5437 		if (status)
5438 			goto err;
5439 	}
5440 
5441 	/* Wait for adapter to reach quiescent state before
5442 	 * destroying queues
5443 	 */
5444 	status = be_fw_wait_ready(adapter);
5445 	if (status)
5446 		goto err;
5447 
5448 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5449 
5450 	be_cleanup(adapter);
5451 
5452 	status = be_resume(adapter);
5453 	if (status)
5454 		goto err;
5455 
5456 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5457 
5458 err:
5459 	return status;
5460 }
5461 
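/* Periodic error-detection work item: polls the adapter for HW errors and,
 * when one is found, attempts recovery. The task reschedules itself with a
 * delay that depends on the recovery outcome and chip type, and gives up
 * only when recovery has finally failed.
 */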
5462 static void be_err_detection_task(struct work_struct *work)
5463 {
5464 	struct be_error_recovery *err_rec =
5465 			container_of(work, struct be_error_recovery,
5466 				     err_detection_work.work);
5467 	struct be_adapter *adapter =
5468 			container_of(err_rec, struct be_adapter,
5469 				     error_recovery);
5470 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5471 	struct device *dev = &adapter->pdev->dev;
5472 	int recovery_status;
5473 
5474 	be_detect_error(adapter);
5475 	if (!be_check_error(adapter, BE_ERROR_HW))
5476 		goto reschedule_task;
5477 
5478 	recovery_status = be_err_recover(adapter);
5479 	if (!recovery_status) {
5480 		err_rec->recovery_retries = 0;
5481 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5482 		dev_info(dev, "Adapter recovery successful\n");
5483 		goto reschedule_task;
5484 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5485 		/* BEx/SH recovery state machine */
5486 		if (adapter->pf_num == 0 &&
5487 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5488 			dev_err(dev, "Adapter recovery in progress\n");
5490 		resched_delay = err_rec->resched_delay;
5491 		goto reschedule_task;
5492 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5493 		/* For VFs, check every second whether the PF has
5494 		 * allocated resources.
5495 		 */
5496 		dev_err(dev, "Re-trying adapter recovery\n");
5497 		goto reschedule_task;
5498 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5499 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5500 		/* If another error occurs during recovery, the adapter takes
5501 		 * 30 seconds to come out of the error state. Retry error
5502 		 * recovery after this interval.
5503 		 */
5504 		dev_err(dev, "Re-trying adapter recovery\n");
5505 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5506 		goto reschedule_task;
5507 	} else {
5508 		dev_err(dev, "Adapter recovery failed\n");
5509 		dev_err(dev, "Please reboot server to recover\n");
5510 	}
5511 
5512 	return;
5513 
5514 reschedule_task:
5515 	be_schedule_err_detection(adapter, resched_delay);
5516 }
5517 
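/* Queries the SFP vendor/part info and logs it when a transceiver
 * misconfiguration event has been reported, then clears the
 * BE_FLAGS_PHY_MISCONFIGURED flag.
 */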
5518 static void be_log_sfp_info(struct be_adapter *adapter)
5519 {
5520 	int status;
5521 
5522 	status = be_cmd_query_sfp_info(adapter);
5523 	if (!status) {
5524 		dev_err(&adapter->pdev->dev,
5525 			"Port %c: %s Vendor: %s part no: %s\n",
5526 			adapter->port_name,
5527 			be_misconfig_evt_port_state[adapter->phy_state],
5528 			adapter->phy.vendor_name,
5529 			adapter->phy.vendor_pn);
5530 	}
5531 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5532 }
5533 
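/* Periodic (1 sec) housekeeping task: queries the die temperature (PF only,
 * every be_get_temp_freq ticks), reaps MCC completions while the netdev is
 * down, issues stats queries, replenishes starved RX queues and updates EQ
 * delays on non-Skyhawk chips.
 */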
5534 static void be_worker(struct work_struct *work)
5535 {
5536 	struct be_adapter *adapter =
5537 		container_of(work, struct be_adapter, work.work);
5538 	struct be_rx_obj *rxo;
5539 	int i;
5540 
5541 	if (be_physfn(adapter) &&
5542 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5543 		be_cmd_get_die_temperature(adapter);
5544 
5545 	/* when interrupts are not yet enabled, just reap any pending
5546 	 * mcc completions
5547 	 */
5548 	if (!netif_running(adapter->netdev)) {
5549 		local_bh_disable();
5550 		be_process_mcc(adapter);
5551 		local_bh_enable();
5552 		goto reschedule;
5553 	}
5554 
5555 	if (!adapter->stats_cmd_sent) {
5556 		if (lancer_chip(adapter))
5557 			lancer_cmd_get_pport_stats(adapter,
5558 						   &adapter->stats_cmd);
5559 		else
5560 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5561 	}
5562 
5563 	for_all_rx_queues(adapter, rxo, i) {
5564 		/* Replenish RX-queues starved due to memory
5565 		 * allocation failures.
5566 		 */
5567 		if (rxo->rx_post_starved)
5568 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5569 	}
5570 
5571 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5572 	if (!skyhawk_chip(adapter))
5573 		be_eqd_update(adapter, false);
5574 
5575 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5576 		be_log_sfp_info(adapter);
5577 
5578 reschedule:
5579 	adapter->work_counter++;
5580 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5581 }
5582 
5583 static void be_unmap_pci_bars(struct be_adapter *adapter)
5584 {
5585 	if (adapter->csr)
5586 		pci_iounmap(adapter->pdev, adapter->csr);
5587 	if (adapter->db)
5588 		pci_iounmap(adapter->pdev, adapter->db);
5589 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5590 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5591 }
5592 
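/* Doorbell registers are in BAR 0 on Lancer and on VFs, and in BAR 4
 * otherwise.
 */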
5593 static int db_bar(struct be_adapter *adapter)
5594 {
5595 	if (lancer_chip(adapter) || be_virtfn(adapter))
5596 		return 0;
5597 	else
5598 		return 4;
5599 }
5600 
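/* On Skyhawk, record the start and length of the doorbell BAR in
 * adapter->roce_db for RoCE use.
 */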
5601 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5602 {
5603 	if (skyhawk_chip(adapter)) {
5604 		adapter->roce_db.size = 4096;
5605 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5606 							      db_bar(adapter));
5607 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5608 							       db_bar(adapter));
5609 	}
5610 	return 0;
5611 }
5612 
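/* Reads SLI_INTF to determine the SLI family and whether this function is a
 * VF, then maps the CSR BAR (BE2/BE3 PF only), the doorbell BAR and, where
 * needed, the PCICFG BAR.
 */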
5613 static int be_map_pci_bars(struct be_adapter *adapter)
5614 {
5615 	struct pci_dev *pdev = adapter->pdev;
5616 	u8 __iomem *addr;
5617 	u32 sli_intf;
5618 
5619 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5620 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5621 				SLI_INTF_FAMILY_SHIFT;
5622 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5623 
5624 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5625 		adapter->csr = pci_iomap(pdev, 2, 0);
5626 		if (!adapter->csr)
5627 			return -ENOMEM;
5628 	}
5629 
5630 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5631 	if (!addr)
5632 		goto pci_map_err;
5633 	adapter->db = addr;
5634 
5635 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5636 		if (be_physfn(adapter)) {
5637 			/* PCICFG is BAR 1 on BE2; BAR 0 on BE3 and Skyhawk */
5638 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5639 			if (!addr)
5640 				goto pci_map_err;
5641 			adapter->pcicfg = addr;
5642 			adapter->pcicfg_mapped = true;
5643 		} else {
5644 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5645 			adapter->pcicfg_mapped = false;
5646 		}
5647 	}
5648 
5649 	be_roce_map_pci_bars(adapter);
5650 	return 0;
5651 
5652 pci_map_err:
5653 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5654 	be_unmap_pci_bars(adapter);
5655 	return -ENOMEM;
5656 }
5657 
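/* Frees the mailbox, rx_filter and stats DMA buffers allocated in
 * be_drv_init().
 */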
5658 static void be_drv_cleanup(struct be_adapter *adapter)
5659 {
5660 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5661 	struct device *dev = &adapter->pdev->dev;
5662 
5663 	if (mem->va)
5664 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5665 
5666 	mem = &adapter->rx_filter;
5667 	if (mem->va)
5668 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5669 
5670 	mem = &adapter->stats_cmd;
5671 	if (mem->va)
5672 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5673 }
5674 
5675 /* Allocate and initialize various fields in be_adapter struct */
5676 static int be_drv_init(struct be_adapter *adapter)
5677 {
5678 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5679 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5680 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5681 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5682 	struct device *dev = &adapter->pdev->dev;
5683 	int status = 0;
5684 
5685 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5686 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5687 						 &mbox_mem_alloc->dma,
5688 						 GFP_KERNEL);
5689 	if (!mbox_mem_alloc->va)
5690 		return -ENOMEM;
5691 
5692 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5693 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5694 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5695 
5696 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5697 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5698 					    &rx_filter->dma, GFP_KERNEL);
5699 	if (!rx_filter->va) {
5700 		status = -ENOMEM;
5701 		goto free_mbox;
5702 	}
5703 
5704 	if (lancer_chip(adapter))
5705 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5706 	else if (BE2_chip(adapter))
5707 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5708 	else if (BE3_chip(adapter))
5709 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5710 	else
5711 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5712 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5713 					    &stats_cmd->dma, GFP_KERNEL);
5714 	if (!stats_cmd->va) {
5715 		status = -ENOMEM;
5716 		goto free_rx_filter;
5717 	}
5718 
5719 	mutex_init(&adapter->mbox_lock);
5720 	mutex_init(&adapter->mcc_lock);
5721 	mutex_init(&adapter->rx_filter_lock);
5722 	spin_lock_init(&adapter->mcc_cq_lock);
5723 	init_completion(&adapter->et_cmd_compl);
5724 
5725 	pci_save_state(adapter->pdev);
5726 
5727 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5728 
5729 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5730 	adapter->error_recovery.resched_delay = 0;
5731 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5732 			  be_err_detection_task);
5733 
5734 	adapter->rx_fc = true;
5735 	adapter->tx_fc = true;
5736 
5737 	/* Must be a power of 2 or else MODULO will BUG_ON */
5738 	adapter->be_get_temp_freq = 64;
5739 
5740 	return 0;
5741 
5742 free_rx_filter:
5743 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5744 free_mbox:
5745 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5746 			  mbox_mem_alloc->dma);
5747 	return status;
5748 }
5749 
5750 static void be_remove(struct pci_dev *pdev)
5751 {
5752 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5753 
5754 	if (!adapter)
5755 		return;
5756 
5757 	be_roce_dev_remove(adapter);
5758 	be_intr_set(adapter, false);
5759 
5760 	be_cancel_err_detection(adapter);
5761 
5762 	unregister_netdev(adapter->netdev);
5763 
5764 	be_clear(adapter);
5765 
5766 	if (!pci_vfs_assigned(adapter->pdev))
5767 		be_cmd_reset_function(adapter);
5768 
5769 	/* tell fw we're done with firing cmds */
5770 	be_cmd_fw_clean(adapter);
5771 
5772 	be_unmap_pci_bars(adapter);
5773 	be_drv_cleanup(adapter);
5774 
5775 	pci_disable_pcie_error_reporting(pdev);
5776 
5777 	pci_release_regions(pdev);
5778 	pci_disable_device(pdev);
5779 
5780 	free_netdev(adapter->netdev);
5781 }
5782 
5783 static ssize_t be_hwmon_show_temp(struct device *dev,
5784 				  struct device_attribute *dev_attr,
5785 				  char *buf)
5786 {
5787 	struct be_adapter *adapter = dev_get_drvdata(dev);
5788 
5789 	/* Unit: millidegree Celsius */
5790 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5791 		return -EIO;
5792 	else
5793 		return sprintf(buf, "%u\n",
5794 			       adapter->hwmon_info.be_on_die_temp * 1000);
5795 }
5796 
5797 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5798 			  be_hwmon_show_temp, NULL, 1);
5799 
5800 static struct attribute *be_hwmon_attrs[] = {
5801 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5802 	NULL
5803 };
5804 
5805 ATTRIBUTE_GROUPS(be_hwmon);
5806 
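/* Returns a printable name for the adapter's multi-channel mode, used in the
 * probe banner.
 */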
5807 static char *mc_name(struct be_adapter *adapter)
5808 {
5809 	char *str = "";	/* default */
5810 
5811 	switch (adapter->mc_type) {
5812 	case UMC:
5813 		str = "UMC";
5814 		break;
5815 	case FLEX10:
5816 		str = "FLEX10";
5817 		break;
5818 	case vNIC1:
5819 		str = "vNIC-1";
5820 		break;
5821 	case nPAR:
5822 		str = "nPAR";
5823 		break;
5824 	case UFP:
5825 		str = "UFP";
5826 		break;
5827 	case vNIC2:
5828 		str = "vNIC-2";
5829 		break;
5830 	default:
5831 		str = "";
5832 	}
5833 
5834 	return str;
5835 }
5836 
5837 static inline char *func_name(struct be_adapter *adapter)
5838 {
5839 	return be_physfn(adapter) ? "PF" : "VF";
5840 }
5841 
5842 static inline char *nic_name(struct pci_dev *pdev)
5843 {
5844 	switch (pdev->device) {
5845 	case OC_DEVICE_ID1:
5846 		return OC_NAME;
5847 	case OC_DEVICE_ID2:
5848 		return OC_NAME_BE;
5849 	case OC_DEVICE_ID3:
5850 	case OC_DEVICE_ID4:
5851 		return OC_NAME_LANCER;
5852 	case BE_DEVICE_ID2:
5853 		return BE3_NAME;
5854 	case OC_DEVICE_ID5:
5855 	case OC_DEVICE_ID6:
5856 		return OC_NAME_SH;
5857 	default:
5858 		return BE_NAME;
5859 	}
5860 }
5861 
5862 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5863 {
5864 	struct be_adapter *adapter;
5865 	struct net_device *netdev;
5866 	int status = 0;
5867 
5868 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5869 
5870 	status = pci_enable_device(pdev);
5871 	if (status)
5872 		goto do_none;
5873 
5874 	status = pci_request_regions(pdev, DRV_NAME);
5875 	if (status)
5876 		goto disable_dev;
5877 	pci_set_master(pdev);
5878 
5879 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5880 	if (!netdev) {
5881 		status = -ENOMEM;
5882 		goto rel_reg;
5883 	}
5884 	adapter = netdev_priv(netdev);
5885 	adapter->pdev = pdev;
5886 	pci_set_drvdata(pdev, adapter);
5887 	adapter->netdev = netdev;
5888 	SET_NETDEV_DEV(netdev, &pdev->dev);
5889 
5890 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5891 	if (!status) {
5892 		netdev->features |= NETIF_F_HIGHDMA;
5893 	} else {
5894 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5895 		if (status) {
5896 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5897 			goto free_netdev;
5898 		}
5899 	}
5900 
5901 	status = pci_enable_pcie_error_reporting(pdev);
5902 	if (!status)
5903 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5904 
5905 	status = be_map_pci_bars(adapter);
5906 	if (status)
5907 		goto free_netdev;
5908 
5909 	status = be_drv_init(adapter);
5910 	if (status)
5911 		goto unmap_bars;
5912 
5913 	status = be_setup(adapter);
5914 	if (status)
5915 		goto drv_cleanup;
5916 
5917 	be_netdev_init(netdev);
5918 	status = register_netdev(netdev);
5919 	if (status != 0)
5920 		goto unsetup;
5921 
5922 	be_roce_dev_add(adapter);
5923 
5924 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5925 	adapter->error_recovery.probe_time = jiffies;
5926 
5927 	/* On-die temperature reporting is not supported on VFs. */
5928 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5929 		adapter->hwmon_info.hwmon_dev =
5930 			devm_hwmon_device_register_with_groups(&pdev->dev,
5931 							       DRV_NAME,
5932 							       adapter,
5933 							       be_hwmon_groups);
5934 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5935 	}
5936 
5937 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5938 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5939 
5940 	return 0;
5941 
5942 unsetup:
5943 	be_clear(adapter);
5944 drv_cleanup:
5945 	be_drv_cleanup(adapter);
5946 unmap_bars:
5947 	be_unmap_pci_bars(adapter);
5948 free_netdev:
5949 	free_netdev(netdev);
5950 rel_reg:
5951 	pci_release_regions(pdev);
5952 disable_dev:
5953 	pci_disable_device(pdev);
5954 do_none:
5955 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5956 	return status;
5957 }
5958 
5959 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5960 {
5961 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5962 
5963 	be_intr_set(adapter, false);
5964 	be_cancel_err_detection(adapter);
5965 
5966 	be_cleanup(adapter);
5967 
5968 	pci_save_state(pdev);
5969 	pci_disable_device(pdev);
5970 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5971 	return 0;
5972 }
5973 
5974 static int be_pci_resume(struct pci_dev *pdev)
5975 {
5976 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5977 	int status = 0;
5978 
5979 	status = pci_enable_device(pdev);
5980 	if (status)
5981 		return status;
5982 
5983 	pci_restore_state(pdev);
5984 
5985 	status = be_resume(adapter);
5986 	if (status)
5987 		return status;
5988 
5989 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5990 
5991 	return 0;
5992 }
5993 
5994 /*
5995  * An FLR will stop BE from DMAing any data.
5996  */
5997 static void be_shutdown(struct pci_dev *pdev)
5998 {
5999 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6000 
6001 	if (!adapter)
6002 		return;
6003 
6004 	be_roce_dev_shutdown(adapter);
6005 	cancel_delayed_work_sync(&adapter->work);
6006 	be_cancel_err_detection(adapter);
6007 
6008 	netif_device_detach(adapter->netdev);
6009 
6010 	be_cmd_reset_function(adapter);
6011 
6012 	pci_disable_device(pdev);
6013 }
6014 
6015 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6016 					    pci_channel_state_t state)
6017 {
6018 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6019 
6020 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6021 
6022 	be_roce_dev_remove(adapter);
6023 
6024 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6025 		be_set_error(adapter, BE_ERROR_EEH);
6026 
6027 		be_cancel_err_detection(adapter);
6028 
6029 		be_cleanup(adapter);
6030 	}
6031 
6032 	if (state == pci_channel_io_perm_failure)
6033 		return PCI_ERS_RESULT_DISCONNECT;
6034 
6035 	pci_disable_device(pdev);
6036 
6037 	/* The error could cause the FW to trigger a flash debug dump.
6038 	 * Resetting the card while the flash dump is in progress
6039 	 * can prevent it from recovering; wait for the dump to finish.
6040 	 * Only the first function needs to wait, as one wait per
6041 	 * adapter is enough.
6042 	 */
6043 	if (pdev->devfn == 0)
6044 		ssleep(30);
6045 
6046 	return PCI_ERS_RESULT_NEED_RESET;
6047 }
6048 
6049 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6050 {
6051 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6052 	int status;
6053 
6054 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6055 
6056 	status = pci_enable_device(pdev);
6057 	if (status)
6058 		return PCI_ERS_RESULT_DISCONNECT;
6059 
6060 	pci_set_master(pdev);
6061 	pci_restore_state(pdev);
6062 
6063 	/* Check if card is ok and fw is ready */
6064 	dev_info(&adapter->pdev->dev,
6065 		 "Waiting for FW to be ready after EEH reset\n");
6066 	status = be_fw_wait_ready(adapter);
6067 	if (status)
6068 		return PCI_ERS_RESULT_DISCONNECT;
6069 
6070 	pci_cleanup_aer_uncorrect_error_status(pdev);
6071 	be_clear_error(adapter, BE_CLEAR_ALL);
6072 	return PCI_ERS_RESULT_RECOVERED;
6073 }
6074 
6075 static void be_eeh_resume(struct pci_dev *pdev)
6076 {
6077 	int status = 0;
6078 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6079 
6080 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6081 
6082 	pci_save_state(pdev);
6083 
6084 	status = be_resume(adapter);
6085 	if (status)
6086 		goto err;
6087 
6088 	be_roce_dev_add(adapter);
6089 
6090 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6091 	return;
6092 err:
6093 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6094 }
6095 
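/* PCI sriov_configure hook: enables num_vfs VFs (or disables them all when
 * num_vfs is 0), redistributing PF-pool resources on Skyhawk before the VFs
 * are created. Returns the number of VFs now enabled on success.
 */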
6096 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6097 {
6098 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6099 	struct be_resources vft_res = {0};
6100 	int status;
6101 
6102 	if (!num_vfs)
6103 		be_vf_clear(adapter);
6104 
6105 	adapter->num_vfs = num_vfs;
6106 
6107 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6108 		dev_warn(&pdev->dev,
6109 			 "Cannot disable VFs while they are assigned\n");
6110 		return -EBUSY;
6111 	}
6112 
6113 	/* When the HW is in an SR-IOV capable configuration, the PF-pool
6114 	 * resources are distributed equally across the maximum number of VFs.
6115 	 * The user may request that only a subset of the max VFs be enabled.
6116 	 * In that case, redistribute the resources across num_vfs so that each
6117 	 * VF gets a larger share of them.
6118 	 * BE3 FW does not provide this facility, and on Lancer the FW performs
6119 	 * the redistribution itself.
6120 	 */
6121 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6122 		be_calculate_vf_res(adapter, adapter->num_vfs,
6123 				    &vft_res);
6124 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6125 						 adapter->num_vfs, &vft_res);
6126 		if (status)
6127 			dev_err(&pdev->dev,
6128 				"Failed to optimize SR-IOV resources\n");
6129 	}
6130 
6131 	status = be_get_resources(adapter);
6132 	if (status)
6133 		return be_cmd_status(status);
6134 
6135 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6136 	rtnl_lock();
6137 	status = be_update_queues(adapter);
6138 	rtnl_unlock();
6139 	if (status)
6140 		return be_cmd_status(status);
6141 
6142 	if (adapter->num_vfs)
6143 		status = be_vf_setup(adapter);
6144 
6145 	if (!status)
6146 		return adapter->num_vfs;
6147 
6148 	return 0;
6149 }
6150 
6151 static const struct pci_error_handlers be_eeh_handlers = {
6152 	.error_detected = be_eeh_err_detected,
6153 	.slot_reset = be_eeh_reset,
6154 	.resume = be_eeh_resume,
6155 };
6156 
6157 static struct pci_driver be_driver = {
6158 	.name = DRV_NAME,
6159 	.id_table = be_dev_ids,
6160 	.probe = be_probe,
6161 	.remove = be_remove,
6162 	.suspend = be_suspend,
6163 	.resume = be_pci_resume,
6164 	.shutdown = be_shutdown,
6165 	.sriov_configure = be_pci_sriov_configure,
6166 	.err_handler = &be_eeh_handlers
6167 };
6168 
6169 static int __init be_init_module(void)
6170 {
6171 	int status;
6172 
6173 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6174 	    rx_frag_size != 2048) {
6175 		printk(KERN_WARNING DRV_NAME
6176 			" : Module param rx_frag_size must be 2048/4096/8192."
6177 			" Using 2048\n");
6178 		rx_frag_size = 2048;
6179 	}
6180 
6181 	if (num_vfs > 0) {
6182 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6183 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6184 	}
6185 
6186 	be_wq = create_singlethread_workqueue("be_wq");
6187 	if (!be_wq) {
6188 		pr_warn(DRV_NAME " : workqueue creation failed\n");
6189 		return -ENOMEM;
6190 	}
6191 
6192 	be_err_recovery_workq =
6193 		create_singlethread_workqueue("be_err_recover");
6194 	if (!be_err_recovery_workq)
6195 		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6196 
6197 	status = pci_register_driver(&be_driver);
6198 	if (status) {
6199 		destroy_workqueue(be_wq);
6200 		be_destroy_err_recovery_workq();
6201 	}
6202 	return status;
6203 }
6204 module_init(be_init_module);
6205 
6206 static void __exit be_exit_module(void)
6207 {
6208 	pci_unregister_driver(&be_driver);
6209 
6210 	be_destroy_err_recovery_workq();
6211 
6212 	if (be_wq)
6213 		destroy_workqueue(be_wq);
6214 }
6215 module_exit(be_exit_module);
6216