1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
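/* Allocate zeroed, DMA-coherent memory for a queue of 'len' entries of
 * 'entry_size' bytes each. Returns -ENOMEM on allocation failure.
 */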
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
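/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR interrupt-control register in PCI config space. The register
 * is written back only when the state actually changes.
 */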
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
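/* Enable/disable interrupts via the FW cmd; fall back to the direct
 * config-space write in be_reg_intr_set() if the cmd fails. No-op on
 * Lancer and when an EEH error has been detected.
 */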
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On Lancer, interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
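/* Ring the EQ doorbell: ack 'num_popped' entries, program the R2I delay
 * multiplier encoding and, optionally, re-arm the EQ and clear the
 * interrupt.
 */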
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
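/* Program 'mac' as the primary MAC (pmac_id[0]) of the interface.
 * If the MAC is already present in the uc-list, reuse its pmac_id
 * instead of programming a duplicate entry.
 */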
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of the uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279 				     mac)) {
280 			/* mac already added, skip addition */
281 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282 			return 0;
283 		}
284 	}
285 
286 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287 			       &adapter->pmac_id[0], 0);
288 }
289 
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292 	int i;
293 
294 	/* Skip deletion if the programmed mac is
295 	 * being used in uc-list
296 	 */
297 	for (i = 0; i < adapter->uc_macs; i++) {
298 		if (adapter->pmac_id[i + 1] == pmac_id)
299 			return;
300 	}
301 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303 
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306 	struct be_adapter *adapter = netdev_priv(netdev);
307 	struct device *dev = &adapter->pdev->dev;
308 	struct sockaddr *addr = p;
309 	int status;
310 	u8 mac[ETH_ALEN];
311 	u32 old_pmac_id = adapter->pmac_id[0];
312 
313 	if (!is_valid_ether_addr(addr->sa_data))
314 		return -EADDRNOTAVAIL;
315 
316 	/* Proceed further only if the user-provided MAC is different
317 	 * from the active MAC
318 	 */
319 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320 		return 0;
321 
322 	/* if device is not running, copy MAC to netdev->dev_addr */
323 	if (!netif_running(netdev))
324 		goto done;
325 
326 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327 	 * privilege or if the PF did not provision the new MAC address.
328 	 * On BE3, this cmd will always fail if the VF doesn't have the
329 	 * FILTMGMT privilege. This failure is OK only if the PF has
330 	 * programmed the MAC for the VF.
331 	 */
332 	mutex_lock(&adapter->rx_filter_lock);
333 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334 	if (!status) {
336 		/* Delete the old programmed MAC. This call may fail if the
337 		 * old MAC was already deleted by the PF driver.
338 		 */
339 		if (adapter->pmac_id[0] != old_pmac_id)
340 			be_dev_mac_del(adapter, old_pmac_id);
341 	}
342 
343 	mutex_unlock(&adapter->rx_filter_lock);
344 	/* Decide whether the new MAC was successfully activated only
345 	 * after querying the FW
346 	 */
347 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348 				       adapter->if_handle, true, 0);
349 	if (status)
350 		goto err;
351 
352 	/* The MAC change did not happen, either due to lack of privilege
353 	 * or because the PF didn't pre-provision the new MAC.
354 	 */
355 	if (!ether_addr_equal(addr->sa_data, mac)) {
356 		status = -EPERM;
357 		goto err;
358 	}
359 done:
360 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
361 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
362 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363 	return 0;
364 err:
365 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366 	return status;
367 }
368 
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372 	if (BE2_chip(adapter)) {
373 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374 
375 		return &cmd->hw_stats;
376 	} else if (BE3_chip(adapter)) {
377 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378 
379 		return &cmd->hw_stats;
380 	} else {
381 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	}
385 }
386 
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390 	if (BE2_chip(adapter)) {
391 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392 
393 		return &hw_stats->erx;
394 	} else if (BE3_chip(adapter)) {
395 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396 
397 		return &hw_stats->erx;
398 	} else {
399 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	}
403 }
404 
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410 	struct be_port_rxf_stats_v0 *port_stats =
411 					&rxf_stats->port[adapter->port_num];
412 	struct be_drv_stats *drvs = &adapter->drv_stats;
413 
414 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
416 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
417 	drvs->rx_control_frames = port_stats->rx_control_frames;
418 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430 	drvs->rx_dropped_header_too_small =
431 		port_stats->rx_dropped_header_too_small;
432 	drvs->rx_address_filtered =
433 					port_stats->rx_address_filtered +
434 					port_stats->rx_vlan_filtered;
435 	drvs->rx_alignment_symbol_errors =
436 		port_stats->rx_alignment_symbol_errors;
437 
438 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
439 	drvs->tx_controlframes = port_stats->tx_controlframes;
440 
441 	if (adapter->port_num)
442 		drvs->jabber_events = rxf_stats->port1_jabber_events;
443 	else
444 		drvs->jabber_events = rxf_stats->port0_jabber_events;
445 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
448 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453 
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459 	struct be_port_rxf_stats_v1 *port_stats =
460 					&rxf_stats->port[adapter->port_num];
461 	struct be_drv_stats *drvs = &adapter->drv_stats;
462 
463 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
467 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
468 	drvs->rx_control_frames = port_stats->rx_control_frames;
469 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479 	drvs->rx_dropped_header_too_small =
480 		port_stats->rx_dropped_header_too_small;
481 	drvs->rx_input_fifo_overflow_drop =
482 		port_stats->rx_input_fifo_overflow_drop;
483 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
484 	drvs->rx_alignment_symbol_errors =
485 		port_stats->rx_alignment_symbol_errors;
486 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
488 	drvs->tx_controlframes = port_stats->tx_controlframes;
489 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490 	drvs->jabber_events = port_stats->jabber_events;
491 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
494 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499 
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505 	struct be_port_rxf_stats_v2 *port_stats =
506 					&rxf_stats->port[adapter->port_num];
507 	struct be_drv_stats *drvs = &adapter->drv_stats;
508 
509 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
513 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
514 	drvs->rx_control_frames = port_stats->rx_control_frames;
515 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525 	drvs->rx_dropped_header_too_small =
526 		port_stats->rx_dropped_header_too_small;
527 	drvs->rx_input_fifo_overflow_drop =
528 		port_stats->rx_input_fifo_overflow_drop;
529 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
530 	drvs->rx_alignment_symbol_errors =
531 		port_stats->rx_alignment_symbol_errors;
532 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
534 	drvs->tx_controlframes = port_stats->tx_controlframes;
535 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536 	drvs->jabber_events = port_stats->jabber_events;
537 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
540 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544 	if (be_roce_supported(adapter)) {
545 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547 		drvs->rx_roce_frames = port_stats->roce_frames_received;
548 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
549 		drvs->roce_drops_payload_len =
550 			port_stats->roce_drops_payload_len;
551 	}
552 }
553 
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556 	struct be_drv_stats *drvs = &adapter->drv_stats;
557 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558 
559 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569 	drvs->rx_dropped_tcp_length =
570 				pport_stats->rx_dropped_invalid_tcp_length;
571 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574 	drvs->rx_dropped_header_too_small =
575 				pport_stats->rx_dropped_header_too_small;
576 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577 	drvs->rx_address_filtered =
578 					pport_stats->rx_address_filtered +
579 					pport_stats->rx_vlan_filtered;
580 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584 	drvs->jabber_events = pport_stats->rx_jabbers;
585 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
586 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587 	drvs->rx_drops_too_many_frags =
588 				pport_stats->rx_drops_too_many_frags_lo;
589 }
590 
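/* Accumulate a 16-bit HW counter, which wraps at 65535, into a 32-bit
 * SW counter. A wrap since the last sample is detected by the new value
 * being smaller than the previously accumulated low 16 bits.
 */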
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)			(x & 0xFFFF)
594 #define hi(x)			(x & 0xFFFF0000)
595 	bool wrapped = val < lo(*acc);
596 	u32 newacc = hi(*acc) + val;
597 
598 	if (wrapped)
599 		newacc += 65536;
600 	ACCESS_ONCE(*acc) = newacc;
601 }
602 
603 static void populate_erx_stats(struct be_adapter *adapter,
604 			       struct be_rx_obj *rxo, u32 erx_stat)
605 {
606 	if (!BEx_chip(adapter))
607 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608 	else
609 		/* The erx HW counter below can wrap around after 65535.
610 		 * The driver accumulates it into a 32-bit value.
611 		 */
612 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613 				     (u16)erx_stat);
614 }
615 
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619 	struct be_rx_obj *rxo;
620 	int i;
621 	u32 erx_stat;
622 
623 	if (lancer_chip(adapter)) {
624 		populate_lancer_stats(adapter);
625 	} else {
626 		if (BE2_chip(adapter))
627 			populate_be_v0_stats(adapter);
628 		else if (BE3_chip(adapter))
629 			/* for BE3 */
630 			populate_be_v1_stats(adapter);
631 		else
632 			populate_be_v2_stats(adapter);
633 
634 		/* erx_v2 is longer than v0/v1; use v2 for v0/v1 access too */
635 		for_all_rx_queues(adapter, rxo, i) {
636 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637 			populate_erx_stats(adapter, rxo, erx_stat);
638 		}
639 	}
640 }
641 
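/* ndo_get_stats64() handler: sums the per-queue SW byte/packet counters
 * (under u64_stats sync) and derives the error counters from the FW
 * stats parsed into adapter->drv_stats.
 */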
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643 						struct rtnl_link_stats64 *stats)
644 {
645 	struct be_adapter *adapter = netdev_priv(netdev);
646 	struct be_drv_stats *drvs = &adapter->drv_stats;
647 	struct be_rx_obj *rxo;
648 	struct be_tx_obj *txo;
649 	u64 pkts, bytes;
650 	unsigned int start;
651 	int i;
652 
653 	for_all_rx_queues(adapter, rxo, i) {
654 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
655 
656 		do {
657 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658 			pkts = rx_stats(rxo)->rx_pkts;
659 			bytes = rx_stats(rxo)->rx_bytes;
660 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661 		stats->rx_packets += pkts;
662 		stats->rx_bytes += bytes;
663 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665 					rx_stats(rxo)->rx_drops_no_frags;
666 	}
667 
668 	for_all_tx_queues(adapter, txo, i) {
669 		const struct be_tx_stats *tx_stats = tx_stats(txo);
670 
671 		do {
672 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673 			pkts = tx_stats(txo)->tx_pkts;
674 			bytes = tx_stats(txo)->tx_bytes;
675 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676 		stats->tx_packets += pkts;
677 		stats->tx_bytes += bytes;
678 	}
679 
680 	/* bad pkts received */
681 	stats->rx_errors = drvs->rx_crc_errors +
682 		drvs->rx_alignment_symbol_errors +
683 		drvs->rx_in_range_errors +
684 		drvs->rx_out_range_errors +
685 		drvs->rx_frame_too_long +
686 		drvs->rx_dropped_too_small +
687 		drvs->rx_dropped_too_short +
688 		drvs->rx_dropped_header_too_small +
689 		drvs->rx_dropped_tcp_length +
690 		drvs->rx_dropped_runt;
691 
692 	/* detailed rx errors */
693 	stats->rx_length_errors = drvs->rx_in_range_errors +
694 		drvs->rx_out_range_errors +
695 		drvs->rx_frame_too_long;
696 
697 	stats->rx_crc_errors = drvs->rx_crc_errors;
698 
699 	/* frame alignment errors */
700 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701 
702 	/* receiver fifo overrun */
703 	/* drops_no_pbuf is not per i/f; it's per BE card */
704 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705 				drvs->rx_input_fifo_overflow_drop +
706 				drvs->rx_drops_no_pbuf;
707 	return stats;
708 }
709 
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712 	struct net_device *netdev = adapter->netdev;
713 
714 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715 		netif_carrier_off(netdev);
716 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717 	}
718 
719 	if (link_status)
720 		netif_carrier_on(netdev);
721 	else
722 		netif_carrier_off(netdev);
723 
724 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726 
727 static int be_gso_hdr_len(struct sk_buff *skb)
728 {
729 	if (skb->encapsulation)
730 		return skb_inner_transport_offset(skb) +
731 		       inner_tcp_hdrlen(skb);
732 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
733 }
734 
735 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
736 {
737 	struct be_tx_stats *stats = tx_stats(txo);
738 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
739 	/* Account for headers which get duplicated in TSO pkt */
740 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
741 
742 	u64_stats_update_begin(&stats->sync);
743 	stats->tx_reqs++;
744 	stats->tx_bytes += skb->len + dup_hdr_len;
745 	stats->tx_pkts += tx_pkts;
746 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
747 		stats->tx_vxlan_offload_pkts += tx_pkts;
748 	u64_stats_update_end(&stats->sync);
749 }
750 
751 /* Returns number of WRBs needed for the skb */
752 static u32 skb_wrb_cnt(struct sk_buff *skb)
753 {
754 	/* +1 for the header wrb */
755 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
756 }
757 
758 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
759 {
760 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
761 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
762 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
763 	wrb->rsvd0 = 0;
764 }
765 
766 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
767  * to avoid the swap and shift/mask operations in wrb_fill().
768  */
769 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
770 {
771 	wrb->frag_pa_hi = 0;
772 	wrb->frag_pa_lo = 0;
773 	wrb->frag_len = 0;
774 	wrb->rsvd0 = 0;
775 }
776 
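/* Return the vlan tag to use in the Tx WRB. If the priority in the
 * OS-provided tag is not present in the available priority bitmap,
 * it is replaced with the adapter's recommended priority bits.
 */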
777 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
778 				     struct sk_buff *skb)
779 {
780 	u8 vlan_prio;
781 	u16 vlan_tag;
782 
783 	vlan_tag = skb_vlan_tag_get(skb);
784 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
785 	/* If the vlan priority provided by the OS is NOT in the available bmap */
786 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
787 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
788 				adapter->recommended_prio_bits;
789 
790 	return vlan_tag;
791 }
792 
793 /* Used only for IP tunnel packets */
794 static u16 skb_inner_ip_proto(struct sk_buff *skb)
795 {
796 	return (inner_ip_hdr(skb)->version == 4) ?
797 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
798 }
799 
800 static u16 skb_ip_proto(struct sk_buff *skb)
801 {
802 	return (ip_hdr(skb)->version == 4) ?
803 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
804 }
805 
806 static inline bool be_is_txq_full(struct be_tx_obj *txo)
807 {
808 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
809 }
810 
811 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
812 {
813 	return atomic_read(&txo->q.used) < txo->q.len / 2;
814 }
815 
816 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
817 {
818 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
819 }
820 
821 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
822 				       struct sk_buff *skb,
823 				       struct be_wrb_params *wrb_params)
824 {
825 	u16 proto;
826 
827 	if (skb_is_gso(skb)) {
828 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
829 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
830 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
831 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
832 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
833 		if (skb->encapsulation) {
834 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
835 			proto = skb_inner_ip_proto(skb);
836 		} else {
837 			proto = skb_ip_proto(skb);
838 		}
839 		if (proto == IPPROTO_TCP)
840 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
841 		else if (proto == IPPROTO_UDP)
842 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
843 	}
844 
845 	if (skb_vlan_tag_present(skb)) {
846 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
847 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
848 	}
849 
850 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
851 }
852 
853 static void wrb_fill_hdr(struct be_adapter *adapter,
854 			 struct be_eth_hdr_wrb *hdr,
855 			 struct be_wrb_params *wrb_params,
856 			 struct sk_buff *skb)
857 {
858 	memset(hdr, 0, sizeof(*hdr));
859 
860 	SET_TX_WRB_HDR_BITS(crc, hdr,
861 			    BE_WRB_F_GET(wrb_params->features, CRC));
862 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
863 			    BE_WRB_F_GET(wrb_params->features, IPCS));
864 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
865 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
866 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
867 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
868 
869 	SET_TX_WRB_HDR_BITS(lso, hdr,
870 			    BE_WRB_F_GET(wrb_params->features, LSO));
871 	SET_TX_WRB_HDR_BITS(lso6, hdr,
872 			    BE_WRB_F_GET(wrb_params->features, LSO6));
873 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
874 
875 	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
876 	 * hack is not needed, the evt bit is set while ringing DB.
877 	 */
878 	SET_TX_WRB_HDR_BITS(event, hdr,
879 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
880 	SET_TX_WRB_HDR_BITS(vlan, hdr,
881 			    BE_WRB_F_GET(wrb_params->features, VLAN));
882 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
883 
884 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
885 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
886 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
887 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
888 }
889 
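/* Unmap a Tx fragment described by 'wrb'; 'unmap_single' selects
 * dma_unmap_single() (skb linear data) vs dma_unmap_page() (paged frags).
 */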
890 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
891 			  bool unmap_single)
892 {
893 	dma_addr_t dma;
894 	u32 frag_len = le32_to_cpu(wrb->frag_len);
895 
897 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
898 		(u64)le32_to_cpu(wrb->frag_pa_lo);
899 	if (frag_len) {
900 		if (unmap_single)
901 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
902 		else
903 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
904 	}
905 }
906 
907 /* Grab a WRB header for xmit */
908 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
909 {
910 	u32 head = txo->q.head;
911 
912 	queue_head_inc(&txo->q);
913 	return head;
914 }
915 
916 /* Set up the WRB header for xmit */
917 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
918 				struct be_tx_obj *txo,
919 				struct be_wrb_params *wrb_params,
920 				struct sk_buff *skb, u16 head)
921 {
922 	u32 num_frags = skb_wrb_cnt(skb);
923 	struct be_queue_info *txq = &txo->q;
924 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
925 
926 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
927 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
928 
929 	BUG_ON(txo->sent_skb_list[head]);
930 	txo->sent_skb_list[head] = skb;
931 	txo->last_req_hdr = head;
932 	atomic_add(num_frags, &txq->used);
933 	txo->last_req_wrb_cnt = num_frags;
934 	txo->pend_wrb_cnt += num_frags;
935 }
936 
937 /* Setup a WRB fragment (buffer descriptor) for xmit */
938 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
939 				 int len)
940 {
941 	struct be_eth_wrb *wrb;
942 	struct be_queue_info *txq = &txo->q;
943 
944 	wrb = queue_head_node(txq);
945 	wrb_fill(wrb, busaddr, len);
946 	queue_head_inc(txq);
947 }
948 
949 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
950  * was invoked. The producer index is restored to the previous packet and the
951  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
952  */
953 static void be_xmit_restore(struct be_adapter *adapter,
954 			    struct be_tx_obj *txo, u32 head, bool map_single,
955 			    u32 copied)
956 {
957 	struct device *dev;
958 	struct be_eth_wrb *wrb;
959 	struct be_queue_info *txq = &txo->q;
960 
961 	dev = &adapter->pdev->dev;
962 	txq->head = head;
963 
964 	/* skip the first wrb (hdr); it's not mapped */
965 	queue_head_inc(txq);
966 	while (copied) {
967 		wrb = queue_head_node(txq);
968 		unmap_tx_frag(dev, wrb, map_single);
969 		map_single = false;
970 		copied -= le32_to_cpu(wrb->frag_len);
971 		queue_head_inc(txq);
972 	}
973 
974 	txq->head = head;
975 }
976 
977 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
978  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
979  * of WRBs used up by the packet.
980  */
981 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
982 			   struct sk_buff *skb,
983 			   struct be_wrb_params *wrb_params)
984 {
985 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
986 	struct device *dev = &adapter->pdev->dev;
987 	struct be_queue_info *txq = &txo->q;
988 	bool map_single = false;
989 	u32 head = txq->head;
990 	dma_addr_t busaddr;
991 	int len;
992 
993 	head = be_tx_get_wrb_hdr(txo);
994 
995 	if (skb->len > skb->data_len) {
996 		len = skb_headlen(skb);
997 
998 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
999 		if (dma_mapping_error(dev, busaddr))
1000 			goto dma_err;
1001 		map_single = true;
1002 		be_tx_setup_wrb_frag(txo, busaddr, len);
1003 		copied += len;
1004 	}
1005 
1006 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1007 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1008 		len = skb_frag_size(frag);
1009 
1010 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1011 		if (dma_mapping_error(dev, busaddr))
1012 			goto dma_err;
1013 		be_tx_setup_wrb_frag(txo, busaddr, len);
1014 		copied += len;
1015 	}
1016 
1017 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1018 
1019 	be_tx_stats_update(txo, skb);
1020 	return wrb_cnt;
1021 
1022 dma_err:
1023 	adapter->drv_stats.dma_map_errors++;
1024 	be_xmit_restore(adapter, txo, head, map_single, copied);
1025 	return 0;
1026 }
1027 
1028 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1029 {
1030 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1031 }
1032 
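/* Insert the vlan tag (and the outer qnq vlan, if configured) into the
 * packet data in software. Used by the Tx workarounds and the OS2BMC
 * path, which require the tag to be inline in the packet.
 */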
1033 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1034 					     struct sk_buff *skb,
1035 					     struct be_wrb_params
1036 					     *wrb_params)
1037 {
1038 	u16 vlan_tag = 0;
1039 
1040 	skb = skb_share_check(skb, GFP_ATOMIC);
1041 	if (unlikely(!skb))
1042 		return skb;
1043 
1044 	if (skb_vlan_tag_present(skb))
1045 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1046 
1047 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1048 		if (!vlan_tag)
1049 			vlan_tag = adapter->pvid;
1050 		/* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1051 		 * skip VLAN insertion
1052 		 */
1053 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1054 	}
1055 
1056 	if (vlan_tag) {
1057 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058 						vlan_tag);
1059 		if (unlikely(!skb))
1060 			return skb;
1061 		skb->vlan_tci = 0;
1062 	}
1063 
1064 	/* Insert the outer VLAN, if any */
1065 	if (adapter->qnq_vid) {
1066 		vlan_tag = adapter->qnq_vid;
1067 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1068 						vlan_tag);
1069 		if (unlikely(!skb))
1070 			return skb;
1071 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1072 	}
1073 
1074 	return skb;
1075 }
1076 
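/* Return true for an ipv6 pkt whose next header is neither TCP nor UDP
 * and whose first extension header has hdrlen 0xff; such pkts trigger
 * the BE3 Tx stall workaround (see be_ipv6_tx_stall_chk()).
 */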
1077 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1078 {
1079 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1080 	u16 offset = ETH_HLEN;
1081 
1082 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1083 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1084 
1085 		offset += sizeof(struct ipv6hdr);
1086 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1087 		    ip6h->nexthdr != NEXTHDR_UDP) {
1088 			struct ipv6_opt_hdr *ehdr =
1089 				(struct ipv6_opt_hdr *)(skb->data + offset);
1090 
1091 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1092 			if (ehdr->hdrlen == 0xff)
1093 				return true;
1094 		}
1095 	}
1096 	return false;
1097 }
1098 
1099 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1100 {
1101 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1102 }
1103 
1104 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1105 {
1106 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1107 }
1108 
1109 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1110 						  struct sk_buff *skb,
1111 						  struct be_wrb_params
1112 						  *wrb_params)
1113 {
1114 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1115 	unsigned int eth_hdr_len;
1116 	struct iphdr *ip;
1117 
1118 	/* For padded packets, BE HW modifies the tot_len field in the IP
1119 	 * header incorrectly when a VLAN tag is inserted by HW.
1120 	 * For padded packets, Lancer computes an incorrect checksum.
1121 	 */
1122 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1123 						VLAN_ETH_HLEN : ETH_HLEN;
1124 	if (skb->len <= 60 &&
1125 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1126 	    is_ipv4_pkt(skb)) {
1127 		ip = (struct iphdr *)ip_hdr(skb);
1128 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1129 	}
1130 
1131 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1132 	 * tagging in pvid-tagging mode
1133 	 */
1134 	if (be_pvid_tagging_enabled(adapter) &&
1135 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1136 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1137 
1138 	/* HW has a bug wherein it will calculate the CSUM for VLAN
1139 	 * pkts even when CSUM offload is not requested.
1140 	 * Manually insert the VLAN in the pkt.
1141 	 */
1142 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1143 	    skb_vlan_tag_present(skb)) {
1144 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1145 		if (unlikely(!skb))
1146 			goto err;
1147 	}
1148 
1149 	/* HW may lock up when VLAN HW tagging is requested on
1150 	 * certain ipv6 packets. Drop such pkts if the workaround to
1151 	 * skip HW tagging is not enabled by the FW.
1152 	 */
1153 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1154 		     (adapter->pvid || adapter->qnq_vid) &&
1155 		     !qnq_async_evt_rcvd(adapter)))
1156 		goto tx_drop;
1157 
1158 	/* Manual VLAN tag insertion to prevent an ASIC lockup
1159 	 * when the ASIC inserts a VLAN tag into certain ipv6
1160 	 * packets. Insert the VLAN tag in the driver, and set
1161 	 * the event, completion and vlan bits accordingly
1162 	 * in the Tx WRB.
1163 	 */
1164 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1165 	    be_vlan_tag_tx_chk(adapter, skb)) {
1166 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1167 		if (unlikely(!skb))
1168 			goto err;
1169 	}
1170 
1171 	return skb;
1172 tx_drop:
1173 	dev_kfree_skb_any(skb);
1174 err:
1175 	return NULL;
1176 }
1177 
1178 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1179 					   struct sk_buff *skb,
1180 					   struct be_wrb_params *wrb_params)
1181 {
1182 	int err;
1183 
1184 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1185 	 * packets that are 32 bytes or less may cause a transmit stall
1186 	 * on that port. The workaround is to pad such packets
1187 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1188 	 */
1189 	if (skb->len <= 32) {
1190 		if (skb_put_padto(skb, 36))
1191 			return NULL;
1192 	}
1193 
1194 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1195 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1196 		if (!skb)
1197 			return NULL;
1198 	}
1199 
1200 	/* The stack can send us skbs with length greater than
1201 	 * what the HW can handle. Trim the extra bytes.
1202 	 */
1203 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1204 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1205 	WARN_ON(err);
1206 
1207 	return skb;
1208 }
1209 
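/* Notify the HW of all WRBs pending on this Tx queue. The last request
 * is marked eventable and, on non-Lancer chips, a dummy WRB is added if
 * an odd number of WRBs is being notified.
 */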
1210 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1211 {
1212 	struct be_queue_info *txq = &txo->q;
1213 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1214 
1215 	/* Mark the last request eventable if it hasn't been marked already */
1216 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1217 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1218 
1219 	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1220 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1221 		wrb_fill_dummy(queue_head_node(txq));
1222 		queue_head_inc(txq);
1223 		atomic_inc(&txq->used);
1224 		txo->pend_wrb_cnt++;
1225 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1226 					   TX_HDR_WRB_NUM_SHIFT);
1227 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1228 					  TX_HDR_WRB_NUM_SHIFT);
1229 	}
1230 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1231 	txo->pend_wrb_cnt = 0;
1232 }
1233 
1234 /* OS2BMC related */
1235 
1236 #define DHCP_CLIENT_PORT	68
1237 #define DHCP_SERVER_PORT	67
1238 #define NET_BIOS_PORT1		137
1239 #define NET_BIOS_PORT2		138
1240 #define DHCPV6_RAS_PORT		547
1241 
1242 #define is_mc_allowed_on_bmc(adapter, eh)	\
1243 	(!is_multicast_filt_enabled(adapter) &&	\
1244 	 is_multicast_ether_addr(eh->h_dest) &&	\
1245 	 !is_broadcast_ether_addr(eh->h_dest))
1246 
1247 #define is_bc_allowed_on_bmc(adapter, eh)	\
1248 	(!is_broadcast_filt_enabled(adapter) &&	\
1249 	 is_broadcast_ether_addr(eh->h_dest))
1250 
1251 #define is_arp_allowed_on_bmc(adapter, skb)	\
1252 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1253 
1254 #define is_broadcast_packet(eh, adapter)	\
1255 		(is_multicast_ether_addr(eh->h_dest) && \
1256 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1257 
1258 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1259 
1260 #define is_arp_filt_enabled(adapter)	\
1261 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1262 
1263 #define is_dhcp_client_filt_enabled(adapter)	\
1264 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1265 
1266 #define is_dhcp_srvr_filt_enabled(adapter)	\
1267 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1268 
1269 #define is_nbios_filt_enabled(adapter)	\
1270 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1271 
1272 #define is_ipv6_na_filt_enabled(adapter)	\
1273 		(adapter->bmc_filt_mask &	\
1274 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1275 
1276 #define is_ipv6_ra_filt_enabled(adapter)	\
1277 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1278 
1279 #define is_ipv6_ras_filt_enabled(adapter)	\
1280 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1281 
1282 #define is_broadcast_filt_enabled(adapter)	\
1283 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1284 
1285 #define is_multicast_filt_enabled(adapter)	\
1286 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1287 
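/* Return true if a copy of this Tx pkt must also be sent to the BMC,
 * based on the adapter's BMC filtering mask (broadcast/multicast, ARP,
 * DHCP, NetBIOS and ipv6 ND/RA traffic). When true, any vlan tag is
 * first inserted inline in the pkt.
 */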
1288 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1289 			       struct sk_buff **skb)
1290 {
1291 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1292 	bool os2bmc = false;
1293 
1294 	if (!be_is_os2bmc_enabled(adapter))
1295 		goto done;
1296 
1297 	if (!is_multicast_ether_addr(eh->h_dest))
1298 		goto done;
1299 
1300 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1301 	    is_bc_allowed_on_bmc(adapter, eh) ||
1302 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1303 		os2bmc = true;
1304 		goto done;
1305 	}
1306 
1307 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1308 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1309 		u8 nexthdr = hdr->nexthdr;
1310 
1311 		if (nexthdr == IPPROTO_ICMPV6) {
1312 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1313 
1314 			switch (icmp6->icmp6_type) {
1315 			case NDISC_ROUTER_ADVERTISEMENT:
1316 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1317 				goto done;
1318 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1319 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1320 				goto done;
1321 			default:
1322 				break;
1323 			}
1324 		}
1325 	}
1326 
1327 	if (is_udp_pkt((*skb))) {
1328 		struct udphdr *udp = udp_hdr((*skb));
1329 
1330 		switch (ntohs(udp->dest)) {
1331 		case DHCP_CLIENT_PORT:
1332 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1333 			goto done;
1334 		case DHCP_SERVER_PORT:
1335 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1336 			goto done;
1337 		case NET_BIOS_PORT1:
1338 		case NET_BIOS_PORT2:
1339 			os2bmc = is_nbios_filt_enabled(adapter);
1340 			goto done;
1341 		case DHCPV6_RAS_PORT:
1342 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1343 			goto done;
1344 		default:
1345 			break;
1346 		}
1347 	}
1348 done:
1349 	/* For packets over a vlan that are destined to the BMC,
1350 	 * the ASIC expects the vlan to be inline in the packet.
1351 	 */
1352 	if (os2bmc)
1353 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1354 
1355 	return os2bmc;
1356 }
1357 
1358 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1359 {
1360 	struct be_adapter *adapter = netdev_priv(netdev);
1361 	u16 q_idx = skb_get_queue_mapping(skb);
1362 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1363 	struct be_wrb_params wrb_params = { 0 };
1364 	bool flush = !skb->xmit_more;
1365 	u16 wrb_cnt;
1366 
1367 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1368 	if (unlikely(!skb))
1369 		goto drop;
1370 
1371 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1372 
1373 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1374 	if (unlikely(!wrb_cnt)) {
1375 		dev_kfree_skb_any(skb);
1376 		goto drop;
1377 	}
1378 
1379 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1380 	 * enqueue the pkt a 2nd time with mgmt bit set.
1381 	 */
1382 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1383 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1384 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385 		if (unlikely(!wrb_cnt))
1386 			goto drop;
1387 		else
1388 			skb_get(skb);
1389 	}
1390 
1391 	if (be_is_txq_full(txo)) {
1392 		netif_stop_subqueue(netdev, q_idx);
1393 		tx_stats(txo)->tx_stops++;
1394 	}
1395 
1396 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1397 		be_xmit_flush(adapter, txo);
1398 
1399 	return NETDEV_TX_OK;
1400 drop:
1401 	tx_stats(txo)->tx_drv_drops++;
1402 	/* Flush the already enqueued tx requests */
1403 	if (flush && txo->pend_wrb_cnt)
1404 		be_xmit_flush(adapter, txo);
1405 
1406 	return NETDEV_TX_OK;
1407 }
1408 
1409 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1410 {
1411 	struct be_adapter *adapter = netdev_priv(netdev);
1412 	struct device *dev = &adapter->pdev->dev;
1413 
1414 	if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1415 		dev_info(dev, "MTU must be between %d and %d bytes\n",
1416 			 BE_MIN_MTU, BE_MAX_MTU);
1417 		return -EINVAL;
1418 	}
1419 
1420 	dev_info(dev, "MTU changed from %d to %d bytes\n",
1421 		 netdev->mtu, new_mtu);
1422 	netdev->mtu = new_mtu;
1423 	return 0;
1424 }
1425 
1426 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1427 {
1428 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1429 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1430 }
1431 
1432 static int be_set_vlan_promisc(struct be_adapter *adapter)
1433 {
1434 	struct device *dev = &adapter->pdev->dev;
1435 	int status;
1436 
1437 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1438 		return 0;
1439 
1440 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1441 	if (!status) {
1442 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1443 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1444 	} else {
1445 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1446 	}
1447 	return status;
1448 }
1449 
1450 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1451 {
1452 	struct device *dev = &adapter->pdev->dev;
1453 	int status;
1454 
1455 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1456 	if (!status) {
1457 		dev_info(dev, "Disabled VLAN promiscuous mode\n");
1458 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1459 	}
1460 	return status;
1461 }
1462 
1463 /*
1464  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1465  * If the user configures more, place BE in vlan promiscuous mode.
1466  */
1467 static int be_vid_config(struct be_adapter *adapter)
1468 {
1469 	struct device *dev = &adapter->pdev->dev;
1470 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1471 	u16 num = 0, i = 0;
1472 	int status = 0;
1473 
1474 	/* No need to change the VLAN state if the I/F is in promiscuous mode */
1475 	if (adapter->netdev->flags & IFF_PROMISC)
1476 		return 0;
1477 
1478 	if (adapter->vlans_added > be_max_vlans(adapter))
1479 		return be_set_vlan_promisc(adapter);
1480 
1481 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1482 		status = be_clear_vlan_promisc(adapter);
1483 		if (status)
1484 			return status;
1485 	}
1486 	/* Construct VLAN Table to give to HW */
1487 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1488 		vids[num++] = cpu_to_le16(i);
1489 
1490 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1491 	if (status) {
1492 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1493 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1494 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1495 		    addl_status(status) ==
1496 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1497 			return be_set_vlan_promisc(adapter);
1498 	}
1499 	return status;
1500 }
1501 
1502 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1503 {
1504 	struct be_adapter *adapter = netdev_priv(netdev);
1505 	int status = 0;
1506 
1507 	mutex_lock(&adapter->rx_filter_lock);
1508 
1509 	/* Packets with VID 0 are always received by Lancer by default */
1510 	if (lancer_chip(adapter) && vid == 0)
1511 		goto done;
1512 
1513 	if (test_bit(vid, adapter->vids))
1514 		goto done;
1515 
1516 	set_bit(vid, adapter->vids);
1517 	adapter->vlans_added++;
1518 
1519 	status = be_vid_config(adapter);
1520 done:
1521 	mutex_unlock(&adapter->rx_filter_lock);
1522 	return status;
1523 }
1524 
1525 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1526 {
1527 	struct be_adapter *adapter = netdev_priv(netdev);
1528 	int status = 0;
1529 
1530 	mutex_lock(&adapter->rx_filter_lock);
1531 
1532 	/* Packets with VID 0 are always received by Lancer by default */
1533 	if (lancer_chip(adapter) && vid == 0)
1534 		goto done;
1535 
1536 	if (!test_bit(vid, adapter->vids))
1537 		goto done;
1538 
1539 	clear_bit(vid, adapter->vids);
1540 	adapter->vlans_added--;
1541 
1542 	status = be_vid_config(adapter);
1543 done:
1544 	mutex_unlock(&adapter->rx_filter_lock);
1545 	return status;
1546 }
1547 
1548 static void be_set_all_promisc(struct be_adapter *adapter)
1549 {
1550 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1551 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1552 }
1553 
1554 static void be_set_mc_promisc(struct be_adapter *adapter)
1555 {
1556 	int status;
1557 
1558 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1559 		return;
1560 
1561 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1562 	if (!status)
1563 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1564 }
1565 
1566 static void be_set_uc_promisc(struct be_adapter *adapter)
1567 {
1568 	int status;
1569 
1570 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1571 		return;
1572 
1573 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1574 	if (!status)
1575 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1576 }
1577 
1578 static void be_clear_uc_promisc(struct be_adapter *adapter)
1579 {
1580 	int status;
1581 
1582 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1583 		return;
1584 
1585 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1586 	if (!status)
1587 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1588 }
1589 
1590 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1591  * We use a single callback function for both sync and unsync. We really don't
1592  * add/remove addresses through this callback. But, we use it to detect changes
1593  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1594  */
1595 static int be_uc_list_update(struct net_device *netdev,
1596 			     const unsigned char *addr)
1597 {
1598 	struct be_adapter *adapter = netdev_priv(netdev);
1599 
1600 	adapter->update_uc_list = true;
1601 	return 0;
1602 }
1603 
1604 static int be_mc_list_update(struct net_device *netdev,
1605 			     const unsigned char *addr)
1606 {
1607 	struct be_adapter *adapter = netdev_priv(netdev);
1608 
1609 	adapter->update_mc_list = true;
1610 	return 0;
1611 }
1612 
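/* Sync the netdev mc-list into the adapter and re-program the HW
 * multicast filter. Switches to mc-promisc mode when IFF_ALLMULTI is
 * set or more groups are configured than the adapter supports; skips
 * the update entirely when the interface is promiscuous.
 */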
1613 static void be_set_mc_list(struct be_adapter *adapter)
1614 {
1615 	struct net_device *netdev = adapter->netdev;
1616 	struct netdev_hw_addr *ha;
1617 	bool mc_promisc = false;
1618 	int status;
1619 
1620 	netif_addr_lock_bh(netdev);
1621 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1622 
1623 	if (netdev->flags & IFF_PROMISC) {
1624 		adapter->update_mc_list = false;
1625 	} else if (netdev->flags & IFF_ALLMULTI ||
1626 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1627 		/* Enable multicast promisc if num configured exceeds
1628 		 * what we support
1629 		 */
1630 		mc_promisc = true;
1631 		adapter->update_mc_list = false;
1632 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1633 		/* Update mc-list unconditionally if the iface was previously
1634 		 * in mc-promisc mode and now is out of that mode.
1635 		 */
1636 		adapter->update_mc_list = true;
1637 	}
1638 
1639 	if (adapter->update_mc_list) {
1640 		int i = 0;
1641 
1642 		/* cache the mc-list in adapter */
1643 		netdev_for_each_mc_addr(ha, netdev) {
1644 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1645 			i++;
1646 		}
1647 		adapter->mc_count = netdev_mc_count(netdev);
1648 	}
1649 	netif_addr_unlock_bh(netdev);
1650 
1651 	if (mc_promisc) {
1652 		be_set_mc_promisc(adapter);
1653 	} else if (adapter->update_mc_list) {
1654 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1655 		if (!status)
1656 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1657 		else
1658 			be_set_mc_promisc(adapter);
1659 
1660 		adapter->update_mc_list = false;
1661 	}
1662 }
1663 
1664 static void be_clear_mc_list(struct be_adapter *adapter)
1665 {
1666 	struct net_device *netdev = adapter->netdev;
1667 
1668 	__dev_mc_unsync(netdev, NULL);
1669 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1670 	adapter->mc_count = 0;
1671 }
1672 
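/* Program the uc-list entry 'uc_idx' on the interface. If the entry
 * equals the already-programmed device MAC, reuse pmac_id[0] instead
 * of adding a duplicate.
 */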
1673 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1674 {
1675 	if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1676 			     adapter->dev_mac)) {
1677 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1678 		return 0;
1679 	}
1680 
1681 	return be_cmd_pmac_add(adapter,
1682 			       (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1683 			       adapter->if_handle,
1684 			       &adapter->pmac_id[uc_idx + 1], 0);
1685 }
1686 
1687 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1688 {
1689 	if (pmac_id == adapter->pmac_id[0])
1690 		return;
1691 
1692 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1693 }
1694 
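/* Sync the netdev uc-list into the adapter and re-program the unicast
 * MAC filters. Switches to uc-promisc mode when more addresses are
 * configured than the adapter supports (one slot is reserved for the
 * primary MAC).
 */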
1695 static void be_set_uc_list(struct be_adapter *adapter)
1696 {
1697 	struct net_device *netdev = adapter->netdev;
1698 	struct netdev_hw_addr *ha;
1699 	bool uc_promisc = false;
1700 	int curr_uc_macs = 0, i;
1701 
1702 	netif_addr_lock_bh(netdev);
1703 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1704 
1705 	if (netdev->flags & IFF_PROMISC) {
1706 		adapter->update_uc_list = false;
1707 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1708 		uc_promisc = true;
1709 		adapter->update_uc_list = false;
1710 	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1711 		/* Update uc-list unconditionally if the iface was previously
1712 		 * in uc-promisc mode and now is out of that mode.
1713 		 */
1714 		adapter->update_uc_list = true;
1715 	}
1716 
1717 	if (adapter->update_uc_list) {
1718 		i = 1; /* First slot is claimed by the Primary MAC */
1719 
1720 		/* cache the uc-list in adapter array */
1721 		netdev_for_each_uc_addr(ha, netdev) {
1722 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1723 			i++;
1724 		}
1725 		curr_uc_macs = netdev_uc_count(netdev);
1726 	}
1727 	netif_addr_unlock_bh(netdev);
1728 
1729 	if (uc_promisc) {
1730 		be_set_uc_promisc(adapter);
1731 	} else if (adapter->update_uc_list) {
1732 		be_clear_uc_promisc(adapter);
1733 
1734 		for (i = 0; i < adapter->uc_macs; i++)
1735 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1736 
1737 		for (i = 0; i < curr_uc_macs; i++)
1738 			be_uc_mac_add(adapter, i);
1739 		adapter->uc_macs = curr_uc_macs;
1740 		adapter->update_uc_list = false;
1741 	}
1742 }
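
/* Illustrative note, not additional driver logic: be_set_uc_list() above
 * reserves slot 0 of uc_list/pmac_id for the primary MAC and caches the
 * netdev UC addresses from slot 1 onwards. With be_max_uc() == 4 (a
 * hypothetical value), up to 3 secondary UC MACs can be programmed before
 * the function falls back to uc-promisc mode.
 */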
1743 
1744 static void be_clear_uc_list(struct be_adapter *adapter)
1745 {
1746 	struct net_device *netdev = adapter->netdev;
1747 	int i;
1748 
1749 	__dev_uc_unsync(netdev, NULL);
1750 	for (i = 0; i < adapter->uc_macs; i++)
1751 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1752 
1753 	adapter->uc_macs = 0;
1754 }
1755 
1756 static void __be_set_rx_mode(struct be_adapter *adapter)
1757 {
1758 	struct net_device *netdev = adapter->netdev;
1759 
1760 	mutex_lock(&adapter->rx_filter_lock);
1761 
1762 	if (netdev->flags & IFF_PROMISC) {
1763 		if (!be_in_all_promisc(adapter))
1764 			be_set_all_promisc(adapter);
1765 	} else if (be_in_all_promisc(adapter)) {
1766 		/* We need to re-program the vlan-list or clear
1767 		 * vlan-promisc mode (if needed) when the interface
1768 		 * comes out of promisc mode.
1769 		 */
1770 		be_vid_config(adapter);
1771 	}
1772 
1773 	be_set_uc_list(adapter);
1774 	be_set_mc_list(adapter);
1775 
1776 	mutex_unlock(&adapter->rx_filter_lock);
1777 }
1778 
1779 static void be_work_set_rx_mode(struct work_struct *work)
1780 {
1781 	struct be_cmd_work *cmd_work =
1782 				container_of(work, struct be_cmd_work, work);
1783 
1784 	__be_set_rx_mode(cmd_work->adapter);
1785 	kfree(cmd_work);
1786 }
1787 
1788 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1789 {
1790 	struct be_adapter *adapter = netdev_priv(netdev);
1791 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1792 	int status;
1793 
1794 	if (!sriov_enabled(adapter))
1795 		return -EPERM;
1796 
1797 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1798 		return -EINVAL;
1799 
1800 	/* Proceed further only if the user-provided MAC is different
1801 	 * from the active MAC
1802 	 */
1803 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1804 		return 0;
1805 
1806 	if (BEx_chip(adapter)) {
1807 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1808 				vf + 1);
1809 
1810 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1811 					 &vf_cfg->pmac_id, vf + 1);
1812 	} else {
1813 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1814 					vf + 1);
1815 	}
1816 
1817 	if (status) {
1818 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1819 			mac, vf, status);
1820 		return be_cmd_status(status);
1821 	}
1822 
1823 	ether_addr_copy(vf_cfg->mac_addr, mac);
1824 
1825 	return 0;
1826 }
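
/* Usage sketch (illustrative, not part of the driver): be_set_vf_mac()
 * appears to back the ndo_set_vf_mac hook, so it is typically reached from
 * the host PF via iproute2, e.g. (interface name is hypothetical):
 *
 *   ip link set eth0 vf 0 mac 00:11:22:33:44:55
 *
 * On BEx chips the old pmac entry is deleted and a new one added; otherwise
 * a single SET_MAC command is issued, as in the code above.
 */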
1827 
1828 static int be_get_vf_config(struct net_device *netdev, int vf,
1829 			    struct ifla_vf_info *vi)
1830 {
1831 	struct be_adapter *adapter = netdev_priv(netdev);
1832 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1833 
1834 	if (!sriov_enabled(adapter))
1835 		return -EPERM;
1836 
1837 	if (vf >= adapter->num_vfs)
1838 		return -EINVAL;
1839 
1840 	vi->vf = vf;
1841 	vi->max_tx_rate = vf_cfg->tx_rate;
1842 	vi->min_tx_rate = 0;
1843 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1844 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1845 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1846 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1847 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1848 
1849 	return 0;
1850 }
1851 
1852 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1853 {
1854 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1855 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1856 	int vf_if_id = vf_cfg->if_handle;
1857 	int status;
1858 
1859 	/* Enable Transparent VLAN Tagging */
1860 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1861 	if (status)
1862 		return status;
1863 
1864 	/* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1865 	vids[0] = 0;
1866 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1867 	if (!status)
1868 		dev_info(&adapter->pdev->dev,
1869 			 "Cleared guest VLANs on VF%d\n", vf);
1870 
1871 	/* After TVT is enabled, disallow VFs to program VLAN filters */
1872 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1873 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1874 						  ~BE_PRIV_FILTMGMT, vf + 1);
1875 		if (!status)
1876 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1877 	}
1878 	return 0;
1879 }
1880 
1881 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1882 {
1883 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1884 	struct device *dev = &adapter->pdev->dev;
1885 	int status;
1886 
1887 	/* Reset Transparent VLAN Tagging. */
1888 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1889 				       vf_cfg->if_handle, 0, 0);
1890 	if (status)
1891 		return status;
1892 
1893 	/* Allow VFs to program VLAN filtering */
1894 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1895 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1896 						  BE_PRIV_FILTMGMT, vf + 1);
1897 		if (!status) {
1898 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1899 			dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1900 		}
1901 	}
1902 
1903 	dev_info(dev,
1904 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1905 	return 0;
1906 }
1907 
1908 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1909 			  __be16 vlan_proto)
1910 {
1911 	struct be_adapter *adapter = netdev_priv(netdev);
1912 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1913 	int status;
1914 
1915 	if (!sriov_enabled(adapter))
1916 		return -EPERM;
1917 
1918 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1919 		return -EINVAL;
1920 
1921 	if (vlan_proto != htons(ETH_P_8021Q))
1922 		return -EPROTONOSUPPORT;
1923 
1924 	if (vlan || qos) {
1925 		vlan |= qos << VLAN_PRIO_SHIFT;
1926 		status = be_set_vf_tvt(adapter, vf, vlan);
1927 	} else {
1928 		status = be_clear_vf_tvt(adapter, vf);
1929 	}
1930 
1931 	if (status) {
1932 		dev_err(&adapter->pdev->dev,
1933 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1934 			status);
1935 		return be_cmd_status(status);
1936 	}
1937 
1938 	vf_cfg->vlan_tag = vlan;
1939 	return 0;
1940 }
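
/* Worked example (illustrative only): with vlan = 100 and qos = 3 the code
 * above builds vlan |= qos << VLAN_PRIO_SHIFT, i.e. 100 | (3 << 13) = 0x6064,
 * and programs it as the transparent tag via be_set_vf_tvt(). A hypothetical
 * host-side invocation would look like:
 *
 *   ip link set eth0 vf 0 vlan 100 qos 3
 *
 * Passing vlan 0 and qos 0 takes the be_clear_vf_tvt() path instead.
 */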
1941 
1942 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1943 			     int min_tx_rate, int max_tx_rate)
1944 {
1945 	struct be_adapter *adapter = netdev_priv(netdev);
1946 	struct device *dev = &adapter->pdev->dev;
1947 	int percent_rate, status = 0;
1948 	u16 link_speed = 0;
1949 	u8 link_status;
1950 
1951 	if (!sriov_enabled(adapter))
1952 		return -EPERM;
1953 
1954 	if (vf >= adapter->num_vfs)
1955 		return -EINVAL;
1956 
1957 	if (min_tx_rate)
1958 		return -EINVAL;
1959 
1960 	if (!max_tx_rate)
1961 		goto config_qos;
1962 
1963 	status = be_cmd_link_status_query(adapter, &link_speed,
1964 					  &link_status, 0);
1965 	if (status)
1966 		goto err;
1967 
1968 	if (!link_status) {
1969 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1970 		status = -ENETDOWN;
1971 		goto err;
1972 	}
1973 
1974 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1975 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1976 			link_speed);
1977 		status = -EINVAL;
1978 		goto err;
1979 	}
1980 
1981 	/* On Skyhawk the QOS setting must be done only as a % value */
1982 	percent_rate = link_speed / 100;
1983 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1984 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1985 			percent_rate);
1986 		status = -EINVAL;
1987 		goto err;
1988 	}
1989 
1990 config_qos:
1991 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1992 	if (status)
1993 		goto err;
1994 
1995 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1996 	return 0;
1997 
1998 err:
1999 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2000 		max_tx_rate, vf);
2001 	return be_cmd_status(status);
2002 }
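
/* Worked example (illustrative numbers): on a Skyhawk function with a
 * 10000 Mbps link, percent_rate = 10000 / 100 = 100, so max_tx_rate must
 * lie in [100, 10000] and be a multiple of 100 Mbps; e.g. 2500 is accepted
 * while 2550 fails with -EINVAL. Passing max_tx_rate = 0 skips the checks
 * and issues be_cmd_config_qos() with a zero rate.
 */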
2003 
2004 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2005 				int link_state)
2006 {
2007 	struct be_adapter *adapter = netdev_priv(netdev);
2008 	int status;
2009 
2010 	if (!sriov_enabled(adapter))
2011 		return -EPERM;
2012 
2013 	if (vf >= adapter->num_vfs)
2014 		return -EINVAL;
2015 
2016 	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2017 	if (status) {
2018 		dev_err(&adapter->pdev->dev,
2019 			"Link state change on VF %d failed: %#x\n", vf, status);
2020 		return be_cmd_status(status);
2021 	}
2022 
2023 	adapter->vf_cfg[vf].plink_tracking = link_state;
2024 
2025 	return 0;
2026 }
2027 
2028 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2029 {
2030 	struct be_adapter *adapter = netdev_priv(netdev);
2031 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2032 	u8 spoofchk;
2033 	int status;
2034 
2035 	if (!sriov_enabled(adapter))
2036 		return -EPERM;
2037 
2038 	if (vf >= adapter->num_vfs)
2039 		return -EINVAL;
2040 
2041 	if (BEx_chip(adapter))
2042 		return -EOPNOTSUPP;
2043 
2044 	if (enable == vf_cfg->spoofchk)
2045 		return 0;
2046 
2047 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2048 
2049 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2050 				       0, spoofchk);
2051 	if (status) {
2052 		dev_err(&adapter->pdev->dev,
2053 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2054 		return be_cmd_status(status);
2055 	}
2056 
2057 	vf_cfg->spoofchk = enable;
2058 	return 0;
2059 }
2060 
2061 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2062 			  ulong now)
2063 {
2064 	aic->rx_pkts_prev = rx_pkts;
2065 	aic->tx_reqs_prev = tx_pkts;
2066 	aic->jiffies = now;
2067 }
2068 
2069 static int be_get_new_eqd(struct be_eq_obj *eqo)
2070 {
2071 	struct be_adapter *adapter = eqo->adapter;
2072 	int eqd, start;
2073 	struct be_aic_obj *aic;
2074 	struct be_rx_obj *rxo;
2075 	struct be_tx_obj *txo;
2076 	u64 rx_pkts = 0, tx_pkts = 0;
2077 	ulong now;
2078 	u32 pps, delta;
2079 	int i;
2080 
2081 	aic = &adapter->aic_obj[eqo->idx];
2082 	if (!aic->enable) {
2083 		if (aic->jiffies)
2084 			aic->jiffies = 0;
2085 		eqd = aic->et_eqd;
2086 		return eqd;
2087 	}
2088 
2089 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2090 		do {
2091 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2092 			rx_pkts += rxo->stats.rx_pkts;
2093 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2094 	}
2095 
2096 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2097 		do {
2098 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2099 			tx_pkts += txo->stats.tx_reqs;
2100 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2101 	}
2102 
2103 	/* Skip if the counters wrapped around or this is the first calculation */
2104 	now = jiffies;
2105 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2106 	    rx_pkts < aic->rx_pkts_prev ||
2107 	    tx_pkts < aic->tx_reqs_prev) {
2108 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2109 		return aic->prev_eqd;
2110 	}
2111 
2112 	delta = jiffies_to_msecs(now - aic->jiffies);
2113 	if (delta == 0)
2114 		return aic->prev_eqd;
2115 
2116 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2117 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2118 	eqd = (pps / 15000) << 2;
2119 
2120 	if (eqd < 8)
2121 		eqd = 0;
2122 	eqd = min_t(u32, eqd, aic->max_eqd);
2123 	eqd = max_t(u32, eqd, aic->min_eqd);
2124 
2125 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2126 
2127 	return eqd;
2128 }
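
/* Worked example of the adaptive EQD formula above (numbers are purely
 * illustrative): if 200,000 rx pkts and 100,000 tx reqs were seen over a
 * 100 ms window, pps = (200000 * 1000) / 100 + (100000 * 1000) / 100 =
 * 3,000,000 and eqd = (3000000 / 15000) << 2 = 800. A low rate such as
 * pps = 20,000 gives eqd = (20000 / 15000) << 2 = 4, which is below 8 and
 * therefore zeroed. Either way the result is finally clamped to the
 * [aic->min_eqd, aic->max_eqd] range.
 */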
2129 
2130 /* For Skyhawk-R only */
2131 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2132 {
2133 	struct be_adapter *adapter = eqo->adapter;
2134 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2135 	ulong now = jiffies;
2136 	int eqd;
2137 	u32 mult_enc;
2138 
2139 	if (!aic->enable)
2140 		return 0;
2141 
2142 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2143 		eqd = aic->prev_eqd;
2144 	else
2145 		eqd = be_get_new_eqd(eqo);
2146 
2147 	if (eqd > 100)
2148 		mult_enc = R2I_DLY_ENC_1;
2149 	else if (eqd > 60)
2150 		mult_enc = R2I_DLY_ENC_2;
2151 	else if (eqd > 20)
2152 		mult_enc = R2I_DLY_ENC_3;
2153 	else
2154 		mult_enc = R2I_DLY_ENC_0;
2155 
2156 	aic->prev_eqd = eqd;
2157 
2158 	return mult_enc;
2159 }
2160 
2161 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2162 {
2163 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2164 	struct be_aic_obj *aic;
2165 	struct be_eq_obj *eqo;
2166 	int i, num = 0, eqd;
2167 
2168 	for_all_evt_queues(adapter, eqo, i) {
2169 		aic = &adapter->aic_obj[eqo->idx];
2170 		eqd = be_get_new_eqd(eqo);
2171 		if (force_update || eqd != aic->prev_eqd) {
2172 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2173 			set_eqd[num].eq_id = eqo->q.id;
2174 			aic->prev_eqd = eqd;
2175 			num++;
2176 		}
2177 	}
2178 
2179 	if (num)
2180 		be_cmd_modify_eqd(adapter, set_eqd, num);
2181 }
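
/* Illustrative arithmetic: the delay is passed to the FW as a multiplier,
 * so, for example, eqd = 96 is encoded as delay_multiplier =
 * (96 * 65) / 100 = 62 before being sent via be_cmd_modify_eqd().
 */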
2182 
2183 static void be_rx_stats_update(struct be_rx_obj *rxo,
2184 			       struct be_rx_compl_info *rxcp)
2185 {
2186 	struct be_rx_stats *stats = rx_stats(rxo);
2187 
2188 	u64_stats_update_begin(&stats->sync);
2189 	stats->rx_compl++;
2190 	stats->rx_bytes += rxcp->pkt_size;
2191 	stats->rx_pkts++;
2192 	if (rxcp->tunneled)
2193 		stats->rx_vxlan_offload_pkts++;
2194 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2195 		stats->rx_mcast_pkts++;
2196 	if (rxcp->err)
2197 		stats->rx_compl_err++;
2198 	u64_stats_update_end(&stats->sync);
2199 }
2200 
2201 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2202 {
2203 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2204 	 * Also ignore ipcksm for IPv6 pkts
2205 	 */
2206 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2207 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2208 }
2209 
2210 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2211 {
2212 	struct be_adapter *adapter = rxo->adapter;
2213 	struct be_rx_page_info *rx_page_info;
2214 	struct be_queue_info *rxq = &rxo->q;
2215 	u32 frag_idx = rxq->tail;
2216 
2217 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2218 	BUG_ON(!rx_page_info->page);
2219 
2220 	if (rx_page_info->last_frag) {
2221 		dma_unmap_page(&adapter->pdev->dev,
2222 			       dma_unmap_addr(rx_page_info, bus),
2223 			       adapter->big_page_size, DMA_FROM_DEVICE);
2224 		rx_page_info->last_frag = false;
2225 	} else {
2226 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2227 					dma_unmap_addr(rx_page_info, bus),
2228 					rx_frag_size, DMA_FROM_DEVICE);
2229 	}
2230 
2231 	queue_tail_inc(rxq);
2232 	atomic_dec(&rxq->used);
2233 	return rx_page_info;
2234 }
2235 
2236 /* Throw away the data in the Rx completion */
2237 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2238 				struct be_rx_compl_info *rxcp)
2239 {
2240 	struct be_rx_page_info *page_info;
2241 	u16 i, num_rcvd = rxcp->num_rcvd;
2242 
2243 	for (i = 0; i < num_rcvd; i++) {
2244 		page_info = get_rx_page_info(rxo);
2245 		put_page(page_info->page);
2246 		memset(page_info, 0, sizeof(*page_info));
2247 	}
2248 }
2249 
2250 /*
2251  * skb_fill_rx_data forms a complete skb for an ether frame
2252  * indicated by rxcp.
2253  */
2254 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2255 			     struct be_rx_compl_info *rxcp)
2256 {
2257 	struct be_rx_page_info *page_info;
2258 	u16 i, j;
2259 	u16 hdr_len, curr_frag_len, remaining;
2260 	u8 *start;
2261 
2262 	page_info = get_rx_page_info(rxo);
2263 	start = page_address(page_info->page) + page_info->page_offset;
2264 	prefetch(start);
2265 
2266 	/* Copy data in the first descriptor of this completion */
2267 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2268 
2269 	skb->len = curr_frag_len;
2270 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2271 		memcpy(skb->data, start, curr_frag_len);
2272 		/* Complete packet has now been moved to data */
2273 		put_page(page_info->page);
2274 		skb->data_len = 0;
2275 		skb->tail += curr_frag_len;
2276 	} else {
2277 		hdr_len = ETH_HLEN;
2278 		memcpy(skb->data, start, hdr_len);
2279 		skb_shinfo(skb)->nr_frags = 1;
2280 		skb_frag_set_page(skb, 0, page_info->page);
2281 		skb_shinfo(skb)->frags[0].page_offset =
2282 					page_info->page_offset + hdr_len;
2283 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2284 				  curr_frag_len - hdr_len);
2285 		skb->data_len = curr_frag_len - hdr_len;
2286 		skb->truesize += rx_frag_size;
2287 		skb->tail += hdr_len;
2288 	}
2289 	page_info->page = NULL;
2290 
2291 	if (rxcp->pkt_size <= rx_frag_size) {
2292 		BUG_ON(rxcp->num_rcvd != 1);
2293 		return;
2294 	}
2295 
2296 	/* More frags present for this completion */
2297 	remaining = rxcp->pkt_size - curr_frag_len;
2298 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2299 		page_info = get_rx_page_info(rxo);
2300 		curr_frag_len = min(remaining, rx_frag_size);
2301 
2302 		/* Coalesce all frags from the same physical page in one slot */
2303 		if (page_info->page_offset == 0) {
2304 			/* Fresh page */
2305 			j++;
2306 			skb_frag_set_page(skb, j, page_info->page);
2307 			skb_shinfo(skb)->frags[j].page_offset =
2308 							page_info->page_offset;
2309 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2310 			skb_shinfo(skb)->nr_frags++;
2311 		} else {
2312 			put_page(page_info->page);
2313 		}
2314 
2315 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2316 		skb->len += curr_frag_len;
2317 		skb->data_len += curr_frag_len;
2318 		skb->truesize += rx_frag_size;
2319 		remaining -= curr_frag_len;
2320 		page_info->page = NULL;
2321 	}
2322 	BUG_ON(j > MAX_SKB_FRAGS);
2323 }
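
/* Worked example (illustrative): with an rx_frag_size of 2048, a 6000-byte
 * frame arrives as num_rcvd = 3 fragments (2048 + 2048 + 1904).
 * skb_fill_rx_data() copies only the ETH_HLEN-byte header into the linear
 * area, attaches the remaining 2034 bytes of the first fragment as frags[0],
 * and then either appends each further fragment as a new frag or coalesces
 * it into the previous slot when it comes from the same physical page
 * (page_offset != 0).
 */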
2324 
2325 /* Process the RX completion indicated by rxcp when GRO is disabled */
2326 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2327 				struct be_rx_compl_info *rxcp)
2328 {
2329 	struct be_adapter *adapter = rxo->adapter;
2330 	struct net_device *netdev = adapter->netdev;
2331 	struct sk_buff *skb;
2332 
2333 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2334 	if (unlikely(!skb)) {
2335 		rx_stats(rxo)->rx_drops_no_skbs++;
2336 		be_rx_compl_discard(rxo, rxcp);
2337 		return;
2338 	}
2339 
2340 	skb_fill_rx_data(rxo, skb, rxcp);
2341 
2342 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2343 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2344 	else
2345 		skb_checksum_none_assert(skb);
2346 
2347 	skb->protocol = eth_type_trans(skb, netdev);
2348 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2349 	if (netdev->features & NETIF_F_RXHASH)
2350 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2351 
2352 	skb->csum_level = rxcp->tunneled;
2353 	skb_mark_napi_id(skb, napi);
2354 
2355 	if (rxcp->vlanf)
2356 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2357 
2358 	netif_receive_skb(skb);
2359 }
2360 
2361 /* Process the RX completion indicated by rxcp when GRO is enabled */
2362 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2363 				    struct napi_struct *napi,
2364 				    struct be_rx_compl_info *rxcp)
2365 {
2366 	struct be_adapter *adapter = rxo->adapter;
2367 	struct be_rx_page_info *page_info;
2368 	struct sk_buff *skb = NULL;
2369 	u16 remaining, curr_frag_len;
2370 	u16 i, j;
2371 
2372 	skb = napi_get_frags(napi);
2373 	if (!skb) {
2374 		be_rx_compl_discard(rxo, rxcp);
2375 		return;
2376 	}
2377 
2378 	remaining = rxcp->pkt_size;
2379 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2380 		page_info = get_rx_page_info(rxo);
2381 
2382 		curr_frag_len = min(remaining, rx_frag_size);
2383 
2384 		/* Coalesce all frags from the same physical page in one slot */
2385 		if (i == 0 || page_info->page_offset == 0) {
2386 			/* First frag or Fresh page */
2387 			j++;
2388 			skb_frag_set_page(skb, j, page_info->page);
2389 			skb_shinfo(skb)->frags[j].page_offset =
2390 							page_info->page_offset;
2391 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2392 		} else {
2393 			put_page(page_info->page);
2394 		}
2395 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2396 		skb->truesize += rx_frag_size;
2397 		remaining -= curr_frag_len;
2398 		memset(page_info, 0, sizeof(*page_info));
2399 	}
2400 	BUG_ON(j > MAX_SKB_FRAGS);
2401 
2402 	skb_shinfo(skb)->nr_frags = j + 1;
2403 	skb->len = rxcp->pkt_size;
2404 	skb->data_len = rxcp->pkt_size;
2405 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2406 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2407 	if (adapter->netdev->features & NETIF_F_RXHASH)
2408 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2409 
2410 	skb->csum_level = rxcp->tunneled;
2411 
2412 	if (rxcp->vlanf)
2413 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2414 
2415 	napi_gro_frags(napi);
2416 }
2417 
2418 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2419 				 struct be_rx_compl_info *rxcp)
2420 {
2421 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2422 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2423 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2424 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2425 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2426 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2427 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2428 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2429 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2430 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2431 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2432 	if (rxcp->vlanf) {
2433 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2434 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2435 	}
2436 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2437 	rxcp->tunneled =
2438 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2439 }
2440 
2441 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2442 				 struct be_rx_compl_info *rxcp)
2443 {
2444 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2445 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2446 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2447 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2448 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2449 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2450 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2451 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2452 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2453 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2454 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2455 	if (rxcp->vlanf) {
2456 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2457 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2458 	}
2459 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2460 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2461 }
2462 
2463 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2464 {
2465 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2466 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2467 	struct be_adapter *adapter = rxo->adapter;
2468 
2469 	/* For checking the valid bit it is Ok to use either definition as the
2470 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2471 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2472 		return NULL;
2473 
2474 	rmb();
2475 	be_dws_le_to_cpu(compl, sizeof(*compl));
2476 
2477 	if (adapter->be3_native)
2478 		be_parse_rx_compl_v1(compl, rxcp);
2479 	else
2480 		be_parse_rx_compl_v0(compl, rxcp);
2481 
2482 	if (rxcp->ip_frag)
2483 		rxcp->l4_csum = 0;
2484 
2485 	if (rxcp->vlanf) {
2486 		/* In QNQ modes, if qnq bit is not set, then the packet was
2487 		 * tagged only with the transparent outer vlan-tag and must
2488 		 * not be treated as a vlan packet by host
2489 		 */
2490 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2491 			rxcp->vlanf = 0;
2492 
2493 		if (!lancer_chip(adapter))
2494 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2495 
2496 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2497 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2498 			rxcp->vlanf = 0;
2499 	}
2500 
2501 	/* As the compl has been parsed, reset it; we won't touch it again */
2502 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2503 
2504 	queue_tail_inc(&rxo->cq);
2505 	return rxcp;
2506 }
2507 
2508 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2509 {
2510 	u32 order = get_order(size);
2511 
2512 	if (order > 0)
2513 		gfp |= __GFP_COMP;
2514 	return alloc_pages(gfp, order);
2515 }
2516 
2517 /*
2518  * Allocate a page, split it into fragments of size rx_frag_size and post as
2519  * receive buffers to BE
2520  */
2521 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2522 {
2523 	struct be_adapter *adapter = rxo->adapter;
2524 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2525 	struct be_queue_info *rxq = &rxo->q;
2526 	struct page *pagep = NULL;
2527 	struct device *dev = &adapter->pdev->dev;
2528 	struct be_eth_rx_d *rxd;
2529 	u64 page_dmaaddr = 0, frag_dmaaddr;
2530 	u32 posted, page_offset = 0, notify = 0;
2531 
2532 	page_info = &rxo->page_info_tbl[rxq->head];
2533 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2534 		if (!pagep) {
2535 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2536 			if (unlikely(!pagep)) {
2537 				rx_stats(rxo)->rx_post_fail++;
2538 				break;
2539 			}
2540 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2541 						    adapter->big_page_size,
2542 						    DMA_FROM_DEVICE);
2543 			if (dma_mapping_error(dev, page_dmaaddr)) {
2544 				put_page(pagep);
2545 				pagep = NULL;
2546 				adapter->drv_stats.dma_map_errors++;
2547 				break;
2548 			}
2549 			page_offset = 0;
2550 		} else {
2551 			get_page(pagep);
2552 			page_offset += rx_frag_size;
2553 		}
2554 		page_info->page_offset = page_offset;
2555 		page_info->page = pagep;
2556 
2557 		rxd = queue_head_node(rxq);
2558 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2559 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2560 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2561 
2562 		/* Any space left in the current big page for another frag? */
2563 		if ((page_offset + rx_frag_size + rx_frag_size) >
2564 					adapter->big_page_size) {
2565 			pagep = NULL;
2566 			page_info->last_frag = true;
2567 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2568 		} else {
2569 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2570 		}
2571 
2572 		prev_page_info = page_info;
2573 		queue_head_inc(rxq);
2574 		page_info = &rxo->page_info_tbl[rxq->head];
2575 	}
2576 
2577 	/* Mark the last frag of a page when we break out of the above loop
2578 	 * with no more slots available in the RXQ
2579 	 */
2580 	if (pagep) {
2581 		prev_page_info->last_frag = true;
2582 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2583 	}
2584 
2585 	if (posted) {
2586 		atomic_add(posted, &rxq->used);
2587 		if (rxo->rx_post_starved)
2588 			rxo->rx_post_starved = false;
2589 		do {
2590 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2591 			be_rxq_notify(adapter, rxq->id, notify);
2592 			posted -= notify;
2593 		} while (posted);
2594 	} else if (atomic_read(&rxq->used) == 0) {
2595 		/* Let be_worker replenish when memory is available */
2596 		rxo->rx_post_starved = true;
2597 	}
2598 }
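
/* Illustrative numbers for the page-splitting above: with an rx_frag_size
 * of 2048 on a 4 KB PAGE_SIZE system, big_page_size works out to
 * (1 << get_order(2048)) * 4096 = 4096, so each allocated page is carved
 * into two 2048-byte RX fragments and the second one is flagged as
 * last_frag so that the whole page can be unmapped when its completion is
 * processed.
 */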
2599 
2600 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2601 {
2602 	struct be_queue_info *tx_cq = &txo->cq;
2603 	struct be_tx_compl_info *txcp = &txo->txcp;
2604 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2605 
2606 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2607 		return NULL;
2608 
2609 	/* Ensure load ordering of valid bit dword and other dwords below */
2610 	rmb();
2611 	be_dws_le_to_cpu(compl, sizeof(*compl));
2612 
2613 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2614 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2615 
2616 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2617 	queue_tail_inc(tx_cq);
2618 	return txcp;
2619 }
2620 
2621 static u16 be_tx_compl_process(struct be_adapter *adapter,
2622 			       struct be_tx_obj *txo, u16 last_index)
2623 {
2624 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2625 	struct be_queue_info *txq = &txo->q;
2626 	struct sk_buff *skb = NULL;
2627 	bool unmap_skb_hdr = false;
2628 	struct be_eth_wrb *wrb;
2629 	u16 num_wrbs = 0;
2630 	u32 frag_index;
2631 
2632 	do {
2633 		if (sent_skbs[txq->tail]) {
2634 			/* Free skb from prev req */
2635 			if (skb)
2636 				dev_consume_skb_any(skb);
2637 			skb = sent_skbs[txq->tail];
2638 			sent_skbs[txq->tail] = NULL;
2639 			queue_tail_inc(txq);  /* skip hdr wrb */
2640 			num_wrbs++;
2641 			unmap_skb_hdr = true;
2642 		}
2643 		wrb = queue_tail_node(txq);
2644 		frag_index = txq->tail;
2645 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2646 			      (unmap_skb_hdr && skb_headlen(skb)));
2647 		unmap_skb_hdr = false;
2648 		queue_tail_inc(txq);
2649 		num_wrbs++;
2650 	} while (frag_index != last_index);
2651 	dev_consume_skb_any(skb);
2652 
2653 	return num_wrbs;
2654 }
2655 
2656 /* Return the number of events in the event queue */
2657 static inline int events_get(struct be_eq_obj *eqo)
2658 {
2659 	struct be_eq_entry *eqe;
2660 	int num = 0;
2661 
2662 	do {
2663 		eqe = queue_tail_node(&eqo->q);
2664 		if (eqe->evt == 0)
2665 			break;
2666 
2667 		rmb();
2668 		eqe->evt = 0;
2669 		num++;
2670 		queue_tail_inc(&eqo->q);
2671 	} while (true);
2672 
2673 	return num;
2674 }
2675 
2676 /* Leaves the EQ in disarmed state */
2677 static void be_eq_clean(struct be_eq_obj *eqo)
2678 {
2679 	int num = events_get(eqo);
2680 
2681 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2682 }
2683 
2684 /* Free posted rx buffers that were not used */
2685 static void be_rxq_clean(struct be_rx_obj *rxo)
2686 {
2687 	struct be_queue_info *rxq = &rxo->q;
2688 	struct be_rx_page_info *page_info;
2689 
2690 	while (atomic_read(&rxq->used) > 0) {
2691 		page_info = get_rx_page_info(rxo);
2692 		put_page(page_info->page);
2693 		memset(page_info, 0, sizeof(*page_info));
2694 	}
2695 	BUG_ON(atomic_read(&rxq->used));
2696 	rxq->tail = 0;
2697 	rxq->head = 0;
2698 }
2699 
2700 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2701 {
2702 	struct be_queue_info *rx_cq = &rxo->cq;
2703 	struct be_rx_compl_info *rxcp;
2704 	struct be_adapter *adapter = rxo->adapter;
2705 	int flush_wait = 0;
2706 
2707 	/* Consume pending rx completions.
2708 	 * Wait for the flush completion (identified by zero num_rcvd)
2709 	 * to arrive. Notify CQ even when there are no more CQ entries
2710 	 * for HW to flush partially coalesced CQ entries.
2711 	 * In Lancer, there is no need to wait for flush compl.
2712 	 */
2713 	for (;;) {
2714 		rxcp = be_rx_compl_get(rxo);
2715 		if (!rxcp) {
2716 			if (lancer_chip(adapter))
2717 				break;
2718 
2719 			if (flush_wait++ > 50 ||
2720 			    be_check_error(adapter,
2721 					   BE_ERROR_HW)) {
2722 				dev_warn(&adapter->pdev->dev,
2723 					 "did not receive flush compl\n");
2724 				break;
2725 			}
2726 			be_cq_notify(adapter, rx_cq->id, true, 0);
2727 			mdelay(1);
2728 		} else {
2729 			be_rx_compl_discard(rxo, rxcp);
2730 			be_cq_notify(adapter, rx_cq->id, false, 1);
2731 			if (rxcp->num_rcvd == 0)
2732 				break;
2733 		}
2734 	}
2735 
2736 	/* After cleanup, leave the CQ in unarmed state */
2737 	be_cq_notify(adapter, rx_cq->id, false, 0);
2738 }
2739 
2740 static void be_tx_compl_clean(struct be_adapter *adapter)
2741 {
2742 	struct device *dev = &adapter->pdev->dev;
2743 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2744 	struct be_tx_compl_info *txcp;
2745 	struct be_queue_info *txq;
2746 	u32 end_idx, notified_idx;
2747 	struct be_tx_obj *txo;
2748 	int i, pending_txqs;
2749 
2750 	/* Stop polling for compls when HW has been silent for 10ms */
2751 	do {
2752 		pending_txqs = adapter->num_tx_qs;
2753 
2754 		for_all_tx_queues(adapter, txo, i) {
2755 			cmpl = 0;
2756 			num_wrbs = 0;
2757 			txq = &txo->q;
2758 			while ((txcp = be_tx_compl_get(txo))) {
2759 				num_wrbs +=
2760 					be_tx_compl_process(adapter, txo,
2761 							    txcp->end_index);
2762 				cmpl++;
2763 			}
2764 			if (cmpl) {
2765 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2766 				atomic_sub(num_wrbs, &txq->used);
2767 				timeo = 0;
2768 			}
2769 			if (!be_is_tx_compl_pending(txo))
2770 				pending_txqs--;
2771 		}
2772 
2773 		if (pending_txqs == 0 || ++timeo > 10 ||
2774 		    be_check_error(adapter, BE_ERROR_HW))
2775 			break;
2776 
2777 		mdelay(1);
2778 	} while (true);
2779 
2780 	/* Free enqueued TX that was never notified to HW */
2781 	for_all_tx_queues(adapter, txo, i) {
2782 		txq = &txo->q;
2783 
2784 		if (atomic_read(&txq->used)) {
2785 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2786 				 i, atomic_read(&txq->used));
2787 			notified_idx = txq->tail;
2788 			end_idx = txq->tail;
2789 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2790 				  txq->len);
2791 			/* Use the tx-compl process logic to handle requests
2792 			 * that were not sent to the HW.
2793 			 */
2794 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2795 			atomic_sub(num_wrbs, &txq->used);
2796 			BUG_ON(atomic_read(&txq->used));
2797 			txo->pend_wrb_cnt = 0;
2798 			/* Since hw was never notified of these requests,
2799 			 * reset TXQ indices
2800 			 */
2801 			txq->head = notified_idx;
2802 			txq->tail = notified_idx;
2803 		}
2804 	}
2805 }
2806 
2807 static void be_evt_queues_destroy(struct be_adapter *adapter)
2808 {
2809 	struct be_eq_obj *eqo;
2810 	int i;
2811 
2812 	for_all_evt_queues(adapter, eqo, i) {
2813 		if (eqo->q.created) {
2814 			be_eq_clean(eqo);
2815 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2816 			napi_hash_del(&eqo->napi);
2817 			netif_napi_del(&eqo->napi);
2818 			free_cpumask_var(eqo->affinity_mask);
2819 		}
2820 		be_queue_free(adapter, &eqo->q);
2821 	}
2822 }
2823 
2824 static int be_evt_queues_create(struct be_adapter *adapter)
2825 {
2826 	struct be_queue_info *eq;
2827 	struct be_eq_obj *eqo;
2828 	struct be_aic_obj *aic;
2829 	int i, rc;
2830 
2831 	/* need enough EQs to service both RX and TX queues */
2832 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2833 				    max(adapter->cfg_num_rx_irqs,
2834 					adapter->cfg_num_tx_irqs));
2835 
2836 	for_all_evt_queues(adapter, eqo, i) {
2837 		int numa_node = dev_to_node(&adapter->pdev->dev);
2838 
2839 		aic = &adapter->aic_obj[i];
2840 		eqo->adapter = adapter;
2841 		eqo->idx = i;
2842 		aic->max_eqd = BE_MAX_EQD;
2843 		aic->enable = true;
2844 
2845 		eq = &eqo->q;
2846 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2847 				    sizeof(struct be_eq_entry));
2848 		if (rc)
2849 			return rc;
2850 
2851 		rc = be_cmd_eq_create(adapter, eqo);
2852 		if (rc)
2853 			return rc;
2854 
2855 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2856 			return -ENOMEM;
2857 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2858 				eqo->affinity_mask);
2859 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2860 			       BE_NAPI_WEIGHT);
2861 	}
2862 	return 0;
2863 }
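
/* Illustrative sizing (hypothetical numbers): with 8 MSI-x vectors granted,
 * cfg_num_rx_irqs = 6 and cfg_num_tx_irqs = 4, the code above creates
 * num_evt_qs = min(8, max(6, 4)) = 6 event queues, each with its own NAPI
 * context and an affinity mask spread across the device's NUMA node.
 */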
2864 
2865 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2866 {
2867 	struct be_queue_info *q;
2868 
2869 	q = &adapter->mcc_obj.q;
2870 	if (q->created)
2871 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2872 	be_queue_free(adapter, q);
2873 
2874 	q = &adapter->mcc_obj.cq;
2875 	if (q->created)
2876 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2877 	be_queue_free(adapter, q);
2878 }
2879 
2880 /* Must be called only after TX qs are created as MCC shares TX EQ */
2881 static int be_mcc_queues_create(struct be_adapter *adapter)
2882 {
2883 	struct be_queue_info *q, *cq;
2884 
2885 	cq = &adapter->mcc_obj.cq;
2886 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2887 			   sizeof(struct be_mcc_compl)))
2888 		goto err;
2889 
2890 	/* Use the default EQ for MCC completions */
2891 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2892 		goto mcc_cq_free;
2893 
2894 	q = &adapter->mcc_obj.q;
2895 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2896 		goto mcc_cq_destroy;
2897 
2898 	if (be_cmd_mccq_create(adapter, q, cq))
2899 		goto mcc_q_free;
2900 
2901 	return 0;
2902 
2903 mcc_q_free:
2904 	be_queue_free(adapter, q);
2905 mcc_cq_destroy:
2906 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2907 mcc_cq_free:
2908 	be_queue_free(adapter, cq);
2909 err:
2910 	return -1;
2911 }
2912 
2913 static void be_tx_queues_destroy(struct be_adapter *adapter)
2914 {
2915 	struct be_queue_info *q;
2916 	struct be_tx_obj *txo;
2917 	u8 i;
2918 
2919 	for_all_tx_queues(adapter, txo, i) {
2920 		q = &txo->q;
2921 		if (q->created)
2922 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2923 		be_queue_free(adapter, q);
2924 
2925 		q = &txo->cq;
2926 		if (q->created)
2927 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2928 		be_queue_free(adapter, q);
2929 	}
2930 }
2931 
2932 static int be_tx_qs_create(struct be_adapter *adapter)
2933 {
2934 	struct be_queue_info *cq;
2935 	struct be_tx_obj *txo;
2936 	struct be_eq_obj *eqo;
2937 	int status, i;
2938 
2939 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2940 
2941 	for_all_tx_queues(adapter, txo, i) {
2942 		cq = &txo->cq;
2943 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2944 					sizeof(struct be_eth_tx_compl));
2945 		if (status)
2946 			return status;
2947 
2948 		u64_stats_init(&txo->stats.sync);
2949 		u64_stats_init(&txo->stats.sync_compl);
2950 
2951 		/* If num_evt_qs is less than num_tx_qs, then more than
2952 		 * one txq shares an eq
2953 		 */
2954 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2955 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2956 		if (status)
2957 			return status;
2958 
2959 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2960 					sizeof(struct be_eth_wrb));
2961 		if (status)
2962 			return status;
2963 
2964 		status = be_cmd_txq_create(adapter, txo);
2965 		if (status)
2966 			return status;
2967 
2968 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2969 				    eqo->idx);
2970 	}
2971 
2972 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2973 		 adapter->num_tx_qs);
2974 	return 0;
2975 }
2976 
2977 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2978 {
2979 	struct be_queue_info *q;
2980 	struct be_rx_obj *rxo;
2981 	int i;
2982 
2983 	for_all_rx_queues(adapter, rxo, i) {
2984 		q = &rxo->cq;
2985 		if (q->created)
2986 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2987 		be_queue_free(adapter, q);
2988 	}
2989 }
2990 
2991 static int be_rx_cqs_create(struct be_adapter *adapter)
2992 {
2993 	struct be_queue_info *eq, *cq;
2994 	struct be_rx_obj *rxo;
2995 	int rc, i;
2996 
2997 	adapter->num_rss_qs =
2998 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2999 
3000 	/* We'll use RSS only if at least 2 RSS rings are supported. */
3001 	if (adapter->num_rss_qs < 2)
3002 		adapter->num_rss_qs = 0;
3003 
3004 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3005 
3006 	/* When the interface is not capable of RSS rings (and there is no
3007 	 * need to create a default RXQ) we'll still need one RXQ
3008 	 */
3009 	if (adapter->num_rx_qs == 0)
3010 		adapter->num_rx_qs = 1;
3011 
3012 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3013 	for_all_rx_queues(adapter, rxo, i) {
3014 		rxo->adapter = adapter;
3015 		cq = &rxo->cq;
3016 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3017 				    sizeof(struct be_eth_rx_compl));
3018 		if (rc)
3019 			return rc;
3020 
3021 		u64_stats_init(&rxo->stats.sync);
3022 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3023 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3024 		if (rc)
3025 			return rc;
3026 	}
3027 
3028 	dev_info(&adapter->pdev->dev,
3029 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3030 	return 0;
3031 }
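
/* Illustrative sizing (hypothetical numbers): with num_evt_qs = 6 and
 * cfg_num_rx_irqs = 6, num_rss_qs = 6; adding a default (non-RSS) RXQ when
 * need_def_rxq is set gives num_rx_qs = 7. If fewer than 2 RSS rings were
 * possible, RSS would be disabled and a single RXQ used instead.
 */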
3032 
3033 static irqreturn_t be_intx(int irq, void *dev)
3034 {
3035 	struct be_eq_obj *eqo = dev;
3036 	struct be_adapter *adapter = eqo->adapter;
3037 	int num_evts = 0;
3038 
3039 	/* IRQ is not expected when NAPI is scheduled as the EQ
3040 	 * will not be armed.
3041 	 * But, this can happen on Lancer INTx where it takes
3042 	 * a while to de-assert INTx or in BE2 where occasionally
3043 	 * an interrupt may be raised even when EQ is unarmed.
3044 	 * If NAPI is already scheduled, then counting & notifying
3045 	 * events will orphan them.
3046 	 */
3047 	if (napi_schedule_prep(&eqo->napi)) {
3048 		num_evts = events_get(eqo);
3049 		__napi_schedule(&eqo->napi);
3050 		if (num_evts)
3051 			eqo->spurious_intr = 0;
3052 	}
3053 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3054 
3055 	/* Return IRQ_HANDLED only for the first spurious intr
3056 	 * after a valid intr to stop the kernel from branding
3057 	 * this irq as a bad one!
3058 	 */
3059 	if (num_evts || eqo->spurious_intr++ == 0)
3060 		return IRQ_HANDLED;
3061 	else
3062 		return IRQ_NONE;
3063 }
3064 
3065 static irqreturn_t be_msix(int irq, void *dev)
3066 {
3067 	struct be_eq_obj *eqo = dev;
3068 
3069 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3070 	napi_schedule(&eqo->napi);
3071 	return IRQ_HANDLED;
3072 }
3073 
3074 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3075 {
3076 	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3077 }
3078 
3079 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3080 			 int budget, int polling)
3081 {
3082 	struct be_adapter *adapter = rxo->adapter;
3083 	struct be_queue_info *rx_cq = &rxo->cq;
3084 	struct be_rx_compl_info *rxcp;
3085 	u32 work_done;
3086 	u32 frags_consumed = 0;
3087 
3088 	for (work_done = 0; work_done < budget; work_done++) {
3089 		rxcp = be_rx_compl_get(rxo);
3090 		if (!rxcp)
3091 			break;
3092 
3093 		/* Is it a flush compl that has no data */
3094 		if (unlikely(rxcp->num_rcvd == 0))
3095 			goto loop_continue;
3096 
3097 		/* Discard compl with partial DMA Lancer B0 */
3098 		if (unlikely(!rxcp->pkt_size)) {
3099 			be_rx_compl_discard(rxo, rxcp);
3100 			goto loop_continue;
3101 		}
3102 
3103 		/* On BE drop pkts that arrive due to imperfect filtering in
3104 		 * promiscuous mode on some SKUs
3105 		 */
3106 		if (unlikely(rxcp->port != adapter->port_num &&
3107 			     !lancer_chip(adapter))) {
3108 			be_rx_compl_discard(rxo, rxcp);
3109 			goto loop_continue;
3110 		}
3111 
3112 		/* Don't do gro when we're busy_polling */
3113 		if (do_gro(rxcp) && polling != BUSY_POLLING)
3114 			be_rx_compl_process_gro(rxo, napi, rxcp);
3115 		else
3116 			be_rx_compl_process(rxo, napi, rxcp);
3117 
3118 loop_continue:
3119 		frags_consumed += rxcp->num_rcvd;
3120 		be_rx_stats_update(rxo, rxcp);
3121 	}
3122 
3123 	if (work_done) {
3124 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3125 
3126 		/* When an rx-obj gets into post_starved state, just
3127 		 * let be_worker do the posting.
3128 		 */
3129 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3130 		    !rxo->rx_post_starved)
3131 			be_post_rx_frags(rxo, GFP_ATOMIC,
3132 					 max_t(u32, MAX_RX_POST,
3133 					       frags_consumed));
3134 	}
3135 
3136 	return work_done;
3137 }
3138 
3139 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141 	switch (status) {
3142 	case BE_TX_COMP_HDR_PARSE_ERR:
3143 		tx_stats(txo)->tx_hdr_parse_err++;
3144 		break;
3145 	case BE_TX_COMP_NDMA_ERR:
3146 		tx_stats(txo)->tx_dma_err++;
3147 		break;
3148 	case BE_TX_COMP_ACL_ERR:
3149 		tx_stats(txo)->tx_spoof_check_err++;
3150 		break;
3151 	}
3152 }
3153 
3154 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3155 {
3156 	switch (status) {
3157 	case LANCER_TX_COMP_LSO_ERR:
3158 		tx_stats(txo)->tx_tso_err++;
3159 		break;
3160 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3161 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3162 		tx_stats(txo)->tx_spoof_check_err++;
3163 		break;
3164 	case LANCER_TX_COMP_QINQ_ERR:
3165 		tx_stats(txo)->tx_qinq_err++;
3166 		break;
3167 	case LANCER_TX_COMP_PARITY_ERR:
3168 		tx_stats(txo)->tx_internal_parity_err++;
3169 		break;
3170 	case LANCER_TX_COMP_DMA_ERR:
3171 		tx_stats(txo)->tx_dma_err++;
3172 		break;
3173 	}
3174 }
3175 
3176 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3177 			  int idx)
3178 {
3179 	int num_wrbs = 0, work_done = 0;
3180 	struct be_tx_compl_info *txcp;
3181 
3182 	while ((txcp = be_tx_compl_get(txo))) {
3183 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3184 		work_done++;
3185 
3186 		if (txcp->status) {
3187 			if (lancer_chip(adapter))
3188 				lancer_update_tx_err(txo, txcp->status);
3189 			else
3190 				be_update_tx_err(txo, txcp->status);
3191 		}
3192 	}
3193 
3194 	if (work_done) {
3195 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3196 		atomic_sub(num_wrbs, &txo->q.used);
3197 
3198 		/* As Tx wrbs have been freed up, wake up the netdev queue
3199 		 * if it was stopped due to lack of tx wrbs. */
3200 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3201 		    be_can_txq_wake(txo)) {
3202 			netif_wake_subqueue(adapter->netdev, idx);
3203 		}
3204 
3205 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3206 		tx_stats(txo)->tx_compl += work_done;
3207 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3208 	}
3209 }
3210 
3211 #ifdef CONFIG_NET_RX_BUSY_POLL
3212 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3213 {
3214 	bool status = true;
3215 
3216 	spin_lock(&eqo->lock); /* BH is already disabled */
3217 	if (eqo->state & BE_EQ_LOCKED) {
3218 		WARN_ON(eqo->state & BE_EQ_NAPI);
3219 		eqo->state |= BE_EQ_NAPI_YIELD;
3220 		status = false;
3221 	} else {
3222 		eqo->state = BE_EQ_NAPI;
3223 	}
3224 	spin_unlock(&eqo->lock);
3225 	return status;
3226 }
3227 
3228 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3229 {
3230 	spin_lock(&eqo->lock); /* BH is already disabled */
3231 
3232 	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3233 	eqo->state = BE_EQ_IDLE;
3234 
3235 	spin_unlock(&eqo->lock);
3236 }
3237 
3238 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3239 {
3240 	bool status = true;
3241 
3242 	spin_lock_bh(&eqo->lock);
3243 	if (eqo->state & BE_EQ_LOCKED) {
3244 		eqo->state |= BE_EQ_POLL_YIELD;
3245 		status = false;
3246 	} else {
3247 		eqo->state |= BE_EQ_POLL;
3248 	}
3249 	spin_unlock_bh(&eqo->lock);
3250 	return status;
3251 }
3252 
3253 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3254 {
3255 	spin_lock_bh(&eqo->lock);
3256 
3257 	WARN_ON(eqo->state & (BE_EQ_NAPI));
3258 	eqo->state = BE_EQ_IDLE;
3259 
3260 	spin_unlock_bh(&eqo->lock);
3261 }
3262 
3263 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3264 {
3265 	spin_lock_init(&eqo->lock);
3266 	eqo->state = BE_EQ_IDLE;
3267 }
3268 
3269 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3270 {
3271 	local_bh_disable();
3272 
3273 	/* It's enough to just acquire napi lock on the eqo to stop
3274 	 * be_busy_poll() from processing any queues.
3275 	 */
3276 	while (!be_lock_napi(eqo))
3277 		mdelay(1);
3278 
3279 	local_bh_enable();
3280 }
3281 
3282 #else /* CONFIG_NET_RX_BUSY_POLL */
3283 
3284 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3285 {
3286 	return true;
3287 }
3288 
3289 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3290 {
3291 }
3292 
3293 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3294 {
3295 	return false;
3296 }
3297 
3298 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3299 {
3300 }
3301 
3302 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3303 {
3304 }
3305 
3306 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3307 {
3308 }
3309 #endif /* CONFIG_NET_RX_BUSY_POLL */
3310 
3311 int be_poll(struct napi_struct *napi, int budget)
3312 {
3313 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3314 	struct be_adapter *adapter = eqo->adapter;
3315 	int max_work = 0, work, i, num_evts;
3316 	struct be_rx_obj *rxo;
3317 	struct be_tx_obj *txo;
3318 	u32 mult_enc = 0;
3319 
3320 	num_evts = events_get(eqo);
3321 
3322 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3323 		be_process_tx(adapter, txo, i);
3324 
3325 	if (be_lock_napi(eqo)) {
3326 		/* This loop will iterate twice for EQ0 in which
3327 		 * completions of the last RXQ (the default one) are also
3328 		 * processed. For other EQs the loop iterates only once.
3329 		 */
3330 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3331 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3332 			max_work = max(work, max_work);
3333 		}
3334 		be_unlock_napi(eqo);
3335 	} else {
3336 		max_work = budget;
3337 	}
3338 
3339 	if (is_mcc_eqo(eqo))
3340 		be_process_mcc(adapter);
3341 
3342 	if (max_work < budget) {
3343 		napi_complete(napi);
3344 
3345 		/* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3346 		 * delay via a delay multiplier encoding value
3347 		 */
3348 		if (skyhawk_chip(adapter))
3349 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3350 
3351 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3352 			     mult_enc);
3353 	} else {
3354 		/* As we'll continue in polling mode, count and clear events */
3355 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3356 	}
3357 	return max_work;
3358 }
3359 
3360 #ifdef CONFIG_NET_RX_BUSY_POLL
3361 static int be_busy_poll(struct napi_struct *napi)
3362 {
3363 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3364 	struct be_adapter *adapter = eqo->adapter;
3365 	struct be_rx_obj *rxo;
3366 	int i, work = 0;
3367 
3368 	if (!be_lock_busy_poll(eqo))
3369 		return LL_FLUSH_BUSY;
3370 
3371 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3372 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3373 		if (work)
3374 			break;
3375 	}
3376 
3377 	be_unlock_busy_poll(eqo);
3378 	return work;
3379 }
3380 #endif
3381 
3382 void be_detect_error(struct be_adapter *adapter)
3383 {
3384 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3385 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3386 	u32 i;
3387 	struct device *dev = &adapter->pdev->dev;
3388 
3389 	if (be_check_error(adapter, BE_ERROR_HW))
3390 		return;
3391 
3392 	if (lancer_chip(adapter)) {
3393 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3394 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3395 			be_set_error(adapter, BE_ERROR_UE);
3396 			sliport_err1 = ioread32(adapter->db +
3397 						SLIPORT_ERROR1_OFFSET);
3398 			sliport_err2 = ioread32(adapter->db +
3399 						SLIPORT_ERROR2_OFFSET);
3400 			/* Do not log error messages if it's a FW reset */
3401 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3402 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3403 				dev_info(dev, "Firmware update in progress\n");
3404 			} else {
3405 				dev_err(dev, "Error detected in the card\n");
3406 				dev_err(dev, "ERR: sliport status 0x%x\n",
3407 					sliport_status);
3408 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3409 					sliport_err1);
3410 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3411 					sliport_err2);
3412 			}
3413 		}
3414 	} else {
3415 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3416 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3417 		ue_lo_mask = ioread32(adapter->pcicfg +
3418 				      PCICFG_UE_STATUS_LOW_MASK);
3419 		ue_hi_mask = ioread32(adapter->pcicfg +
3420 				      PCICFG_UE_STATUS_HI_MASK);
3421 
3422 		ue_lo = (ue_lo & ~ue_lo_mask);
3423 		ue_hi = (ue_hi & ~ue_hi_mask);
3424 
3425 		/* On certain platforms BE hardware can indicate spurious UEs.
3426 		 * A real UE will make the HW stop working completely on its own,
3427 		 * so hw_error is not set purely on UE detection here.
3428 		 */
3429 
3430 		if (ue_lo || ue_hi) {
3431 			dev_err(dev, "Error detected in the adapter\n");
3432 			if (skyhawk_chip(adapter))
3433 				be_set_error(adapter, BE_ERROR_UE);
3434 
3435 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3436 				if (ue_lo & 1)
3437 					dev_err(dev, "UE: %s bit set\n",
3438 						ue_status_low_desc[i]);
3439 			}
3440 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3441 				if (ue_hi & 1)
3442 					dev_err(dev, "UE: %s bit set\n",
3443 						ue_status_hi_desc[i]);
3444 			}
3445 		}
3446 	}
3447 }
3448 
3449 static void be_msix_disable(struct be_adapter *adapter)
3450 {
3451 	if (msix_enabled(adapter)) {
3452 		pci_disable_msix(adapter->pdev);
3453 		adapter->num_msix_vec = 0;
3454 		adapter->num_msix_roce_vec = 0;
3455 	}
3456 }
3457 
3458 static int be_msix_enable(struct be_adapter *adapter)
3459 {
3460 	unsigned int i, max_roce_eqs;
3461 	struct device *dev = &adapter->pdev->dev;
3462 	int num_vec;
3463 
3464 	/* If RoCE is supported, program the max number of vectors that
3465 	 * could be used for NIC and RoCE, else, just program the number
3466 	 * we'll use initially.
3467 	 */
3468 	if (be_roce_supported(adapter)) {
3469 		max_roce_eqs =
3470 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3471 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3472 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3473 	} else {
3474 		num_vec = max(adapter->cfg_num_rx_irqs,
3475 			      adapter->cfg_num_tx_irqs);
3476 	}
3477 
3478 	for (i = 0; i < num_vec; i++)
3479 		adapter->msix_entries[i].entry = i;
3480 
3481 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3482 					MIN_MSIX_VECTORS, num_vec);
3483 	if (num_vec < 0)
3484 		goto fail;
3485 
3486 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3487 		adapter->num_msix_roce_vec = num_vec / 2;
3488 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3489 			 adapter->num_msix_roce_vec);
3490 	}
3491 
3492 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3493 
3494 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3495 		 adapter->num_msix_vec);
3496 	return 0;
3497 
3498 fail:
3499 	dev_warn(dev, "MSIx enable failed\n");
3500 
3501 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3502 	if (be_virtfn(adapter))
3503 		return num_vec;
3504 	return 0;
3505 }
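
/* Illustrative split (hypothetical numbers): on a RoCE-capable function
 * that is granted num_vec = 10 vectors, the code above reserves
 * num_msix_roce_vec = 10 / 2 = 5 for RoCE and leaves num_msix_vec = 5
 * for the NIC event queues.
 */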
3506 
3507 static inline int be_msix_vec_get(struct be_adapter *adapter,
3508 				  struct be_eq_obj *eqo)
3509 {
3510 	return adapter->msix_entries[eqo->msix_idx].vector;
3511 }
3512 
3513 static int be_msix_register(struct be_adapter *adapter)
3514 {
3515 	struct net_device *netdev = adapter->netdev;
3516 	struct be_eq_obj *eqo;
3517 	int status, i, vec;
3518 
3519 	for_all_evt_queues(adapter, eqo, i) {
3520 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3521 		vec = be_msix_vec_get(adapter, eqo);
3522 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3523 		if (status)
3524 			goto err_msix;
3525 
3526 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3527 	}
3528 
3529 	return 0;
3530 err_msix:
3531 	for (i--; i >= 0; i--) {
3532 		eqo = &adapter->eq_obj[i];
3533 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3534 	}
3535 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3536 		 status);
3537 	be_msix_disable(adapter);
3538 	return status;
3539 }
3540 
3541 static int be_irq_register(struct be_adapter *adapter)
3542 {
3543 	struct net_device *netdev = adapter->netdev;
3544 	int status;
3545 
3546 	if (msix_enabled(adapter)) {
3547 		status = be_msix_register(adapter);
3548 		if (status == 0)
3549 			goto done;
3550 		/* INTx is not supported for VF */
3551 		if (be_virtfn(adapter))
3552 			return status;
3553 	}
3554 
3555 	/* INTx: only the first EQ is used */
3556 	netdev->irq = adapter->pdev->irq;
3557 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3558 			     &adapter->eq_obj[0]);
3559 	if (status) {
3560 		dev_err(&adapter->pdev->dev,
3561 			"INTx request IRQ failed - err %d\n", status);
3562 		return status;
3563 	}
3564 done:
3565 	adapter->isr_registered = true;
3566 	return 0;
3567 }
3568 
3569 static void be_irq_unregister(struct be_adapter *adapter)
3570 {
3571 	struct net_device *netdev = adapter->netdev;
3572 	struct be_eq_obj *eqo;
3573 	int i, vec;
3574 
3575 	if (!adapter->isr_registered)
3576 		return;
3577 
3578 	/* INTx */
3579 	if (!msix_enabled(adapter)) {
3580 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3581 		goto done;
3582 	}
3583 
3584 	/* MSIx */
3585 	for_all_evt_queues(adapter, eqo, i) {
3586 		vec = be_msix_vec_get(adapter, eqo);
3587 		irq_set_affinity_hint(vec, NULL);
3588 		free_irq(vec, eqo);
3589 	}
3590 
3591 done:
3592 	adapter->isr_registered = false;
3593 }
3594 
3595 static void be_rx_qs_destroy(struct be_adapter *adapter)
3596 {
3597 	struct rss_info *rss = &adapter->rss_info;
3598 	struct be_queue_info *q;
3599 	struct be_rx_obj *rxo;
3600 	int i;
3601 
3602 	for_all_rx_queues(adapter, rxo, i) {
3603 		q = &rxo->q;
3604 		if (q->created) {
3605 			/* If RXQs are destroyed while in an "out of buffer"
3606 			 * state, there is a possibility of an HW stall on
3607 			 * Lancer. So, post 64 buffers to each queue to relieve
3608 			 * the "out of buffer" condition.
3609 			 * Make sure there's space in the RXQ before posting.
3610 			 */
3611 			if (lancer_chip(adapter)) {
3612 				be_rx_cq_clean(rxo);
3613 				if (atomic_read(&q->used) == 0)
3614 					be_post_rx_frags(rxo, GFP_KERNEL,
3615 							 MAX_RX_POST);
3616 			}
3617 
3618 			be_cmd_rxq_destroy(adapter, q);
3619 			be_rx_cq_clean(rxo);
3620 			be_rxq_clean(rxo);
3621 		}
3622 		be_queue_free(adapter, q);
3623 	}
3624 
3625 	if (rss->rss_flags) {
3626 		rss->rss_flags = RSS_ENABLE_NONE;
3627 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3628 				  128, rss->rss_hkey);
3629 	}
3630 }
3631 
3632 static void be_disable_if_filters(struct be_adapter *adapter)
3633 {
3634 	be_dev_mac_del(adapter, adapter->pmac_id[0]);
3635 	be_clear_uc_list(adapter);
3636 	be_clear_mc_list(adapter);
3637 
3638 	/* The IFACE flags are enabled in the open path and cleared
3639 	 * in the close path. When a VF gets detached from the host and
3640 	 * assigned to a VM the following happens:
3641 	 *	- VF's IFACE flags get cleared in the detach path
3642 	 *	- IFACE create is issued by the VF in the attach path
3643 	 * Due to a bug in the BE3/Skyhawk-R FW
3644 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3645 	 * specified along with the IFACE create cmd issued by a VF are not
3646 	 * honoured by FW.  As a consequence, if a *new* driver
3647 	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is used by a VM/VF,
3649 	 * the IFACE gets created *without* the needed flags.
3650 	 * To avoid this, disable RX-filter flags only for Lancer.
3651 	 */
3652 	if (lancer_chip(adapter)) {
3653 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3654 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3655 	}
3656 }
3657 
3658 static int be_close(struct net_device *netdev)
3659 {
3660 	struct be_adapter *adapter = netdev_priv(netdev);
3661 	struct be_eq_obj *eqo;
3662 	int i;
3663 
	/* This protection is needed as be_close() may be called even when the
	 * adapter is in a cleared state (after an EEH permanent failure)
	 */
3667 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3668 		return 0;
3669 
3670 	/* Before attempting cleanup ensure all the pending cmds in the
3671 	 * config_wq have finished execution
3672 	 */
3673 	flush_workqueue(be_wq);
3674 
3675 	be_disable_if_filters(adapter);
3676 
3677 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3678 		for_all_evt_queues(adapter, eqo, i) {
3679 			napi_disable(&eqo->napi);
3680 			be_disable_busy_poll(eqo);
3681 		}
3682 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3683 	}
3684 
3685 	be_async_mcc_disable(adapter);
3686 
3687 	/* Wait for all pending tx completions to arrive so that
3688 	 * all tx skbs are freed.
3689 	 */
3690 	netif_tx_disable(netdev);
3691 	be_tx_compl_clean(adapter);
3692 
3693 	be_rx_qs_destroy(adapter);
3694 
3695 	for_all_evt_queues(adapter, eqo, i) {
3696 		if (msix_enabled(adapter))
3697 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3698 		else
3699 			synchronize_irq(netdev->irq);
3700 		be_eq_clean(eqo);
3701 	}
3702 
3703 	be_irq_unregister(adapter);
3704 
3705 	return 0;
3706 }
3707 
3708 static int be_rx_qs_create(struct be_adapter *adapter)
3709 {
3710 	struct rss_info *rss = &adapter->rss_info;
3711 	u8 rss_key[RSS_HASH_KEY_LEN];
3712 	struct be_rx_obj *rxo;
3713 	int rc, i, j;
3714 
3715 	for_all_rx_queues(adapter, rxo, i) {
3716 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3717 				    sizeof(struct be_eth_rx_d));
3718 		if (rc)
3719 			return rc;
3720 	}
3721 
3722 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3723 		rxo = default_rxo(adapter);
3724 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3725 				       rx_frag_size, adapter->if_handle,
3726 				       false, &rxo->rss_id);
3727 		if (rc)
3728 			return rc;
3729 	}
3730 
3731 	for_all_rss_queues(adapter, rxo, i) {
3732 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3733 				       rx_frag_size, adapter->if_handle,
3734 				       true, &rxo->rss_id);
3735 		if (rc)
3736 			return rc;
3737 	}
3738 
3739 	if (be_multi_rxq(adapter)) {
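		/* Stripe the RSS queue ids across all RSS_INDIR_TABLE_LEN
		 * entries of the indirection table, repeating them in order.
		 */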
3740 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3741 			for_all_rss_queues(adapter, rxo, i) {
3742 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3743 					break;
3744 				rss->rsstable[j + i] = rxo->rss_id;
3745 				rss->rss_queue[j + i] = i;
3746 			}
3747 		}
3748 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3749 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3750 
3751 		if (!BEx_chip(adapter))
3752 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3753 				RSS_ENABLE_UDP_IPV6;
3754 
3755 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3756 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3757 				       RSS_INDIR_TABLE_LEN, rss_key);
3758 		if (rc) {
3759 			rss->rss_flags = RSS_ENABLE_NONE;
3760 			return rc;
3761 		}
3762 
3763 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3764 	} else {
3765 		/* Disable RSS, if only default RX Q is created */
3766 		rss->rss_flags = RSS_ENABLE_NONE;
3767 	}
3768 
3769 
3770 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3771 	 * which is a queue empty condition
3772 	 */
3773 	for_all_rx_queues(adapter, rxo, i)
3774 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3775 
3776 	return 0;
3777 }
3778 
3779 static int be_enable_if_filters(struct be_adapter *adapter)
3780 {
3781 	int status;
3782 
3783 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3784 	if (status)
3785 		return status;
3786 
3787 	/* For BE3 VFs, the PF programs the initial MAC address */
3788 	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3789 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3790 		if (status)
3791 			return status;
3792 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3793 	}
3794 
3795 	if (adapter->vlans_added)
3796 		be_vid_config(adapter);
3797 
3798 	__be_set_rx_mode(adapter);
3799 
3800 	return 0;
3801 }
3802 
3803 static int be_open(struct net_device *netdev)
3804 {
3805 	struct be_adapter *adapter = netdev_priv(netdev);
3806 	struct be_eq_obj *eqo;
3807 	struct be_rx_obj *rxo;
3808 	struct be_tx_obj *txo;
3809 	u8 link_status;
3810 	int status, i;
3811 
3812 	status = be_rx_qs_create(adapter);
3813 	if (status)
3814 		goto err;
3815 
3816 	status = be_enable_if_filters(adapter);
3817 	if (status)
3818 		goto err;
3819 
3820 	status = be_irq_register(adapter);
3821 	if (status)
3822 		goto err;
3823 
3824 	for_all_rx_queues(adapter, rxo, i)
3825 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3826 
3827 	for_all_tx_queues(adapter, txo, i)
3828 		be_cq_notify(adapter, txo->cq.id, true, 0);
3829 
3830 	be_async_mcc_enable(adapter);
3831 
3832 	for_all_evt_queues(adapter, eqo, i) {
3833 		napi_enable(&eqo->napi);
3834 		be_enable_busy_poll(eqo);
3835 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3836 	}
3837 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3838 
3839 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3840 	if (!status)
3841 		be_link_status_update(adapter, link_status);
3842 
3843 	netif_tx_start_all_queues(netdev);
3844 	if (skyhawk_chip(adapter))
3845 		udp_tunnel_get_rx_info(netdev);
3846 
3847 	return 0;
3848 err:
3849 	be_close(adapter->netdev);
3850 	return -EIO;
3851 }
3852 
3853 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3854 {
3855 	u32 addr;
3856 
3857 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3858 
3859 	mac[5] = (u8)(addr & 0xFF);
3860 	mac[4] = (u8)((addr >> 8) & 0xFF);
3861 	mac[3] = (u8)((addr >> 16) & 0xFF);
3862 	/* Use the OUI from the current MAC address */
3863 	memcpy(mac, adapter->netdev->dev_addr, 3);
3864 }
3865 
3866 /*
3867  * Generate a seed MAC address from the PF MAC Address using jhash.
 * MAC addresses for VFs are assigned incrementally starting from the seed.
3869  * These addresses are programmed in the ASIC by the PF and the VF driver
3870  * queries for the MAC address during its probe.
3871  */
3872 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3873 {
3874 	u32 vf;
3875 	int status = 0;
3876 	u8 mac[ETH_ALEN];
3877 	struct be_vf_cfg *vf_cfg;
3878 
3879 	be_vf_eth_addr_generate(adapter, mac);
3880 
3881 	for_all_vfs(adapter, vf_cfg, vf) {
3882 		if (BEx_chip(adapter))
3883 			status = be_cmd_pmac_add(adapter, mac,
3884 						 vf_cfg->if_handle,
3885 						 &vf_cfg->pmac_id, vf + 1);
3886 		else
3887 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3888 						vf + 1);
3889 
3890 		if (status)
3891 			dev_err(&adapter->pdev->dev,
3892 				"Mac address assignment failed for VF %d\n",
3893 				vf);
3894 		else
3895 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3896 
3897 		mac[5] += 1;
3898 	}
3899 	return status;
3900 }
3901 
3902 static int be_vfs_mac_query(struct be_adapter *adapter)
3903 {
3904 	int status, vf;
3905 	u8 mac[ETH_ALEN];
3906 	struct be_vf_cfg *vf_cfg;
3907 
3908 	for_all_vfs(adapter, vf_cfg, vf) {
3909 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3910 					       mac, vf_cfg->if_handle,
3911 					       false, vf+1);
3912 		if (status)
3913 			return status;
3914 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3915 	}
3916 	return 0;
3917 }
3918 
3919 static void be_vf_clear(struct be_adapter *adapter)
3920 {
3921 	struct be_vf_cfg *vf_cfg;
3922 	u32 vf;
3923 
3924 	if (pci_vfs_assigned(adapter->pdev)) {
3925 		dev_warn(&adapter->pdev->dev,
3926 			 "VFs are assigned to VMs: not disabling VFs\n");
3927 		goto done;
3928 	}
3929 
3930 	pci_disable_sriov(adapter->pdev);
3931 
3932 	for_all_vfs(adapter, vf_cfg, vf) {
3933 		if (BEx_chip(adapter))
3934 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3935 					vf_cfg->pmac_id, vf + 1);
3936 		else
3937 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3938 				       vf + 1);
3939 
3940 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3941 	}
3942 
3943 	if (BE3_chip(adapter))
3944 		be_cmd_set_hsw_config(adapter, 0, 0,
3945 				      adapter->if_handle,
3946 				      PORT_FWD_TYPE_PASSTHRU, 0);
3947 done:
3948 	kfree(adapter->vf_cfg);
3949 	adapter->num_vfs = 0;
3950 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3951 }
3952 
3953 static void be_clear_queues(struct be_adapter *adapter)
3954 {
3955 	be_mcc_queues_destroy(adapter);
3956 	be_rx_cqs_destroy(adapter);
3957 	be_tx_queues_destroy(adapter);
3958 	be_evt_queues_destroy(adapter);
3959 }
3960 
3961 static void be_cancel_worker(struct be_adapter *adapter)
3962 {
3963 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3964 		cancel_delayed_work_sync(&adapter->work);
3965 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3966 	}
3967 }
3968 
3969 static void be_cancel_err_detection(struct be_adapter *adapter)
3970 {
3971 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3972 
3973 	if (!be_err_recovery_workq)
3974 		return;
3975 
3976 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3977 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3978 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3979 	}
3980 }
3981 
3982 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3983 {
3984 	struct net_device *netdev = adapter->netdev;
3985 
3986 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3987 		be_cmd_manage_iface(adapter, adapter->if_handle,
3988 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3989 
3990 	if (adapter->vxlan_port)
3991 		be_cmd_set_vxlan_port(adapter, 0);
3992 
3993 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3994 	adapter->vxlan_port = 0;
3995 
3996 	netdev->hw_enc_features = 0;
3997 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3998 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3999 }
4000 
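/* Compute the per-VF share of queue and filter resources (vft_res) that is
 * later passed to be_cmd_set_sriov_config() when carving the PF pool for
 * num_vfs VFs. MAC/VLAN/IFACE/MCCQ counts are distributed only when the FW
 * reports those fields as modifiable.
 */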
4001 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4002 				struct be_resources *vft_res)
4003 {
4004 	struct be_resources res = adapter->pool_res;
4005 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
4006 	struct be_resources res_mod = {0};
4007 	u16 num_vf_qs = 1;
4008 
	/* Distribute the queue resources among the PF and its VFs */
4010 	if (num_vfs) {
4011 		/* Divide the rx queues evenly among the VFs and the PF, capped
4012 		 * at VF-EQ-count. Any remainder queues belong to the PF.
4013 		 */
4014 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4015 				res.max_rss_qs / (num_vfs + 1));
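		/* For example (hypothetical numbers): with max_rss_qs == 15
		 * and num_vfs == 4, each VF's share is
		 * min(SH_VF_MAX_NIC_EQS, 15 / 5) = min(SH_VF_MAX_NIC_EQS, 3)
		 * RSS queues; the remainder stays with the PF.
		 */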
4016 
		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
		 * RSS Tables per port. Provide RSS on VFs only if the number
		 * of VFs requested is less than the PF pool's RSS Tables
		 * limit.
		 */
4021 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4022 			num_vf_qs = 1;
4023 	}
4024 
	/* GET_PROFILE_CONFIG (RESOURCE_MODIFIABLE) sets to all-1's those
	 * fields of res_mod that are modifiable using the SET_PROFILE_CONFIG
	 * cmd.
	 */
4028 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4029 				  RESOURCE_MODIFIABLE, 0);
4030 
4031 	/* If RSS IFACE capability flags are modifiable for a VF, set the
4032 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4033 	 * more than 1 RSSQ is available for a VF.
4034 	 * Otherwise, provision only 1 queue pair for VF.
4035 	 */
4036 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4037 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4038 		if (num_vf_qs > 1) {
4039 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4040 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4041 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4042 		} else {
4043 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4044 					     BE_IF_FLAGS_DEFQ_RSS);
4045 		}
4046 	} else {
4047 		num_vf_qs = 1;
4048 	}
4049 
4050 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4051 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4052 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4053 	}
4054 
4055 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4056 	vft_res->max_rx_qs = num_vf_qs;
4057 	vft_res->max_rss_qs = num_vf_qs;
4058 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4059 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4060 
4061 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
	 * among the PF and its VFs, if the fields are changeable
4063 	 */
4064 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4065 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4066 
4067 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4068 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4069 
4070 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4071 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4072 
4073 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4074 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4075 }
4076 
4077 static void be_if_destroy(struct be_adapter *adapter)
4078 {
	be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4080 
4081 	kfree(adapter->pmac_id);
4082 	adapter->pmac_id = NULL;
4083 
4084 	kfree(adapter->mc_list);
4085 	adapter->mc_list = NULL;
4086 
4087 	kfree(adapter->uc_list);
4088 	adapter->uc_list = NULL;
4089 }
4090 
4091 static int be_clear(struct be_adapter *adapter)
4092 {
4093 	struct pci_dev *pdev = adapter->pdev;
	struct be_resources vft_res = {0};
4095 
4096 	be_cancel_worker(adapter);
4097 
4098 	flush_workqueue(be_wq);
4099 
4100 	if (sriov_enabled(adapter))
4101 		be_vf_clear(adapter);
4102 
4103 	/* Re-configure FW to distribute resources evenly across max-supported
4104 	 * number of VFs, only when VFs are not already enabled.
4105 	 */
4106 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4107 	    !pci_vfs_assigned(pdev)) {
4108 		be_calculate_vf_res(adapter,
4109 				    pci_sriov_get_totalvfs(pdev),
4110 				    &vft_res);
4111 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4112 					pci_sriov_get_totalvfs(pdev),
4113 					&vft_res);
4114 	}
4115 
4116 	be_disable_vxlan_offloads(adapter);
4117 
4118 	be_if_destroy(adapter);
4119 
4120 	be_clear_queues(adapter);
4121 
4122 	be_msix_disable(adapter);
4123 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4124 	return 0;
4125 }
4126 
4127 static int be_vfs_if_create(struct be_adapter *adapter)
4128 {
4129 	struct be_resources res = {0};
4130 	u32 cap_flags, en_flags, vf;
4131 	struct be_vf_cfg *vf_cfg;
4132 	int status;
4133 
4134 	/* If a FW profile exists, then cap_flags are updated */
4135 	cap_flags = BE_VF_IF_EN_FLAGS;
4136 
4137 	for_all_vfs(adapter, vf_cfg, vf) {
4138 		if (!BE3_chip(adapter)) {
4139 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4140 							   ACTIVE_PROFILE_TYPE,
4141 							   RESOURCE_LIMITS,
4142 							   vf + 1);
4143 			if (!status) {
4144 				cap_flags = res.if_cap_flags;
4145 				/* Prevent VFs from enabling VLAN promiscuous
4146 				 * mode
4147 				 */
4148 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4149 			}
4150 		}
4151 
4152 		/* PF should enable IF flags during proxy if_create call */
4153 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4154 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4155 					  &vf_cfg->if_handle, vf + 1);
4156 		if (status)
4157 			return status;
4158 	}
4159 
4160 	return 0;
4161 }
4162 
4163 static int be_vf_setup_init(struct be_adapter *adapter)
4164 {
4165 	struct be_vf_cfg *vf_cfg;
4166 	int vf;
4167 
4168 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4169 				  GFP_KERNEL);
4170 	if (!adapter->vf_cfg)
4171 		return -ENOMEM;
4172 
4173 	for_all_vfs(adapter, vf_cfg, vf) {
4174 		vf_cfg->if_handle = -1;
4175 		vf_cfg->pmac_id = -1;
4176 	}
4177 	return 0;
4178 }
4179 
4180 static int be_vf_setup(struct be_adapter *adapter)
4181 {
4182 	struct device *dev = &adapter->pdev->dev;
4183 	struct be_vf_cfg *vf_cfg;
4184 	int status, old_vfs, vf;
4185 	bool spoofchk;
4186 
4187 	old_vfs = pci_num_vf(adapter->pdev);
4188 
4189 	status = be_vf_setup_init(adapter);
4190 	if (status)
4191 		goto err;
4192 
4193 	if (old_vfs) {
4194 		for_all_vfs(adapter, vf_cfg, vf) {
4195 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4196 			if (status)
4197 				goto err;
4198 		}
4199 
4200 		status = be_vfs_mac_query(adapter);
4201 		if (status)
4202 			goto err;
4203 	} else {
4204 		status = be_vfs_if_create(adapter);
4205 		if (status)
4206 			goto err;
4207 
4208 		status = be_vf_eth_addr_config(adapter);
4209 		if (status)
4210 			goto err;
4211 	}
4212 
4213 	for_all_vfs(adapter, vf_cfg, vf) {
		/* Allow VFs to program MAC/VLAN filters */
4215 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4216 						  vf + 1);
4217 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4218 			status = be_cmd_set_fn_privileges(adapter,
4219 							  vf_cfg->privileges |
4220 							  BE_PRIV_FILTMGMT,
4221 							  vf + 1);
4222 			if (!status) {
4223 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4224 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4225 					 vf);
4226 			}
4227 		}
4228 
4229 		/* Allow full available bandwidth */
4230 		if (!old_vfs)
4231 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4232 
4233 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4234 					       vf_cfg->if_handle, NULL,
4235 					       &spoofchk);
4236 		if (!status)
4237 			vf_cfg->spoofchk = spoofchk;
4238 
4239 		if (!old_vfs) {
4240 			be_cmd_enable_vf(adapter, vf + 1);
4241 			be_cmd_set_logical_link_config(adapter,
4242 						       IFLA_VF_LINK_STATE_AUTO,
4243 						       vf+1);
4244 		}
4245 	}
4246 
4247 	if (!old_vfs) {
4248 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4249 		if (status) {
4250 			dev_err(dev, "SRIOV enable failed\n");
4251 			adapter->num_vfs = 0;
4252 			goto err;
4253 		}
4254 	}
4255 
4256 	if (BE3_chip(adapter)) {
4257 		/* On BE3, enable VEB only when SRIOV is enabled */
4258 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4259 					       adapter->if_handle,
4260 					       PORT_FWD_TYPE_VEB, 0);
4261 		if (status)
4262 			goto err;
4263 	}
4264 
4265 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4266 	return 0;
4267 err:
4268 	dev_err(dev, "VF setup failed\n");
4269 	be_vf_clear(adapter);
4270 	return status;
4271 }
4272 
4273 /* Converting function_mode bits on BE3 to SH mc_type enums */
4274 
4275 static u8 be_convert_mc_type(u32 function_mode)
4276 {
4277 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4278 		return vNIC1;
4279 	else if (function_mode & QNQ_MODE)
4280 		return FLEX10;
4281 	else if (function_mode & VNIC_MODE)
4282 		return vNIC2;
4283 	else if (function_mode & UMC_ENABLED)
4284 		return UMC;
4285 	else
4286 		return MC_NONE;
4287 }
4288 
/* On BE2/BE3, the FW does not report the supported resource limits */
4290 static void BEx_get_resources(struct be_adapter *adapter,
4291 			      struct be_resources *res)
4292 {
4293 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4294 
4295 	if (be_physfn(adapter))
4296 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4297 	else
4298 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4299 
4300 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4301 
4302 	if (be_is_mc(adapter)) {
4303 		/* Assuming that there are 4 channels per port,
4304 		 * when multi-channel is enabled
4305 		 */
4306 		if (be_is_qnq_mode(adapter))
4307 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4308 		else
4309 			/* In a non-qnq multichannel mode, the pvid
4310 			 * takes up one vlan entry
4311 			 */
4312 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4313 	} else {
4314 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4315 	}
4316 
4317 	res->max_mcast_mac = BE_MAX_MC;
4318 
4319 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4320 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4321 	 *    *only* if it is RSS-capable.
4322 	 */
4323 	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4324 	    be_virtfn(adapter) ||
4325 	    (be_is_mc(adapter) &&
4326 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4327 		res->max_tx_qs = 1;
4328 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4329 		struct be_resources super_nic_res = {0};
4330 
4331 		/* On a SuperNIC profile, the driver needs to use the
4332 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4333 		 */
4334 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4335 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4336 					  0);
4337 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4338 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4339 	} else {
4340 		res->max_tx_qs = BE3_MAX_TX_QS;
4341 	}
4342 
4343 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4344 	    !use_sriov && be_physfn(adapter))
4345 		res->max_rss_qs = (adapter->be3_native) ?
4346 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4347 	res->max_rx_qs = res->max_rss_qs + 1;
4348 
4349 	if (be_physfn(adapter))
4350 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4351 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4352 	else
4353 		res->max_evt_qs = 1;
4354 
4355 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4356 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4357 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4358 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4359 }
4360 
4361 static void be_setup_init(struct be_adapter *adapter)
4362 {
4363 	adapter->vlan_prio_bmap = 0xff;
4364 	adapter->phy.link_speed = -1;
4365 	adapter->if_handle = -1;
4366 	adapter->be3_native = false;
4367 	adapter->if_flags = 0;
4368 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4369 	if (be_physfn(adapter))
4370 		adapter->cmd_privileges = MAX_PRIVILEGES;
4371 	else
4372 		adapter->cmd_privileges = MIN_PRIVILEGES;
4373 }
4374 
/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
 * However, this HW limitation is not exposed to the host via any SLI cmd.
 * As a result, in the case of SRIOV and in particular multi-partition configs,
 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
 * for distribution among the VFs. This self-imposed limit determines the
 * number of VFs for which RSS can be enabled.
 */
4382 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4383 {
4384 	struct be_port_resources port_res = {0};
4385 	u8 rss_tables_on_port;
4386 	u16 max_vfs = be_max_vfs(adapter);
4387 
4388 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4389 				  RESOURCE_LIMITS, 0);
4390 
4391 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4392 
4393 	/* Each PF Pool's RSS Tables limit =
4394 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4395 	 */
4396 	adapter->pool_res.max_rss_tables =
4397 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4398 }
4399 
4400 static int be_get_sriov_config(struct be_adapter *adapter)
4401 {
4402 	struct be_resources res = {0};
4403 	int max_vfs, old_vfs;
4404 
4405 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4406 				  RESOURCE_LIMITS, 0);
4407 
4408 	/* Some old versions of BE3 FW don't report max_vfs value */
4409 	if (BE3_chip(adapter) && !res.max_vfs) {
4410 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4411 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4412 	}
4413 
4414 	adapter->pool_res = res;
4415 
4416 	/* If during previous unload of the driver, the VFs were not disabled,
4417 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4418 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4419 	 */
4420 	old_vfs = pci_num_vf(adapter->pdev);
4421 	if (old_vfs) {
4422 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4423 			 old_vfs);
4424 
4425 		adapter->pool_res.max_vfs =
4426 			pci_sriov_get_totalvfs(adapter->pdev);
4427 		adapter->num_vfs = old_vfs;
4428 	}
4429 
4430 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4431 		be_calculate_pf_pool_rss_tables(adapter);
4432 		dev_info(&adapter->pdev->dev,
4433 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4434 			 be_max_pf_pool_rss_tables(adapter));
4435 	}
4436 	return 0;
4437 }
4438 
4439 static void be_alloc_sriov_res(struct be_adapter *adapter)
4440 {
4441 	int old_vfs = pci_num_vf(adapter->pdev);
	struct be_resources vft_res = {0};
4443 	int status;
4444 
4445 	be_get_sriov_config(adapter);
4446 
4447 	if (!old_vfs)
4448 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4449 
	/* When the HW is in an SRIOV capable configuration, the PF-pool
	 * resources are given to the PF during driver load, if there are no
	 * old VFs. This facility is not available in BE3 FW.
	 * Also, this is done by the FW in the Lancer chip.
	 */
4455 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4456 		be_calculate_vf_res(adapter, 0, &vft_res);
4457 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4458 						 &vft_res);
4459 		if (status)
4460 			dev_err(&adapter->pdev->dev,
4461 				"Failed to optimize SRIOV resources\n");
4462 	}
4463 }
4464 
4465 static int be_get_resources(struct be_adapter *adapter)
4466 {
4467 	struct device *dev = &adapter->pdev->dev;
4468 	struct be_resources res = {0};
4469 	int status;
4470 
	/* For Lancer, SH, etc., read per-function resource limits from FW.
	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
	 */
4475 	if (BEx_chip(adapter)) {
4476 		BEx_get_resources(adapter, &res);
4477 	} else {
4478 		status = be_cmd_get_func_config(adapter, &res);
4479 		if (status)
4480 			return status;
4481 
		/* If a default RXQ must be created, we'll use up one RSSQ */
4483 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4484 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4485 			res.max_rss_qs -= 1;
4486 	}
4487 
4488 	/* If RoCE is supported stash away half the EQs for RoCE */
4489 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4490 				res.max_evt_qs / 2 : res.max_evt_qs;
4491 	adapter->res = res;
4492 
4493 	/* If FW supports RSS default queue, then skip creating non-RSS
4494 	 * queue for non-IP traffic.
4495 	 */
4496 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4497 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4498 
4499 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4500 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4501 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4502 		 be_max_vfs(adapter));
4503 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4504 		 be_max_uc(adapter), be_max_mc(adapter),
4505 		 be_max_vlans(adapter));
4506 
4507 	/* Ensure RX and TX queues are created in pairs at init time */
4508 	adapter->cfg_num_rx_irqs =
4509 				min_t(u16, netif_get_num_default_rss_queues(),
4510 				      be_max_qp_irqs(adapter));
4511 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4512 	return 0;
4513 }
4514 
4515 static int be_get_config(struct be_adapter *adapter)
4516 {
4517 	int status, level;
4518 	u16 profile_id;
4519 
4520 	status = be_cmd_get_cntl_attributes(adapter);
4521 	if (status)
4522 		return status;
4523 
4524 	status = be_cmd_query_fw_cfg(adapter);
4525 	if (status)
4526 		return status;
4527 
4528 	if (!lancer_chip(adapter) && be_physfn(adapter))
4529 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4530 
4531 	if (BEx_chip(adapter)) {
4532 		level = be_cmd_get_fw_log_level(adapter);
4533 		adapter->msg_enable =
4534 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4535 	}
4536 
4537 	be_cmd_get_acpi_wol_cap(adapter);
4538 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4539 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4540 
4541 	be_cmd_query_port_name(adapter);
4542 
4543 	if (be_physfn(adapter)) {
4544 		status = be_cmd_get_active_profile(adapter, &profile_id);
4545 		if (!status)
4546 			dev_info(&adapter->pdev->dev,
4547 				 "Using profile 0x%x\n", profile_id);
4548 	}
4549 
4550 	return 0;
4551 }
4552 
4553 static int be_mac_setup(struct be_adapter *adapter)
4554 {
4555 	u8 mac[ETH_ALEN];
4556 	int status;
4557 
4558 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4559 		status = be_cmd_get_perm_mac(adapter, mac);
4560 		if (status)
4561 			return status;
4562 
4563 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4564 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4565 	}
4566 
4567 	return 0;
4568 }
4569 
4570 static void be_schedule_worker(struct be_adapter *adapter)
4571 {
4572 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4573 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4574 }
4575 
4576 static void be_destroy_err_recovery_workq(void)
4577 {
4578 	if (!be_err_recovery_workq)
4579 		return;
4580 
4581 	flush_workqueue(be_err_recovery_workq);
4582 	destroy_workqueue(be_err_recovery_workq);
4583 	be_err_recovery_workq = NULL;
4584 }
4585 
4586 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4587 {
4588 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4589 
4590 	if (!be_err_recovery_workq)
4591 		return;
4592 
4593 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4594 			   msecs_to_jiffies(delay));
4595 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4596 }
4597 
4598 static int be_setup_queues(struct be_adapter *adapter)
4599 {
4600 	struct net_device *netdev = adapter->netdev;
4601 	int status;
4602 
4603 	status = be_evt_queues_create(adapter);
4604 	if (status)
4605 		goto err;
4606 
4607 	status = be_tx_qs_create(adapter);
4608 	if (status)
4609 		goto err;
4610 
4611 	status = be_rx_cqs_create(adapter);
4612 	if (status)
4613 		goto err;
4614 
4615 	status = be_mcc_queues_create(adapter);
4616 	if (status)
4617 		goto err;
4618 
4619 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4620 	if (status)
4621 		goto err;
4622 
4623 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4624 	if (status)
4625 		goto err;
4626 
4627 	return 0;
4628 err:
4629 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4630 	return status;
4631 }
4632 
4633 static int be_if_create(struct be_adapter *adapter)
4634 {
4635 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4636 	u32 cap_flags = be_if_cap_flags(adapter);
4637 	int status;
4638 
4639 	/* alloc required memory for other filtering fields */
4640 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4641 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4642 	if (!adapter->pmac_id)
4643 		return -ENOMEM;
4644 
4645 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4646 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4647 	if (!adapter->mc_list)
4648 		return -ENOMEM;
4649 
4650 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4651 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4652 	if (!adapter->uc_list)
4653 		return -ENOMEM;
4654 
4655 	if (adapter->cfg_num_rx_irqs == 1)
4656 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4657 
4658 	en_flags &= cap_flags;
4659 	/* will enable all the needed filter flags in be_open() */
4660 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4661 				  &adapter->if_handle, 0);
4662 
4663 	if (status)
4664 		return status;
4665 
4666 	return 0;
4667 }
4668 
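/* Tear down the interface and all queues and recreate them so that updated
 * queue counts take effect. The MSI-X table is re-programmed only when no
 * vectors are shared with RoCE.
 */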
4669 int be_update_queues(struct be_adapter *adapter)
4670 {
4671 	struct net_device *netdev = adapter->netdev;
4672 	int status;
4673 
4674 	if (netif_running(netdev))
4675 		be_close(netdev);
4676 
4677 	be_cancel_worker(adapter);
4678 
4679 	/* If any vectors have been shared with RoCE we cannot re-program
4680 	 * the MSIx table.
4681 	 */
4682 	if (!adapter->num_msix_roce_vec)
4683 		be_msix_disable(adapter);
4684 
4685 	be_clear_queues(adapter);
	status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4687 	if (status)
4688 		return status;
4689 
4690 	if (!msix_enabled(adapter)) {
4691 		status = be_msix_enable(adapter);
4692 		if (status)
4693 			return status;
4694 	}
4695 
4696 	status = be_if_create(adapter);
4697 	if (status)
4698 		return status;
4699 
4700 	status = be_setup_queues(adapter);
4701 	if (status)
4702 		return status;
4703 
4704 	be_schedule_worker(adapter);
4705 
4706 	if (netif_running(netdev))
4707 		status = be_open(netdev);
4708 
4709 	return status;
4710 }
4711 
4712 static inline int fw_major_num(const char *fw_ver)
4713 {
4714 	int fw_major = 0, i;
4715 
4716 	i = sscanf(fw_ver, "%d.", &fw_major);
4717 	if (i != 1)
4718 		return 0;
4719 
4720 	return fw_major;
4721 }
4722 
/* FLR the PF if we are in error recovery.
 * Otherwise, FLR the PF only if no VFs are already enabled.
 */
4726 static bool be_reset_required(struct be_adapter *adapter)
4727 {
4728 	if (be_error_recovering(adapter))
4729 		return true;
4730 	else
4731 		return pci_num_vf(adapter->pdev) == 0;
4732 }
4733 
4734 /* Wait for the FW to be ready and perform the required initialization */
4735 static int be_func_init(struct be_adapter *adapter)
4736 {
4737 	int status;
4738 
4739 	status = be_fw_wait_ready(adapter);
4740 	if (status)
4741 		return status;
4742 
4743 	/* FW is now ready; clear errors to allow cmds/doorbell */
4744 	be_clear_error(adapter, BE_CLEAR_ALL);
4745 
4746 	if (be_reset_required(adapter)) {
4747 		status = be_cmd_reset_function(adapter);
4748 		if (status)
4749 			return status;
4750 
4751 		/* Wait for interrupts to quiesce after an FLR */
4752 		msleep(100);
4753 	}
4754 
4755 	/* Tell FW we're ready to fire cmds */
4756 	status = be_cmd_fw_init(adapter);
4757 	if (status)
4758 		return status;
4759 
4760 	/* Allow interrupts for other ULPs running on NIC function */
4761 	be_intr_set(adapter, true);
4762 
4763 	return 0;
4764 }
4765 
4766 static int be_setup(struct be_adapter *adapter)
4767 {
4768 	struct device *dev = &adapter->pdev->dev;
4769 	int status;
4770 
4771 	status = be_func_init(adapter);
4772 	if (status)
4773 		return status;
4774 
4775 	be_setup_init(adapter);
4776 
4777 	if (!lancer_chip(adapter))
4778 		be_cmd_req_native_mode(adapter);
4779 
4780 	/* invoke this cmd first to get pf_num and vf_num which are needed
4781 	 * for issuing profile related cmds
4782 	 */
4783 	if (!BEx_chip(adapter)) {
4784 		status = be_cmd_get_func_config(adapter, NULL);
4785 		if (status)
4786 			return status;
4787 	}
4788 
4789 	status = be_get_config(adapter);
4790 	if (status)
4791 		goto err;
4792 
4793 	if (!BE2_chip(adapter) && be_physfn(adapter))
4794 		be_alloc_sriov_res(adapter);
4795 
4796 	status = be_get_resources(adapter);
4797 	if (status)
4798 		goto err;
4799 
4800 	status = be_msix_enable(adapter);
4801 	if (status)
4802 		goto err;
4803 
4804 	/* will enable all the needed filter flags in be_open() */
4805 	status = be_if_create(adapter);
4806 	if (status)
4807 		goto err;
4808 
4809 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4810 	rtnl_lock();
4811 	status = be_setup_queues(adapter);
4812 	rtnl_unlock();
4813 	if (status)
4814 		goto err;
4815 
4816 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4817 
4818 	status = be_mac_setup(adapter);
4819 	if (status)
4820 		goto err;
4821 
4822 	be_cmd_get_fw_ver(adapter);
4823 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4824 
4825 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4826 		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4827 			adapter->fw_ver);
4828 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4829 	}
4830 
4831 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4832 					 adapter->rx_fc);
4833 	if (status)
4834 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4835 					&adapter->rx_fc);
4836 
4837 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4838 		 adapter->tx_fc, adapter->rx_fc);
4839 
4840 	if (be_physfn(adapter))
4841 		be_cmd_set_logical_link_config(adapter,
4842 					       IFLA_VF_LINK_STATE_AUTO, 0);
4843 
	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
	 * confusing a Linux bridge or OVS that it might be connected to.
4846 	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4847 	 * when SRIOV is not enabled.
4848 	 */
4849 	if (BE3_chip(adapter))
4850 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4851 				      PORT_FWD_TYPE_PASSTHRU, 0);
4852 
4853 	if (adapter->num_vfs)
4854 		be_vf_setup(adapter);
4855 
4856 	status = be_cmd_get_phy_info(adapter);
4857 	if (!status && be_pause_supported(adapter))
4858 		adapter->phy.fc_autoneg = 1;
4859 
4860 	if (be_physfn(adapter) && !lancer_chip(adapter))
4861 		be_cmd_set_features(adapter);
4862 
4863 	be_schedule_worker(adapter);
4864 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4865 	return 0;
4866 err:
4867 	be_clear(adapter);
4868 	return status;
4869 }
4870 
4871 #ifdef CONFIG_NET_POLL_CONTROLLER
4872 static void be_netpoll(struct net_device *netdev)
4873 {
4874 	struct be_adapter *adapter = netdev_priv(netdev);
4875 	struct be_eq_obj *eqo;
4876 	int i;
4877 
4878 	for_all_evt_queues(adapter, eqo, i) {
4879 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4880 		napi_schedule(&eqo->napi);
4881 	}
4882 }
4883 #endif
4884 
4885 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4886 {
4887 	const struct firmware *fw;
4888 	int status;
4889 
4890 	if (!netif_running(adapter->netdev)) {
4891 		dev_err(&adapter->pdev->dev,
4892 			"Firmware load not allowed (interface is down)\n");
4893 		return -ENETDOWN;
4894 	}
4895 
4896 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4897 	if (status)
4898 		goto fw_exit;
4899 
4900 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4901 
4902 	if (lancer_chip(adapter))
4903 		status = lancer_fw_download(adapter, fw);
4904 	else
4905 		status = be_fw_download(adapter, fw);
4906 
4907 	if (!status)
4908 		be_cmd_get_fw_ver(adapter);
4909 
4910 fw_exit:
4911 	release_firmware(fw);
4912 	return status;
4913 }
4914 
4915 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4916 				 u16 flags)
4917 {
4918 	struct be_adapter *adapter = netdev_priv(dev);
4919 	struct nlattr *attr, *br_spec;
4920 	int rem;
4921 	int status = 0;
4922 	u16 mode = 0;
4923 
4924 	if (!sriov_enabled(adapter))
4925 		return -EOPNOTSUPP;
4926 
4927 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4928 	if (!br_spec)
4929 		return -EINVAL;
4930 
4931 	nla_for_each_nested(attr, br_spec, rem) {
4932 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4933 			continue;
4934 
4935 		if (nla_len(attr) < sizeof(mode))
4936 			return -EINVAL;
4937 
4938 		mode = nla_get_u16(attr);
4939 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4940 			return -EOPNOTSUPP;
4941 
4942 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4943 			return -EINVAL;
4944 
4945 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4946 					       adapter->if_handle,
4947 					       mode == BRIDGE_MODE_VEPA ?
4948 					       PORT_FWD_TYPE_VEPA :
4949 					       PORT_FWD_TYPE_VEB, 0);
4950 		if (status)
4951 			goto err;
4952 
4953 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4954 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4955 
4956 		return status;
4957 	}
4958 err:
4959 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4960 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4961 
4962 	return status;
4963 }
4964 
4965 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4966 				 struct net_device *dev, u32 filter_mask,
4967 				 int nlflags)
4968 {
4969 	struct be_adapter *adapter = netdev_priv(dev);
4970 	int status = 0;
4971 	u8 hsw_mode;
4972 
4973 	/* BE and Lancer chips support VEB mode only */
4974 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4975 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4976 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4977 			return 0;
4978 		hsw_mode = PORT_FWD_TYPE_VEB;
4979 	} else {
4980 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4981 					       adapter->if_handle, &hsw_mode,
4982 					       NULL);
4983 		if (status)
4984 			return 0;
4985 
4986 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4987 			return 0;
4988 	}
4989 
4990 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4991 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4992 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4993 				       0, 0, nlflags, filter_mask, NULL);
4994 }
4995 
4996 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4997 					 void (*func)(struct work_struct *))
4998 {
4999 	struct be_cmd_work *work;
5000 
5001 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
5002 	if (!work) {
5003 		dev_err(&adapter->pdev->dev,
5004 			"be_work memory allocation failed\n");
5005 		return NULL;
5006 	}
5007 
5008 	INIT_WORK(&work->work, func);
5009 	work->adapter = adapter;
5010 	return work;
5011 }
5012 
5013 /* VxLAN offload Notes:
5014  *
5015  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5016  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5017  * is expected to work across all types of IP tunnels once exported. Skyhawk
5018  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5019  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5020  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5021  * those other tunnels are unexported on the fly through ndo_features_check().
5022  *
5023  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5024  * adds more than one port, disable offloads and don't re-enable them again
5025  * until after all the tunnels are removed.
5026  */
5027 static void be_work_add_vxlan_port(struct work_struct *work)
5028 {
5029 	struct be_cmd_work *cmd_work =
5030 				container_of(work, struct be_cmd_work, work);
5031 	struct be_adapter *adapter = cmd_work->adapter;
5032 	struct net_device *netdev = adapter->netdev;
5033 	struct device *dev = &adapter->pdev->dev;
5034 	__be16 port = cmd_work->info.vxlan_port;
5035 	int status;
5036 
5037 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5038 		adapter->vxlan_port_aliases++;
5039 		goto done;
5040 	}
5041 
5042 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5043 		dev_info(dev,
5044 			 "Only one UDP port supported for VxLAN offloads\n");
5045 		dev_info(dev, "Disabling VxLAN offloads\n");
5046 		adapter->vxlan_port_count++;
5047 		goto err;
5048 	}
5049 
5050 	if (adapter->vxlan_port_count++ >= 1)
5051 		goto done;
5052 
5053 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
5054 				     OP_CONVERT_NORMAL_TO_TUNNEL);
5055 	if (status) {
5056 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5057 		goto err;
5058 	}
5059 
5060 	status = be_cmd_set_vxlan_port(adapter, port);
5061 	if (status) {
5062 		dev_warn(dev, "Failed to add VxLAN port\n");
5063 		goto err;
5064 	}
5065 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5066 	adapter->vxlan_port = port;
5067 
5068 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5069 				   NETIF_F_TSO | NETIF_F_TSO6 |
5070 				   NETIF_F_GSO_UDP_TUNNEL;
5071 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5072 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5073 
5074 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5075 		 be16_to_cpu(port));
5076 	goto done;
5077 err:
5078 	be_disable_vxlan_offloads(adapter);
5079 done:
5080 	kfree(cmd_work);
5081 }
5082 
5083 static void be_work_del_vxlan_port(struct work_struct *work)
5084 {
5085 	struct be_cmd_work *cmd_work =
5086 				container_of(work, struct be_cmd_work, work);
5087 	struct be_adapter *adapter = cmd_work->adapter;
5088 	__be16 port = cmd_work->info.vxlan_port;
5089 
5090 	if (adapter->vxlan_port != port)
5091 		goto done;
5092 
5093 	if (adapter->vxlan_port_aliases) {
5094 		adapter->vxlan_port_aliases--;
5095 		goto out;
5096 	}
5097 
5098 	be_disable_vxlan_offloads(adapter);
5099 
5100 	dev_info(&adapter->pdev->dev,
5101 		 "Disabled VxLAN offloads for UDP port %d\n",
5102 		 be16_to_cpu(port));
5103 done:
5104 	adapter->vxlan_port_count--;
5105 out:
5106 	kfree(cmd_work);
5107 }
5108 
5109 static void be_cfg_vxlan_port(struct net_device *netdev,
5110 			      struct udp_tunnel_info *ti,
5111 			      void (*func)(struct work_struct *))
5112 {
5113 	struct be_adapter *adapter = netdev_priv(netdev);
5114 	struct be_cmd_work *cmd_work;
5115 
5116 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5117 		return;
5118 
5119 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5120 		return;
5121 
5122 	cmd_work = be_alloc_work(adapter, func);
5123 	if (cmd_work) {
5124 		cmd_work->info.vxlan_port = ti->port;
5125 		queue_work(be_wq, &cmd_work->work);
5126 	}
5127 }
5128 
5129 static void be_del_vxlan_port(struct net_device *netdev,
5130 			      struct udp_tunnel_info *ti)
5131 {
5132 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5133 }
5134 
5135 static void be_add_vxlan_port(struct net_device *netdev,
5136 			      struct udp_tunnel_info *ti)
5137 {
5138 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5139 }
5140 
5141 static netdev_features_t be_features_check(struct sk_buff *skb,
5142 					   struct net_device *dev,
5143 					   netdev_features_t features)
5144 {
5145 	struct be_adapter *adapter = netdev_priv(dev);
5146 	u8 l4_hdr = 0;
5147 
5148 	/* The code below restricts offload features for some tunneled packets.
5149 	 * Offload features for normal (non tunnel) packets are unchanged.
5150 	 */
5151 	if (!skb->encapsulation ||
5152 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5153 		return features;
5154 
5155 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5156 	 * should disable tunnel offload features if it's not a VxLAN packet,
5157 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
	 * allow other tunneled traffic like GRE to work fine while VxLAN
5159 	 * offloads are configured in Skyhawk-R.
5160 	 */
5161 	switch (vlan_get_protocol(skb)) {
5162 	case htons(ETH_P_IP):
5163 		l4_hdr = ip_hdr(skb)->protocol;
5164 		break;
5165 	case htons(ETH_P_IPV6):
5166 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5167 		break;
5168 	default:
5169 		return features;
5170 	}
5171 
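	/* Treat the skb as VxLAN only if the outer L4 protocol is UDP, the
	 * inner packet is a tunneled Ethernet frame (ETH_P_TEB) and the
	 * encap headers are exactly a UDP header followed by a VxLAN header;
	 * otherwise drop checksum and GSO offloads for this skb.
	 */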
5172 	if (l4_hdr != IPPROTO_UDP ||
5173 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5174 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5175 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5176 	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5177 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5178 
5179 	return features;
5180 }
5181 
5182 static int be_get_phys_port_id(struct net_device *dev,
5183 			       struct netdev_phys_item_id *ppid)
5184 {
5185 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5186 	struct be_adapter *adapter = netdev_priv(dev);
5187 	u8 *id;
5188 
5189 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5190 		return -ENOSPC;
5191 
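	/* id[0] carries the 1-based HBA port number; the remaining bytes hold
	 * the controller serial-number words copied in reverse order.
	 */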
5192 	ppid->id[0] = adapter->hba_port_num + 1;
5193 	id = &ppid->id[1];
5194 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5195 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5196 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5197 
5198 	ppid->id_len = id_len;
5199 
5200 	return 0;
5201 }
5202 
5203 static void be_set_rx_mode(struct net_device *dev)
5204 {
5205 	struct be_adapter *adapter = netdev_priv(dev);
5206 	struct be_cmd_work *work;
5207 
5208 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5209 	if (work)
5210 		queue_work(be_wq, &work->work);
5211 }
5212 
5213 static const struct net_device_ops be_netdev_ops = {
5214 	.ndo_open		= be_open,
5215 	.ndo_stop		= be_close,
5216 	.ndo_start_xmit		= be_xmit,
5217 	.ndo_set_rx_mode	= be_set_rx_mode,
5218 	.ndo_set_mac_address	= be_mac_addr_set,
5219 	.ndo_change_mtu		= be_change_mtu,
5220 	.ndo_get_stats64	= be_get_stats64,
5221 	.ndo_validate_addr	= eth_validate_addr,
5222 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5223 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5224 	.ndo_set_vf_mac		= be_set_vf_mac,
5225 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5226 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5227 	.ndo_get_vf_config	= be_get_vf_config,
5228 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5229 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5230 #ifdef CONFIG_NET_POLL_CONTROLLER
5231 	.ndo_poll_controller	= be_netpoll,
5232 #endif
5233 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5234 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5235 #ifdef CONFIG_NET_RX_BUSY_POLL
5236 	.ndo_busy_poll		= be_busy_poll,
5237 #endif
5238 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5239 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5240 	.ndo_features_check	= be_features_check,
5241 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5242 };
5243 
5244 static void be_netdev_init(struct net_device *netdev)
5245 {
5246 	struct be_adapter *adapter = netdev_priv(netdev);
5247 
5248 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5249 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5250 		NETIF_F_HW_VLAN_CTAG_TX;
5251 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5252 		netdev->hw_features |= NETIF_F_RXHASH;
5253 
5254 	netdev->features |= netdev->hw_features |
5255 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5256 
5257 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5258 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5259 
5260 	netdev->priv_flags |= IFF_UNICAST_FLT;
5261 
5262 	netdev->flags |= IFF_MULTICAST;
5263 
5264 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5265 
5266 	netdev->netdev_ops = &be_netdev_ops;
5267 
5268 	netdev->ethtool_ops = &be_ethtool_ops;
5269 }
5270 
5271 static void be_cleanup(struct be_adapter *adapter)
5272 {
5273 	struct net_device *netdev = adapter->netdev;
5274 
5275 	rtnl_lock();
5276 	netif_device_detach(netdev);
5277 	if (netif_running(netdev))
5278 		be_close(netdev);
5279 	rtnl_unlock();
5280 
5281 	be_clear(adapter);
5282 }
5283 
5284 static int be_resume(struct be_adapter *adapter)
5285 {
5286 	struct net_device *netdev = adapter->netdev;
5287 	int status;
5288 
5289 	status = be_setup(adapter);
5290 	if (status)
5291 		return status;
5292 
5293 	rtnl_lock();
5294 	if (netif_running(netdev))
5295 		status = be_open(netdev);
5296 	rtnl_unlock();
5297 
5298 	if (status)
5299 		return status;
5300 
5301 	netif_device_attach(netdev);
5302 
5303 	return 0;
5304 }
5305 
5306 static void be_soft_reset(struct be_adapter *adapter)
5307 {
5308 	u32 val;
5309 
5310 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5311 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5312 	val |= SLIPORT_SOFTRESET_SR_MASK;
5313 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5314 }
5315 
5316 static bool be_err_is_recoverable(struct be_adapter *adapter)
5317 {
5318 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5319 	unsigned long initial_idle_time =
5320 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5321 	unsigned long recovery_interval =
5322 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5323 	u16 ue_err_code;
5324 	u32 val;
5325 
5326 	val = be_POST_stage_get(adapter);
5327 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5328 		return false;
5329 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5330 	if (ue_err_code == 0)
5331 		return false;
5332 
5333 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5334 		ue_err_code);
5335 
5336 	if (jiffies - err_rec->probe_time <= initial_idle_time) {
5337 		dev_err(&adapter->pdev->dev,
5338 			"Cannot recover within %lu sec from driver load\n",
5339 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5340 		return false;
5341 	}
5342 
5343 	if (err_rec->last_recovery_time &&
5344 	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5345 		dev_err(&adapter->pdev->dev,
5346 			"Cannot recover within %lu sec from last recovery\n",
5347 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5348 		return false;
5349 	}
5350 
5351 	if (ue_err_code == err_rec->last_err_code) {
5352 		dev_err(&adapter->pdev->dev,
5353 			"Cannot recover from a consecutive TPE error\n");
5354 		return false;
5355 	}
5356 
5357 	err_rec->last_recovery_time = jiffies;
5358 	err_rec->last_err_code = ue_err_code;
5359 	return true;
5360 }
5361 
5362 static int be_tpe_recover(struct be_adapter *adapter)
5363 {
5364 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5365 	int status = -EAGAIN;
5366 	u32 val;
5367 
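	/* State machine: NONE -> DETECT -> (RESET on PF0 only) -> PRE_POLL ->
	 * REINIT. A non-zero resched_delay tells the caller how long to wait
	 * before re-entering this function in the next state.
	 */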
5368 	switch (err_rec->recovery_state) {
5369 	case ERR_RECOVERY_ST_NONE:
5370 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5371 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5372 		break;
5373 
5374 	case ERR_RECOVERY_ST_DETECT:
5375 		val = be_POST_stage_get(adapter);
5376 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5377 		    POST_STAGE_RECOVERABLE_ERR) {
5378 			dev_err(&adapter->pdev->dev,
5379 				"Unrecoverable HW error detected: 0x%x\n", val);
5380 			status = -EINVAL;
5381 			err_rec->resched_delay = 0;
5382 			break;
5383 		}
5384 
5385 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5386 
		/* Only PF0 initiates a Chip Soft Reset. But PF0 must wait
		 * ue_to_reset_time msecs before it checks for the final error
		 * status in SLIPORT_SEMAPHORE to determine if the recovery
		 * criteria are met. If they are, then PF0 initiates a Soft
		 * Reset.
		 */
5392 		if (adapter->pf_num == 0) {
5393 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5394 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5395 					ERR_RECOVERY_UE_DETECT_DURATION;
5396 			break;
5397 		}
5398 
5399 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5400 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5401 					ERR_RECOVERY_UE_DETECT_DURATION;
5402 		break;
5403 
5404 	case ERR_RECOVERY_ST_RESET:
5405 		if (!be_err_is_recoverable(adapter)) {
5406 			dev_err(&adapter->pdev->dev,
5407 				"Failed to meet recovery criteria\n");
5408 			status = -EIO;
5409 			err_rec->resched_delay = 0;
5410 			break;
5411 		}
5412 		be_soft_reset(adapter);
5413 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5414 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5415 					err_rec->ue_to_reset_time;
5416 		break;
5417 
5418 	case ERR_RECOVERY_ST_PRE_POLL:
5419 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5420 		err_rec->resched_delay = 0;
5421 		status = 0;			/* done */
5422 		break;
5423 
5424 	default:
5425 		status = -EINVAL;
5426 		err_rec->resched_delay = 0;
5427 		break;
5428 	}
5429 
5430 	return status;
5431 }
5432 
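/* Recover the function from a HW error. On BEx/Skyhawk chips the TPE
 * recovery state machine must complete first, and recovery is refused if
 * it is unsupported or disabled; Lancer skips that step. Once the FW is
 * ready again, the function state is torn down and rebuilt via
 * be_cleanup()/be_resume().
 */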
5433 static int be_err_recover(struct be_adapter *adapter)
5434 {
5435 	int status;
5436 
5437 	if (!lancer_chip(adapter)) {
5438 		if (!adapter->error_recovery.recovery_supported ||
5439 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5440 			return -EIO;
5441 		status = be_tpe_recover(adapter);
5442 		if (status)
5443 			goto err;
5444 	}
5445 
5446 	/* Wait for adapter to reach quiescent state before
5447 	 * destroying queues
5448 	 */
5449 	status = be_fw_wait_ready(adapter);
5450 	if (status)
5451 		goto err;
5452 
5453 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5454 
5455 	be_cleanup(adapter);
5456 
5457 	status = be_resume(adapter);
5458 	if (status)
5459 		goto err;
5460 
5461 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5462 
5463 err:
5464 	return status;
5465 }
5466 
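/* Delayed work that polls the adapter for HW errors and drives recovery.
 * On success the recovery state is reset and normal polling resumes;
 * otherwise the work re-arms itself with a chip-specific delay until
 * recovery either succeeds or has to be abandoned.
 */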
5467 static void be_err_detection_task(struct work_struct *work)
5468 {
5469 	struct be_error_recovery *err_rec =
5470 			container_of(work, struct be_error_recovery,
5471 				     err_detection_work.work);
5472 	struct be_adapter *adapter =
5473 			container_of(err_rec, struct be_adapter,
5474 				     error_recovery);
5475 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5476 	struct device *dev = &adapter->pdev->dev;
5477 	int recovery_status;
5478 
5479 	be_detect_error(adapter);
5480 	if (!be_check_error(adapter, BE_ERROR_HW))
5481 		goto reschedule_task;
5482 
5483 	recovery_status = be_err_recover(adapter);
5484 	if (!recovery_status) {
5485 		err_rec->recovery_retries = 0;
5486 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5487 		dev_info(dev, "Adapter recovery successful\n");
5488 		goto reschedule_task;
5489 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5490 		/* BEx/SH recovery state machine */
5491 		if (adapter->pf_num == 0 &&
5492 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5493 			dev_err(&adapter->pdev->dev,
5494 				"Adapter recovery in progress\n");
5495 		resched_delay = err_rec->resched_delay;
5496 		goto reschedule_task;
5497 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
		/* For VFs, check every second whether the PF has
		 * allocated resources.
		 */
5501 		dev_err(dev, "Re-trying adapter recovery\n");
5502 		goto reschedule_task;
5503 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5504 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
		/* If another error occurs during recovery, the adapter takes
		 * 30 sec to come out of the error state. Retry error recovery
		 * after this interval.
		 */
5509 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5510 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5511 		goto reschedule_task;
5512 	} else {
5513 		dev_err(dev, "Adapter recovery failed\n");
5514 		dev_err(dev, "Please reboot server to recover\n");
5515 	}
5516 
5517 	return;
5518 
5519 reschedule_task:
5520 	be_schedule_err_detection(adapter, resched_delay);
5521 }
5522 
5523 static void be_log_sfp_info(struct be_adapter *adapter)
5524 {
5525 	int status;
5526 
5527 	status = be_cmd_query_sfp_info(adapter);
5528 	if (!status) {
5529 		dev_err(&adapter->pdev->dev,
5530 			"Port %c: %s Vendor: %s part no: %s",
5531 			adapter->port_name,
5532 			be_misconfig_evt_port_state[adapter->phy_state],
5533 			adapter->phy.vendor_name,
5534 			adapter->phy.vendor_pn);
5535 	}
5536 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5537 }
5538 
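/* Periodic (1 second) housekeeping: query the die temperature on PFs
 * every be_get_temp_freq iterations, reap MCC completions while the
 * interface is down, refresh HW stats, replenish starved RX queues,
 * update EQ delays (except on Skyhawk) and log SFP info if the PHY was
 * reported as misconfigured.
 */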
5539 static void be_worker(struct work_struct *work)
5540 {
5541 	struct be_adapter *adapter =
5542 		container_of(work, struct be_adapter, work.work);
5543 	struct be_rx_obj *rxo;
5544 	int i;
5545 
5546 	if (be_physfn(adapter) &&
5547 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5548 		be_cmd_get_die_temperature(adapter);
5549 
	/* When the interface is not up (interrupts not yet enabled),
	 * just reap any pending MCC completions.
	 */
5553 	if (!netif_running(adapter->netdev)) {
5554 		local_bh_disable();
5555 		be_process_mcc(adapter);
5556 		local_bh_enable();
5557 		goto reschedule;
5558 	}
5559 
5560 	if (!adapter->stats_cmd_sent) {
5561 		if (lancer_chip(adapter))
5562 			lancer_cmd_get_pport_stats(adapter,
5563 						   &adapter->stats_cmd);
5564 		else
5565 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5566 	}
5567 
5568 	for_all_rx_queues(adapter, rxo, i) {
5569 		/* Replenish RX-queues starved due to memory
5570 		 * allocation failures.
5571 		 */
5572 		if (rxo->rx_post_starved)
5573 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5574 	}
5575 
5576 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5577 	if (!skyhawk_chip(adapter))
5578 		be_eqd_update(adapter, false);
5579 
5580 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5581 		be_log_sfp_info(adapter);
5582 
5583 reschedule:
5584 	adapter->work_counter++;
5585 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5586 }
5587 
5588 static void be_unmap_pci_bars(struct be_adapter *adapter)
5589 {
5590 	if (adapter->csr)
5591 		pci_iounmap(adapter->pdev, adapter->csr);
5592 	if (adapter->db)
5593 		pci_iounmap(adapter->pdev, adapter->db);
5594 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5595 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5596 }
5597 
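/* The doorbell region is BAR 0 on Lancer chips and on VFs, and BAR 4
 * on all other functions.
 */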
5598 static int db_bar(struct be_adapter *adapter)
5599 {
5600 	if (lancer_chip(adapter) || be_virtfn(adapter))
5601 		return 0;
5602 	else
5603 		return 4;
5604 }
5605 
5606 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5607 {
5608 	if (skyhawk_chip(adapter)) {
5609 		adapter->roce_db.size = 4096;
5610 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5611 							      db_bar(adapter));
5612 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5613 							       db_bar(adapter));
5614 	}
5615 	return 0;
5616 }
5617 
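/* Map the PCI BARs used by this function: the CSR BAR (BEx PFs only),
 * the doorbell BAR and, on BEx/Skyhawk, the PCICFG region (a separate
 * mapping on PFs, an offset into the doorbell BAR on VFs). The SLI_INTF
 * register is read first to latch the SLI family and VF status.
 */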
5618 static int be_map_pci_bars(struct be_adapter *adapter)
5619 {
5620 	struct pci_dev *pdev = adapter->pdev;
5621 	u8 __iomem *addr;
5622 	u32 sli_intf;
5623 
5624 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5625 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5626 				SLI_INTF_FAMILY_SHIFT;
5627 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5628 
5629 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5630 		adapter->csr = pci_iomap(pdev, 2, 0);
5631 		if (!adapter->csr)
5632 			return -ENOMEM;
5633 	}
5634 
5635 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5636 	if (!addr)
5637 		goto pci_map_err;
5638 	adapter->db = addr;
5639 
5640 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5641 		if (be_physfn(adapter)) {
5642 			/* PCICFG is the 2nd BAR in BE2 */
5643 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5644 			if (!addr)
5645 				goto pci_map_err;
5646 			adapter->pcicfg = addr;
5647 			adapter->pcicfg_mapped = true;
5648 		} else {
5649 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5650 			adapter->pcicfg_mapped = false;
5651 		}
5652 	}
5653 
5654 	be_roce_map_pci_bars(adapter);
5655 	return 0;
5656 
5657 pci_map_err:
5658 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5659 	be_unmap_pci_bars(adapter);
5660 	return -ENOMEM;
5661 }
5662 
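/* Free the DMA-coherent buffers (mailbox, RX-filter cmd and stats cmd)
 * allocated by be_drv_init()
 */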
5663 static void be_drv_cleanup(struct be_adapter *adapter)
5664 {
5665 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5666 	struct device *dev = &adapter->pdev->dev;
5667 
5668 	if (mem->va)
5669 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5670 
5671 	mem = &adapter->rx_filter;
5672 	if (mem->va)
5673 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5674 
5675 	mem = &adapter->stats_cmd;
5676 	if (mem->va)
5677 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5678 }
5679 
5680 /* Allocate and initialize various fields in be_adapter struct */
5681 static int be_drv_init(struct be_adapter *adapter)
5682 {
5683 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5684 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5685 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5686 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5687 	struct device *dev = &adapter->pdev->dev;
5688 	int status = 0;
5689 
5690 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5691 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5692 						 &mbox_mem_alloc->dma,
5693 						 GFP_KERNEL);
5694 	if (!mbox_mem_alloc->va)
5695 		return -ENOMEM;
5696 
5697 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5698 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5699 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5700 
5701 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5702 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5703 					    &rx_filter->dma, GFP_KERNEL);
5704 	if (!rx_filter->va) {
5705 		status = -ENOMEM;
5706 		goto free_mbox;
5707 	}
5708 
5709 	if (lancer_chip(adapter))
5710 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5711 	else if (BE2_chip(adapter))
5712 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5713 	else if (BE3_chip(adapter))
5714 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5715 	else
5716 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5717 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5718 					    &stats_cmd->dma, GFP_KERNEL);
5719 	if (!stats_cmd->va) {
5720 		status = -ENOMEM;
5721 		goto free_rx_filter;
5722 	}
5723 
5724 	mutex_init(&adapter->mbox_lock);
5725 	mutex_init(&adapter->mcc_lock);
5726 	mutex_init(&adapter->rx_filter_lock);
5727 	spin_lock_init(&adapter->mcc_cq_lock);
5728 	init_completion(&adapter->et_cmd_compl);
5729 
5730 	pci_save_state(adapter->pdev);
5731 
5732 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5733 
5734 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5735 	adapter->error_recovery.resched_delay = 0;
5736 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5737 			  be_err_detection_task);
5738 
5739 	adapter->rx_fc = true;
5740 	adapter->tx_fc = true;
5741 
5742 	/* Must be a power of 2 or else MODULO will BUG_ON */
5743 	adapter->be_get_temp_freq = 64;
5744 
5745 	return 0;
5746 
5747 free_rx_filter:
5748 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5749 free_mbox:
5750 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5751 			  mbox_mem_alloc->dma);
5752 	return status;
5753 }
5754 
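/* PCI remove handler: detach RoCE, disable interrupts, stop the error
 * detection work, unregister the netdev and release all HW, PCI and
 * driver resources acquired in be_probe().
 */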
5755 static void be_remove(struct pci_dev *pdev)
5756 {
5757 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5758 
5759 	if (!adapter)
5760 		return;
5761 
5762 	be_roce_dev_remove(adapter);
5763 	be_intr_set(adapter, false);
5764 
5765 	be_cancel_err_detection(adapter);
5766 
5767 	unregister_netdev(adapter->netdev);
5768 
5769 	be_clear(adapter);
5770 
5771 	if (!pci_vfs_assigned(adapter->pdev))
5772 		be_cmd_reset_function(adapter);
5773 
5774 	/* tell fw we're done with firing cmds */
5775 	be_cmd_fw_clean(adapter);
5776 
5777 	be_unmap_pci_bars(adapter);
5778 	be_drv_cleanup(adapter);
5779 
5780 	pci_disable_pcie_error_reporting(pdev);
5781 
5782 	pci_release_regions(pdev);
5783 	pci_disable_device(pdev);
5784 
5785 	free_netdev(adapter->netdev);
5786 }
5787 
5788 static ssize_t be_hwmon_show_temp(struct device *dev,
5789 				  struct device_attribute *dev_attr,
5790 				  char *buf)
5791 {
5792 	struct be_adapter *adapter = dev_get_drvdata(dev);
5793 
5794 	/* Unit: millidegree Celsius */
5795 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5796 		return -EIO;
5797 	else
5798 		return sprintf(buf, "%u\n",
5799 			       adapter->hwmon_info.be_on_die_temp * 1000);
5800 }
5801 
5802 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5803 			  be_hwmon_show_temp, NULL, 1);
5804 
5805 static struct attribute *be_hwmon_attrs[] = {
5806 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5807 	NULL
5808 };
5809 
5810 ATTRIBUTE_GROUPS(be_hwmon);
5811 
5812 static char *mc_name(struct be_adapter *adapter)
5813 {
5814 	char *str = "";	/* default */
5815 
5816 	switch (adapter->mc_type) {
5817 	case UMC:
5818 		str = "UMC";
5819 		break;
5820 	case FLEX10:
5821 		str = "FLEX10";
5822 		break;
5823 	case vNIC1:
5824 		str = "vNIC-1";
5825 		break;
5826 	case nPAR:
5827 		str = "nPAR";
5828 		break;
5829 	case UFP:
5830 		str = "UFP";
5831 		break;
5832 	case vNIC2:
5833 		str = "vNIC-2";
5834 		break;
5835 	default:
5836 		str = "";
5837 	}
5838 
5839 	return str;
5840 }
5841 
5842 static inline char *func_name(struct be_adapter *adapter)
5843 {
5844 	return be_physfn(adapter) ? "PF" : "VF";
5845 }
5846 
5847 static inline char *nic_name(struct pci_dev *pdev)
5848 {
5849 	switch (pdev->device) {
5850 	case OC_DEVICE_ID1:
5851 		return OC_NAME;
5852 	case OC_DEVICE_ID2:
5853 		return OC_NAME_BE;
5854 	case OC_DEVICE_ID3:
5855 	case OC_DEVICE_ID4:
5856 		return OC_NAME_LANCER;
5857 	case BE_DEVICE_ID2:
5858 		return BE3_NAME;
5859 	case OC_DEVICE_ID5:
5860 	case OC_DEVICE_ID6:
5861 		return OC_NAME_SH;
5862 	default:
5863 		return BE_NAME;
5864 	}
5865 }
5866 
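/* PCI probe handler: enable the device, map its BARs, allocate and
 * initialize driver state, bring up the adapter, register the netdev
 * and start the error detection work. On failure, everything done so
 * far is unwound in reverse order.
 */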
5867 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5868 {
5869 	struct be_adapter *adapter;
5870 	struct net_device *netdev;
5871 	int status = 0;
5872 
5873 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5874 
5875 	status = pci_enable_device(pdev);
5876 	if (status)
5877 		goto do_none;
5878 
5879 	status = pci_request_regions(pdev, DRV_NAME);
5880 	if (status)
5881 		goto disable_dev;
5882 	pci_set_master(pdev);
5883 
5884 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5885 	if (!netdev) {
5886 		status = -ENOMEM;
5887 		goto rel_reg;
5888 	}
5889 	adapter = netdev_priv(netdev);
5890 	adapter->pdev = pdev;
5891 	pci_set_drvdata(pdev, adapter);
5892 	adapter->netdev = netdev;
5893 	SET_NETDEV_DEV(netdev, &pdev->dev);
5894 
5895 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5896 	if (!status) {
5897 		netdev->features |= NETIF_F_HIGHDMA;
5898 	} else {
5899 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5900 		if (status) {
5901 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5902 			goto free_netdev;
5903 		}
5904 	}
5905 
5906 	status = pci_enable_pcie_error_reporting(pdev);
5907 	if (!status)
5908 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5909 
5910 	status = be_map_pci_bars(adapter);
5911 	if (status)
5912 		goto free_netdev;
5913 
5914 	status = be_drv_init(adapter);
5915 	if (status)
5916 		goto unmap_bars;
5917 
5918 	status = be_setup(adapter);
5919 	if (status)
5920 		goto drv_cleanup;
5921 
5922 	be_netdev_init(netdev);
5923 	status = register_netdev(netdev);
5924 	if (status != 0)
5925 		goto unsetup;
5926 
5927 	be_roce_dev_add(adapter);
5928 
5929 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5930 	adapter->error_recovery.probe_time = jiffies;
5931 
	/* On-die temperature is not supported on VFs */
5933 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5934 		adapter->hwmon_info.hwmon_dev =
5935 			devm_hwmon_device_register_with_groups(&pdev->dev,
5936 							       DRV_NAME,
5937 							       adapter,
5938 							       be_hwmon_groups);
5939 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5940 	}
5941 
5942 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5943 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5944 
5945 	return 0;
5946 
5947 unsetup:
5948 	be_clear(adapter);
5949 drv_cleanup:
5950 	be_drv_cleanup(adapter);
5951 unmap_bars:
5952 	be_unmap_pci_bars(adapter);
5953 free_netdev:
5954 	free_netdev(netdev);
5955 rel_reg:
5956 	pci_release_regions(pdev);
5957 disable_dev:
5958 	pci_disable_device(pdev);
5959 do_none:
5960 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5961 	return status;
5962 }
5963 
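/* Legacy PCI power-management hooks: quiesce the adapter and cancel the
 * error detection work on suspend; re-enable the device and rebuild the
 * function state on resume.
 */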
5964 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5965 {
5966 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5967 
5968 	be_intr_set(adapter, false);
5969 	be_cancel_err_detection(adapter);
5970 
5971 	be_cleanup(adapter);
5972 
5973 	pci_save_state(pdev);
5974 	pci_disable_device(pdev);
5975 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5976 	return 0;
5977 }
5978 
5979 static int be_pci_resume(struct pci_dev *pdev)
5980 {
5981 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5982 	int status = 0;
5983 
5984 	status = pci_enable_device(pdev);
5985 	if (status)
5986 		return status;
5987 
5988 	pci_restore_state(pdev);
5989 
5990 	status = be_resume(adapter);
5991 	if (status)
5992 		return status;
5993 
5994 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5995 
5996 	return 0;
5997 }
5998 
5999 /*
6000  * An FLR will stop BE from DMAing any data.
6001  */
6002 static void be_shutdown(struct pci_dev *pdev)
6003 {
6004 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6005 
6006 	if (!adapter)
6007 		return;
6008 
6009 	be_roce_dev_shutdown(adapter);
6010 	cancel_delayed_work_sync(&adapter->work);
6011 	be_cancel_err_detection(adapter);
6012 
6013 	netif_device_detach(adapter->netdev);
6014 
6015 	be_cmd_reset_function(adapter);
6016 
6017 	pci_disable_device(pdev);
6018 }
6019 
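/* EEH/AER error handler: quiesce the function and tell the PCI core
 * whether a slot reset should be attempted or the device disconnected.
 * The first function on the adapter also waits for a possible FW flash
 * debug dump to complete before the reset.
 */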
6020 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6021 					    pci_channel_state_t state)
6022 {
6023 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6024 
6025 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6026 
6027 	be_roce_dev_remove(adapter);
6028 
6029 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6030 		be_set_error(adapter, BE_ERROR_EEH);
6031 
6032 		be_cancel_err_detection(adapter);
6033 
6034 		be_cleanup(adapter);
6035 	}
6036 
6037 	if (state == pci_channel_io_perm_failure)
6038 		return PCI_ERS_RESULT_DISCONNECT;
6039 
6040 	pci_disable_device(pdev);
6041 
	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while the flash dump is in progress can
	 * prevent it from recovering, so wait for the dump to finish.
	 * Only the first function needs to wait, as the wait is needed
	 * only once per adapter.
	 */
6048 	if (pdev->devfn == 0)
6049 		ssleep(30);
6050 
6051 	return PCI_ERS_RESULT_NEED_RESET;
6052 }
6053 
6054 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6055 {
6056 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6057 	int status;
6058 
6059 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6060 
6061 	status = pci_enable_device(pdev);
6062 	if (status)
6063 		return PCI_ERS_RESULT_DISCONNECT;
6064 
6065 	pci_set_master(pdev);
6066 	pci_restore_state(pdev);
6067 
6068 	/* Check if card is ok and fw is ready */
6069 	dev_info(&adapter->pdev->dev,
6070 		 "Waiting for FW to be ready after EEH reset\n");
6071 	status = be_fw_wait_ready(adapter);
6072 	if (status)
6073 		return PCI_ERS_RESULT_DISCONNECT;
6074 
6075 	pci_cleanup_aer_uncorrect_error_status(pdev);
6076 	be_clear_error(adapter, BE_CLEAR_ALL);
6077 	return PCI_ERS_RESULT_RECOVERED;
6078 }
6079 
6080 static void be_eeh_resume(struct pci_dev *pdev)
6081 {
6082 	int status = 0;
6083 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6084 
6085 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6086 
6087 	pci_save_state(pdev);
6088 
6089 	status = be_resume(adapter);
6090 	if (status)
6091 		goto err;
6092 
6093 	be_roce_dev_add(adapter);
6094 
6095 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6096 	return;
6097 err:
6098 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6099 }
6100 
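/* Handler for the PCI sriov_configure callback (sriov_numvfs in sysfs):
 * enables or disables VFs and, on Skyhawk, redistributes the PF-pool
 * resources across the requested number of VFs before creating them.
 */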
6101 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6102 {
6103 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6104 	struct be_resources vft_res = {0};
6105 	int status;
6106 
6107 	if (!num_vfs)
6108 		be_vf_clear(adapter);
6109 
6110 	adapter->num_vfs = num_vfs;
6111 
6112 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6113 		dev_warn(&pdev->dev,
6114 			 "Cannot disable VFs while they are assigned\n");
6115 		return -EBUSY;
6116 	}
6117 
	/* When the HW is in an SRIOV-capable configuration, the PF-pool
	 * resources are distributed equally across the maximum number of
	 * VFs. The user may request that only a subset of the max VFs be
	 * enabled; in that case, redistribute the resources across num_vfs
	 * so that each VF gets access to a larger share of resources.
	 * This facility is not available in BE3 FW; on Lancer the FW does
	 * the redistribution itself.
	 */
6126 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6127 		be_calculate_vf_res(adapter, adapter->num_vfs,
6128 				    &vft_res);
6129 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6130 						 adapter->num_vfs, &vft_res);
6131 		if (status)
6132 			dev_err(&pdev->dev,
6133 				"Failed to optimize SR-IOV resources\n");
6134 	}
6135 
6136 	status = be_get_resources(adapter);
6137 	if (status)
6138 		return be_cmd_status(status);
6139 
6140 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6141 	rtnl_lock();
6142 	status = be_update_queues(adapter);
6143 	rtnl_unlock();
6144 	if (status)
6145 		return be_cmd_status(status);
6146 
6147 	if (adapter->num_vfs)
6148 		status = be_vf_setup(adapter);
6149 
6150 	if (!status)
6151 		return adapter->num_vfs;
6152 
6153 	return 0;
6154 }
6155 
6156 static const struct pci_error_handlers be_eeh_handlers = {
6157 	.error_detected = be_eeh_err_detected,
6158 	.slot_reset = be_eeh_reset,
6159 	.resume = be_eeh_resume,
6160 };
6161 
6162 static struct pci_driver be_driver = {
6163 	.name = DRV_NAME,
6164 	.id_table = be_dev_ids,
6165 	.probe = be_probe,
6166 	.remove = be_remove,
6167 	.suspend = be_suspend,
6168 	.resume = be_pci_resume,
6169 	.shutdown = be_shutdown,
6170 	.sriov_configure = be_pci_sriov_configure,
6171 	.err_handler = &be_eeh_handlers
6172 };
6173 
6174 static int __init be_init_module(void)
6175 {
6176 	int status;
6177 
6178 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6179 	    rx_frag_size != 2048) {
6180 		printk(KERN_WARNING DRV_NAME
6181 			" : Module param rx_frag_size must be 2048/4096/8192."
6182 			" Using 2048\n");
6183 		rx_frag_size = 2048;
6184 	}
6185 
6186 	if (num_vfs > 0) {
6187 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6188 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6189 	}
6190 
6191 	be_wq = create_singlethread_workqueue("be_wq");
6192 	if (!be_wq) {
6193 		pr_warn(DRV_NAME "workqueue creation failed\n");
6194 		return -1;
6195 	}
6196 
6197 	be_err_recovery_workq =
6198 		create_singlethread_workqueue("be_err_recover");
6199 	if (!be_err_recovery_workq)
6200 		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6201 
6202 	status = pci_register_driver(&be_driver);
6203 	if (status) {
6204 		destroy_workqueue(be_wq);
6205 		be_destroy_err_recovery_workq();
6206 	}
6207 	return status;
6208 }
6209 module_init(be_init_module);
6210 
6211 static void __exit be_exit_module(void)
6212 {
6213 	pci_unregister_driver(&be_driver);
6214 
6215 	be_destroy_err_recovery_workq();
6216 
6217 	if (be_wq)
6218 		destroy_workqueue(be_wq);
6219 }
6220 module_exit(be_exit_module);
6221