1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
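/* Enable/disable host interrupt generation by toggling the HOSTINTR bit of
 * the MEMBAR interrupt-control register via PCI config space. Returns without
 * touching the register if it is already in the requested state.
 */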
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On Lancer, interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
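/* Ring the RX queue doorbell to notify the adapter of newly posted
 * receive buffer descriptors.
 */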
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
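/* Ring the TX queue doorbell to notify the adapter of newly posted WRBs */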
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
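/* Ring the EQ doorbell: acks 'num_popped' events, optionally re-arms the EQ
 * and clears the interrupt, and programs the encoded EQ-delay multiplier.
 */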
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279 			/* mac already added, skip addition */
280 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281 			return 0;
282 		}
283 	}
284 
285 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286 			       &adapter->pmac_id[0], 0);
287 }
288 
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291 	int i;
292 
293 	/* Skip deletion if the programmed mac is
294 	 * being used in uc-list
295 	 */
296 	for (i = 0; i < adapter->uc_macs; i++) {
297 		if (adapter->pmac_id[i + 1] == pmac_id)
298 			return;
299 	}
300 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302 
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305 	struct be_adapter *adapter = netdev_priv(netdev);
306 	struct device *dev = &adapter->pdev->dev;
307 	struct sockaddr *addr = p;
308 	int status;
309 	u8 mac[ETH_ALEN];
310 	u32 old_pmac_id = adapter->pmac_id[0];
311 
312 	if (!is_valid_ether_addr(addr->sa_data))
313 		return -EADDRNOTAVAIL;
314 
315 	/* Proceed further only if the user-provided MAC is different
316 	 * from the active MAC
317 	 */
318 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319 		return 0;
320 
321 	/* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
322 	 * address
323 	 */
324 	if (BEx_chip(adapter) && be_virtfn(adapter) &&
325 	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
326 		return -EPERM;
327 
328 	/* if device is not running, copy MAC to netdev->dev_addr */
329 	if (!netif_running(netdev))
330 		goto done;
331 
332 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333 	 * privilege or if PF did not provision the new MAC address.
334 	 * On BE3, this cmd will always fail if the VF doesn't have the
335 	 * FILTMGMT privilege. This failure is OK only if the PF programmed
336 	 * the MAC for the VF.
337 	 */
338 	mutex_lock(&adapter->rx_filter_lock);
339 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340 	if (!status) {
342 		/* Delete the old programmed MAC. This call may fail if the
343 		 * old MAC was already deleted by the PF driver.
344 		 */
345 		if (adapter->pmac_id[0] != old_pmac_id)
346 			be_dev_mac_del(adapter, old_pmac_id);
347 	}
348 
349 	mutex_unlock(&adapter->rx_filter_lock);
350 	/* Decide if the new MAC is successfully activated only after
351 	 * querying the FW
352 	 */
353 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354 				       adapter->if_handle, true, 0);
355 	if (status)
356 		goto err;
357 
358 	/* The MAC change did not happen, either due to lack of privilege
359 	 * or because the PF didn't pre-provision the new MAC.
360 	 */
361 	if (!ether_addr_equal(addr->sa_data, mac)) {
362 		status = -EPERM;
363 		goto err;
364 	}
365 
366 	/* Remember currently programmed MAC */
367 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
370 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371 	return 0;
372 err:
373 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374 	return status;
375 }
376 
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380 	if (BE2_chip(adapter)) {
381 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	} else if (BE3_chip(adapter)) {
385 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386 
387 		return &cmd->hw_stats;
388 	} else {
389 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390 
391 		return &cmd->hw_stats;
392 	}
393 }
394 
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398 	if (BE2_chip(adapter)) {
399 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	} else if (BE3_chip(adapter)) {
403 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404 
405 		return &hw_stats->erx;
406 	} else {
407 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408 
409 		return &hw_stats->erx;
410 	}
411 }
412 
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418 	struct be_port_rxf_stats_v0 *port_stats =
419 					&rxf_stats->port[adapter->port_num];
420 	struct be_drv_stats *drvs = &adapter->drv_stats;
421 
422 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
424 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
425 	drvs->rx_control_frames = port_stats->rx_control_frames;
426 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438 	drvs->rx_dropped_header_too_small =
439 		port_stats->rx_dropped_header_too_small;
440 	drvs->rx_address_filtered =
441 					port_stats->rx_address_filtered +
442 					port_stats->rx_vlan_filtered;
443 	drvs->rx_alignment_symbol_errors =
444 		port_stats->rx_alignment_symbol_errors;
445 
446 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
447 	drvs->tx_controlframes = port_stats->tx_controlframes;
448 
449 	if (adapter->port_num)
450 		drvs->jabber_events = rxf_stats->port1_jabber_events;
451 	else
452 		drvs->jabber_events = rxf_stats->port0_jabber_events;
453 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
456 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461 
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467 	struct be_port_rxf_stats_v1 *port_stats =
468 					&rxf_stats->port[adapter->port_num];
469 	struct be_drv_stats *drvs = &adapter->drv_stats;
470 
471 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
475 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
476 	drvs->rx_control_frames = port_stats->rx_control_frames;
477 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487 	drvs->rx_dropped_header_too_small =
488 		port_stats->rx_dropped_header_too_small;
489 	drvs->rx_input_fifo_overflow_drop =
490 		port_stats->rx_input_fifo_overflow_drop;
491 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
492 	drvs->rx_alignment_symbol_errors =
493 		port_stats->rx_alignment_symbol_errors;
494 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
496 	drvs->tx_controlframes = port_stats->tx_controlframes;
497 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498 	drvs->jabber_events = port_stats->jabber_events;
499 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
502 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507 
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513 	struct be_port_rxf_stats_v2 *port_stats =
514 					&rxf_stats->port[adapter->port_num];
515 	struct be_drv_stats *drvs = &adapter->drv_stats;
516 
517 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
521 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
522 	drvs->rx_control_frames = port_stats->rx_control_frames;
523 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533 	drvs->rx_dropped_header_too_small =
534 		port_stats->rx_dropped_header_too_small;
535 	drvs->rx_input_fifo_overflow_drop =
536 		port_stats->rx_input_fifo_overflow_drop;
537 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
538 	drvs->rx_alignment_symbol_errors =
539 		port_stats->rx_alignment_symbol_errors;
540 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
542 	drvs->tx_controlframes = port_stats->tx_controlframes;
543 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544 	drvs->jabber_events = port_stats->jabber_events;
545 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
548 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552 	if (be_roce_supported(adapter)) {
553 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555 		drvs->rx_roce_frames = port_stats->roce_frames_received;
556 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
557 		drvs->roce_drops_payload_len =
558 			port_stats->roce_drops_payload_len;
559 	}
560 }
561 
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564 	struct be_drv_stats *drvs = &adapter->drv_stats;
565 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566 
567 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577 	drvs->rx_dropped_tcp_length =
578 				pport_stats->rx_dropped_invalid_tcp_length;
579 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582 	drvs->rx_dropped_header_too_small =
583 				pport_stats->rx_dropped_header_too_small;
584 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585 	drvs->rx_address_filtered =
586 					pport_stats->rx_address_filtered +
587 					pport_stats->rx_vlan_filtered;
588 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592 	drvs->jabber_events = pport_stats->rx_jabbers;
593 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
594 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595 	drvs->rx_drops_too_many_frags =
596 				pport_stats->rx_drops_too_many_frags_lo;
597 }
598 
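/* Accumulate a 16-bit HW counter (which wraps at 65535) into a 32-bit value:
 * the low 16 bits mirror the latest HW reading and the high 16 bits count the
 * number of wrap-arounds seen so far.
 */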
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)			(x & 0xFFFF)
602 #define hi(x)			(x & 0xFFFF0000)
603 	bool wrapped = val < lo(*acc);
604 	u32 newacc = hi(*acc) + val;
605 
606 	if (wrapped)
607 		newacc += 65536;
608 	ACCESS_ONCE(*acc) = newacc;
609 }
610 
611 static void populate_erx_stats(struct be_adapter *adapter,
612 			       struct be_rx_obj *rxo, u32 erx_stat)
613 {
614 	if (!BEx_chip(adapter))
615 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616 	else
617 		/* The erx HW counter below can actually wrap around after
618 		 * 65535. The driver accumulates it into a 32-bit value.
619 		 */
620 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621 				     (u16)erx_stat);
622 }
623 
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627 	struct be_rx_obj *rxo;
628 	int i;
629 	u32 erx_stat;
630 
631 	if (lancer_chip(adapter)) {
632 		populate_lancer_stats(adapter);
633 	} else {
634 		if (BE2_chip(adapter))
635 			populate_be_v0_stats(adapter);
636 		else if (BE3_chip(adapter))
637 			/* for BE3 */
638 			populate_be_v1_stats(adapter);
639 		else
640 			populate_be_v2_stats(adapter);
641 
642 		/* erx_v2 is longer than v0 and v1; use v2 for v0/v1 access */
643 		for_all_rx_queues(adapter, rxo, i) {
644 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645 			populate_erx_stats(adapter, rxo, erx_stat);
646 		}
647 	}
648 }
649 
650 static void be_get_stats64(struct net_device *netdev,
651 			   struct rtnl_link_stats64 *stats)
652 {
653 	struct be_adapter *adapter = netdev_priv(netdev);
654 	struct be_drv_stats *drvs = &adapter->drv_stats;
655 	struct be_rx_obj *rxo;
656 	struct be_tx_obj *txo;
657 	u64 pkts, bytes;
658 	unsigned int start;
659 	int i;
660 
661 	for_all_rx_queues(adapter, rxo, i) {
662 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
663 
664 		do {
665 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666 			pkts = rx_stats(rxo)->rx_pkts;
667 			bytes = rx_stats(rxo)->rx_bytes;
668 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669 		stats->rx_packets += pkts;
670 		stats->rx_bytes += bytes;
671 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673 					rx_stats(rxo)->rx_drops_no_frags;
674 	}
675 
676 	for_all_tx_queues(adapter, txo, i) {
677 		const struct be_tx_stats *tx_stats = tx_stats(txo);
678 
679 		do {
680 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681 			pkts = tx_stats(txo)->tx_pkts;
682 			bytes = tx_stats(txo)->tx_bytes;
683 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684 		stats->tx_packets += pkts;
685 		stats->tx_bytes += bytes;
686 	}
687 
688 	/* bad pkts received */
689 	stats->rx_errors = drvs->rx_crc_errors +
690 		drvs->rx_alignment_symbol_errors +
691 		drvs->rx_in_range_errors +
692 		drvs->rx_out_range_errors +
693 		drvs->rx_frame_too_long +
694 		drvs->rx_dropped_too_small +
695 		drvs->rx_dropped_too_short +
696 		drvs->rx_dropped_header_too_small +
697 		drvs->rx_dropped_tcp_length +
698 		drvs->rx_dropped_runt;
699 
700 	/* detailed rx errors */
701 	stats->rx_length_errors = drvs->rx_in_range_errors +
702 		drvs->rx_out_range_errors +
703 		drvs->rx_frame_too_long;
704 
705 	stats->rx_crc_errors = drvs->rx_crc_errors;
706 
707 	/* frame alignment errors */
708 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709 
710 	/* receiver fifo overrun */
711 	/* drops_no_pbuf is not per i/f, it's per BE card */
712 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713 				drvs->rx_input_fifo_overflow_drop +
714 				drvs->rx_drops_no_pbuf;
715 }
716 
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719 	struct net_device *netdev = adapter->netdev;
720 
721 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722 		netif_carrier_off(netdev);
723 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724 	}
725 
726 	if (link_status)
727 		netif_carrier_on(netdev);
728 	else
729 		netif_carrier_off(netdev);
730 
731 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733 
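/* Length of the headers (up to and including the TCP header, or the inner
 * TCP header for encapsulated pkts) that get replicated in each GSO segment.
 */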
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736 	if (skb->encapsulation)
737 		return skb_inner_transport_offset(skb) +
738 		       inner_tcp_hdrlen(skb);
739 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741 
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744 	struct be_tx_stats *stats = tx_stats(txo);
745 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746 	/* Account for headers which get duplicated in TSO pkt */
747 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748 
749 	u64_stats_update_begin(&stats->sync);
750 	stats->tx_reqs++;
751 	stats->tx_bytes += skb->len + dup_hdr_len;
752 	stats->tx_pkts += tx_pkts;
753 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754 		stats->tx_vxlan_offload_pkts += tx_pkts;
755 	u64_stats_update_end(&stats->sync);
756 }
757 
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761 	/* +1 for the header wrb */
762 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764 
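/* Fill a TX WRB (buffer descriptor) with the DMA address and length of a
 * single fragment.
 */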
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770 	wrb->rsvd0 = 0;
771 }
772 
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778 	wrb->frag_pa_hi = 0;
779 	wrb->frag_pa_lo = 0;
780 	wrb->frag_len = 0;
781 	wrb->rsvd0 = 0;
782 }
783 
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785 				     struct sk_buff *skb)
786 {
787 	u8 vlan_prio;
788 	u16 vlan_tag;
789 
790 	vlan_tag = skb_vlan_tag_get(skb);
791 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792 	/* If vlan priority provided by OS is NOT in available bmap */
793 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795 				adapter->recommended_prio_bits;
796 
797 	return vlan_tag;
798 }
799 
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803 	return (inner_ip_hdr(skb)->version == 4) ?
804 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806 
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809 	return (ip_hdr(skb)->version == 4) ?
810 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812 
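/* The TXQ is considered full when it cannot accommodate another
 * maximally-fragmented skb.
 */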
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817 
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820 	return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822 
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827 
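/* Derive the WRB feature flags (LSO/LSO6, IP/TCP/UDP csum offload, VLAN)
 * from the offloads requested by the skb.
 */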
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829 				       struct sk_buff *skb,
830 				       struct be_wrb_params *wrb_params)
831 {
832 	u16 proto;
833 
834 	if (skb_is_gso(skb)) {
835 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
836 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840 		if (skb->encapsulation) {
841 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842 			proto = skb_inner_ip_proto(skb);
843 		} else {
844 			proto = skb_ip_proto(skb);
845 		}
846 		if (proto == IPPROTO_TCP)
847 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848 		else if (proto == IPPROTO_UDP)
849 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850 	}
851 
852 	if (skb_vlan_tag_present(skb)) {
853 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855 	}
856 
857 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859 
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861 			 struct be_eth_hdr_wrb *hdr,
862 			 struct be_wrb_params *wrb_params,
863 			 struct sk_buff *skb)
864 {
865 	memset(hdr, 0, sizeof(*hdr));
866 
867 	SET_TX_WRB_HDR_BITS(crc, hdr,
868 			    BE_WRB_F_GET(wrb_params->features, CRC));
869 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
870 			    BE_WRB_F_GET(wrb_params->features, IPCS));
871 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
873 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
874 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
875 
876 	SET_TX_WRB_HDR_BITS(lso, hdr,
877 			    BE_WRB_F_GET(wrb_params->features, LSO));
878 	SET_TX_WRB_HDR_BITS(lso6, hdr,
879 			    BE_WRB_F_GET(wrb_params->features, LSO6));
880 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881 
882 	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883 	 * hack is not needed, the evt bit is set while ringing DB.
884 	 */
885 	SET_TX_WRB_HDR_BITS(event, hdr,
886 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887 	SET_TX_WRB_HDR_BITS(vlan, hdr,
888 			    BE_WRB_F_GET(wrb_params->features, VLAN));
889 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890 
891 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
894 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896 
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898 			  bool unmap_single)
899 {
900 	dma_addr_t dma;
901 	u32 frag_len = le32_to_cpu(wrb->frag_len);
902 
904 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905 		(u64)le32_to_cpu(wrb->frag_pa_lo);
906 	if (frag_len) {
907 		if (unmap_single)
908 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909 		else
910 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911 	}
912 }
913 
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917 	u32 head = txo->q.head;
918 
919 	queue_head_inc(&txo->q);
920 	return head;
921 }
922 
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925 				struct be_tx_obj *txo,
926 				struct be_wrb_params *wrb_params,
927 				struct sk_buff *skb, u16 head)
928 {
929 	u32 num_frags = skb_wrb_cnt(skb);
930 	struct be_queue_info *txq = &txo->q;
931 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932 
933 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
935 
936 	BUG_ON(txo->sent_skb_list[head]);
937 	txo->sent_skb_list[head] = skb;
938 	txo->last_req_hdr = head;
939 	atomic_add(num_frags, &txq->used);
940 	txo->last_req_wrb_cnt = num_frags;
941 	txo->pend_wrb_cnt += num_frags;
942 }
943 
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946 				 int len)
947 {
948 	struct be_eth_wrb *wrb;
949 	struct be_queue_info *txq = &txo->q;
950 
951 	wrb = queue_head_node(txq);
952 	wrb_fill(wrb, busaddr, len);
953 	queue_head_inc(txq);
954 }
955 
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961 			    struct be_tx_obj *txo, u32 head, bool map_single,
962 			    u32 copied)
963 {
964 	struct device *dev;
965 	struct be_eth_wrb *wrb;
966 	struct be_queue_info *txq = &txo->q;
967 
968 	dev = &adapter->pdev->dev;
969 	txq->head = head;
970 
971 	/* skip the first wrb (hdr); it's not mapped */
972 	queue_head_inc(txq);
973 	while (copied) {
974 		wrb = queue_head_node(txq);
975 		unmap_tx_frag(dev, wrb, map_single);
976 		map_single = false;
977 		copied -= le32_to_cpu(wrb->frag_len);
978 		queue_head_inc(txq);
979 	}
980 
981 	txq->head = head;
982 }
983 
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989 			   struct sk_buff *skb,
990 			   struct be_wrb_params *wrb_params)
991 {
992 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993 	struct device *dev = &adapter->pdev->dev;
994 	struct be_queue_info *txq = &txo->q;
995 	bool map_single = false;
996 	u32 head = txq->head;
997 	dma_addr_t busaddr;
998 	int len;
999 
1000 	head = be_tx_get_wrb_hdr(txo);
1001 
1002 	if (skb->len > skb->data_len) {
1003 		len = skb_headlen(skb);
1004 
1005 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006 		if (dma_mapping_error(dev, busaddr))
1007 			goto dma_err;
1008 		map_single = true;
1009 		be_tx_setup_wrb_frag(txo, busaddr, len);
1010 		copied += len;
1011 	}
1012 
1013 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015 		len = skb_frag_size(frag);
1016 
1017 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018 		if (dma_mapping_error(dev, busaddr))
1019 			goto dma_err;
1020 		be_tx_setup_wrb_frag(txo, busaddr, len);
1021 		copied += len;
1022 	}
1023 
1024 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025 
1026 	be_tx_stats_update(txo, skb);
1027 	return wrb_cnt;
1028 
1029 dma_err:
1030 	adapter->drv_stats.dma_map_errors++;
1031 	be_xmit_restore(adapter, txo, head, map_single, copied);
1032 	return 0;
1033 }
1034 
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039 
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041 					     struct sk_buff *skb,
1042 					     struct be_wrb_params
1043 					     *wrb_params)
1044 {
1045 	u16 vlan_tag = 0;
1046 
1047 	skb = skb_share_check(skb, GFP_ATOMIC);
1048 	if (unlikely(!skb))
1049 		return skb;
1050 
1051 	if (skb_vlan_tag_present(skb))
1052 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053 
1054 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055 		if (!vlan_tag)
1056 			vlan_tag = adapter->pvid;
1057 		/* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1058 		 * skip VLAN insertion
1059 		 */
1060 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061 	}
1062 
1063 	if (vlan_tag) {
1064 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065 						vlan_tag);
1066 		if (unlikely(!skb))
1067 			return skb;
1068 		skb->vlan_tci = 0;
1069 	}
1070 
1071 	/* Insert the outer VLAN, if any */
1072 	if (adapter->qnq_vid) {
1073 		vlan_tag = adapter->qnq_vid;
1074 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075 						vlan_tag);
1076 		if (unlikely(!skb))
1077 			return skb;
1078 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079 	}
1080 
1081 	return skb;
1082 }
1083 
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1087 	u16 offset = ETH_HLEN;
1088 
1089 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1090 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091 
1092 		offset += sizeof(struct ipv6hdr);
1093 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1094 		    ip6h->nexthdr != NEXTHDR_UDP) {
1095 			struct ipv6_opt_hdr *ehdr =
1096 				(struct ipv6_opt_hdr *)(skb->data + offset);
1097 
1098 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099 			if (ehdr->hdrlen == 0xff)
1100 				return true;
1101 		}
1102 	}
1103 	return false;
1104 }
1105 
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110 
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115 
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117 						  struct sk_buff *skb,
1118 						  struct be_wrb_params
1119 						  *wrb_params)
1120 {
1121 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122 	unsigned int eth_hdr_len;
1123 	struct iphdr *ip;
1124 
1125 	/* For padded packets, BE HW modifies tot_len field in IP header
1126 	 * incorrectly when VLAN tag is inserted by HW.
1127 	 * For padded packets, Lancer computes incorrect checksum.
1128 	 */
1129 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130 						VLAN_ETH_HLEN : ETH_HLEN;
1131 	if (skb->len <= 60 &&
1132 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133 	    is_ipv4_pkt(skb)) {
1134 		ip = (struct iphdr *)ip_hdr(skb);
1135 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136 	}
1137 
1138 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1139 	 * tagging in pvid-tagging mode
1140 	 */
1141 	if (be_pvid_tagging_enabled(adapter) &&
1142 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1143 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144 
1145 	/* HW has a bug wherein it will calculate CSUM for VLAN
1146 	 * pkts even though it is disabled.
1147 	 * Manually insert VLAN in pkt.
1148 	 */
1149 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150 	    skb_vlan_tag_present(skb)) {
1151 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152 		if (unlikely(!skb))
1153 			goto err;
1154 	}
1155 
1156 	/* HW may lockup when VLAN HW tagging is requested on
1157 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1158 	 * skip HW tagging is not enabled by FW.
1159 	 */
1160 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161 		     (adapter->pvid || adapter->qnq_vid) &&
1162 		     !qnq_async_evt_rcvd(adapter)))
1163 		goto tx_drop;
1164 
1165 	/* Manual VLAN tag insertion to prevent:
1166 	 * ASIC lockup when the ASIC inserts VLAN tag into
1167 	 * certain ipv6 packets. Insert VLAN tags in driver,
1168 	 * and set event, completion, vlan bits accordingly
1169 	 * in the Tx WRB.
1170 	 */
1171 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172 	    be_vlan_tag_tx_chk(adapter, skb)) {
1173 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174 		if (unlikely(!skb))
1175 			goto err;
1176 	}
1177 
1178 	return skb;
1179 tx_drop:
1180 	dev_kfree_skb_any(skb);
1181 err:
1182 	return NULL;
1183 }
1184 
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186 					   struct sk_buff *skb,
1187 					   struct be_wrb_params *wrb_params)
1188 {
1189 	int err;
1190 
1191 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192 	 * packets that are 32 bytes or less may cause a transmit stall
1193 	 * on that port. The workaround is to pad such packets
1194 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1195 	 */
1196 	if (skb->len <= 32) {
1197 		if (skb_put_padto(skb, 36))
1198 			return NULL;
1199 	}
1200 
1201 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203 		if (!skb)
1204 			return NULL;
1205 	}
1206 
1207 	/* The stack can send us skbs with length greater than
1208 	 * what the HW can handle. Trim the extra bytes.
1209 	 */
1210 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212 	WARN_ON(err);
1213 
1214 	return skb;
1215 }
1216 
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219 	struct be_queue_info *txq = &txo->q;
1220 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221 
1222 	/* Mark the last request eventable if it hasn't been marked already */
1223 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225 
1226 	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1227 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228 		wrb_fill_dummy(queue_head_node(txq));
1229 		queue_head_inc(txq);
1230 		atomic_inc(&txq->used);
1231 		txo->pend_wrb_cnt++;
1232 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233 					   TX_HDR_WRB_NUM_SHIFT);
1234 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235 					  TX_HDR_WRB_NUM_SHIFT);
1236 	}
1237 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238 	txo->pend_wrb_cnt = 0;
1239 }
1240 
1241 /* OS2BMC related */
1242 
1243 #define DHCP_CLIENT_PORT	68
1244 #define DHCP_SERVER_PORT	67
1245 #define NET_BIOS_PORT1		137
1246 #define NET_BIOS_PORT2		138
1247 #define DHCPV6_RAS_PORT		547
1248 
1249 #define is_mc_allowed_on_bmc(adapter, eh)	\
1250 	(!is_multicast_filt_enabled(adapter) &&	\
1251 	 is_multicast_ether_addr(eh->h_dest) &&	\
1252 	 !is_broadcast_ether_addr(eh->h_dest))
1253 
1254 #define is_bc_allowed_on_bmc(adapter, eh)	\
1255 	(!is_broadcast_filt_enabled(adapter) &&	\
1256 	 is_broadcast_ether_addr(eh->h_dest))
1257 
1258 #define is_arp_allowed_on_bmc(adapter, skb)	\
1259 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1260 
1261 #define is_broadcast_packet(eh, adapter)	\
1262 		(is_multicast_ether_addr(eh->h_dest) && \
1263 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264 
1265 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1266 
1267 #define is_arp_filt_enabled(adapter)	\
1268 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269 
1270 #define is_dhcp_client_filt_enabled(adapter)	\
1271 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272 
1273 #define is_dhcp_srvr_filt_enabled(adapter)	\
1274 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275 
1276 #define is_nbios_filt_enabled(adapter)	\
1277 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278 
1279 #define is_ipv6_na_filt_enabled(adapter)	\
1280 		(adapter->bmc_filt_mask &	\
1281 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282 
1283 #define is_ipv6_ra_filt_enabled(adapter)	\
1284 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285 
1286 #define is_ipv6_ras_filt_enabled(adapter)	\
1287 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288 
1289 #define is_broadcast_filt_enabled(adapter)	\
1290 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291 
1292 #define is_multicast_filt_enabled(adapter)	\
1293 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294 
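/* Returns true if a copy of this TX packet must also be sent to the BMC,
 * based on the BMC filtering settings for broadcast/multicast, ARP, DHCP,
 * NetBIOS and IPv6 ND traffic. For such packets the VLAN tag, if any, is
 * inserted inline into the packet data.
 */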
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296 			       struct sk_buff **skb)
1297 {
1298 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299 	bool os2bmc = false;
1300 
1301 	if (!be_is_os2bmc_enabled(adapter))
1302 		goto done;
1303 
1304 	if (!is_multicast_ether_addr(eh->h_dest))
1305 		goto done;
1306 
1307 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1308 	    is_bc_allowed_on_bmc(adapter, eh) ||
1309 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1310 		os2bmc = true;
1311 		goto done;
1312 	}
1313 
1314 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316 		u8 nexthdr = hdr->nexthdr;
1317 
1318 		if (nexthdr == IPPROTO_ICMPV6) {
1319 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320 
1321 			switch (icmp6->icmp6_type) {
1322 			case NDISC_ROUTER_ADVERTISEMENT:
1323 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324 				goto done;
1325 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1327 				goto done;
1328 			default:
1329 				break;
1330 			}
1331 		}
1332 	}
1333 
1334 	if (is_udp_pkt((*skb))) {
1335 		struct udphdr *udp = udp_hdr((*skb));
1336 
1337 		switch (ntohs(udp->dest)) {
1338 		case DHCP_CLIENT_PORT:
1339 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1340 			goto done;
1341 		case DHCP_SERVER_PORT:
1342 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343 			goto done;
1344 		case NET_BIOS_PORT1:
1345 		case NET_BIOS_PORT2:
1346 			os2bmc = is_nbios_filt_enabled(adapter);
1347 			goto done;
1348 		case DHCPV6_RAS_PORT:
1349 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350 			goto done;
1351 		default:
1352 			break;
1353 		}
1354 	}
1355 done:
1356 	/* For VLAN packets destined to the BMC, the ASIC expects
1357 	 * the VLAN tag to be inline in the packet.
1358 	 */
1359 	if (os2bmc)
1360 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361 
1362 	return os2bmc;
1363 }
1364 
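/* ndo_start_xmit handler: applies chip-specific workarounds, maps and
 * enqueues the skb's WRBs, optionally enqueues a second copy for the BMC,
 * stops the subqueue when the TXQ is full and rings the TX doorbell when
 * flushing.
 */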
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367 	struct be_adapter *adapter = netdev_priv(netdev);
1368 	u16 q_idx = skb_get_queue_mapping(skb);
1369 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370 	struct be_wrb_params wrb_params = { 0 };
1371 	bool flush = !skb->xmit_more;
1372 	u16 wrb_cnt;
1373 
1374 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375 	if (unlikely(!skb))
1376 		goto drop;
1377 
1378 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379 
1380 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381 	if (unlikely(!wrb_cnt)) {
1382 		dev_kfree_skb_any(skb);
1383 		goto drop;
1384 	}
1385 
1386 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1387 	 * enqueue the pkt a 2nd time with mgmt bit set.
1388 	 */
1389 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1390 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392 		if (unlikely(!wrb_cnt))
1393 			goto drop;
1394 		else
1395 			skb_get(skb);
1396 	}
1397 
1398 	if (be_is_txq_full(txo)) {
1399 		netif_stop_subqueue(netdev, q_idx);
1400 		tx_stats(txo)->tx_stops++;
1401 	}
1402 
1403 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404 		be_xmit_flush(adapter, txo);
1405 
1406 	return NETDEV_TX_OK;
1407 drop:
1408 	tx_stats(txo)->tx_drv_drops++;
1409 	/* Flush the already enqueued tx requests */
1410 	if (flush && txo->pend_wrb_cnt)
1411 		be_xmit_flush(adapter, txo);
1412 
1413 	return NETDEV_TX_OK;
1414 }
1415 
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421 
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424 	struct device *dev = &adapter->pdev->dev;
1425 	int status;
1426 
1427 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428 		return 0;
1429 
1430 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431 	if (!status) {
1432 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434 	} else {
1435 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436 	}
1437 	return status;
1438 }
1439 
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442 	struct device *dev = &adapter->pdev->dev;
1443 	int status;
1444 
1445 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446 	if (!status) {
1447 		dev_info(dev, "Disabled VLAN promiscuous mode\n");
1448 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449 	}
1450 	return status;
1451 }
1452 
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459 	struct device *dev = &adapter->pdev->dev;
1460 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1461 	u16 num = 0, i = 0;
1462 	int status = 0;
1463 
1464 	/* No need to change the VLAN state if the I/F is in promiscuous mode */
1465 	if (adapter->netdev->flags & IFF_PROMISC)
1466 		return 0;
1467 
1468 	if (adapter->vlans_added > be_max_vlans(adapter))
1469 		return be_set_vlan_promisc(adapter);
1470 
1471 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472 		status = be_clear_vlan_promisc(adapter);
1473 		if (status)
1474 			return status;
1475 	}
1476 	/* Construct VLAN Table to give to HW */
1477 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478 		vids[num++] = cpu_to_le16(i);
1479 
1480 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481 	if (status) {
1482 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1483 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1484 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485 		    addl_status(status) ==
1486 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487 			return be_set_vlan_promisc(adapter);
1488 	}
1489 	return status;
1490 }
1491 
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494 	struct be_adapter *adapter = netdev_priv(netdev);
1495 	int status = 0;
1496 
1497 	mutex_lock(&adapter->rx_filter_lock);
1498 
1499 	/* Packets with VID 0 are always received by Lancer by default */
1500 	if (lancer_chip(adapter) && vid == 0)
1501 		goto done;
1502 
1503 	if (test_bit(vid, adapter->vids))
1504 		goto done;
1505 
1506 	set_bit(vid, adapter->vids);
1507 	adapter->vlans_added++;
1508 
1509 	status = be_vid_config(adapter);
1510 done:
1511 	mutex_unlock(&adapter->rx_filter_lock);
1512 	return status;
1513 }
1514 
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517 	struct be_adapter *adapter = netdev_priv(netdev);
1518 	int status = 0;
1519 
1520 	mutex_lock(&adapter->rx_filter_lock);
1521 
1522 	/* Packets with VID 0 are always received by Lancer by default */
1523 	if (lancer_chip(adapter) && vid == 0)
1524 		goto done;
1525 
1526 	if (!test_bit(vid, adapter->vids))
1527 		goto done;
1528 
1529 	clear_bit(vid, adapter->vids);
1530 	adapter->vlans_added--;
1531 
1532 	status = be_vid_config(adapter);
1533 done:
1534 	mutex_unlock(&adapter->rx_filter_lock);
1535 	return status;
1536 }
1537 
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543 
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546 	int status;
1547 
1548 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549 		return;
1550 
1551 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552 	if (!status)
1553 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555 
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558 	int status;
1559 
1560 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561 		return;
1562 
1563 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564 	if (!status)
1565 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567 
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570 	int status;
1571 
1572 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573 		return;
1574 
1575 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576 	if (!status)
1577 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579 
1580 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1581  * We use a single callback function for both sync and unsync. We really don't
1582  * add/remove addresses through this callback; we use it only to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586 			     const unsigned char *addr)
1587 {
1588 	struct be_adapter *adapter = netdev_priv(netdev);
1589 
1590 	adapter->update_uc_list = true;
1591 	return 0;
1592 }
1593 
1594 static int be_mc_list_update(struct net_device *netdev,
1595 			     const unsigned char *addr)
1596 {
1597 	struct be_adapter *adapter = netdev_priv(netdev);
1598 
1599 	adapter->update_mc_list = true;
1600 	return 0;
1601 }
1602 
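/* Sync the netdev mc-list into the adapter and program the HW multicast
 * filter. Falls back to multicast-promiscuous mode when IFF_ALLMULTI is set
 * or the list exceeds what the HW supports.
 */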
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605 	struct net_device *netdev = adapter->netdev;
1606 	struct netdev_hw_addr *ha;
1607 	bool mc_promisc = false;
1608 	int status;
1609 
1610 	netif_addr_lock_bh(netdev);
1611 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612 
1613 	if (netdev->flags & IFF_PROMISC) {
1614 		adapter->update_mc_list = false;
1615 	} else if (netdev->flags & IFF_ALLMULTI ||
1616 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617 		/* Enable multicast promisc if num configured exceeds
1618 		 * what we support
1619 		 */
1620 		mc_promisc = true;
1621 		adapter->update_mc_list = false;
1622 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623 		/* Update mc-list unconditionally if the iface was previously
1624 		 * in mc-promisc mode and now is out of that mode.
1625 		 */
1626 		adapter->update_mc_list = true;
1627 	}
1628 
1629 	if (adapter->update_mc_list) {
1630 		int i = 0;
1631 
1632 		/* cache the mc-list in adapter */
1633 		netdev_for_each_mc_addr(ha, netdev) {
1634 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635 			i++;
1636 		}
1637 		adapter->mc_count = netdev_mc_count(netdev);
1638 	}
1639 	netif_addr_unlock_bh(netdev);
1640 
1641 	if (mc_promisc) {
1642 		be_set_mc_promisc(adapter);
1643 	} else if (adapter->update_mc_list) {
1644 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645 		if (!status)
1646 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647 		else
1648 			be_set_mc_promisc(adapter);
1649 
1650 		adapter->update_mc_list = false;
1651 	}
1652 }
1653 
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656 	struct net_device *netdev = adapter->netdev;
1657 
1658 	__dev_mc_unsync(netdev, NULL);
1659 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660 	adapter->mc_count = 0;
1661 }
1662 
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667 		return 0;
1668 	}
1669 
1670 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671 			       adapter->if_handle,
1672 			       &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674 
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677 	if (pmac_id == adapter->pmac_id[0])
1678 		return;
1679 
1680 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682 
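/* Sync the netdev uc-list into the adapter and program the HW MAC filters.
 * Falls back to promiscuous mode when the list exceeds the available
 * MAC filter entries.
 */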
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685 	struct net_device *netdev = adapter->netdev;
1686 	struct netdev_hw_addr *ha;
1687 	bool uc_promisc = false;
1688 	int curr_uc_macs = 0, i;
1689 
1690 	netif_addr_lock_bh(netdev);
1691 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692 
1693 	if (netdev->flags & IFF_PROMISC) {
1694 		adapter->update_uc_list = false;
1695 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696 		uc_promisc = true;
1697 		adapter->update_uc_list = false;
1698 	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699 		/* Update uc-list unconditionally if the iface was previously
1700 		 * in uc-promisc mode and now is out of that mode.
1701 		 */
1702 		adapter->update_uc_list = true;
1703 	}
1704 
1705 	if (adapter->update_uc_list) {
1706 		/* cache the uc-list in adapter array */
1707 		i = 0;
1708 		netdev_for_each_uc_addr(ha, netdev) {
1709 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710 			i++;
1711 		}
1712 		curr_uc_macs = netdev_uc_count(netdev);
1713 	}
1714 	netif_addr_unlock_bh(netdev);
1715 
1716 	if (uc_promisc) {
1717 		be_set_uc_promisc(adapter);
1718 	} else if (adapter->update_uc_list) {
1719 		be_clear_uc_promisc(adapter);
1720 
1721 		for (i = 0; i < adapter->uc_macs; i++)
1722 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723 
1724 		for (i = 0; i < curr_uc_macs; i++)
1725 			be_uc_mac_add(adapter, i);
1726 		adapter->uc_macs = curr_uc_macs;
1727 		adapter->update_uc_list = false;
1728 	}
1729 }
1730 
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733 	struct net_device *netdev = adapter->netdev;
1734 	int i;
1735 
1736 	__dev_uc_unsync(netdev, NULL);
1737 	for (i = 0; i < adapter->uc_macs; i++)
1738 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739 
1740 	adapter->uc_macs = 0;
1741 }
1742 
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745 	struct net_device *netdev = adapter->netdev;
1746 
1747 	mutex_lock(&adapter->rx_filter_lock);
1748 
1749 	if (netdev->flags & IFF_PROMISC) {
1750 		if (!be_in_all_promisc(adapter))
1751 			be_set_all_promisc(adapter);
1752 	} else if (be_in_all_promisc(adapter)) {
1753 		/* We need to re-program the vlan-list or clear
1754 		 * vlan-promisc mode (if needed) when the interface
1755 		 * comes out of promisc mode.
1756 		 */
1757 		be_vid_config(adapter);
1758 	}
1759 
1760 	be_set_uc_list(adapter);
1761 	be_set_mc_list(adapter);
1762 
1763 	mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765 
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768 	struct be_cmd_work *cmd_work =
1769 				container_of(work, struct be_cmd_work, work);
1770 
1771 	__be_set_rx_mode(cmd_work->adapter);
1772 	kfree(cmd_work);
1773 }
1774 
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777 	struct be_adapter *adapter = netdev_priv(netdev);
1778 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779 	int status;
1780 
1781 	if (!sriov_enabled(adapter))
1782 		return -EPERM;
1783 
1784 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785 		return -EINVAL;
1786 
	/* Proceed further only if the user-provided MAC is different
	 * from the active MAC
	 */
1790 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791 		return 0;
1792 
1793 	if (BEx_chip(adapter)) {
1794 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795 				vf + 1);
1796 
1797 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798 					 &vf_cfg->pmac_id, vf + 1);
1799 	} else {
1800 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801 					vf + 1);
1802 	}
1803 
1804 	if (status) {
1805 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1806 			mac, vf, status);
1807 		return be_cmd_status(status);
1808 	}
1809 
1810 	ether_addr_copy(vf_cfg->mac_addr, mac);
1811 
1812 	return 0;
1813 }
1814 
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816 			    struct ifla_vf_info *vi)
1817 {
1818 	struct be_adapter *adapter = netdev_priv(netdev);
1819 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820 
1821 	if (!sriov_enabled(adapter))
1822 		return -EPERM;
1823 
1824 	if (vf >= adapter->num_vfs)
1825 		return -EINVAL;
1826 
1827 	vi->vf = vf;
1828 	vi->max_tx_rate = vf_cfg->tx_rate;
1829 	vi->min_tx_rate = 0;
1830 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835 
1836 	return 0;
1837 }
1838 
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1843 	int vf_if_id = vf_cfg->if_handle;
1844 	int status;
1845 
1846 	/* Enable Transparent VLAN Tagging */
1847 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848 	if (status)
1849 		return status;
1850 
	/* Clear any VLAN filters pre-programmed on the VF, now that TVT
	 * is enabled
	 */
1852 	vids[0] = 0;
1853 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854 	if (!status)
1855 		dev_info(&adapter->pdev->dev,
1856 			 "Cleared guest VLANs on VF%d", vf);
1857 
1858 	/* After TVT is enabled, disallow VFs to program VLAN filters */
1859 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861 						  ~BE_PRIV_FILTMGMT, vf + 1);
1862 		if (!status)
1863 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864 	}
1865 	return 0;
1866 }
1867 
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871 	struct device *dev = &adapter->pdev->dev;
1872 	int status;
1873 
1874 	/* Reset Transparent VLAN Tagging. */
1875 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876 				       vf_cfg->if_handle, 0, 0);
1877 	if (status)
1878 		return status;
1879 
1880 	/* Allow VFs to program VLAN filtering */
1881 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883 						  BE_PRIV_FILTMGMT, vf + 1);
1884 		if (!status) {
1885 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1887 		}
1888 	}
1889 
1890 	dev_info(dev,
1891 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1892 	return 0;
1893 }
1894 
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896 			  __be16 vlan_proto)
1897 {
1898 	struct be_adapter *adapter = netdev_priv(netdev);
1899 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900 	int status;
1901 
1902 	if (!sriov_enabled(adapter))
1903 		return -EPERM;
1904 
1905 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906 		return -EINVAL;
1907 
1908 	if (vlan_proto != htons(ETH_P_8021Q))
1909 		return -EPROTONOSUPPORT;
1910 
1911 	if (vlan || qos) {
1912 		vlan |= qos << VLAN_PRIO_SHIFT;
1913 		status = be_set_vf_tvt(adapter, vf, vlan);
1914 	} else {
1915 		status = be_clear_vf_tvt(adapter, vf);
1916 	}
1917 
1918 	if (status) {
1919 		dev_err(&adapter->pdev->dev,
1920 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1921 			status);
1922 		return be_cmd_status(status);
1923 	}
1924 
1925 	vf_cfg->vlan_tag = vlan;
1926 	return 0;
1927 }
1928 
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930 			     int min_tx_rate, int max_tx_rate)
1931 {
1932 	struct be_adapter *adapter = netdev_priv(netdev);
1933 	struct device *dev = &adapter->pdev->dev;
1934 	int percent_rate, status = 0;
1935 	u16 link_speed = 0;
1936 	u8 link_status;
1937 
1938 	if (!sriov_enabled(adapter))
1939 		return -EPERM;
1940 
1941 	if (vf >= adapter->num_vfs)
1942 		return -EINVAL;
1943 
1944 	if (min_tx_rate)
1945 		return -EINVAL;
1946 
1947 	if (!max_tx_rate)
1948 		goto config_qos;
1949 
1950 	status = be_cmd_link_status_query(adapter, &link_speed,
1951 					  &link_status, 0);
1952 	if (status)
1953 		goto err;
1954 
1955 	if (!link_status) {
1956 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957 		status = -ENETDOWN;
1958 		goto err;
1959 	}
1960 
1961 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963 			link_speed);
1964 		status = -EINVAL;
1965 		goto err;
1966 	}
1967 
1968 	/* On Skyhawk the QOS setting must be done only as a % value */
1969 	percent_rate = link_speed / 100;
1970 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972 			percent_rate);
1973 		status = -EINVAL;
1974 		goto err;
1975 	}
1976 
1977 config_qos:
1978 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979 	if (status)
1980 		goto err;
1981 
1982 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983 	return 0;
1984 
1985 err:
1986 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987 		max_tx_rate, vf);
1988 	return be_cmd_status(status);
1989 }
1990 
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992 				int link_state)
1993 {
1994 	struct be_adapter *adapter = netdev_priv(netdev);
1995 	int status;
1996 
1997 	if (!sriov_enabled(adapter))
1998 		return -EPERM;
1999 
2000 	if (vf >= adapter->num_vfs)
2001 		return -EINVAL;
2002 
2003 	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2004 	if (status) {
2005 		dev_err(&adapter->pdev->dev,
2006 			"Link state change on VF %d failed: %#x\n", vf, status);
2007 		return be_cmd_status(status);
2008 	}
2009 
2010 	adapter->vf_cfg[vf].plink_tracking = link_state;
2011 
2012 	return 0;
2013 }
2014 
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017 	struct be_adapter *adapter = netdev_priv(netdev);
2018 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019 	u8 spoofchk;
2020 	int status;
2021 
2022 	if (!sriov_enabled(adapter))
2023 		return -EPERM;
2024 
2025 	if (vf >= adapter->num_vfs)
2026 		return -EINVAL;
2027 
2028 	if (BEx_chip(adapter))
2029 		return -EOPNOTSUPP;
2030 
2031 	if (enable == vf_cfg->spoofchk)
2032 		return 0;
2033 
2034 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035 
2036 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037 				       0, spoofchk);
2038 	if (status) {
2039 		dev_err(&adapter->pdev->dev,
2040 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2041 		return be_cmd_status(status);
2042 	}
2043 
2044 	vf_cfg->spoofchk = enable;
2045 	return 0;
2046 }
2047 
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049 			  ulong now)
2050 {
2051 	aic->rx_pkts_prev = rx_pkts;
2052 	aic->tx_reqs_prev = tx_pkts;
2053 	aic->jiffies = now;
2054 }
2055 
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058 	struct be_adapter *adapter = eqo->adapter;
2059 	int eqd, start;
2060 	struct be_aic_obj *aic;
2061 	struct be_rx_obj *rxo;
2062 	struct be_tx_obj *txo;
2063 	u64 rx_pkts = 0, tx_pkts = 0;
2064 	ulong now;
2065 	u32 pps, delta;
2066 	int i;
2067 
2068 	aic = &adapter->aic_obj[eqo->idx];
2069 	if (!aic->enable) {
2070 		if (aic->jiffies)
2071 			aic->jiffies = 0;
2072 		eqd = aic->et_eqd;
2073 		return eqd;
2074 	}
2075 
2076 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077 		do {
2078 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079 			rx_pkts += rxo->stats.rx_pkts;
2080 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081 	}
2082 
2083 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084 		do {
2085 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086 			tx_pkts += txo->stats.tx_reqs;
2087 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088 	}
2089 
	/* Skip if the stats wrapped around or this is the first calculation */
2091 	now = jiffies;
2092 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093 	    rx_pkts < aic->rx_pkts_prev ||
2094 	    tx_pkts < aic->tx_reqs_prev) {
2095 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2096 		return aic->prev_eqd;
2097 	}
2098 
2099 	delta = jiffies_to_msecs(now - aic->jiffies);
2100 	if (delta == 0)
2101 		return aic->prev_eqd;
2102 
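	/* Derive the aggregate RX+TX packet rate (pkts/sec) over the sampling
	 * window and map it to a new EQ delay: 4 delay units per 15K pps,
	 * forced to 0 below 8 and clamped to the [min_eqd, max_eqd] range.
	 */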
2103 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105 	eqd = (pps / 15000) << 2;
2106 
2107 	if (eqd < 8)
2108 		eqd = 0;
2109 	eqd = min_t(u32, eqd, aic->max_eqd);
2110 	eqd = max_t(u32, eqd, aic->min_eqd);
2111 
2112 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2113 
2114 	return eqd;
2115 }
2116 
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120 	struct be_adapter *adapter = eqo->adapter;
2121 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122 	ulong now = jiffies;
2123 	int eqd;
2124 	u32 mult_enc;
2125 
2126 	if (!aic->enable)
2127 		return 0;
2128 
2129 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130 		eqd = aic->prev_eqd;
2131 	else
2132 		eqd = be_get_new_eqd(eqo);
2133 
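	/* Map the computed EQ delay to one of the R2I (rearm-to-interrupt)
	 * delay multiplier encodings accepted by the Skyhawk EQ doorbell.
	 */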
2134 	if (eqd > 100)
2135 		mult_enc = R2I_DLY_ENC_1;
2136 	else if (eqd > 60)
2137 		mult_enc = R2I_DLY_ENC_2;
2138 	else if (eqd > 20)
2139 		mult_enc = R2I_DLY_ENC_3;
2140 	else
2141 		mult_enc = R2I_DLY_ENC_0;
2142 
2143 	aic->prev_eqd = eqd;
2144 
2145 	return mult_enc;
2146 }
2147 
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2151 	struct be_aic_obj *aic;
2152 	struct be_eq_obj *eqo;
2153 	int i, num = 0, eqd;
2154 
2155 	for_all_evt_queues(adapter, eqo, i) {
2156 		aic = &adapter->aic_obj[eqo->idx];
2157 		eqd = be_get_new_eqd(eqo);
2158 		if (force_update || eqd != aic->prev_eqd) {
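			/* be_cmd_modify_eqd() programs the delay as a
			 * multiplier; eqd * 65 / 100 converts the computed
			 * delay into that format.
			 */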
2159 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160 			set_eqd[num].eq_id = eqo->q.id;
2161 			aic->prev_eqd = eqd;
2162 			num++;
2163 		}
2164 	}
2165 
2166 	if (num)
2167 		be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169 
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171 			       struct be_rx_compl_info *rxcp)
2172 {
2173 	struct be_rx_stats *stats = rx_stats(rxo);
2174 
2175 	u64_stats_update_begin(&stats->sync);
2176 	stats->rx_compl++;
2177 	stats->rx_bytes += rxcp->pkt_size;
2178 	stats->rx_pkts++;
2179 	if (rxcp->tunneled)
2180 		stats->rx_vxlan_offload_pkts++;
2181 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182 		stats->rx_mcast_pkts++;
2183 	if (rxcp->err)
2184 		stats->rx_compl_err++;
2185 	u64_stats_update_end(&stats->sync);
2186 }
2187 
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
	/* L4 checksum is not reliable for non-TCP/UDP packets.
	 * Also ignore ipcksm for IPv6 pkts
2192 	 */
2193 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196 
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199 	struct be_adapter *adapter = rxo->adapter;
2200 	struct be_rx_page_info *rx_page_info;
2201 	struct be_queue_info *rxq = &rxo->q;
2202 	u32 frag_idx = rxq->tail;
2203 
2204 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2205 	BUG_ON(!rx_page_info->page);
2206 
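	/* Only the frag marked last_frag holds the DMA mapping for the whole
	 * big page; intermediate frags are only synced for CPU access.
	 */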
2207 	if (rx_page_info->last_frag) {
2208 		dma_unmap_page(&adapter->pdev->dev,
2209 			       dma_unmap_addr(rx_page_info, bus),
2210 			       adapter->big_page_size, DMA_FROM_DEVICE);
2211 		rx_page_info->last_frag = false;
2212 	} else {
2213 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2214 					dma_unmap_addr(rx_page_info, bus),
2215 					rx_frag_size, DMA_FROM_DEVICE);
2216 	}
2217 
2218 	queue_tail_inc(rxq);
2219 	atomic_dec(&rxq->used);
2220 	return rx_page_info;
2221 }
2222 
/* Throw away the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225 				struct be_rx_compl_info *rxcp)
2226 {
2227 	struct be_rx_page_info *page_info;
2228 	u16 i, num_rcvd = rxcp->num_rcvd;
2229 
2230 	for (i = 0; i < num_rcvd; i++) {
2231 		page_info = get_rx_page_info(rxo);
2232 		put_page(page_info->page);
2233 		memset(page_info, 0, sizeof(*page_info));
2234 	}
2235 }
2236 
2237 /*
2238  * skb_fill_rx_data forms a complete skb for an ether frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242 			     struct be_rx_compl_info *rxcp)
2243 {
2244 	struct be_rx_page_info *page_info;
2245 	u16 i, j;
2246 	u16 hdr_len, curr_frag_len, remaining;
2247 	u8 *start;
2248 
2249 	page_info = get_rx_page_info(rxo);
2250 	start = page_address(page_info->page) + page_info->page_offset;
2251 	prefetch(start);
2252 
2253 	/* Copy data in the first descriptor of this completion */
2254 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255 
2256 	skb->len = curr_frag_len;
2257 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258 		memcpy(skb->data, start, curr_frag_len);
2259 		/* Complete packet has now been moved to data */
2260 		put_page(page_info->page);
2261 		skb->data_len = 0;
2262 		skb->tail += curr_frag_len;
2263 	} else {
2264 		hdr_len = ETH_HLEN;
2265 		memcpy(skb->data, start, hdr_len);
2266 		skb_shinfo(skb)->nr_frags = 1;
2267 		skb_frag_set_page(skb, 0, page_info->page);
2268 		skb_shinfo(skb)->frags[0].page_offset =
2269 					page_info->page_offset + hdr_len;
2270 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271 				  curr_frag_len - hdr_len);
2272 		skb->data_len = curr_frag_len - hdr_len;
2273 		skb->truesize += rx_frag_size;
2274 		skb->tail += hdr_len;
2275 	}
2276 	page_info->page = NULL;
2277 
2278 	if (rxcp->pkt_size <= rx_frag_size) {
2279 		BUG_ON(rxcp->num_rcvd != 1);
2280 		return;
2281 	}
2282 
2283 	/* More frags present for this completion */
2284 	remaining = rxcp->pkt_size - curr_frag_len;
2285 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286 		page_info = get_rx_page_info(rxo);
2287 		curr_frag_len = min(remaining, rx_frag_size);
2288 
2289 		/* Coalesce all frags from the same physical page in one slot */
2290 		if (page_info->page_offset == 0) {
2291 			/* Fresh page */
2292 			j++;
2293 			skb_frag_set_page(skb, j, page_info->page);
2294 			skb_shinfo(skb)->frags[j].page_offset =
2295 							page_info->page_offset;
2296 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297 			skb_shinfo(skb)->nr_frags++;
2298 		} else {
2299 			put_page(page_info->page);
2300 		}
2301 
2302 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303 		skb->len += curr_frag_len;
2304 		skb->data_len += curr_frag_len;
2305 		skb->truesize += rx_frag_size;
2306 		remaining -= curr_frag_len;
2307 		page_info->page = NULL;
2308 	}
2309 	BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311 
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314 				struct be_rx_compl_info *rxcp)
2315 {
2316 	struct be_adapter *adapter = rxo->adapter;
2317 	struct net_device *netdev = adapter->netdev;
2318 	struct sk_buff *skb;
2319 
2320 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321 	if (unlikely(!skb)) {
2322 		rx_stats(rxo)->rx_drops_no_skbs++;
2323 		be_rx_compl_discard(rxo, rxcp);
2324 		return;
2325 	}
2326 
2327 	skb_fill_rx_data(rxo, skb, rxcp);
2328 
2329 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2331 	else
2332 		skb_checksum_none_assert(skb);
2333 
2334 	skb->protocol = eth_type_trans(skb, netdev);
2335 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336 	if (netdev->features & NETIF_F_RXHASH)
2337 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338 
2339 	skb->csum_level = rxcp->tunneled;
2340 	skb_mark_napi_id(skb, napi);
2341 
2342 	if (rxcp->vlanf)
2343 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344 
2345 	netif_receive_skb(skb);
2346 }
2347 
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350 				    struct napi_struct *napi,
2351 				    struct be_rx_compl_info *rxcp)
2352 {
2353 	struct be_adapter *adapter = rxo->adapter;
2354 	struct be_rx_page_info *page_info;
2355 	struct sk_buff *skb = NULL;
2356 	u16 remaining, curr_frag_len;
2357 	u16 i, j;
2358 
2359 	skb = napi_get_frags(napi);
2360 	if (!skb) {
2361 		be_rx_compl_discard(rxo, rxcp);
2362 		return;
2363 	}
2364 
2365 	remaining = rxcp->pkt_size;
2366 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367 		page_info = get_rx_page_info(rxo);
2368 
2369 		curr_frag_len = min(remaining, rx_frag_size);
2370 
2371 		/* Coalesce all frags from the same physical page in one slot */
2372 		if (i == 0 || page_info->page_offset == 0) {
2373 			/* First frag or Fresh page */
2374 			j++;
2375 			skb_frag_set_page(skb, j, page_info->page);
2376 			skb_shinfo(skb)->frags[j].page_offset =
2377 							page_info->page_offset;
2378 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379 		} else {
2380 			put_page(page_info->page);
2381 		}
2382 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383 		skb->truesize += rx_frag_size;
2384 		remaining -= curr_frag_len;
2385 		memset(page_info, 0, sizeof(*page_info));
2386 	}
2387 	BUG_ON(j > MAX_SKB_FRAGS);
2388 
2389 	skb_shinfo(skb)->nr_frags = j + 1;
2390 	skb->len = rxcp->pkt_size;
2391 	skb->data_len = rxcp->pkt_size;
2392 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2393 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394 	if (adapter->netdev->features & NETIF_F_RXHASH)
2395 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396 
2397 	skb->csum_level = rxcp->tunneled;
2398 
2399 	if (rxcp->vlanf)
2400 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401 
2402 	napi_gro_frags(napi);
2403 }
2404 
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406 				 struct be_rx_compl_info *rxcp)
2407 {
2408 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419 	if (rxcp->vlanf) {
2420 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422 	}
2423 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424 	rxcp->tunneled =
2425 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427 
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429 				 struct be_rx_compl_info *rxcp)
2430 {
2431 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442 	if (rxcp->vlanf) {
2443 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445 	}
2446 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449 
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454 	struct be_adapter *adapter = rxo->adapter;
2455 
	/* For checking the valid bit it is OK to use either definition as the
	 * valid bit is at the same position in both v0 and v1 Rx compl
	 */
2458 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459 		return NULL;
2460 
2461 	rmb();
2462 	be_dws_le_to_cpu(compl, sizeof(*compl));
2463 
2464 	if (adapter->be3_native)
2465 		be_parse_rx_compl_v1(compl, rxcp);
2466 	else
2467 		be_parse_rx_compl_v0(compl, rxcp);
2468 
2469 	if (rxcp->ip_frag)
2470 		rxcp->l4_csum = 0;
2471 
2472 	if (rxcp->vlanf) {
2473 		/* In QNQ modes, if qnq bit is not set, then the packet was
2474 		 * tagged only with the transparent outer vlan-tag and must
2475 		 * not be treated as a vlan packet by host
2476 		 */
2477 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478 			rxcp->vlanf = 0;
2479 
2480 		if (!lancer_chip(adapter))
2481 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482 
2483 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2485 			rxcp->vlanf = 0;
2486 	}
2487 
	/* As the compl has been parsed, reset it; we won't touch it again */
2489 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490 
2491 	queue_tail_inc(&rxo->cq);
2492 	return rxcp;
2493 }
2494 
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497 	u32 order = get_order(size);
2498 
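	/* Multi-page allocations must be compound pages so that the per-frag
	 * get_page()/put_page() refcounting operates on the head page of the
	 * whole allocation.
	 */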
2499 	if (order > 0)
2500 		gfp |= __GFP_COMP;
2501 	return  alloc_pages(gfp, order);
2502 }
2503 
2504 /*
 * Allocate a page, split it into fragments of size rx_frag_size and post as
2506  * receive buffers to BE
2507  */
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510 	struct be_adapter *adapter = rxo->adapter;
2511 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512 	struct be_queue_info *rxq = &rxo->q;
2513 	struct page *pagep = NULL;
2514 	struct device *dev = &adapter->pdev->dev;
2515 	struct be_eth_rx_d *rxd;
2516 	u64 page_dmaaddr = 0, frag_dmaaddr;
2517 	u32 posted, page_offset = 0, notify = 0;
2518 
2519 	page_info = &rxo->page_info_tbl[rxq->head];
2520 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521 		if (!pagep) {
2522 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523 			if (unlikely(!pagep)) {
2524 				rx_stats(rxo)->rx_post_fail++;
2525 				break;
2526 			}
2527 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2528 						    adapter->big_page_size,
2529 						    DMA_FROM_DEVICE);
2530 			if (dma_mapping_error(dev, page_dmaaddr)) {
2531 				put_page(pagep);
2532 				pagep = NULL;
2533 				adapter->drv_stats.dma_map_errors++;
2534 				break;
2535 			}
2536 			page_offset = 0;
2537 		} else {
2538 			get_page(pagep);
2539 			page_offset += rx_frag_size;
2540 		}
2541 		page_info->page_offset = page_offset;
2542 		page_info->page = pagep;
2543 
2544 		rxd = queue_head_node(rxq);
2545 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548 
2549 		/* Any space left in the current big page for another frag? */
2550 		if ((page_offset + rx_frag_size + rx_frag_size) >
2551 					adapter->big_page_size) {
2552 			pagep = NULL;
2553 			page_info->last_frag = true;
2554 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555 		} else {
2556 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557 		}
2558 
2559 		prev_page_info = page_info;
2560 		queue_head_inc(rxq);
2561 		page_info = &rxo->page_info_tbl[rxq->head];
2562 	}
2563 
2564 	/* Mark the last frag of a page when we break out of the above loop
2565 	 * with no more slots available in the RXQ
2566 	 */
2567 	if (pagep) {
2568 		prev_page_info->last_frag = true;
2569 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570 	}
2571 
2572 	if (posted) {
2573 		atomic_add(posted, &rxq->used);
2574 		if (rxo->rx_post_starved)
2575 			rxo->rx_post_starved = false;
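		/* The ERX doorbell accepts at most MAX_NUM_POST_ERX_DB
		 * buffers per write, so notify the posted count in chunks.
		 */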
2576 		do {
2577 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2578 			be_rxq_notify(adapter, rxq->id, notify);
2579 			posted -= notify;
2580 		} while (posted);
2581 	} else if (atomic_read(&rxq->used) == 0) {
2582 		/* Let be_worker replenish when memory is available */
2583 		rxo->rx_post_starved = true;
2584 	}
2585 }
2586 
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589 	struct be_queue_info *tx_cq = &txo->cq;
2590 	struct be_tx_compl_info *txcp = &txo->txcp;
2591 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592 
2593 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594 		return NULL;
2595 
2596 	/* Ensure load ordering of valid bit dword and other dwords below */
2597 	rmb();
2598 	be_dws_le_to_cpu(compl, sizeof(*compl));
2599 
2600 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2601 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602 
2603 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604 	queue_tail_inc(tx_cq);
2605 	return txcp;
2606 }
2607 
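/* Walk the TXQ from its tail up to last_index, unmapping each wrb and
 * freeing the skbs that were queued for transmission. Returns the number
 * of wrbs processed so that the caller can adjust the queue's used count.
 */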
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609 			       struct be_tx_obj *txo, u16 last_index)
2610 {
2611 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2612 	struct be_queue_info *txq = &txo->q;
2613 	struct sk_buff *skb = NULL;
2614 	bool unmap_skb_hdr = false;
2615 	struct be_eth_wrb *wrb;
2616 	u16 num_wrbs = 0;
2617 	u32 frag_index;
2618 
2619 	do {
2620 		if (sent_skbs[txq->tail]) {
2621 			/* Free skb from prev req */
2622 			if (skb)
2623 				dev_consume_skb_any(skb);
2624 			skb = sent_skbs[txq->tail];
2625 			sent_skbs[txq->tail] = NULL;
2626 			queue_tail_inc(txq);  /* skip hdr wrb */
2627 			num_wrbs++;
2628 			unmap_skb_hdr = true;
2629 		}
2630 		wrb = queue_tail_node(txq);
2631 		frag_index = txq->tail;
2632 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2633 			      (unmap_skb_hdr && skb_headlen(skb)));
2634 		unmap_skb_hdr = false;
2635 		queue_tail_inc(txq);
2636 		num_wrbs++;
2637 	} while (frag_index != last_index);
2638 	dev_consume_skb_any(skb);
2639 
2640 	return num_wrbs;
2641 }
2642 
2643 /* Return the number of events in the event queue */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646 	struct be_eq_entry *eqe;
2647 	int num = 0;
2648 
2649 	do {
2650 		eqe = queue_tail_node(&eqo->q);
2651 		if (eqe->evt == 0)
2652 			break;
2653 
2654 		rmb();
2655 		eqe->evt = 0;
2656 		num++;
2657 		queue_tail_inc(&eqo->q);
2658 	} while (true);
2659 
2660 	return num;
2661 }
2662 
/* Leaves the EQ in a disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666 	int num = events_get(eqo);
2667 
2668 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670 
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674 	struct be_queue_info *rxq = &rxo->q;
2675 	struct be_rx_page_info *page_info;
2676 
2677 	while (atomic_read(&rxq->used) > 0) {
2678 		page_info = get_rx_page_info(rxo);
2679 		put_page(page_info->page);
2680 		memset(page_info, 0, sizeof(*page_info));
2681 	}
2682 	BUG_ON(atomic_read(&rxq->used));
2683 	rxq->tail = 0;
2684 	rxq->head = 0;
2685 }
2686 
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689 	struct be_queue_info *rx_cq = &rxo->cq;
2690 	struct be_rx_compl_info *rxcp;
2691 	struct be_adapter *adapter = rxo->adapter;
2692 	int flush_wait = 0;
2693 
2694 	/* Consume pending rx completions.
2695 	 * Wait for the flush completion (identified by zero num_rcvd)
2696 	 * to arrive. Notify CQ even when there are no more CQ entries
2697 	 * for HW to flush partially coalesced CQ entries.
2698 	 * In Lancer, there is no need to wait for flush compl.
2699 	 */
2700 	for (;;) {
2701 		rxcp = be_rx_compl_get(rxo);
2702 		if (!rxcp) {
2703 			if (lancer_chip(adapter))
2704 				break;
2705 
2706 			if (flush_wait++ > 50 ||
2707 			    be_check_error(adapter,
2708 					   BE_ERROR_HW)) {
2709 				dev_warn(&adapter->pdev->dev,
2710 					 "did not receive flush compl\n");
2711 				break;
2712 			}
2713 			be_cq_notify(adapter, rx_cq->id, true, 0);
2714 			mdelay(1);
2715 		} else {
2716 			be_rx_compl_discard(rxo, rxcp);
2717 			be_cq_notify(adapter, rx_cq->id, false, 1);
2718 			if (rxcp->num_rcvd == 0)
2719 				break;
2720 		}
2721 	}
2722 
2723 	/* After cleanup, leave the CQ in unarmed state */
2724 	be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726 
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729 	struct device *dev = &adapter->pdev->dev;
2730 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731 	struct be_tx_compl_info *txcp;
2732 	struct be_queue_info *txq;
2733 	u32 end_idx, notified_idx;
2734 	struct be_tx_obj *txo;
2735 	int i, pending_txqs;
2736 
2737 	/* Stop polling for compls when HW has been silent for 10ms */
2738 	do {
2739 		pending_txqs = adapter->num_tx_qs;
2740 
2741 		for_all_tx_queues(adapter, txo, i) {
2742 			cmpl = 0;
2743 			num_wrbs = 0;
2744 			txq = &txo->q;
2745 			while ((txcp = be_tx_compl_get(txo))) {
2746 				num_wrbs +=
2747 					be_tx_compl_process(adapter, txo,
2748 							    txcp->end_index);
2749 				cmpl++;
2750 			}
2751 			if (cmpl) {
2752 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753 				atomic_sub(num_wrbs, &txq->used);
2754 				timeo = 0;
2755 			}
2756 			if (!be_is_tx_compl_pending(txo))
2757 				pending_txqs--;
2758 		}
2759 
2760 		if (pending_txqs == 0 || ++timeo > 10 ||
2761 		    be_check_error(adapter, BE_ERROR_HW))
2762 			break;
2763 
2764 		mdelay(1);
2765 	} while (true);
2766 
2767 	/* Free enqueued TX that was never notified to HW */
2768 	for_all_tx_queues(adapter, txo, i) {
2769 		txq = &txo->q;
2770 
2771 		if (atomic_read(&txq->used)) {
2772 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773 				 i, atomic_read(&txq->used));
2774 			notified_idx = txq->tail;
2775 			end_idx = txq->tail;
2776 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777 				  txq->len);
2778 			/* Use the tx-compl process logic to handle requests
2779 			 * that were not sent to the HW.
2780 			 */
2781 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782 			atomic_sub(num_wrbs, &txq->used);
2783 			BUG_ON(atomic_read(&txq->used));
2784 			txo->pend_wrb_cnt = 0;
2785 			/* Since hw was never notified of these requests,
2786 			 * reset TXQ indices
2787 			 */
2788 			txq->head = notified_idx;
2789 			txq->tail = notified_idx;
2790 		}
2791 	}
2792 }
2793 
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796 	struct be_eq_obj *eqo;
2797 	int i;
2798 
2799 	for_all_evt_queues(adapter, eqo, i) {
2800 		if (eqo->q.created) {
2801 			be_eq_clean(eqo);
2802 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803 			netif_napi_del(&eqo->napi);
2804 			free_cpumask_var(eqo->affinity_mask);
2805 		}
2806 		be_queue_free(adapter, &eqo->q);
2807 	}
2808 }
2809 
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812 	struct be_queue_info *eq;
2813 	struct be_eq_obj *eqo;
2814 	struct be_aic_obj *aic;
2815 	int i, rc;
2816 
2817 	/* need enough EQs to service both RX and TX queues */
2818 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819 				    max(adapter->cfg_num_rx_irqs,
2820 					adapter->cfg_num_tx_irqs));
2821 
2822 	for_all_evt_queues(adapter, eqo, i) {
2823 		int numa_node = dev_to_node(&adapter->pdev->dev);
2824 
2825 		aic = &adapter->aic_obj[i];
2826 		eqo->adapter = adapter;
2827 		eqo->idx = i;
2828 		aic->max_eqd = BE_MAX_EQD;
2829 		aic->enable = true;
2830 
2831 		eq = &eqo->q;
2832 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833 				    sizeof(struct be_eq_entry));
2834 		if (rc)
2835 			return rc;
2836 
2837 		rc = be_cmd_eq_create(adapter, eqo);
2838 		if (rc)
2839 			return rc;
2840 
2841 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842 			return -ENOMEM;
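		/* Pick a CPU local to the adapter's NUMA node for this EQ;
		 * the mask is later used as the IRQ affinity hint and for
		 * XPS queue mapping.
		 */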
2843 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844 				eqo->affinity_mask);
2845 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846 			       BE_NAPI_WEIGHT);
2847 	}
2848 	return 0;
2849 }
2850 
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853 	struct be_queue_info *q;
2854 
2855 	q = &adapter->mcc_obj.q;
2856 	if (q->created)
2857 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858 	be_queue_free(adapter, q);
2859 
2860 	q = &adapter->mcc_obj.cq;
2861 	if (q->created)
2862 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863 	be_queue_free(adapter, q);
2864 }
2865 
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869 	struct be_queue_info *q, *cq;
2870 
2871 	cq = &adapter->mcc_obj.cq;
2872 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873 			   sizeof(struct be_mcc_compl)))
2874 		goto err;
2875 
2876 	/* Use the default EQ for MCC completions */
2877 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878 		goto mcc_cq_free;
2879 
2880 	q = &adapter->mcc_obj.q;
2881 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882 		goto mcc_cq_destroy;
2883 
2884 	if (be_cmd_mccq_create(adapter, q, cq))
2885 		goto mcc_q_free;
2886 
2887 	return 0;
2888 
2889 mcc_q_free:
2890 	be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894 	be_queue_free(adapter, cq);
2895 err:
2896 	return -1;
2897 }
2898 
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901 	struct be_queue_info *q;
2902 	struct be_tx_obj *txo;
2903 	u8 i;
2904 
2905 	for_all_tx_queues(adapter, txo, i) {
2906 		q = &txo->q;
2907 		if (q->created)
2908 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909 		be_queue_free(adapter, q);
2910 
2911 		q = &txo->cq;
2912 		if (q->created)
2913 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914 		be_queue_free(adapter, q);
2915 	}
2916 }
2917 
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920 	struct be_queue_info *cq;
2921 	struct be_tx_obj *txo;
2922 	struct be_eq_obj *eqo;
2923 	int status, i;
2924 
2925 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926 
2927 	for_all_tx_queues(adapter, txo, i) {
2928 		cq = &txo->cq;
2929 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930 					sizeof(struct be_eth_tx_compl));
2931 		if (status)
2932 			return status;
2933 
2934 		u64_stats_init(&txo->stats.sync);
2935 		u64_stats_init(&txo->stats.sync_compl);
2936 
		/* If num_evt_qs is less than num_tx_qs, then more than
		 * one txq shares an eq
		 */
2940 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942 		if (status)
2943 			return status;
2944 
2945 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946 					sizeof(struct be_eth_wrb));
2947 		if (status)
2948 			return status;
2949 
2950 		status = be_cmd_txq_create(adapter, txo);
2951 		if (status)
2952 			return status;
2953 
2954 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955 				    eqo->idx);
2956 	}
2957 
2958 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959 		 adapter->num_tx_qs);
2960 	return 0;
2961 }
2962 
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965 	struct be_queue_info *q;
2966 	struct be_rx_obj *rxo;
2967 	int i;
2968 
2969 	for_all_rx_queues(adapter, rxo, i) {
2970 		q = &rxo->cq;
2971 		if (q->created)
2972 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973 		be_queue_free(adapter, q);
2974 	}
2975 }
2976 
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979 	struct be_queue_info *eq, *cq;
2980 	struct be_rx_obj *rxo;
2981 	int rc, i;
2982 
2983 	adapter->num_rss_qs =
2984 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985 
	/* We'll use RSS only if at least 2 RSS rings are supported. */
2987 	if (adapter->num_rss_qs < 2)
2988 		adapter->num_rss_qs = 0;
2989 
2990 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991 
2992 	/* When the interface is not capable of RSS rings (and there is no
2993 	 * need to create a default RXQ) we'll still need one RXQ
2994 	 */
2995 	if (adapter->num_rx_qs == 0)
2996 		adapter->num_rx_qs = 1;
2997 
2998 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999 	for_all_rx_queues(adapter, rxo, i) {
3000 		rxo->adapter = adapter;
3001 		cq = &rxo->cq;
3002 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003 				    sizeof(struct be_eth_rx_compl));
3004 		if (rc)
3005 			return rc;
3006 
3007 		u64_stats_init(&rxo->stats.sync);
3008 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010 		if (rc)
3011 			return rc;
3012 	}
3013 
3014 	dev_info(&adapter->pdev->dev,
3015 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3016 	return 0;
3017 }
3018 
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021 	struct be_eq_obj *eqo = dev;
3022 	struct be_adapter *adapter = eqo->adapter;
3023 	int num_evts = 0;
3024 
3025 	/* IRQ is not expected when NAPI is scheduled as the EQ
3026 	 * will not be armed.
3027 	 * But, this can happen on Lancer INTx where it takes
	 * a while to de-assert INTx or in BE2 where occasionally
3029 	 * an interrupt may be raised even when EQ is unarmed.
3030 	 * If NAPI is already scheduled, then counting & notifying
3031 	 * events will orphan them.
3032 	 */
3033 	if (napi_schedule_prep(&eqo->napi)) {
3034 		num_evts = events_get(eqo);
3035 		__napi_schedule(&eqo->napi);
3036 		if (num_evts)
3037 			eqo->spurious_intr = 0;
3038 	}
3039 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040 
	/* Return IRQ_HANDLED only for the first spurious intr
3042 	 * after a valid intr to stop the kernel from branding
3043 	 * this irq as a bad one!
3044 	 */
3045 	if (num_evts || eqo->spurious_intr++ == 0)
3046 		return IRQ_HANDLED;
3047 	else
3048 		return IRQ_NONE;
3049 }
3050 
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053 	struct be_eq_obj *eqo = dev;
3054 
3055 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056 	napi_schedule(&eqo->napi);
3057 	return IRQ_HANDLED;
3058 }
3059 
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062 	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3063 }
3064 
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066 			 int budget)
3067 {
3068 	struct be_adapter *adapter = rxo->adapter;
3069 	struct be_queue_info *rx_cq = &rxo->cq;
3070 	struct be_rx_compl_info *rxcp;
3071 	u32 work_done;
3072 	u32 frags_consumed = 0;
3073 
3074 	for (work_done = 0; work_done < budget; work_done++) {
3075 		rxcp = be_rx_compl_get(rxo);
3076 		if (!rxcp)
3077 			break;
3078 
		/* Is it a flush compl that has no data? */
3080 		if (unlikely(rxcp->num_rcvd == 0))
3081 			goto loop_continue;
3082 
		/* Discard compls with partial DMA on Lancer B0 */
3084 		if (unlikely(!rxcp->pkt_size)) {
3085 			be_rx_compl_discard(rxo, rxcp);
3086 			goto loop_continue;
3087 		}
3088 
3089 		/* On BE drop pkts that arrive due to imperfect filtering in
		 * promiscuous mode on some SKUs
3091 		 */
3092 		if (unlikely(rxcp->port != adapter->port_num &&
3093 			     !lancer_chip(adapter))) {
3094 			be_rx_compl_discard(rxo, rxcp);
3095 			goto loop_continue;
3096 		}
3097 
3098 		if (do_gro(rxcp))
3099 			be_rx_compl_process_gro(rxo, napi, rxcp);
3100 		else
3101 			be_rx_compl_process(rxo, napi, rxcp);
3102 
3103 loop_continue:
3104 		frags_consumed += rxcp->num_rcvd;
3105 		be_rx_stats_update(rxo, rxcp);
3106 	}
3107 
3108 	if (work_done) {
3109 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3110 
3111 		/* When an rx-obj gets into post_starved state, just
3112 		 * let be_worker do the posting.
3113 		 */
3114 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115 		    !rxo->rx_post_starved)
3116 			be_post_rx_frags(rxo, GFP_ATOMIC,
3117 					 max_t(u32, MAX_RX_POST,
3118 					       frags_consumed));
3119 	}
3120 
3121 	return work_done;
3122 }
3123 
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126 	switch (status) {
3127 	case BE_TX_COMP_HDR_PARSE_ERR:
3128 		tx_stats(txo)->tx_hdr_parse_err++;
3129 		break;
3130 	case BE_TX_COMP_NDMA_ERR:
3131 		tx_stats(txo)->tx_dma_err++;
3132 		break;
3133 	case BE_TX_COMP_ACL_ERR:
3134 		tx_stats(txo)->tx_spoof_check_err++;
3135 		break;
3136 	}
3137 }
3138 
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141 	switch (status) {
3142 	case LANCER_TX_COMP_LSO_ERR:
3143 		tx_stats(txo)->tx_tso_err++;
3144 		break;
3145 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147 		tx_stats(txo)->tx_spoof_check_err++;
3148 		break;
3149 	case LANCER_TX_COMP_QINQ_ERR:
3150 		tx_stats(txo)->tx_qinq_err++;
3151 		break;
3152 	case LANCER_TX_COMP_PARITY_ERR:
3153 		tx_stats(txo)->tx_internal_parity_err++;
3154 		break;
3155 	case LANCER_TX_COMP_DMA_ERR:
3156 		tx_stats(txo)->tx_dma_err++;
3157 		break;
3158 	}
3159 }
3160 
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162 			  int idx)
3163 {
3164 	int num_wrbs = 0, work_done = 0;
3165 	struct be_tx_compl_info *txcp;
3166 
3167 	while ((txcp = be_tx_compl_get(txo))) {
3168 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169 		work_done++;
3170 
3171 		if (txcp->status) {
3172 			if (lancer_chip(adapter))
3173 				lancer_update_tx_err(txo, txcp->status);
3174 			else
3175 				be_update_tx_err(txo, txcp->status);
3176 		}
3177 	}
3178 
3179 	if (work_done) {
3180 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3181 		atomic_sub(num_wrbs, &txo->q.used);
3182 
		/* As Tx wrbs have been freed up, wake up the netdev queue
		 * if it was stopped due to lack of tx wrbs.
		 */
3185 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186 		    be_can_txq_wake(txo)) {
3187 			netif_wake_subqueue(adapter->netdev, idx);
3188 		}
3189 
3190 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191 		tx_stats(txo)->tx_compl += work_done;
3192 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193 	}
3194 }
3195 
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199 	struct be_adapter *adapter = eqo->adapter;
3200 	int max_work = 0, work, i, num_evts;
3201 	struct be_rx_obj *rxo;
3202 	struct be_tx_obj *txo;
3203 	u32 mult_enc = 0;
3204 
3205 	num_evts = events_get(eqo);
3206 
3207 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208 		be_process_tx(adapter, txo, i);
3209 
	/* This loop will iterate twice for EQ0, in which completions of the
	 * last RXQ (the default one) are also processed.
	 * For other EQs the loop iterates only once.
	 */
3214 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215 		work = be_process_rx(rxo, napi, budget);
3216 		max_work = max(work, max_work);
3217 	}
3218 
3219 	if (is_mcc_eqo(eqo))
3220 		be_process_mcc(adapter);
3221 
3222 	if (max_work < budget) {
3223 		napi_complete_done(napi, max_work);
3224 
3225 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3226 		 * delay via a delay multiplier encoding value
3227 		 */
3228 		if (skyhawk_chip(adapter))
3229 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3230 
3231 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232 			     mult_enc);
3233 	} else {
3234 		/* As we'll continue in polling mode, count and clear events */
3235 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236 	}
3237 	return max_work;
3238 }
3239 
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244 	u32 i;
3245 	struct device *dev = &adapter->pdev->dev;
3246 
3247 	if (be_check_error(adapter, BE_ERROR_HW))
3248 		return;
3249 
3250 	if (lancer_chip(adapter)) {
3251 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3252 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3253 			be_set_error(adapter, BE_ERROR_UE);
3254 			sliport_err1 = ioread32(adapter->db +
3255 						SLIPORT_ERROR1_OFFSET);
3256 			sliport_err2 = ioread32(adapter->db +
3257 						SLIPORT_ERROR2_OFFSET);
			/* Do not log error messages if it's a FW reset */
3259 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3260 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3261 				dev_info(dev, "Firmware update in progress\n");
3262 			} else {
3263 				dev_err(dev, "Error detected in the card\n");
3264 				dev_err(dev, "ERR: sliport status 0x%x\n",
3265 					sliport_status);
3266 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3267 					sliport_err1);
3268 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3269 					sliport_err2);
3270 			}
3271 		}
3272 	} else {
3273 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3274 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3275 		ue_lo_mask = ioread32(adapter->pcicfg +
3276 				      PCICFG_UE_STATUS_LOW_MASK);
3277 		ue_hi_mask = ioread32(adapter->pcicfg +
3278 				      PCICFG_UE_STATUS_HI_MASK);
3279 
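		/* Ignore UE bits that are masked off in the UE status
		 * mask registers
		 */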
3280 		ue_lo = (ue_lo & ~ue_lo_mask);
3281 		ue_hi = (ue_hi & ~ue_hi_mask);
3282 
		/* On certain platforms BE hardware can indicate spurious UEs.
		 * In case of a real UE the hardware stops working on its own
		 * anyway, so don't latch the HW error state merely because a
		 * UE bit is set (except on Skyhawk, below).
		 */
3287 
3288 		if (ue_lo || ue_hi) {
			dev_err(dev, "Error detected in the adapter\n");
3290 			if (skyhawk_chip(adapter))
3291 				be_set_error(adapter, BE_ERROR_UE);
3292 
3293 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3294 				if (ue_lo & 1)
3295 					dev_err(dev, "UE: %s bit set\n",
3296 						ue_status_low_desc[i]);
3297 			}
3298 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3299 				if (ue_hi & 1)
3300 					dev_err(dev, "UE: %s bit set\n",
3301 						ue_status_hi_desc[i]);
3302 			}
3303 		}
3304 	}
3305 }
3306 
3307 static void be_msix_disable(struct be_adapter *adapter)
3308 {
3309 	if (msix_enabled(adapter)) {
3310 		pci_disable_msix(adapter->pdev);
3311 		adapter->num_msix_vec = 0;
3312 		adapter->num_msix_roce_vec = 0;
3313 	}
3314 }
3315 
3316 static int be_msix_enable(struct be_adapter *adapter)
3317 {
3318 	unsigned int i, max_roce_eqs;
3319 	struct device *dev = &adapter->pdev->dev;
3320 	int num_vec;
3321 
3322 	/* If RoCE is supported, program the max number of vectors that
3323 	 * could be used for NIC and RoCE, else, just program the number
3324 	 * we'll use initially.
3325 	 */
3326 	if (be_roce_supported(adapter)) {
3327 		max_roce_eqs =
3328 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3329 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3330 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3331 	} else {
3332 		num_vec = max(adapter->cfg_num_rx_irqs,
3333 			      adapter->cfg_num_tx_irqs);
3334 	}
3335 
3336 	for (i = 0; i < num_vec; i++)
3337 		adapter->msix_entries[i].entry = i;
3338 
3339 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3340 					MIN_MSIX_VECTORS, num_vec);
3341 	if (num_vec < 0)
3342 		goto fail;
3343 
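	/* When RoCE is supported, give half of the allocated vectors to RoCE
	 * and keep the remainder (computed below) for the NIC queues.
	 */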
3344 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3345 		adapter->num_msix_roce_vec = num_vec / 2;
3346 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3347 			 adapter->num_msix_roce_vec);
3348 	}
3349 
3350 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3351 
3352 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3353 		 adapter->num_msix_vec);
3354 	return 0;
3355 
3356 fail:
3357 	dev_warn(dev, "MSIx enable failed\n");
3358 
3359 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3360 	if (be_virtfn(adapter))
3361 		return num_vec;
3362 	return 0;
3363 }
3364 
3365 static inline int be_msix_vec_get(struct be_adapter *adapter,
3366 				  struct be_eq_obj *eqo)
3367 {
3368 	return adapter->msix_entries[eqo->msix_idx].vector;
3369 }
3370 
3371 static int be_msix_register(struct be_adapter *adapter)
3372 {
3373 	struct net_device *netdev = adapter->netdev;
3374 	struct be_eq_obj *eqo;
3375 	int status, i, vec;
3376 
3377 	for_all_evt_queues(adapter, eqo, i) {
3378 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3379 		vec = be_msix_vec_get(adapter, eqo);
3380 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3381 		if (status)
3382 			goto err_msix;
3383 
3384 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3385 	}
3386 
3387 	return 0;
3388 err_msix:
3389 	for (i--; i >= 0; i--) {
3390 		eqo = &adapter->eq_obj[i];
3391 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3392 	}
3393 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3394 		 status);
3395 	be_msix_disable(adapter);
3396 	return status;
3397 }
3398 
3399 static int be_irq_register(struct be_adapter *adapter)
3400 {
3401 	struct net_device *netdev = adapter->netdev;
3402 	int status;
3403 
3404 	if (msix_enabled(adapter)) {
3405 		status = be_msix_register(adapter);
3406 		if (status == 0)
3407 			goto done;
3408 		/* INTx is not supported for VF */
3409 		if (be_virtfn(adapter))
3410 			return status;
3411 	}
3412 
3413 	/* INTx: only the first EQ is used */
3414 	netdev->irq = adapter->pdev->irq;
3415 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3416 			     &adapter->eq_obj[0]);
3417 	if (status) {
3418 		dev_err(&adapter->pdev->dev,
3419 			"INTx request IRQ failed - err %d\n", status);
3420 		return status;
3421 	}
3422 done:
3423 	adapter->isr_registered = true;
3424 	return 0;
3425 }
3426 
3427 static void be_irq_unregister(struct be_adapter *adapter)
3428 {
3429 	struct net_device *netdev = adapter->netdev;
3430 	struct be_eq_obj *eqo;
3431 	int i, vec;
3432 
3433 	if (!adapter->isr_registered)
3434 		return;
3435 
3436 	/* INTx */
3437 	if (!msix_enabled(adapter)) {
3438 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3439 		goto done;
3440 	}
3441 
3442 	/* MSIx */
3443 	for_all_evt_queues(adapter, eqo, i) {
3444 		vec = be_msix_vec_get(adapter, eqo);
3445 		irq_set_affinity_hint(vec, NULL);
3446 		free_irq(vec, eqo);
3447 	}
3448 
3449 done:
3450 	adapter->isr_registered = false;
3451 }
3452 
3453 static void be_rx_qs_destroy(struct be_adapter *adapter)
3454 {
3455 	struct rss_info *rss = &adapter->rss_info;
3456 	struct be_queue_info *q;
3457 	struct be_rx_obj *rxo;
3458 	int i;
3459 
3460 	for_all_rx_queues(adapter, rxo, i) {
3461 		q = &rxo->q;
3462 		if (q->created) {
3463 			/* If RXQs are destroyed while in an "out of buffer"
3464 			 * state, there is a possibility of an HW stall on
3465 			 * Lancer. So, post 64 buffers to each queue to relieve
3466 			 * the "out of buffer" condition.
3467 			 * Make sure there's space in the RXQ before posting.
3468 			 */
3469 			if (lancer_chip(adapter)) {
3470 				be_rx_cq_clean(rxo);
3471 				if (atomic_read(&q->used) == 0)
3472 					be_post_rx_frags(rxo, GFP_KERNEL,
3473 							 MAX_RX_POST);
3474 			}
3475 
3476 			be_cmd_rxq_destroy(adapter, q);
3477 			be_rx_cq_clean(rxo);
3478 			be_rxq_clean(rxo);
3479 		}
3480 		be_queue_free(adapter, q);
3481 	}
3482 
3483 	if (rss->rss_flags) {
3484 		rss->rss_flags = RSS_ENABLE_NONE;
3485 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3486 				  128, rss->rss_hkey);
3487 	}
3488 }
3489 
3490 static void be_disable_if_filters(struct be_adapter *adapter)
3491 {
3492 	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3493 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3494 	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3495 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3496 		eth_zero_addr(adapter->dev_mac);
3497 	}
3498 
3499 	be_clear_uc_list(adapter);
3500 	be_clear_mc_list(adapter);
3501 
3502 	/* The IFACE flags are enabled in the open path and cleared
3503 	 * in the close path. When a VF gets detached from the host and
3504 	 * assigned to a VM the following happens:
3505 	 *	- VF's IFACE flags get cleared in the detach path
3506 	 *	- IFACE create is issued by the VF in the attach path
3507 	 * Due to a bug in the BE3/Skyhawk-R FW
3508 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3509 	 * specified along with the IFACE create cmd issued by a VF are not
3510 	 * honoured by FW.  As a consequence, if a *new* driver
3511 	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is used by a VM/VF,
3513 	 * the IFACE gets created *without* the needed flags.
3514 	 * To avoid this, disable RX-filter flags only for Lancer.
3515 	 */
3516 	if (lancer_chip(adapter)) {
3517 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3518 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3519 	}
3520 }
3521 
3522 static int be_close(struct net_device *netdev)
3523 {
3524 	struct be_adapter *adapter = netdev_priv(netdev);
3525 	struct be_eq_obj *eqo;
3526 	int i;
3527 
3528 	/* This protection is needed as be_close() may be called even when the
3529 	 * adapter is in cleared state (after eeh perm failure)
3530 	 */
3531 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3532 		return 0;
3533 
	/* Before attempting cleanup, ensure that all the pending cmds queued
	 * on be_wq have finished execution
3536 	 */
3537 	flush_workqueue(be_wq);
3538 
3539 	be_disable_if_filters(adapter);
3540 
3541 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3542 		for_all_evt_queues(adapter, eqo, i) {
3543 			napi_disable(&eqo->napi);
3544 		}
3545 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3546 	}
3547 
3548 	be_async_mcc_disable(adapter);
3549 
3550 	/* Wait for all pending tx completions to arrive so that
3551 	 * all tx skbs are freed.
3552 	 */
3553 	netif_tx_disable(netdev);
3554 	be_tx_compl_clean(adapter);
3555 
3556 	be_rx_qs_destroy(adapter);
3557 
3558 	for_all_evt_queues(adapter, eqo, i) {
3559 		if (msix_enabled(adapter))
3560 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3561 		else
3562 			synchronize_irq(netdev->irq);
3563 		be_eq_clean(eqo);
3564 	}
3565 
3566 	be_irq_unregister(adapter);
3567 
3568 	return 0;
3569 }
3570 
3571 static int be_rx_qs_create(struct be_adapter *adapter)
3572 {
3573 	struct rss_info *rss = &adapter->rss_info;
3574 	u8 rss_key[RSS_HASH_KEY_LEN];
3575 	struct be_rx_obj *rxo;
3576 	int rc, i, j;
3577 
3578 	for_all_rx_queues(adapter, rxo, i) {
3579 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3580 				    sizeof(struct be_eth_rx_d));
3581 		if (rc)
3582 			return rc;
3583 	}
3584 
3585 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3586 		rxo = default_rxo(adapter);
3587 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3588 				       rx_frag_size, adapter->if_handle,
3589 				       false, &rxo->rss_id);
3590 		if (rc)
3591 			return rc;
3592 	}
3593 
3594 	for_all_rss_queues(adapter, rxo, i) {
3595 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3596 				       rx_frag_size, adapter->if_handle,
3597 				       true, &rxo->rss_id);
3598 		if (rc)
3599 			return rc;
3600 	}
3601 
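	/* When multiple RX queues exist, build the RSS indirection table by
	 * striping the RSS queue ids across all RSS_INDIR_TABLE_LEN entries
	 * in round-robin order, then program the table and hash key into FW.
	 */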
3602 	if (be_multi_rxq(adapter)) {
3603 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3604 			for_all_rss_queues(adapter, rxo, i) {
3605 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3606 					break;
3607 				rss->rsstable[j + i] = rxo->rss_id;
3608 				rss->rss_queue[j + i] = i;
3609 			}
3610 		}
3611 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3612 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3613 
3614 		if (!BEx_chip(adapter))
3615 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3616 				RSS_ENABLE_UDP_IPV6;
3617 
3618 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3619 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3620 				       RSS_INDIR_TABLE_LEN, rss_key);
3621 		if (rc) {
3622 			rss->rss_flags = RSS_ENABLE_NONE;
3623 			return rc;
3624 		}
3625 
3626 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3627 	} else {
3628 		/* Disable RSS, if only default RX Q is created */
3629 		rss->rss_flags = RSS_ENABLE_NONE;
3630 	}
3631 
3633 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3634 	 * which is a queue empty condition
3635 	 */
3636 	for_all_rx_queues(adapter, rxo, i)
3637 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3638 
3639 	return 0;
3640 }
3641 
3642 static int be_enable_if_filters(struct be_adapter *adapter)
3643 {
3644 	int status;
3645 
3646 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3647 	if (status)
3648 		return status;
3649 
3650 	/* Normally this condition is true, as ->dev_mac is zeroed.
3651 	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3652 	 * subsequent be_dev_mac_add() can fail (after fresh boot)
3653 	 */
3654 	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3655 		int old_pmac_id = -1;
3656 
3657 		/* Remember old programmed MAC if any - can happen on BE3 VF */
3658 		if (!is_zero_ether_addr(adapter->dev_mac))
3659 			old_pmac_id = adapter->pmac_id[0];
3660 
3661 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3662 		if (status)
3663 			return status;
3664 
3665 		/* Delete the old programmed MAC as we successfully programmed
3666 		 * a new MAC
3667 		 */
3668 		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3669 			be_dev_mac_del(adapter, old_pmac_id);
3670 
3671 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3672 	}
3673 
3674 	if (adapter->vlans_added)
3675 		be_vid_config(adapter);
3676 
3677 	__be_set_rx_mode(adapter);
3678 
3679 	return 0;
3680 }
3681 
3682 static int be_open(struct net_device *netdev)
3683 {
3684 	struct be_adapter *adapter = netdev_priv(netdev);
3685 	struct be_eq_obj *eqo;
3686 	struct be_rx_obj *rxo;
3687 	struct be_tx_obj *txo;
3688 	u8 link_status;
3689 	int status, i;
3690 
3691 	status = be_rx_qs_create(adapter);
3692 	if (status)
3693 		goto err;
3694 
3695 	status = be_enable_if_filters(adapter);
3696 	if (status)
3697 		goto err;
3698 
3699 	status = be_irq_register(adapter);
3700 	if (status)
3701 		goto err;
3702 
3703 	for_all_rx_queues(adapter, rxo, i)
3704 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3705 
3706 	for_all_tx_queues(adapter, txo, i)
3707 		be_cq_notify(adapter, txo->cq.id, true, 0);
3708 
3709 	be_async_mcc_enable(adapter);
3710 
3711 	for_all_evt_queues(adapter, eqo, i) {
3712 		napi_enable(&eqo->napi);
3713 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3714 	}
3715 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3716 
3717 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3718 	if (!status)
3719 		be_link_status_update(adapter, link_status);
3720 
3721 	netif_tx_start_all_queues(netdev);
3722 	if (skyhawk_chip(adapter))
3723 		udp_tunnel_get_rx_info(netdev);
3724 
3725 	return 0;
3726 err:
3727 	be_close(adapter->netdev);
3728 	return -EIO;
3729 }
3730 
3731 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3732 {
3733 	u32 addr;
3734 
3735 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3736 
3737 	mac[5] = (u8)(addr & 0xFF);
3738 	mac[4] = (u8)((addr >> 8) & 0xFF);
3739 	mac[3] = (u8)((addr >> 16) & 0xFF);
3740 	/* Use the OUI from the current MAC address */
3741 	memcpy(mac, adapter->netdev->dev_addr, 3);
3742 }
3743 
3744 /*
3745  * Generate a seed MAC address from the PF MAC Address using jhash.
3746  * MAC addresses for the VFs are assigned incrementally starting from the seed.
3747  * These addresses are programmed in the ASIC by the PF and the VF driver
3748  * queries for the MAC address during its probe.
3749  */
3750 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3751 {
3752 	u32 vf;
3753 	int status = 0;
3754 	u8 mac[ETH_ALEN];
3755 	struct be_vf_cfg *vf_cfg;
3756 
3757 	be_vf_eth_addr_generate(adapter, mac);
3758 
3759 	for_all_vfs(adapter, vf_cfg, vf) {
3760 		if (BEx_chip(adapter))
3761 			status = be_cmd_pmac_add(adapter, mac,
3762 						 vf_cfg->if_handle,
3763 						 &vf_cfg->pmac_id, vf + 1);
3764 		else
3765 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3766 						vf + 1);
3767 
3768 		if (status)
3769 			dev_err(&adapter->pdev->dev,
3770 				"Mac address assignment failed for VF %d\n",
3771 				vf);
3772 		else
3773 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3774 
3775 		mac[5] += 1;
3776 	}
3777 	return status;
3778 }
3779 
3780 static int be_vfs_mac_query(struct be_adapter *adapter)
3781 {
3782 	int status, vf;
3783 	u8 mac[ETH_ALEN];
3784 	struct be_vf_cfg *vf_cfg;
3785 
3786 	for_all_vfs(adapter, vf_cfg, vf) {
3787 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3788 					       mac, vf_cfg->if_handle,
3789 					       false, vf+1);
3790 		if (status)
3791 			return status;
3792 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3793 	}
3794 	return 0;
3795 }
3796 
3797 static void be_vf_clear(struct be_adapter *adapter)
3798 {
3799 	struct be_vf_cfg *vf_cfg;
3800 	u32 vf;
3801 
3802 	if (pci_vfs_assigned(adapter->pdev)) {
3803 		dev_warn(&adapter->pdev->dev,
3804 			 "VFs are assigned to VMs: not disabling VFs\n");
3805 		goto done;
3806 	}
3807 
3808 	pci_disable_sriov(adapter->pdev);
3809 
3810 	for_all_vfs(adapter, vf_cfg, vf) {
3811 		if (BEx_chip(adapter))
3812 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3813 					vf_cfg->pmac_id, vf + 1);
3814 		else
3815 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3816 				       vf + 1);
3817 
3818 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3819 	}
3820 
3821 	if (BE3_chip(adapter))
3822 		be_cmd_set_hsw_config(adapter, 0, 0,
3823 				      adapter->if_handle,
3824 				      PORT_FWD_TYPE_PASSTHRU, 0);
3825 done:
3826 	kfree(adapter->vf_cfg);
3827 	adapter->num_vfs = 0;
3828 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3829 }
3830 
3831 static void be_clear_queues(struct be_adapter *adapter)
3832 {
3833 	be_mcc_queues_destroy(adapter);
3834 	be_rx_cqs_destroy(adapter);
3835 	be_tx_queues_destroy(adapter);
3836 	be_evt_queues_destroy(adapter);
3837 }
3838 
3839 static void be_cancel_worker(struct be_adapter *adapter)
3840 {
3841 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3842 		cancel_delayed_work_sync(&adapter->work);
3843 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3844 	}
3845 }
3846 
3847 static void be_cancel_err_detection(struct be_adapter *adapter)
3848 {
3849 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3850 
3851 	if (!be_err_recovery_workq)
3852 		return;
3853 
3854 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3855 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3856 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3857 	}
3858 }
3859 
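/* Undo VxLAN offload setup: convert the tunnel interface back to a normal
 * interface, clear the VxLAN UDP port in HW and strip the tunnel GSO/csum
 * features from the netdev.
 */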
3860 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3861 {
3862 	struct net_device *netdev = adapter->netdev;
3863 
3864 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3865 		be_cmd_manage_iface(adapter, adapter->if_handle,
3866 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3867 
3868 	if (adapter->vxlan_port)
3869 		be_cmd_set_vxlan_port(adapter, 0);
3870 
3871 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3872 	adapter->vxlan_port = 0;
3873 
3874 	netdev->hw_enc_features = 0;
3875 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3876 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3877 }
3878 
3879 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3880 				struct be_resources *vft_res)
3881 {
3882 	struct be_resources res = adapter->pool_res;
3883 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3884 	struct be_resources res_mod = {0};
3885 	u16 num_vf_qs = 1;
3886 
3887 	/* Distribute the queue resources among the PF and its VFs */
3888 	if (num_vfs) {
3889 		/* Divide the rx queues evenly among the VFs and the PF, capped
3890 		 * at VF-EQ-count. Any remainder queues belong to the PF.
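		 * e.g. with max_rss_qs = 16 and num_vfs = 3, each of the 4
		 * functions is offered min(SH_VF_MAX_NIC_EQS, 16 / 4) queues.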
3891 		 */
3892 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3893 				res.max_rss_qs / (num_vfs + 1));
3894 
3895 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3896 		 * RSS Tables per port. Provide RSS on VFs, only if number of
3897 		 * VFs requested is less than its PF Pool's RSS Tables limit.
3898 		 */
3899 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3900 			num_vf_qs = 1;
3901 	}
3902 
3903 	/* GET_PROFILE_CONFIG returns a resource struct with all '1's in the
3904 	 * fields that are modifiable using the SET_PROFILE_CONFIG cmd.
3905 	 */
3906 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3907 				  RESOURCE_MODIFIABLE, 0);
3908 
3909 	/* If RSS IFACE capability flags are modifiable for a VF, set the
3910 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3911 	 * more than 1 RSSQ is available for a VF.
3912 	 * Otherwise, provision only 1 queue pair for VF.
3913 	 */
3914 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3915 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3916 		if (num_vf_qs > 1) {
3917 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3918 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3919 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3920 		} else {
3921 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3922 					     BE_IF_FLAGS_DEFQ_RSS);
3923 		}
3924 	} else {
3925 		num_vf_qs = 1;
3926 	}
3927 
3928 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3929 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3930 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3931 	}
3932 
3933 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
3934 	vft_res->max_rx_qs = num_vf_qs;
3935 	vft_res->max_rss_qs = num_vf_qs;
3936 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3937 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3938 
3939 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3940 	 * among the PF and its VFs, if the fields are changeable
3941 	 */
3942 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3943 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3944 
3945 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
3946 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3947 
3948 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3949 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3950 
3951 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3952 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3953 }
3954 
3955 static void be_if_destroy(struct be_adapter *adapter)
3956 {
3957 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3958 
3959 	kfree(adapter->pmac_id);
3960 	adapter->pmac_id = NULL;
3961 
3962 	kfree(adapter->mc_list);
3963 	adapter->mc_list = NULL;
3964 
3965 	kfree(adapter->uc_list);
3966 	adapter->uc_list = NULL;
3967 }
3968 
3969 static int be_clear(struct be_adapter *adapter)
3970 {
3971 	struct pci_dev *pdev = adapter->pdev;
3972 	struct  be_resources vft_res = {0};
3973 
3974 	be_cancel_worker(adapter);
3975 
3976 	flush_workqueue(be_wq);
3977 
3978 	if (sriov_enabled(adapter))
3979 		be_vf_clear(adapter);
3980 
3981 	/* Re-configure FW to distribute resources evenly across max-supported
3982 	 * number of VFs, only when VFs are not already enabled.
3983 	 */
3984 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3985 	    !pci_vfs_assigned(pdev)) {
3986 		be_calculate_vf_res(adapter,
3987 				    pci_sriov_get_totalvfs(pdev),
3988 				    &vft_res);
3989 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
3990 					pci_sriov_get_totalvfs(pdev),
3991 					&vft_res);
3992 	}
3993 
3994 	be_disable_vxlan_offloads(adapter);
3995 
3996 	be_if_destroy(adapter);
3997 
3998 	be_clear_queues(adapter);
3999 
4000 	be_msix_disable(adapter);
4001 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4002 	return 0;
4003 }
4004 
4005 static int be_vfs_if_create(struct be_adapter *adapter)
4006 {
4007 	struct be_resources res = {0};
4008 	u32 cap_flags, en_flags, vf;
4009 	struct be_vf_cfg *vf_cfg;
4010 	int status;
4011 
4012 	/* If a FW profile exists, then cap_flags are updated */
4013 	cap_flags = BE_VF_IF_EN_FLAGS;
4014 
4015 	for_all_vfs(adapter, vf_cfg, vf) {
4016 		if (!BE3_chip(adapter)) {
4017 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4018 							   ACTIVE_PROFILE_TYPE,
4019 							   RESOURCE_LIMITS,
4020 							   vf + 1);
4021 			if (!status) {
4022 				cap_flags = res.if_cap_flags;
4023 				/* Prevent VFs from enabling VLAN promiscuous
4024 				 * mode
4025 				 */
4026 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4027 			}
4028 		}
4029 
4030 		/* PF should enable IF flags during proxy if_create call */
4031 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4032 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4033 					  &vf_cfg->if_handle, vf + 1);
4034 		if (status)
4035 			return status;
4036 	}
4037 
4038 	return 0;
4039 }
4040 
4041 static int be_vf_setup_init(struct be_adapter *adapter)
4042 {
4043 	struct be_vf_cfg *vf_cfg;
4044 	int vf;
4045 
4046 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4047 				  GFP_KERNEL);
4048 	if (!adapter->vf_cfg)
4049 		return -ENOMEM;
4050 
4051 	for_all_vfs(adapter, vf_cfg, vf) {
4052 		vf_cfg->if_handle = -1;
4053 		vf_cfg->pmac_id = -1;
4054 	}
4055 	return 0;
4056 }
4057 
4058 static int be_vf_setup(struct be_adapter *adapter)
4059 {
4060 	struct device *dev = &adapter->pdev->dev;
4061 	struct be_vf_cfg *vf_cfg;
4062 	int status, old_vfs, vf;
4063 	bool spoofchk;
4064 
4065 	old_vfs = pci_num_vf(adapter->pdev);
4066 
4067 	status = be_vf_setup_init(adapter);
4068 	if (status)
4069 		goto err;
4070 
4071 	if (old_vfs) {
4072 		for_all_vfs(adapter, vf_cfg, vf) {
4073 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4074 			if (status)
4075 				goto err;
4076 		}
4077 
4078 		status = be_vfs_mac_query(adapter);
4079 		if (status)
4080 			goto err;
4081 	} else {
4082 		status = be_vfs_if_create(adapter);
4083 		if (status)
4084 			goto err;
4085 
4086 		status = be_vf_eth_addr_config(adapter);
4087 		if (status)
4088 			goto err;
4089 	}
4090 
4091 	for_all_vfs(adapter, vf_cfg, vf) {
4092 		/* Allow VFs to program MAC/VLAN filters */
4093 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4094 						  vf + 1);
4095 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4096 			status = be_cmd_set_fn_privileges(adapter,
4097 							  vf_cfg->privileges |
4098 							  BE_PRIV_FILTMGMT,
4099 							  vf + 1);
4100 			if (!status) {
4101 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4102 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4103 					 vf);
4104 			}
4105 		}
4106 
4107 		/* Allow full available bandwidth */
4108 		if (!old_vfs)
4109 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4110 
4111 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4112 					       vf_cfg->if_handle, NULL,
4113 					       &spoofchk);
4114 		if (!status)
4115 			vf_cfg->spoofchk = spoofchk;
4116 
4117 		if (!old_vfs) {
4118 			be_cmd_enable_vf(adapter, vf + 1);
4119 			be_cmd_set_logical_link_config(adapter,
4120 						       IFLA_VF_LINK_STATE_AUTO,
4121 						       vf+1);
4122 		}
4123 	}
4124 
4125 	if (!old_vfs) {
4126 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4127 		if (status) {
4128 			dev_err(dev, "SRIOV enable failed\n");
4129 			adapter->num_vfs = 0;
4130 			goto err;
4131 		}
4132 	}
4133 
4134 	if (BE3_chip(adapter)) {
4135 		/* On BE3, enable VEB only when SRIOV is enabled */
4136 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4137 					       adapter->if_handle,
4138 					       PORT_FWD_TYPE_VEB, 0);
4139 		if (status)
4140 			goto err;
4141 	}
4142 
4143 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4144 	return 0;
4145 err:
4146 	dev_err(dev, "VF setup failed\n");
4147 	be_vf_clear(adapter);
4148 	return status;
4149 }
4150 
4151 /* Converting function_mode bits on BE3 to SH mc_type enums */
4152 
4153 static u8 be_convert_mc_type(u32 function_mode)
4154 {
4155 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4156 		return vNIC1;
4157 	else if (function_mode & QNQ_MODE)
4158 		return FLEX10;
4159 	else if (function_mode & VNIC_MODE)
4160 		return vNIC2;
4161 	else if (function_mode & UMC_ENABLED)
4162 		return UMC;
4163 	else
4164 		return MC_NONE;
4165 }
4166 
4167 /* On BE2/BE3 FW does not suggest the supported limits */
4168 static void BEx_get_resources(struct be_adapter *adapter,
4169 			      struct be_resources *res)
4170 {
4171 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4172 
4173 	if (be_physfn(adapter))
4174 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4175 	else
4176 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4177 
4178 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4179 
4180 	if (be_is_mc(adapter)) {
4181 		/* Assuming that there are 4 channels per port,
4182 		 * when multi-channel is enabled
4183 		 */
4184 		if (be_is_qnq_mode(adapter))
4185 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4186 		else
4187 			/* In a non-qnq multichannel mode, the pvid
4188 			 * takes up one vlan entry
4189 			 */
4190 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4191 	} else {
4192 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4193 	}
4194 
4195 	res->max_mcast_mac = BE_MAX_MC;
4196 
4197 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4198 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4199 	 *    *only* if it is RSS-capable.
4200 	 */
4201 	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4202 	    be_virtfn(adapter) ||
4203 	    (be_is_mc(adapter) &&
4204 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4205 		res->max_tx_qs = 1;
4206 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4207 		struct be_resources super_nic_res = {0};
4208 
4209 		/* On a SuperNIC profile, the driver needs to use the
4210 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4211 		 */
4212 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4213 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4214 					  0);
4215 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4216 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4217 	} else {
4218 		res->max_tx_qs = BE3_MAX_TX_QS;
4219 	}
4220 
4221 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4222 	    !use_sriov && be_physfn(adapter))
4223 		res->max_rss_qs = (adapter->be3_native) ?
4224 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4225 	res->max_rx_qs = res->max_rss_qs + 1;
4226 
4227 	if (be_physfn(adapter))
4228 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4229 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4230 	else
4231 		res->max_evt_qs = 1;
4232 
4233 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4234 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4235 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4236 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4237 }
4238 
4239 static void be_setup_init(struct be_adapter *adapter)
4240 {
4241 	adapter->vlan_prio_bmap = 0xff;
4242 	adapter->phy.link_speed = -1;
4243 	adapter->if_handle = -1;
4244 	adapter->be3_native = false;
4245 	adapter->if_flags = 0;
4246 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4247 	if (be_physfn(adapter))
4248 		adapter->cmd_privileges = MAX_PRIVILEGES;
4249 	else
4250 		adapter->cmd_privileges = MIN_PRIVILEGES;
4251 }
4252 
4253 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4254  * However, this HW limitation is not exposed to the host via any SLI cmd.
4255  * As a result, in the case of SRIOV and in particular multi-partition configs
4256  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4257  * for distribution between the VFs. This self-imposed limit determines the
4258  * number of VFs for which RSS can be enabled.
4259  */
4260 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4261 {
4262 	struct be_port_resources port_res = {0};
4263 	u8 rss_tables_on_port;
4264 	u16 max_vfs = be_max_vfs(adapter);
4265 
4266 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4267 				  RESOURCE_LIMITS, 0);
4268 
4269 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4270 
4271 	/* Each PF Pool's RSS Tables limit =
4272 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
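	 * e.g. if this PF-pool can have 32 of the port's 64 VFs and 15 RSS
	 * Tables remain on the port, the pool's limit is 32 * 15 / 64 = 7.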
4273 	 */
4274 	adapter->pool_res.max_rss_tables =
4275 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4276 }
4277 
4278 static int be_get_sriov_config(struct be_adapter *adapter)
4279 {
4280 	struct be_resources res = {0};
4281 	int max_vfs, old_vfs;
4282 
4283 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4284 				  RESOURCE_LIMITS, 0);
4285 
4286 	/* Some old versions of BE3 FW don't report max_vfs value */
4287 	if (BE3_chip(adapter) && !res.max_vfs) {
4288 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4289 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4290 	}
4291 
4292 	adapter->pool_res = res;
4293 
4294 	/* If during previous unload of the driver, the VFs were not disabled,
4295 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4296 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4297 	 */
4298 	old_vfs = pci_num_vf(adapter->pdev);
4299 	if (old_vfs) {
4300 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4301 			 old_vfs);
4302 
4303 		adapter->pool_res.max_vfs =
4304 			pci_sriov_get_totalvfs(adapter->pdev);
4305 		adapter->num_vfs = old_vfs;
4306 	}
4307 
4308 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4309 		be_calculate_pf_pool_rss_tables(adapter);
4310 		dev_info(&adapter->pdev->dev,
4311 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4312 			 be_max_pf_pool_rss_tables(adapter));
4313 	}
4314 	return 0;
4315 }
4316 
4317 static void be_alloc_sriov_res(struct be_adapter *adapter)
4318 {
4319 	int old_vfs = pci_num_vf(adapter->pdev);
4320 	struct  be_resources vft_res = {0};
4321 	int status;
4322 
4323 	be_get_sriov_config(adapter);
4324 
4325 	if (!old_vfs)
4326 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4327 
4328 	/* When the HW is in SRIOV capable configuration, the PF-pool
4329 	 * resources are given to PF during driver load, if there are no
4330 	 * old VFs. This facility is not available in BE3 FW.
4331 	 * Also, this is done by FW in Lancer chip.
4332 	 */
4333 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4334 		be_calculate_vf_res(adapter, 0, &vft_res);
4335 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4336 						 &vft_res);
4337 		if (status)
4338 			dev_err(&adapter->pdev->dev,
4339 				"Failed to optimize SRIOV resources\n");
4340 	}
4341 }
4342 
4343 static int be_get_resources(struct be_adapter *adapter)
4344 {
4345 	struct device *dev = &adapter->pdev->dev;
4346 	struct be_resources res = {0};
4347 	int status;
4348 
4349 	/* For Lancer, SH etc read per-function resource limits from FW.
4350 	 * GET_FUNC_CONFIG returns per function guaranteed limits.
4351 	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4352 	 */
4353 	if (BEx_chip(adapter)) {
4354 		BEx_get_resources(adapter, &res);
4355 	} else {
4356 		status = be_cmd_get_func_config(adapter, &res);
4357 		if (status)
4358 			return status;
4359 
4360 		/* If a default RXQ must be created, we'll use up one RSSQ */
4361 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4362 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4363 			res.max_rss_qs -= 1;
4364 	}
4365 
4366 	/* If RoCE is supported stash away half the EQs for RoCE */
4367 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4368 				res.max_evt_qs / 2 : res.max_evt_qs;
4369 	adapter->res = res;
4370 
4371 	/* If FW supports RSS default queue, then skip creating non-RSS
4372 	 * queue for non-IP traffic.
4373 	 */
4374 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4375 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4376 
4377 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4378 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4379 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4380 		 be_max_vfs(adapter));
4381 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4382 		 be_max_uc(adapter), be_max_mc(adapter),
4383 		 be_max_vlans(adapter));
4384 
4385 	/* Ensure RX and TX queues are created in pairs at init time */
4386 	adapter->cfg_num_rx_irqs =
4387 				min_t(u16, netif_get_num_default_rss_queues(),
4388 				      be_max_qp_irqs(adapter));
4389 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4390 	return 0;
4391 }
4392 
4393 static int be_get_config(struct be_adapter *adapter)
4394 {
4395 	int status, level;
4396 	u16 profile_id;
4397 
4398 	status = be_cmd_get_cntl_attributes(adapter);
4399 	if (status)
4400 		return status;
4401 
4402 	status = be_cmd_query_fw_cfg(adapter);
4403 	if (status)
4404 		return status;
4405 
4406 	if (!lancer_chip(adapter) && be_physfn(adapter))
4407 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4408 
4409 	if (BEx_chip(adapter)) {
4410 		level = be_cmd_get_fw_log_level(adapter);
4411 		adapter->msg_enable =
4412 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4413 	}
4414 
4415 	be_cmd_get_acpi_wol_cap(adapter);
4416 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4417 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4418 
4419 	be_cmd_query_port_name(adapter);
4420 
4421 	if (be_physfn(adapter)) {
4422 		status = be_cmd_get_active_profile(adapter, &profile_id);
4423 		if (!status)
4424 			dev_info(&adapter->pdev->dev,
4425 				 "Using profile 0x%x\n", profile_id);
4426 	}
4427 
4428 	return 0;
4429 }
4430 
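/* If the netdev does not yet have a MAC address, fetch the function's
 * permanent MAC from FW and use it as both the current and permanent address.
 */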
4431 static int be_mac_setup(struct be_adapter *adapter)
4432 {
4433 	u8 mac[ETH_ALEN];
4434 	int status;
4435 
4436 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4437 		status = be_cmd_get_perm_mac(adapter, mac);
4438 		if (status)
4439 			return status;
4440 
4441 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4442 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4443 
4444 		/* Initial MAC for BE3 VFs is already programmed by PF */
4445 		if (BEx_chip(adapter) && be_virtfn(adapter))
4446 			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4447 	}
4448 
4449 	return 0;
4450 }
4451 
4452 static void be_schedule_worker(struct be_adapter *adapter)
4453 {
4454 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4455 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4456 }
4457 
4458 static void be_destroy_err_recovery_workq(void)
4459 {
4460 	if (!be_err_recovery_workq)
4461 		return;
4462 
4463 	flush_workqueue(be_err_recovery_workq);
4464 	destroy_workqueue(be_err_recovery_workq);
4465 	be_err_recovery_workq = NULL;
4466 }
4467 
4468 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4469 {
4470 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4471 
4472 	if (!be_err_recovery_workq)
4473 		return;
4474 
4475 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4476 			   msecs_to_jiffies(delay));
4477 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4478 }
4479 
4480 static int be_setup_queues(struct be_adapter *adapter)
4481 {
4482 	struct net_device *netdev = adapter->netdev;
4483 	int status;
4484 
4485 	status = be_evt_queues_create(adapter);
4486 	if (status)
4487 		goto err;
4488 
4489 	status = be_tx_qs_create(adapter);
4490 	if (status)
4491 		goto err;
4492 
4493 	status = be_rx_cqs_create(adapter);
4494 	if (status)
4495 		goto err;
4496 
4497 	status = be_mcc_queues_create(adapter);
4498 	if (status)
4499 		goto err;
4500 
4501 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4502 	if (status)
4503 		goto err;
4504 
4505 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4506 	if (status)
4507 		goto err;
4508 
4509 	return 0;
4510 err:
4511 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4512 	return status;
4513 }
4514 
4515 static int be_if_create(struct be_adapter *adapter)
4516 {
4517 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4518 	u32 cap_flags = be_if_cap_flags(adapter);
4519 	int status;
4520 
4521 	/* alloc required memory for other filtering fields */
4522 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4523 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4524 	if (!adapter->pmac_id)
4525 		return -ENOMEM;
4526 
4527 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4528 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4529 	if (!adapter->mc_list)
4530 		return -ENOMEM;
4531 
4532 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4533 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4534 	if (!adapter->uc_list)
4535 		return -ENOMEM;
4536 
4537 	if (adapter->cfg_num_rx_irqs == 1)
4538 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4539 
4540 	en_flags &= cap_flags;
4541 	/* will enable all the needed filter flags in be_open() */
4542 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4543 				  &adapter->if_handle, 0);
4544 
4545 	if (status)
4546 		return status;
4547 
4548 	return 0;
4549 }
4550 
4551 int be_update_queues(struct be_adapter *adapter)
4552 {
4553 	struct net_device *netdev = adapter->netdev;
4554 	int status;
4555 
4556 	if (netif_running(netdev))
4557 		be_close(netdev);
4558 
4559 	be_cancel_worker(adapter);
4560 
4561 	/* If any vectors have been shared with RoCE we cannot re-program
4562 	 * the MSIx table.
4563 	 */
4564 	if (!adapter->num_msix_roce_vec)
4565 		be_msix_disable(adapter);
4566 
4567 	be_clear_queues(adapter);
4568 	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4569 	if (status)
4570 		return status;
4571 
4572 	if (!msix_enabled(adapter)) {
4573 		status = be_msix_enable(adapter);
4574 		if (status)
4575 			return status;
4576 	}
4577 
4578 	status = be_if_create(adapter);
4579 	if (status)
4580 		return status;
4581 
4582 	status = be_setup_queues(adapter);
4583 	if (status)
4584 		return status;
4585 
4586 	be_schedule_worker(adapter);
4587 
4588 	if (netif_running(netdev))
4589 		status = be_open(netdev);
4590 
4591 	return status;
4592 }
4593 
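/* Parse the major number out of a FW version string (e.g. "4.9.416.0").
 * Returns 0 if the string cannot be parsed.
 */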
4594 static inline int fw_major_num(const char *fw_ver)
4595 {
4596 	int fw_major = 0, i;
4597 
4598 	i = sscanf(fw_ver, "%d.", &fw_major);
4599 	if (i != 1)
4600 		return 0;
4601 
4602 	return fw_major;
4603 }
4604 
4605 /* During error recovery, always FLR the PF.
4606  * Otherwise, FLR the PF only if no VFs are already enabled.
4607  */
4608 static bool be_reset_required(struct be_adapter *adapter)
4609 {
4610 	if (be_error_recovering(adapter))
4611 		return true;
4612 	else
4613 		return pci_num_vf(adapter->pdev) == 0;
4614 }
4615 
4616 /* Wait for the FW to be ready and perform the required initialization */
4617 static int be_func_init(struct be_adapter *adapter)
4618 {
4619 	int status;
4620 
4621 	status = be_fw_wait_ready(adapter);
4622 	if (status)
4623 		return status;
4624 
4625 	/* FW is now ready; clear errors to allow cmds/doorbell */
4626 	be_clear_error(adapter, BE_CLEAR_ALL);
4627 
4628 	if (be_reset_required(adapter)) {
4629 		status = be_cmd_reset_function(adapter);
4630 		if (status)
4631 			return status;
4632 
4633 		/* Wait for interrupts to quiesce after an FLR */
4634 		msleep(100);
4635 	}
4636 
4637 	/* Tell FW we're ready to fire cmds */
4638 	status = be_cmd_fw_init(adapter);
4639 	if (status)
4640 		return status;
4641 
4642 	/* Allow interrupts for other ULPs running on NIC function */
4643 	be_intr_set(adapter, true);
4644 
4645 	return 0;
4646 }
4647 
4648 static int be_setup(struct be_adapter *adapter)
4649 {
4650 	struct device *dev = &adapter->pdev->dev;
4651 	int status;
4652 
4653 	status = be_func_init(adapter);
4654 	if (status)
4655 		return status;
4656 
4657 	be_setup_init(adapter);
4658 
4659 	if (!lancer_chip(adapter))
4660 		be_cmd_req_native_mode(adapter);
4661 
4662 	/* invoke this cmd first to get pf_num and vf_num which are needed
4663 	 * for issuing profile related cmds
4664 	 */
4665 	if (!BEx_chip(adapter)) {
4666 		status = be_cmd_get_func_config(adapter, NULL);
4667 		if (status)
4668 			return status;
4669 	}
4670 
4671 	status = be_get_config(adapter);
4672 	if (status)
4673 		goto err;
4674 
4675 	if (!BE2_chip(adapter) && be_physfn(adapter))
4676 		be_alloc_sriov_res(adapter);
4677 
4678 	status = be_get_resources(adapter);
4679 	if (status)
4680 		goto err;
4681 
4682 	status = be_msix_enable(adapter);
4683 	if (status)
4684 		goto err;
4685 
4686 	/* will enable all the needed filter flags in be_open() */
4687 	status = be_if_create(adapter);
4688 	if (status)
4689 		goto err;
4690 
4691 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4692 	rtnl_lock();
4693 	status = be_setup_queues(adapter);
4694 	rtnl_unlock();
4695 	if (status)
4696 		goto err;
4697 
4698 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4699 
4700 	status = be_mac_setup(adapter);
4701 	if (status)
4702 		goto err;
4703 
4704 	be_cmd_get_fw_ver(adapter);
4705 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4706 
4707 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4708 		dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4709 			adapter->fw_ver);
4710 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4711 	}
4712 
4713 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4714 					 adapter->rx_fc);
4715 	if (status)
4716 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4717 					&adapter->rx_fc);
4718 
4719 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4720 		 adapter->tx_fc, adapter->rx_fc);
4721 
4722 	if (be_physfn(adapter))
4723 		be_cmd_set_logical_link_config(adapter,
4724 					       IFLA_VF_LINK_STATE_AUTO, 0);
4725 
4726 	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4727 	 * confusing a Linux bridge or OVS that it might be connected to.
4728 	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4729 	 * when SRIOV is not enabled.
4730 	 */
4731 	if (BE3_chip(adapter))
4732 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4733 				      PORT_FWD_TYPE_PASSTHRU, 0);
4734 
4735 	if (adapter->num_vfs)
4736 		be_vf_setup(adapter);
4737 
4738 	status = be_cmd_get_phy_info(adapter);
4739 	if (!status && be_pause_supported(adapter))
4740 		adapter->phy.fc_autoneg = 1;
4741 
4742 	if (be_physfn(adapter) && !lancer_chip(adapter))
4743 		be_cmd_set_features(adapter);
4744 
4745 	be_schedule_worker(adapter);
4746 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4747 	return 0;
4748 err:
4749 	be_clear(adapter);
4750 	return status;
4751 }
4752 
4753 #ifdef CONFIG_NET_POLL_CONTROLLER
4754 static void be_netpoll(struct net_device *netdev)
4755 {
4756 	struct be_adapter *adapter = netdev_priv(netdev);
4757 	struct be_eq_obj *eqo;
4758 	int i;
4759 
4760 	for_all_evt_queues(adapter, eqo, i) {
4761 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4762 		napi_schedule(&eqo->napi);
4763 	}
4764 }
4765 #endif
4766 
4767 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4768 {
4769 	const struct firmware *fw;
4770 	int status;
4771 
4772 	if (!netif_running(adapter->netdev)) {
4773 		dev_err(&adapter->pdev->dev,
4774 			"Firmware load not allowed (interface is down)\n");
4775 		return -ENETDOWN;
4776 	}
4777 
4778 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4779 	if (status)
4780 		goto fw_exit;
4781 
4782 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4783 
4784 	if (lancer_chip(adapter))
4785 		status = lancer_fw_download(adapter, fw);
4786 	else
4787 		status = be_fw_download(adapter, fw);
4788 
4789 	if (!status)
4790 		be_cmd_get_fw_ver(adapter);
4791 
4792 fw_exit:
4793 	release_firmware(fw);
4794 	return status;
4795 }
4796 
4797 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4798 				 u16 flags)
4799 {
4800 	struct be_adapter *adapter = netdev_priv(dev);
4801 	struct nlattr *attr, *br_spec;
4802 	int rem;
4803 	int status = 0;
4804 	u16 mode = 0;
4805 
4806 	if (!sriov_enabled(adapter))
4807 		return -EOPNOTSUPP;
4808 
4809 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4810 	if (!br_spec)
4811 		return -EINVAL;
4812 
4813 	nla_for_each_nested(attr, br_spec, rem) {
4814 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4815 			continue;
4816 
4817 		if (nla_len(attr) < sizeof(mode))
4818 			return -EINVAL;
4819 
4820 		mode = nla_get_u16(attr);
4821 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4822 			return -EOPNOTSUPP;
4823 
4824 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4825 			return -EINVAL;
4826 
4827 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4828 					       adapter->if_handle,
4829 					       mode == BRIDGE_MODE_VEPA ?
4830 					       PORT_FWD_TYPE_VEPA :
4831 					       PORT_FWD_TYPE_VEB, 0);
4832 		if (status)
4833 			goto err;
4834 
4835 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4836 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4837 
4838 		return status;
4839 	}
4840 err:
4841 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4842 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4843 
4844 	return status;
4845 }
4846 
4847 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4848 				 struct net_device *dev, u32 filter_mask,
4849 				 int nlflags)
4850 {
4851 	struct be_adapter *adapter = netdev_priv(dev);
4852 	int status = 0;
4853 	u8 hsw_mode;
4854 
4855 	/* BE and Lancer chips support VEB mode only */
4856 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4857 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4858 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4859 			return 0;
4860 		hsw_mode = PORT_FWD_TYPE_VEB;
4861 	} else {
4862 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4863 					       adapter->if_handle, &hsw_mode,
4864 					       NULL);
4865 		if (status)
4866 			return 0;
4867 
4868 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4869 			return 0;
4870 	}
4871 
4872 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4873 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4874 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4875 				       0, 0, nlflags, filter_mask, NULL);
4876 }
4877 
4878 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4879 					 void (*func)(struct work_struct *))
4880 {
4881 	struct be_cmd_work *work;
4882 
4883 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4884 	if (!work) {
4885 		dev_err(&adapter->pdev->dev,
4886 			"be_work memory allocation failed\n");
4887 		return NULL;
4888 	}
4889 
4890 	INIT_WORK(&work->work, func);
4891 	work->adapter = adapter;
4892 	return work;
4893 }
4894 
4895 /* VxLAN offload Notes:
4896  *
4897  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4898  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4899  * is expected to work across all types of IP tunnels once exported. Skyhawk
4900  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4901  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4902  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4903  * those other tunnels are unexported on the fly through ndo_features_check().
4904  *
4905  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4906  * adds more than one port, disable offloads and don't re-enable them
4907  * until all the tunnels are removed.
4908  */
4909 static void be_work_add_vxlan_port(struct work_struct *work)
4910 {
4911 	struct be_cmd_work *cmd_work =
4912 				container_of(work, struct be_cmd_work, work);
4913 	struct be_adapter *adapter = cmd_work->adapter;
4914 	struct net_device *netdev = adapter->netdev;
4915 	struct device *dev = &adapter->pdev->dev;
4916 	__be16 port = cmd_work->info.vxlan_port;
4917 	int status;
4918 
4919 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4920 		adapter->vxlan_port_aliases++;
4921 		goto done;
4922 	}
4923 
4924 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4925 		dev_info(dev,
4926 			 "Only one UDP port supported for VxLAN offloads\n");
4927 		dev_info(dev, "Disabling VxLAN offloads\n");
4928 		adapter->vxlan_port_count++;
4929 		goto err;
4930 	}
4931 
4932 	if (adapter->vxlan_port_count++ >= 1)
4933 		goto done;
4934 
4935 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
4936 				     OP_CONVERT_NORMAL_TO_TUNNEL);
4937 	if (status) {
4938 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4939 		goto err;
4940 	}
4941 
4942 	status = be_cmd_set_vxlan_port(adapter, port);
4943 	if (status) {
4944 		dev_warn(dev, "Failed to add VxLAN port\n");
4945 		goto err;
4946 	}
4947 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4948 	adapter->vxlan_port = port;
4949 
4950 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4951 				   NETIF_F_TSO | NETIF_F_TSO6 |
4952 				   NETIF_F_GSO_UDP_TUNNEL;
4953 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4954 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4955 
4956 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4957 		 be16_to_cpu(port));
4958 	goto done;
4959 err:
4960 	be_disable_vxlan_offloads(adapter);
4961 done:
4962 	kfree(cmd_work);
4963 }
4964 
4965 static void be_work_del_vxlan_port(struct work_struct *work)
4966 {
4967 	struct be_cmd_work *cmd_work =
4968 				container_of(work, struct be_cmd_work, work);
4969 	struct be_adapter *adapter = cmd_work->adapter;
4970 	__be16 port = cmd_work->info.vxlan_port;
4971 
4972 	if (adapter->vxlan_port != port)
4973 		goto done;
4974 
4975 	if (adapter->vxlan_port_aliases) {
4976 		adapter->vxlan_port_aliases--;
4977 		goto out;
4978 	}
4979 
4980 	be_disable_vxlan_offloads(adapter);
4981 
4982 	dev_info(&adapter->pdev->dev,
4983 		 "Disabled VxLAN offloads for UDP port %d\n",
4984 		 be16_to_cpu(port));
4985 done:
4986 	adapter->vxlan_port_count--;
4987 out:
4988 	kfree(cmd_work);
4989 }
4990 
4991 static void be_cfg_vxlan_port(struct net_device *netdev,
4992 			      struct udp_tunnel_info *ti,
4993 			      void (*func)(struct work_struct *))
4994 {
4995 	struct be_adapter *adapter = netdev_priv(netdev);
4996 	struct be_cmd_work *cmd_work;
4997 
4998 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4999 		return;
5000 
5001 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5002 		return;
5003 
5004 	cmd_work = be_alloc_work(adapter, func);
5005 	if (cmd_work) {
5006 		cmd_work->info.vxlan_port = ti->port;
5007 		queue_work(be_wq, &cmd_work->work);
5008 	}
5009 }
5010 
5011 static void be_del_vxlan_port(struct net_device *netdev,
5012 			      struct udp_tunnel_info *ti)
5013 {
5014 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5015 }
5016 
5017 static void be_add_vxlan_port(struct net_device *netdev,
5018 			      struct udp_tunnel_info *ti)
5019 {
5020 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5021 }
5022 
5023 static netdev_features_t be_features_check(struct sk_buff *skb,
5024 					   struct net_device *dev,
5025 					   netdev_features_t features)
5026 {
5027 	struct be_adapter *adapter = netdev_priv(dev);
5028 	u8 l4_hdr = 0;
5029 
5030 	/* The code below restricts offload features for some tunneled packets.
5031 	 * Offload features for normal (non tunnel) packets are unchanged.
5032 	 */
5033 	if (!skb->encapsulation ||
5034 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5035 		return features;
5036 
5037 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5038 	 * should disable tunnel offload features if it's not a VxLAN packet,
5039 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5040 	 * allow other tunneled traffic like GRE to work while VxLAN
5041 	 * offloads are configured in Skyhawk-R.
5042 	 */
5043 	switch (vlan_get_protocol(skb)) {
5044 	case htons(ETH_P_IP):
5045 		l4_hdr = ip_hdr(skb)->protocol;
5046 		break;
5047 	case htons(ETH_P_IPV6):
5048 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5049 		break;
5050 	default:
5051 		return features;
5052 	}
5053 
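	/* Keep tunnel offloads only for a well-formed VxLAN packet on the
	 * offloaded port: the outer L4 must be UDP carrying a VxLAN header
	 * followed by an inner Ethernet (TEB) frame, and the UDP dport must
	 * match the configured vxlan_port.
	 */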
5054 	if (l4_hdr != IPPROTO_UDP ||
5055 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5056 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5057 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5058 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5059 	    !adapter->vxlan_port ||
5060 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5061 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5062 
5063 	return features;
5064 }
5065 
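/* The phys port id reported to the stack is the 1-based HBA port number
 * followed by the controller serial-number words in reverse order.
 */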
5066 static int be_get_phys_port_id(struct net_device *dev,
5067 			       struct netdev_phys_item_id *ppid)
5068 {
5069 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5070 	struct be_adapter *adapter = netdev_priv(dev);
5071 	u8 *id;
5072 
5073 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5074 		return -ENOSPC;
5075 
5076 	ppid->id[0] = adapter->hba_port_num + 1;
5077 	id = &ppid->id[1];
5078 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5079 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5080 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5081 
5082 	ppid->id_len = id_len;
5083 
5084 	return 0;
5085 }
5086 
5087 static void be_set_rx_mode(struct net_device *dev)
5088 {
5089 	struct be_adapter *adapter = netdev_priv(dev);
5090 	struct be_cmd_work *work;
5091 
5092 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5093 	if (work)
5094 		queue_work(be_wq, &work->work);
5095 }
5096 
5097 static const struct net_device_ops be_netdev_ops = {
5098 	.ndo_open		= be_open,
5099 	.ndo_stop		= be_close,
5100 	.ndo_start_xmit		= be_xmit,
5101 	.ndo_set_rx_mode	= be_set_rx_mode,
5102 	.ndo_set_mac_address	= be_mac_addr_set,
5103 	.ndo_get_stats64	= be_get_stats64,
5104 	.ndo_validate_addr	= eth_validate_addr,
5105 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5106 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5107 	.ndo_set_vf_mac		= be_set_vf_mac,
5108 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5109 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5110 	.ndo_get_vf_config	= be_get_vf_config,
5111 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5112 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5113 #ifdef CONFIG_NET_POLL_CONTROLLER
5114 	.ndo_poll_controller	= be_netpoll,
5115 #endif
5116 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5117 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5118 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5119 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5120 	.ndo_features_check	= be_features_check,
5121 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5122 };
5123 
5124 static void be_netdev_init(struct net_device *netdev)
5125 {
5126 	struct be_adapter *adapter = netdev_priv(netdev);
5127 
5128 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5129 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5130 		NETIF_F_HW_VLAN_CTAG_TX;
5131 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5132 		netdev->hw_features |= NETIF_F_RXHASH;
5133 
5134 	netdev->features |= netdev->hw_features |
5135 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5136 
5137 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5138 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5139 
5140 	netdev->priv_flags |= IFF_UNICAST_FLT;
5141 
5142 	netdev->flags |= IFF_MULTICAST;
5143 
5144 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5145 
5146 	netdev->netdev_ops = &be_netdev_ops;
5147 
5148 	netdev->ethtool_ops = &be_ethtool_ops;
5149 
5150 	/* MTU range: 256 - 9000 */
5151 	netdev->min_mtu = BE_MIN_MTU;
5152 	netdev->max_mtu = BE_MAX_MTU;
5153 }
5154 
5155 static void be_cleanup(struct be_adapter *adapter)
5156 {
5157 	struct net_device *netdev = adapter->netdev;
5158 
5159 	rtnl_lock();
5160 	netif_device_detach(netdev);
5161 	if (netif_running(netdev))
5162 		be_close(netdev);
5163 	rtnl_unlock();
5164 
5165 	be_clear(adapter);
5166 }
5167 
5168 static int be_resume(struct be_adapter *adapter)
5169 {
5170 	struct net_device *netdev = adapter->netdev;
5171 	int status;
5172 
5173 	status = be_setup(adapter);
5174 	if (status)
5175 		return status;
5176 
5177 	rtnl_lock();
5178 	if (netif_running(netdev))
5179 		status = be_open(netdev);
5180 	rtnl_unlock();
5181 
5182 	if (status)
5183 		return status;
5184 
5185 	netif_device_attach(netdev);
5186 
5187 	return 0;
5188 }
5189 
5190 static void be_soft_reset(struct be_adapter *adapter)
5191 {
5192 	u32 val;
5193 
5194 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5195 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5196 	val |= SLIPORT_SOFTRESET_SR_MASK;
5197 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5198 }
5199 
5200 static bool be_err_is_recoverable(struct be_adapter *adapter)
5201 {
5202 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5203 	unsigned long initial_idle_time =
5204 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5205 	unsigned long recovery_interval =
5206 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5207 	u16 ue_err_code;
5208 	u32 val;
5209 
5210 	val = be_POST_stage_get(adapter);
5211 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5212 		return false;
5213 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5214 	if (ue_err_code == 0)
5215 		return false;
5216 
5217 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5218 		ue_err_code);
5219 
5220 	if (jiffies - err_rec->probe_time <= initial_idle_time) {
5221 		dev_err(&adapter->pdev->dev,
5222 			"Cannot recover within %lu sec from driver load\n",
5223 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5224 		return false;
5225 	}
5226 
5227 	if (err_rec->last_recovery_time &&
5228 	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5229 		dev_err(&adapter->pdev->dev,
5230 			"Cannot recover within %lu sec from last recovery\n",
5231 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5232 		return false;
5233 	}
5234 
5235 	if (ue_err_code == err_rec->last_err_code) {
5236 		dev_err(&adapter->pdev->dev,
5237 			"Cannot recover from a consecutive TPE error\n");
5238 		return false;
5239 	}
5240 
5241 	err_rec->last_recovery_time = jiffies;
5242 	err_rec->last_err_code = ue_err_code;
5243 	return true;
5244 }
5245 
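/* Step the TPE recovery state machine (non-Lancer chips):
 * NONE -> DETECT -> RESET (PF0 only; PF0 issues the chip soft reset) ->
 * PRE_POLL -> REINIT. err_rec->resched_delay tells the caller how long to
 * wait before running the next step; a return of 0 means the detect/reset
 * phase is complete.
 */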
5246 static int be_tpe_recover(struct be_adapter *adapter)
5247 {
5248 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5249 	int status = -EAGAIN;
5250 	u32 val;
5251 
5252 	switch (err_rec->recovery_state) {
5253 	case ERR_RECOVERY_ST_NONE:
5254 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5255 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5256 		break;
5257 
5258 	case ERR_RECOVERY_ST_DETECT:
5259 		val = be_POST_stage_get(adapter);
5260 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5261 		    POST_STAGE_RECOVERABLE_ERR) {
5262 			dev_err(&adapter->pdev->dev,
5263 				"Unrecoverable HW error detected: 0x%x\n", val);
5264 			status = -EINVAL;
5265 			err_rec->resched_delay = 0;
5266 			break;
5267 		}
5268 
5269 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5270 
5271 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5272 		 * milliseconds before it checks for final error status in
5273 		 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5274 		 * If it does, then PF0 initiates a Soft Reset.
5275 		 */
5276 		if (adapter->pf_num == 0) {
5277 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5278 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5279 					ERR_RECOVERY_UE_DETECT_DURATION;
5280 			break;
5281 		}
5282 
5283 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5284 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5285 					ERR_RECOVERY_UE_DETECT_DURATION;
5286 		break;
5287 
5288 	case ERR_RECOVERY_ST_RESET:
5289 		if (!be_err_is_recoverable(adapter)) {
5290 			dev_err(&adapter->pdev->dev,
5291 				"Failed to meet recovery criteria\n");
5292 			status = -EIO;
5293 			err_rec->resched_delay = 0;
5294 			break;
5295 		}
5296 		be_soft_reset(adapter);
5297 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5298 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5299 					err_rec->ue_to_reset_time;
5300 		break;
5301 
5302 	case ERR_RECOVERY_ST_PRE_POLL:
5303 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5304 		err_rec->resched_delay = 0;
5305 		status = 0;			/* done */
5306 		break;
5307 
5308 	default:
5309 		status = -EINVAL;
5310 		err_rec->resched_delay = 0;
5311 		break;
5312 	}
5313 
5314 	return status;
5315 }
5316 
5317 static int be_err_recover(struct be_adapter *adapter)
5318 {
5319 	int status;
5320 
5321 	if (!lancer_chip(adapter)) {
5322 		if (!adapter->error_recovery.recovery_supported ||
5323 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5324 			return -EIO;
5325 		status = be_tpe_recover(adapter);
5326 		if (status)
5327 			goto err;
5328 	}
5329 
5330 	/* Wait for adapter to reach quiescent state before
5331 	 * destroying queues
5332 	 */
5333 	status = be_fw_wait_ready(adapter);
5334 	if (status)
5335 		goto err;
5336 
5337 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5338 
5339 	be_cleanup(adapter);
5340 
5341 	status = be_resume(adapter);
5342 	if (status)
5343 		goto err;
5344 
5345 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5346 
5347 err:
5348 	return status;
5349 }
5350 
5351 static void be_err_detection_task(struct work_struct *work)
5352 {
5353 	struct be_error_recovery *err_rec =
5354 			container_of(work, struct be_error_recovery,
5355 				     err_detection_work.work);
5356 	struct be_adapter *adapter =
5357 			container_of(err_rec, struct be_adapter,
5358 				     error_recovery);
5359 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5360 	struct device *dev = &adapter->pdev->dev;
5361 	int recovery_status;
5362 
5363 	be_detect_error(adapter);
5364 	if (!be_check_error(adapter, BE_ERROR_HW))
5365 		goto reschedule_task;
5366 
5367 	recovery_status = be_err_recover(adapter);
5368 	if (!recovery_status) {
5369 		err_rec->recovery_retries = 0;
5370 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5371 		dev_info(dev, "Adapter recovery successful\n");
5372 		goto reschedule_task;
5373 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5374 		/* BEx/SH recovery state machine */
5375 		if (adapter->pf_num == 0 &&
5376 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5377 			dev_err(&adapter->pdev->dev,
5378 				"Adapter recovery in progress\n");
5379 		resched_delay = err_rec->resched_delay;
5380 		goto reschedule_task;
5381 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
		/* For VFs, check every second whether the PF has
		 * allocated resources.
		 */
5385 		dev_err(dev, "Re-trying adapter recovery\n");
5386 		goto reschedule_task;
5387 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5388 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
		/* If another error occurs during recovery, the adapter takes
		 * 30 seconds to come out of the error state. Retry error
		 * recovery after this interval.
		 */
		dev_err(dev, "Re-trying adapter recovery\n");
5394 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5395 		goto reschedule_task;
5396 	} else {
5397 		dev_err(dev, "Adapter recovery failed\n");
5398 		dev_err(dev, "Please reboot server to recover\n");
5399 	}
5400 
5401 	return;
5402 
5403 reschedule_task:
5404 	be_schedule_err_detection(adapter, resched_delay);
5405 }
5406 
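/* Query the SFP module and log the port's PHY state and the module's
 * vendor details. The PHY-misconfigured flag is cleared whether or not
 * the query succeeds.
 */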
5407 static void be_log_sfp_info(struct be_adapter *adapter)
5408 {
5409 	int status;
5410 
5411 	status = be_cmd_query_sfp_info(adapter);
5412 	if (!status) {
5413 		dev_err(&adapter->pdev->dev,
			"Port %c: %s Vendor: %s part no: %s\n",
5415 			adapter->port_name,
5416 			be_misconfig_evt_port_state[adapter->phy_state],
5417 			adapter->phy.vendor_name,
5418 			adapter->phy.vendor_pn);
5419 	}
5420 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5421 }
5422 
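/* Periodic (1 second) housekeeping: die-temperature query, MCC completion
 * reaping while the interface is down, stats refresh, replenishing of
 * starved RX queues, EQ-delay updates and SFP logging.
 */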
5423 static void be_worker(struct work_struct *work)
5424 {
5425 	struct be_adapter *adapter =
5426 		container_of(work, struct be_adapter, work.work);
5427 	struct be_rx_obj *rxo;
5428 	int i;
5429 
5430 	if (be_physfn(adapter) &&
5431 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5432 		be_cmd_get_die_temperature(adapter);
5433 
5434 	/* when interrupts are not yet enabled, just reap any pending
5435 	 * mcc completions
5436 	 */
5437 	if (!netif_running(adapter->netdev)) {
5438 		local_bh_disable();
5439 		be_process_mcc(adapter);
5440 		local_bh_enable();
5441 		goto reschedule;
5442 	}
5443 
5444 	if (!adapter->stats_cmd_sent) {
5445 		if (lancer_chip(adapter))
5446 			lancer_cmd_get_pport_stats(adapter,
5447 						   &adapter->stats_cmd);
5448 		else
5449 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5450 	}
5451 
5452 	for_all_rx_queues(adapter, rxo, i) {
5453 		/* Replenish RX-queues starved due to memory
5454 		 * allocation failures.
5455 		 */
5456 		if (rxo->rx_post_starved)
5457 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5458 	}
5459 
5460 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5461 	if (!skyhawk_chip(adapter))
5462 		be_eqd_update(adapter, false);
5463 
5464 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5465 		be_log_sfp_info(adapter);
5466 
5467 reschedule:
5468 	adapter->work_counter++;
5469 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5470 }
5471 
5472 static void be_unmap_pci_bars(struct be_adapter *adapter)
5473 {
5474 	if (adapter->csr)
5475 		pci_iounmap(adapter->pdev, adapter->csr);
5476 	if (adapter->db)
5477 		pci_iounmap(adapter->pdev, adapter->db);
5478 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5479 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5480 }
5481 
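/* Doorbell BAR: BAR 0 on Lancer chips and on VFs, BAR 4 otherwise */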
5482 static int db_bar(struct be_adapter *adapter)
5483 {
5484 	if (lancer_chip(adapter) || be_virtfn(adapter))
5485 		return 0;
5486 	else
5487 		return 4;
5488 }
5489 
5490 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5491 {
5492 	if (skyhawk_chip(adapter)) {
5493 		adapter->roce_db.size = 4096;
5494 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5495 							      db_bar(adapter));
5496 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5497 							       db_bar(adapter));
5498 	}
5499 	return 0;
5500 }
5501 
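/* Map the CSR (BEx PF only), doorbell and PCICFG BARs used to talk to the
 * adapter; also records the SLI family and whether this function is a VF.
 */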
5502 static int be_map_pci_bars(struct be_adapter *adapter)
5503 {
5504 	struct pci_dev *pdev = adapter->pdev;
5505 	u8 __iomem *addr;
5506 	u32 sli_intf;
5507 
5508 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5509 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5510 				SLI_INTF_FAMILY_SHIFT;
5511 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5512 
5513 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5514 		adapter->csr = pci_iomap(pdev, 2, 0);
5515 		if (!adapter->csr)
5516 			return -ENOMEM;
5517 	}
5518 
5519 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5520 	if (!addr)
5521 		goto pci_map_err;
5522 	adapter->db = addr;
5523 
5524 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5525 		if (be_physfn(adapter)) {
5526 			/* PCICFG is the 2nd BAR in BE2 */
5527 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5528 			if (!addr)
5529 				goto pci_map_err;
5530 			adapter->pcicfg = addr;
5531 			adapter->pcicfg_mapped = true;
5532 		} else {
5533 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5534 			adapter->pcicfg_mapped = false;
5535 		}
5536 	}
5537 
5538 	be_roce_map_pci_bars(adapter);
5539 	return 0;
5540 
5541 pci_map_err:
5542 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5543 	be_unmap_pci_bars(adapter);
5544 	return -ENOMEM;
5545 }
5546 
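/* Free the DMA-coherent buffers (mailbox, RX-filter cmd, stats cmd)
 * allocated by be_drv_init()
 */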
5547 static void be_drv_cleanup(struct be_adapter *adapter)
5548 {
5549 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5550 	struct device *dev = &adapter->pdev->dev;
5551 
5552 	if (mem->va)
5553 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5554 
5555 	mem = &adapter->rx_filter;
5556 	if (mem->va)
5557 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5558 
5559 	mem = &adapter->stats_cmd;
5560 	if (mem->va)
5561 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5562 }
5563 
5564 /* Allocate and initialize various fields in be_adapter struct */
5565 static int be_drv_init(struct be_adapter *adapter)
5566 {
5567 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5568 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5569 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5570 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5571 	struct device *dev = &adapter->pdev->dev;
5572 	int status = 0;
5573 
5574 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5575 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5576 						 &mbox_mem_alloc->dma,
5577 						 GFP_KERNEL);
5578 	if (!mbox_mem_alloc->va)
5579 		return -ENOMEM;
5580 
5581 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5582 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5583 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5584 
5585 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5586 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5587 					    &rx_filter->dma, GFP_KERNEL);
5588 	if (!rx_filter->va) {
5589 		status = -ENOMEM;
5590 		goto free_mbox;
5591 	}
5592 
5593 	if (lancer_chip(adapter))
5594 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5595 	else if (BE2_chip(adapter))
5596 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5597 	else if (BE3_chip(adapter))
5598 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5599 	else
5600 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5601 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5602 					    &stats_cmd->dma, GFP_KERNEL);
5603 	if (!stats_cmd->va) {
5604 		status = -ENOMEM;
5605 		goto free_rx_filter;
5606 	}
5607 
5608 	mutex_init(&adapter->mbox_lock);
5609 	mutex_init(&adapter->mcc_lock);
5610 	mutex_init(&adapter->rx_filter_lock);
5611 	spin_lock_init(&adapter->mcc_cq_lock);
5612 	init_completion(&adapter->et_cmd_compl);
5613 
5614 	pci_save_state(adapter->pdev);
5615 
5616 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5617 
5618 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5619 	adapter->error_recovery.resched_delay = 0;
5620 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5621 			  be_err_detection_task);
5622 
5623 	adapter->rx_fc = true;
5624 	adapter->tx_fc = true;
5625 
5626 	/* Must be a power of 2 or else MODULO will BUG_ON */
5627 	adapter->be_get_temp_freq = 64;
5628 
5629 	return 0;
5630 
5631 free_rx_filter:
5632 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5633 free_mbox:
5634 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5635 			  mbox_mem_alloc->dma);
5636 	return status;
5637 }
5638 
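/* PCI remove callback: unregister the netdev, tear down HW resources and
 * release all PCI/driver state acquired in be_probe().
 */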
5639 static void be_remove(struct pci_dev *pdev)
5640 {
5641 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5642 
5643 	if (!adapter)
5644 		return;
5645 
5646 	be_roce_dev_remove(adapter);
5647 	be_intr_set(adapter, false);
5648 
5649 	be_cancel_err_detection(adapter);
5650 
5651 	unregister_netdev(adapter->netdev);
5652 
5653 	be_clear(adapter);
5654 
5655 	if (!pci_vfs_assigned(adapter->pdev))
5656 		be_cmd_reset_function(adapter);
5657 
5658 	/* tell fw we're done with firing cmds */
5659 	be_cmd_fw_clean(adapter);
5660 
5661 	be_unmap_pci_bars(adapter);
5662 	be_drv_cleanup(adapter);
5663 
5664 	pci_disable_pcie_error_reporting(pdev);
5665 
5666 	pci_release_regions(pdev);
5667 	pci_disable_device(pdev);
5668 
5669 	free_netdev(adapter->netdev);
5670 }
5671 
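/* hwmon sysfs handler for the on-die temperature sensor (temp1_input) */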
5672 static ssize_t be_hwmon_show_temp(struct device *dev,
5673 				  struct device_attribute *dev_attr,
5674 				  char *buf)
5675 {
5676 	struct be_adapter *adapter = dev_get_drvdata(dev);
5677 
5678 	/* Unit: millidegree Celsius */
5679 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5680 		return -EIO;
5681 	else
5682 		return sprintf(buf, "%u\n",
5683 			       adapter->hwmon_info.be_on_die_temp * 1000);
5684 }
5685 
5686 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5687 			  be_hwmon_show_temp, NULL, 1);
5688 
5689 static struct attribute *be_hwmon_attrs[] = {
5690 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5691 	NULL
5692 };
5693 
5694 ATTRIBUTE_GROUPS(be_hwmon);
5695 
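/* Human-readable name of the multi-channel mode the function operates in */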
5696 static char *mc_name(struct be_adapter *adapter)
5697 {
5698 	char *str = "";	/* default */
5699 
5700 	switch (adapter->mc_type) {
5701 	case UMC:
5702 		str = "UMC";
5703 		break;
5704 	case FLEX10:
5705 		str = "FLEX10";
5706 		break;
5707 	case vNIC1:
5708 		str = "vNIC-1";
5709 		break;
5710 	case nPAR:
5711 		str = "nPAR";
5712 		break;
5713 	case UFP:
5714 		str = "UFP";
5715 		break;
5716 	case vNIC2:
5717 		str = "vNIC-2";
5718 		break;
5719 	default:
5720 		str = "";
5721 	}
5722 
5723 	return str;
5724 }
5725 
5726 static inline char *func_name(struct be_adapter *adapter)
5727 {
5728 	return be_physfn(adapter) ? "PF" : "VF";
5729 }
5730 
5731 static inline char *nic_name(struct pci_dev *pdev)
5732 {
5733 	switch (pdev->device) {
5734 	case OC_DEVICE_ID1:
5735 		return OC_NAME;
5736 	case OC_DEVICE_ID2:
5737 		return OC_NAME_BE;
5738 	case OC_DEVICE_ID3:
5739 	case OC_DEVICE_ID4:
5740 		return OC_NAME_LANCER;
5741 	case BE_DEVICE_ID2:
5742 		return BE3_NAME;
5743 	case OC_DEVICE_ID5:
5744 	case OC_DEVICE_ID6:
5745 		return OC_NAME_SH;
5746 	default:
5747 		return BE_NAME;
5748 	}
5749 }
5750 
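/* PCI probe callback: enable the device, map BARs, set up driver state and
 * HW resources, then register the netdev, hwmon and RoCE interfaces.
 */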
5751 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5752 {
5753 	struct be_adapter *adapter;
5754 	struct net_device *netdev;
5755 	int status = 0;
5756 
5757 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5758 
5759 	status = pci_enable_device(pdev);
5760 	if (status)
5761 		goto do_none;
5762 
5763 	status = pci_request_regions(pdev, DRV_NAME);
5764 	if (status)
5765 		goto disable_dev;
5766 	pci_set_master(pdev);
5767 
5768 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5769 	if (!netdev) {
5770 		status = -ENOMEM;
5771 		goto rel_reg;
5772 	}
5773 	adapter = netdev_priv(netdev);
5774 	adapter->pdev = pdev;
5775 	pci_set_drvdata(pdev, adapter);
5776 	adapter->netdev = netdev;
5777 	SET_NETDEV_DEV(netdev, &pdev->dev);
5778 
5779 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5780 	if (!status) {
5781 		netdev->features |= NETIF_F_HIGHDMA;
5782 	} else {
5783 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5784 		if (status) {
5785 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5786 			goto free_netdev;
5787 		}
5788 	}
5789 
5790 	status = pci_enable_pcie_error_reporting(pdev);
5791 	if (!status)
5792 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5793 
5794 	status = be_map_pci_bars(adapter);
5795 	if (status)
5796 		goto free_netdev;
5797 
5798 	status = be_drv_init(adapter);
5799 	if (status)
5800 		goto unmap_bars;
5801 
5802 	status = be_setup(adapter);
5803 	if (status)
5804 		goto drv_cleanup;
5805 
5806 	be_netdev_init(netdev);
5807 	status = register_netdev(netdev);
5808 	if (status != 0)
5809 		goto unsetup;
5810 
5811 	be_roce_dev_add(adapter);
5812 
5813 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5814 	adapter->error_recovery.probe_time = jiffies;
5815 
	/* On-die temperature is not supported on VFs */
5817 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5818 		adapter->hwmon_info.hwmon_dev =
5819 			devm_hwmon_device_register_with_groups(&pdev->dev,
5820 							       DRV_NAME,
5821 							       adapter,
5822 							       be_hwmon_groups);
5823 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5824 	}
5825 
5826 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5827 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5828 
5829 	return 0;
5830 
5831 unsetup:
5832 	be_clear(adapter);
5833 drv_cleanup:
5834 	be_drv_cleanup(adapter);
5835 unmap_bars:
5836 	be_unmap_pci_bars(adapter);
5837 free_netdev:
5838 	free_netdev(netdev);
5839 rel_reg:
5840 	pci_release_regions(pdev);
5841 disable_dev:
5842 	pci_disable_device(pdev);
5843 do_none:
5844 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5845 	return status;
5846 }
5847 
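/* Legacy PM suspend callback: quiesce the adapter and put the device into
 * the requested low-power state.
 */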
5848 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5849 {
5850 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5851 
5852 	be_intr_set(adapter, false);
5853 	be_cancel_err_detection(adapter);
5854 
5855 	be_cleanup(adapter);
5856 
5857 	pci_save_state(pdev);
5858 	pci_disable_device(pdev);
5859 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5860 	return 0;
5861 }
5862 
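/* Legacy PM resume callback: re-enable the device, restore config space
 * and bring the adapter back up.
 */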
5863 static int be_pci_resume(struct pci_dev *pdev)
5864 {
5865 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5866 	int status = 0;
5867 
5868 	status = pci_enable_device(pdev);
5869 	if (status)
5870 		return status;
5871 
5872 	pci_restore_state(pdev);
5873 
5874 	status = be_resume(adapter);
5875 	if (status)
5876 		return status;
5877 
5878 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5879 
5880 	return 0;
5881 }
5882 
5883 /*
5884  * An FLR will stop BE from DMAing any data.
5885  */
5886 static void be_shutdown(struct pci_dev *pdev)
5887 {
5888 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5889 
5890 	if (!adapter)
5891 		return;
5892 
5893 	be_roce_dev_shutdown(adapter);
5894 	cancel_delayed_work_sync(&adapter->work);
5895 	be_cancel_err_detection(adapter);
5896 
5897 	netif_device_detach(adapter->netdev);
5898 
5899 	be_cmd_reset_function(adapter);
5900 
5901 	pci_disable_device(pdev);
5902 }
5903 
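/* EEH/AER error_detected callback: quiesce the adapter and tell the PCI
 * core whether to attempt a slot reset or to disconnect the device.
 */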
5904 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5905 					    pci_channel_state_t state)
5906 {
5907 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5908 
5909 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5910 
5911 	be_roce_dev_remove(adapter);
5912 
5913 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5914 		be_set_error(adapter, BE_ERROR_EEH);
5915 
5916 		be_cancel_err_detection(adapter);
5917 
5918 		be_cleanup(adapter);
5919 	}
5920 
5921 	if (state == pci_channel_io_perm_failure)
5922 		return PCI_ERS_RESULT_DISCONNECT;
5923 
5924 	pci_disable_device(pdev);
5925 
	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while the flash dump is in progress
	 * can cause it not to recover; wait for the dump to finish.
	 * Wait only for the first function, as this is needed only once
	 * per adapter.
	 */
5932 	if (pdev->devfn == 0)
5933 		ssleep(30);
5934 
5935 	return PCI_ERS_RESULT_NEED_RESET;
5936 }
5937 
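/* EEH/AER slot_reset callback: re-enable the device after the slot reset
 * and wait for the FW to become ready again.
 */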
5938 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5939 {
5940 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5941 	int status;
5942 
5943 	dev_info(&adapter->pdev->dev, "EEH reset\n");
5944 
5945 	status = pci_enable_device(pdev);
5946 	if (status)
5947 		return PCI_ERS_RESULT_DISCONNECT;
5948 
5949 	pci_set_master(pdev);
5950 	pci_restore_state(pdev);
5951 
5952 	/* Check if card is ok and fw is ready */
5953 	dev_info(&adapter->pdev->dev,
5954 		 "Waiting for FW to be ready after EEH reset\n");
5955 	status = be_fw_wait_ready(adapter);
5956 	if (status)
5957 		return PCI_ERS_RESULT_DISCONNECT;
5958 
5959 	pci_cleanup_aer_uncorrect_error_status(pdev);
5960 	be_clear_error(adapter, BE_CLEAR_ALL);
5961 	return PCI_ERS_RESULT_RECOVERED;
5962 }
5963 
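/* EEH/AER resume callback: restore normal operation once the slot reset
 * has completed successfully.
 */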
5964 static void be_eeh_resume(struct pci_dev *pdev)
5965 {
5966 	int status = 0;
5967 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5968 
5969 	dev_info(&adapter->pdev->dev, "EEH resume\n");
5970 
5971 	pci_save_state(pdev);
5972 
5973 	status = be_resume(adapter);
5974 	if (status)
5975 		goto err;
5976 
5977 	be_roce_dev_add(adapter);
5978 
5979 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5980 	return;
5981 err:
5982 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5983 }
5984 
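/* sriov_configure callback (sysfs sriov_numvfs): enable or disable VFs
 * and, on Skyhawk, redistribute the PF-pool resources across the
 * requested number of VFs.
 */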
5985 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5986 {
5987 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5988 	struct be_resources vft_res = {0};
5989 	int status;
5990 
5991 	if (!num_vfs)
5992 		be_vf_clear(adapter);
5993 
5994 	adapter->num_vfs = num_vfs;
5995 
5996 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5997 		dev_warn(&pdev->dev,
5998 			 "Cannot disable VFs while they are assigned\n");
5999 		return -EBUSY;
6000 	}
6001 
	/* When the HW is in an SRIOV-capable configuration, the PF-pool
	 * resources are distributed equally across the maximum number of
	 * VFs. The user may request that only a subset of the max VFs be
	 * enabled. Based on num_vfs, redistribute the resources across
	 * num_vfs so that each VF gets a larger share of the resources.
	 * This facility is not available in BE3 FW; on Lancer chips the
	 * FW performs this redistribution itself.
	 */
6010 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6011 		be_calculate_vf_res(adapter, adapter->num_vfs,
6012 				    &vft_res);
6013 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6014 						 adapter->num_vfs, &vft_res);
6015 		if (status)
6016 			dev_err(&pdev->dev,
6017 				"Failed to optimize SR-IOV resources\n");
6018 	}
6019 
6020 	status = be_get_resources(adapter);
6021 	if (status)
6022 		return be_cmd_status(status);
6023 
6024 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6025 	rtnl_lock();
6026 	status = be_update_queues(adapter);
6027 	rtnl_unlock();
6028 	if (status)
6029 		return be_cmd_status(status);
6030 
6031 	if (adapter->num_vfs)
6032 		status = be_vf_setup(adapter);
6033 
6034 	if (!status)
6035 		return adapter->num_vfs;
6036 
6037 	return 0;
6038 }
6039 
6040 static const struct pci_error_handlers be_eeh_handlers = {
6041 	.error_detected = be_eeh_err_detected,
6042 	.slot_reset = be_eeh_reset,
6043 	.resume = be_eeh_resume,
6044 };
6045 
6046 static struct pci_driver be_driver = {
6047 	.name = DRV_NAME,
6048 	.id_table = be_dev_ids,
6049 	.probe = be_probe,
6050 	.remove = be_remove,
6051 	.suspend = be_suspend,
6052 	.resume = be_pci_resume,
6053 	.shutdown = be_shutdown,
6054 	.sriov_configure = be_pci_sriov_configure,
6055 	.err_handler = &be_eeh_handlers
6056 };
6057 
6058 static int __init be_init_module(void)
6059 {
6060 	int status;
6061 
6062 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6063 	    rx_frag_size != 2048) {
6064 		printk(KERN_WARNING DRV_NAME
6065 			" : Module param rx_frag_size must be 2048/4096/8192."
6066 			" Using 2048\n");
6067 		rx_frag_size = 2048;
6068 	}
6069 
6070 	if (num_vfs > 0) {
		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6072 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6073 	}
6074 
6075 	be_wq = create_singlethread_workqueue("be_wq");
6076 	if (!be_wq) {
		pr_warn(DRV_NAME ": workqueue creation failed\n");
		return -ENOMEM;
6079 	}
6080 
6081 	be_err_recovery_workq =
6082 		create_singlethread_workqueue("be_err_recover");
6083 	if (!be_err_recovery_workq)
		pr_warn(DRV_NAME ": Could not create error recovery workqueue\n");
6085 
6086 	status = pci_register_driver(&be_driver);
6087 	if (status) {
6088 		destroy_workqueue(be_wq);
6089 		be_destroy_err_recovery_workq();
6090 	}
6091 	return status;
6092 }
6093 module_init(be_init_module);
6094 
6095 static void __exit be_exit_module(void)
6096 {
6097 	pci_unregister_driver(&be_driver);
6098 
6099 	be_destroy_err_recovery_workq();
6100 
6101 	if (be_wq)
6102 		destroy_workqueue(be_wq);
6103 }
6104 module_exit(be_exit_module);
6105