xref: /openbmc/linux/drivers/net/ethernet/emulex/benet/be_main.c (revision a0ae2562c6c4b2721d9fddba63b7286c13517d9f)
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* The num_vfs module param is obsolete.
34  * Use the sysfs interface to enable/disable VFs instead.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
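/* Toggle host interrupt delivery by flipping the HOSTINTR bit in the MEMBAR
 * interrupt-control register in PCI config space. Used by be_intr_set() as a
 * fallback when the FW command to set interrupts fails.
 */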
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On Lancer, interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
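/* Notify the controller of newly posted RX buffers by writing the ring id and
 * the number of posted entries to the RQ doorbell register.
 */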
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
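/* Program the given MAC as the interface's primary MAC (pmac_id[0]). If the
 * MAC is already present in the uc-list, reuse its pmac_id instead of issuing
 * another PMAC_ADD to the FW.
 */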
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279 			/* mac already added, skip addition */
280 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281 			return 0;
282 		}
283 	}
284 
285 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286 			       &adapter->pmac_id[0], 0);
287 }
288 
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291 	int i;
292 
293 	/* Skip deletion if the programmed mac is
294 	 * being used in uc-list
295 	 */
296 	for (i = 0; i < adapter->uc_macs; i++) {
297 		if (adapter->pmac_id[i + 1] == pmac_id)
298 			return;
299 	}
300 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302 
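/* ndo_set_mac_address handler: when the device is running, program the new
 * MAC, delete the old one and verify with the FW that the new MAC is actually
 * active before updating netdev->dev_addr; otherwise only netdev->dev_addr is
 * updated.
 */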
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305 	struct be_adapter *adapter = netdev_priv(netdev);
306 	struct device *dev = &adapter->pdev->dev;
307 	struct sockaddr *addr = p;
308 	int status;
309 	u8 mac[ETH_ALEN];
310 	u32 old_pmac_id = adapter->pmac_id[0];
311 
312 	if (!is_valid_ether_addr(addr->sa_data))
313 		return -EADDRNOTAVAIL;
314 
315 	/* Proceed further only if the user-provided MAC is different
316 	 * from the active MAC
317 	 */
318 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319 		return 0;
320 
321 	/* BE3 VFs without FILTMGMT privilege are not allowed to set their
322 	 * MAC address
323 	 */
324 	if (BEx_chip(adapter) && be_virtfn(adapter) &&
325 	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
326 		return -EPERM;
327 
328 	/* if device is not running, copy MAC to netdev->dev_addr */
329 	if (!netif_running(netdev))
330 		goto done;
331 
332 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333 	 * privilege or if the PF did not provision the new MAC address.
334 	 * On BE3, this cmd always fails if the VF doesn't have the
335 	 * FILTMGMT privilege. This failure is OK only if the PF has already
336 	 * programmed the MAC for the VF.
337 	 */
338 	mutex_lock(&adapter->rx_filter_lock);
339 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340 	if (!status) {
341 
342 		/* Delete the old programmed MAC. This call may fail if the
343 		 * old MAC was already deleted by the PF driver.
344 		 */
345 		if (adapter->pmac_id[0] != old_pmac_id)
346 			be_dev_mac_del(adapter, old_pmac_id);
347 	}
348 
349 	mutex_unlock(&adapter->rx_filter_lock);
350 	/* Decide whether the new MAC was successfully activated only after
351 	 * querying the FW
352 	 */
353 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354 				       adapter->if_handle, true, 0);
355 	if (status)
356 		goto err;
357 
358 	/* The MAC change did not happen, either due to lack of privilege
359 	 * or because the PF did not pre-provision the new MAC.
360 	 */
361 	if (!ether_addr_equal(addr->sa_data, mac)) {
362 		status = -EPERM;
363 		goto err;
364 	}
365 
366 	/* Remember currently programmed MAC */
367 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
370 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371 	return 0;
372 err:
373 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374 	return status;
375 }
376 
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380 	if (BE2_chip(adapter)) {
381 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	} else if (BE3_chip(adapter)) {
385 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386 
387 		return &cmd->hw_stats;
388 	} else {
389 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390 
391 		return &cmd->hw_stats;
392 	}
393 }
394 
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398 	if (BE2_chip(adapter)) {
399 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	} else if (BE3_chip(adapter)) {
403 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404 
405 		return &hw_stats->erx;
406 	} else {
407 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408 
409 		return &hw_stats->erx;
410 	}
411 }
412 
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418 	struct be_port_rxf_stats_v0 *port_stats =
419 					&rxf_stats->port[adapter->port_num];
420 	struct be_drv_stats *drvs = &adapter->drv_stats;
421 
422 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
424 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
425 	drvs->rx_control_frames = port_stats->rx_control_frames;
426 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438 	drvs->rx_dropped_header_too_small =
439 		port_stats->rx_dropped_header_too_small;
440 	drvs->rx_address_filtered =
441 					port_stats->rx_address_filtered +
442 					port_stats->rx_vlan_filtered;
443 	drvs->rx_alignment_symbol_errors =
444 		port_stats->rx_alignment_symbol_errors;
445 
446 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
447 	drvs->tx_controlframes = port_stats->tx_controlframes;
448 
449 	if (adapter->port_num)
450 		drvs->jabber_events = rxf_stats->port1_jabber_events;
451 	else
452 		drvs->jabber_events = rxf_stats->port0_jabber_events;
453 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
456 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461 
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467 	struct be_port_rxf_stats_v1 *port_stats =
468 					&rxf_stats->port[adapter->port_num];
469 	struct be_drv_stats *drvs = &adapter->drv_stats;
470 
471 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
475 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
476 	drvs->rx_control_frames = port_stats->rx_control_frames;
477 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487 	drvs->rx_dropped_header_too_small =
488 		port_stats->rx_dropped_header_too_small;
489 	drvs->rx_input_fifo_overflow_drop =
490 		port_stats->rx_input_fifo_overflow_drop;
491 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
492 	drvs->rx_alignment_symbol_errors =
493 		port_stats->rx_alignment_symbol_errors;
494 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
496 	drvs->tx_controlframes = port_stats->tx_controlframes;
497 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498 	drvs->jabber_events = port_stats->jabber_events;
499 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
502 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507 
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513 	struct be_port_rxf_stats_v2 *port_stats =
514 					&rxf_stats->port[adapter->port_num];
515 	struct be_drv_stats *drvs = &adapter->drv_stats;
516 
517 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
521 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
522 	drvs->rx_control_frames = port_stats->rx_control_frames;
523 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533 	drvs->rx_dropped_header_too_small =
534 		port_stats->rx_dropped_header_too_small;
535 	drvs->rx_input_fifo_overflow_drop =
536 		port_stats->rx_input_fifo_overflow_drop;
537 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
538 	drvs->rx_alignment_symbol_errors =
539 		port_stats->rx_alignment_symbol_errors;
540 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
542 	drvs->tx_controlframes = port_stats->tx_controlframes;
543 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544 	drvs->jabber_events = port_stats->jabber_events;
545 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
548 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552 	if (be_roce_supported(adapter)) {
553 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555 		drvs->rx_roce_frames = port_stats->roce_frames_received;
556 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
557 		drvs->roce_drops_payload_len =
558 			port_stats->roce_drops_payload_len;
559 	}
560 }
561 
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564 	struct be_drv_stats *drvs = &adapter->drv_stats;
565 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566 
567 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577 	drvs->rx_dropped_tcp_length =
578 				pport_stats->rx_dropped_invalid_tcp_length;
579 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582 	drvs->rx_dropped_header_too_small =
583 				pport_stats->rx_dropped_header_too_small;
584 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585 	drvs->rx_address_filtered =
586 					pport_stats->rx_address_filtered +
587 					pport_stats->rx_vlan_filtered;
588 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592 	drvs->jabber_events = pport_stats->rx_jabbers;
593 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
594 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595 	drvs->rx_drops_too_many_frags =
596 				pport_stats->rx_drops_too_many_frags_lo;
597 }
598 
599 static void accumulate_16bit_val(u32 *acc, u16 val)
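/* Fold a 16-bit HW counter value into a 32-bit SW accumulator, adding 65536
 * whenever a wrap-around of the HW counter is detected.
 */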
600 {
601 #define lo(x)			((x) & 0xFFFF)
602 #define hi(x)			((x) & 0xFFFF0000)
603 	bool wrapped = val < lo(*acc);
604 	u32 newacc = hi(*acc) + val;
605 
606 	if (wrapped)
607 		newacc += 65536;
608 	WRITE_ONCE(*acc, newacc);
609 }
610 
611 static void populate_erx_stats(struct be_adapter *adapter,
612 			       struct be_rx_obj *rxo, u32 erx_stat)
613 {
614 	if (!BEx_chip(adapter))
615 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616 	else
617 		/* This erx HW counter can wrap around after 65535.
618 		 * The driver accumulates it into a 32-bit value.
619 		 */
620 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621 				     (u16)erx_stat);
622 }
623 
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627 	struct be_rx_obj *rxo;
628 	int i;
629 	u32 erx_stat;
630 
631 	if (lancer_chip(adapter)) {
632 		populate_lancer_stats(adapter);
633 	} else {
634 		if (BE2_chip(adapter))
635 			populate_be_v0_stats(adapter);
636 		else if (BE3_chip(adapter))
637 			/* for BE3 */
638 			populate_be_v1_stats(adapter);
639 		else
640 			populate_be_v2_stats(adapter);
641 
642 		/* erx_v2 is longer than v0 and v1; use the v2 layout for v0/v1 access */
643 		for_all_rx_queues(adapter, rxo, i) {
644 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645 			populate_erx_stats(adapter, rxo, erx_stat);
646 		}
647 	}
648 }
649 
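/* ndo_get_stats64 handler: aggregate the per-queue SW packet/byte counters
 * (read under u64_stats sync) and map the FW/port error counters into
 * rtnl_link_stats64.
 */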
650 static void be_get_stats64(struct net_device *netdev,
651 			   struct rtnl_link_stats64 *stats)
652 {
653 	struct be_adapter *adapter = netdev_priv(netdev);
654 	struct be_drv_stats *drvs = &adapter->drv_stats;
655 	struct be_rx_obj *rxo;
656 	struct be_tx_obj *txo;
657 	u64 pkts, bytes;
658 	unsigned int start;
659 	int i;
660 
661 	for_all_rx_queues(adapter, rxo, i) {
662 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
663 
664 		do {
665 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666 			pkts = rx_stats(rxo)->rx_pkts;
667 			bytes = rx_stats(rxo)->rx_bytes;
668 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669 		stats->rx_packets += pkts;
670 		stats->rx_bytes += bytes;
671 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673 					rx_stats(rxo)->rx_drops_no_frags;
674 	}
675 
676 	for_all_tx_queues(adapter, txo, i) {
677 		const struct be_tx_stats *tx_stats = tx_stats(txo);
678 
679 		do {
680 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681 			pkts = tx_stats(txo)->tx_pkts;
682 			bytes = tx_stats(txo)->tx_bytes;
683 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684 		stats->tx_packets += pkts;
685 		stats->tx_bytes += bytes;
686 	}
687 
688 	/* bad pkts received */
689 	stats->rx_errors = drvs->rx_crc_errors +
690 		drvs->rx_alignment_symbol_errors +
691 		drvs->rx_in_range_errors +
692 		drvs->rx_out_range_errors +
693 		drvs->rx_frame_too_long +
694 		drvs->rx_dropped_too_small +
695 		drvs->rx_dropped_too_short +
696 		drvs->rx_dropped_header_too_small +
697 		drvs->rx_dropped_tcp_length +
698 		drvs->rx_dropped_runt;
699 
700 	/* detailed rx errors */
701 	stats->rx_length_errors = drvs->rx_in_range_errors +
702 		drvs->rx_out_range_errors +
703 		drvs->rx_frame_too_long;
704 
705 	stats->rx_crc_errors = drvs->rx_crc_errors;
706 
707 	/* frame alignment errors */
708 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709 
710 	/* receiver fifo overrun */
711 	/* drops_no_pbuf is not per i/f, it's per BE card */
712 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713 				drvs->rx_input_fifo_overflow_drop +
714 				drvs->rx_drops_no_pbuf;
715 }
716 
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719 	struct net_device *netdev = adapter->netdev;
720 
721 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722 		netif_carrier_off(netdev);
723 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724 	}
725 
726 	if (link_status)
727 		netif_carrier_on(netdev);
728 	else
729 		netif_carrier_off(netdev);
730 
731 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733 
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736 	if (skb->encapsulation)
737 		return skb_inner_transport_offset(skb) +
738 		       inner_tcp_hdrlen(skb);
739 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741 
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744 	struct be_tx_stats *stats = tx_stats(txo);
745 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746 	/* Account for headers which get duplicated in TSO pkt */
747 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748 
749 	u64_stats_update_begin(&stats->sync);
750 	stats->tx_reqs++;
751 	stats->tx_bytes += skb->len + dup_hdr_len;
752 	stats->tx_pkts += tx_pkts;
753 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754 		stats->tx_vxlan_offload_pkts += tx_pkts;
755 	u64_stats_update_end(&stats->sync);
756 }
757 
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761 	/* +1 for the header wrb */
762 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764 
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770 	wrb->rsvd0 = 0;
771 }
772 
773 /* A dummy wrb is just all zeros. A separate routine is used for the dummy
774  * wrb to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778 	wrb->frag_pa_hi = 0;
779 	wrb->frag_pa_lo = 0;
780 	wrb->frag_len = 0;
781 	wrb->rsvd0 = 0;
782 }
783 
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785 				     struct sk_buff *skb)
786 {
787 	u8 vlan_prio;
788 	u16 vlan_tag;
789 
790 	vlan_tag = skb_vlan_tag_get(skb);
791 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792 	/* If vlan priority provided by OS is NOT in available bmap */
793 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795 				adapter->recommended_prio_bits;
796 
797 	return vlan_tag;
798 }
799 
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803 	return (inner_ip_hdr(skb)->version == 4) ?
804 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806 
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809 	return (ip_hdr(skb)->version == 4) ?
810 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812 
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817 
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820 	return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822 
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827 
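/* Derive the WRB feature flags (LSO, IP/TCP/UDP csum offload, VLAN) from the
 * skb's offload state; these flags are later used to build the TX header WRB.
 */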
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829 				       struct sk_buff *skb,
830 				       struct be_wrb_params *wrb_params)
831 {
832 	u16 proto;
833 
834 	if (skb_is_gso(skb)) {
835 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
836 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840 		if (skb->encapsulation) {
841 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842 			proto = skb_inner_ip_proto(skb);
843 		} else {
844 			proto = skb_ip_proto(skb);
845 		}
846 		if (proto == IPPROTO_TCP)
847 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848 		else if (proto == IPPROTO_UDP)
849 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850 	}
851 
852 	if (skb_vlan_tag_present(skb)) {
853 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855 	}
856 
857 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859 
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861 			 struct be_eth_hdr_wrb *hdr,
862 			 struct be_wrb_params *wrb_params,
863 			 struct sk_buff *skb)
864 {
865 	memset(hdr, 0, sizeof(*hdr));
866 
867 	SET_TX_WRB_HDR_BITS(crc, hdr,
868 			    BE_WRB_F_GET(wrb_params->features, CRC));
869 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
870 			    BE_WRB_F_GET(wrb_params->features, IPCS));
871 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
873 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
874 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
875 
876 	SET_TX_WRB_HDR_BITS(lso, hdr,
877 			    BE_WRB_F_GET(wrb_params->features, LSO));
878 	SET_TX_WRB_HDR_BITS(lso6, hdr,
879 			    BE_WRB_F_GET(wrb_params->features, LSO6));
880 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881 
882 	/* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883 	 * hack is not needed, the evt bit is set while ringing the DB.
884 	 */
885 	SET_TX_WRB_HDR_BITS(event, hdr,
886 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887 	SET_TX_WRB_HDR_BITS(vlan, hdr,
888 			    BE_WRB_F_GET(wrb_params->features, VLAN));
889 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890 
891 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
894 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896 
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898 			  bool unmap_single)
899 {
900 	dma_addr_t dma;
901 	u32 frag_len = le32_to_cpu(wrb->frag_len);
902 
903 
904 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905 		(u64)le32_to_cpu(wrb->frag_pa_lo);
906 	if (frag_len) {
907 		if (unmap_single)
908 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909 		else
910 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911 	}
912 }
913 
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917 	u32 head = txo->q.head;
918 
919 	queue_head_inc(&txo->q);
920 	return head;
921 }
922 
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925 				struct be_tx_obj *txo,
926 				struct be_wrb_params *wrb_params,
927 				struct sk_buff *skb, u16 head)
928 {
929 	u32 num_frags = skb_wrb_cnt(skb);
930 	struct be_queue_info *txq = &txo->q;
931 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932 
933 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
935 
936 	BUG_ON(txo->sent_skb_list[head]);
937 	txo->sent_skb_list[head] = skb;
938 	txo->last_req_hdr = head;
939 	atomic_add(num_frags, &txq->used);
940 	txo->last_req_wrb_cnt = num_frags;
941 	txo->pend_wrb_cnt += num_frags;
942 }
943 
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946 				 int len)
947 {
948 	struct be_eth_wrb *wrb;
949 	struct be_queue_info *txq = &txo->q;
950 
951 	wrb = queue_head_node(txq);
952 	wrb_fill(wrb, busaddr, len);
953 	queue_head_inc(txq);
954 }
955 
956 /* Bring the queue back to the state it was in before the be_xmit_enqueue()
957  * routine was invoked. The producer index is restored to the previous packet
958  * and the WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961 			    struct be_tx_obj *txo, u32 head, bool map_single,
962 			    u32 copied)
963 {
964 	struct device *dev;
965 	struct be_eth_wrb *wrb;
966 	struct be_queue_info *txq = &txo->q;
967 
968 	dev = &adapter->pdev->dev;
969 	txq->head = head;
970 
971 	/* skip the first wrb (hdr); it's not mapped */
972 	queue_head_inc(txq);
973 	while (copied) {
974 		wrb = queue_head_node(txq);
975 		unmap_tx_frag(dev, wrb, map_single);
976 		map_single = false;
977 		copied -= le32_to_cpu(wrb->frag_len);
978 		queue_head_inc(txq);
979 	}
980 
981 	txq->head = head;
982 }
983 
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989 			   struct sk_buff *skb,
990 			   struct be_wrb_params *wrb_params)
991 {
992 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993 	struct device *dev = &adapter->pdev->dev;
994 	bool map_single = false;
995 	u32 head;
996 	dma_addr_t busaddr;
997 	int len;
998 
999 	head = be_tx_get_wrb_hdr(txo);
1000 
1001 	if (skb->len > skb->data_len) {
1002 		len = skb_headlen(skb);
1003 
1004 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1005 		if (dma_mapping_error(dev, busaddr))
1006 			goto dma_err;
1007 		map_single = true;
1008 		be_tx_setup_wrb_frag(txo, busaddr, len);
1009 		copied += len;
1010 	}
1011 
1012 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1013 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1014 		len = skb_frag_size(frag);
1015 
1016 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1017 		if (dma_mapping_error(dev, busaddr))
1018 			goto dma_err;
1019 		be_tx_setup_wrb_frag(txo, busaddr, len);
1020 		copied += len;
1021 	}
1022 
1023 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1024 
1025 	be_tx_stats_update(txo, skb);
1026 	return wrb_cnt;
1027 
1028 dma_err:
1029 	adapter->drv_stats.dma_map_errors++;
1030 	be_xmit_restore(adapter, txo, head, map_single, copied);
1031 	return 0;
1032 }
1033 
1034 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1035 {
1036 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1037 }
1038 
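/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet
 * in software, for the cases where HW VLAN insertion must be skipped.
 */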
1039 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1040 					     struct sk_buff *skb,
1041 					     struct be_wrb_params
1042 					     *wrb_params)
1043 {
1044 	u16 vlan_tag = 0;
1045 
1046 	skb = skb_share_check(skb, GFP_ATOMIC);
1047 	if (unlikely(!skb))
1048 		return skb;
1049 
1050 	if (skb_vlan_tag_present(skb))
1051 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1052 
1053 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1054 		if (!vlan_tag)
1055 			vlan_tag = adapter->pvid;
1056 		/* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1057 		 * skip VLAN insertion
1058 		 */
1059 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1060 	}
1061 
1062 	if (vlan_tag) {
1063 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1064 						vlan_tag);
1065 		if (unlikely(!skb))
1066 			return skb;
1067 		skb->vlan_tci = 0;
1068 	}
1069 
1070 	/* Insert the outer VLAN, if any */
1071 	if (adapter->qnq_vid) {
1072 		vlan_tag = adapter->qnq_vid;
1073 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1074 						vlan_tag);
1075 		if (unlikely(!skb))
1076 			return skb;
1077 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1078 	}
1079 
1080 	return skb;
1081 }
1082 
1083 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1084 {
1085 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1086 	u16 offset = ETH_HLEN;
1087 
1088 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1089 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1090 
1091 		offset += sizeof(struct ipv6hdr);
1092 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1093 		    ip6h->nexthdr != NEXTHDR_UDP) {
1094 			struct ipv6_opt_hdr *ehdr =
1095 				(struct ipv6_opt_hdr *)(skb->data + offset);
1096 
1097 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1098 			if (ehdr->hdrlen == 0xff)
1099 				return true;
1100 		}
1101 	}
1102 	return false;
1103 }
1104 
1105 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1106 {
1107 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1108 }
1109 
1110 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1111 {
1112 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1113 }
1114 
1115 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1116 						  struct sk_buff *skb,
1117 						  struct be_wrb_params
1118 						  *wrb_params)
1119 {
1120 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1121 	unsigned int eth_hdr_len;
1122 	struct iphdr *ip;
1123 
1124 	/* For padded packets, BE HW modifies the tot_len field in the IP header
1125 	 * incorrectly when a VLAN tag is inserted by HW.
1126 	 * For padded packets, Lancer computes an incorrect checksum.
1127 	 */
1128 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1129 						VLAN_ETH_HLEN : ETH_HLEN;
1130 	if (skb->len <= 60 &&
1131 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1132 	    is_ipv4_pkt(skb)) {
1133 		ip = (struct iphdr *)ip_hdr(skb);
1134 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1135 	}
1136 
1137 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1138 	 * tagging in pvid-tagging mode
1139 	 */
1140 	if (be_pvid_tagging_enabled(adapter) &&
1141 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1142 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1143 
1144 	/* HW has a bug wherein it will calculate CSUM for VLAN
1145 	 * pkts even though CSUM offload is disabled.
1146 	 * Manually insert the VLAN in the pkt.
1147 	 */
1148 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1149 	    skb_vlan_tag_present(skb)) {
1150 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1151 		if (unlikely(!skb))
1152 			goto err;
1153 	}
1154 
1155 	/* HW may lockup when VLAN HW tagging is requested on
1156 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1157 	 * skip HW tagging is not enabled by FW.
1158 	 */
1159 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1160 		     (adapter->pvid || adapter->qnq_vid) &&
1161 		     !qnq_async_evt_rcvd(adapter)))
1162 		goto tx_drop;
1163 
1164 	/* Manual VLAN tag insertion to prevent an ASIC lockup that can
1165 	 * occur when the ASIC inserts a VLAN tag into
1166 	 * certain ipv6 packets. Insert VLAN tags in the driver,
1167 	 * and set the event, completion and vlan bits accordingly
1168 	 * in the Tx WRB.
1169 	 */
1170 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1171 	    be_vlan_tag_tx_chk(adapter, skb)) {
1172 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1173 		if (unlikely(!skb))
1174 			goto err;
1175 	}
1176 
1177 	return skb;
1178 tx_drop:
1179 	dev_kfree_skb_any(skb);
1180 err:
1181 	return NULL;
1182 }
1183 
1184 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1185 					   struct sk_buff *skb,
1186 					   struct be_wrb_params *wrb_params)
1187 {
1188 	int err;
1189 
1190 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1191 	 * packets that are 32 bytes or less may cause a transmit stall
1192 	 * on that port. The workaround is to pad such packets
1193 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1194 	 */
1195 	if (skb->len <= 32) {
1196 		if (skb_put_padto(skb, 36))
1197 			return NULL;
1198 	}
1199 
1200 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1201 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1202 		if (!skb)
1203 			return NULL;
1204 	}
1205 
1206 	/* The stack can send us skbs with length greater than
1207 	 * what the HW can handle. Trim the extra bytes.
1208 	 */
1209 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1210 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1211 	WARN_ON(err);
1212 
1213 	return skb;
1214 }
1215 
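/* Ring the TX doorbell for all pending WRBs of this queue. The last header
 * WRB is made eventable so a TX completion is raised, and a dummy WRB is
 * appended if the pending count is odd (non-Lancer chips).
 */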
1216 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1217 {
1218 	struct be_queue_info *txq = &txo->q;
1219 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1220 
1221 	/* Mark the last request eventable if it hasn't been marked already */
1222 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1223 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1224 
1225 	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1226 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1227 		wrb_fill_dummy(queue_head_node(txq));
1228 		queue_head_inc(txq);
1229 		atomic_inc(&txq->used);
1230 		txo->pend_wrb_cnt++;
1231 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1232 					   TX_HDR_WRB_NUM_SHIFT);
1233 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1234 					  TX_HDR_WRB_NUM_SHIFT);
1235 	}
1236 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1237 	txo->pend_wrb_cnt = 0;
1238 }
1239 
1240 /* OS2BMC related */
1241 
1242 #define DHCP_CLIENT_PORT	68
1243 #define DHCP_SERVER_PORT	67
1244 #define NET_BIOS_PORT1		137
1245 #define NET_BIOS_PORT2		138
1246 #define DHCPV6_RAS_PORT		547
1247 
1248 #define is_mc_allowed_on_bmc(adapter, eh)	\
1249 	(!is_multicast_filt_enabled(adapter) &&	\
1250 	 is_multicast_ether_addr(eh->h_dest) &&	\
1251 	 !is_broadcast_ether_addr(eh->h_dest))
1252 
1253 #define is_bc_allowed_on_bmc(adapter, eh)	\
1254 	(!is_broadcast_filt_enabled(adapter) &&	\
1255 	 is_broadcast_ether_addr(eh->h_dest))
1256 
1257 #define is_arp_allowed_on_bmc(adapter, skb)	\
1258 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1259 
1260 #define is_broadcast_packet(eh, adapter)	\
1261 		(is_multicast_ether_addr(eh->h_dest) && \
1262 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1263 
1264 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1265 
1266 #define is_arp_filt_enabled(adapter)	\
1267 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1268 
1269 #define is_dhcp_client_filt_enabled(adapter)	\
1270 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1271 
1272 #define is_dhcp_srvr_filt_enabled(adapter)	\
1273 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1274 
1275 #define is_nbios_filt_enabled(adapter)	\
1276 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1277 
1278 #define is_ipv6_na_filt_enabled(adapter)	\
1279 		(adapter->bmc_filt_mask &	\
1280 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1281 
1282 #define is_ipv6_ra_filt_enabled(adapter)	\
1283 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1284 
1285 #define is_ipv6_ras_filt_enabled(adapter)	\
1286 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1287 
1288 #define is_broadcast_filt_enabled(adapter)	\
1289 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1290 
1291 #define is_multicast_filt_enabled(adapter)	\
1292 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1293 
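/* Decide whether a copy of this TX packet must also be sent to the BMC
 * (OS2BMC), based on its destination address, protocol/port and the adapter's
 * BMC filtering mask (bmc_filt_mask). May re-allocate the skb to inline the
 * VLAN tag.
 */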
1294 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1295 			       struct sk_buff **skb)
1296 {
1297 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1298 	bool os2bmc = false;
1299 
1300 	if (!be_is_os2bmc_enabled(adapter))
1301 		goto done;
1302 
1303 	if (!is_multicast_ether_addr(eh->h_dest))
1304 		goto done;
1305 
1306 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1307 	    is_bc_allowed_on_bmc(adapter, eh) ||
1308 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1309 		os2bmc = true;
1310 		goto done;
1311 	}
1312 
1313 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1314 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1315 		u8 nexthdr = hdr->nexthdr;
1316 
1317 		if (nexthdr == IPPROTO_ICMPV6) {
1318 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1319 
1320 			switch (icmp6->icmp6_type) {
1321 			case NDISC_ROUTER_ADVERTISEMENT:
1322 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1323 				goto done;
1324 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1325 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1326 				goto done;
1327 			default:
1328 				break;
1329 			}
1330 		}
1331 	}
1332 
1333 	if (is_udp_pkt((*skb))) {
1334 		struct udphdr *udp = udp_hdr((*skb));
1335 
1336 		switch (ntohs(udp->dest)) {
1337 		case DHCP_CLIENT_PORT:
1338 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1339 			goto done;
1340 		case DHCP_SERVER_PORT:
1341 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1342 			goto done;
1343 		case NET_BIOS_PORT1:
1344 		case NET_BIOS_PORT2:
1345 			os2bmc = is_nbios_filt_enabled(adapter);
1346 			goto done;
1347 		case DHCPV6_RAS_PORT:
1348 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1349 			goto done;
1350 		default:
1351 			break;
1352 		}
1353 	}
1354 done:
1355 	/* For packets sent over a vlan and destined to the BMC,
1356 	 * the asic expects the vlan tag to be inline in the packet.
1357 	 */
1358 	if (os2bmc)
1359 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1360 
1361 	return os2bmc;
1362 }
1363 
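/* ndo_start_xmit handler: apply TX workarounds, map and enqueue the skb's
 * WRBs, optionally enqueue a second copy for the BMC, and ring the TX
 * doorbell when xmit_more is not set or the queue gets stopped.
 */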
1364 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1365 {
1366 	struct be_adapter *adapter = netdev_priv(netdev);
1367 	u16 q_idx = skb_get_queue_mapping(skb);
1368 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1369 	struct be_wrb_params wrb_params = { 0 };
1370 	bool flush = !skb->xmit_more;
1371 	u16 wrb_cnt;
1372 
1373 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1374 	if (unlikely(!skb))
1375 		goto drop;
1376 
1377 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1378 
1379 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1380 	if (unlikely(!wrb_cnt)) {
1381 		dev_kfree_skb_any(skb);
1382 		goto drop;
1383 	}
1384 
1385 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1386 	 * enqueue the pkt a 2nd time with mgmt bit set.
1387 	 */
1388 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1389 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1390 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1391 		if (unlikely(!wrb_cnt))
1392 			goto drop;
1393 		else
1394 			skb_get(skb);
1395 	}
1396 
1397 	if (be_is_txq_full(txo)) {
1398 		netif_stop_subqueue(netdev, q_idx);
1399 		tx_stats(txo)->tx_stops++;
1400 	}
1401 
1402 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1403 		be_xmit_flush(adapter, txo);
1404 
1405 	return NETDEV_TX_OK;
1406 drop:
1407 	tx_stats(txo)->tx_drv_drops++;
1408 	/* Flush the already enqueued tx requests */
1409 	if (flush && txo->pend_wrb_cnt)
1410 		be_xmit_flush(adapter, txo);
1411 
1412 	return NETDEV_TX_OK;
1413 }
1414 
1415 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1416 {
1417 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1418 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1419 }
1420 
1421 static int be_set_vlan_promisc(struct be_adapter *adapter)
1422 {
1423 	struct device *dev = &adapter->pdev->dev;
1424 	int status;
1425 
1426 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1427 		return 0;
1428 
1429 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1430 	if (!status) {
1431 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1432 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1433 	} else {
1434 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1435 	}
1436 	return status;
1437 }
1438 
1439 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1440 {
1441 	struct device *dev = &adapter->pdev->dev;
1442 	int status;
1443 
1444 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1445 	if (!status) {
1446 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1447 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1448 	}
1449 	return status;
1450 }
1451 
1452 /*
1453  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1454  * If the user configures more, place BE in vlan promiscuous mode.
1455  */
1456 static int be_vid_config(struct be_adapter *adapter)
1457 {
1458 	struct device *dev = &adapter->pdev->dev;
1459 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1460 	u16 num = 0, i = 0;
1461 	int status = 0;
1462 
1463 	/* No need to change the VLAN state if the I/F is in promiscuous mode */
1464 	if (adapter->netdev->flags & IFF_PROMISC)
1465 		return 0;
1466 
1467 	if (adapter->vlans_added > be_max_vlans(adapter))
1468 		return be_set_vlan_promisc(adapter);
1469 
1470 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1471 		status = be_clear_vlan_promisc(adapter);
1472 		if (status)
1473 			return status;
1474 	}
1475 	/* Construct VLAN Table to give to HW */
1476 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1477 		vids[num++] = cpu_to_le16(i);
1478 
1479 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1480 	if (status) {
1481 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1482 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1483 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1484 		    addl_status(status) ==
1485 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1486 			return be_set_vlan_promisc(adapter);
1487 	}
1488 	return status;
1489 }
1490 
1491 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1492 {
1493 	struct be_adapter *adapter = netdev_priv(netdev);
1494 	int status = 0;
1495 
1496 	mutex_lock(&adapter->rx_filter_lock);
1497 
1498 	/* Packets with VID 0 are always received by Lancer by default */
1499 	if (lancer_chip(adapter) && vid == 0)
1500 		goto done;
1501 
1502 	if (test_bit(vid, adapter->vids))
1503 		goto done;
1504 
1505 	set_bit(vid, adapter->vids);
1506 	adapter->vlans_added++;
1507 
1508 	status = be_vid_config(adapter);
1509 done:
1510 	mutex_unlock(&adapter->rx_filter_lock);
1511 	return status;
1512 }
1513 
1514 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1515 {
1516 	struct be_adapter *adapter = netdev_priv(netdev);
1517 	int status = 0;
1518 
1519 	mutex_lock(&adapter->rx_filter_lock);
1520 
1521 	/* Packets with VID 0 are always received by Lancer by default */
1522 	if (lancer_chip(adapter) && vid == 0)
1523 		goto done;
1524 
1525 	if (!test_bit(vid, adapter->vids))
1526 		goto done;
1527 
1528 	clear_bit(vid, adapter->vids);
1529 	adapter->vlans_added--;
1530 
1531 	status = be_vid_config(adapter);
1532 done:
1533 	mutex_unlock(&adapter->rx_filter_lock);
1534 	return status;
1535 }
1536 
1537 static void be_set_all_promisc(struct be_adapter *adapter)
1538 {
1539 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1540 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1541 }
1542 
1543 static void be_set_mc_promisc(struct be_adapter *adapter)
1544 {
1545 	int status;
1546 
1547 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1548 		return;
1549 
1550 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1551 	if (!status)
1552 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1553 }
1554 
1555 static void be_set_uc_promisc(struct be_adapter *adapter)
1556 {
1557 	int status;
1558 
1559 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1560 		return;
1561 
1562 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1563 	if (!status)
1564 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1565 }
1566 
1567 static void be_clear_uc_promisc(struct be_adapter *adapter)
1568 {
1569 	int status;
1570 
1571 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1572 		return;
1573 
1574 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1575 	if (!status)
1576 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1577 }
1578 
1579 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1580  * We use a single callback function for both sync and unsync. We don't really
1581  * add/remove addresses through this callback; we use it only to detect changes
1582  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1583  */
1584 static int be_uc_list_update(struct net_device *netdev,
1585 			     const unsigned char *addr)
1586 {
1587 	struct be_adapter *adapter = netdev_priv(netdev);
1588 
1589 	adapter->update_uc_list = true;
1590 	return 0;
1591 }
1592 
1593 static int be_mc_list_update(struct net_device *netdev,
1594 			     const unsigned char *addr)
1595 {
1596 	struct be_adapter *adapter = netdev_priv(netdev);
1597 
1598 	adapter->update_mc_list = true;
1599 	return 0;
1600 }
1601 
1602 static void be_set_mc_list(struct be_adapter *adapter)
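/* Sync the netdev multicast list into the adapter's mc_list and program it in
 * HW; fall back to multicast-promiscuous mode when IFF_ALLMULTI is set or the
 * configured addresses exceed what the HW supports.
 */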
1603 {
1604 	struct net_device *netdev = adapter->netdev;
1605 	struct netdev_hw_addr *ha;
1606 	bool mc_promisc = false;
1607 	int status;
1608 
1609 	netif_addr_lock_bh(netdev);
1610 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1611 
1612 	if (netdev->flags & IFF_PROMISC) {
1613 		adapter->update_mc_list = false;
1614 	} else if (netdev->flags & IFF_ALLMULTI ||
1615 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1616 		/* Enable multicast promisc if num configured exceeds
1617 		 * what we support
1618 		 */
1619 		mc_promisc = true;
1620 		adapter->update_mc_list = false;
1621 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1622 		/* Update mc-list unconditionally if the iface was previously
1623 		 * in mc-promisc mode and now is out of that mode.
1624 		 */
1625 		adapter->update_mc_list = true;
1626 	}
1627 
1628 	if (adapter->update_mc_list) {
1629 		int i = 0;
1630 
1631 		/* cache the mc-list in adapter */
1632 		netdev_for_each_mc_addr(ha, netdev) {
1633 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1634 			i++;
1635 		}
1636 		adapter->mc_count = netdev_mc_count(netdev);
1637 	}
1638 	netif_addr_unlock_bh(netdev);
1639 
1640 	if (mc_promisc) {
1641 		be_set_mc_promisc(adapter);
1642 	} else if (adapter->update_mc_list) {
1643 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1644 		if (!status)
1645 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1646 		else
1647 			be_set_mc_promisc(adapter);
1648 
1649 		adapter->update_mc_list = false;
1650 	}
1651 }
1652 
1653 static void be_clear_mc_list(struct be_adapter *adapter)
1654 {
1655 	struct net_device *netdev = adapter->netdev;
1656 
1657 	__dev_mc_unsync(netdev, NULL);
1658 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1659 	adapter->mc_count = 0;
1660 }
1661 
1662 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1663 {
1664 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1665 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1666 		return 0;
1667 	}
1668 
1669 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1670 			       adapter->if_handle,
1671 			       &adapter->pmac_id[uc_idx + 1], 0);
1672 }
1673 
1674 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1675 {
1676 	if (pmac_id == adapter->pmac_id[0])
1677 		return;
1678 
1679 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1680 }
1681 
1682 static void be_set_uc_list(struct be_adapter *adapter)
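/* Sync the netdev unicast list: delete the previously programmed uc MACs and
 * re-add the current ones, or switch to uc-promiscuous mode when more
 * addresses are configured than uc MAC filters available.
 */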
1683 {
1684 	struct net_device *netdev = adapter->netdev;
1685 	struct netdev_hw_addr *ha;
1686 	bool uc_promisc = false;
1687 	int curr_uc_macs = 0, i;
1688 
1689 	netif_addr_lock_bh(netdev);
1690 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1691 
1692 	if (netdev->flags & IFF_PROMISC) {
1693 		adapter->update_uc_list = false;
1694 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1695 		uc_promisc = true;
1696 		adapter->update_uc_list = false;
1697 	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1698 		/* Update uc-list unconditionally if the iface was previously
1699 		 * in uc-promisc mode and now is out of that mode.
1700 		 */
1701 		adapter->update_uc_list = true;
1702 	}
1703 
1704 	if (adapter->update_uc_list) {
1705 		/* cache the uc-list in adapter array */
1706 		i = 0;
1707 		netdev_for_each_uc_addr(ha, netdev) {
1708 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1709 			i++;
1710 		}
1711 		curr_uc_macs = netdev_uc_count(netdev);
1712 	}
1713 	netif_addr_unlock_bh(netdev);
1714 
1715 	if (uc_promisc) {
1716 		be_set_uc_promisc(adapter);
1717 	} else if (adapter->update_uc_list) {
1718 		be_clear_uc_promisc(adapter);
1719 
1720 		for (i = 0; i < adapter->uc_macs; i++)
1721 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1722 
1723 		for (i = 0; i < curr_uc_macs; i++)
1724 			be_uc_mac_add(adapter, i);
1725 		adapter->uc_macs = curr_uc_macs;
1726 		adapter->update_uc_list = false;
1727 	}
1728 }
1729 
1730 static void be_clear_uc_list(struct be_adapter *adapter)
1731 {
1732 	struct net_device *netdev = adapter->netdev;
1733 	int i;
1734 
1735 	__dev_uc_unsync(netdev, NULL);
1736 	for (i = 0; i < adapter->uc_macs; i++)
1737 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1738 
1739 	adapter->uc_macs = 0;
1740 }
1741 
1742 static void __be_set_rx_mode(struct be_adapter *adapter)
1743 {
1744 	struct net_device *netdev = adapter->netdev;
1745 
1746 	mutex_lock(&adapter->rx_filter_lock);
1747 
1748 	if (netdev->flags & IFF_PROMISC) {
1749 		if (!be_in_all_promisc(adapter))
1750 			be_set_all_promisc(adapter);
1751 	} else if (be_in_all_promisc(adapter)) {
1752 		/* We need to re-program the vlan-list or clear
1753 		 * vlan-promisc mode (if needed) when the interface
1754 		 * comes out of promisc mode.
1755 		 */
1756 		be_vid_config(adapter);
1757 	}
1758 
1759 	be_set_uc_list(adapter);
1760 	be_set_mc_list(adapter);
1761 
1762 	mutex_unlock(&adapter->rx_filter_lock);
1763 }
1764 
1765 static void be_work_set_rx_mode(struct work_struct *work)
1766 {
1767 	struct be_cmd_work *cmd_work =
1768 				container_of(work, struct be_cmd_work, work);
1769 
1770 	__be_set_rx_mode(cmd_work->adapter);
1771 	kfree(cmd_work);
1772 }
1773 
1774 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1775 {
1776 	struct be_adapter *adapter = netdev_priv(netdev);
1777 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1778 	int status;
1779 
1780 	if (!sriov_enabled(adapter))
1781 		return -EPERM;
1782 
1783 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1784 		return -EINVAL;
1785 
1786 	/* Proceed further only if the user-provided MAC is different
1787 	 * from the active MAC
1788 	 */
1789 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1790 		return 0;
1791 
1792 	if (BEx_chip(adapter)) {
1793 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1794 				vf + 1);
1795 
1796 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1797 					 &vf_cfg->pmac_id, vf + 1);
1798 	} else {
1799 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1800 					vf + 1);
1801 	}
1802 
1803 	if (status) {
1804 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1805 			mac, vf, status);
1806 		return be_cmd_status(status);
1807 	}
1808 
1809 	ether_addr_copy(vf_cfg->mac_addr, mac);
1810 
1811 	return 0;
1812 }
1813 
1814 static int be_get_vf_config(struct net_device *netdev, int vf,
1815 			    struct ifla_vf_info *vi)
1816 {
1817 	struct be_adapter *adapter = netdev_priv(netdev);
1818 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1819 
1820 	if (!sriov_enabled(adapter))
1821 		return -EPERM;
1822 
1823 	if (vf >= adapter->num_vfs)
1824 		return -EINVAL;
1825 
1826 	vi->vf = vf;
1827 	vi->max_tx_rate = vf_cfg->tx_rate;
1828 	vi->min_tx_rate = 0;
1829 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1830 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1831 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1832 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1833 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1834 
1835 	return 0;
1836 }
1837 
1838 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1839 {
1840 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1841 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1842 	int vf_if_id = vf_cfg->if_handle;
1843 	int status;
1844 
1845 	/* Enable Transparent VLAN Tagging */
1846 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1847 	if (status)
1848 		return status;
1849 
1850 	/* With TVT enabled, clear any VLAN filters pre-programmed on the VF */
1851 	vids[0] = 0;
1852 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1853 	if (!status)
1854 		dev_info(&adapter->pdev->dev,
1855 			 "Cleared guest VLANs on VF%d", vf);
1856 
1857 	/* After TVT is enabled, disallow the VF from programming VLAN filters */
1858 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1859 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1860 						  ~BE_PRIV_FILTMGMT, vf + 1);
1861 		if (!status)
1862 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1863 	}
1864 	return 0;
1865 }
1866 
1867 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1868 {
1869 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1870 	struct device *dev = &adapter->pdev->dev;
1871 	int status;
1872 
1873 	/* Reset Transparent VLAN Tagging. */
1874 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1875 				       vf_cfg->if_handle, 0, 0);
1876 	if (status)
1877 		return status;
1878 
1879 	/* Allow VFs to program VLAN filtering */
1880 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1881 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1882 						  BE_PRIV_FILTMGMT, vf + 1);
1883 		if (!status) {
1884 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1885 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1886 		}
1887 	}
1888 
1889 	dev_info(dev,
1890 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1891 	return 0;
1892 }
1893 
1894 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1895 			  __be16 vlan_proto)
1896 {
1897 	struct be_adapter *adapter = netdev_priv(netdev);
1898 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1899 	int status;
1900 
1901 	if (!sriov_enabled(adapter))
1902 		return -EPERM;
1903 
1904 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1905 		return -EINVAL;
1906 
1907 	if (vlan_proto != htons(ETH_P_8021Q))
1908 		return -EPROTONOSUPPORT;
1909 
1910 	if (vlan || qos) {
1911 		vlan |= qos << VLAN_PRIO_SHIFT;
1912 		status = be_set_vf_tvt(adapter, vf, vlan);
1913 	} else {
1914 		status = be_clear_vf_tvt(adapter, vf);
1915 	}
1916 
1917 	if (status) {
1918 		dev_err(&adapter->pdev->dev,
1919 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1920 			status);
1921 		return be_cmd_status(status);
1922 	}
1923 
1924 	vf_cfg->vlan_tag = vlan;
1925 	return 0;
1926 }
1927 
1928 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1929 			     int min_tx_rate, int max_tx_rate)
1930 {
1931 	struct be_adapter *adapter = netdev_priv(netdev);
1932 	struct device *dev = &adapter->pdev->dev;
1933 	int percent_rate, status = 0;
1934 	u16 link_speed = 0;
1935 	u8 link_status;
1936 
1937 	if (!sriov_enabled(adapter))
1938 		return -EPERM;
1939 
1940 	if (vf >= adapter->num_vfs)
1941 		return -EINVAL;
1942 
1943 	if (min_tx_rate)
1944 		return -EINVAL;
1945 
1946 	if (!max_tx_rate)
1947 		goto config_qos;
1948 
1949 	status = be_cmd_link_status_query(adapter, &link_speed,
1950 					  &link_status, 0);
1951 	if (status)
1952 		goto err;
1953 
1954 	if (!link_status) {
1955 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1956 		status = -ENETDOWN;
1957 		goto err;
1958 	}
1959 
1960 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1961 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1962 			link_speed);
1963 		status = -EINVAL;
1964 		goto err;
1965 	}
1966 
1967 	/* On Skyhawk the QoS setting must be specified as a % of the link speed */
1968 	percent_rate = link_speed / 100;
1969 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1970 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1971 			percent_rate);
1972 		status = -EINVAL;
1973 		goto err;
1974 	}
1975 
1976 config_qos:
1977 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1978 	if (status)
1979 		goto err;
1980 
1981 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1982 	return 0;
1983 
1984 err:
1985 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1986 		max_tx_rate, vf);
1987 	return be_cmd_status(status);
1988 }
1989 
1990 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1991 				int link_state)
1992 {
1993 	struct be_adapter *adapter = netdev_priv(netdev);
1994 	int status;
1995 
1996 	if (!sriov_enabled(adapter))
1997 		return -EPERM;
1998 
1999 	if (vf >= adapter->num_vfs)
2000 		return -EINVAL;
2001 
2002 	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2003 	if (status) {
2004 		dev_err(&adapter->pdev->dev,
2005 			"Link state change on VF %d failed: %#x\n", vf, status);
2006 		return be_cmd_status(status);
2007 	}
2008 
2009 	adapter->vf_cfg[vf].plink_tracking = link_state;
2010 
2011 	return 0;
2012 }
2013 
2014 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2015 {
2016 	struct be_adapter *adapter = netdev_priv(netdev);
2017 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2018 	u8 spoofchk;
2019 	int status;
2020 
2021 	if (!sriov_enabled(adapter))
2022 		return -EPERM;
2023 
2024 	if (vf >= adapter->num_vfs)
2025 		return -EINVAL;
2026 
2027 	if (BEx_chip(adapter))
2028 		return -EOPNOTSUPP;
2029 
2030 	if (enable == vf_cfg->spoofchk)
2031 		return 0;
2032 
2033 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2034 
2035 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2036 				       0, spoofchk);
2037 	if (status) {
2038 		dev_err(&adapter->pdev->dev,
2039 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2040 		return be_cmd_status(status);
2041 	}
2042 
2043 	vf_cfg->spoofchk = enable;
2044 	return 0;
2045 }
2046 
2047 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2048 			  ulong now)
2049 {
2050 	aic->rx_pkts_prev = rx_pkts;
2051 	aic->tx_reqs_prev = tx_pkts;
2052 	aic->jiffies = now;
2053 }
2054 
2055 static int be_get_new_eqd(struct be_eq_obj *eqo)
2056 {
2057 	struct be_adapter *adapter = eqo->adapter;
2058 	int eqd, start;
2059 	struct be_aic_obj *aic;
2060 	struct be_rx_obj *rxo;
2061 	struct be_tx_obj *txo;
2062 	u64 rx_pkts = 0, tx_pkts = 0;
2063 	ulong now;
2064 	u32 pps, delta;
2065 	int i;
2066 
2067 	aic = &adapter->aic_obj[eqo->idx];
2068 	if (!aic->enable) {
2069 		if (aic->jiffies)
2070 			aic->jiffies = 0;
2071 		eqd = aic->et_eqd;
2072 		return eqd;
2073 	}
2074 
2075 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2076 		do {
2077 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2078 			rx_pkts += rxo->stats.rx_pkts;
2079 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2080 	}
2081 
2082 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2083 		do {
2084 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2085 			tx_pkts += txo->stats.tx_reqs;
2086 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2087 	}
2088 
2089 	/* Skip if the counters wrapped around or this is the first calculation */
2090 	now = jiffies;
2091 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2092 	    rx_pkts < aic->rx_pkts_prev ||
2093 	    tx_pkts < aic->tx_reqs_prev) {
2094 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2095 		return aic->prev_eqd;
2096 	}
2097 
2098 	delta = jiffies_to_msecs(now - aic->jiffies);
2099 	if (delta == 0)
2100 		return aic->prev_eqd;
2101 
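	/* pps is the combined RX + TX packet rate over the sampling window.
	 * The new delay grows by 4 for every 15K pps; values below 8 are
	 * rounded down to 0 and the result is then clamped to the
	 * [min_eqd, max_eqd] range configured for this EQ.
	 */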
2102 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2103 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2104 	eqd = (pps / 15000) << 2;
2105 
2106 	if (eqd < 8)
2107 		eqd = 0;
2108 	eqd = min_t(u32, eqd, aic->max_eqd);
2109 	eqd = max_t(u32, eqd, aic->min_eqd);
2110 
2111 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2112 
2113 	return eqd;
2114 }
2115 
2116 /* For Skyhawk-R only */
2117 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2118 {
2119 	struct be_adapter *adapter = eqo->adapter;
2120 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2121 	ulong now = jiffies;
2122 	int eqd;
2123 	u32 mult_enc;
2124 
2125 	if (!aic->enable)
2126 		return 0;
2127 
2128 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2129 		eqd = aic->prev_eqd;
2130 	else
2131 		eqd = be_get_new_eqd(eqo);
2132 
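	/* Bucket the computed delay into one of the four R2I delay-multiplier
	 * encodings supported by the Skyhawk EQ doorbell.
	 */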
2133 	if (eqd > 100)
2134 		mult_enc = R2I_DLY_ENC_1;
2135 	else if (eqd > 60)
2136 		mult_enc = R2I_DLY_ENC_2;
2137 	else if (eqd > 20)
2138 		mult_enc = R2I_DLY_ENC_3;
2139 	else
2140 		mult_enc = R2I_DLY_ENC_0;
2141 
2142 	aic->prev_eqd = eqd;
2143 
2144 	return mult_enc;
2145 }
2146 
2147 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2148 {
2149 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2150 	struct be_aic_obj *aic;
2151 	struct be_eq_obj *eqo;
2152 	int i, num = 0, eqd;
2153 
2154 	for_all_evt_queues(adapter, eqo, i) {
2155 		aic = &adapter->aic_obj[eqo->idx];
2156 		eqd = be_get_new_eqd(eqo);
2157 		if (force_update || eqd != aic->prev_eqd) {
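			/* Convert eqd into the delay-multiplier encoding
			 * used by be_cmd_modify_eqd() (eqd * 65 / 100).
			 */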
2158 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2159 			set_eqd[num].eq_id = eqo->q.id;
2160 			aic->prev_eqd = eqd;
2161 			num++;
2162 		}
2163 	}
2164 
2165 	if (num)
2166 		be_cmd_modify_eqd(adapter, set_eqd, num);
2167 }
2168 
2169 static void be_rx_stats_update(struct be_rx_obj *rxo,
2170 			       struct be_rx_compl_info *rxcp)
2171 {
2172 	struct be_rx_stats *stats = rx_stats(rxo);
2173 
2174 	u64_stats_update_begin(&stats->sync);
2175 	stats->rx_compl++;
2176 	stats->rx_bytes += rxcp->pkt_size;
2177 	stats->rx_pkts++;
2178 	if (rxcp->tunneled)
2179 		stats->rx_vxlan_offload_pkts++;
2180 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2181 		stats->rx_mcast_pkts++;
2182 	if (rxcp->err)
2183 		stats->rx_compl_err++;
2184 	u64_stats_update_end(&stats->sync);
2185 }
2186 
2187 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2188 {
2189 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2190 	 * Also ignore ipcksm for IPv6 pkts
2191 	 */
2192 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2193 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2194 }
2195 
2196 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2197 {
2198 	struct be_adapter *adapter = rxo->adapter;
2199 	struct be_rx_page_info *rx_page_info;
2200 	struct be_queue_info *rxq = &rxo->q;
2201 	u32 frag_idx = rxq->tail;
2202 
2203 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2204 	BUG_ON(!rx_page_info->page);
2205 
2206 	if (rx_page_info->last_frag) {
2207 		dma_unmap_page(&adapter->pdev->dev,
2208 			       dma_unmap_addr(rx_page_info, bus),
2209 			       adapter->big_page_size, DMA_FROM_DEVICE);
2210 		rx_page_info->last_frag = false;
2211 	} else {
2212 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2213 					dma_unmap_addr(rx_page_info, bus),
2214 					rx_frag_size, DMA_FROM_DEVICE);
2215 	}
2216 
2217 	queue_tail_inc(rxq);
2218 	atomic_dec(&rxq->used);
2219 	return rx_page_info;
2220 }
2221 
2222 /* Throw away the data in the Rx completion */
2223 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2224 				struct be_rx_compl_info *rxcp)
2225 {
2226 	struct be_rx_page_info *page_info;
2227 	u16 i, num_rcvd = rxcp->num_rcvd;
2228 
2229 	for (i = 0; i < num_rcvd; i++) {
2230 		page_info = get_rx_page_info(rxo);
2231 		put_page(page_info->page);
2232 		memset(page_info, 0, sizeof(*page_info));
2233 	}
2234 }
2235 
2236 /*
2237  * skb_fill_rx_data forms a complete skb for an ether frame
2238  * indicated by rxcp.
2239  */
2240 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2241 			     struct be_rx_compl_info *rxcp)
2242 {
2243 	struct be_rx_page_info *page_info;
2244 	u16 i, j;
2245 	u16 hdr_len, curr_frag_len, remaining;
2246 	u8 *start;
2247 
2248 	page_info = get_rx_page_info(rxo);
2249 	start = page_address(page_info->page) + page_info->page_offset;
2250 	prefetch(start);
2251 
2252 	/* Copy data in the first descriptor of this completion */
2253 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2254 
2255 	skb->len = curr_frag_len;
2256 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2257 		memcpy(skb->data, start, curr_frag_len);
2258 		/* Complete packet has now been moved to data */
2259 		put_page(page_info->page);
2260 		skb->data_len = 0;
2261 		skb->tail += curr_frag_len;
2262 	} else {
2263 		hdr_len = ETH_HLEN;
2264 		memcpy(skb->data, start, hdr_len);
2265 		skb_shinfo(skb)->nr_frags = 1;
2266 		skb_frag_set_page(skb, 0, page_info->page);
2267 		skb_shinfo(skb)->frags[0].page_offset =
2268 					page_info->page_offset + hdr_len;
2269 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2270 				  curr_frag_len - hdr_len);
2271 		skb->data_len = curr_frag_len - hdr_len;
2272 		skb->truesize += rx_frag_size;
2273 		skb->tail += hdr_len;
2274 	}
2275 	page_info->page = NULL;
2276 
2277 	if (rxcp->pkt_size <= rx_frag_size) {
2278 		BUG_ON(rxcp->num_rcvd != 1);
2279 		return;
2280 	}
2281 
2282 	/* More frags present for this completion */
2283 	remaining = rxcp->pkt_size - curr_frag_len;
2284 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2285 		page_info = get_rx_page_info(rxo);
2286 		curr_frag_len = min(remaining, rx_frag_size);
2287 
2288 		/* Coalesce all frags from the same physical page in one slot */
2289 		if (page_info->page_offset == 0) {
2290 			/* Fresh page */
2291 			j++;
2292 			skb_frag_set_page(skb, j, page_info->page);
2293 			skb_shinfo(skb)->frags[j].page_offset =
2294 							page_info->page_offset;
2295 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2296 			skb_shinfo(skb)->nr_frags++;
2297 		} else {
2298 			put_page(page_info->page);
2299 		}
2300 
2301 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2302 		skb->len += curr_frag_len;
2303 		skb->data_len += curr_frag_len;
2304 		skb->truesize += rx_frag_size;
2305 		remaining -= curr_frag_len;
2306 		page_info->page = NULL;
2307 	}
2308 	BUG_ON(j > MAX_SKB_FRAGS);
2309 }
2310 
2311 /* Process the RX completion indicated by rxcp when GRO is disabled */
2312 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2313 				struct be_rx_compl_info *rxcp)
2314 {
2315 	struct be_adapter *adapter = rxo->adapter;
2316 	struct net_device *netdev = adapter->netdev;
2317 	struct sk_buff *skb;
2318 
2319 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2320 	if (unlikely(!skb)) {
2321 		rx_stats(rxo)->rx_drops_no_skbs++;
2322 		be_rx_compl_discard(rxo, rxcp);
2323 		return;
2324 	}
2325 
2326 	skb_fill_rx_data(rxo, skb, rxcp);
2327 
2328 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2329 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2330 	else
2331 		skb_checksum_none_assert(skb);
2332 
2333 	skb->protocol = eth_type_trans(skb, netdev);
2334 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2335 	if (netdev->features & NETIF_F_RXHASH)
2336 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2337 
2338 	skb->csum_level = rxcp->tunneled;
2339 	skb_mark_napi_id(skb, napi);
2340 
2341 	if (rxcp->vlanf)
2342 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2343 
2344 	netif_receive_skb(skb);
2345 }
2346 
2347 /* Process the RX completion indicated by rxcp when GRO is enabled */
2348 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2349 				    struct napi_struct *napi,
2350 				    struct be_rx_compl_info *rxcp)
2351 {
2352 	struct be_adapter *adapter = rxo->adapter;
2353 	struct be_rx_page_info *page_info;
2354 	struct sk_buff *skb = NULL;
2355 	u16 remaining, curr_frag_len;
2356 	u16 i, j;
2357 
2358 	skb = napi_get_frags(napi);
2359 	if (!skb) {
2360 		be_rx_compl_discard(rxo, rxcp);
2361 		return;
2362 	}
2363 
2364 	remaining = rxcp->pkt_size;
2365 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2366 		page_info = get_rx_page_info(rxo);
2367 
2368 		curr_frag_len = min(remaining, rx_frag_size);
2369 
2370 		/* Coalesce all frags from the same physical page in one slot */
2371 		if (i == 0 || page_info->page_offset == 0) {
2372 			/* First frag or Fresh page */
2373 			j++;
2374 			skb_frag_set_page(skb, j, page_info->page);
2375 			skb_shinfo(skb)->frags[j].page_offset =
2376 							page_info->page_offset;
2377 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378 		} else {
2379 			put_page(page_info->page);
2380 		}
2381 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2382 		skb->truesize += rx_frag_size;
2383 		remaining -= curr_frag_len;
2384 		memset(page_info, 0, sizeof(*page_info));
2385 	}
2386 	BUG_ON(j > MAX_SKB_FRAGS);
2387 
2388 	skb_shinfo(skb)->nr_frags = j + 1;
2389 	skb->len = rxcp->pkt_size;
2390 	skb->data_len = rxcp->pkt_size;
2391 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2392 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2393 	if (adapter->netdev->features & NETIF_F_RXHASH)
2394 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2395 
2396 	skb->csum_level = rxcp->tunneled;
2397 
2398 	if (rxcp->vlanf)
2399 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2400 
2401 	napi_gro_frags(napi);
2402 }
2403 
2404 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2405 				 struct be_rx_compl_info *rxcp)
2406 {
2407 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2408 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2409 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2410 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2411 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2412 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2413 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2414 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2415 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2416 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2417 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2418 	if (rxcp->vlanf) {
2419 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2420 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2421 	}
2422 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2423 	rxcp->tunneled =
2424 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2425 }
2426 
2427 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2428 				 struct be_rx_compl_info *rxcp)
2429 {
2430 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2431 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2432 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2433 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2434 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2435 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2436 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2437 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2438 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2439 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2440 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2441 	if (rxcp->vlanf) {
2442 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2443 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2444 	}
2445 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2446 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2447 }
2448 
2449 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2450 {
2451 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2452 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2453 	struct be_adapter *adapter = rxo->adapter;
2454 
2455 	/* For checking the valid bit it is OK to use either definition as the
2456 	 * valid bit is at the same position in both v0 and v1 Rx compls */
2457 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2458 		return NULL;
2459 
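	/* Read the rest of the completion entry only after the valid bit
	 * has been observed as set.
	 */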
2460 	rmb();
2461 	be_dws_le_to_cpu(compl, sizeof(*compl));
2462 
2463 	if (adapter->be3_native)
2464 		be_parse_rx_compl_v1(compl, rxcp);
2465 	else
2466 		be_parse_rx_compl_v0(compl, rxcp);
2467 
2468 	if (rxcp->ip_frag)
2469 		rxcp->l4_csum = 0;
2470 
2471 	if (rxcp->vlanf) {
2472 		/* In QNQ modes, if qnq bit is not set, then the packet was
2473 		 * tagged only with the transparent outer vlan-tag and must
2474 		 * not be treated as a vlan packet by host
2475 		 */
2476 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2477 			rxcp->vlanf = 0;
2478 
2479 		if (!lancer_chip(adapter))
2480 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2481 
2482 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2483 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2484 			rxcp->vlanf = 0;
2485 	}
2486 
2487 	/* As the compl has been parsed, reset it; we won't touch it again */
2488 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2489 
2490 	queue_tail_inc(&rxo->cq);
2491 	return rxcp;
2492 }
2493 
2494 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2495 {
2496 	u32 order = get_order(size);
2497 
2498 	if (order > 0)
2499 		gfp |= __GFP_COMP;
2500 	return  alloc_pages(gfp, order);
2501 }
2502 
2503 /*
2504  * Allocate a page, split it into fragments of size rx_frag_size and post
2505  * them as receive buffers to BE
2506  */
2507 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2508 {
2509 	struct be_adapter *adapter = rxo->adapter;
2510 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2511 	struct be_queue_info *rxq = &rxo->q;
2512 	struct page *pagep = NULL;
2513 	struct device *dev = &adapter->pdev->dev;
2514 	struct be_eth_rx_d *rxd;
2515 	u64 page_dmaaddr = 0, frag_dmaaddr;
2516 	u32 posted, page_offset = 0, notify = 0;
2517 
2518 	page_info = &rxo->page_info_tbl[rxq->head];
2519 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2520 		if (!pagep) {
2521 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2522 			if (unlikely(!pagep)) {
2523 				rx_stats(rxo)->rx_post_fail++;
2524 				break;
2525 			}
2526 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2527 						    adapter->big_page_size,
2528 						    DMA_FROM_DEVICE);
2529 			if (dma_mapping_error(dev, page_dmaaddr)) {
2530 				put_page(pagep);
2531 				pagep = NULL;
2532 				adapter->drv_stats.dma_map_errors++;
2533 				break;
2534 			}
2535 			page_offset = 0;
2536 		} else {
2537 			get_page(pagep);
2538 			page_offset += rx_frag_size;
2539 		}
2540 		page_info->page_offset = page_offset;
2541 		page_info->page = pagep;
2542 
2543 		rxd = queue_head_node(rxq);
2544 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2545 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2546 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2547 
2548 		/* Any space left in the current big page for another frag? */
2549 		if ((page_offset + rx_frag_size + rx_frag_size) >
2550 					adapter->big_page_size) {
2551 			pagep = NULL;
2552 			page_info->last_frag = true;
2553 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2554 		} else {
2555 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2556 		}
2557 
2558 		prev_page_info = page_info;
2559 		queue_head_inc(rxq);
2560 		page_info = &rxo->page_info_tbl[rxq->head];
2561 	}
2562 
2563 	/* Mark the last frag of a page when we break out of the above loop
2564 	 * with no more slots available in the RXQ
2565 	 */
2566 	if (pagep) {
2567 		prev_page_info->last_frag = true;
2568 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2569 	}
2570 
2571 	if (posted) {
2572 		atomic_add(posted, &rxq->used);
2573 		if (rxo->rx_post_starved)
2574 			rxo->rx_post_starved = false;
2575 		do {
2576 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2577 			be_rxq_notify(adapter, rxq->id, notify);
2578 			posted -= notify;
2579 		} while (posted);
2580 	} else if (atomic_read(&rxq->used) == 0) {
2581 		/* Let be_worker replenish when memory is available */
2582 		rxo->rx_post_starved = true;
2583 	}
2584 }
2585 
2586 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2587 {
2588 	switch (status) {
2589 	case BE_TX_COMP_HDR_PARSE_ERR:
2590 		tx_stats(txo)->tx_hdr_parse_err++;
2591 		break;
2592 	case BE_TX_COMP_NDMA_ERR:
2593 		tx_stats(txo)->tx_dma_err++;
2594 		break;
2595 	case BE_TX_COMP_ACL_ERR:
2596 		tx_stats(txo)->tx_spoof_check_err++;
2597 		break;
2598 	}
2599 }
2600 
2601 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2602 {
2603 	switch (status) {
2604 	case LANCER_TX_COMP_LSO_ERR:
2605 		tx_stats(txo)->tx_tso_err++;
2606 		break;
2607 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2608 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2609 		tx_stats(txo)->tx_spoof_check_err++;
2610 		break;
2611 	case LANCER_TX_COMP_QINQ_ERR:
2612 		tx_stats(txo)->tx_qinq_err++;
2613 		break;
2614 	case LANCER_TX_COMP_PARITY_ERR:
2615 		tx_stats(txo)->tx_internal_parity_err++;
2616 		break;
2617 	case LANCER_TX_COMP_DMA_ERR:
2618 		tx_stats(txo)->tx_dma_err++;
2619 		break;
2620 	case LANCER_TX_COMP_SGE_ERR:
2621 		tx_stats(txo)->tx_sge_err++;
2622 		break;
2623 	}
2624 }
2625 
2626 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2627 						struct be_tx_obj *txo)
2628 {
2629 	struct be_queue_info *tx_cq = &txo->cq;
2630 	struct be_tx_compl_info *txcp = &txo->txcp;
2631 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2632 
2633 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2634 		return NULL;
2635 
2636 	/* Ensure load ordering of valid bit dword and other dwords below */
2637 	rmb();
2638 	be_dws_le_to_cpu(compl, sizeof(*compl));
2639 
2640 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2641 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2642 
2643 	if (txcp->status) {
2644 		if (lancer_chip(adapter)) {
2645 			lancer_update_tx_err(txo, txcp->status);
2646 			/* Reset the adapter in case of TSO,
2647 			 * SGE or parity errors
2648 			 */
2649 			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2650 			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2651 			    txcp->status == LANCER_TX_COMP_SGE_ERR)
2652 				be_set_error(adapter, BE_ERROR_TX);
2653 		} else {
2654 			be_update_tx_err(txo, txcp->status);
2655 		}
2656 	}
2657 
2658 	if (be_check_error(adapter, BE_ERROR_TX))
2659 		return NULL;
2660 
2661 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2662 	queue_tail_inc(tx_cq);
2663 	return txcp;
2664 }
2665 
2666 static u16 be_tx_compl_process(struct be_adapter *adapter,
2667 			       struct be_tx_obj *txo, u16 last_index)
2668 {
2669 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2670 	struct be_queue_info *txq = &txo->q;
2671 	struct sk_buff *skb = NULL;
2672 	bool unmap_skb_hdr = false;
2673 	struct be_eth_wrb *wrb;
2674 	u16 num_wrbs = 0;
2675 	u32 frag_index;
2676 
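	/* Walk the TXQ from its tail up to last_index, unmapping the WRBs and
	 * freeing the skb of each completed request along the way.
	 */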
2677 	do {
2678 		if (sent_skbs[txq->tail]) {
2679 			/* Free skb from prev req */
2680 			if (skb)
2681 				dev_consume_skb_any(skb);
2682 			skb = sent_skbs[txq->tail];
2683 			sent_skbs[txq->tail] = NULL;
2684 			queue_tail_inc(txq);  /* skip hdr wrb */
2685 			num_wrbs++;
2686 			unmap_skb_hdr = true;
2687 		}
2688 		wrb = queue_tail_node(txq);
2689 		frag_index = txq->tail;
2690 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2691 			      (unmap_skb_hdr && skb_headlen(skb)));
2692 		unmap_skb_hdr = false;
2693 		queue_tail_inc(txq);
2694 		num_wrbs++;
2695 	} while (frag_index != last_index);
2696 	dev_consume_skb_any(skb);
2697 
2698 	return num_wrbs;
2699 }
2700 
2701 /* Return the number of events in the event queue */
2702 static inline int events_get(struct be_eq_obj *eqo)
2703 {
2704 	struct be_eq_entry *eqe;
2705 	int num = 0;
2706 
2707 	do {
2708 		eqe = queue_tail_node(&eqo->q);
2709 		if (eqe->evt == 0)
2710 			break;
2711 
2712 		rmb();
2713 		eqe->evt = 0;
2714 		num++;
2715 		queue_tail_inc(&eqo->q);
2716 	} while (true);
2717 
2718 	return num;
2719 }
2720 
2721 /* Leaves the EQ in a disarmed state */
2722 static void be_eq_clean(struct be_eq_obj *eqo)
2723 {
2724 	int num = events_get(eqo);
2725 
2726 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2727 }
2728 
2729 /* Free posted rx buffers that were not used */
2730 static void be_rxq_clean(struct be_rx_obj *rxo)
2731 {
2732 	struct be_queue_info *rxq = &rxo->q;
2733 	struct be_rx_page_info *page_info;
2734 
2735 	while (atomic_read(&rxq->used) > 0) {
2736 		page_info = get_rx_page_info(rxo);
2737 		put_page(page_info->page);
2738 		memset(page_info, 0, sizeof(*page_info));
2739 	}
2740 	BUG_ON(atomic_read(&rxq->used));
2741 	rxq->tail = 0;
2742 	rxq->head = 0;
2743 }
2744 
2745 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2746 {
2747 	struct be_queue_info *rx_cq = &rxo->cq;
2748 	struct be_rx_compl_info *rxcp;
2749 	struct be_adapter *adapter = rxo->adapter;
2750 	int flush_wait = 0;
2751 
2752 	/* Consume pending rx completions.
2753 	 * Wait for the flush completion (identified by zero num_rcvd)
2754 	 * to arrive. Notify CQ even when there are no more CQ entries
2755 	 * for HW to flush partially coalesced CQ entries.
2756 	 * In Lancer, there is no need to wait for flush compl.
2757 	 */
2758 	for (;;) {
2759 		rxcp = be_rx_compl_get(rxo);
2760 		if (!rxcp) {
2761 			if (lancer_chip(adapter))
2762 				break;
2763 
2764 			if (flush_wait++ > 50 ||
2765 			    be_check_error(adapter,
2766 					   BE_ERROR_HW)) {
2767 				dev_warn(&adapter->pdev->dev,
2768 					 "did not receive flush compl\n");
2769 				break;
2770 			}
2771 			be_cq_notify(adapter, rx_cq->id, true, 0);
2772 			mdelay(1);
2773 		} else {
2774 			be_rx_compl_discard(rxo, rxcp);
2775 			be_cq_notify(adapter, rx_cq->id, false, 1);
2776 			if (rxcp->num_rcvd == 0)
2777 				break;
2778 		}
2779 	}
2780 
2781 	/* After cleanup, leave the CQ in unarmed state */
2782 	be_cq_notify(adapter, rx_cq->id, false, 0);
2783 }
2784 
2785 static void be_tx_compl_clean(struct be_adapter *adapter)
2786 {
2787 	struct device *dev = &adapter->pdev->dev;
2788 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2789 	struct be_tx_compl_info *txcp;
2790 	struct be_queue_info *txq;
2791 	u32 end_idx, notified_idx;
2792 	struct be_tx_obj *txo;
2793 	int i, pending_txqs;
2794 
2795 	/* Stop polling for compls when HW has been silent for 10ms */
2796 	do {
2797 		pending_txqs = adapter->num_tx_qs;
2798 
2799 		for_all_tx_queues(adapter, txo, i) {
2800 			cmpl = 0;
2801 			num_wrbs = 0;
2802 			txq = &txo->q;
2803 			while ((txcp = be_tx_compl_get(adapter, txo))) {
2804 				num_wrbs +=
2805 					be_tx_compl_process(adapter, txo,
2806 							    txcp->end_index);
2807 				cmpl++;
2808 			}
2809 			if (cmpl) {
2810 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2811 				atomic_sub(num_wrbs, &txq->used);
2812 				timeo = 0;
2813 			}
2814 			if (!be_is_tx_compl_pending(txo))
2815 				pending_txqs--;
2816 		}
2817 
2818 		if (pending_txqs == 0 || ++timeo > 10 ||
2819 		    be_check_error(adapter, BE_ERROR_HW))
2820 			break;
2821 
2822 		mdelay(1);
2823 	} while (true);
2824 
2825 	/* Free enqueued TX that was never notified to HW */
2826 	for_all_tx_queues(adapter, txo, i) {
2827 		txq = &txo->q;
2828 
2829 		if (atomic_read(&txq->used)) {
2830 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2831 				 i, atomic_read(&txq->used));
2832 			notified_idx = txq->tail;
2833 			end_idx = txq->tail;
2834 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2835 				  txq->len);
2836 			/* Use the tx-compl process logic to handle requests
2837 			 * that were not sent to the HW.
2838 			 */
2839 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2840 			atomic_sub(num_wrbs, &txq->used);
2841 			BUG_ON(atomic_read(&txq->used));
2842 			txo->pend_wrb_cnt = 0;
2843 			/* Since hw was never notified of these requests,
2844 			 * reset TXQ indices
2845 			 */
2846 			txq->head = notified_idx;
2847 			txq->tail = notified_idx;
2848 		}
2849 	}
2850 }
2851 
2852 static void be_evt_queues_destroy(struct be_adapter *adapter)
2853 {
2854 	struct be_eq_obj *eqo;
2855 	int i;
2856 
2857 	for_all_evt_queues(adapter, eqo, i) {
2858 		if (eqo->q.created) {
2859 			be_eq_clean(eqo);
2860 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2861 			netif_napi_del(&eqo->napi);
2862 			free_cpumask_var(eqo->affinity_mask);
2863 		}
2864 		be_queue_free(adapter, &eqo->q);
2865 	}
2866 }
2867 
2868 static int be_evt_queues_create(struct be_adapter *adapter)
2869 {
2870 	struct be_queue_info *eq;
2871 	struct be_eq_obj *eqo;
2872 	struct be_aic_obj *aic;
2873 	int i, rc;
2874 
2875 	/* need enough EQs to service both RX and TX queues */
2876 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2877 				    max(adapter->cfg_num_rx_irqs,
2878 					adapter->cfg_num_tx_irqs));
2879 
2880 	for_all_evt_queues(adapter, eqo, i) {
2881 		int numa_node = dev_to_node(&adapter->pdev->dev);
2882 
2883 		aic = &adapter->aic_obj[i];
2884 		eqo->adapter = adapter;
2885 		eqo->idx = i;
2886 		aic->max_eqd = BE_MAX_EQD;
2887 		aic->enable = true;
2888 
2889 		eq = &eqo->q;
2890 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2891 				    sizeof(struct be_eq_entry));
2892 		if (rc)
2893 			return rc;
2894 
2895 		rc = be_cmd_eq_create(adapter, eqo);
2896 		if (rc)
2897 			return rc;
2898 
2899 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2900 			return -ENOMEM;
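		/* Spread the EQ affinity hints across CPUs local to the
		 * device's NUMA node, one CPU per EQ.
		 */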
2901 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2902 				eqo->affinity_mask);
2903 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2904 			       BE_NAPI_WEIGHT);
2905 	}
2906 	return 0;
2907 }
2908 
2909 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2910 {
2911 	struct be_queue_info *q;
2912 
2913 	q = &adapter->mcc_obj.q;
2914 	if (q->created)
2915 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2916 	be_queue_free(adapter, q);
2917 
2918 	q = &adapter->mcc_obj.cq;
2919 	if (q->created)
2920 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2921 	be_queue_free(adapter, q);
2922 }
2923 
2924 /* Must be called only after TX qs are created as MCC shares TX EQ */
2925 static int be_mcc_queues_create(struct be_adapter *adapter)
2926 {
2927 	struct be_queue_info *q, *cq;
2928 
2929 	cq = &adapter->mcc_obj.cq;
2930 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2931 			   sizeof(struct be_mcc_compl)))
2932 		goto err;
2933 
2934 	/* Use the default EQ for MCC completions */
2935 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2936 		goto mcc_cq_free;
2937 
2938 	q = &adapter->mcc_obj.q;
2939 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2940 		goto mcc_cq_destroy;
2941 
2942 	if (be_cmd_mccq_create(adapter, q, cq))
2943 		goto mcc_q_free;
2944 
2945 	return 0;
2946 
2947 mcc_q_free:
2948 	be_queue_free(adapter, q);
2949 mcc_cq_destroy:
2950 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2951 mcc_cq_free:
2952 	be_queue_free(adapter, cq);
2953 err:
2954 	return -1;
2955 }
2956 
2957 static void be_tx_queues_destroy(struct be_adapter *adapter)
2958 {
2959 	struct be_queue_info *q;
2960 	struct be_tx_obj *txo;
2961 	u8 i;
2962 
2963 	for_all_tx_queues(adapter, txo, i) {
2964 		q = &txo->q;
2965 		if (q->created)
2966 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2967 		be_queue_free(adapter, q);
2968 
2969 		q = &txo->cq;
2970 		if (q->created)
2971 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2972 		be_queue_free(adapter, q);
2973 	}
2974 }
2975 
2976 static int be_tx_qs_create(struct be_adapter *adapter)
2977 {
2978 	struct be_queue_info *cq;
2979 	struct be_tx_obj *txo;
2980 	struct be_eq_obj *eqo;
2981 	int status, i;
2982 
2983 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2984 
2985 	for_all_tx_queues(adapter, txo, i) {
2986 		cq = &txo->cq;
2987 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2988 					sizeof(struct be_eth_tx_compl));
2989 		if (status)
2990 			return status;
2991 
2992 		u64_stats_init(&txo->stats.sync);
2993 		u64_stats_init(&txo->stats.sync_compl);
2994 
2995 		/* If num_evt_qs is less than num_tx_qs, then more than
2996 		 * one txq shares an eq
2997 		 */
2998 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2999 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3000 		if (status)
3001 			return status;
3002 
3003 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3004 					sizeof(struct be_eth_wrb));
3005 		if (status)
3006 			return status;
3007 
3008 		status = be_cmd_txq_create(adapter, txo);
3009 		if (status)
3010 			return status;
3011 
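		/* Let XPS prefer this TXQ for transmits originating on the
		 * CPUs that service its EQ.
		 */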
3012 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3013 				    eqo->idx);
3014 	}
3015 
3016 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3017 		 adapter->num_tx_qs);
3018 	return 0;
3019 }
3020 
3021 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3022 {
3023 	struct be_queue_info *q;
3024 	struct be_rx_obj *rxo;
3025 	int i;
3026 
3027 	for_all_rx_queues(adapter, rxo, i) {
3028 		q = &rxo->cq;
3029 		if (q->created)
3030 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3031 		be_queue_free(adapter, q);
3032 	}
3033 }
3034 
3035 static int be_rx_cqs_create(struct be_adapter *adapter)
3036 {
3037 	struct be_queue_info *eq, *cq;
3038 	struct be_rx_obj *rxo;
3039 	int rc, i;
3040 
3041 	adapter->num_rss_qs =
3042 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3043 
3044 	/* We'll use RSS only if at least 2 RSS rings are supported. */
3045 	if (adapter->num_rss_qs < 2)
3046 		adapter->num_rss_qs = 0;
3047 
3048 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3049 
3050 	/* When the interface is not capable of RSS rings (and there is no
3051 	 * need to create a default RXQ), we'll still need one RXQ
3052 	 */
3053 	if (adapter->num_rx_qs == 0)
3054 		adapter->num_rx_qs = 1;
3055 
3056 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3057 	for_all_rx_queues(adapter, rxo, i) {
3058 		rxo->adapter = adapter;
3059 		cq = &rxo->cq;
3060 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3061 				    sizeof(struct be_eth_rx_compl));
3062 		if (rc)
3063 			return rc;
3064 
3065 		u64_stats_init(&rxo->stats.sync);
3066 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3067 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3068 		if (rc)
3069 			return rc;
3070 	}
3071 
3072 	dev_info(&adapter->pdev->dev,
3073 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3074 	return 0;
3075 }
3076 
3077 static irqreturn_t be_intx(int irq, void *dev)
3078 {
3079 	struct be_eq_obj *eqo = dev;
3080 	struct be_adapter *adapter = eqo->adapter;
3081 	int num_evts = 0;
3082 
3083 	/* IRQ is not expected when NAPI is scheduled as the EQ
3084 	 * will not be armed.
3085 	 * But, this can happen on Lancer INTx where it takes
3086 	 * a while to de-assert INTx or in BE2 where occasionally
3087 	 * an interrupt may be raised even when EQ is unarmed.
3088 	 * If NAPI is already scheduled, then counting & notifying
3089 	 * events will orphan them.
3090 	 */
3091 	if (napi_schedule_prep(&eqo->napi)) {
3092 		num_evts = events_get(eqo);
3093 		__napi_schedule(&eqo->napi);
3094 		if (num_evts)
3095 			eqo->spurious_intr = 0;
3096 	}
3097 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3098 
3099 	/* Return IRQ_HANDLED only for the first spurious intr
3100 	 * after a valid intr to stop the kernel from branding
3101 	 * this irq as a bad one!
3102 	 */
3103 	if (num_evts || eqo->spurious_intr++ == 0)
3104 		return IRQ_HANDLED;
3105 	else
3106 		return IRQ_NONE;
3107 }
3108 
3109 static irqreturn_t be_msix(int irq, void *dev)
3110 {
3111 	struct be_eq_obj *eqo = dev;
3112 
3113 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3114 	napi_schedule(&eqo->napi);
3115 	return IRQ_HANDLED;
3116 }
3117 
3118 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3119 {
3120 	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3121 }
3122 
3123 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3124 			 int budget)
3125 {
3126 	struct be_adapter *adapter = rxo->adapter;
3127 	struct be_queue_info *rx_cq = &rxo->cq;
3128 	struct be_rx_compl_info *rxcp;
3129 	u32 work_done;
3130 	u32 frags_consumed = 0;
3131 
3132 	for (work_done = 0; work_done < budget; work_done++) {
3133 		rxcp = be_rx_compl_get(rxo);
3134 		if (!rxcp)
3135 			break;
3136 
3137 		/* Is it a flush compl that has no data */
3138 		if (unlikely(rxcp->num_rcvd == 0))
3139 			goto loop_continue;
3140 
3141 		/* Discard compls with partial DMA (Lancer B0) */
3142 		if (unlikely(!rxcp->pkt_size)) {
3143 			be_rx_compl_discard(rxo, rxcp);
3144 			goto loop_continue;
3145 		}
3146 
3147 		/* On BE drop pkts that arrive due to imperfect filtering in
3148 		 * promiscuous mode on some SKUs
3149 		 */
3150 		if (unlikely(rxcp->port != adapter->port_num &&
3151 			     !lancer_chip(adapter))) {
3152 			be_rx_compl_discard(rxo, rxcp);
3153 			goto loop_continue;
3154 		}
3155 
3156 		if (do_gro(rxcp))
3157 			be_rx_compl_process_gro(rxo, napi, rxcp);
3158 		else
3159 			be_rx_compl_process(rxo, napi, rxcp);
3160 
3161 loop_continue:
3162 		frags_consumed += rxcp->num_rcvd;
3163 		be_rx_stats_update(rxo, rxcp);
3164 	}
3165 
3166 	if (work_done) {
3167 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3168 
3169 		/* When an rx-obj gets into post_starved state, just
3170 		 * let be_worker do the posting.
3171 		 */
3172 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3173 		    !rxo->rx_post_starved)
3174 			be_post_rx_frags(rxo, GFP_ATOMIC,
3175 					 max_t(u32, MAX_RX_POST,
3176 					       frags_consumed));
3177 	}
3178 
3179 	return work_done;
3180 }
3181 
3182 
3183 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3184 			  int idx)
3185 {
3186 	int num_wrbs = 0, work_done = 0;
3187 	struct be_tx_compl_info *txcp;
3188 
3189 	while ((txcp = be_tx_compl_get(adapter, txo))) {
3190 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3191 		work_done++;
3192 	}
3193 
3194 	if (work_done) {
3195 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3196 		atomic_sub(num_wrbs, &txo->q.used);
3197 
3198 		/* As Tx wrbs have been freed up, wake up netdev queue
3199 		 * if it was stopped due to lack of tx wrbs.  */
3200 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3201 		    be_can_txq_wake(txo)) {
3202 			netif_wake_subqueue(adapter->netdev, idx);
3203 		}
3204 
3205 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3206 		tx_stats(txo)->tx_compl += work_done;
3207 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3208 	}
3209 }
3210 
3211 int be_poll(struct napi_struct *napi, int budget)
3212 {
3213 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3214 	struct be_adapter *adapter = eqo->adapter;
3215 	int max_work = 0, work, i, num_evts;
3216 	struct be_rx_obj *rxo;
3217 	struct be_tx_obj *txo;
3218 	u32 mult_enc = 0;
3219 
3220 	num_evts = events_get(eqo);
3221 
3222 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3223 		be_process_tx(adapter, txo, i);
3224 
3225 	/* This loop will iterate twice for EQ0 in which
3226 	 * completions of the last RXQ (default one) are also processed.
3227 	 * For other EQs the loop iterates only once.
3228 	 */
3229 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3230 		work = be_process_rx(rxo, napi, budget);
3231 		max_work = max(work, max_work);
3232 	}
3233 
3234 	if (is_mcc_eqo(eqo))
3235 		be_process_mcc(adapter);
3236 
3237 	if (max_work < budget) {
3238 		napi_complete_done(napi, max_work);
3239 
3240 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3241 		 * delay via a delay multiplier encoding value
3242 		 */
3243 		if (skyhawk_chip(adapter))
3244 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3245 
3246 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3247 			     mult_enc);
3248 	} else {
3249 		/* As we'll continue in polling mode, count and clear events */
3250 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3251 	}
3252 	return max_work;
3253 }
3254 
3255 void be_detect_error(struct be_adapter *adapter)
3256 {
3257 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3258 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3259 	struct device *dev = &adapter->pdev->dev;
3260 	u16 val;
3261 	u32 i;
3262 
3263 	if (be_check_error(adapter, BE_ERROR_HW))
3264 		return;
3265 
3266 	if (lancer_chip(adapter)) {
3267 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3268 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3269 			be_set_error(adapter, BE_ERROR_UE);
3270 			sliport_err1 = ioread32(adapter->db +
3271 						SLIPORT_ERROR1_OFFSET);
3272 			sliport_err2 = ioread32(adapter->db +
3273 						SLIPORT_ERROR2_OFFSET);
3274 			/* Do not log error messages if it's a FW reset */
3275 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3276 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3277 				dev_info(dev, "Firmware update in progress\n");
3278 			} else {
3279 				dev_err(dev, "Error detected in the card\n");
3280 				dev_err(dev, "ERR: sliport status 0x%x\n",
3281 					sliport_status);
3282 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3283 					sliport_err1);
3284 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3285 					sliport_err2);
3286 			}
3287 		}
3288 	} else {
3289 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3290 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3291 		ue_lo_mask = ioread32(adapter->pcicfg +
3292 				      PCICFG_UE_STATUS_LOW_MASK);
3293 		ue_hi_mask = ioread32(adapter->pcicfg +
3294 				      PCICFG_UE_STATUS_HI_MASK);
3295 
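		/* Ignore UE bits that are masked off in the UE mask
		 * registers; only the remaining bits indicate a genuine UE.
		 */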
3296 		ue_lo = (ue_lo & ~ue_lo_mask);
3297 		ue_hi = (ue_hi & ~ue_hi_mask);
3298 
3299 		if (ue_lo || ue_hi) {
3300 			/* On certain platforms BE3 hardware can indicate
3301 			 * spurious UEs. In case of a UE in the chip,
3302 			 * the POST register correctly reports either a
3303 			 * FAT_LOG_START state (FW is currently dumping
3304 			 * FAT log data) or an ARMFW_UE state. Check for the
3305 			 * above states to ascertain if the UE is valid or not.
3306 			 */
3307 			if (BE3_chip(adapter)) {
3308 				val = be_POST_stage_get(adapter);
3309 				if ((val & POST_STAGE_FAT_LOG_START)
3310 				     != POST_STAGE_FAT_LOG_START &&
3311 				    (val & POST_STAGE_ARMFW_UE)
3312 				     != POST_STAGE_ARMFW_UE &&
3313 				    (val & POST_STAGE_RECOVERABLE_ERR)
3314 				     != POST_STAGE_RECOVERABLE_ERR)
3315 					return;
3316 			}
3317 
3318 			dev_err(dev, "Error detected in the adapter");
3319 			be_set_error(adapter, BE_ERROR_UE);
3320 
3321 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3322 				if (ue_lo & 1)
3323 					dev_err(dev, "UE: %s bit set\n",
3324 						ue_status_low_desc[i]);
3325 			}
3326 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3327 				if (ue_hi & 1)
3328 					dev_err(dev, "UE: %s bit set\n",
3329 						ue_status_hi_desc[i]);
3330 			}
3331 		}
3332 	}
3333 }
3334 
3335 static void be_msix_disable(struct be_adapter *adapter)
3336 {
3337 	if (msix_enabled(adapter)) {
3338 		pci_disable_msix(adapter->pdev);
3339 		adapter->num_msix_vec = 0;
3340 		adapter->num_msix_roce_vec = 0;
3341 	}
3342 }
3343 
3344 static int be_msix_enable(struct be_adapter *adapter)
3345 {
3346 	unsigned int i, max_roce_eqs;
3347 	struct device *dev = &adapter->pdev->dev;
3348 	int num_vec;
3349 
3350 	/* If RoCE is supported, program the max number of vectors that
3351 	 * could be used for NIC and RoCE, else, just program the number
3352 	 * we'll use initially.
3353 	 */
3354 	if (be_roce_supported(adapter)) {
3355 		max_roce_eqs =
3356 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3357 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3358 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3359 	} else {
3360 		num_vec = max(adapter->cfg_num_rx_irqs,
3361 			      adapter->cfg_num_tx_irqs);
3362 	}
3363 
3364 	for (i = 0; i < num_vec; i++)
3365 		adapter->msix_entries[i].entry = i;
3366 
3367 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3368 					MIN_MSIX_VECTORS, num_vec);
3369 	if (num_vec < 0)
3370 		goto fail;
3371 
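	/* If RoCE is supported and we were granted more than the minimum,
	 * split the vectors between the NIC and RoCE (RoCE gets half).
	 */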
3372 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3373 		adapter->num_msix_roce_vec = num_vec / 2;
3374 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3375 			 adapter->num_msix_roce_vec);
3376 	}
3377 
3378 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3379 
3380 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3381 		 adapter->num_msix_vec);
3382 	return 0;
3383 
3384 fail:
3385 	dev_warn(dev, "MSIx enable failed\n");
3386 
3387 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3388 	if (be_virtfn(adapter))
3389 		return num_vec;
3390 	return 0;
3391 }
3392 
3393 static inline int be_msix_vec_get(struct be_adapter *adapter,
3394 				  struct be_eq_obj *eqo)
3395 {
3396 	return adapter->msix_entries[eqo->msix_idx].vector;
3397 }
3398 
3399 static int be_msix_register(struct be_adapter *adapter)
3400 {
3401 	struct net_device *netdev = adapter->netdev;
3402 	struct be_eq_obj *eqo;
3403 	int status, i, vec;
3404 
3405 	for_all_evt_queues(adapter, eqo, i) {
3406 		char irq_name[IFNAMSIZ+4];
3407 
3408 		snprintf(irq_name, sizeof(irq_name), "%s-q%d", netdev->name, i);
3409 		vec = be_msix_vec_get(adapter, eqo);
3410 		status = request_irq(vec, be_msix, 0, irq_name, eqo);
3411 		if (status)
3412 			goto err_msix;
3413 
3414 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3415 	}
3416 
3417 	return 0;
3418 err_msix:
3419 	for (i--; i >= 0; i--) {
3420 		eqo = &adapter->eq_obj[i];
3421 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3422 	}
3423 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3424 		 status);
3425 	be_msix_disable(adapter);
3426 	return status;
3427 }
3428 
3429 static int be_irq_register(struct be_adapter *adapter)
3430 {
3431 	struct net_device *netdev = adapter->netdev;
3432 	int status;
3433 
3434 	if (msix_enabled(adapter)) {
3435 		status = be_msix_register(adapter);
3436 		if (status == 0)
3437 			goto done;
3438 		/* INTx is not supported for VF */
3439 		if (be_virtfn(adapter))
3440 			return status;
3441 	}
3442 
3443 	/* INTx: only the first EQ is used */
3444 	netdev->irq = adapter->pdev->irq;
3445 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3446 			     &adapter->eq_obj[0]);
3447 	if (status) {
3448 		dev_err(&adapter->pdev->dev,
3449 			"INTx request IRQ failed - err %d\n", status);
3450 		return status;
3451 	}
3452 done:
3453 	adapter->isr_registered = true;
3454 	return 0;
3455 }
3456 
3457 static void be_irq_unregister(struct be_adapter *adapter)
3458 {
3459 	struct net_device *netdev = adapter->netdev;
3460 	struct be_eq_obj *eqo;
3461 	int i, vec;
3462 
3463 	if (!adapter->isr_registered)
3464 		return;
3465 
3466 	/* INTx */
3467 	if (!msix_enabled(adapter)) {
3468 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3469 		goto done;
3470 	}
3471 
3472 	/* MSIx */
3473 	for_all_evt_queues(adapter, eqo, i) {
3474 		vec = be_msix_vec_get(adapter, eqo);
3475 		irq_set_affinity_hint(vec, NULL);
3476 		free_irq(vec, eqo);
3477 	}
3478 
3479 done:
3480 	adapter->isr_registered = false;
3481 }
3482 
3483 static void be_rx_qs_destroy(struct be_adapter *adapter)
3484 {
3485 	struct rss_info *rss = &adapter->rss_info;
3486 	struct be_queue_info *q;
3487 	struct be_rx_obj *rxo;
3488 	int i;
3489 
3490 	for_all_rx_queues(adapter, rxo, i) {
3491 		q = &rxo->q;
3492 		if (q->created) {
3493 			/* If RXQs are destroyed while in an "out of buffer"
3494 			 * state, there is a possibility of an HW stall on
3495 			 * Lancer. So, post 64 buffers to each queue to relieve
3496 			 * the "out of buffer" condition.
3497 			 * Make sure there's space in the RXQ before posting.
3498 			 */
3499 			if (lancer_chip(adapter)) {
3500 				be_rx_cq_clean(rxo);
3501 				if (atomic_read(&q->used) == 0)
3502 					be_post_rx_frags(rxo, GFP_KERNEL,
3503 							 MAX_RX_POST);
3504 			}
3505 
3506 			be_cmd_rxq_destroy(adapter, q);
3507 			be_rx_cq_clean(rxo);
3508 			be_rxq_clean(rxo);
3509 		}
3510 		be_queue_free(adapter, q);
3511 	}
3512 
3513 	if (rss->rss_flags) {
3514 		rss->rss_flags = RSS_ENABLE_NONE;
3515 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3516 				  128, rss->rss_hkey);
3517 	}
3518 }
3519 
3520 static void be_disable_if_filters(struct be_adapter *adapter)
3521 {
3522 	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3523 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3524 	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3525 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3526 		eth_zero_addr(adapter->dev_mac);
3527 	}
3528 
3529 	be_clear_uc_list(adapter);
3530 	be_clear_mc_list(adapter);
3531 
3532 	/* The IFACE flags are enabled in the open path and cleared
3533 	 * in the close path. When a VF gets detached from the host and
3534 	 * assigned to a VM the following happens:
3535 	 *	- VF's IFACE flags get cleared in the detach path
3536 	 *	- IFACE create is issued by the VF in the attach path
3537 	 * Due to a bug in the BE3/Skyhawk-R FW
3538 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3539 	 * specified along with the IFACE create cmd issued by a VF are not
3540 	 * honoured by FW.  As a consequence, if a *new* driver
3541 	 * (that enables/disables IFACE flags in open/close)
3542 	 * is loaded in the host and an *old* driver is used by a VM/VF,
3543 	 * the IFACE gets created *without* the needed flags.
3544 	 * To avoid this, disable RX-filter flags only for Lancer.
3545 	 */
3546 	if (lancer_chip(adapter)) {
3547 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3548 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3549 	}
3550 }
3551 
3552 static int be_close(struct net_device *netdev)
3553 {
3554 	struct be_adapter *adapter = netdev_priv(netdev);
3555 	struct be_eq_obj *eqo;
3556 	int i;
3557 
3558 	/* This protection is needed as be_close() may be called even when the
3559 	 * adapter is in cleared state (after eeh perm failure)
3560 	 * adapter is in cleared state (after EEH perm failure)
3561 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3562 		return 0;
3563 
3564 	/* Before attempting cleanup ensure all the pending cmds in the
3565 	 * config_wq have finished execution
3566 	 */
3567 	flush_workqueue(be_wq);
3568 
3569 	be_disable_if_filters(adapter);
3570 
3571 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3572 		for_all_evt_queues(adapter, eqo, i) {
3573 			napi_disable(&eqo->napi);
3574 		}
3575 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3576 	}
3577 
3578 	be_async_mcc_disable(adapter);
3579 
3580 	/* Wait for all pending tx completions to arrive so that
3581 	 * all tx skbs are freed.
3582 	 */
3583 	netif_tx_disable(netdev);
3584 	be_tx_compl_clean(adapter);
3585 
3586 	be_rx_qs_destroy(adapter);
3587 
3588 	for_all_evt_queues(adapter, eqo, i) {
3589 		if (msix_enabled(adapter))
3590 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3591 		else
3592 			synchronize_irq(netdev->irq);
3593 		be_eq_clean(eqo);
3594 	}
3595 
3596 	be_irq_unregister(adapter);
3597 
3598 	return 0;
3599 }
3600 
3601 static int be_rx_qs_create(struct be_adapter *adapter)
3602 {
3603 	struct rss_info *rss = &adapter->rss_info;
3604 	u8 rss_key[RSS_HASH_KEY_LEN];
3605 	struct be_rx_obj *rxo;
3606 	int rc, i, j;
3607 
3608 	for_all_rx_queues(adapter, rxo, i) {
3609 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3610 				    sizeof(struct be_eth_rx_d));
3611 		if (rc)
3612 			return rc;
3613 	}
3614 
3615 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3616 		rxo = default_rxo(adapter);
3617 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3618 				       rx_frag_size, adapter->if_handle,
3619 				       false, &rxo->rss_id);
3620 		if (rc)
3621 			return rc;
3622 	}
3623 
3624 	for_all_rss_queues(adapter, rxo, i) {
3625 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3626 				       rx_frag_size, adapter->if_handle,
3627 				       true, &rxo->rss_id);
3628 		if (rc)
3629 			return rc;
3630 	}
3631 
3632 	if (be_multi_rxq(adapter)) {
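		/* The nested loops below fill all RSS_INDIR_TABLE_LEN entries
		 * of the indirection table by cycling through the RSS queues
		 * in order, so RX flows are spread evenly across the queues.
		 */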
3633 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3634 			for_all_rss_queues(adapter, rxo, i) {
3635 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3636 					break;
3637 				rss->rsstable[j + i] = rxo->rss_id;
3638 				rss->rss_queue[j + i] = i;
3639 			}
3640 		}
3641 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3642 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3643 
3644 		if (!BEx_chip(adapter))
3645 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3646 				RSS_ENABLE_UDP_IPV6;
3647 
3648 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3649 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3650 				       RSS_INDIR_TABLE_LEN, rss_key);
3651 		if (rc) {
3652 			rss->rss_flags = RSS_ENABLE_NONE;
3653 			return rc;
3654 		}
3655 
3656 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3657 	} else {
3658 		/* Disable RSS, if only default RX Q is created */
3659 		rss->rss_flags = RSS_ENABLE_NONE;
3660 	}
3661 
3662 
3663 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3664 	 * which is a queue empty condition
3665 	 */
3666 	for_all_rx_queues(adapter, rxo, i)
3667 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3668 
3669 	return 0;
3670 }
3671 
3672 static int be_enable_if_filters(struct be_adapter *adapter)
3673 {
3674 	int status;
3675 
3676 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3677 	if (status)
3678 		return status;
3679 
3680 	/* Normally this condition is true as the ->dev_mac is zeroed.
3681 	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3682 	 * subsequent be_dev_mac_add() can fail (after fresh boot)
3683 	 */
3684 	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3685 		int old_pmac_id = -1;
3686 
3687 		/* Remember old programmed MAC if any - can happen on BE3 VF */
3688 		if (!is_zero_ether_addr(adapter->dev_mac))
3689 			old_pmac_id = adapter->pmac_id[0];
3690 
3691 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3692 		if (status)
3693 			return status;
3694 
3695 		/* Delete the old programmed MAC as we successfully programmed
3696 		 * a new MAC
3697 		 */
3698 		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3699 			be_dev_mac_del(adapter, old_pmac_id);
3700 
3701 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3702 	}
3703 
3704 	if (adapter->vlans_added)
3705 		be_vid_config(adapter);
3706 
3707 	__be_set_rx_mode(adapter);
3708 
3709 	return 0;
3710 }
3711 
3712 static int be_open(struct net_device *netdev)
3713 {
3714 	struct be_adapter *adapter = netdev_priv(netdev);
3715 	struct be_eq_obj *eqo;
3716 	struct be_rx_obj *rxo;
3717 	struct be_tx_obj *txo;
3718 	u8 link_status;
3719 	int status, i;
3720 
3721 	status = be_rx_qs_create(adapter);
3722 	if (status)
3723 		goto err;
3724 
3725 	status = be_enable_if_filters(adapter);
3726 	if (status)
3727 		goto err;
3728 
3729 	status = be_irq_register(adapter);
3730 	if (status)
3731 		goto err;
3732 
3733 	for_all_rx_queues(adapter, rxo, i)
3734 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3735 
3736 	for_all_tx_queues(adapter, txo, i)
3737 		be_cq_notify(adapter, txo->cq.id, true, 0);
3738 
3739 	be_async_mcc_enable(adapter);
3740 
3741 	for_all_evt_queues(adapter, eqo, i) {
3742 		napi_enable(&eqo->napi);
3743 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3744 	}
3745 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3746 
3747 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3748 	if (!status)
3749 		be_link_status_update(adapter, link_status);
3750 
3751 	netif_tx_start_all_queues(netdev);
3752 	if (skyhawk_chip(adapter))
3753 		udp_tunnel_get_rx_info(netdev);
3754 
3755 	return 0;
3756 err:
3757 	be_close(adapter->netdev);
3758 	return -EIO;
3759 }
3760 
3761 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3762 {
3763 	u32 addr;
3764 
3765 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3766 
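	/* mac[3..5] take the low 3 bytes of the hash (least-significant byte
	 * in mac[5]); mac[0..2] are then overwritten below with the OUI of
	 * the PF's current MAC.
	 */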
3767 	mac[5] = (u8)(addr & 0xFF);
3768 	mac[4] = (u8)((addr >> 8) & 0xFF);
3769 	mac[3] = (u8)((addr >> 16) & 0xFF);
3770 	/* Use the OUI from the current MAC address */
3771 	memcpy(mac, adapter->netdev->dev_addr, 3);
3772 }
3773 
3774 /*
3775  * Generate a seed MAC address from the PF MAC Address using jhash.
3776  * MAC addresses for VFs are assigned incrementally starting from the seed.
3777  * These addresses are programmed in the ASIC by the PF and the VF driver
3778  * queries for the MAC address during its probe.
3779  */
3780 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3781 {
3782 	u32 vf;
3783 	int status = 0;
3784 	u8 mac[ETH_ALEN];
3785 	struct be_vf_cfg *vf_cfg;
3786 
3787 	be_vf_eth_addr_generate(adapter, mac);
3788 
3789 	for_all_vfs(adapter, vf_cfg, vf) {
3790 		if (BEx_chip(adapter))
3791 			status = be_cmd_pmac_add(adapter, mac,
3792 						 vf_cfg->if_handle,
3793 						 &vf_cfg->pmac_id, vf + 1);
3794 		else
3795 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3796 						vf + 1);
3797 
3798 		if (status)
3799 			dev_err(&adapter->pdev->dev,
3800 				"Mac address assignment failed for VF %d\n",
3801 				vf);
3802 		else
3803 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3804 
3805 		mac[5] += 1;
3806 	}
3807 	return status;
3808 }
3809 
3810 static int be_vfs_mac_query(struct be_adapter *adapter)
3811 {
3812 	int status, vf;
3813 	u8 mac[ETH_ALEN];
3814 	struct be_vf_cfg *vf_cfg;
3815 
3816 	for_all_vfs(adapter, vf_cfg, vf) {
3817 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3818 					       mac, vf_cfg->if_handle,
3819 					       false, vf+1);
3820 		if (status)
3821 			return status;
3822 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3823 	}
3824 	return 0;
3825 }
3826 
3827 static void be_vf_clear(struct be_adapter *adapter)
3828 {
3829 	struct be_vf_cfg *vf_cfg;
3830 	u32 vf;
3831 
3832 	if (pci_vfs_assigned(adapter->pdev)) {
3833 		dev_warn(&adapter->pdev->dev,
3834 			 "VFs are assigned to VMs: not disabling VFs\n");
3835 		goto done;
3836 	}
3837 
3838 	pci_disable_sriov(adapter->pdev);
3839 
3840 	for_all_vfs(adapter, vf_cfg, vf) {
3841 		if (BEx_chip(adapter))
3842 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3843 					vf_cfg->pmac_id, vf + 1);
3844 		else
3845 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3846 				       vf + 1);
3847 
3848 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3849 	}
3850 
3851 	if (BE3_chip(adapter))
3852 		be_cmd_set_hsw_config(adapter, 0, 0,
3853 				      adapter->if_handle,
3854 				      PORT_FWD_TYPE_PASSTHRU, 0);
3855 done:
3856 	kfree(adapter->vf_cfg);
3857 	adapter->num_vfs = 0;
3858 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3859 }
3860 
3861 static void be_clear_queues(struct be_adapter *adapter)
3862 {
3863 	be_mcc_queues_destroy(adapter);
3864 	be_rx_cqs_destroy(adapter);
3865 	be_tx_queues_destroy(adapter);
3866 	be_evt_queues_destroy(adapter);
3867 }
3868 
3869 static void be_cancel_worker(struct be_adapter *adapter)
3870 {
3871 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3872 		cancel_delayed_work_sync(&adapter->work);
3873 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3874 	}
3875 }
3876 
3877 static void be_cancel_err_detection(struct be_adapter *adapter)
3878 {
3879 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3880 
3881 	if (!be_err_recovery_workq)
3882 		return;
3883 
3884 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3885 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3886 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3887 	}
3888 }
3889 
3890 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3891 {
3892 	struct net_device *netdev = adapter->netdev;
3893 	struct device *dev = &adapter->pdev->dev;
3894 	struct be_vxlan_port *vxlan_port;
3895 	__be16 port;
3896 	int status;
3897 
3898 	vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3899 				      struct be_vxlan_port, list);
3900 	port = vxlan_port->port;
3901 
3902 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3903 				     OP_CONVERT_NORMAL_TO_TUNNEL);
3904 	if (status) {
3905 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3906 		return status;
3907 	}
3908 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3909 
3910 	status = be_cmd_set_vxlan_port(adapter, port);
3911 	if (status) {
3912 		dev_warn(dev, "Failed to add VxLAN port\n");
3913 		return status;
3914 	}
3915 	adapter->vxlan_port = port;
3916 
3917 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3918 				   NETIF_F_TSO | NETIF_F_TSO6 |
3919 				   NETIF_F_GSO_UDP_TUNNEL;
3920 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3921 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
3922 
3923 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3924 		 be16_to_cpu(port));
3925 	return 0;
3926 }
3927 
3928 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3929 {
3930 	struct net_device *netdev = adapter->netdev;
3931 
3932 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3933 		be_cmd_manage_iface(adapter, adapter->if_handle,
3934 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3935 
3936 	if (adapter->vxlan_port)
3937 		be_cmd_set_vxlan_port(adapter, 0);
3938 
3939 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3940 	adapter->vxlan_port = 0;
3941 
3942 	netdev->hw_enc_features = 0;
3943 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3944 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3945 }
3946 
3947 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3948 				struct be_resources *vft_res)
3949 {
3950 	struct be_resources res = adapter->pool_res;
3951 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3952 	struct be_resources res_mod = {0};
3953 	u16 num_vf_qs = 1;
3954 
3955 	/* Distribute the queue resources among the PF and its VFs */
3956 	if (num_vfs) {
3957 		/* Divide the rx queues evenly among the VFs and the PF, capped
3958 		 * at VF-EQ-count. Any remainder queues belong to the PF.
3959 		 */
3960 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3961 				res.max_rss_qs / (num_vfs + 1));
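		/* For example (illustrative numbers): with 32 RSS queues in
		 * the PF pool and num_vfs = 7, each of the 8 functions is
		 * offered 32 / 8 = 4 RSS queues, further capped at
		 * SH_VF_MAX_NIC_EQS.
		 */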
3962 
3963 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3964 		 * RSS Tables per port. Provide RSS on VFs, only if number of
3965 		 * VFs requested is less than its PF Pool's RSS Tables limit.
3966 		 */
3967 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3968 			num_vf_qs = 1;
3969 	}
3970 
3971 	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3972 	 * which are modifiable using SET_PROFILE_CONFIG cmd.
3973 	 */
3974 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3975 				  RESOURCE_MODIFIABLE, 0);
3976 
3977 	/* If RSS IFACE capability flags are modifiable for a VF, set the
3978 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3979 	 * more than 1 RSSQ is available for a VF.
3980 	 * Otherwise, provision only 1 queue pair for VF.
3981 	 */
3982 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3983 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3984 		if (num_vf_qs > 1) {
3985 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3986 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3987 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3988 		} else {
3989 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3990 					     BE_IF_FLAGS_DEFQ_RSS);
3991 		}
3992 	} else {
3993 		num_vf_qs = 1;
3994 	}
3995 
3996 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3997 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3998 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3999 	}
4000 
4001 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4002 	vft_res->max_rx_qs = num_vf_qs;
4003 	vft_res->max_rss_qs = num_vf_qs;
4004 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4005 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4006 
4007 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4008 	 * among the PF and its VFs, if the fields are changeable
4009 	 */
4010 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4011 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4012 
4013 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4014 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4015 
4016 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4017 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4018 
4019 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4020 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4021 }
4022 
4023 static void be_if_destroy(struct be_adapter *adapter)
4024 {
4025 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4026 
4027 	kfree(adapter->pmac_id);
4028 	adapter->pmac_id = NULL;
4029 
4030 	kfree(adapter->mc_list);
4031 	adapter->mc_list = NULL;
4032 
4033 	kfree(adapter->uc_list);
4034 	adapter->uc_list = NULL;
4035 }
4036 
4037 static int be_clear(struct be_adapter *adapter)
4038 {
4039 	struct pci_dev *pdev = adapter->pdev;
4040 	struct  be_resources vft_res = {0};
4041 
4042 	be_cancel_worker(adapter);
4043 
4044 	flush_workqueue(be_wq);
4045 
4046 	if (sriov_enabled(adapter))
4047 		be_vf_clear(adapter);
4048 
4049 	/* Re-configure FW to distribute resources evenly across max-supported
4050 	 * number of VFs, only when VFs are not already enabled.
4051 	 */
4052 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4053 	    !pci_vfs_assigned(pdev)) {
4054 		be_calculate_vf_res(adapter,
4055 				    pci_sriov_get_totalvfs(pdev),
4056 				    &vft_res);
4057 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4058 					pci_sriov_get_totalvfs(pdev),
4059 					&vft_res);
4060 	}
4061 
4062 	be_disable_vxlan_offloads(adapter);
4063 
4064 	be_if_destroy(adapter);
4065 
4066 	be_clear_queues(adapter);
4067 
4068 	be_msix_disable(adapter);
4069 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4070 	return 0;
4071 }
4072 
4073 static int be_vfs_if_create(struct be_adapter *adapter)
4074 {
4075 	struct be_resources res = {0};
4076 	u32 cap_flags, en_flags, vf;
4077 	struct be_vf_cfg *vf_cfg;
4078 	int status;
4079 
4080 	/* If a FW profile exists, then cap_flags are updated */
4081 	cap_flags = BE_VF_IF_EN_FLAGS;
4082 
4083 	for_all_vfs(adapter, vf_cfg, vf) {
4084 		if (!BE3_chip(adapter)) {
4085 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4086 							   ACTIVE_PROFILE_TYPE,
4087 							   RESOURCE_LIMITS,
4088 							   vf + 1);
4089 			if (!status) {
4090 				cap_flags = res.if_cap_flags;
4091 				/* Prevent VFs from enabling VLAN promiscuous
4092 				 * mode
4093 				 */
4094 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4095 			}
4096 		}
4097 
4098 		/* PF should enable IF flags during proxy if_create call */
4099 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4100 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4101 					  &vf_cfg->if_handle, vf + 1);
4102 		if (status)
4103 			return status;
4104 	}
4105 
4106 	return 0;
4107 }
4108 
4109 static int be_vf_setup_init(struct be_adapter *adapter)
4110 {
4111 	struct be_vf_cfg *vf_cfg;
4112 	int vf;
4113 
4114 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4115 				  GFP_KERNEL);
4116 	if (!adapter->vf_cfg)
4117 		return -ENOMEM;
4118 
4119 	for_all_vfs(adapter, vf_cfg, vf) {
4120 		vf_cfg->if_handle = -1;
4121 		vf_cfg->pmac_id = -1;
4122 	}
4123 	return 0;
4124 }
4125 
4126 static int be_vf_setup(struct be_adapter *adapter)
4127 {
4128 	struct device *dev = &adapter->pdev->dev;
4129 	struct be_vf_cfg *vf_cfg;
4130 	int status, old_vfs, vf;
4131 	bool spoofchk;
4132 
4133 	old_vfs = pci_num_vf(adapter->pdev);
4134 
4135 	status = be_vf_setup_init(adapter);
4136 	if (status)
4137 		goto err;
4138 
4139 	if (old_vfs) {
4140 		for_all_vfs(adapter, vf_cfg, vf) {
4141 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4142 			if (status)
4143 				goto err;
4144 		}
4145 
4146 		status = be_vfs_mac_query(adapter);
4147 		if (status)
4148 			goto err;
4149 	} else {
4150 		status = be_vfs_if_create(adapter);
4151 		if (status)
4152 			goto err;
4153 
4154 		status = be_vf_eth_addr_config(adapter);
4155 		if (status)
4156 			goto err;
4157 	}
4158 
4159 	for_all_vfs(adapter, vf_cfg, vf) {
4160 		/* Allow VFs to program MAC/VLAN filters */
4161 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4162 						  vf + 1);
4163 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4164 			status = be_cmd_set_fn_privileges(adapter,
4165 							  vf_cfg->privileges |
4166 							  BE_PRIV_FILTMGMT,
4167 							  vf + 1);
4168 			if (!status) {
4169 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4170 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4171 					 vf);
4172 			}
4173 		}
4174 
4175 		/* Allow full available bandwidth */
4176 		if (!old_vfs)
4177 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4178 
4179 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4180 					       vf_cfg->if_handle, NULL,
4181 					       &spoofchk);
4182 		if (!status)
4183 			vf_cfg->spoofchk = spoofchk;
4184 
4185 		if (!old_vfs) {
4186 			be_cmd_enable_vf(adapter, vf + 1);
4187 			be_cmd_set_logical_link_config(adapter,
4188 						       IFLA_VF_LINK_STATE_AUTO,
4189 						       vf+1);
4190 		}
4191 	}
4192 
4193 	if (!old_vfs) {
4194 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4195 		if (status) {
4196 			dev_err(dev, "SRIOV enable failed\n");
4197 			adapter->num_vfs = 0;
4198 			goto err;
4199 		}
4200 	}
4201 
4202 	if (BE3_chip(adapter)) {
4203 		/* On BE3, enable VEB only when SRIOV is enabled */
4204 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4205 					       adapter->if_handle,
4206 					       PORT_FWD_TYPE_VEB, 0);
4207 		if (status)
4208 			goto err;
4209 	}
4210 
4211 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4212 	return 0;
4213 err:
4214 	dev_err(dev, "VF setup failed\n");
4215 	be_vf_clear(adapter);
4216 	return status;
4217 }
4218 
4219 /* Converting function_mode bits on BE3 to SH mc_type enums */
4220 
4221 static u8 be_convert_mc_type(u32 function_mode)
4222 {
4223 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4224 		return vNIC1;
4225 	else if (function_mode & QNQ_MODE)
4226 		return FLEX10;
4227 	else if (function_mode & VNIC_MODE)
4228 		return vNIC2;
4229 	else if (function_mode & UMC_ENABLED)
4230 		return UMC;
4231 	else
4232 		return MC_NONE;
4233 }
4234 
4235 /* On BE2/BE3, FW does not suggest the supported limits */
4236 static void BEx_get_resources(struct be_adapter *adapter,
4237 			      struct be_resources *res)
4238 {
4239 	bool use_sriov = adapter->num_vfs ? true : false;
4240 
4241 	if (be_physfn(adapter))
4242 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4243 	else
4244 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4245 
4246 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4247 
4248 	if (be_is_mc(adapter)) {
4249 		/* Assuming that there are 4 channels per port,
4250 		 * when multi-channel is enabled
4251 		 */
4252 		if (be_is_qnq_mode(adapter))
4253 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4254 		else
4255 			/* In a non-qnq multichannel mode, the pvid
4256 			 * takes up one vlan entry
4257 			 */
4258 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4259 	} else {
4260 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4261 	}
4262 
4263 	res->max_mcast_mac = BE_MAX_MC;
4264 
4265 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4266 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4267 	 *    *only* if it is RSS-capable.
4268 	 */
4269 	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4270 	    be_virtfn(adapter) ||
4271 	    (be_is_mc(adapter) &&
4272 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4273 		res->max_tx_qs = 1;
4274 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4275 		struct be_resources super_nic_res = {0};
4276 
4277 		/* On a SuperNIC profile, the driver needs to use the
4278 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4279 		 */
4280 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4281 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4282 					  0);
4283 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4284 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4285 	} else {
4286 		res->max_tx_qs = BE3_MAX_TX_QS;
4287 	}
4288 
4289 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4290 	    !use_sriov && be_physfn(adapter))
4291 		res->max_rss_qs = (adapter->be3_native) ?
4292 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4293 	res->max_rx_qs = res->max_rss_qs + 1;
4294 
4295 	if (be_physfn(adapter))
4296 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4297 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4298 	else
4299 		res->max_evt_qs = 1;
4300 
4301 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4302 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4303 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4304 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4305 }
4306 
4307 static void be_setup_init(struct be_adapter *adapter)
4308 {
4309 	adapter->vlan_prio_bmap = 0xff;
4310 	adapter->phy.link_speed = -1;
4311 	adapter->if_handle = -1;
4312 	adapter->be3_native = false;
4313 	adapter->if_flags = 0;
4314 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4315 	if (be_physfn(adapter))
4316 		adapter->cmd_privileges = MAX_PRIVILEGES;
4317 	else
4318 		adapter->cmd_privileges = MIN_PRIVILEGES;
4319 }
4320 
4321 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4322  * However, this HW limitation is not exposed to the host via any SLI cmd.
4323  * As a result, in the case of SRIOV and in particular multi-partition configs
4324  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4325  * for distribution between the VFs. This self-imposed limit will determine the
4326  * number of VFs for which RSS can be enabled.
4327  */
4328 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4329 {
4330 	struct be_port_resources port_res = {0};
4331 	u8 rss_tables_on_port;
4332 	u16 max_vfs = be_max_vfs(adapter);
4333 
4334 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4335 				  RESOURCE_LIMITS, 0);
4336 
4337 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4338 
4339 	/* Each PF Pool's RSS Tables limit =
4340 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4341 	 */
4342 	adapter->pool_res.max_rss_tables =
4343 		max_vfs * rss_tables_on_port / port_res.max_vfs;
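	/* Illustrative example (hypothetical numbers): with 14 RSS tables
	 * left on the port (MAX_PORT_RSS_TABLES minus the NIC PF count) and
	 * this PF's pool owning 32 of the port's 64 VFs, the pool is granted
	 * 32 * 14 / 64 = 7 RSS tables.
	 */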
4344 }
4345 
4346 static int be_get_sriov_config(struct be_adapter *adapter)
4347 {
4348 	struct be_resources res = {0};
4349 	int max_vfs, old_vfs;
4350 
4351 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4352 				  RESOURCE_LIMITS, 0);
4353 
4354 	/* Some old versions of BE3 FW don't report max_vfs value */
4355 	if (BE3_chip(adapter) && !res.max_vfs) {
4356 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4357 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4358 	}
4359 
4360 	adapter->pool_res = res;
4361 
4362 	/* If during previous unload of the driver, the VFs were not disabled,
4363 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4364 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4365 	 */
4366 	old_vfs = pci_num_vf(adapter->pdev);
4367 	if (old_vfs) {
4368 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4369 			 old_vfs);
4370 
4371 		adapter->pool_res.max_vfs =
4372 			pci_sriov_get_totalvfs(adapter->pdev);
4373 		adapter->num_vfs = old_vfs;
4374 	}
4375 
4376 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4377 		be_calculate_pf_pool_rss_tables(adapter);
4378 		dev_info(&adapter->pdev->dev,
4379 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4380 			 be_max_pf_pool_rss_tables(adapter));
4381 	}
4382 	return 0;
4383 }
4384 
4385 static void be_alloc_sriov_res(struct be_adapter *adapter)
4386 {
4387 	int old_vfs = pci_num_vf(adapter->pdev);
4388 	struct  be_resources vft_res = {0};
4389 	int status;
4390 
4391 	be_get_sriov_config(adapter);
4392 
4393 	if (!old_vfs)
4394 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4395 
4396 	/* When the HW is in SRIOV capable configuration, the PF-pool
4397 	 * resources are given to PF during driver load, if there are no
4398 	 * old VFs. This facility is not available in BE3 FW.
4399 	 * Also, this is done by FW in Lancer chip.
4400 	 */
4401 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4402 		be_calculate_vf_res(adapter, 0, &vft_res);
4403 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4404 						 &vft_res);
4405 		if (status)
4406 			dev_err(&adapter->pdev->dev,
4407 				"Failed to optimize SRIOV resources\n");
4408 	}
4409 }
4410 
4411 static int be_get_resources(struct be_adapter *adapter)
4412 {
4413 	struct device *dev = &adapter->pdev->dev;
4414 	struct be_resources res = {0};
4415 	int status;
4416 
4417 	/* For Lancer, SH etc. read per-function resource limits from FW.
4418 	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4419 	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4420 	 */
4421 	if (BEx_chip(adapter)) {
4422 		BEx_get_resources(adapter, &res);
4423 	} else {
4424 		status = be_cmd_get_func_config(adapter, &res);
4425 		if (status)
4426 			return status;
4427 
4428 		/* If a default RXQ must be created, we'll use up one RSSQ */
4429 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4430 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4431 			res.max_rss_qs -= 1;
4432 	}
4433 
4434 	/* If RoCE is supported stash away half the EQs for RoCE */
4435 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4436 				res.max_evt_qs / 2 : res.max_evt_qs;
4437 	adapter->res = res;
4438 
4439 	/* If FW supports RSS default queue, then skip creating non-RSS
4440 	 * queue for non-IP traffic.
4441 	 */
4442 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4443 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4444 
4445 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4446 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4447 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4448 		 be_max_vfs(adapter));
4449 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4450 		 be_max_uc(adapter), be_max_mc(adapter),
4451 		 be_max_vlans(adapter));
4452 
4453 	/* Ensure RX and TX queues are created in pairs at init time */
4454 	adapter->cfg_num_rx_irqs =
4455 				min_t(u16, netif_get_num_default_rss_queues(),
4456 				      be_max_qp_irqs(adapter));
4457 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4458 	return 0;
4459 }
4460 
4461 static int be_get_config(struct be_adapter *adapter)
4462 {
4463 	int status, level;
4464 	u16 profile_id;
4465 
4466 	status = be_cmd_get_cntl_attributes(adapter);
4467 	if (status)
4468 		return status;
4469 
4470 	status = be_cmd_query_fw_cfg(adapter);
4471 	if (status)
4472 		return status;
4473 
4474 	if (!lancer_chip(adapter) && be_physfn(adapter))
4475 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4476 
4477 	if (BEx_chip(adapter)) {
4478 		level = be_cmd_get_fw_log_level(adapter);
4479 		adapter->msg_enable =
4480 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4481 	}
4482 
4483 	be_cmd_get_acpi_wol_cap(adapter);
4484 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4485 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4486 
4487 	be_cmd_query_port_name(adapter);
4488 
4489 	if (be_physfn(adapter)) {
4490 		status = be_cmd_get_active_profile(adapter, &profile_id);
4491 		if (!status)
4492 			dev_info(&adapter->pdev->dev,
4493 				 "Using profile 0x%x\n", profile_id);
4494 	}
4495 
4496 	return 0;
4497 }
4498 
4499 static int be_mac_setup(struct be_adapter *adapter)
4500 {
4501 	u8 mac[ETH_ALEN];
4502 	int status;
4503 
4504 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4505 		status = be_cmd_get_perm_mac(adapter, mac);
4506 		if (status)
4507 			return status;
4508 
4509 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4510 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4511 
4512 		/* Initial MAC for BE3 VFs is already programmed by PF */
4513 		if (BEx_chip(adapter) && be_virtfn(adapter))
4514 			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4515 	}
4516 
4517 	return 0;
4518 }
4519 
4520 static void be_schedule_worker(struct be_adapter *adapter)
4521 {
4522 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4523 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4524 }
4525 
4526 static void be_destroy_err_recovery_workq(void)
4527 {
4528 	if (!be_err_recovery_workq)
4529 		return;
4530 
4531 	flush_workqueue(be_err_recovery_workq);
4532 	destroy_workqueue(be_err_recovery_workq);
4533 	be_err_recovery_workq = NULL;
4534 }
4535 
4536 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4537 {
4538 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4539 
4540 	if (!be_err_recovery_workq)
4541 		return;
4542 
4543 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4544 			   msecs_to_jiffies(delay));
4545 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4546 }
4547 
4548 static int be_setup_queues(struct be_adapter *adapter)
4549 {
4550 	struct net_device *netdev = adapter->netdev;
4551 	int status;
4552 
4553 	status = be_evt_queues_create(adapter);
4554 	if (status)
4555 		goto err;
4556 
4557 	status = be_tx_qs_create(adapter);
4558 	if (status)
4559 		goto err;
4560 
4561 	status = be_rx_cqs_create(adapter);
4562 	if (status)
4563 		goto err;
4564 
4565 	status = be_mcc_queues_create(adapter);
4566 	if (status)
4567 		goto err;
4568 
4569 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4570 	if (status)
4571 		goto err;
4572 
4573 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4574 	if (status)
4575 		goto err;
4576 
4577 	return 0;
4578 err:
4579 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4580 	return status;
4581 }
4582 
4583 static int be_if_create(struct be_adapter *adapter)
4584 {
4585 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4586 	u32 cap_flags = be_if_cap_flags(adapter);
4587 	int status;
4588 
4589 	/* alloc required memory for other filtering fields */
4590 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4591 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4592 	if (!adapter->pmac_id)
4593 		return -ENOMEM;
4594 
4595 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4596 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4597 	if (!adapter->mc_list)
4598 		return -ENOMEM;
4599 
4600 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4601 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4602 	if (!adapter->uc_list)
4603 		return -ENOMEM;
4604 
4605 	if (adapter->cfg_num_rx_irqs == 1)
4606 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4607 
4608 	en_flags &= cap_flags;
4609 	/* will enable all the needed filter flags in be_open() */
4610 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4611 				  &adapter->if_handle, 0);
4612 
4613 	if (status)
4614 		return status;
4615 
4616 	return 0;
4617 }
4618 
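/* Re-create the interface and all queues so that a changed ring/channel
 * configuration takes effect: close the netdev if it is running, tear down
 * the queues and IF, re-program MSI-X when the vectors are not shared with
 * RoCE, then re-create everything and re-open the device.
 */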
4619 int be_update_queues(struct be_adapter *adapter)
4620 {
4621 	struct net_device *netdev = adapter->netdev;
4622 	int status;
4623 
4624 	if (netif_running(netdev))
4625 		be_close(netdev);
4626 
4627 	be_cancel_worker(adapter);
4628 
4629 	/* If any vectors have been shared with RoCE we cannot re-program
4630 	 * the MSIx table.
4631 	 */
4632 	if (!adapter->num_msix_roce_vec)
4633 		be_msix_disable(adapter);
4634 
4635 	be_clear_queues(adapter);
4636 	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4637 	if (status)
4638 		return status;
4639 
4640 	if (!msix_enabled(adapter)) {
4641 		status = be_msix_enable(adapter);
4642 		if (status)
4643 			return status;
4644 	}
4645 
4646 	status = be_if_create(adapter);
4647 	if (status)
4648 		return status;
4649 
4650 	status = be_setup_queues(adapter);
4651 	if (status)
4652 		return status;
4653 
4654 	be_schedule_worker(adapter);
4655 
4656 	/* The IF was destroyed and re-created. We need to clear
4657 	 * all promiscuous flags valid for the destroyed IF.
4658 	 * Without this, promisc mode is not restored during
4659 	 * be_open() because the driver thinks that it is
4660 	 * already enabled in HW.
4661 	 */
4662 	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4663 
4664 	if (netif_running(netdev))
4665 		status = be_open(netdev);
4666 
4667 	return status;
4668 }
4669 
4670 static inline int fw_major_num(const char *fw_ver)
4671 {
4672 	int fw_major = 0, i;
4673 
4674 	i = sscanf(fw_ver, "%d.", &fw_major);
4675 	if (i != 1)
4676 		return 0;
4677 
4678 	return fw_major;
4679 }
4680 
4681 /* If it is error recovery, FLR the PF.
4682  * Else, don't FLR the PF if any VFs are already enabled.
4683  */
4684 static bool be_reset_required(struct be_adapter *adapter)
4685 {
4686 	if (be_error_recovering(adapter))
4687 		return true;
4688 	else
4689 		return pci_num_vf(adapter->pdev) == 0;
4690 }
4691 
4692 /* Wait for the FW to be ready and perform the required initialization */
4693 static int be_func_init(struct be_adapter *adapter)
4694 {
4695 	int status;
4696 
4697 	status = be_fw_wait_ready(adapter);
4698 	if (status)
4699 		return status;
4700 
4701 	/* FW is now ready; clear errors to allow cmds/doorbell */
4702 	be_clear_error(adapter, BE_CLEAR_ALL);
4703 
4704 	if (be_reset_required(adapter)) {
4705 		status = be_cmd_reset_function(adapter);
4706 		if (status)
4707 			return status;
4708 
4709 		/* Wait for interrupts to quiesce after an FLR */
4710 		msleep(100);
4711 	}
4712 
4713 	/* Tell FW we're ready to fire cmds */
4714 	status = be_cmd_fw_init(adapter);
4715 	if (status)
4716 		return status;
4717 
4718 	/* Allow interrupts for other ULPs running on NIC function */
4719 	be_intr_set(adapter, true);
4720 
4721 	return 0;
4722 }
4723 
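/* One-time function setup: wait for FW readiness (issuing an FLR when
 * required), query configuration and resource limits, enable MSI-X, create
 * the IF and all queues, program the MAC and flow control, and finally bring
 * up SR-IOV VFs and the periodic worker.
 */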
4724 static int be_setup(struct be_adapter *adapter)
4725 {
4726 	struct device *dev = &adapter->pdev->dev;
4727 	int status;
4728 
4729 	status = be_func_init(adapter);
4730 	if (status)
4731 		return status;
4732 
4733 	be_setup_init(adapter);
4734 
4735 	if (!lancer_chip(adapter))
4736 		be_cmd_req_native_mode(adapter);
4737 
4738 	/* invoke this cmd first to get pf_num and vf_num which are needed
4739 	 * for issuing profile related cmds
4740 	 */
4741 	if (!BEx_chip(adapter)) {
4742 		status = be_cmd_get_func_config(adapter, NULL);
4743 		if (status)
4744 			return status;
4745 	}
4746 
4747 	status = be_get_config(adapter);
4748 	if (status)
4749 		goto err;
4750 
4751 	if (!BE2_chip(adapter) && be_physfn(adapter))
4752 		be_alloc_sriov_res(adapter);
4753 
4754 	status = be_get_resources(adapter);
4755 	if (status)
4756 		goto err;
4757 
4758 	status = be_msix_enable(adapter);
4759 	if (status)
4760 		goto err;
4761 
4762 	/* will enable all the needed filter flags in be_open() */
4763 	status = be_if_create(adapter);
4764 	if (status)
4765 		goto err;
4766 
4767 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4768 	rtnl_lock();
4769 	status = be_setup_queues(adapter);
4770 	rtnl_unlock();
4771 	if (status)
4772 		goto err;
4773 
4774 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4775 
4776 	status = be_mac_setup(adapter);
4777 	if (status)
4778 		goto err;
4779 
4780 	be_cmd_get_fw_ver(adapter);
4781 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4782 
4783 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4784 		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4785 			adapter->fw_ver);
4786 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4787 	}
4788 
4789 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4790 					 adapter->rx_fc);
4791 	if (status)
4792 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4793 					&adapter->rx_fc);
4794 
4795 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4796 		 adapter->tx_fc, adapter->rx_fc);
4797 
4798 	if (be_physfn(adapter))
4799 		be_cmd_set_logical_link_config(adapter,
4800 					       IFLA_VF_LINK_STATE_AUTO, 0);
4801 
4802 	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4803 	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4804 	 * confusing a Linux bridge or OVS that it might be connected to.
4805 	 * when SRIOV is not enabled.
4806 	 */
4807 	if (BE3_chip(adapter))
4808 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4809 				      PORT_FWD_TYPE_PASSTHRU, 0);
4810 
4811 	if (adapter->num_vfs)
4812 		be_vf_setup(adapter);
4813 
4814 	status = be_cmd_get_phy_info(adapter);
4815 	if (!status && be_pause_supported(adapter))
4816 		adapter->phy.fc_autoneg = 1;
4817 
4818 	if (be_physfn(adapter) && !lancer_chip(adapter))
4819 		be_cmd_set_features(adapter);
4820 
4821 	be_schedule_worker(adapter);
4822 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4823 	return 0;
4824 err:
4825 	be_clear(adapter);
4826 	return status;
4827 }
4828 
4829 #ifdef CONFIG_NET_POLL_CONTROLLER
4830 static void be_netpoll(struct net_device *netdev)
4831 {
4832 	struct be_adapter *adapter = netdev_priv(netdev);
4833 	struct be_eq_obj *eqo;
4834 	int i;
4835 
4836 	for_all_evt_queues(adapter, eqo, i) {
4837 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4838 		napi_schedule(&eqo->napi);
4839 	}
4840 }
4841 #endif
4842 
4843 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4844 {
4845 	const struct firmware *fw;
4846 	int status;
4847 
4848 	if (!netif_running(adapter->netdev)) {
4849 		dev_err(&adapter->pdev->dev,
4850 			"Firmware load not allowed (interface is down)\n");
4851 		return -ENETDOWN;
4852 	}
4853 
4854 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4855 	if (status)
4856 		goto fw_exit;
4857 
4858 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4859 
4860 	if (lancer_chip(adapter))
4861 		status = lancer_fw_download(adapter, fw);
4862 	else
4863 		status = be_fw_download(adapter, fw);
4864 
4865 	if (!status)
4866 		be_cmd_get_fw_ver(adapter);
4867 
4868 fw_exit:
4869 	release_firmware(fw);
4870 	return status;
4871 }
4872 
4873 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4874 				 u16 flags)
4875 {
4876 	struct be_adapter *adapter = netdev_priv(dev);
4877 	struct nlattr *attr, *br_spec;
4878 	int rem;
4879 	int status = 0;
4880 	u16 mode = 0;
4881 
4882 	if (!sriov_enabled(adapter))
4883 		return -EOPNOTSUPP;
4884 
4885 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4886 	if (!br_spec)
4887 		return -EINVAL;
4888 
4889 	nla_for_each_nested(attr, br_spec, rem) {
4890 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4891 			continue;
4892 
4893 		if (nla_len(attr) < sizeof(mode))
4894 			return -EINVAL;
4895 
4896 		mode = nla_get_u16(attr);
4897 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4898 			return -EOPNOTSUPP;
4899 
4900 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4901 			return -EINVAL;
4902 
4903 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4904 					       adapter->if_handle,
4905 					       mode == BRIDGE_MODE_VEPA ?
4906 					       PORT_FWD_TYPE_VEPA :
4907 					       PORT_FWD_TYPE_VEB, 0);
4908 		if (status)
4909 			goto err;
4910 
4911 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4912 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4913 
4914 		return status;
4915 	}
4916 err:
4917 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4918 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4919 
4920 	return status;
4921 }
4922 
4923 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4924 				 struct net_device *dev, u32 filter_mask,
4925 				 int nlflags)
4926 {
4927 	struct be_adapter *adapter = netdev_priv(dev);
4928 	int status = 0;
4929 	u8 hsw_mode;
4930 
4931 	/* BE and Lancer chips support VEB mode only */
4932 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4933 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4934 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4935 			return 0;
4936 		hsw_mode = PORT_FWD_TYPE_VEB;
4937 	} else {
4938 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4939 					       adapter->if_handle, &hsw_mode,
4940 					       NULL);
4941 		if (status)
4942 			return 0;
4943 
4944 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4945 			return 0;
4946 	}
4947 
4948 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4949 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4950 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4951 				       0, 0, nlflags, filter_mask, NULL);
4952 }
4953 
4954 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4955 					 void (*func)(struct work_struct *))
4956 {
4957 	struct be_cmd_work *work;
4958 
4959 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4960 	if (!work) {
4961 		dev_err(&adapter->pdev->dev,
4962 			"be_work memory allocation failed\n");
4963 		return NULL;
4964 	}
4965 
4966 	INIT_WORK(&work->work, func);
4967 	work->adapter = adapter;
4968 	return work;
4969 }
4970 
4971 /* VxLAN offload Notes:
4972  *
4973  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4974  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4975  * is expected to work across all types of IP tunnels once exported. Skyhawk
4976  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4977  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4978  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4979  * those other tunnels are unexported on the fly through ndo_features_check().
4980  *
4981  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4982  * adds more than one port, disable offloads and re-enable them again when
4983  * there's only one port left. We maintain a list of ports for this purpose.
4984  */
4985 static void be_work_add_vxlan_port(struct work_struct *work)
4986 {
4987 	struct be_cmd_work *cmd_work =
4988 				container_of(work, struct be_cmd_work, work);
4989 	struct be_adapter *adapter = cmd_work->adapter;
4990 	struct device *dev = &adapter->pdev->dev;
4991 	__be16 port = cmd_work->info.vxlan_port;
4992 	struct be_vxlan_port *vxlan_port;
4993 	int status;
4994 
4995 	/* Bump up the alias count if it is an existing port */
4996 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4997 		if (vxlan_port->port == port) {
4998 			vxlan_port->port_aliases++;
4999 			goto done;
5000 		}
5001 	}
5002 
5003 	/* Add a new port to our list. We don't need a lock here since port
5004 	 * add/delete are done only in the context of a single-threaded work
5005 	 * queue (be_wq).
5006 	 */
5007 	vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5008 	if (!vxlan_port)
5009 		goto done;
5010 
5011 	vxlan_port->port = port;
5012 	INIT_LIST_HEAD(&vxlan_port->list);
5013 	list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5014 	adapter->vxlan_port_count++;
5015 
5016 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5017 		dev_info(dev,
5018 			 "Only one UDP port supported for VxLAN offloads\n");
5019 		dev_info(dev, "Disabling VxLAN offloads\n");
5020 		goto err;
5021 	}
5022 
5023 	if (adapter->vxlan_port_count > 1)
5024 		goto done;
5025 
5026 	status = be_enable_vxlan_offloads(adapter);
5027 	if (!status)
5028 		goto done;
5029 
5030 err:
5031 	be_disable_vxlan_offloads(adapter);
5032 done:
5033 	kfree(cmd_work);
5034 	return;
5035 }
5036 
5037 static void be_work_del_vxlan_port(struct work_struct *work)
5038 {
5039 	struct be_cmd_work *cmd_work =
5040 				container_of(work, struct be_cmd_work, work);
5041 	struct be_adapter *adapter = cmd_work->adapter;
5042 	__be16 port = cmd_work->info.vxlan_port;
5043 	struct be_vxlan_port *vxlan_port;
5044 
5045 	/* Nothing to be done if a port alias is being deleted */
5046 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5047 		if (vxlan_port->port == port) {
5048 			if (vxlan_port->port_aliases) {
5049 				vxlan_port->port_aliases--;
5050 				goto done;
5051 			}
5052 			break;
5053 		}
5054 	}
5055 
5056 	/* No port aliases left; delete the port from the list */
5057 	list_del(&vxlan_port->list);
5058 	adapter->vxlan_port_count--;
5059 
5060 	/* Disable VxLAN offload if this is the offloaded port */
5061 	if (adapter->vxlan_port == vxlan_port->port) {
5062 		WARN_ON(adapter->vxlan_port_count);
5063 		be_disable_vxlan_offloads(adapter);
5064 		dev_info(&adapter->pdev->dev,
5065 			 "Disabled VxLAN offloads for UDP port %d\n",
5066 			 be16_to_cpu(port));
5067 		goto out;
5068 	}
5069 
5070 	/* If only 1 port is left, re-enable VxLAN offload */
5071 	if (adapter->vxlan_port_count == 1)
5072 		be_enable_vxlan_offloads(adapter);
5073 
5074 out:
5075 	kfree(vxlan_port);
5076 done:
5077 	kfree(cmd_work);
5078 }
5079 
5080 static void be_cfg_vxlan_port(struct net_device *netdev,
5081 			      struct udp_tunnel_info *ti,
5082 			      void (*func)(struct work_struct *))
5083 {
5084 	struct be_adapter *adapter = netdev_priv(netdev);
5085 	struct be_cmd_work *cmd_work;
5086 
5087 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5088 		return;
5089 
5090 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5091 		return;
5092 
5093 	cmd_work = be_alloc_work(adapter, func);
5094 	if (cmd_work) {
5095 		cmd_work->info.vxlan_port = ti->port;
5096 		queue_work(be_wq, &cmd_work->work);
5097 	}
5098 }
5099 
5100 static void be_del_vxlan_port(struct net_device *netdev,
5101 			      struct udp_tunnel_info *ti)
5102 {
5103 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5104 }
5105 
5106 static void be_add_vxlan_port(struct net_device *netdev,
5107 			      struct udp_tunnel_info *ti)
5108 {
5109 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5110 }
5111 
5112 static netdev_features_t be_features_check(struct sk_buff *skb,
5113 					   struct net_device *dev,
5114 					   netdev_features_t features)
5115 {
5116 	struct be_adapter *adapter = netdev_priv(dev);
5117 	u8 l4_hdr = 0;
5118 
5119 	if (skb_is_gso(skb)) {
5120 		/* IPv6 TSO requests with extension hdrs are a problem
5121 		 * for Lancer and BE3 HW. Disable the TSO6 feature.
5122 		 */
5123 		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5124 			features &= ~NETIF_F_TSO6;
5125 
5126 		/* Lancer cannot handle a packet with MSS less than 256.
5127 		 * It also can't handle a TSO packet with a single segment.
5128 		 * Disable GSO support in such cases.
5129 		 */
5130 		if (lancer_chip(adapter) &&
5131 		    (skb_shinfo(skb)->gso_size < 256 ||
5132 		     skb_shinfo(skb)->gso_segs == 1))
5133 			features &= ~NETIF_F_GSO_MASK;
5134 	}
5135 
5136 	/* The code below restricts offload features for some tunneled and
5137 	 * Q-in-Q packets.
5138 	 * Offload features for normal (non tunnel) packets are unchanged.
5139 	 */
5140 	features = vlan_features_check(skb, features);
5141 	if (!skb->encapsulation ||
5142 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5143 		return features;
5144 
5145 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5146 	 * should disable tunnel offload features if it's not a VxLAN packet,
5147 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5148 	 * allow other tunneled traffic like GRE to work fine while VxLAN
5149 	 * offloads are configured in Skyhawk-R.
5150 	 */
5151 	switch (vlan_get_protocol(skb)) {
5152 	case htons(ETH_P_IP):
5153 		l4_hdr = ip_hdr(skb)->protocol;
5154 		break;
5155 	case htons(ETH_P_IPV6):
5156 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5157 		break;
5158 	default:
5159 		return features;
5160 	}
5161 
5162 	if (l4_hdr != IPPROTO_UDP ||
5163 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5164 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5165 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5166 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5167 	    !adapter->vxlan_port ||
5168 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5169 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5170 
5171 	return features;
5172 }
5173 
5174 static int be_get_phys_port_id(struct net_device *dev,
5175 			       struct netdev_phys_item_id *ppid)
5176 {
5177 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5178 	struct be_adapter *adapter = netdev_priv(dev);
5179 	u8 *id;
5180 
5181 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5182 		return -ENOSPC;
5183 
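	/* Build the id as the 1-based HBA port number followed by the
	 * controller serial-number words, copied in reverse word order.
	 */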
5184 	ppid->id[0] = adapter->hba_port_num + 1;
5185 	id = &ppid->id[1];
5186 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5187 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5188 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5189 
5190 	ppid->id_len = id_len;
5191 
5192 	return 0;
5193 }
5194 
5195 static void be_set_rx_mode(struct net_device *dev)
5196 {
5197 	struct be_adapter *adapter = netdev_priv(dev);
5198 	struct be_cmd_work *work;
5199 
5200 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5201 	if (work)
5202 		queue_work(be_wq, &work->work);
5203 }
5204 
5205 static const struct net_device_ops be_netdev_ops = {
5206 	.ndo_open		= be_open,
5207 	.ndo_stop		= be_close,
5208 	.ndo_start_xmit		= be_xmit,
5209 	.ndo_set_rx_mode	= be_set_rx_mode,
5210 	.ndo_set_mac_address	= be_mac_addr_set,
5211 	.ndo_get_stats64	= be_get_stats64,
5212 	.ndo_validate_addr	= eth_validate_addr,
5213 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5214 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5215 	.ndo_set_vf_mac		= be_set_vf_mac,
5216 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5217 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5218 	.ndo_get_vf_config	= be_get_vf_config,
5219 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5220 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5221 #ifdef CONFIG_NET_POLL_CONTROLLER
5222 	.ndo_poll_controller	= be_netpoll,
5223 #endif
5224 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5225 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5226 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5227 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5228 	.ndo_features_check	= be_features_check,
5229 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5230 };
5231 
5232 static void be_netdev_init(struct net_device *netdev)
5233 {
5234 	struct be_adapter *adapter = netdev_priv(netdev);
5235 
5236 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5237 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5238 		NETIF_F_HW_VLAN_CTAG_TX;
5239 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5240 		netdev->hw_features |= NETIF_F_RXHASH;
5241 
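	/* CTAG RX/FILTER are enabled here but kept out of hw_features, so
	 * VLAN stripping and filtering stay always-on and are not
	 * user-toggleable via ethtool.
	 */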
5242 	netdev->features |= netdev->hw_features |
5243 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5244 
5245 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5246 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5247 
5248 	netdev->priv_flags |= IFF_UNICAST_FLT;
5249 
5250 	netdev->flags |= IFF_MULTICAST;
5251 
5252 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5253 
5254 	netdev->netdev_ops = &be_netdev_ops;
5255 
5256 	netdev->ethtool_ops = &be_ethtool_ops;
5257 
5258 	/* MTU range: 256 - 9000 */
5259 	netdev->min_mtu = BE_MIN_MTU;
5260 	netdev->max_mtu = BE_MAX_MTU;
5261 }
5262 
5263 static void be_cleanup(struct be_adapter *adapter)
5264 {
5265 	struct net_device *netdev = adapter->netdev;
5266 
5267 	rtnl_lock();
5268 	netif_device_detach(netdev);
5269 	if (netif_running(netdev))
5270 		be_close(netdev);
5271 	rtnl_unlock();
5272 
5273 	be_clear(adapter);
5274 }
5275 
5276 static int be_resume(struct be_adapter *adapter)
5277 {
5278 	struct net_device *netdev = adapter->netdev;
5279 	int status;
5280 
5281 	status = be_setup(adapter);
5282 	if (status)
5283 		return status;
5284 
5285 	rtnl_lock();
5286 	if (netif_running(netdev))
5287 		status = be_open(netdev);
5288 	rtnl_unlock();
5289 
5290 	if (status)
5291 		return status;
5292 
5293 	netif_device_attach(netdev);
5294 
5295 	return 0;
5296 }
5297 
5298 static void be_soft_reset(struct be_adapter *adapter)
5299 {
5300 	u32 val;
5301 
5302 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5303 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5304 	val |= SLIPORT_SOFTRESET_SR_MASK;
5305 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5306 }
5307 
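/* A TPE error is treated as recoverable only if the POST stage reports a
 * recoverable error with a non-zero error code, the initial idle period
 * since probe has elapsed, the minimum interval since the last recovery has
 * passed, and the error code differs from the previous one.
 */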
5308 static bool be_err_is_recoverable(struct be_adapter *adapter)
5309 {
5310 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5311 	unsigned long initial_idle_time =
5312 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5313 	unsigned long recovery_interval =
5314 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5315 	u16 ue_err_code;
5316 	u32 val;
5317 
5318 	val = be_POST_stage_get(adapter);
5319 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5320 		return false;
5321 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5322 	if (ue_err_code == 0)
5323 		return false;
5324 
5325 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5326 		ue_err_code);
5327 
5328 	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5329 		dev_err(&adapter->pdev->dev,
5330 			"Cannot recover within %lu sec from driver load\n",
5331 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5332 		return false;
5333 	}
5334 
5335 	if (err_rec->last_recovery_time && time_before_eq(
5336 		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5337 		dev_err(&adapter->pdev->dev,
5338 			"Cannot recover within %lu sec from last recovery\n",
5339 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5340 		return false;
5341 	}
5342 
5343 	if (ue_err_code == err_rec->last_err_code) {
5344 		dev_err(&adapter->pdev->dev,
5345 			"Cannot recover from a consecutive TPE error\n");
5346 		return false;
5347 	}
5348 
5349 	err_rec->last_recovery_time = jiffies;
5350 	err_rec->last_err_code = ue_err_code;
5351 	return true;
5352 }
5353 
5354 static int be_tpe_recover(struct be_adapter *adapter)
5355 {
5356 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5357 	int status = -EAGAIN;
5358 	u32 val;
5359 
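	/* Recovery state machine, driven by the error-detection work:
	 * NONE -> DETECT -> RESET (on PF0, which issues the chip soft reset)
	 * or straight to PRE_POLL on other PFs, then PRE_POLL -> REINIT.
	 * resched_delay tells the caller how long to wait before calling this
	 * function again; -EAGAIN means recovery is still in progress.
	 */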
5360 	switch (err_rec->recovery_state) {
5361 	case ERR_RECOVERY_ST_NONE:
5362 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5363 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5364 		break;
5365 
5366 	case ERR_RECOVERY_ST_DETECT:
5367 		val = be_POST_stage_get(adapter);
5368 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5369 		    POST_STAGE_RECOVERABLE_ERR) {
5370 			dev_err(&adapter->pdev->dev,
5371 				"Unrecoverable HW error detected: 0x%x\n", val);
5372 			status = -EINVAL;
5373 			err_rec->resched_delay = 0;
5374 			break;
5375 		}
5376 
5377 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5378 
5379 		/* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
5380 		 * milliseconds before it checks the final error status in
5381 		 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5382 		 * If they are, PF0 initiates a Soft Reset.
5383 		 */
5384 		if (adapter->pf_num == 0) {
5385 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5386 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5387 					ERR_RECOVERY_UE_DETECT_DURATION;
5388 			break;
5389 		}
5390 
5391 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5392 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5393 					ERR_RECOVERY_UE_DETECT_DURATION;
5394 		break;
5395 
5396 	case ERR_RECOVERY_ST_RESET:
5397 		if (!be_err_is_recoverable(adapter)) {
5398 			dev_err(&adapter->pdev->dev,
5399 				"Failed to meet recovery criteria\n");
5400 			status = -EIO;
5401 			err_rec->resched_delay = 0;
5402 			break;
5403 		}
5404 		be_soft_reset(adapter);
5405 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5406 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5407 					err_rec->ue_to_reset_time;
5408 		break;
5409 
5410 	case ERR_RECOVERY_ST_PRE_POLL:
5411 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5412 		err_rec->resched_delay = 0;
5413 		status = 0;			/* done */
5414 		break;
5415 
5416 	default:
5417 		status = -EINVAL;
5418 		err_rec->resched_delay = 0;
5419 		break;
5420 	}
5421 
5422 	return status;
5423 }
5424 
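/* Full adapter recovery: run the TPE state machine on non-Lancer chips,
 * wait for the FW to become ready, then tear down and re-create all
 * resources via be_cleanup()/be_resume().
 */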
5425 static int be_err_recover(struct be_adapter *adapter)
5426 {
5427 	int status;
5428 
5429 	if (!lancer_chip(adapter)) {
5430 		if (!adapter->error_recovery.recovery_supported ||
5431 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5432 			return -EIO;
5433 		status = be_tpe_recover(adapter);
5434 		if (status)
5435 			goto err;
5436 	}
5437 
5438 	/* Wait for the adapter to reach a quiescent state before
5439 	 * destroying queues.
5440 	 */
5441 	status = be_fw_wait_ready(adapter);
5442 	if (status)
5443 		goto err;
5444 
5445 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5446 
5447 	be_cleanup(adapter);
5448 
5449 	status = be_resume(adapter);
5450 	if (status)
5451 		goto err;
5452 
5453 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5454 
5455 err:
5456 	return status;
5457 }
5458 
5459 static void be_err_detection_task(struct work_struct *work)
5460 {
5461 	struct be_error_recovery *err_rec =
5462 			container_of(work, struct be_error_recovery,
5463 				     err_detection_work.work);
5464 	struct be_adapter *adapter =
5465 			container_of(err_rec, struct be_adapter,
5466 				     error_recovery);
5467 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5468 	struct device *dev = &adapter->pdev->dev;
5469 	int recovery_status;
5470 
5471 	be_detect_error(adapter);
5472 	if (!be_check_error(adapter, BE_ERROR_HW))
5473 		goto reschedule_task;
5474 
5475 	recovery_status = be_err_recover(adapter);
5476 	if (!recovery_status) {
5477 		err_rec->recovery_retries = 0;
5478 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5479 		dev_info(dev, "Adapter recovery successful\n");
5480 		goto reschedule_task;
5481 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5482 		/* BEx/SH recovery state machine */
5483 		if (adapter->pf_num == 0 &&
5484 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5485 			dev_err(&adapter->pdev->dev,
5486 				"Adapter recovery in progress\n");
5487 		resched_delay = err_rec->resched_delay;
5488 		goto reschedule_task;
5489 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5490 		/* For VFs, check every second whether the PF has
5491 		 * allocated resources.
5492 		 */
5493 		dev_err(dev, "Re-trying adapter recovery\n");
5494 		goto reschedule_task;
5495 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5496 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5497 		/* In case of another error during recovery, it takes 30 sec
5498 		 * for the adapter to come out of error. Retry error recovery
5499 		 * after this time interval.
5500 		 */
5501 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5502 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5503 		goto reschedule_task;
5504 	} else {
5505 		dev_err(dev, "Adapter recovery failed\n");
5506 		dev_err(dev, "Please reboot server to recover\n");
5507 	}
5508 
5509 	return;
5510 
5511 reschedule_task:
5512 	be_schedule_err_detection(adapter, resched_delay);
5513 }
5514 
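/* Log the SFP vendor and part number when the PHY reports a misconfigured
 * state, then clear the misconfigured flag.
 */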
5515 static void be_log_sfp_info(struct be_adapter *adapter)
5516 {
5517 	int status;
5518 
5519 	status = be_cmd_query_sfp_info(adapter);
5520 	if (!status) {
5521 		dev_err(&adapter->pdev->dev,
5522 			"Port %c: %s Vendor: %s part no: %s\n",
5523 			adapter->port_name,
5524 			be_misconfig_evt_port_state[adapter->phy_state],
5525 			adapter->phy.vendor_name,
5526 			adapter->phy.vendor_pn);
5527 	}
5528 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5529 }
5530 
5531 static void be_worker(struct work_struct *work)
5532 {
5533 	struct be_adapter *adapter =
5534 		container_of(work, struct be_adapter, work.work);
5535 	struct be_rx_obj *rxo;
5536 	int i;
5537 
5538 	if (be_physfn(adapter) &&
5539 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5540 		be_cmd_get_die_temperature(adapter);
5541 
5542 	/* when interrupts are not yet enabled, just reap any pending
5543 	 * mcc completions
5544 	 */
5545 	if (!netif_running(adapter->netdev)) {
5546 		local_bh_disable();
5547 		be_process_mcc(adapter);
5548 		local_bh_enable();
5549 		goto reschedule;
5550 	}
5551 
5552 	if (!adapter->stats_cmd_sent) {
5553 		if (lancer_chip(adapter))
5554 			lancer_cmd_get_pport_stats(adapter,
5555 						   &adapter->stats_cmd);
5556 		else
5557 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5558 	}
5559 
5560 	for_all_rx_queues(adapter, rxo, i) {
5561 		/* Replenish RX-queues starved due to memory
5562 		 * allocation failures.
5563 		 */
5564 		if (rxo->rx_post_starved)
5565 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5566 	}
5567 
5568 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5569 	if (!skyhawk_chip(adapter))
5570 		be_eqd_update(adapter, false);
5571 
5572 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5573 		be_log_sfp_info(adapter);
5574 
5575 reschedule:
5576 	adapter->work_counter++;
5577 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5578 }
5579 
5580 static void be_unmap_pci_bars(struct be_adapter *adapter)
5581 {
5582 	if (adapter->csr)
5583 		pci_iounmap(adapter->pdev, adapter->csr);
5584 	if (adapter->db)
5585 		pci_iounmap(adapter->pdev, adapter->db);
5586 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5587 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5588 }
5589 
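/* Doorbell BAR selection: Lancer chips and VFs expose doorbells in BAR 0;
 * other PFs use BAR 4.
 */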
5590 static int db_bar(struct be_adapter *adapter)
5591 {
5592 	if (lancer_chip(adapter) || be_virtfn(adapter))
5593 		return 0;
5594 	else
5595 		return 4;
5596 }
5597 
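/* For Skyhawk, record the doorbell BAR's bus address and length for the
 * RoCE driver; the region itself is not ioremapped here.
 */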
5598 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5599 {
5600 	if (skyhawk_chip(adapter)) {
5601 		adapter->roce_db.size = 4096;
5602 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5603 							      db_bar(adapter));
5604 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5605 							       db_bar(adapter));
5606 	}
5607 	return 0;
5608 }
5609 
5610 static int be_map_pci_bars(struct be_adapter *adapter)
5611 {
5612 	struct pci_dev *pdev = adapter->pdev;
5613 	u8 __iomem *addr;
5614 	u32 sli_intf;
5615 
5616 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5617 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5618 				SLI_INTF_FAMILY_SHIFT;
5619 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5620 
5621 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5622 		adapter->csr = pci_iomap(pdev, 2, 0);
5623 		if (!adapter->csr)
5624 			return -ENOMEM;
5625 	}
5626 
5627 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5628 	if (!addr)
5629 		goto pci_map_err;
5630 	adapter->db = addr;
5631 
5632 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5633 		if (be_physfn(adapter)) {
5634 			/* PCICFG is the 2nd BAR in BE2 */
5635 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5636 			if (!addr)
5637 				goto pci_map_err;
5638 			adapter->pcicfg = addr;
5639 			adapter->pcicfg_mapped = true;
5640 		} else {
5641 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5642 			adapter->pcicfg_mapped = false;
5643 		}
5644 	}
5645 
5646 	be_roce_map_pci_bars(adapter);
5647 	return 0;
5648 
5649 pci_map_err:
5650 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5651 	be_unmap_pci_bars(adapter);
5652 	return -ENOMEM;
5653 }
5654 
5655 static void be_drv_cleanup(struct be_adapter *adapter)
5656 {
5657 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5658 	struct device *dev = &adapter->pdev->dev;
5659 
5660 	if (mem->va)
5661 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5662 
5663 	mem = &adapter->rx_filter;
5664 	if (mem->va)
5665 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5666 
5667 	mem = &adapter->stats_cmd;
5668 	if (mem->va)
5669 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5670 }
5671 
5672 /* Allocate and initialize various fields in the be_adapter struct */
5673 static int be_drv_init(struct be_adapter *adapter)
5674 {
5675 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5676 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5677 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5678 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5679 	struct device *dev = &adapter->pdev->dev;
5680 	int status = 0;
5681 
5682 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5683 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5684 						 &mbox_mem_alloc->dma,
5685 						 GFP_KERNEL);
5686 	if (!mbox_mem_alloc->va)
5687 		return -ENOMEM;
5688 
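	/* The mailbox was over-allocated by 16 bytes above so that the region
	 * actually used can be aligned to a 16-byte boundary here.
	 */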
5689 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5690 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5691 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5692 
5693 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5694 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5695 					    &rx_filter->dma, GFP_KERNEL);
5696 	if (!rx_filter->va) {
5697 		status = -ENOMEM;
5698 		goto free_mbox;
5699 	}
5700 
5701 	if (lancer_chip(adapter))
5702 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5703 	else if (BE2_chip(adapter))
5704 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5705 	else if (BE3_chip(adapter))
5706 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5707 	else
5708 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5709 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5710 					    &stats_cmd->dma, GFP_KERNEL);
5711 	if (!stats_cmd->va) {
5712 		status = -ENOMEM;
5713 		goto free_rx_filter;
5714 	}
5715 
5716 	mutex_init(&adapter->mbox_lock);
5717 	mutex_init(&adapter->mcc_lock);
5718 	mutex_init(&adapter->rx_filter_lock);
5719 	spin_lock_init(&adapter->mcc_cq_lock);
5720 	init_completion(&adapter->et_cmd_compl);
5721 
5722 	pci_save_state(adapter->pdev);
5723 
5724 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5725 
5726 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5727 	adapter->error_recovery.resched_delay = 0;
5728 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5729 			  be_err_detection_task);
5730 
5731 	adapter->rx_fc = true;
5732 	adapter->tx_fc = true;
5733 
5734 	/* Must be a power of 2 or else MODULO will BUG_ON */
5735 	adapter->be_get_temp_freq = 64;
5736 
5737 	INIT_LIST_HEAD(&adapter->vxlan_port_list);
5738 	return 0;
5739 
5740 free_rx_filter:
5741 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5742 free_mbox:
5743 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5744 			  mbox_mem_alloc->dma);
5745 	return status;
5746 }
5747 
5748 static void be_remove(struct pci_dev *pdev)
5749 {
5750 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5751 
5752 	if (!adapter)
5753 		return;
5754 
5755 	be_roce_dev_remove(adapter);
5756 	be_intr_set(adapter, false);
5757 
5758 	be_cancel_err_detection(adapter);
5759 
5760 	unregister_netdev(adapter->netdev);
5761 
5762 	be_clear(adapter);
5763 
5764 	if (!pci_vfs_assigned(adapter->pdev))
5765 		be_cmd_reset_function(adapter);
5766 
5767 	/* tell fw we're done with firing cmds */
5768 	/* Tell the FW we're done firing cmds */
5769 
5770 	be_unmap_pci_bars(adapter);
5771 	be_drv_cleanup(adapter);
5772 
5773 	pci_disable_pcie_error_reporting(pdev);
5774 
5775 	pci_release_regions(pdev);
5776 	pci_disable_device(pdev);
5777 
5778 	free_netdev(adapter->netdev);
5779 }
5780 
5781 static ssize_t be_hwmon_show_temp(struct device *dev,
5782 				  struct device_attribute *dev_attr,
5783 				  char *buf)
5784 {
5785 	struct be_adapter *adapter = dev_get_drvdata(dev);
5786 
5787 	/* Unit: millidegree Celsius */
5788 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5789 		return -EIO;
5790 	else
5791 		return sprintf(buf, "%u\n",
5792 			       adapter->hwmon_info.be_on_die_temp * 1000);
5793 }
5794 
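/* Expose the on-die temperature as a read-only hwmon attribute
 * (temp1_input); it is registered in be_probe() for PFs only.
 */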
5795 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5796 			  be_hwmon_show_temp, NULL, 1);
5797 
5798 static struct attribute *be_hwmon_attrs[] = {
5799 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5800 	NULL
5801 };
5802 
5803 ATTRIBUTE_GROUPS(be_hwmon);
5804 
5805 static char *mc_name(struct be_adapter *adapter)
5806 {
5807 	char *str = "";	/* default */
5808 
5809 	switch (adapter->mc_type) {
5810 	case UMC:
5811 		str = "UMC";
5812 		break;
5813 	case FLEX10:
5814 		str = "FLEX10";
5815 		break;
5816 	case vNIC1:
5817 		str = "vNIC-1";
5818 		break;
5819 	case nPAR:
5820 		str = "nPAR";
5821 		break;
5822 	case UFP:
5823 		str = "UFP";
5824 		break;
5825 	case vNIC2:
5826 		str = "vNIC-2";
5827 		break;
5828 	default:
5829 		str = "";
5830 	}
5831 
5832 	return str;
5833 }
5834 
5835 static inline char *func_name(struct be_adapter *adapter)
5836 {
5837 	return be_physfn(adapter) ? "PF" : "VF";
5838 }
5839 
5840 static inline char *nic_name(struct pci_dev *pdev)
5841 {
5842 	switch (pdev->device) {
5843 	case OC_DEVICE_ID1:
5844 		return OC_NAME;
5845 	case OC_DEVICE_ID2:
5846 		return OC_NAME_BE;
5847 	case OC_DEVICE_ID3:
5848 	case OC_DEVICE_ID4:
5849 		return OC_NAME_LANCER;
5850 	case BE_DEVICE_ID2:
5851 		return BE3_NAME;
5852 	case OC_DEVICE_ID5:
5853 	case OC_DEVICE_ID6:
5854 		return OC_NAME_SH;
5855 	default:
5856 		return BE_NAME;
5857 	}
5858 }
5859 
5860 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5861 {
5862 	struct be_adapter *adapter;
5863 	struct net_device *netdev;
5864 	int status = 0;
5865 
5866 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5867 
5868 	status = pci_enable_device(pdev);
5869 	if (status)
5870 		goto do_none;
5871 
5872 	status = pci_request_regions(pdev, DRV_NAME);
5873 	if (status)
5874 		goto disable_dev;
5875 	pci_set_master(pdev);
5876 
5877 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5878 	if (!netdev) {
5879 		status = -ENOMEM;
5880 		goto rel_reg;
5881 	}
5882 	adapter = netdev_priv(netdev);
5883 	adapter->pdev = pdev;
5884 	pci_set_drvdata(pdev, adapter);
5885 	adapter->netdev = netdev;
5886 	SET_NETDEV_DEV(netdev, &pdev->dev);
5887 
5888 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5889 	if (!status) {
5890 		netdev->features |= NETIF_F_HIGHDMA;
5891 	} else {
5892 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5893 		if (status) {
5894 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5895 			goto free_netdev;
5896 		}
5897 	}
5898 
5899 	status = pci_enable_pcie_error_reporting(pdev);
5900 	if (!status)
5901 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5902 
5903 	status = be_map_pci_bars(adapter);
5904 	if (status)
5905 		goto free_netdev;
5906 
5907 	status = be_drv_init(adapter);
5908 	if (status)
5909 		goto unmap_bars;
5910 
5911 	status = be_setup(adapter);
5912 	if (status)
5913 		goto drv_cleanup;
5914 
5915 	be_netdev_init(netdev);
5916 	status = register_netdev(netdev);
5917 	if (status != 0)
5918 		goto unsetup;
5919 
5920 	be_roce_dev_add(adapter);
5921 
5922 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5923 	adapter->error_recovery.probe_time = jiffies;
5924 
5925 	/* On-die temperature is not supported for VFs. */
5926 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5927 		adapter->hwmon_info.hwmon_dev =
5928 			devm_hwmon_device_register_with_groups(&pdev->dev,
5929 							       DRV_NAME,
5930 							       adapter,
5931 							       be_hwmon_groups);
5932 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5933 	}
5934 
5935 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5936 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5937 
5938 	return 0;
5939 
5940 unsetup:
5941 	be_clear(adapter);
5942 drv_cleanup:
5943 	be_drv_cleanup(adapter);
5944 unmap_bars:
5945 	be_unmap_pci_bars(adapter);
5946 free_netdev:
5947 	free_netdev(netdev);
5948 rel_reg:
5949 	pci_release_regions(pdev);
5950 disable_dev:
5951 	pci_disable_device(pdev);
5952 do_none:
5953 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5954 	return status;
5955 }
5956 
5957 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5958 {
5959 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5960 
5961 	be_intr_set(adapter, false);
5962 	be_cancel_err_detection(adapter);
5963 
5964 	be_cleanup(adapter);
5965 
5966 	pci_save_state(pdev);
5967 	pci_disable_device(pdev);
5968 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5969 	return 0;
5970 }
5971 
5972 static int be_pci_resume(struct pci_dev *pdev)
5973 {
5974 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5975 	int status = 0;
5976 
5977 	status = pci_enable_device(pdev);
5978 	if (status)
5979 		return status;
5980 
5981 	pci_restore_state(pdev);
5982 
5983 	status = be_resume(adapter);
5984 	if (status)
5985 		return status;
5986 
5987 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5988 
5989 	return 0;
5990 }
5991 
5992 /*
5993  * An FLR will stop BE from DMAing any data.
5994  */
5995 static void be_shutdown(struct pci_dev *pdev)
5996 {
5997 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5998 
5999 	if (!adapter)
6000 		return;
6001 
6002 	be_roce_dev_shutdown(adapter);
6003 	cancel_delayed_work_sync(&adapter->work);
6004 	be_cancel_err_detection(adapter);
6005 
6006 	netif_device_detach(adapter->netdev);
6007 
6008 	be_cmd_reset_function(adapter);
6009 
6010 	pci_disable_device(pdev);
6011 }
6012 
6013 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6014 					    pci_channel_state_t state)
6015 {
6016 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6017 
6018 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6019 
6020 	be_roce_dev_remove(adapter);
6021 
6022 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6023 		be_set_error(adapter, BE_ERROR_EEH);
6024 
6025 		be_cancel_err_detection(adapter);
6026 
6027 		be_cleanup(adapter);
6028 	}
6029 
6030 	if (state == pci_channel_io_perm_failure)
6031 		return PCI_ERS_RESULT_DISCONNECT;
6032 
6033 	pci_disable_device(pdev);
6034 
6035 	/* The error could cause the FW to trigger a flash debug dump.
6036 	 * Resetting the card while the flash dump is in progress
6037 	 * can prevent it from recovering; wait for the dump to finish.
6038 	 * Wait only for the first function, as this is needed only once
6039 	 * per adapter.
6040 	 */
6041 	if (pdev->devfn == 0)
6042 		ssleep(30);
6043 
6044 	return PCI_ERS_RESULT_NEED_RESET;
6045 }
6046 
6047 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6048 {
6049 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6050 	int status;
6051 
6052 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6053 
6054 	status = pci_enable_device(pdev);
6055 	if (status)
6056 		return PCI_ERS_RESULT_DISCONNECT;
6057 
6058 	pci_set_master(pdev);
6059 	pci_restore_state(pdev);
6060 
6061 	/* Check if card is ok and fw is ready */
6062 	dev_info(&adapter->pdev->dev,
6063 		 "Waiting for FW to be ready after EEH reset\n");
6064 	status = be_fw_wait_ready(adapter);
6065 	if (status)
6066 		return PCI_ERS_RESULT_DISCONNECT;
6067 
6068 	pci_cleanup_aer_uncorrect_error_status(pdev);
6069 	be_clear_error(adapter, BE_CLEAR_ALL);
6070 	return PCI_ERS_RESULT_RECOVERED;
6071 }
6072 
6073 static void be_eeh_resume(struct pci_dev *pdev)
6074 {
6075 	int status = 0;
6076 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6077 
6078 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6079 
6080 	pci_save_state(pdev);
6081 
6082 	status = be_resume(adapter);
6083 	if (status)
6084 		goto err;
6085 
6086 	be_roce_dev_add(adapter);
6087 
6088 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6089 	return;
6090 err:
6091 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6092 }
6093 
6094 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6095 {
6096 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6097 	struct be_resources vft_res = {0};
6098 	int status;
6099 
6100 	if (!num_vfs)
6101 		be_vf_clear(adapter);
6102 
6103 	adapter->num_vfs = num_vfs;
6104 
6105 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6106 		dev_warn(&pdev->dev,
6107 			 "Cannot disable VFs while they are assigned\n");
6108 		return -EBUSY;
6109 	}
6110 
6111 	/* When the HW is in an SR-IOV capable configuration, the PF-pool
6112 	 * resources are distributed equally across the maximum number of VFs.
6113 	 * The user may request that only a subset of the max VFs be enabled.
6114 	 * Based on num_vfs, redistribute the resources across num_vfs so that
6115 	 * each VF gets a larger share of resources.
6116 	 * This facility is not available in BE3 FW.
6117 	 * On Lancer chips, the FW performs this redistribution itself.
6118 	 */
6119 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6120 		be_calculate_vf_res(adapter, adapter->num_vfs,
6121 				    &vft_res);
6122 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6123 						 adapter->num_vfs, &vft_res);
6124 		if (status)
6125 			dev_err(&pdev->dev,
6126 				"Failed to optimize SR-IOV resources\n");
6127 	}
6128 
6129 	status = be_get_resources(adapter);
6130 	if (status)
6131 		return be_cmd_status(status);
6132 
6133 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6134 	rtnl_lock();
6135 	status = be_update_queues(adapter);
6136 	rtnl_unlock();
6137 	if (status)
6138 		return be_cmd_status(status);
6139 
6140 	if (adapter->num_vfs)
6141 		status = be_vf_setup(adapter);
6142 
6143 	if (!status)
6144 		return adapter->num_vfs;
6145 
6146 	return 0;
6147 }
6148 
6149 static const struct pci_error_handlers be_eeh_handlers = {
6150 	.error_detected = be_eeh_err_detected,
6151 	.slot_reset = be_eeh_reset,
6152 	.resume = be_eeh_resume,
6153 };
6154 
6155 static struct pci_driver be_driver = {
6156 	.name = DRV_NAME,
6157 	.id_table = be_dev_ids,
6158 	.probe = be_probe,
6159 	.remove = be_remove,
6160 	.suspend = be_suspend,
6161 	.resume = be_pci_resume,
6162 	.shutdown = be_shutdown,
6163 	.sriov_configure = be_pci_sriov_configure,
6164 	.err_handler = &be_eeh_handlers
6165 };
6166 
6167 static int __init be_init_module(void)
6168 {
6169 	int status;
6170 
6171 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6172 	    rx_frag_size != 2048) {
6173 		printk(KERN_WARNING DRV_NAME
6174 			" : Module param rx_frag_size must be 2048/4096/8192."
6175 			" Using 2048\n");
6176 		rx_frag_size = 2048;
6177 	}
6178 
6179 	if (num_vfs > 0) {
6180 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6181 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6182 	}
6183 
6184 	be_wq = create_singlethread_workqueue("be_wq");
6185 	if (!be_wq) {
6186 		pr_warn(DRV_NAME " : workqueue creation failed\n");
6187 		return -1;
6188 	}
6189 
6190 	be_err_recovery_workq =
6191 		create_singlethread_workqueue("be_err_recover");
6192 	if (!be_err_recovery_workq)
6193 		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6194 
6195 	status = pci_register_driver(&be_driver);
6196 	if (status) {
6197 		destroy_workqueue(be_wq);
6198 		be_destroy_err_recovery_workq();
6199 	}
6200 	return status;
6201 }
6202 module_init(be_init_module);
6203 
6204 static void __exit be_exit_module(void)
6205 {
6206 	pci_unregister_driver(&be_driver);
6207 
6208 	be_destroy_err_recovery_workq();
6209 
6210 	if (be_wq)
6211 		destroy_workqueue(be_wq);
6212 }
6213 module_exit(be_exit_module);
6214