/*
 * Copyright (C) 2005 - 2016 Broadcom
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.  The full GNU General
 * Public License is included in this distribution in the file called COPYING.
 *
 * Contact Information:
 * linux-drivers@emulex.com
 *
 * Emulex
 * 3333 Susan Street
 * Costa Mesa, CA 92626
 */

#include <linux/prefetch.h>
#include <linux/module.h>
#include "be.h"
#include "be_cmds.h"
#include <asm/div64.h>
#include <linux/aer.h>
#include <linux/if_bridge.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>

MODULE_VERSION(DRV_VER);
MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("GPL");

/* num_vfs module param is obsolete.
 * Use sysfs method to enable/disable VFs.
 */
static unsigned int num_vfs;
module_param(num_vfs, uint, S_IRUGO);
MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");

static ushort rx_frag_size = 2048;
module_param(rx_frag_size, ushort, S_IRUGO);
MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
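/* Usage sketch (illustrative, not part of this file): rx_frag_size is
 * read-only after load, so it can only be set at module load time, e.g.
 * "modprobe be2net rx_frag_size=4096"; values the driver does not support
 * are presumably rejected or clamped during probe.
 */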

/* Per-module error detection/recovery workq shared across all functions.
 * Each function schedules its own work request on this shared workq.
 */
static struct workqueue_struct *be_err_recovery_workq;

static const struct pci_device_id be_dev_ids[] = {
	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
	{ 0 }
};
MODULE_DEVICE_TABLE(pci, be_dev_ids);

/* Workqueue used by all functions for deferring cmd calls to the adapter */
static struct workqueue_struct *be_wq;

/* UE Status Low CSR */
static const char * const ue_status_low_desc[] = {
	"CEV",
	"CTX",
	"DBUF",
	"ERX",
	"Host",
	"MPU",
	"NDMA",
	"PTC ",
	"RDMA ",
	"RXF ",
	"RXIPS ",
	"RXULP0 ",
	"RXULP1 ",
	"RXULP2 ",
	"TIM ",
	"TPOST ",
	"TPRE ",
	"TXIPS ",
	"TXULP0 ",
	"TXULP1 ",
	"UC ",
	"WDMA ",
	"TXULP2 ",
	"HOST1 ",
	"P0_OB_LINK ",
	"P1_OB_LINK ",
	"HOST_GPIO ",
	"MBOX ",
	"ERX2 ",
	"SPARE ",
	"JTAG ",
	"MPU_INTPEND "
};

/* UE Status High CSR */
static const char * const ue_status_hi_desc[] = {
	"LPCMEMHOST",
	"MGMT_MAC",
	"PCS0ONLINE",
	"MPU_IRAM",
	"PCS1ONLINE",
	"PCTL0",
	"PCTL1",
	"PMEM",
	"RR",
	"TXPB",
	"RXPP",
	"XAUI",
	"TXP",
	"ARM",
	"IPC",
	"HOST2",
	"HOST3",
	"HOST4",
	"HOST5",
	"HOST6",
	"HOST7",
	"ECRC",
	"Poison TLP",
	"NETC",
	"PERIPH",
	"LLTXULP",
	"D2P",
	"RCON",
	"LDMA",
	"LLTXP",
	"LLTXPB",
	"Unknown"
};

#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
				 BE_IF_FLAGS_BROADCAST | \
				 BE_IF_FLAGS_MULTICAST | \
				 BE_IF_FLAGS_PASS_L3L4_ERRORS)

static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
{
	struct be_dma_mem *mem = &q->dma_mem;

	if (mem->va) {
		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
				  mem->dma);
		mem->va = NULL;
	}
}

static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
			  u16 len, u16 entry_size)
{
	struct be_dma_mem *mem = &q->dma_mem;

	memset(q, 0, sizeof(*q));
	q->len = len;
	q->entry_size = entry_size;
	mem->size = len * entry_size;
	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
				      GFP_KERNEL);
	if (!mem->va)
		return -ENOMEM;
	return 0;
}

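/* Toggle the host-interrupt enable bit in the MEMBAR interrupt-control
 * register via PCI config space; a write is issued only when the current
 * state differs from the requested one.
 */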
static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
{
	u32 reg, enabled;

	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
			      &reg);
	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;

	if (!enabled && enable)
		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
	else if (enabled && !enable)
		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
	else
		return;

	pci_write_config_dword(adapter->pdev,
			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
}

static void be_intr_set(struct be_adapter *adapter, bool enable)
{
	int status = 0;

	/* On lancer, interrupts can't be controlled via this register */
	if (lancer_chip(adapter))
		return;

	if (be_check_error(adapter, BE_ERROR_EEH))
		return;

	status = be_cmd_intr_set(adapter, enable);
	if (status)
		be_reg_intr_set(adapter, enable);
}

static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
{
	u32 val = 0;

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	val |= qid & DB_RQ_RING_ID_MASK;
	val |= posted << DB_RQ_NUM_POSTED_SHIFT;

	wmb();
	iowrite32(val, adapter->db + DB_RQ_OFFSET);
}

static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
			  u16 posted)
{
	u32 val = 0;

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;

	wmb();
	iowrite32(val, adapter->db + txo->db_offset);
}

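/* Ring the EQ doorbell: optionally re-arm the EQ and clear the interrupt,
 * acknowledge num_popped entries and program the R2I delay multiplier
 * encoding.
 */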
static void be_eq_notify(struct be_adapter *adapter, u16 qid,
			 bool arm, bool clear_int, u16 num_popped,
			 u32 eq_delay_mult_enc)
{
	u32 val = 0;

	val |= qid & DB_EQ_RING_ID_MASK;
	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	if (arm)
		val |= 1 << DB_EQ_REARM_SHIFT;
	if (clear_int)
		val |= 1 << DB_EQ_CLR_SHIFT;
	val |= 1 << DB_EQ_EVNT_SHIFT;
	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
	iowrite32(val, adapter->db + DB_EQ_OFFSET);
}

void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
{
	u32 val = 0;

	val |= qid & DB_CQ_RING_ID_MASK;
	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
			DB_CQ_RING_ID_EXT_MASK_SHIFT);

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	if (arm)
		val |= 1 << DB_CQ_REARM_SHIFT;
	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
	iowrite32(val, adapter->db + DB_CQ_OFFSET);
}

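/* Program the given MAC on the interface. If the MAC is already present in
 * the cached uc-list, reuse its pmac_id instead of issuing a fresh PMAC_ADD.
 */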
static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
{
	int i;

	/* Check if mac has already been added as part of uc-list */
	for (i = 0; i < adapter->uc_macs; i++) {
		if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
				     mac)) {
			/* mac already added, skip addition */
			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
			return 0;
		}
	}

	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
			       &adapter->pmac_id[0], 0);
}

static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
{
	int i;

	/* Skip deletion if the programmed mac is
	 * being used in uc-list
	 */
	for (i = 0; i < adapter->uc_macs; i++) {
		if (adapter->pmac_id[i + 1] == pmac_id)
			return;
	}
	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
}

static int be_mac_addr_set(struct net_device *netdev, void *p)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->pdev->dev;
	struct sockaddr *addr = p;
	int status;
	u8 mac[ETH_ALEN];
	u32 old_pmac_id = adapter->pmac_id[0];

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	/* Proceed further only if the user-provided MAC is different
	 * from the active MAC
	 */
	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
		return 0;

	/* if device is not running, copy MAC to netdev->dev_addr */
	if (!netif_running(netdev))
		goto done;

	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
	 * privilege or if the PF did not provision the new MAC address.
	 * On BE3, this cmd will always fail if the VF doesn't have the
	 * FILTMGMT privilege. This failure is OK only if the PF programmed
	 * the MAC for the VF.
	 */
	mutex_lock(&adapter->rx_filter_lock);
	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
	if (!status) {
		/* Delete the old programmed MAC. This call may fail if the
		 * old MAC was already deleted by the PF driver.
		 */
		if (adapter->pmac_id[0] != old_pmac_id)
			be_dev_mac_del(adapter, old_pmac_id);
	}

	mutex_unlock(&adapter->rx_filter_lock);
	/* Decide if the new MAC was successfully activated only after
	 * querying the FW
	 */
	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
				       adapter->if_handle, true, 0);
	if (status)
		goto err;

	/* The MAC change did not happen, either due to lack of privilege
	 * or because the PF didn't pre-provision.
	 */
	if (!ether_addr_equal(addr->sa_data, mac)) {
		status = -EPERM;
		goto err;
	}
done:
	ether_addr_copy(adapter->dev_mac, addr->sa_data);
	ether_addr_copy(netdev->dev_addr, addr->sa_data);
	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
	return 0;
err:
	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
	return status;
}

/* BE2 supports only v0 cmd */
static void *hw_stats_from_cmd(struct be_adapter *adapter)
{
	if (BE2_chip(adapter)) {
		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;

		return &cmd->hw_stats;
	} else if (BE3_chip(adapter)) {
		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;

		return &cmd->hw_stats;
	} else {
		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;

		return &cmd->hw_stats;
	}
}

/* BE2 supports only v0 cmd */
static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
{
	if (BE2_chip(adapter)) {
		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);

		return &hw_stats->erx;
	} else if (BE3_chip(adapter)) {
		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);

		return &hw_stats->erx;
	} else {
		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);

		return &hw_stats->erx;
	}
}

static void populate_be_v0_stats(struct be_adapter *adapter)
{
	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
	struct be_port_rxf_stats_v0 *port_stats =
					&rxf_stats->port[adapter->port_num];
	struct be_drv_stats *drvs = &adapter->drv_stats;

	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
	drvs->rx_pause_frames = port_stats->rx_pause_frames;
	drvs->rx_crc_errors = port_stats->rx_crc_errors;
	drvs->rx_control_frames = port_stats->rx_control_frames;
	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
	drvs->rx_dropped_header_too_small =
		port_stats->rx_dropped_header_too_small;
	drvs->rx_address_filtered =
					port_stats->rx_address_filtered +
					port_stats->rx_vlan_filtered;
	drvs->rx_alignment_symbol_errors =
		port_stats->rx_alignment_symbol_errors;

	drvs->tx_pauseframes = port_stats->tx_pauseframes;
	drvs->tx_controlframes = port_stats->tx_controlframes;

	if (adapter->port_num)
		drvs->jabber_events = rxf_stats->port1_jabber_events;
	else
		drvs->jabber_events = rxf_stats->port0_jabber_events;
	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
	drvs->forwarded_packets = rxf_stats->forwarded_packets;
	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
}

static void populate_be_v1_stats(struct be_adapter *adapter)
{
	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
	struct be_port_rxf_stats_v1 *port_stats =
					&rxf_stats->port[adapter->port_num];
	struct be_drv_stats *drvs = &adapter->drv_stats;

	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
	drvs->rx_pause_frames = port_stats->rx_pause_frames;
	drvs->rx_crc_errors = port_stats->rx_crc_errors;
	drvs->rx_control_frames = port_stats->rx_control_frames;
	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
	drvs->rx_dropped_header_too_small =
		port_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop =
		port_stats->rx_input_fifo_overflow_drop;
	drvs->rx_address_filtered = port_stats->rx_address_filtered;
	drvs->rx_alignment_symbol_errors =
		port_stats->rx_alignment_symbol_errors;
	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
	drvs->tx_pauseframes = port_stats->tx_pauseframes;
	drvs->tx_controlframes = port_stats->tx_controlframes;
	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
	drvs->jabber_events = port_stats->jabber_events;
	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
	drvs->forwarded_packets = rxf_stats->forwarded_packets;
	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
}

static void populate_be_v2_stats(struct be_adapter *adapter)
{
	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
	struct be_port_rxf_stats_v2 *port_stats =
					&rxf_stats->port[adapter->port_num];
	struct be_drv_stats *drvs = &adapter->drv_stats;

	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
	drvs->rx_pause_frames = port_stats->rx_pause_frames;
	drvs->rx_crc_errors = port_stats->rx_crc_errors;
	drvs->rx_control_frames = port_stats->rx_control_frames;
	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
	drvs->rx_dropped_header_too_small =
		port_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop =
		port_stats->rx_input_fifo_overflow_drop;
	drvs->rx_address_filtered = port_stats->rx_address_filtered;
	drvs->rx_alignment_symbol_errors =
		port_stats->rx_alignment_symbol_errors;
	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
	drvs->tx_pauseframes = port_stats->tx_pauseframes;
	drvs->tx_controlframes = port_stats->tx_controlframes;
	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
	drvs->jabber_events = port_stats->jabber_events;
	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
	drvs->forwarded_packets = rxf_stats->forwarded_packets;
	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
	if (be_roce_supported(adapter)) {
		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
		drvs->rx_roce_frames = port_stats->roce_frames_received;
		drvs->roce_drops_crc = port_stats->roce_drops_crc;
		drvs->roce_drops_payload_len =
			port_stats->roce_drops_payload_len;
	}
}

static void populate_lancer_stats(struct be_adapter *adapter)
{
	struct be_drv_stats *drvs = &adapter->drv_stats;
	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);

	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
	drvs->rx_dropped_tcp_length =
				pport_stats->rx_dropped_invalid_tcp_length;
	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
	drvs->rx_dropped_header_too_small =
				pport_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
	drvs->rx_address_filtered =
					pport_stats->rx_address_filtered +
					pport_stats->rx_vlan_filtered;
	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
	drvs->jabber_events = pport_stats->rx_jabbers;
	drvs->forwarded_packets = pport_stats->num_forwards_lo;
	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
	drvs->rx_drops_too_many_frags =
				pport_stats->rx_drops_too_many_frags_lo;
}

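/* Fold a 16-bit HW counter that wraps at 65535 into a monotonically growing
 * 32-bit accumulator: the low halfword tracks the last HW reading and a wrap
 * is detected when the new reading is smaller than the previous one.
 * Worked example: acc = 0x0001FFF0 and val = 0x0008 (< 0xFFF0, so a wrap
 * occurred) yields 0x00010000 + 0x0008 + 65536 = 0x00020008.
 */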
static void accumulate_16bit_val(u32 *acc, u16 val)
{
#define lo(x)			(x & 0xFFFF)
#define hi(x)			(x & 0xFFFF0000)
	bool wrapped = val < lo(*acc);
	u32 newacc = hi(*acc) + val;

	if (wrapped)
		newacc += 65536;
	ACCESS_ONCE(*acc) = newacc;
}

static void populate_erx_stats(struct be_adapter *adapter,
			       struct be_rx_obj *rxo, u32 erx_stat)
{
	if (!BEx_chip(adapter))
		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
	else
		/* The erx HW counter below actually wraps around after
		 * 65535. The driver accumulates it into a 32-bit value.
		 */
		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
				     (u16)erx_stat);
}

void be_parse_stats(struct be_adapter *adapter)
{
	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
	struct be_rx_obj *rxo;
	int i;
	u32 erx_stat;

	if (lancer_chip(adapter)) {
		populate_lancer_stats(adapter);
	} else {
		if (BE2_chip(adapter))
			populate_be_v0_stats(adapter);
		else if (BE3_chip(adapter))
			/* for BE3 */
			populate_be_v1_stats(adapter);
		else
			populate_be_v2_stats(adapter);

		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
		for_all_rx_queues(adapter, rxo, i) {
			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
			populate_erx_stats(adapter, rxo, erx_stat);
		}
	}
}

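/* Aggregate the per-queue SW counters (sampled under u64_stats sync) and the
 * driver's cached HW error counters into the rtnl_link_stats64 snapshot.
 */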
static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
						struct rtnl_link_stats64 *stats)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_drv_stats *drvs = &adapter->drv_stats;
	struct be_rx_obj *rxo;
	struct be_tx_obj *txo;
	u64 pkts, bytes;
	unsigned int start;
	int i;

	for_all_rx_queues(adapter, rxo, i) {
		const struct be_rx_stats *rx_stats = rx_stats(rxo);

		do {
			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
			pkts = rx_stats(rxo)->rx_pkts;
			bytes = rx_stats(rxo)->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
		stats->rx_packets += pkts;
		stats->rx_bytes += bytes;
		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
					rx_stats(rxo)->rx_drops_no_frags;
	}

	for_all_tx_queues(adapter, txo, i) {
		const struct be_tx_stats *tx_stats = tx_stats(txo);

		do {
			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
			pkts = tx_stats(txo)->tx_pkts;
			bytes = tx_stats(txo)->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
		stats->tx_packets += pkts;
		stats->tx_bytes += bytes;
	}

	/* bad pkts received */
	stats->rx_errors = drvs->rx_crc_errors +
		drvs->rx_alignment_symbol_errors +
		drvs->rx_in_range_errors +
		drvs->rx_out_range_errors +
		drvs->rx_frame_too_long +
		drvs->rx_dropped_too_small +
		drvs->rx_dropped_too_short +
		drvs->rx_dropped_header_too_small +
		drvs->rx_dropped_tcp_length +
		drvs->rx_dropped_runt;

	/* detailed rx errors */
	stats->rx_length_errors = drvs->rx_in_range_errors +
		drvs->rx_out_range_errors +
		drvs->rx_frame_too_long;

	stats->rx_crc_errors = drvs->rx_crc_errors;

	/* frame alignment errors */
	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;

	/* receiver fifo overrun */
	/* drops_no_pbuf is not per i/f, it's per BE card */
	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
				drvs->rx_input_fifo_overflow_drop +
				drvs->rx_drops_no_pbuf;
	return stats;
}

void be_link_status_update(struct be_adapter *adapter, u8 link_status)
{
	struct net_device *netdev = adapter->netdev;

	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
		netif_carrier_off(netdev);
		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
	}

	if (link_status)
		netif_carrier_on(netdev);
	else
		netif_carrier_off(netdev);

	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
}

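/* Length of the L2..L4 headers that get replicated in each TSO segment */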
static int be_gso_hdr_len(struct sk_buff *skb)
{
	if (skb->encapsulation)
		return skb_inner_transport_offset(skb) +
		       inner_tcp_hdrlen(skb);
	return skb_transport_offset(skb) + tcp_hdrlen(skb);
}

static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
{
	struct be_tx_stats *stats = tx_stats(txo);
	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
	/* Account for headers which get duplicated in TSO pkt */
	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;

	u64_stats_update_begin(&stats->sync);
	stats->tx_reqs++;
	stats->tx_bytes += skb->len + dup_hdr_len;
	stats->tx_pkts += tx_pkts;
	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
		stats->tx_vxlan_offload_pkts += tx_pkts;
	u64_stats_update_end(&stats->sync);
}

/* Returns number of WRBs needed for the skb */
static u32 skb_wrb_cnt(struct sk_buff *skb)
{
	/* +1 for the header wrb */
	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
}

static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
{
	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
	wrb->rsvd0 = 0;
}

/* A dummy wrb is just all zeros. We use a separate routine for the dummy wrb
 * to avoid the swap and shift/mask operations in wrb_fill().
 */
static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
{
	wrb->frag_pa_hi = 0;
	wrb->frag_pa_lo = 0;
	wrb->frag_len = 0;
	wrb->rsvd0 = 0;
}

static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
				     struct sk_buff *skb)
{
	u8 vlan_prio;
	u16 vlan_tag;

	vlan_tag = skb_vlan_tag_get(skb);
	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
	/* If the vlan priority provided by the OS is NOT in the available bmap */
	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
				adapter->recommended_prio_bits;

	return vlan_tag;
}

/* Used only for IP tunnel packets */
static u16 skb_inner_ip_proto(struct sk_buff *skb)
{
	return (inner_ip_hdr(skb)->version == 4) ?
		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
}

static u16 skb_ip_proto(struct sk_buff *skb)
{
	return (ip_hdr(skb)->version == 4) ?
		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
}

static inline bool be_is_txq_full(struct be_tx_obj *txo)
{
	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
}

static inline bool be_can_txq_wake(struct be_tx_obj *txo)
{
	return atomic_read(&txo->q.used) < txo->q.len / 2;
}

static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
{
	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
}

static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
				       struct sk_buff *skb,
				       struct be_wrb_params *wrb_params)
{
	u16 proto;

	if (skb_is_gso(skb)) {
		BE_WRB_F_SET(wrb_params->features, LSO, 1);
		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (skb->encapsulation) {
			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
			proto = skb_inner_ip_proto(skb);
		} else {
			proto = skb_ip_proto(skb);
		}
		if (proto == IPPROTO_TCP)
			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
		else if (proto == IPPROTO_UDP)
			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
	}

	if (skb_vlan_tag_present(skb)) {
		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
	}

	BE_WRB_F_SET(wrb_params->features, CRC, 1);
}

static void wrb_fill_hdr(struct be_adapter *adapter,
			 struct be_eth_hdr_wrb *hdr,
			 struct be_wrb_params *wrb_params,
			 struct sk_buff *skb)
{
	memset(hdr, 0, sizeof(*hdr));

	SET_TX_WRB_HDR_BITS(crc, hdr,
			    BE_WRB_F_GET(wrb_params->features, CRC));
	SET_TX_WRB_HDR_BITS(ipcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, IPCS));
	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, TCPCS));
	SET_TX_WRB_HDR_BITS(udpcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, UDPCS));

	SET_TX_WRB_HDR_BITS(lso, hdr,
			    BE_WRB_F_GET(wrb_params->features, LSO));
	SET_TX_WRB_HDR_BITS(lso6, hdr,
			    BE_WRB_F_GET(wrb_params->features, LSO6));
	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);

	/* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
	 * hack is not needed, the evt bit is set while ringing the DB.
	 */
	SET_TX_WRB_HDR_BITS(event, hdr,
			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
	SET_TX_WRB_HDR_BITS(vlan, hdr,
			    BE_WRB_F_GET(wrb_params->features, VLAN));
	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);

	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
	SET_TX_WRB_HDR_BITS(mgmt, hdr,
			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
}

static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
			  bool unmap_single)
{
	dma_addr_t dma;
	u32 frag_len = le32_to_cpu(wrb->frag_len);

	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
		(u64)le32_to_cpu(wrb->frag_pa_lo);
	if (frag_len) {
		if (unmap_single)
			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
		else
			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
	}
}

/* Grab a WRB header for xmit */
static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
{
	u32 head = txo->q.head;

	queue_head_inc(&txo->q);
	return head;
}

/* Set up the WRB header for xmit */
static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
				struct be_tx_obj *txo,
				struct be_wrb_params *wrb_params,
				struct sk_buff *skb, u16 head)
{
	u32 num_frags = skb_wrb_cnt(skb);
	struct be_queue_info *txq = &txo->q;
	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);

	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
	be_dws_cpu_to_le(hdr, sizeof(*hdr));

	BUG_ON(txo->sent_skb_list[head]);
	txo->sent_skb_list[head] = skb;
	txo->last_req_hdr = head;
	atomic_add(num_frags, &txq->used);
	txo->last_req_wrb_cnt = num_frags;
	txo->pend_wrb_cnt += num_frags;
}

/* Setup a WRB fragment (buffer descriptor) for xmit */
static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
				 int len)
{
	struct be_eth_wrb *wrb;
	struct be_queue_info *txq = &txo->q;

	wrb = queue_head_node(txq);
	wrb_fill(wrb, busaddr, len);
	queue_head_inc(txq);
}

/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 * was invoked. The producer index is restored to the previous packet and the
 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 */
static void be_xmit_restore(struct be_adapter *adapter,
			    struct be_tx_obj *txo, u32 head, bool map_single,
			    u32 copied)
{
	struct device *dev;
	struct be_eth_wrb *wrb;
	struct be_queue_info *txq = &txo->q;

	dev = &adapter->pdev->dev;
	txq->head = head;

	/* skip the first wrb (hdr); it's not mapped */
	queue_head_inc(txq);
	while (copied) {
		wrb = queue_head_node(txq);
		unmap_tx_frag(dev, wrb, map_single);
		map_single = false;
		copied -= le32_to_cpu(wrb->frag_len);
		queue_head_inc(txq);
	}

	txq->head = head;
}

/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 * of WRBs used up by the packet.
 */
static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
			   struct sk_buff *skb,
			   struct be_wrb_params *wrb_params)
{
	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
	struct device *dev = &adapter->pdev->dev;
	struct be_queue_info *txq = &txo->q;
	bool map_single = false;
	u32 head = txq->head;
	dma_addr_t busaddr;
	int len;

	head = be_tx_get_wrb_hdr(txo);

	if (skb->len > skb->data_len) {
		len = skb_headlen(skb);

		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, busaddr))
			goto dma_err;
		map_single = true;
		be_tx_setup_wrb_frag(txo, busaddr, len);
		copied += len;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
		len = skb_frag_size(frag);

		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, busaddr))
			goto dma_err;
		be_tx_setup_wrb_frag(txo, busaddr, len);
		copied += len;
	}

	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);

	be_tx_stats_update(txo, skb);
	return wrb_cnt;

dma_err:
	adapter->drv_stats.dma_map_errors++;
	be_xmit_restore(adapter, txo, head, map_single, copied);
	return 0;
}

static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
{
	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
}

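/* Insert the VLAN tag (and, in QnQ mode, the outer VLAN as well) directly
 * into the packet data so that HW tagging can be skipped where the HW
 * workarounds require it.
 */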
static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
					     struct sk_buff *skb,
					     struct be_wrb_params
					     *wrb_params)
{
	u16 vlan_tag = 0;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return skb;

	if (skb_vlan_tag_present(skb))
		vlan_tag = be_get_tx_vlan_tag(adapter, skb);

	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
		if (!vlan_tag)
			vlan_tag = adapter->pvid;
		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
		 * skip VLAN insertion
		 */
		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
	}

	if (vlan_tag) {
		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
						vlan_tag);
		if (unlikely(!skb))
			return skb;
		skb->vlan_tci = 0;
	}

	/* Insert the outer VLAN, if any */
	if (adapter->qnq_vid) {
		vlan_tag = adapter->qnq_vid;
		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
						vlan_tag);
		if (unlikely(!skb))
			return skb;
		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
	}

	return skb;
}

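/* Detect IPv6 packets carrying extension headers; such packets trigger the
 * BE3 TX-stall workaround in be_ipv6_tx_stall_chk() below.
 */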
static bool be_ipv6_exthdr_check(struct sk_buff *skb)
{
	struct ethhdr *eh = (struct ethhdr *)skb->data;
	u16 offset = ETH_HLEN;

	if (eh->h_proto == htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);

		offset += sizeof(struct ipv6hdr);
		if (ip6h->nexthdr != NEXTHDR_TCP &&
		    ip6h->nexthdr != NEXTHDR_UDP) {
			struct ipv6_opt_hdr *ehdr =
				(struct ipv6_opt_hdr *)(skb->data + offset);

			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
			if (ehdr->hdrlen == 0xff)
				return true;
		}
	}
	return false;
}

static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
{
	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
}

static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
{
	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
}

static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
						  struct sk_buff *skb,
						  struct be_wrb_params
						  *wrb_params)
{
	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
	unsigned int eth_hdr_len;
	struct iphdr *ip;

	/* For padded packets, BE HW modifies the tot_len field in the IP
	 * header incorrectly when a VLAN tag is inserted by HW.
	 * For padded packets, Lancer computes an incorrect checksum.
	 */
	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
						VLAN_ETH_HLEN : ETH_HLEN;
	if (skb->len <= 60 &&
	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
	    is_ipv4_pkt(skb)) {
		ip = (struct iphdr *)ip_hdr(skb);
		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
	}

	/* If the vlan tag is already inlined in the packet, skip HW VLAN
	 * tagging in pvid-tagging mode
	 */
	if (be_pvid_tagging_enabled(adapter) &&
	    veh->h_vlan_proto == htons(ETH_P_8021Q))
		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);

	/* HW has a bug wherein it will calculate CSUM for VLAN
	 * pkts even though checksum offload is disabled.
	 * Manually insert the VLAN in such pkts.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL &&
	    skb_vlan_tag_present(skb)) {
		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
		if (unlikely(!skb))
			goto err;
	}

	/* HW may lockup when VLAN HW tagging is requested on
	 * certain ipv6 packets. Drop such pkts if the HW workaround to
	 * skip HW tagging is not enabled by FW.
	 */
	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
		     (adapter->pvid || adapter->qnq_vid) &&
		     !qnq_async_evt_rcvd(adapter)))
		goto tx_drop;

	/* Manual VLAN tag insertion to prevent an ASIC lockup when the ASIC
	 * inserts a VLAN tag into certain ipv6 packets. Insert VLAN tags in
	 * the driver, and set the event, completion, and vlan bits accordingly
	 * in the Tx WRB.
	 */
	if (be_ipv6_tx_stall_chk(adapter, skb) &&
	    be_vlan_tag_tx_chk(adapter, skb)) {
		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
		if (unlikely(!skb))
			goto err;
	}

	return skb;
tx_drop:
	dev_kfree_skb_any(skb);
err:
	return NULL;
}

static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
					   struct sk_buff *skb,
					   struct be_wrb_params *wrb_params)
{
	int err;

	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
	 * packets that are 32 bytes or less may cause a transmit stall
	 * on that port. The workaround is to pad such packets
	 * (len <= 32 bytes) to a minimum length of 36 bytes.
	 */
	if (skb->len <= 32) {
		if (skb_put_padto(skb, 36))
			return NULL;
	}

	if (BEx_chip(adapter) || lancer_chip(adapter)) {
		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
		if (!skb)
			return NULL;
	}

	/* The stack can send us skbs with length greater than
	 * what the HW can handle. Trim the extra bytes.
	 */
	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
	WARN_ON(err);

	return skb;
}

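/* Close out the current batch of WRBs: make sure the last header WRB will
 * generate a TX completion event and, on BE chips, pad an odd WRB count with
 * a dummy WRB before ringing the TX doorbell.
 */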
static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
{
	struct be_queue_info *txq = &txo->q;
	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);

	/* Mark the last request eventable if it hasn't been marked already */
	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);

	/* compose a dummy wrb if there is an odd number of wrbs to notify */
	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
		wrb_fill_dummy(queue_head_node(txq));
		queue_head_inc(txq);
		atomic_inc(&txq->used);
		txo->pend_wrb_cnt++;
		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
					   TX_HDR_WRB_NUM_SHIFT);
		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
					  TX_HDR_WRB_NUM_SHIFT);
	}
	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
	txo->pend_wrb_cnt = 0;
}

/* OS2BMC related */

#define DHCP_CLIENT_PORT	68
#define DHCP_SERVER_PORT	67
#define NET_BIOS_PORT1		137
#define NET_BIOS_PORT2		138
#define DHCPV6_RAS_PORT		547

#define is_mc_allowed_on_bmc(adapter, eh)	\
	(!is_multicast_filt_enabled(adapter) &&	\
	 is_multicast_ether_addr(eh->h_dest) &&	\
	 !is_broadcast_ether_addr(eh->h_dest))

#define is_bc_allowed_on_bmc(adapter, eh)	\
	(!is_broadcast_filt_enabled(adapter) &&	\
	 is_broadcast_ether_addr(eh->h_dest))

#define is_arp_allowed_on_bmc(adapter, skb)	\
	(is_arp(skb) && is_arp_filt_enabled(adapter))

#define is_broadcast_packet(eh, adapter)	\
		(is_multicast_ether_addr(eh->h_dest) && \
		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))

#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))

#define is_arp_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))

#define is_dhcp_client_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)

#define is_dhcp_srvr_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)

#define is_nbios_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)

#define is_ipv6_na_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask &	\
			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)

#define is_ipv6_ra_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)

#define is_ipv6_ras_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)

#define is_broadcast_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)

#define is_multicast_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)

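/* Decide whether a copy of this TX packet must also be delivered to the BMC
 * over the OS2BMC channel, based on the adapter's BMC filter mask.
 */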
static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
			       struct sk_buff **skb)
{
	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
	bool os2bmc = false;

	if (!be_is_os2bmc_enabled(adapter))
		goto done;

	if (!is_multicast_ether_addr(eh->h_dest))
		goto done;

	if (is_mc_allowed_on_bmc(adapter, eh) ||
	    is_bc_allowed_on_bmc(adapter, eh) ||
	    is_arp_allowed_on_bmc(adapter, (*skb))) {
		os2bmc = true;
		goto done;
	}

	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *hdr = ipv6_hdr((*skb));
		u8 nexthdr = hdr->nexthdr;

		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));

			switch (icmp6->icmp6_type) {
			case NDISC_ROUTER_ADVERTISEMENT:
				os2bmc = is_ipv6_ra_filt_enabled(adapter);
				goto done;
			case NDISC_NEIGHBOUR_ADVERTISEMENT:
				os2bmc = is_ipv6_na_filt_enabled(adapter);
				goto done;
			default:
				break;
			}
		}
	}

	if (is_udp_pkt((*skb))) {
		struct udphdr *udp = udp_hdr((*skb));

		switch (ntohs(udp->dest)) {
		case DHCP_CLIENT_PORT:
			os2bmc = is_dhcp_client_filt_enabled(adapter);
			goto done;
		case DHCP_SERVER_PORT:
			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
			goto done;
		case NET_BIOS_PORT1:
		case NET_BIOS_PORT2:
			os2bmc = is_nbios_filt_enabled(adapter);
			goto done;
		case DHCPV6_RAS_PORT:
			os2bmc = is_ipv6_ras_filt_enabled(adapter);
			goto done;
		default:
			break;
		}
	}
done:
	/* For VLAN packets destined to the BMC, the ASIC expects the
	 * vlan tag to be inline in the packet.
	 */
	if (os2bmc)
		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);

	return os2bmc;
}

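/* ndo_start_xmit handler: apply HW workarounds, map and enqueue the skb's
 * WRBs (a second time when a copy goes to the BMC) and ring the TX doorbell
 * unless the stack indicates more packets are coming (xmit_more).
 */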
static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	u16 q_idx = skb_get_queue_mapping(skb);
	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
	struct be_wrb_params wrb_params = { 0 };
	bool flush = !skb->xmit_more;
	u16 wrb_cnt;

	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
	if (unlikely(!skb))
		goto drop;

	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);

	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
	if (unlikely(!wrb_cnt)) {
		dev_kfree_skb_any(skb);
		goto drop;
	}

	/* If os2bmc is enabled and the pkt is destined to the BMC,
	 * enqueue the pkt a second time with the mgmt bit set.
	 */
	if (be_send_pkt_to_bmc(adapter, &skb)) {
		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
		if (unlikely(!wrb_cnt))
			goto drop;
		else
			skb_get(skb);
	}

	if (be_is_txq_full(txo)) {
		netif_stop_subqueue(netdev, q_idx);
		tx_stats(txo)->tx_stops++;
	}

	if (flush || __netif_subqueue_stopped(netdev, q_idx))
		be_xmit_flush(adapter, txo);

	return NETDEV_TX_OK;
drop:
	tx_stats(txo)->tx_drv_drops++;
	/* Flush the already enqueued tx requests */
	if (flush && txo->pend_wrb_cnt)
		be_xmit_flush(adapter, txo);

	return NETDEV_TX_OK;
}

static inline bool be_in_all_promisc(struct be_adapter *adapter)
{
	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
			BE_IF_FLAGS_ALL_PROMISCUOUS;
}

static int be_set_vlan_promisc(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	int status;

	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
		return 0;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
	if (!status) {
		dev_info(dev, "Enabled VLAN promiscuous mode\n");
		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
	} else {
		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
	}
	return status;
}

static int be_clear_vlan_promisc(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	int status;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
	if (!status) {
		dev_info(dev, "Disabling VLAN promiscuous mode\n");
		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
	}
	return status;
}

/*
 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
 * If the user configures more, place BE in vlan promiscuous mode.
 */
static int be_vid_config(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	u16 vids[BE_NUM_VLANS_SUPPORTED];
	u16 num = 0, i = 0;
	int status = 0;

	/* No need to change the VLAN state if the I/F is in promiscuous mode */
	if (adapter->netdev->flags & IFF_PROMISC)
		return 0;

	if (adapter->vlans_added > be_max_vlans(adapter))
		return be_set_vlan_promisc(adapter);

	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
		status = be_clear_vlan_promisc(adapter);
		if (status)
			return status;
	}
	/* Construct VLAN Table to give to HW */
	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
		vids[num++] = cpu_to_le16(i);

	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
	if (status) {
		dev_err(dev, "Setting HW VLAN filtering failed\n");
		/* Set to VLAN promisc mode as setting VLAN filter failed */
		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
		    addl_status(status) ==
				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
			return be_set_vlan_promisc(adapter);
	}
	return status;
}

static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	int status = 0;

	mutex_lock(&adapter->rx_filter_lock);

	/* Packets with VID 0 are always received by Lancer by default */
	if (lancer_chip(adapter) && vid == 0)
		goto done;

	if (test_bit(vid, adapter->vids))
		goto done;

	set_bit(vid, adapter->vids);
	adapter->vlans_added++;

	status = be_vid_config(adapter);
done:
	mutex_unlock(&adapter->rx_filter_lock);
	return status;
}

static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	int status = 0;

	mutex_lock(&adapter->rx_filter_lock);

	/* Packets with VID 0 are always received by Lancer by default */
	if (lancer_chip(adapter) && vid == 0)
		goto done;

	if (!test_bit(vid, adapter->vids))
		goto done;

	clear_bit(vid, adapter->vids);
	adapter->vlans_added--;

	status = be_vid_config(adapter);
done:
	mutex_unlock(&adapter->rx_filter_lock);
	return status;
}

static void be_set_all_promisc(struct be_adapter *adapter)
{
	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
}

static void be_set_mc_promisc(struct be_adapter *adapter)
{
	int status;

	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
		return;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
	if (!status)
		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
}

static void be_set_uc_promisc(struct be_adapter *adapter)
{
	int status;

	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
		return;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
	if (!status)
		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
}

static void be_clear_uc_promisc(struct be_adapter *adapter)
{
	int status;

	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
		return;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
	if (!status)
		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
}

/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
 * We use a single callback for both sync and unsync. We don't actually
 * add/remove addresses through this callback; we only use it to detect
 * changes to the uc/mc lists. The entire uc/mc list is programmed in
 * be_set_rx_mode().
 */
static int be_uc_list_update(struct net_device *netdev,
			     const unsigned char *addr)
{
	struct be_adapter *adapter = netdev_priv(netdev);

	adapter->update_uc_list = true;
	return 0;
}

static int be_mc_list_update(struct net_device *netdev,
			     const unsigned char *addr)
{
	struct be_adapter *adapter = netdev_priv(netdev);

	adapter->update_mc_list = true;
	return 0;
}

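/* Program the current multicast list into the RX filter, or fall back to
 * multicast-promiscuous mode when the list exceeds what the HW supports.
 */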
static void be_set_mc_list(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct netdev_hw_addr *ha;
	bool mc_promisc = false;
	int status;

	netif_addr_lock_bh(netdev);
	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);

	if (netdev->flags & IFF_PROMISC) {
		adapter->update_mc_list = false;
	} else if (netdev->flags & IFF_ALLMULTI ||
		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
		/* Enable multicast promisc if num configured exceeds
		 * what we support
		 */
		mc_promisc = true;
		adapter->update_mc_list = false;
	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
		/* Update mc-list unconditionally if the iface was previously
		 * in mc-promisc mode and now is out of that mode.
		 */
		adapter->update_mc_list = true;
	}

	if (adapter->update_mc_list) {
		int i = 0;

		/* cache the mc-list in adapter */
		netdev_for_each_mc_addr(ha, netdev) {
			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
			i++;
		}
		adapter->mc_count = netdev_mc_count(netdev);
	}
	netif_addr_unlock_bh(netdev);

	if (mc_promisc) {
		be_set_mc_promisc(adapter);
	} else if (adapter->update_mc_list) {
		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
		if (!status)
			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
		else
			be_set_mc_promisc(adapter);

		adapter->update_mc_list = false;
	}
}

static void be_clear_mc_list(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	__dev_mc_unsync(netdev, NULL);
	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
	adapter->mc_count = 0;
}

static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
{
	if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
			     adapter->dev_mac)) {
		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
		return 0;
	}

	return be_cmd_pmac_add(adapter,
			       (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
			       adapter->if_handle,
			       &adapter->pmac_id[uc_idx + 1], 0);
}

static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
{
	if (pmac_id == adapter->pmac_id[0])
		return;

	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
}

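/* Program the current unicast list into the RX filter. Slot 0 of the pmac
 * table is reserved for the primary MAC, so only be_max_uc() - 1 entries are
 * usable; the interface falls back to unicast-promiscuous mode when the list
 * exceeds that.
 */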
static void be_set_uc_list(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct netdev_hw_addr *ha;
	bool uc_promisc = false;
	int curr_uc_macs = 0, i;

	netif_addr_lock_bh(netdev);
	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);

	if (netdev->flags & IFF_PROMISC) {
		adapter->update_uc_list = false;
	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
		uc_promisc = true;
		adapter->update_uc_list = false;
	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
		/* Update uc-list unconditionally if the iface was previously
		 * in uc-promisc mode and now is out of that mode.
		 */
		adapter->update_uc_list = true;
	}

	if (adapter->update_uc_list) {
		i = 1; /* First slot is claimed by the Primary MAC */

		/* cache the uc-list in adapter array */
		netdev_for_each_uc_addr(ha, netdev) {
			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
			i++;
		}
		curr_uc_macs = netdev_uc_count(netdev);
	}
	netif_addr_unlock_bh(netdev);

	if (uc_promisc) {
		be_set_uc_promisc(adapter);
	} else if (adapter->update_uc_list) {
		be_clear_uc_promisc(adapter);

		for (i = 0; i < adapter->uc_macs; i++)
			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);

		for (i = 0; i < curr_uc_macs; i++)
			be_uc_mac_add(adapter, i);
		adapter->uc_macs = curr_uc_macs;
		adapter->update_uc_list = false;
	}
}

static void be_clear_uc_list(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	__dev_uc_unsync(netdev, NULL);
	for (i = 0; i < adapter->uc_macs; i++)
		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);

	adapter->uc_macs = 0;
}

1739 static void __be_set_rx_mode(struct be_adapter *adapter)
1740 {
1741 	struct net_device *netdev = adapter->netdev;
1742 
1743 	mutex_lock(&adapter->rx_filter_lock);
1744 
1745 	if (netdev->flags & IFF_PROMISC) {
1746 		if (!be_in_all_promisc(adapter))
1747 			be_set_all_promisc(adapter);
1748 	} else if (be_in_all_promisc(adapter)) {
1749 		/* We need to re-program the vlan-list or clear
1750 		 * vlan-promisc mode (if needed) when the interface
1751 		 * comes out of promisc mode.
1752 		 */
1753 		be_vid_config(adapter);
1754 	}
1755 
1756 	be_set_uc_list(adapter);
1757 	be_set_mc_list(adapter);
1758 
1759 	mutex_unlock(&adapter->rx_filter_lock);
1760 }
1761 
1762 static void be_work_set_rx_mode(struct work_struct *work)
1763 {
1764 	struct be_cmd_work *cmd_work =
1765 				container_of(work, struct be_cmd_work, work);
1766 
1767 	__be_set_rx_mode(cmd_work->adapter);
1768 	kfree(cmd_work);
1769 }
1770 
1771 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1772 {
1773 	struct be_adapter *adapter = netdev_priv(netdev);
1774 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1775 	int status;
1776 
1777 	if (!sriov_enabled(adapter))
1778 		return -EPERM;
1779 
1780 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1781 		return -EINVAL;
1782 
1783 	/* Proceed further only if the user-provided MAC is different
1784 	 * from the active MAC
1785 	 */
1786 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1787 		return 0;
1788 
1789 	if (BEx_chip(adapter)) {
1790 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1791 				vf + 1);
1792 
1793 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1794 					 &vf_cfg->pmac_id, vf + 1);
1795 	} else {
1796 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1797 					vf + 1);
1798 	}
1799 
1800 	if (status) {
1801 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1802 			mac, vf, status);
1803 		return be_cmd_status(status);
1804 	}
1805 
1806 	ether_addr_copy(vf_cfg->mac_addr, mac);
1807 
1808 	return 0;
1809 }
1810 
1811 static int be_get_vf_config(struct net_device *netdev, int vf,
1812 			    struct ifla_vf_info *vi)
1813 {
1814 	struct be_adapter *adapter = netdev_priv(netdev);
1815 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1816 
1817 	if (!sriov_enabled(adapter))
1818 		return -EPERM;
1819 
1820 	if (vf >= adapter->num_vfs)
1821 		return -EINVAL;
1822 
1823 	vi->vf = vf;
1824 	vi->max_tx_rate = vf_cfg->tx_rate;
1825 	vi->min_tx_rate = 0;
1826 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1827 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1828 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1829 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1830 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1831 
1832 	return 0;
1833 }
1834 
1835 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1836 {
1837 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1838 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1839 	int vf_if_id = vf_cfg->if_handle;
1840 	int status;
1841 
1842 	/* Enable Transparent VLAN Tagging */
1843 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1844 	if (status)
1845 		return status;
1846 
1847 	/* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1848 	vids[0] = 0;
1849 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1850 	if (!status)
1851 		dev_info(&adapter->pdev->dev,
1852 			 "Cleared guest VLANs on VF%d\n", vf);
1853 
1854 	/* After TVT is enabled, disallow VFs to program VLAN filters */
1855 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1856 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1857 						  ~BE_PRIV_FILTMGMT, vf + 1);
1858 		if (!status)
1859 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1860 	}
1861 	return 0;
1862 }
1863 
1864 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1865 {
1866 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1867 	struct device *dev = &adapter->pdev->dev;
1868 	int status;
1869 
1870 	/* Reset Transparent VLAN Tagging. */
1871 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1872 				       vf_cfg->if_handle, 0, 0);
1873 	if (status)
1874 		return status;
1875 
1876 	/* Allow VFs to program VLAN filtering */
1877 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1878 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1879 						  BE_PRIV_FILTMGMT, vf + 1);
1880 		if (!status) {
1881 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1882 			dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1883 		}
1884 	}
1885 
1886 	dev_info(dev,
1887 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1888 	return 0;
1889 }
1890 
1891 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1892 			  __be16 vlan_proto)
1893 {
1894 	struct be_adapter *adapter = netdev_priv(netdev);
1895 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1896 	int status;
1897 
1898 	if (!sriov_enabled(adapter))
1899 		return -EPERM;
1900 
1901 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1902 		return -EINVAL;
1903 
1904 	if (vlan_proto != htons(ETH_P_8021Q))
1905 		return -EPROTONOSUPPORT;
1906 
1907 	if (vlan || qos) {
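		/* Fold the priority bits into the tag, e.g. vlan 100 with
		 * qos 3 becomes 100 | (3 << 13) = 0x6064.
		 */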
1908 		vlan |= qos << VLAN_PRIO_SHIFT;
1909 		status = be_set_vf_tvt(adapter, vf, vlan);
1910 	} else {
1911 		status = be_clear_vf_tvt(adapter, vf);
1912 	}
1913 
1914 	if (status) {
1915 		dev_err(&adapter->pdev->dev,
1916 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1917 			status);
1918 		return be_cmd_status(status);
1919 	}
1920 
1921 	vf_cfg->vlan_tag = vlan;
1922 	return 0;
1923 }
1924 
1925 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1926 			     int min_tx_rate, int max_tx_rate)
1927 {
1928 	struct be_adapter *adapter = netdev_priv(netdev);
1929 	struct device *dev = &adapter->pdev->dev;
1930 	int percent_rate, status = 0;
1931 	u16 link_speed = 0;
1932 	u8 link_status;
1933 
1934 	if (!sriov_enabled(adapter))
1935 		return -EPERM;
1936 
1937 	if (vf >= adapter->num_vfs)
1938 		return -EINVAL;
1939 
1940 	if (min_tx_rate)
1941 		return -EINVAL;
1942 
1943 	if (!max_tx_rate)
1944 		goto config_qos;
1945 
1946 	status = be_cmd_link_status_query(adapter, &link_speed,
1947 					  &link_status, 0);
1948 	if (status)
1949 		goto err;
1950 
1951 	if (!link_status) {
1952 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1953 		status = -ENETDOWN;
1954 		goto err;
1955 	}
1956 
1957 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1958 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1959 			link_speed);
1960 		status = -EINVAL;
1961 		goto err;
1962 	}
1963 
1964 	/* On Skyhawk the QOS setting must be done only as a % value */
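	/* e.g. on a 10Gbps link, link_speed = 10000 so percent_rate = 100,
	 * and only multiples of 100 Mbps are accepted.
	 */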
1965 	percent_rate = link_speed / 100;
1966 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1967 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1968 			percent_rate);
1969 		status = -EINVAL;
1970 		goto err;
1971 	}
1972 
1973 config_qos:
1974 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1975 	if (status)
1976 		goto err;
1977 
1978 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1979 	return 0;
1980 
1981 err:
1982 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1983 		max_tx_rate, vf);
1984 	return be_cmd_status(status);
1985 }
1986 
1987 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1988 				int link_state)
1989 {
1990 	struct be_adapter *adapter = netdev_priv(netdev);
1991 	int status;
1992 
1993 	if (!sriov_enabled(adapter))
1994 		return -EPERM;
1995 
1996 	if (vf >= adapter->num_vfs)
1997 		return -EINVAL;
1998 
1999 	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2000 	if (status) {
2001 		dev_err(&adapter->pdev->dev,
2002 			"Link state change on VF %d failed: %#x\n", vf, status);
2003 		return be_cmd_status(status);
2004 	}
2005 
2006 	adapter->vf_cfg[vf].plink_tracking = link_state;
2007 
2008 	return 0;
2009 }
2010 
2011 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2012 {
2013 	struct be_adapter *adapter = netdev_priv(netdev);
2014 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2015 	u8 spoofchk;
2016 	int status;
2017 
2018 	if (!sriov_enabled(adapter))
2019 		return -EPERM;
2020 
2021 	if (vf >= adapter->num_vfs)
2022 		return -EINVAL;
2023 
2024 	if (BEx_chip(adapter))
2025 		return -EOPNOTSUPP;
2026 
2027 	if (enable == vf_cfg->spoofchk)
2028 		return 0;
2029 
2030 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2031 
2032 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2033 				       0, spoofchk);
2034 	if (status) {
2035 		dev_err(&adapter->pdev->dev,
2036 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2037 		return be_cmd_status(status);
2038 	}
2039 
2040 	vf_cfg->spoofchk = enable;
2041 	return 0;
2042 }
2043 
2044 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2045 			  ulong now)
2046 {
2047 	aic->rx_pkts_prev = rx_pkts;
2048 	aic->tx_reqs_prev = tx_pkts;
2049 	aic->jiffies = now;
2050 }
2051 
2052 static int be_get_new_eqd(struct be_eq_obj *eqo)
2053 {
2054 	struct be_adapter *adapter = eqo->adapter;
2055 	int eqd, start;
2056 	struct be_aic_obj *aic;
2057 	struct be_rx_obj *rxo;
2058 	struct be_tx_obj *txo;
2059 	u64 rx_pkts = 0, tx_pkts = 0;
2060 	ulong now;
2061 	u32 pps, delta;
2062 	int i;
2063 
2064 	aic = &adapter->aic_obj[eqo->idx];
2065 	if (!aic->enable) {
2066 		if (aic->jiffies)
2067 			aic->jiffies = 0;
2068 		eqd = aic->et_eqd;
2069 		return eqd;
2070 	}
2071 
2072 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2073 		do {
2074 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2075 			rx_pkts += rxo->stats.rx_pkts;
2076 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2077 	}
2078 
2079 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2080 		do {
2081 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2082 			tx_pkts += txo->stats.tx_reqs;
2083 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2084 	}
2085 
2086 	/* Skip if the counters wrapped around or this is the first calculation */
2087 	now = jiffies;
2088 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2089 	    rx_pkts < aic->rx_pkts_prev ||
2090 	    tx_pkts < aic->tx_reqs_prev) {
2091 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2092 		return aic->prev_eqd;
2093 	}
2094 
2095 	delta = jiffies_to_msecs(now - aic->jiffies);
2096 	if (delta == 0)
2097 		return aic->prev_eqd;
2098 
2099 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2100 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2101 	eqd = (pps / 15000) << 2;
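	/* e.g. a combined rate of 150,000 pkts/sec gives
	 * eqd = (150000 / 15000) << 2 = 40 (usecs); below 30,000 pps the
	 * result drops under 8 and is zeroed out below.
	 */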
2102 
2103 	if (eqd < 8)
2104 		eqd = 0;
2105 	eqd = min_t(u32, eqd, aic->max_eqd);
2106 	eqd = max_t(u32, eqd, aic->min_eqd);
2107 
2108 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2109 
2110 	return eqd;
2111 }
2112 
2113 /* For Skyhawk-R only */
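/* Map the EQ delay computed by be_get_new_eqd() onto the R2I delay
 * multiplier encodings: >100 -> ENC_1, 61-100 -> ENC_2, 21-60 -> ENC_3,
 * and 20 or less -> ENC_0.
 */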
2114 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2115 {
2116 	struct be_adapter *adapter = eqo->adapter;
2117 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2118 	ulong now = jiffies;
2119 	int eqd;
2120 	u32 mult_enc;
2121 
2122 	if (!aic->enable)
2123 		return 0;
2124 
2125 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2126 		eqd = aic->prev_eqd;
2127 	else
2128 		eqd = be_get_new_eqd(eqo);
2129 
2130 	if (eqd > 100)
2131 		mult_enc = R2I_DLY_ENC_1;
2132 	else if (eqd > 60)
2133 		mult_enc = R2I_DLY_ENC_2;
2134 	else if (eqd > 20)
2135 		mult_enc = R2I_DLY_ENC_3;
2136 	else
2137 		mult_enc = R2I_DLY_ENC_0;
2138 
2139 	aic->prev_eqd = eqd;
2140 
2141 	return mult_enc;
2142 }
2143 
2144 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2145 {
2146 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2147 	struct be_aic_obj *aic;
2148 	struct be_eq_obj *eqo;
2149 	int i, num = 0, eqd;
2150 
2151 	for_all_evt_queues(adapter, eqo, i) {
2152 		aic = &adapter->aic_obj[eqo->idx];
2153 		eqd = be_get_new_eqd(eqo);
2154 		if (force_update || eqd != aic->prev_eqd) {
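			/* The delay is programmed as a multiplier scaled to
			 * ~65% of the usec value, e.g. eqd = 40 encodes as
			 * (40 * 65) / 100 = 26.
			 */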
2155 			set_eqd[num].delay_multiplier = (eqd * 65) / 100;
2156 			set_eqd[num].eq_id = eqo->q.id;
2157 			aic->prev_eqd = eqd;
2158 			num++;
2159 		}
2160 	}
2161 
2162 	if (num)
2163 		be_cmd_modify_eqd(adapter, set_eqd, num);
2164 }
2165 
2166 static void be_rx_stats_update(struct be_rx_obj *rxo,
2167 			       struct be_rx_compl_info *rxcp)
2168 {
2169 	struct be_rx_stats *stats = rx_stats(rxo);
2170 
2171 	u64_stats_update_begin(&stats->sync);
2172 	stats->rx_compl++;
2173 	stats->rx_bytes += rxcp->pkt_size;
2174 	stats->rx_pkts++;
2175 	if (rxcp->tunneled)
2176 		stats->rx_vxlan_offload_pkts++;
2177 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2178 		stats->rx_mcast_pkts++;
2179 	if (rxcp->err)
2180 		stats->rx_compl_err++;
2181 	u64_stats_update_end(&stats->sync);
2182 }
2183 
2184 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2185 {
2186 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2187 	 * Also ignore ipcksm for ipv6 pkts
2188 	 */
2189 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2190 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2191 }
2192 
2193 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2194 {
2195 	struct be_adapter *adapter = rxo->adapter;
2196 	struct be_rx_page_info *rx_page_info;
2197 	struct be_queue_info *rxq = &rxo->q;
2198 	u32 frag_idx = rxq->tail;
2199 
2200 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2201 	BUG_ON(!rx_page_info->page);
2202 
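	/* The backing page was DMA-mapped once as a whole: intermediate
	 * frags only need a CPU sync, and the mapping is torn down when the
	 * page's last frag is consumed.
	 */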
2203 	if (rx_page_info->last_frag) {
2204 		dma_unmap_page(&adapter->pdev->dev,
2205 			       dma_unmap_addr(rx_page_info, bus),
2206 			       adapter->big_page_size, DMA_FROM_DEVICE);
2207 		rx_page_info->last_frag = false;
2208 	} else {
2209 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2210 					dma_unmap_addr(rx_page_info, bus),
2211 					rx_frag_size, DMA_FROM_DEVICE);
2212 	}
2213 
2214 	queue_tail_inc(rxq);
2215 	atomic_dec(&rxq->used);
2216 	return rx_page_info;
2217 }
2218 
2219 /* Throw away the data in the Rx completion */
2220 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2221 				struct be_rx_compl_info *rxcp)
2222 {
2223 	struct be_rx_page_info *page_info;
2224 	u16 i, num_rcvd = rxcp->num_rcvd;
2225 
2226 	for (i = 0; i < num_rcvd; i++) {
2227 		page_info = get_rx_page_info(rxo);
2228 		put_page(page_info->page);
2229 		memset(page_info, 0, sizeof(*page_info));
2230 	}
2231 }
2232 
2233 /*
2234  * skb_fill_rx_data forms a complete skb for an ether frame
2235  * indicated by rxcp.
2236  */
2237 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2238 			     struct be_rx_compl_info *rxcp)
2239 {
2240 	struct be_rx_page_info *page_info;
2241 	u16 i, j;
2242 	u16 hdr_len, curr_frag_len, remaining;
2243 	u8 *start;
2244 
2245 	page_info = get_rx_page_info(rxo);
2246 	start = page_address(page_info->page) + page_info->page_offset;
2247 	prefetch(start);
2248 
2249 	/* Copy data in the first descriptor of this completion */
2250 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2251 
2252 	skb->len = curr_frag_len;
2253 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2254 		memcpy(skb->data, start, curr_frag_len);
2255 		/* Complete packet has now been moved to data */
2256 		put_page(page_info->page);
2257 		skb->data_len = 0;
2258 		skb->tail += curr_frag_len;
2259 	} else {
2260 		hdr_len = ETH_HLEN;
2261 		memcpy(skb->data, start, hdr_len);
2262 		skb_shinfo(skb)->nr_frags = 1;
2263 		skb_frag_set_page(skb, 0, page_info->page);
2264 		skb_shinfo(skb)->frags[0].page_offset =
2265 					page_info->page_offset + hdr_len;
2266 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2267 				  curr_frag_len - hdr_len);
2268 		skb->data_len = curr_frag_len - hdr_len;
2269 		skb->truesize += rx_frag_size;
2270 		skb->tail += hdr_len;
2271 	}
2272 	page_info->page = NULL;
2273 
2274 	if (rxcp->pkt_size <= rx_frag_size) {
2275 		BUG_ON(rxcp->num_rcvd != 1);
2276 		return;
2277 	}
2278 
2279 	/* More frags present for this completion */
2280 	remaining = rxcp->pkt_size - curr_frag_len;
2281 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2282 		page_info = get_rx_page_info(rxo);
2283 		curr_frag_len = min(remaining, rx_frag_size);
2284 
2285 		/* Coalesce all frags from the same physical page in one slot */
2286 		if (page_info->page_offset == 0) {
2287 			/* Fresh page */
2288 			j++;
2289 			skb_frag_set_page(skb, j, page_info->page);
2290 			skb_shinfo(skb)->frags[j].page_offset =
2291 							page_info->page_offset;
2292 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2293 			skb_shinfo(skb)->nr_frags++;
2294 		} else {
2295 			put_page(page_info->page);
2296 		}
2297 
2298 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2299 		skb->len += curr_frag_len;
2300 		skb->data_len += curr_frag_len;
2301 		skb->truesize += rx_frag_size;
2302 		remaining -= curr_frag_len;
2303 		page_info->page = NULL;
2304 	}
2305 	BUG_ON(j > MAX_SKB_FRAGS);
2306 }
2307 
2308 /* Process the RX completion indicated by rxcp when GRO is disabled */
2309 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2310 				struct be_rx_compl_info *rxcp)
2311 {
2312 	struct be_adapter *adapter = rxo->adapter;
2313 	struct net_device *netdev = adapter->netdev;
2314 	struct sk_buff *skb;
2315 
2316 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2317 	if (unlikely(!skb)) {
2318 		rx_stats(rxo)->rx_drops_no_skbs++;
2319 		be_rx_compl_discard(rxo, rxcp);
2320 		return;
2321 	}
2322 
2323 	skb_fill_rx_data(rxo, skb, rxcp);
2324 
2325 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2326 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2327 	else
2328 		skb_checksum_none_assert(skb);
2329 
2330 	skb->protocol = eth_type_trans(skb, netdev);
2331 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2332 	if (netdev->features & NETIF_F_RXHASH)
2333 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2334 
2335 	skb->csum_level = rxcp->tunneled;
2336 	skb_mark_napi_id(skb, napi);
2337 
2338 	if (rxcp->vlanf)
2339 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2340 
2341 	netif_receive_skb(skb);
2342 }
2343 
2344 /* Process the RX completion indicated by rxcp when GRO is enabled */
2345 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2346 				    struct napi_struct *napi,
2347 				    struct be_rx_compl_info *rxcp)
2348 {
2349 	struct be_adapter *adapter = rxo->adapter;
2350 	struct be_rx_page_info *page_info;
2351 	struct sk_buff *skb = NULL;
2352 	u16 remaining, curr_frag_len;
2353 	u16 i, j;
2354 
2355 	skb = napi_get_frags(napi);
2356 	if (!skb) {
2357 		be_rx_compl_discard(rxo, rxcp);
2358 		return;
2359 	}
2360 
2361 	remaining = rxcp->pkt_size;
2362 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2363 		page_info = get_rx_page_info(rxo);
2364 
2365 		curr_frag_len = min(remaining, rx_frag_size);
2366 
2367 		/* Coalesce all frags from the same physical page in one slot */
2368 		if (i == 0 || page_info->page_offset == 0) {
2369 			/* First frag or Fresh page */
2370 			j++;
2371 			skb_frag_set_page(skb, j, page_info->page);
2372 			skb_shinfo(skb)->frags[j].page_offset =
2373 							page_info->page_offset;
2374 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2375 		} else {
2376 			put_page(page_info->page);
2377 		}
2378 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2379 		skb->truesize += rx_frag_size;
2380 		remaining -= curr_frag_len;
2381 		memset(page_info, 0, sizeof(*page_info));
2382 	}
2383 	BUG_ON(j > MAX_SKB_FRAGS);
2384 
2385 	skb_shinfo(skb)->nr_frags = j + 1;
2386 	skb->len = rxcp->pkt_size;
2387 	skb->data_len = rxcp->pkt_size;
2388 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2389 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2390 	if (adapter->netdev->features & NETIF_F_RXHASH)
2391 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2392 
2393 	skb->csum_level = rxcp->tunneled;
2394 
2395 	if (rxcp->vlanf)
2396 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2397 
2398 	napi_gro_frags(napi);
2399 }
2400 
2401 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2402 				 struct be_rx_compl_info *rxcp)
2403 {
2404 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2405 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2406 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2407 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2408 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2409 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2410 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2411 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2412 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2413 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2414 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2415 	if (rxcp->vlanf) {
2416 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2417 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2418 	}
2419 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2420 	rxcp->tunneled =
2421 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2422 }
2423 
2424 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2425 				 struct be_rx_compl_info *rxcp)
2426 {
2427 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2428 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2429 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2430 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2431 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2432 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2433 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2434 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2435 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2436 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2437 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2438 	if (rxcp->vlanf) {
2439 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2440 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2441 	}
2442 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2443 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2444 }
2445 
2446 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2447 {
2448 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2449 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2450 	struct be_adapter *adapter = rxo->adapter;
2451 
2452 	/* For checking the valid bit it is Ok to use either definition as the
2453 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2454 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2455 		return NULL;
2456 
2457 	rmb();
2458 	be_dws_le_to_cpu(compl, sizeof(*compl));
2459 
2460 	if (adapter->be3_native)
2461 		be_parse_rx_compl_v1(compl, rxcp);
2462 	else
2463 		be_parse_rx_compl_v0(compl, rxcp);
2464 
2465 	if (rxcp->ip_frag)
2466 		rxcp->l4_csum = 0;
2467 
2468 	if (rxcp->vlanf) {
2469 		/* In QNQ modes, if qnq bit is not set, then the packet was
2470 		 * tagged only with the transparent outer vlan-tag and must
2471 		 * not be treated as a vlan packet by host
2472 		 */
2473 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2474 			rxcp->vlanf = 0;
2475 
2476 		if (!lancer_chip(adapter))
2477 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2478 
2479 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2480 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2481 			rxcp->vlanf = 0;
2482 	}
2483 
2484 	/* As the compl has been parsed, reset it; we won't touch it again */
2485 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2486 
2487 	queue_tail_inc(&rxo->cq);
2488 	return rxcp;
2489 }
2490 
2491 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2492 {
2493 	u32 order = get_order(size);
2494 
2495 	if (order > 0)
2496 		gfp |= __GFP_COMP;
2497 	return alloc_pages(gfp, order);
2498 }
2499 
2500 /*
2501  * Allocate a page, split it into fragments of size rx_frag_size and post as
2502  * receive buffers to BE
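 *
 * Sizing example: with the default rx_frag_size of 2048 on a 4K-page
 * system, big_page_size is 4096, so each page yields two 2K frags before
 * a fresh page must be allocated.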
2503  */
2504 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2505 {
2506 	struct be_adapter *adapter = rxo->adapter;
2507 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2508 	struct be_queue_info *rxq = &rxo->q;
2509 	struct page *pagep = NULL;
2510 	struct device *dev = &adapter->pdev->dev;
2511 	struct be_eth_rx_d *rxd;
2512 	u64 page_dmaaddr = 0, frag_dmaaddr;
2513 	u32 posted, page_offset = 0, notify = 0;
2514 
2515 	page_info = &rxo->page_info_tbl[rxq->head];
2516 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2517 		if (!pagep) {
2518 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2519 			if (unlikely(!pagep)) {
2520 				rx_stats(rxo)->rx_post_fail++;
2521 				break;
2522 			}
2523 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2524 						    adapter->big_page_size,
2525 						    DMA_FROM_DEVICE);
2526 			if (dma_mapping_error(dev, page_dmaaddr)) {
2527 				put_page(pagep);
2528 				pagep = NULL;
2529 				adapter->drv_stats.dma_map_errors++;
2530 				break;
2531 			}
2532 			page_offset = 0;
2533 		} else {
2534 			get_page(pagep);
2535 			page_offset += rx_frag_size;
2536 		}
2537 		page_info->page_offset = page_offset;
2538 		page_info->page = pagep;
2539 
2540 		rxd = queue_head_node(rxq);
2541 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2542 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2543 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2544 
2545 		/* Any space left in the current big page for another frag? */
2546 		if ((page_offset + rx_frag_size + rx_frag_size) >
2547 					adapter->big_page_size) {
2548 			pagep = NULL;
2549 			page_info->last_frag = true;
2550 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2551 		} else {
2552 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2553 		}
2554 
2555 		prev_page_info = page_info;
2556 		queue_head_inc(rxq);
2557 		page_info = &rxo->page_info_tbl[rxq->head];
2558 	}
2559 
2560 	/* Mark the last frag of a page when we break out of the above loop
2561 	 * with no more slots available in the RXQ
2562 	 */
2563 	if (pagep) {
2564 		prev_page_info->last_frag = true;
2565 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2566 	}
2567 
2568 	if (posted) {
2569 		atomic_add(posted, &rxq->used);
2570 		if (rxo->rx_post_starved)
2571 			rxo->rx_post_starved = false;
2572 		do {
2573 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2574 			be_rxq_notify(adapter, rxq->id, notify);
2575 			posted -= notify;
2576 		} while (posted);
2577 	} else if (atomic_read(&rxq->used) == 0) {
2578 		/* Let be_worker replenish when memory is available */
2579 		rxo->rx_post_starved = true;
2580 	}
2581 }
2582 
2583 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2584 {
2585 	struct be_queue_info *tx_cq = &txo->cq;
2586 	struct be_tx_compl_info *txcp = &txo->txcp;
2587 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2588 
2589 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2590 		return NULL;
2591 
2592 	/* Ensure load ordering of valid bit dword and other dwords below */
2593 	rmb();
2594 	be_dws_le_to_cpu(compl, sizeof(*compl));
2595 
2596 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2597 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2598 
2599 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2600 	queue_tail_inc(tx_cq);
2601 	return txcp;
2602 }
2603 
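/* Walk the TXQ from its tail up to @last_index, unmapping every WRB and
 * freeing the skb(s) covered by the completion. Each request occupies one
 * header WRB followed by one WRB per DMA'd fragment, hence the explicit
 * skip over the header before the data frags are unmapped.
 */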
2604 static u16 be_tx_compl_process(struct be_adapter *adapter,
2605 			       struct be_tx_obj *txo, u16 last_index)
2606 {
2607 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2608 	struct be_queue_info *txq = &txo->q;
2609 	struct sk_buff *skb = NULL;
2610 	bool unmap_skb_hdr = false;
2611 	struct be_eth_wrb *wrb;
2612 	u16 num_wrbs = 0;
2613 	u32 frag_index;
2614 
2615 	do {
2616 		if (sent_skbs[txq->tail]) {
2617 			/* Free skb from prev req */
2618 			if (skb)
2619 				dev_consume_skb_any(skb);
2620 			skb = sent_skbs[txq->tail];
2621 			sent_skbs[txq->tail] = NULL;
2622 			queue_tail_inc(txq);  /* skip hdr wrb */
2623 			num_wrbs++;
2624 			unmap_skb_hdr = true;
2625 		}
2626 		wrb = queue_tail_node(txq);
2627 		frag_index = txq->tail;
2628 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2629 			      (unmap_skb_hdr && skb_headlen(skb)));
2630 		unmap_skb_hdr = false;
2631 		queue_tail_inc(txq);
2632 		num_wrbs++;
2633 	} while (frag_index != last_index);
2634 	dev_consume_skb_any(skb);
2635 
2636 	return num_wrbs;
2637 }
2638 
2639 /* Return the number of events in the event queue */
2640 static inline int events_get(struct be_eq_obj *eqo)
2641 {
2642 	struct be_eq_entry *eqe;
2643 	int num = 0;
2644 
2645 	do {
2646 		eqe = queue_tail_node(&eqo->q);
2647 		if (eqe->evt == 0)
2648 			break;
2649 
2650 		rmb();
2651 		eqe->evt = 0;
2652 		num++;
2653 		queue_tail_inc(&eqo->q);
2654 	} while (true);
2655 
2656 	return num;
2657 }
2658 
2659 /* Leaves the EQ in disarmed state */
2660 static void be_eq_clean(struct be_eq_obj *eqo)
2661 {
2662 	int num = events_get(eqo);
2663 
2664 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2665 }
2666 
2667 /* Free posted rx buffers that were not used */
2668 static void be_rxq_clean(struct be_rx_obj *rxo)
2669 {
2670 	struct be_queue_info *rxq = &rxo->q;
2671 	struct be_rx_page_info *page_info;
2672 
2673 	while (atomic_read(&rxq->used) > 0) {
2674 		page_info = get_rx_page_info(rxo);
2675 		put_page(page_info->page);
2676 		memset(page_info, 0, sizeof(*page_info));
2677 	}
2678 	BUG_ON(atomic_read(&rxq->used));
2679 	rxq->tail = 0;
2680 	rxq->head = 0;
2681 }
2682 
2683 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2684 {
2685 	struct be_queue_info *rx_cq = &rxo->cq;
2686 	struct be_rx_compl_info *rxcp;
2687 	struct be_adapter *adapter = rxo->adapter;
2688 	int flush_wait = 0;
2689 
2690 	/* Consume pending rx completions.
2691 	 * Wait for the flush completion (identified by zero num_rcvd)
2692 	 * to arrive. Notify CQ even when there are no more CQ entries
2693 	 * for HW to flush partially coalesced CQ entries.
2694 	 * In Lancer, there is no need to wait for flush compl.
2695 	 */
2696 	for (;;) {
2697 		rxcp = be_rx_compl_get(rxo);
2698 		if (!rxcp) {
2699 			if (lancer_chip(adapter))
2700 				break;
2701 
2702 			if (flush_wait++ > 50 ||
2703 			    be_check_error(adapter,
2704 					   BE_ERROR_HW)) {
2705 				dev_warn(&adapter->pdev->dev,
2706 					 "did not receive flush compl\n");
2707 				break;
2708 			}
2709 			be_cq_notify(adapter, rx_cq->id, true, 0);
2710 			mdelay(1);
2711 		} else {
2712 			be_rx_compl_discard(rxo, rxcp);
2713 			be_cq_notify(adapter, rx_cq->id, false, 1);
2714 			if (rxcp->num_rcvd == 0)
2715 				break;
2716 		}
2717 	}
2718 
2719 	/* After cleanup, leave the CQ in unarmed state */
2720 	be_cq_notify(adapter, rx_cq->id, false, 0);
2721 }
2722 
2723 static void be_tx_compl_clean(struct be_adapter *adapter)
2724 {
2725 	struct device *dev = &adapter->pdev->dev;
2726 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2727 	struct be_tx_compl_info *txcp;
2728 	struct be_queue_info *txq;
2729 	u32 end_idx, notified_idx;
2730 	struct be_tx_obj *txo;
2731 	int i, pending_txqs;
2732 
2733 	/* Stop polling for compls when HW has been silent for 10ms */
2734 	do {
2735 		pending_txqs = adapter->num_tx_qs;
2736 
2737 		for_all_tx_queues(adapter, txo, i) {
2738 			cmpl = 0;
2739 			num_wrbs = 0;
2740 			txq = &txo->q;
2741 			while ((txcp = be_tx_compl_get(txo))) {
2742 				num_wrbs +=
2743 					be_tx_compl_process(adapter, txo,
2744 							    txcp->end_index);
2745 				cmpl++;
2746 			}
2747 			if (cmpl) {
2748 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2749 				atomic_sub(num_wrbs, &txq->used);
2750 				timeo = 0;
2751 			}
2752 			if (!be_is_tx_compl_pending(txo))
2753 				pending_txqs--;
2754 		}
2755 
2756 		if (pending_txqs == 0 || ++timeo > 10 ||
2757 		    be_check_error(adapter, BE_ERROR_HW))
2758 			break;
2759 
2760 		mdelay(1);
2761 	} while (true);
2762 
2763 	/* Free enqueued TX that was never notified to HW */
2764 	for_all_tx_queues(adapter, txo, i) {
2765 		txq = &txo->q;
2766 
2767 		if (atomic_read(&txq->used)) {
2768 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2769 				 i, atomic_read(&txq->used));
2770 			notified_idx = txq->tail;
2771 			end_idx = txq->tail;
2772 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2773 				  txq->len);
2774 			/* Use the tx-compl process logic to handle requests
2775 			 * that were not sent to the HW.
2776 			 */
2777 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2778 			atomic_sub(num_wrbs, &txq->used);
2779 			BUG_ON(atomic_read(&txq->used));
2780 			txo->pend_wrb_cnt = 0;
2781 			/* Since hw was never notified of these requests,
2782 			 * reset TXQ indices
2783 			 */
2784 			txq->head = notified_idx;
2785 			txq->tail = notified_idx;
2786 		}
2787 	}
2788 }
2789 
2790 static void be_evt_queues_destroy(struct be_adapter *adapter)
2791 {
2792 	struct be_eq_obj *eqo;
2793 	int i;
2794 
2795 	for_all_evt_queues(adapter, eqo, i) {
2796 		if (eqo->q.created) {
2797 			be_eq_clean(eqo);
2798 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2799 			netif_napi_del(&eqo->napi);
2800 			free_cpumask_var(eqo->affinity_mask);
2801 		}
2802 		be_queue_free(adapter, &eqo->q);
2803 	}
2804 }
2805 
2806 static int be_evt_queues_create(struct be_adapter *adapter)
2807 {
2808 	struct be_queue_info *eq;
2809 	struct be_eq_obj *eqo;
2810 	struct be_aic_obj *aic;
2811 	int i, rc;
2812 
2813 	/* need enough EQs to service both RX and TX queues */
2814 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2815 				    max(adapter->cfg_num_rx_irqs,
2816 					adapter->cfg_num_tx_irqs));
2817 
2818 	for_all_evt_queues(adapter, eqo, i) {
2819 		int numa_node = dev_to_node(&adapter->pdev->dev);
2820 
2821 		aic = &adapter->aic_obj[i];
2822 		eqo->adapter = adapter;
2823 		eqo->idx = i;
2824 		aic->max_eqd = BE_MAX_EQD;
2825 		aic->enable = true;
2826 
2827 		eq = &eqo->q;
2828 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2829 				    sizeof(struct be_eq_entry));
2830 		if (rc)
2831 			return rc;
2832 
2833 		rc = be_cmd_eq_create(adapter, eqo);
2834 		if (rc)
2835 			return rc;
2836 
2837 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2838 			return -ENOMEM;
2839 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2840 				eqo->affinity_mask);
2841 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2842 			       BE_NAPI_WEIGHT);
2843 	}
2844 	return 0;
2845 }
2846 
2847 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2848 {
2849 	struct be_queue_info *q;
2850 
2851 	q = &adapter->mcc_obj.q;
2852 	if (q->created)
2853 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2854 	be_queue_free(adapter, q);
2855 
2856 	q = &adapter->mcc_obj.cq;
2857 	if (q->created)
2858 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2859 	be_queue_free(adapter, q);
2860 }
2861 
2862 /* Must be called only after TX qs are created as MCC shares TX EQ */
2863 static int be_mcc_queues_create(struct be_adapter *adapter)
2864 {
2865 	struct be_queue_info *q, *cq;
2866 
2867 	cq = &adapter->mcc_obj.cq;
2868 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2869 			   sizeof(struct be_mcc_compl)))
2870 		goto err;
2871 
2872 	/* Use the default EQ for MCC completions */
2873 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2874 		goto mcc_cq_free;
2875 
2876 	q = &adapter->mcc_obj.q;
2877 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2878 		goto mcc_cq_destroy;
2879 
2880 	if (be_cmd_mccq_create(adapter, q, cq))
2881 		goto mcc_q_free;
2882 
2883 	return 0;
2884 
2885 mcc_q_free:
2886 	be_queue_free(adapter, q);
2887 mcc_cq_destroy:
2888 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2889 mcc_cq_free:
2890 	be_queue_free(adapter, cq);
2891 err:
2892 	return -1;
2893 }
2894 
2895 static void be_tx_queues_destroy(struct be_adapter *adapter)
2896 {
2897 	struct be_queue_info *q;
2898 	struct be_tx_obj *txo;
2899 	u8 i;
2900 
2901 	for_all_tx_queues(adapter, txo, i) {
2902 		q = &txo->q;
2903 		if (q->created)
2904 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2905 		be_queue_free(adapter, q);
2906 
2907 		q = &txo->cq;
2908 		if (q->created)
2909 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2910 		be_queue_free(adapter, q);
2911 	}
2912 }
2913 
2914 static int be_tx_qs_create(struct be_adapter *adapter)
2915 {
2916 	struct be_queue_info *cq;
2917 	struct be_tx_obj *txo;
2918 	struct be_eq_obj *eqo;
2919 	int status, i;
2920 
2921 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2922 
2923 	for_all_tx_queues(adapter, txo, i) {
2924 		cq = &txo->cq;
2925 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2926 					sizeof(struct be_eth_tx_compl));
2927 		if (status)
2928 			return status;
2929 
2930 		u64_stats_init(&txo->stats.sync);
2931 		u64_stats_init(&txo->stats.sync_compl);
2932 
2933 		/* If num_evt_qs is less than num_tx_qs, then more than
2934 		 * one txq shares an eq
2935 		 */
2936 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2937 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2938 		if (status)
2939 			return status;
2940 
2941 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2942 					sizeof(struct be_eth_wrb));
2943 		if (status)
2944 			return status;
2945 
2946 		status = be_cmd_txq_create(adapter, txo);
2947 		if (status)
2948 			return status;
2949 
2950 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2951 				    eqo->idx);
2952 	}
2953 
2954 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2955 		 adapter->num_tx_qs);
2956 	return 0;
2957 }
2958 
2959 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2960 {
2961 	struct be_queue_info *q;
2962 	struct be_rx_obj *rxo;
2963 	int i;
2964 
2965 	for_all_rx_queues(adapter, rxo, i) {
2966 		q = &rxo->cq;
2967 		if (q->created)
2968 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2969 		be_queue_free(adapter, q);
2970 	}
2971 }
2972 
2973 static int be_rx_cqs_create(struct be_adapter *adapter)
2974 {
2975 	struct be_queue_info *eq, *cq;
2976 	struct be_rx_obj *rxo;
2977 	int rc, i;
2978 
2979 	adapter->num_rss_qs =
2980 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2981 
2982 	/* We'll use RSS only if at least 2 RSS rings are supported. */
2983 	if (adapter->num_rss_qs < 2)
2984 		adapter->num_rss_qs = 0;
2985 
2986 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2987 
2988 	/* When the interface is not capable of RSS rings (and there is no
2989 	 * need to create a default RXQ) we'll still need one RXQ
2990 	 */
2991 	if (adapter->num_rx_qs == 0)
2992 		adapter->num_rx_qs = 1;
2993 
2994 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2995 	for_all_rx_queues(adapter, rxo, i) {
2996 		rxo->adapter = adapter;
2997 		cq = &rxo->cq;
2998 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2999 				    sizeof(struct be_eth_rx_compl));
3000 		if (rc)
3001 			return rc;
3002 
3003 		u64_stats_init(&rxo->stats.sync);
3004 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3005 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3006 		if (rc)
3007 			return rc;
3008 	}
3009 
3010 	dev_info(&adapter->pdev->dev,
3011 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3012 	return 0;
3013 }
3014 
3015 static irqreturn_t be_intx(int irq, void *dev)
3016 {
3017 	struct be_eq_obj *eqo = dev;
3018 	struct be_adapter *adapter = eqo->adapter;
3019 	int num_evts = 0;
3020 
3021 	/* IRQ is not expected when NAPI is scheduled as the EQ
3022 	 * will not be armed.
3023 	 * But, this can happen on Lancer INTx where it takes
3024 	 * a while to de-assert INTx or in BE2 where occasionally
3025 	 * an interrupt may be raised even when EQ is unarmed.
3026 	 * If NAPI is already scheduled, then counting & notifying
3027 	 * events will orphan them.
3028 	 */
3029 	if (napi_schedule_prep(&eqo->napi)) {
3030 		num_evts = events_get(eqo);
3031 		__napi_schedule(&eqo->napi);
3032 		if (num_evts)
3033 			eqo->spurious_intr = 0;
3034 	}
3035 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3036 
3037 	/* Return IRQ_HANDLED only for the first spurious intr
3038 	 * after a valid intr to stop the kernel from branding
3039 	 * this irq as a bad one!
3040 	 */
3041 	if (num_evts || eqo->spurious_intr++ == 0)
3042 		return IRQ_HANDLED;
3043 	else
3044 		return IRQ_NONE;
3045 }
3046 
3047 static irqreturn_t be_msix(int irq, void *dev)
3048 {
3049 	struct be_eq_obj *eqo = dev;
3050 
3051 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3052 	napi_schedule(&eqo->napi);
3053 	return IRQ_HANDLED;
3054 }
3055 
3056 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3057 {
3058 	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3059 }
3060 
3061 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3062 			 int budget, int polling)
3063 {
3064 	struct be_adapter *adapter = rxo->adapter;
3065 	struct be_queue_info *rx_cq = &rxo->cq;
3066 	struct be_rx_compl_info *rxcp;
3067 	u32 work_done;
3068 	u32 frags_consumed = 0;
3069 
3070 	for (work_done = 0; work_done < budget; work_done++) {
3071 		rxcp = be_rx_compl_get(rxo);
3072 		if (!rxcp)
3073 			break;
3074 
3075 		/* Is it a flush compl that has no data? */
3076 		if (unlikely(rxcp->num_rcvd == 0))
3077 			goto loop_continue;
3078 
3079 		/* Discard compl with partial DMA Lancer B0 */
3080 		if (unlikely(!rxcp->pkt_size)) {
3081 			be_rx_compl_discard(rxo, rxcp);
3082 			goto loop_continue;
3083 		}
3084 
3085 		/* On BE drop pkts that arrive due to imperfect filtering in
3086 		 * promiscuous mode on some SKUs
3087 		 */
3088 		if (unlikely(rxcp->port != adapter->port_num &&
3089 			     !lancer_chip(adapter))) {
3090 			be_rx_compl_discard(rxo, rxcp);
3091 			goto loop_continue;
3092 		}
3093 
3094 		/* Don't do gro when we're busy_polling */
3095 		if (do_gro(rxcp) && polling != BUSY_POLLING)
3096 			be_rx_compl_process_gro(rxo, napi, rxcp);
3097 		else
3098 			be_rx_compl_process(rxo, napi, rxcp);
3099 
3100 loop_continue:
3101 		frags_consumed += rxcp->num_rcvd;
3102 		be_rx_stats_update(rxo, rxcp);
3103 	}
3104 
3105 	if (work_done) {
3106 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3107 
3108 		/* When an rx-obj gets into post_starved state, just
3109 		 * let be_worker do the posting.
3110 		 */
3111 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3112 		    !rxo->rx_post_starved)
3113 			be_post_rx_frags(rxo, GFP_ATOMIC,
3114 					 max_t(u32, MAX_RX_POST,
3115 					       frags_consumed));
3116 	}
3117 
3118 	return work_done;
3119 }
3120 
3121 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3122 {
3123 	switch (status) {
3124 	case BE_TX_COMP_HDR_PARSE_ERR:
3125 		tx_stats(txo)->tx_hdr_parse_err++;
3126 		break;
3127 	case BE_TX_COMP_NDMA_ERR:
3128 		tx_stats(txo)->tx_dma_err++;
3129 		break;
3130 	case BE_TX_COMP_ACL_ERR:
3131 		tx_stats(txo)->tx_spoof_check_err++;
3132 		break;
3133 	}
3134 }
3135 
3136 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3137 {
3138 	switch (status) {
3139 	case LANCER_TX_COMP_LSO_ERR:
3140 		tx_stats(txo)->tx_tso_err++;
3141 		break;
3142 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3143 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3144 		tx_stats(txo)->tx_spoof_check_err++;
3145 		break;
3146 	case LANCER_TX_COMP_QINQ_ERR:
3147 		tx_stats(txo)->tx_qinq_err++;
3148 		break;
3149 	case LANCER_TX_COMP_PARITY_ERR:
3150 		tx_stats(txo)->tx_internal_parity_err++;
3151 		break;
3152 	case LANCER_TX_COMP_DMA_ERR:
3153 		tx_stats(txo)->tx_dma_err++;
3154 		break;
3155 	}
3156 }
3157 
3158 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3159 			  int idx)
3160 {
3161 	int num_wrbs = 0, work_done = 0;
3162 	struct be_tx_compl_info *txcp;
3163 
3164 	while ((txcp = be_tx_compl_get(txo))) {
3165 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3166 		work_done++;
3167 
3168 		if (txcp->status) {
3169 			if (lancer_chip(adapter))
3170 				lancer_update_tx_err(txo, txcp->status);
3171 			else
3172 				be_update_tx_err(txo, txcp->status);
3173 		}
3174 	}
3175 
3176 	if (work_done) {
3177 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3178 		atomic_sub(num_wrbs, &txo->q.used);
3179 
3180 		/* As Tx wrbs have been freed up, wake up netdev queue
3181 		 * if it was stopped due to lack of tx wrbs.  */
3182 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3183 		    be_can_txq_wake(txo)) {
3184 			netif_wake_subqueue(adapter->netdev, idx);
3185 		}
3186 
3187 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3188 		tx_stats(txo)->tx_compl += work_done;
3189 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3190 	}
3191 }
3192 
3193 #ifdef CONFIG_NET_RX_BUSY_POLL
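/* eqo->state arbitrates ownership of an EQ between NAPI and busy-poll
 * contexts: a contender that finds the EQ locked sets a *_YIELD flag and
 * backs off instead of spinning (see the lock/unlock helpers below).
 */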
3194 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3195 {
3196 	bool status = true;
3197 
3198 	spin_lock(&eqo->lock); /* BH is already disabled */
3199 	if (eqo->state & BE_EQ_LOCKED) {
3200 		WARN_ON(eqo->state & BE_EQ_NAPI);
3201 		eqo->state |= BE_EQ_NAPI_YIELD;
3202 		status = false;
3203 	} else {
3204 		eqo->state = BE_EQ_NAPI;
3205 	}
3206 	spin_unlock(&eqo->lock);
3207 	return status;
3208 }
3209 
3210 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3211 {
3212 	spin_lock(&eqo->lock); /* BH is already disabled */
3213 
3214 	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3215 	eqo->state = BE_EQ_IDLE;
3216 
3217 	spin_unlock(&eqo->lock);
3218 }
3219 
3220 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3221 {
3222 	bool status = true;
3223 
3224 	spin_lock_bh(&eqo->lock);
3225 	if (eqo->state & BE_EQ_LOCKED) {
3226 		eqo->state |= BE_EQ_POLL_YIELD;
3227 		status = false;
3228 	} else {
3229 		eqo->state |= BE_EQ_POLL;
3230 	}
3231 	spin_unlock_bh(&eqo->lock);
3232 	return status;
3233 }
3234 
3235 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3236 {
3237 	spin_lock_bh(&eqo->lock);
3238 
3239 	WARN_ON(eqo->state & (BE_EQ_NAPI));
3240 	eqo->state = BE_EQ_IDLE;
3241 
3242 	spin_unlock_bh(&eqo->lock);
3243 }
3244 
3245 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3246 {
3247 	spin_lock_init(&eqo->lock);
3248 	eqo->state = BE_EQ_IDLE;
3249 }
3250 
3251 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3252 {
3253 	local_bh_disable();
3254 
3255 	/* It's enough to just acquire napi lock on the eqo to stop
3256 	 * be_busy_poll() from processing any queues.
3257 	 */
3258 	while (!be_lock_napi(eqo))
3259 		mdelay(1);
3260 
3261 	local_bh_enable();
3262 }
3263 
3264 #else /* CONFIG_NET_RX_BUSY_POLL */
3265 
3266 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3267 {
3268 	return true;
3269 }
3270 
3271 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3272 {
3273 }
3274 
3275 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3276 {
3277 	return false;
3278 }
3279 
3280 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3281 {
3282 }
3283 
3284 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3285 {
3286 }
3287 
3288 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3289 {
3290 }
3291 #endif /* CONFIG_NET_RX_BUSY_POLL */
3292 
3293 int be_poll(struct napi_struct *napi, int budget)
3294 {
3295 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296 	struct be_adapter *adapter = eqo->adapter;
3297 	int max_work = 0, work, i, num_evts;
3298 	struct be_rx_obj *rxo;
3299 	struct be_tx_obj *txo;
3300 	u32 mult_enc = 0;
3301 
3302 	num_evts = events_get(eqo);
3303 
3304 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305 		be_process_tx(adapter, txo, i);
3306 
3307 	if (be_lock_napi(eqo)) {
3308 		/* This loop will iterate twice for EQ0 in which
3309 		 * completions of the last RXQ (default one) are also processed.
3310 		 * For other EQs the loop iterates only once.
3311 		 */
3312 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3314 			max_work = max(work, max_work);
3315 		}
3316 		be_unlock_napi(eqo);
3317 	} else {
3318 		max_work = budget;
3319 	}
3320 
3321 	if (is_mcc_eqo(eqo))
3322 		be_process_mcc(adapter);
3323 
3324 	if (max_work < budget) {
3325 		napi_complete(napi);
3326 
3327 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3328 		 * delay via a delay multiplier encoding value
3329 		 */
3330 		if (skyhawk_chip(adapter))
3331 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3332 
3333 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3334 			     mult_enc);
3335 	} else {
3336 		/* As we'll continue in polling mode, count and clear events */
3337 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3338 	}
3339 	return max_work;
3340 }
3341 
3342 #ifdef CONFIG_NET_RX_BUSY_POLL
3343 static int be_busy_poll(struct napi_struct *napi)
3344 {
3345 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3346 	struct be_adapter *adapter = eqo->adapter;
3347 	struct be_rx_obj *rxo;
3348 	int i, work = 0;
3349 
3350 	if (!be_lock_busy_poll(eqo))
3351 		return LL_FLUSH_BUSY;
3352 
3353 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3354 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3355 		if (work)
3356 			break;
3357 	}
3358 
3359 	be_unlock_busy_poll(eqo);
3360 	return work;
3361 }
3362 #endif
3363 
3364 void be_detect_error(struct be_adapter *adapter)
3365 {
3366 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3367 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3368 	u32 i;
3369 	struct device *dev = &adapter->pdev->dev;
3370 
3371 	if (be_check_error(adapter, BE_ERROR_HW))
3372 		return;
3373 
3374 	if (lancer_chip(adapter)) {
3375 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3376 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3377 			be_set_error(adapter, BE_ERROR_UE);
3378 			sliport_err1 = ioread32(adapter->db +
3379 						SLIPORT_ERROR1_OFFSET);
3380 			sliport_err2 = ioread32(adapter->db +
3381 						SLIPORT_ERROR2_OFFSET);
3382 			/* Do not log error messages if it's a FW reset */
3383 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3384 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3385 				dev_info(dev, "Firmware update in progress\n");
3386 			} else {
3387 				dev_err(dev, "Error detected in the card\n");
3388 				dev_err(dev, "ERR: sliport status 0x%x\n",
3389 					sliport_status);
3390 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3391 					sliport_err1);
3392 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3393 					sliport_err2);
3394 			}
3395 		}
3396 	} else {
3397 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3398 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3399 		ue_lo_mask = ioread32(adapter->pcicfg +
3400 				      PCICFG_UE_STATUS_LOW_MASK);
3401 		ue_hi_mask = ioread32(adapter->pcicfg +
3402 				      PCICFG_UE_STATUS_HI_MASK);
3403 
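		/* A bit set in a mask register marks that UE source as
		 * ignorable; clear such bits so only unexpected UEs are
		 * reported below.
		 */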
3404 		ue_lo = (ue_lo & ~ue_lo_mask);
3405 		ue_hi = (ue_hi & ~ue_hi_mask);
3406 
3407 		/* On certain platforms BE hardware can indicate spurious UEs.
3408 		 * A genuine UE will halt the HW on its own, so do not latch
3409 		 * hw_error merely because a UE was detected.
3410 		 */
3411 
3412 		if (ue_lo || ue_hi) {
3413 			dev_err(dev, "Error detected in the adapter\n");
3414 			if (skyhawk_chip(adapter))
3415 				be_set_error(adapter, BE_ERROR_UE);
3416 
3417 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3418 				if (ue_lo & 1)
3419 					dev_err(dev, "UE: %s bit set\n",
3420 						ue_status_low_desc[i]);
3421 			}
3422 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3423 				if (ue_hi & 1)
3424 					dev_err(dev, "UE: %s bit set\n",
3425 						ue_status_hi_desc[i]);
3426 			}
3427 		}
3428 	}
3429 }
3430 
3431 static void be_msix_disable(struct be_adapter *adapter)
3432 {
3433 	if (msix_enabled(adapter)) {
3434 		pci_disable_msix(adapter->pdev);
3435 		adapter->num_msix_vec = 0;
3436 		adapter->num_msix_roce_vec = 0;
3437 	}
3438 }
3439 
3440 static int be_msix_enable(struct be_adapter *adapter)
3441 {
3442 	unsigned int i, max_roce_eqs;
3443 	struct device *dev = &adapter->pdev->dev;
3444 	int num_vec;
3445 
3446 	/* If RoCE is supported, program the max number of vectors that
3447 	 * could be used for NIC and RoCE; otherwise just program the number
3448 	 * we'll use initially.
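	 * When more than MIN_MSIX_VECTORS are granted, half of them are
	 * handed to RoCE (see the num_vec / 2 split below).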
3449 	 */
3450 	if (be_roce_supported(adapter)) {
3451 		max_roce_eqs =
3452 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3453 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3454 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3455 	} else {
3456 		num_vec = max(adapter->cfg_num_rx_irqs,
3457 			      adapter->cfg_num_tx_irqs);
3458 	}
3459 
3460 	for (i = 0; i < num_vec; i++)
3461 		adapter->msix_entries[i].entry = i;
3462 
3463 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3464 					MIN_MSIX_VECTORS, num_vec);
3465 	if (num_vec < 0)
3466 		goto fail;
3467 
3468 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3469 		adapter->num_msix_roce_vec = num_vec / 2;
3470 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3471 			 adapter->num_msix_roce_vec);
3472 	}
3473 
3474 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3475 
3476 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3477 		 adapter->num_msix_vec);
3478 	return 0;
3479 
3480 fail:
3481 	dev_warn(dev, "MSIx enable failed\n");
3482 
3483 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3484 	if (be_virtfn(adapter))
3485 		return num_vec;
3486 	return 0;
3487 }
3488 
3489 static inline int be_msix_vec_get(struct be_adapter *adapter,
3490 				  struct be_eq_obj *eqo)
3491 {
3492 	return adapter->msix_entries[eqo->msix_idx].vector;
3493 }
3494 
3495 static int be_msix_register(struct be_adapter *adapter)
3496 {
3497 	struct net_device *netdev = adapter->netdev;
3498 	struct be_eq_obj *eqo;
3499 	int status, i, vec;
3500 
3501 	for_all_evt_queues(adapter, eqo, i) {
3502 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3503 		vec = be_msix_vec_get(adapter, eqo);
3504 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3505 		if (status)
3506 			goto err_msix;
3507 
3508 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3509 	}
3510 
3511 	return 0;
3512 err_msix:
3513 	for (i--; i >= 0; i--) {
3514 		eqo = &adapter->eq_obj[i];
3515 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3516 	}
3517 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3518 		 status);
3519 	be_msix_disable(adapter);
3520 	return status;
3521 }
3522 
3523 static int be_irq_register(struct be_adapter *adapter)
3524 {
3525 	struct net_device *netdev = adapter->netdev;
3526 	int status;
3527 
3528 	if (msix_enabled(adapter)) {
3529 		status = be_msix_register(adapter);
3530 		if (status == 0)
3531 			goto done;
3532 		/* INTx is not supported for VF */
3533 		if (be_virtfn(adapter))
3534 			return status;
3535 	}
3536 
3537 	/* INTx: only the first EQ is used */
3538 	netdev->irq = adapter->pdev->irq;
3539 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3540 			     &adapter->eq_obj[0]);
3541 	if (status) {
3542 		dev_err(&adapter->pdev->dev,
3543 			"INTx request IRQ failed - err %d\n", status);
3544 		return status;
3545 	}
3546 done:
3547 	adapter->isr_registered = true;
3548 	return 0;
3549 }
3550 
3551 static void be_irq_unregister(struct be_adapter *adapter)
3552 {
3553 	struct net_device *netdev = adapter->netdev;
3554 	struct be_eq_obj *eqo;
3555 	int i, vec;
3556 
3557 	if (!adapter->isr_registered)
3558 		return;
3559 
3560 	/* INTx */
3561 	if (!msix_enabled(adapter)) {
3562 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3563 		goto done;
3564 	}
3565 
3566 	/* MSIx */
3567 	for_all_evt_queues(adapter, eqo, i) {
3568 		vec = be_msix_vec_get(adapter, eqo);
3569 		irq_set_affinity_hint(vec, NULL);
3570 		free_irq(vec, eqo);
3571 	}
3572 
3573 done:
3574 	adapter->isr_registered = false;
3575 }
3576 
3577 static void be_rx_qs_destroy(struct be_adapter *adapter)
3578 {
3579 	struct rss_info *rss = &adapter->rss_info;
3580 	struct be_queue_info *q;
3581 	struct be_rx_obj *rxo;
3582 	int i;
3583 
3584 	for_all_rx_queues(adapter, rxo, i) {
3585 		q = &rxo->q;
3586 		if (q->created) {
3587 			/* If RXQs are destroyed while in an "out of buffer"
3588 			 * state, there is a possibility of an HW stall on
3589 			 * Lancer. So, post 64 buffers to each queue to relieve
3590 			 * the "out of buffer" condition.
3591 			 * Make sure there's space in the RXQ before posting.
3592 			 */
3593 			if (lancer_chip(adapter)) {
3594 				be_rx_cq_clean(rxo);
3595 				if (atomic_read(&q->used) == 0)
3596 					be_post_rx_frags(rxo, GFP_KERNEL,
3597 							 MAX_RX_POST);
3598 			}
3599 
3600 			be_cmd_rxq_destroy(adapter, q);
3601 			be_rx_cq_clean(rxo);
3602 			be_rxq_clean(rxo);
3603 		}
3604 		be_queue_free(adapter, q);
3605 	}
3606 
3607 	if (rss->rss_flags) {
3608 		rss->rss_flags = RSS_ENABLE_NONE;
3609 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
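		/* The table length of 128 below matches RSS_INDIR_TABLE_LEN
		 * used when RSS was configured in be_rx_qs_create().
		 */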
3610 				  128, rss->rss_hkey);
3611 	}
3612 }
3613 
3614 static void be_disable_if_filters(struct be_adapter *adapter)
3615 {
3616 	be_dev_mac_del(adapter, adapter->pmac_id[0]);
3617 	be_clear_uc_list(adapter);
3618 	be_clear_mc_list(adapter);
3619 
3620 	/* The IFACE flags are enabled in the open path and cleared
3621 	 * in the close path. When a VF gets detached from the host and
3622 	 * assigned to a VM the following happens:
3623 	 *	- VF's IFACE flags get cleared in the detach path
3624 	 *	- IFACE create is issued by the VF in the attach path
3625 	 * Due to a bug in the BE3/Skyhawk-R FW
3626 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3627 	 * specified along with the IFACE create cmd issued by a VF are not
3628 	 * honoured by FW.  As a consequence, if a *new* driver
3629 	 * (that enables/disables IFACE flags in open/close)
3630 	 * is loaded in the host and an *old* driver is used by a VM/VF,
3631 	 * the IFACE gets created *without* the needed flags.
3632 	 * To avoid this, disable RX-filter flags only for Lancer.
3633 	 */
3634 	if (lancer_chip(adapter)) {
3635 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3636 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3637 	}
3638 }
3639 
3640 static int be_close(struct net_device *netdev)
3641 {
3642 	struct be_adapter *adapter = netdev_priv(netdev);
3643 	struct be_eq_obj *eqo;
3644 	int i;
3645 
3646 	/* This protection is needed as be_close() may be called even when the
3647 	 * adapter is in a cleared state (after an EEH permanent failure)
3648 	 */
3649 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3650 		return 0;
3651 
3652 	/* Before attempting cleanup ensure all the pending cmds in the
3653 	 * config_wq have finished execution
3654 	 */
3655 	flush_workqueue(be_wq);
3656 
3657 	be_disable_if_filters(adapter);
3658 
3659 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3660 		for_all_evt_queues(adapter, eqo, i) {
3661 			napi_disable(&eqo->napi);
3662 			be_disable_busy_poll(eqo);
3663 		}
3664 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3665 	}
3666 
3667 	be_async_mcc_disable(adapter);
3668 
3669 	/* Wait for all pending tx completions to arrive so that
3670 	 * all tx skbs are freed.
3671 	 */
3672 	netif_tx_disable(netdev);
3673 	be_tx_compl_clean(adapter);
3674 
3675 	be_rx_qs_destroy(adapter);
3676 
3677 	for_all_evt_queues(adapter, eqo, i) {
3678 		if (msix_enabled(adapter))
3679 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3680 		else
3681 			synchronize_irq(netdev->irq);
3682 		be_eq_clean(eqo);
3683 	}
3684 
3685 	be_irq_unregister(adapter);
3686 
3687 	return 0;
3688 }
3689 
3690 static int be_rx_qs_create(struct be_adapter *adapter)
3691 {
3692 	struct rss_info *rss = &adapter->rss_info;
3693 	u8 rss_key[RSS_HASH_KEY_LEN];
3694 	struct be_rx_obj *rxo;
3695 	int rc, i, j;
3696 
3697 	for_all_rx_queues(adapter, rxo, i) {
3698 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3699 				    sizeof(struct be_eth_rx_d));
3700 		if (rc)
3701 			return rc;
3702 	}
3703 
3704 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3705 		rxo = default_rxo(adapter);
3706 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707 				       rx_frag_size, adapter->if_handle,
3708 				       false, &rxo->rss_id);
3709 		if (rc)
3710 			return rc;
3711 	}
3712 
3713 	for_all_rss_queues(adapter, rxo, i) {
3714 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3715 				       rx_frag_size, adapter->if_handle,
3716 				       true, &rxo->rss_id);
3717 		if (rc)
3718 			return rc;
3719 	}
3720 
3721 	if (be_multi_rxq(adapter)) {
3722 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3723 			for_all_rss_queues(adapter, rxo, i) {
3724 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3725 					break;
3726 				rss->rsstable[j + i] = rxo->rss_id;
3727 				rss->rss_queue[j + i] = i;
3728 			}
3729 		}
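		/* Illustrative fill: with 4 RSS queues, the indirection table
		 * holds the repeating pattern q0, q1, q2, q3, q0, ... so that
		 * hashed flows spread evenly across the RSS rings.
		 */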
3730 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3731 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3732 
3733 		if (!BEx_chip(adapter))
3734 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3735 				RSS_ENABLE_UDP_IPV6;
3736 
3737 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3738 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3739 				       RSS_INDIR_TABLE_LEN, rss_key);
3740 		if (rc) {
3741 			rss->rss_flags = RSS_ENABLE_NONE;
3742 			return rc;
3743 		}
3744 
3745 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3746 	} else {
3747 		/* Disable RSS, if only default RX Q is created */
3748 		rss->rss_flags = RSS_ENABLE_NONE;
3749 	}
3750 
3752 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3753 	 * which is a queue empty condition
3754 	 */
3755 	for_all_rx_queues(adapter, rxo, i)
3756 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3757 
3758 	return 0;
3759 }
3760 
3761 static int be_enable_if_filters(struct be_adapter *adapter)
3762 {
3763 	int status;
3764 
3765 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3766 	if (status)
3767 		return status;
3768 
3769 	/* For BE3 VFs, the PF programs the initial MAC address */
3770 	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3771 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772 		if (status)
3773 			return status;
3774 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3775 	}
3776 
3777 	if (adapter->vlans_added)
3778 		be_vid_config(adapter);
3779 
3780 	__be_set_rx_mode(adapter);
3781 
3782 	return 0;
3783 }
3784 
3785 static int be_open(struct net_device *netdev)
3786 {
3787 	struct be_adapter *adapter = netdev_priv(netdev);
3788 	struct be_eq_obj *eqo;
3789 	struct be_rx_obj *rxo;
3790 	struct be_tx_obj *txo;
3791 	u8 link_status;
3792 	int status, i;
3793 
3794 	status = be_rx_qs_create(adapter);
3795 	if (status)
3796 		goto err;
3797 
3798 	status = be_enable_if_filters(adapter);
3799 	if (status)
3800 		goto err;
3801 
3802 	status = be_irq_register(adapter);
3803 	if (status)
3804 		goto err;
3805 
3806 	for_all_rx_queues(adapter, rxo, i)
3807 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3808 
3809 	for_all_tx_queues(adapter, txo, i)
3810 		be_cq_notify(adapter, txo->cq.id, true, 0);
3811 
3812 	be_async_mcc_enable(adapter);
3813 
3814 	for_all_evt_queues(adapter, eqo, i) {
3815 		napi_enable(&eqo->napi);
3816 		be_enable_busy_poll(eqo);
3817 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3818 	}
3819 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3820 
3821 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3822 	if (!status)
3823 		be_link_status_update(adapter, link_status);
3824 
3825 	netif_tx_start_all_queues(netdev);
3826 	if (skyhawk_chip(adapter))
3827 		udp_tunnel_get_rx_info(netdev);
3828 
3829 	return 0;
3830 err:
3831 	be_close(adapter->netdev);
3832 	return -EIO;
3833 }
3834 
3835 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3836 {
3837 	u32 addr;
3838 
3839 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3840 
3841 	mac[5] = (u8)(addr & 0xFF);
3842 	mac[4] = (u8)((addr >> 8) & 0xFF);
3843 	mac[3] = (u8)((addr >> 16) & 0xFF);
3844 	/* Use the OUI from the current MAC address */
3845 	memcpy(mac, adapter->netdev->dev_addr, 3);
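	/* Resulting layout: bytes 0-2 keep the adapter's OUI; bytes 3-5 carry
	 * the low 24 bits of the jhash value.
	 */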
3846 }
3847 
3848 /*
3849  * Generate a seed MAC address from the PF MAC Address using jhash.
3850  * MAC addresses for VFs are assigned incrementally starting from the seed.
3851  * These addresses are programmed in the ASIC by the PF and the VF driver
3852  * queries for the MAC address during its probe.
3853  */
3854 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3855 {
3856 	u32 vf;
3857 	int status = 0;
3858 	u8 mac[ETH_ALEN];
3859 	struct be_vf_cfg *vf_cfg;
3860 
3861 	be_vf_eth_addr_generate(adapter, mac);
3862 
3863 	for_all_vfs(adapter, vf_cfg, vf) {
3864 		if (BEx_chip(adapter))
3865 			status = be_cmd_pmac_add(adapter, mac,
3866 						 vf_cfg->if_handle,
3867 						 &vf_cfg->pmac_id, vf + 1);
3868 		else
3869 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3870 						vf + 1);
3871 
3872 		if (status)
3873 			dev_err(&adapter->pdev->dev,
3874 				"MAC address assignment failed for VF %d\n",
3875 				vf);
3876 		else
3877 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3878 
3879 		mac[5] += 1;
3880 	}
3881 	return status;
3882 }
3883 
3884 static int be_vfs_mac_query(struct be_adapter *adapter)
3885 {
3886 	int status, vf;
3887 	u8 mac[ETH_ALEN];
3888 	struct be_vf_cfg *vf_cfg;
3889 
3890 	for_all_vfs(adapter, vf_cfg, vf) {
3891 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3892 					       mac, vf_cfg->if_handle,
3893 					       false, vf + 1);
3894 		if (status)
3895 			return status;
3896 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3897 	}
3898 	return 0;
3899 }
3900 
3901 static void be_vf_clear(struct be_adapter *adapter)
3902 {
3903 	struct be_vf_cfg *vf_cfg;
3904 	u32 vf;
3905 
3906 	if (pci_vfs_assigned(adapter->pdev)) {
3907 		dev_warn(&adapter->pdev->dev,
3908 			 "VFs are assigned to VMs: not disabling VFs\n");
3909 		goto done;
3910 	}
3911 
3912 	pci_disable_sriov(adapter->pdev);
3913 
3914 	for_all_vfs(adapter, vf_cfg, vf) {
3915 		if (BEx_chip(adapter))
3916 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3917 					vf_cfg->pmac_id, vf + 1);
3918 		else
3919 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3920 				       vf + 1);
3921 
3922 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3923 	}
3924 
3925 	if (BE3_chip(adapter))
3926 		be_cmd_set_hsw_config(adapter, 0, 0,
3927 				      adapter->if_handle,
3928 				      PORT_FWD_TYPE_PASSTHRU, 0);
3929 done:
3930 	kfree(adapter->vf_cfg);
3931 	adapter->num_vfs = 0;
3932 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3933 }
3934 
3935 static void be_clear_queues(struct be_adapter *adapter)
3936 {
3937 	be_mcc_queues_destroy(adapter);
3938 	be_rx_cqs_destroy(adapter);
3939 	be_tx_queues_destroy(adapter);
3940 	be_evt_queues_destroy(adapter);
3941 }
3942 
3943 static void be_cancel_worker(struct be_adapter *adapter)
3944 {
3945 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3946 		cancel_delayed_work_sync(&adapter->work);
3947 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3948 	}
3949 }
3950 
3951 static void be_cancel_err_detection(struct be_adapter *adapter)
3952 {
3953 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3954 
3955 	if (!be_err_recovery_workq)
3956 		return;
3957 
3958 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3959 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3960 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3961 	}
3962 }
3963 
3964 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3965 {
3966 	struct net_device *netdev = adapter->netdev;
3967 
3968 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3969 		be_cmd_manage_iface(adapter, adapter->if_handle,
3970 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3971 
3972 	if (adapter->vxlan_port)
3973 		be_cmd_set_vxlan_port(adapter, 0);
3974 
3975 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3976 	adapter->vxlan_port = 0;
3977 
3978 	netdev->hw_enc_features = 0;
3979 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3980 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3981 }
3982 
3983 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3984 				struct be_resources *vft_res)
3985 {
3986 	struct be_resources res = adapter->pool_res;
3987 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3988 	struct be_resources res_mod = {0};
3989 	u16 num_vf_qs = 1;
3990 
3991 	/* Distribute the queue resources among the PF and its VFs */
3992 	if (num_vfs) {
3993 		/* Divide the rx queues evenly among the VFs and the PF, capped
3994 		 * at VF-EQ-count. Any remainder queues belong to the PF.
3995 		 */
3996 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3997 				res.max_rss_qs / (num_vfs + 1));
3998 
3999 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4000 		 * RSS Tables per port. Provide RSS on VFs only if the number of
4001 		 * VFs requested is less than its PF pool's RSS Tables limit.
4002 		 */
4003 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4004 			num_vf_qs = 1;
4005 	}
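	/* Illustrative arithmetic (values assumed): with res.max_rss_qs = 32
	 * and num_vfs = 7, each of the 8 functions gets
	 * min(SH_VF_MAX_NIC_EQS, 32 / 8) RSS queues.
	 */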
4006 
4007 	/* GET_PROFILE_CONFIG marks with all '1's those resource fields that
4008 	 * are modifiable using the SET_PROFILE_CONFIG cmd.
4009 	 */
4010 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4011 				  RESOURCE_MODIFIABLE, 0);
4012 
4013 	/* If RSS IFACE capability flags are modifiable for a VF, set the
4014 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4015 	 * more than 1 RSSQ is available for a VF.
4016 	 * Otherwise, provision only 1 queue pair for VF.
4017 	 */
4018 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4019 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4020 		if (num_vf_qs > 1) {
4021 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4022 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4023 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4024 		} else {
4025 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4026 					     BE_IF_FLAGS_DEFQ_RSS);
4027 		}
4028 	} else {
4029 		num_vf_qs = 1;
4030 	}
4031 
4032 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4033 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4034 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4035 	}
4036 
4037 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4038 	vft_res->max_rx_qs = num_vf_qs;
4039 	vft_res->max_rss_qs = num_vf_qs;
4040 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4041 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4042 
4043 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4044 	 * among the PF and its VFs, if the fields are changeable
4045 	 */
4046 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4047 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4048 
4049 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4050 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4051 
4052 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4053 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4054 
4055 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4056 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4057 }
4058 
4059 static void be_if_destroy(struct be_adapter *adapter)
4060 {
4061 	be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4062 
4063 	kfree(adapter->pmac_id);
4064 	adapter->pmac_id = NULL;
4065 
4066 	kfree(adapter->mc_list);
4067 	adapter->mc_list = NULL;
4068 
4069 	kfree(adapter->uc_list);
4070 	adapter->uc_list = NULL;
4071 }
4072 
4073 static int be_clear(struct be_adapter *adapter)
4074 {
4075 	struct pci_dev *pdev = adapter->pdev;
4076 	struct be_resources vft_res = {0};
4077 
4078 	be_cancel_worker(adapter);
4079 
4080 	flush_workqueue(be_wq);
4081 
4082 	if (sriov_enabled(adapter))
4083 		be_vf_clear(adapter);
4084 
4085 	/* Re-configure FW to distribute resources evenly across max-supported
4086 	 * number of VFs, only when VFs are not already enabled.
4087 	 */
4088 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4089 	    !pci_vfs_assigned(pdev)) {
4090 		be_calculate_vf_res(adapter,
4091 				    pci_sriov_get_totalvfs(pdev),
4092 				    &vft_res);
4093 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4094 					pci_sriov_get_totalvfs(pdev),
4095 					&vft_res);
4096 	}
4097 
4098 	be_disable_vxlan_offloads(adapter);
4099 
4100 	be_if_destroy(adapter);
4101 
4102 	be_clear_queues(adapter);
4103 
4104 	be_msix_disable(adapter);
4105 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4106 	return 0;
4107 }
4108 
4109 static int be_vfs_if_create(struct be_adapter *adapter)
4110 {
4111 	struct be_resources res = {0};
4112 	u32 cap_flags, en_flags, vf;
4113 	struct be_vf_cfg *vf_cfg;
4114 	int status;
4115 
4116 	/* If a FW profile exists, then cap_flags are updated */
4117 	cap_flags = BE_VF_IF_EN_FLAGS;
4118 
4119 	for_all_vfs(adapter, vf_cfg, vf) {
4120 		if (!BE3_chip(adapter)) {
4121 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4122 							   ACTIVE_PROFILE_TYPE,
4123 							   RESOURCE_LIMITS,
4124 							   vf + 1);
4125 			if (!status) {
4126 				cap_flags = res.if_cap_flags;
4127 				/* Prevent VFs from enabling VLAN promiscuous
4128 				 * mode
4129 				 */
4130 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4131 			}
4132 		}
4133 
4134 		/* PF should enable IF flags during proxy if_create call */
4135 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4136 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4137 					  &vf_cfg->if_handle, vf + 1);
4138 		if (status)
4139 			return status;
4140 	}
4141 
4142 	return 0;
4143 }
4144 
4145 static int be_vf_setup_init(struct be_adapter *adapter)
4146 {
4147 	struct be_vf_cfg *vf_cfg;
4148 	int vf;
4149 
4150 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4151 				  GFP_KERNEL);
4152 	if (!adapter->vf_cfg)
4153 		return -ENOMEM;
4154 
4155 	for_all_vfs(adapter, vf_cfg, vf) {
4156 		vf_cfg->if_handle = -1;
4157 		vf_cfg->pmac_id = -1;
4158 	}
4159 	return 0;
4160 }
4161 
4162 static int be_vf_setup(struct be_adapter *adapter)
4163 {
4164 	struct device *dev = &adapter->pdev->dev;
4165 	struct be_vf_cfg *vf_cfg;
4166 	int status, old_vfs, vf;
4167 	bool spoofchk;
4168 
4169 	old_vfs = pci_num_vf(adapter->pdev);
4170 
4171 	status = be_vf_setup_init(adapter);
4172 	if (status)
4173 		goto err;
4174 
4175 	if (old_vfs) {
4176 		for_all_vfs(adapter, vf_cfg, vf) {
4177 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4178 			if (status)
4179 				goto err;
4180 		}
4181 
4182 		status = be_vfs_mac_query(adapter);
4183 		if (status)
4184 			goto err;
4185 	} else {
4186 		status = be_vfs_if_create(adapter);
4187 		if (status)
4188 			goto err;
4189 
4190 		status = be_vf_eth_addr_config(adapter);
4191 		if (status)
4192 			goto err;
4193 	}
4194 
4195 	for_all_vfs(adapter, vf_cfg, vf) {
4196 		/* Allow VFs to program MAC/VLAN filters */
4197 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4198 						  vf + 1);
4199 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4200 			status = be_cmd_set_fn_privileges(adapter,
4201 							  vf_cfg->privileges |
4202 							  BE_PRIV_FILTMGMT,
4203 							  vf + 1);
4204 			if (!status) {
4205 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4206 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4207 					 vf);
4208 			}
4209 		}
4210 
4211 		/* Allow full available bandwidth */
4212 		if (!old_vfs)
4213 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4214 
4215 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4216 					       vf_cfg->if_handle, NULL,
4217 					       &spoofchk);
4218 		if (!status)
4219 			vf_cfg->spoofchk = spoofchk;
4220 
4221 		if (!old_vfs) {
4222 			be_cmd_enable_vf(adapter, vf + 1);
4223 			be_cmd_set_logical_link_config(adapter,
4224 						       IFLA_VF_LINK_STATE_AUTO,
4225 						       vf + 1);
4226 		}
4227 	}
4228 
4229 	if (!old_vfs) {
4230 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4231 		if (status) {
4232 			dev_err(dev, "SRIOV enable failed\n");
4233 			adapter->num_vfs = 0;
4234 			goto err;
4235 		}
4236 	}
4237 
4238 	if (BE3_chip(adapter)) {
4239 		/* On BE3, enable VEB only when SRIOV is enabled */
4240 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4241 					       adapter->if_handle,
4242 					       PORT_FWD_TYPE_VEB, 0);
4243 		if (status)
4244 			goto err;
4245 	}
4246 
4247 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4248 	return 0;
4249 err:
4250 	dev_err(dev, "VF setup failed\n");
4251 	be_vf_clear(adapter);
4252 	return status;
4253 }
4254 
4255 /* Converting function_mode bits on BE3 to SH mc_type enums */
4256 
4257 static u8 be_convert_mc_type(u32 function_mode)
4258 {
4259 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4260 		return vNIC1;
4261 	else if (function_mode & QNQ_MODE)
4262 		return FLEX10;
4263 	else if (function_mode & VNIC_MODE)
4264 		return vNIC2;
4265 	else if (function_mode & UMC_ENABLED)
4266 		return UMC;
4267 	else
4268 		return MC_NONE;
4269 }
4270 
4271 /* On BE2/BE3, the FW does not report the supported resource limits */
4272 static void BEx_get_resources(struct be_adapter *adapter,
4273 			      struct be_resources *res)
4274 {
4275 	bool use_sriov = adapter->num_vfs ? true : false;
4276 
4277 	if (be_physfn(adapter))
4278 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4279 	else
4280 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4281 
4282 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4283 
4284 	if (be_is_mc(adapter)) {
4285 		/* Assuming that there are 4 channels per port,
4286 		 * when multi-channel is enabled
4287 		 */
4288 		if (be_is_qnq_mode(adapter))
4289 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4290 		else
4291 			/* In a non-qnq multichannel mode, the pvid
4292 			 * takes up one vlan entry
4293 			 */
4294 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4295 	} else {
4296 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4297 	}
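	/* Illustrative budget (assuming BE_NUM_VLANS_SUPPORTED = 64): qnq
	 * multi-channel mode leaves 64 / 8 = 8 VLAN entries per function,
	 * while non-qnq multi-channel leaves 64 / 4 - 1 = 15, the pvid
	 * consuming one entry.
	 */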
4298 
4299 	res->max_mcast_mac = BE_MAX_MC;
4300 
4301 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4302 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4303 	 *    *only* if it is RSS-capable.
4304 	 */
4305 	if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4306 	    be_virtfn(adapter) ||
4307 	    (be_is_mc(adapter) &&
4308 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4309 		res->max_tx_qs = 1;
4310 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4311 		struct be_resources super_nic_res = {0};
4312 
4313 		/* On a SuperNIC profile, the driver needs to use the
4314 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4315 		 */
4316 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4317 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4318 					  0);
4319 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4320 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4321 	} else {
4322 		res->max_tx_qs = BE3_MAX_TX_QS;
4323 	}
4324 
4325 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4326 	    !use_sriov && be_physfn(adapter))
4327 		res->max_rss_qs = (adapter->be3_native) ?
4328 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4329 	res->max_rx_qs = res->max_rss_qs + 1;
4330 
4331 	if (be_physfn(adapter))
4332 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4333 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4334 	else
4335 		res->max_evt_qs = 1;
4336 
4337 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4338 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4339 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4340 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4341 }
4342 
4343 static void be_setup_init(struct be_adapter *adapter)
4344 {
4345 	adapter->vlan_prio_bmap = 0xff;
4346 	adapter->phy.link_speed = -1;
4347 	adapter->if_handle = -1;
4348 	adapter->be3_native = false;
4349 	adapter->if_flags = 0;
4350 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4351 	if (be_physfn(adapter))
4352 		adapter->cmd_privileges = MAX_PRIVILEGES;
4353 	else
4354 		adapter->cmd_privileges = MIN_PRIVILEGES;
4355 }
4356 
4357 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4358  * However, this HW limitation is not exposed to the host via any SLI cmd.
4359  * As a result, in the case of SRIOV and in particular multi-partition configs
4360  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4361  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4362  * for distribution between the VFs. This self-imposed limit determines the
4363  * number of VFs for which RSS can be enabled.
4364 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4365 {
4366 	struct be_port_resources port_res = {0};
4367 	u8 rss_tables_on_port;
4368 	u16 max_vfs = be_max_vfs(adapter);
4369 
4370 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4371 				  RESOURCE_LIMITS, 0);
4372 
4373 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4374 
4375 	/* Each PF Pool's RSS Tables limit =
4376 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4377 	 */
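	/* Illustrative share (values assumed): with 15 usable RSS tables on
	 * the port, max_vfs = 32 for this PF and port_res.max_vfs = 64, the
	 * pool gets 32 * 15 / 64 = 7 tables.
	 */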
4378 	adapter->pool_res.max_rss_tables =
4379 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4380 }
4381 
4382 static int be_get_sriov_config(struct be_adapter *adapter)
4383 {
4384 	struct be_resources res = {0};
4385 	int max_vfs, old_vfs;
4386 
4387 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4388 				  RESOURCE_LIMITS, 0);
4389 
4390 	/* Some old versions of BE3 FW don't report max_vfs value */
4391 	if (BE3_chip(adapter) && !res.max_vfs) {
4392 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4393 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4394 	}
4395 
4396 	adapter->pool_res = res;
4397 
4398 	/* If during previous unload of the driver, the VFs were not disabled,
4399 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4400 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4401 	 */
4402 	old_vfs = pci_num_vf(adapter->pdev);
4403 	if (old_vfs) {
4404 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4405 			 old_vfs);
4406 
4407 		adapter->pool_res.max_vfs =
4408 			pci_sriov_get_totalvfs(adapter->pdev);
4409 		adapter->num_vfs = old_vfs;
4410 	}
4411 
4412 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4413 		be_calculate_pf_pool_rss_tables(adapter);
4414 		dev_info(&adapter->pdev->dev,
4415 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4416 			 be_max_pf_pool_rss_tables(adapter));
4417 	}
4418 	return 0;
4419 }
4420 
4421 static void be_alloc_sriov_res(struct be_adapter *adapter)
4422 {
4423 	int old_vfs = pci_num_vf(adapter->pdev);
4424 	struct be_resources vft_res = {0};
4425 	int status;
4426 
4427 	be_get_sriov_config(adapter);
4428 
4429 	if (!old_vfs)
4430 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4431 
4432 	/* When the HW is in SRIOV capable configuration, the PF-pool
4433 	 * resources are given to PF during driver load, if there are no
4434 	 * resources are given to the PF during driver load, if there are no
4435 	 * old VFs. This facility is not available in BE3 FW.
4436 	 * On Lancer, the FW does this itself.
4437 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4438 		be_calculate_vf_res(adapter, 0, &vft_res);
4439 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4440 						 &vft_res);
4441 		if (status)
4442 			dev_err(&adapter->pdev->dev,
4443 				"Failed to optimize SRIOV resources\n");
4444 	}
4445 }
4446 
4447 static int be_get_resources(struct be_adapter *adapter)
4448 {
4449 	struct device *dev = &adapter->pdev->dev;
4450 	struct be_resources res = {0};
4451 	int status;
4452 
4453 	/* For Lancer, SH, etc. read per-function resource limits from FW.
4454 	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4455 	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4456 	 */
4457 	if (BEx_chip(adapter)) {
4458 		BEx_get_resources(adapter, &res);
4459 	} else {
4460 		status = be_cmd_get_func_config(adapter, &res);
4461 		if (status)
4462 			return status;
4463 
4464 		/* If a default RXQ must be created, we'll use up one RSSQ */
4465 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4466 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4467 			res.max_rss_qs -= 1;
4468 	}
4469 
4470 	/* If RoCE is supported stash away half the EQs for RoCE */
4471 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4472 				res.max_evt_qs / 2 : res.max_evt_qs;
4473 	adapter->res = res;
4474 
4475 	/* If FW supports RSS default queue, then skip creating non-RSS
4476 	 * queue for non-IP traffic.
4477 	 */
4478 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4479 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4480 
4481 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4482 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4483 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4484 		 be_max_vfs(adapter));
4485 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4486 		 be_max_uc(adapter), be_max_mc(adapter),
4487 		 be_max_vlans(adapter));
4488 
4489 	/* Ensure RX and TX queues are created in pairs at init time */
4490 	adapter->cfg_num_rx_irqs =
4491 				min_t(u16, netif_get_num_default_rss_queues(),
4492 				      be_max_qp_irqs(adapter));
4493 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4494 	return 0;
4495 }
4496 
4497 static int be_get_config(struct be_adapter *adapter)
4498 {
4499 	int status, level;
4500 	u16 profile_id;
4501 
4502 	status = be_cmd_get_cntl_attributes(adapter);
4503 	if (status)
4504 		return status;
4505 
4506 	status = be_cmd_query_fw_cfg(adapter);
4507 	if (status)
4508 		return status;
4509 
4510 	if (!lancer_chip(adapter) && be_physfn(adapter))
4511 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4512 
4513 	if (BEx_chip(adapter)) {
4514 		level = be_cmd_get_fw_log_level(adapter);
4515 		adapter->msg_enable =
4516 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4517 	}
4518 
4519 	be_cmd_get_acpi_wol_cap(adapter);
4520 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4521 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4522 
4523 	be_cmd_query_port_name(adapter);
4524 
4525 	if (be_physfn(adapter)) {
4526 		status = be_cmd_get_active_profile(adapter, &profile_id);
4527 		if (!status)
4528 			dev_info(&adapter->pdev->dev,
4529 				 "Using profile 0x%x\n", profile_id);
4530 	}
4531 
4532 	return 0;
4533 }
4534 
4535 static int be_mac_setup(struct be_adapter *adapter)
4536 {
4537 	u8 mac[ETH_ALEN];
4538 	int status;
4539 
4540 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4541 		status = be_cmd_get_perm_mac(adapter, mac);
4542 		if (status)
4543 			return status;
4544 
4545 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4546 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4547 	}
4548 
4549 	return 0;
4550 }
4551 
4552 static void be_schedule_worker(struct be_adapter *adapter)
4553 {
4554 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4555 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4556 }
4557 
4558 static void be_destroy_err_recovery_workq(void)
4559 {
4560 	if (!be_err_recovery_workq)
4561 		return;
4562 
4563 	flush_workqueue(be_err_recovery_workq);
4564 	destroy_workqueue(be_err_recovery_workq);
4565 	be_err_recovery_workq = NULL;
4566 }
4567 
4568 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4569 {
4570 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4571 
4572 	if (!be_err_recovery_workq)
4573 		return;
4574 
4575 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4576 			   msecs_to_jiffies(delay));
4577 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4578 }
4579 
4580 static int be_setup_queues(struct be_adapter *adapter)
4581 {
4582 	struct net_device *netdev = adapter->netdev;
4583 	int status;
4584 
4585 	status = be_evt_queues_create(adapter);
4586 	if (status)
4587 		goto err;
4588 
4589 	status = be_tx_qs_create(adapter);
4590 	if (status)
4591 		goto err;
4592 
4593 	status = be_rx_cqs_create(adapter);
4594 	if (status)
4595 		goto err;
4596 
4597 	status = be_mcc_queues_create(adapter);
4598 	if (status)
4599 		goto err;
4600 
4601 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4602 	if (status)
4603 		goto err;
4604 
4605 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4606 	if (status)
4607 		goto err;
4608 
4609 	return 0;
4610 err:
4611 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4612 	return status;
4613 }
4614 
4615 static int be_if_create(struct be_adapter *adapter)
4616 {
4617 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4618 	u32 cap_flags = be_if_cap_flags(adapter);
4619 	int status;
4620 
4621 	/* alloc required memory for other filtering fields */
4622 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4623 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4624 	if (!adapter->pmac_id)
4625 		return -ENOMEM;
4626 
4627 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4628 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4629 	if (!adapter->mc_list)
4630 		return -ENOMEM;
4631 
4632 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4633 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4634 	if (!adapter->uc_list)
4635 		return -ENOMEM;
4636 
4637 	if (adapter->cfg_num_rx_irqs == 1)
4638 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4639 
4640 	en_flags &= cap_flags;
4641 	/* will enable all the needed filter flags in be_open() */
4642 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4643 				  &adapter->if_handle, 0);
4644 
4645 	if (status)
4646 		return status;
4647 
4648 	return 0;
4649 }
4650 
4651 int be_update_queues(struct be_adapter *adapter)
4652 {
4653 	struct net_device *netdev = adapter->netdev;
4654 	int status;
4655 
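	/* Sequence: quiesce (close, cancel worker), tear down queues and the
	 * IFACE, re-program MSI-x unless vectors are shared with RoCE, then
	 * re-create the IFACE and queues and reopen the device.
	 */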
4656 	if (netif_running(netdev))
4657 		be_close(netdev);
4658 
4659 	be_cancel_worker(adapter);
4660 
4661 	/* If any vectors have been shared with RoCE we cannot re-program
4662 	 * the MSIx table.
4663 	 */
4664 	if (!adapter->num_msix_roce_vec)
4665 		be_msix_disable(adapter);
4666 
4667 	be_clear_queues(adapter);
4668 	status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4669 	if (status)
4670 		return status;
4671 
4672 	if (!msix_enabled(adapter)) {
4673 		status = be_msix_enable(adapter);
4674 		if (status)
4675 			return status;
4676 	}
4677 
4678 	status = be_if_create(adapter);
4679 	if (status)
4680 		return status;
4681 
4682 	status = be_setup_queues(adapter);
4683 	if (status)
4684 		return status;
4685 
4686 	be_schedule_worker(adapter);
4687 
4688 	if (netif_running(netdev))
4689 		status = be_open(netdev);
4690 
4691 	return status;
4692 }
4693 
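/* Parses the major number from a FW version string, e.g. "4.6.142.0" -> 4
 * (illustrative version string). A parse failure returns 0, which be_setup()
 * treats as pre-4.0 firmware.
 */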
4694 static inline int fw_major_num(const char *fw_ver)
4695 {
4696 	int fw_major = 0, i;
4697 
4698 	i = sscanf(fw_ver, "%d.", &fw_major);
4699 	if (i != 1)
4700 		return 0;
4701 
4702 	return fw_major;
4703 }
4704 
4705 /* If it is error recovery, FLR the PF.
4706  * Else, if any VFs are already enabled, don't FLR the PF.
4707  */
4708 static bool be_reset_required(struct be_adapter *adapter)
4709 {
4710 	if (be_error_recovering(adapter))
4711 		return true;
4712 	else
4713 		return pci_num_vf(adapter->pdev) == 0;
4714 }
4715 
4716 /* Wait for the FW to be ready and perform the required initialization */
4717 static int be_func_init(struct be_adapter *adapter)
4718 {
4719 	int status;
4720 
4721 	status = be_fw_wait_ready(adapter);
4722 	if (status)
4723 		return status;
4724 
4725 	/* FW is now ready; clear errors to allow cmds/doorbell */
4726 	be_clear_error(adapter, BE_CLEAR_ALL);
4727 
4728 	if (be_reset_required(adapter)) {
4729 		status = be_cmd_reset_function(adapter);
4730 		if (status)
4731 			return status;
4732 
4733 		/* Wait for interrupts to quiesce after an FLR */
4734 		msleep(100);
4735 	}
4736 
4737 	/* Tell FW we're ready to fire cmds */
4738 	status = be_cmd_fw_init(adapter);
4739 	if (status)
4740 		return status;
4741 
4742 	/* Allow interrupts for other ULPs running on NIC function */
4743 	be_intr_set(adapter, true);
4744 
4745 	return 0;
4746 }
4747 
4748 static int be_setup(struct be_adapter *adapter)
4749 {
4750 	struct device *dev = &adapter->pdev->dev;
4751 	int status;
4752 
4753 	status = be_func_init(adapter);
4754 	if (status)
4755 		return status;
4756 
4757 	be_setup_init(adapter);
4758 
4759 	if (!lancer_chip(adapter))
4760 		be_cmd_req_native_mode(adapter);
4761 
4762 	/* invoke this cmd first to get pf_num and vf_num which are needed
4763 	 * for issuing profile related cmds
4764 	 */
4765 	if (!BEx_chip(adapter)) {
4766 		status = be_cmd_get_func_config(adapter, NULL);
4767 		if (status)
4768 			return status;
4769 	}
4770 
4771 	status = be_get_config(adapter);
4772 	if (status)
4773 		goto err;
4774 
4775 	if (!BE2_chip(adapter) && be_physfn(adapter))
4776 		be_alloc_sriov_res(adapter);
4777 
4778 	status = be_get_resources(adapter);
4779 	if (status)
4780 		goto err;
4781 
4782 	status = be_msix_enable(adapter);
4783 	if (status)
4784 		goto err;
4785 
4786 	/* will enable all the needed filter flags in be_open() */
4787 	status = be_if_create(adapter);
4788 	if (status)
4789 		goto err;
4790 
4791 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4792 	rtnl_lock();
4793 	status = be_setup_queues(adapter);
4794 	rtnl_unlock();
4795 	if (status)
4796 		goto err;
4797 
4798 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4799 
4800 	status = be_mac_setup(adapter);
4801 	if (status)
4802 		goto err;
4803 
4804 	be_cmd_get_fw_ver(adapter);
4805 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4806 
4807 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4808 		dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4809 			adapter->fw_ver);
4810 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4811 	}
4812 
4813 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4814 					 adapter->rx_fc);
4815 	if (status)
4816 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4817 					&adapter->rx_fc);
4818 
4819 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4820 		 adapter->tx_fc, adapter->rx_fc);
4821 
4822 	if (be_physfn(adapter))
4823 		be_cmd_set_logical_link_config(adapter,
4824 					       IFLA_VF_LINK_STATE_AUTO, 0);
4825 
4826 	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4827 	 * confusing any Linux bridge or OVS it might be connected to.
4828 	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4829 	 * when SRIOV is not enabled.
4830 	 */
4831 	if (BE3_chip(adapter))
4832 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4833 				      PORT_FWD_TYPE_PASSTHRU, 0);
4834 
4835 	if (adapter->num_vfs)
4836 		be_vf_setup(adapter);
4837 
4838 	status = be_cmd_get_phy_info(adapter);
4839 	if (!status && be_pause_supported(adapter))
4840 		adapter->phy.fc_autoneg = 1;
4841 
4842 	if (be_physfn(adapter) && !lancer_chip(adapter))
4843 		be_cmd_set_features(adapter);
4844 
4845 	be_schedule_worker(adapter);
4846 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4847 	return 0;
4848 err:
4849 	be_clear(adapter);
4850 	return status;
4851 }
4852 
4853 #ifdef CONFIG_NET_POLL_CONTROLLER
4854 static void be_netpoll(struct net_device *netdev)
4855 {
4856 	struct be_adapter *adapter = netdev_priv(netdev);
4857 	struct be_eq_obj *eqo;
4858 	int i;
4859 
4860 	for_all_evt_queues(adapter, eqo, i) {
4861 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4862 		napi_schedule(&eqo->napi);
4863 	}
4864 }
4865 #endif
4866 
4867 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4868 {
4869 	const struct firmware *fw;
4870 	int status;
4871 
4872 	if (!netif_running(adapter->netdev)) {
4873 		dev_err(&adapter->pdev->dev,
4874 			"Firmware load not allowed (interface is down)\n");
4875 		return -ENETDOWN;
4876 	}
4877 
4878 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4879 	if (status)
4880 		goto fw_exit;
4881 
4882 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4883 
4884 	if (lancer_chip(adapter))
4885 		status = lancer_fw_download(adapter, fw);
4886 	else
4887 		status = be_fw_download(adapter, fw);
4888 
4889 	if (!status)
4890 		be_cmd_get_fw_ver(adapter);
4891 
4892 fw_exit:
4893 	release_firmware(fw);
4894 	return status;
4895 }
4896 
4897 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4898 				 u16 flags)
4899 {
4900 	struct be_adapter *adapter = netdev_priv(dev);
4901 	struct nlattr *attr, *br_spec;
4902 	int rem;
4903 	int status = 0;
4904 	u16 mode = 0;
4905 
4906 	if (!sriov_enabled(adapter))
4907 		return -EOPNOTSUPP;
4908 
4909 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4910 	if (!br_spec)
4911 		return -EINVAL;
4912 
4913 	nla_for_each_nested(attr, br_spec, rem) {
4914 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4915 			continue;
4916 
4917 		if (nla_len(attr) < sizeof(mode))
4918 			return -EINVAL;
4919 
4920 		mode = nla_get_u16(attr);
4921 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4922 			return -EOPNOTSUPP;
4923 
4924 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4925 			return -EINVAL;
4926 
4927 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4928 					       adapter->if_handle,
4929 					       mode == BRIDGE_MODE_VEPA ?
4930 					       PORT_FWD_TYPE_VEPA :
4931 					       PORT_FWD_TYPE_VEB, 0);
4932 		if (status)
4933 			goto err;
4934 
4935 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4936 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4937 
4938 		return status;
4939 	}
4940 err:
4941 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4942 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4943 
4944 	return status;
4945 }
4946 
4947 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4948 				 struct net_device *dev, u32 filter_mask,
4949 				 int nlflags)
4950 {
4951 	struct be_adapter *adapter = netdev_priv(dev);
4952 	int status = 0;
4953 	u8 hsw_mode;
4954 
4955 	/* BE and Lancer chips support VEB mode only */
4956 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4957 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4958 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4959 			return 0;
4960 		hsw_mode = PORT_FWD_TYPE_VEB;
4961 	} else {
4962 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4963 					       adapter->if_handle, &hsw_mode,
4964 					       NULL);
4965 		if (status)
4966 			return 0;
4967 
4968 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4969 			return 0;
4970 	}
4971 
4972 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4973 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4974 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4975 				       0, 0, nlflags, filter_mask, NULL);
4976 }
4977 
4978 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4979 					 void (*func)(struct work_struct *))
4980 {
4981 	struct be_cmd_work *work;
4982 
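	/* GFP_ATOMIC: callers such as be_set_rx_mode() may run in atomic
	 * context (ndo_set_rx_mode is invoked under the netdev addr-list
	 * lock), so this allocation must not sleep.
	 */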
4983 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4984 	if (!work) {
4985 		dev_err(&adapter->pdev->dev,
4986 			"be_work memory allocation failed\n");
4987 		return NULL;
4988 	}
4989 
4990 	INIT_WORK(&work->work, func);
4991 	work->adapter = adapter;
4992 	return work;
4993 }
4994 
4995 /* VxLAN offload Notes:
4996  *
4997  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4998  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4999  * is expected to work across all types of IP tunnels once exported. Skyhawk
5000  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5001  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5002  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5003  * those other tunnels are unexported on the fly through ndo_features_check().
5004  *
5005  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5006  * adds more than one port, disable offloads and don't re-enable them again
5007  * until after all the tunnels are removed.
5008  */
5009 static void be_work_add_vxlan_port(struct work_struct *work)
5010 {
5011 	struct be_cmd_work *cmd_work =
5012 				container_of(work, struct be_cmd_work, work);
5013 	struct be_adapter *adapter = cmd_work->adapter;
5014 	struct net_device *netdev = adapter->netdev;
5015 	struct device *dev = &adapter->pdev->dev;
5016 	__be16 port = cmd_work->info.vxlan_port;
5017 	int status;
5018 
5019 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5020 		adapter->vxlan_port_aliases++;
5021 		goto done;
5022 	}
5023 
5024 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5025 		dev_info(dev,
5026 			 "Only one UDP port supported for VxLAN offloads\n");
5027 		dev_info(dev, "Disabling VxLAN offloads\n");
5028 		adapter->vxlan_port_count++;
5029 		goto err;
5030 	}
5031 
5032 	if (adapter->vxlan_port_count++ >= 1)
5033 		goto done;
5034 
5035 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
5036 				     OP_CONVERT_NORMAL_TO_TUNNEL);
5037 	if (status) {
5038 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5039 		goto err;
5040 	}
5041 
5042 	status = be_cmd_set_vxlan_port(adapter, port);
5043 	if (status) {
5044 		dev_warn(dev, "Failed to add VxLAN port\n");
5045 		goto err;
5046 	}
5047 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5048 	adapter->vxlan_port = port;
5049 
5050 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5051 				   NETIF_F_TSO | NETIF_F_TSO6 |
5052 				   NETIF_F_GSO_UDP_TUNNEL;
5053 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5054 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5055 
5056 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5057 		 be16_to_cpu(port));
5058 	goto done;
5059 err:
5060 	be_disable_vxlan_offloads(adapter);
5061 done:
5062 	kfree(cmd_work);
5063 }
5064 
5065 static void be_work_del_vxlan_port(struct work_struct *work)
5066 {
5067 	struct be_cmd_work *cmd_work =
5068 				container_of(work, struct be_cmd_work, work);
5069 	struct be_adapter *adapter = cmd_work->adapter;
5070 	__be16 port = cmd_work->info.vxlan_port;
5071 
5072 	if (adapter->vxlan_port != port)
5073 		goto done;
5074 
5075 	if (adapter->vxlan_port_aliases) {
5076 		adapter->vxlan_port_aliases--;
5077 		goto out;
5078 	}
5079 
5080 	be_disable_vxlan_offloads(adapter);
5081 
5082 	dev_info(&adapter->pdev->dev,
5083 		 "Disabled VxLAN offloads for UDP port %d\n",
5084 		 be16_to_cpu(port));
5085 done:
5086 	adapter->vxlan_port_count--;
5087 out:
5088 	kfree(cmd_work);
5089 }
5090 
5091 static void be_cfg_vxlan_port(struct net_device *netdev,
5092 			      struct udp_tunnel_info *ti,
5093 			      void (*func)(struct work_struct *))
5094 {
5095 	struct be_adapter *adapter = netdev_priv(netdev);
5096 	struct be_cmd_work *cmd_work;
5097 
5098 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5099 		return;
5100 
5101 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5102 		return;
5103 
5104 	cmd_work = be_alloc_work(adapter, func);
5105 	if (cmd_work) {
5106 		cmd_work->info.vxlan_port = ti->port;
5107 		queue_work(be_wq, &cmd_work->work);
5108 	}
5109 }
5110 
5111 static void be_del_vxlan_port(struct net_device *netdev,
5112 			      struct udp_tunnel_info *ti)
5113 {
5114 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5115 }
5116 
5117 static void be_add_vxlan_port(struct net_device *netdev,
5118 			      struct udp_tunnel_info *ti)
5119 {
5120 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5121 }
5122 
5123 static netdev_features_t be_features_check(struct sk_buff *skb,
5124 					   struct net_device *dev,
5125 					   netdev_features_t features)
5126 {
5127 	struct be_adapter *adapter = netdev_priv(dev);
5128 	u8 l4_hdr = 0;
5129 
5130 	/* The code below restricts offload features for some tunneled packets.
5131 	 * Offload features for normal (non tunnel) packets are unchanged.
5132 	 */
5133 	if (!skb->encapsulation ||
5134 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5135 		return features;
5136 
5137 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5138 	 * should disable tunnel offload features if it's not a VxLAN packet,
5139 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5140 	 * allow other tunneled traffic like GRE work fine while VxLAN
5141 	 * offloads are configured in Skyhawk-R.
5142 	 */
5143 	switch (vlan_get_protocol(skb)) {
5144 	case htons(ETH_P_IP):
5145 		l4_hdr = ip_hdr(skb)->protocol;
5146 		break;
5147 	case htons(ETH_P_IPV6):
5148 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5149 		break;
5150 	default:
5151 		return features;
5152 	}
5153 
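	/* A well-formed VxLAN frame is UDP-encapsulated Ethernet (ETH_P_TEB)
	 * with exactly a UDP header plus a VxLAN header between the outer
	 * transport header and the inner MAC header; anything else loses
	 * checksum/GSO offloads below.
	 */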
5154 	if (l4_hdr != IPPROTO_UDP ||
5155 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5156 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5157 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5158 	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5159 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5160 
5161 	return features;
5162 }
5163 
5164 static int be_get_phys_port_id(struct net_device *dev,
5165 			       struct netdev_phys_item_id *ppid)
5166 {
5167 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5168 	struct be_adapter *adapter = netdev_priv(dev);
5169 	u8 *id;
5170 
5171 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5172 		return -ENOSPC;
5173 
5174 	ppid->id[0] = adapter->hba_port_num + 1;
5175 	id = &ppid->id[1];
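	/* Layout built below: id[0] = HBA port number + 1, followed by the
	 * controller serial-number words copied in reverse order.
	 */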
5176 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5177 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5178 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5179 
5180 	ppid->id_len = id_len;
5181 
5182 	return 0;
5183 }
5184 
5185 static void be_set_rx_mode(struct net_device *dev)
5186 {
5187 	struct be_adapter *adapter = netdev_priv(dev);
5188 	struct be_cmd_work *work;
5189 
5190 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5191 	if (work)
5192 		queue_work(be_wq, &work->work);
5193 }
5194 
5195 static const struct net_device_ops be_netdev_ops = {
5196 	.ndo_open		= be_open,
5197 	.ndo_stop		= be_close,
5198 	.ndo_start_xmit		= be_xmit,
5199 	.ndo_set_rx_mode	= be_set_rx_mode,
5200 	.ndo_set_mac_address	= be_mac_addr_set,
5201 	.ndo_get_stats64	= be_get_stats64,
5202 	.ndo_validate_addr	= eth_validate_addr,
5203 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5204 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5205 	.ndo_set_vf_mac		= be_set_vf_mac,
5206 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5207 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5208 	.ndo_get_vf_config	= be_get_vf_config,
5209 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5210 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5211 #ifdef CONFIG_NET_POLL_CONTROLLER
5212 	.ndo_poll_controller	= be_netpoll,
5213 #endif
5214 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5215 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5216 #ifdef CONFIG_NET_RX_BUSY_POLL
5217 	.ndo_busy_poll		= be_busy_poll,
5218 #endif
5219 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5220 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5221 	.ndo_features_check	= be_features_check,
5222 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5223 };
5224 
5225 static void be_netdev_init(struct net_device *netdev)
5226 {
5227 	struct be_adapter *adapter = netdev_priv(netdev);
5228 
5229 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5230 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5231 		NETIF_F_HW_VLAN_CTAG_TX;
5232 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5233 		netdev->hw_features |= NETIF_F_RXHASH;
5234 
5235 	netdev->features |= netdev->hw_features |
5236 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5237 
5238 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5239 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5240 
5241 	netdev->priv_flags |= IFF_UNICAST_FLT;
5242 
5243 	netdev->flags |= IFF_MULTICAST;
5244 
5245 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5246 
5247 	netdev->netdev_ops = &be_netdev_ops;
5248 
5249 	netdev->ethtool_ops = &be_ethtool_ops;
5250 
5251 	/* MTU range: 256 - 9000 */
5252 	netdev->min_mtu = BE_MIN_MTU;
5253 	netdev->max_mtu = BE_MAX_MTU;
5254 }
5255 
5256 static void be_cleanup(struct be_adapter *adapter)
5257 {
5258 	struct net_device *netdev = adapter->netdev;
5259 
5260 	rtnl_lock();
5261 	netif_device_detach(netdev);
5262 	if (netif_running(netdev))
5263 		be_close(netdev);
5264 	rtnl_unlock();
5265 
5266 	be_clear(adapter);
5267 }
5268 
5269 static int be_resume(struct be_adapter *adapter)
5270 {
5271 	struct net_device *netdev = adapter->netdev;
5272 	int status;
5273 
5274 	status = be_setup(adapter);
5275 	if (status)
5276 		return status;
5277 
5278 	rtnl_lock();
5279 	if (netif_running(netdev))
5280 		status = be_open(netdev);
5281 	rtnl_unlock();
5282 
5283 	if (status)
5284 		return status;
5285 
5286 	netif_device_attach(netdev);
5287 
5288 	return 0;
5289 }
5290 
5291 static void be_soft_reset(struct be_adapter *adapter)
5292 {
5293 	u32 val;
5294 
5295 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5296 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5297 	val |= SLIPORT_SOFTRESET_SR_MASK;
5298 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5299 }
5300 
5301 static bool be_err_is_recoverable(struct be_adapter *adapter)
5302 {
5303 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5304 	unsigned long initial_idle_time =
5305 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5306 	unsigned long recovery_interval =
5307 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5308 	u16 ue_err_code;
5309 	u32 val;
5310 
5311 	val = be_POST_stage_get(adapter);
5312 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5313 		return false;
5314 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5315 	if (ue_err_code == 0)
5316 		return false;
5317 
5318 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5319 		ue_err_code);
5320 
5321 	if (jiffies - err_rec->probe_time <= initial_idle_time) {
5322 		dev_err(&adapter->pdev->dev,
5323 			"Cannot recover within %lu sec from driver load\n",
5324 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5325 		return false;
5326 	}
5327 
5328 	if (err_rec->last_recovery_time &&
5329 	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5330 		dev_err(&adapter->pdev->dev,
5331 			"Cannot recover within %lu sec from last recovery\n",
5332 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5333 		return false;
5334 	}
5335 
5336 	if (ue_err_code == err_rec->last_err_code) {
5337 		dev_err(&adapter->pdev->dev,
5338 			"Cannot recover from a consecutive TPE error\n");
5339 		return false;
5340 	}
5341 
5342 	err_rec->last_recovery_time = jiffies;
5343 	err_rec->last_err_code = ue_err_code;
5344 	return true;
5345 }
5346 
5347 static int be_tpe_recover(struct be_adapter *adapter)
5348 {
5349 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5350 	int status = -EAGAIN;
5351 	u32 val;
5352 
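	/* State transitions (non-Lancer TPE recovery):
	 *   NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT
	 * A non-zero resched_delay asks the caller to re-invoke this state
	 * machine after that delay; a zero status means recovery prep is done.
	 */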
5353 	switch (err_rec->recovery_state) {
5354 	case ERR_RECOVERY_ST_NONE:
5355 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5356 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5357 		break;
5358 
5359 	case ERR_RECOVERY_ST_DETECT:
5360 		val = be_POST_stage_get(adapter);
5361 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5362 		    POST_STAGE_RECOVERABLE_ERR) {
5363 			dev_err(&adapter->pdev->dev,
5364 				"Unrecoverable HW error detected: 0x%x\n", val);
5365 			status = -EINVAL;
5366 			err_rec->resched_delay = 0;
5367 			break;
5368 		}
5369 
5370 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5371 
5372 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5373 		 * milliseconds before it checks for final error status in
5374 		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5375 		 * If it does, then PF0 initiates a Soft Reset.
5376 		 */
5377 		if (adapter->pf_num == 0) {
5378 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5379 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5380 					ERR_RECOVERY_UE_DETECT_DURATION;
5381 			break;
5382 		}
5383 
5384 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5385 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5386 					ERR_RECOVERY_UE_DETECT_DURATION;
5387 		break;
5388 
5389 	case ERR_RECOVERY_ST_RESET:
5390 		if (!be_err_is_recoverable(adapter)) {
5391 			dev_err(&adapter->pdev->dev,
5392 				"Failed to meet recovery criteria\n");
5393 			status = -EIO;
5394 			err_rec->resched_delay = 0;
5395 			break;
5396 		}
5397 		be_soft_reset(adapter);
5398 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5399 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5400 					err_rec->ue_to_reset_time;
5401 		break;
5402 
5403 	case ERR_RECOVERY_ST_PRE_POLL:
5404 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5405 		err_rec->resched_delay = 0;
5406 		status = 0;			/* done */
5407 		break;
5408 
5409 	default:
5410 		status = -EINVAL;
5411 		err_rec->resched_delay = 0;
5412 		break;
5413 	}
5414 
5415 	return status;
5416 }
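
/* be_tpe_recover() above is a resumable state machine: each invocation
 * advances at most one state and reports, via ->resched_delay, how long
 * the caller should wait before the next step. A minimal sketch of
 * driving such a machine from delayed work; the bar_* names are
 * hypothetical and the delay is assumed to be in milliseconds.
 */
struct bar_recovery {
	struct delayed_work dwork;
	int resched_delay;		/* ms until the next step */
};

static int bar_advance_state(struct bar_recovery *rec)
{
	/* Hypothetical step function: advance one state, set
	 * rec->resched_delay, and return -EAGAIN while work remains.
	 */
	return 0;
}

static void bar_recover_step(struct work_struct *work)
{
	struct bar_recovery *rec =
		container_of(work, struct bar_recovery, dwork.work);

	if (bar_advance_state(rec) == -EAGAIN && rec->resched_delay)
		queue_delayed_work(system_wq, &rec->dwork,
				   msecs_to_jiffies(rec->resched_delay));
}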
5417 
5418 static int be_err_recover(struct be_adapter *adapter)
5419 {
5420 	int status;
5421 
5422 	if (!lancer_chip(adapter)) {
5423 		if (!adapter->error_recovery.recovery_supported ||
5424 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5425 			return -EIO;
5426 		status = be_tpe_recover(adapter);
5427 		if (status)
5428 			goto err;
5429 	}
5430 
	/* Wait for the adapter to reach a quiescent state before
	 * destroying queues.
	 */
5434 	status = be_fw_wait_ready(adapter);
5435 	if (status)
5436 		goto err;
5437 
5438 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5439 
5440 	be_cleanup(adapter);
5441 
5442 	status = be_resume(adapter);
5443 	if (status)
5444 		goto err;
5445 
5446 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5447 
5448 err:
5449 	return status;
5450 }
5451 
5452 static void be_err_detection_task(struct work_struct *work)
5453 {
5454 	struct be_error_recovery *err_rec =
5455 			container_of(work, struct be_error_recovery,
5456 				     err_detection_work.work);
5457 	struct be_adapter *adapter =
5458 			container_of(err_rec, struct be_adapter,
5459 				     error_recovery);
5460 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5461 	struct device *dev = &adapter->pdev->dev;
5462 	int recovery_status;
5463 
5464 	be_detect_error(adapter);
5465 	if (!be_check_error(adapter, BE_ERROR_HW))
5466 		goto reschedule_task;
5467 
5468 	recovery_status = be_err_recover(adapter);
5469 	if (!recovery_status) {
5470 		err_rec->recovery_retries = 0;
5471 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5472 		dev_info(dev, "Adapter recovery successful\n");
5473 		goto reschedule_task;
5474 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5475 		/* BEx/SH recovery state machine */
5476 		if (adapter->pf_num == 0 &&
5477 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5478 			dev_err(&adapter->pdev->dev,
5479 				"Adapter recovery in progress\n");
5480 		resched_delay = err_rec->resched_delay;
5481 		goto reschedule_task;
5482 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
		/* For VFs, re-check every second whether the PF has
		 * allocated the resources yet.
		 */
5486 		dev_err(dev, "Re-trying adapter recovery\n");
5487 		goto reschedule_task;
5488 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5489 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
		/* If another error hits during recovery, the adapter takes
		 * 30 seconds to come out of the error state. Retry the
		 * recovery only after that interval.
		 */
5494 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5495 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5496 		goto reschedule_task;
5497 	} else {
5498 		dev_err(dev, "Adapter recovery failed\n");
5499 		dev_err(dev, "Please reboot server to recover\n");
5500 	}
5501 
5502 	return;
5503 
5504 reschedule_task:
5505 	be_schedule_err_detection(adapter, resched_delay);
5506 }
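
/* be_err_detection_task() above digs the adapter out with two
 * container_of() hops: work_struct -> delayed_work -> embedding object.
 * The same pattern in isolation (struct inner/outer are hypothetical):
 */
struct inner {
	struct delayed_work dwork;
};

struct outer {
	int id;
	struct inner in;
};

static void outer_work_fn(struct work_struct *work)
{
	/* work is &outer.in.dwork.work; walk back out one layer at a time */
	struct inner *in = container_of(work, struct inner, dwork.work);
	struct outer *o = container_of(in, struct outer, in);

	pr_info("outer id %d\n", o->id);
}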
5507 
5508 static void be_log_sfp_info(struct be_adapter *adapter)
5509 {
5510 	int status;
5511 
5512 	status = be_cmd_query_sfp_info(adapter);
5513 	if (!status) {
5514 		dev_err(&adapter->pdev->dev,
			"Port %c: %s Vendor: %s part no: %s\n",
5516 			adapter->port_name,
5517 			be_misconfig_evt_port_state[adapter->phy_state],
5518 			adapter->phy.vendor_name,
5519 			adapter->phy.vendor_pn);
5520 	}
5521 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5522 }
5523 
5524 static void be_worker(struct work_struct *work)
5525 {
5526 	struct be_adapter *adapter =
5527 		container_of(work, struct be_adapter, work.work);
5528 	struct be_rx_obj *rxo;
5529 	int i;
5530 
5531 	if (be_physfn(adapter) &&
5532 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5533 		be_cmd_get_die_temperature(adapter);
5534 
	/* When interrupts are not yet enabled, just reap any pending
	 * MCC completions.
	 */
5538 	if (!netif_running(adapter->netdev)) {
5539 		local_bh_disable();
5540 		be_process_mcc(adapter);
5541 		local_bh_enable();
5542 		goto reschedule;
5543 	}
5544 
5545 	if (!adapter->stats_cmd_sent) {
5546 		if (lancer_chip(adapter))
5547 			lancer_cmd_get_pport_stats(adapter,
5548 						   &adapter->stats_cmd);
5549 		else
5550 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5551 	}
5552 
5553 	for_all_rx_queues(adapter, rxo, i) {
5554 		/* Replenish RX-queues starved due to memory
5555 		 * allocation failures.
5556 		 */
5557 		if (rxo->rx_post_starved)
5558 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5559 	}
5560 
5561 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5562 	if (!skyhawk_chip(adapter))
5563 		be_eqd_update(adapter, false);
5564 
5565 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5566 		be_log_sfp_info(adapter);
5567 
5568 reschedule:
5569 	adapter->work_counter++;
5570 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5571 }
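
/* be_worker() above re-arms itself once a second, the standard
 * self-rescheduling delayed-work pattern. A minimal sketch with
 * hypothetical tick_* names:
 */
static struct delayed_work tick_work;

static void tick_fn(struct work_struct *work)
{
	/* ... periodic housekeeping ... */
	schedule_delayed_work(&tick_work, msecs_to_jiffies(1000));
}

/* Start with INIT_DELAYED_WORK(&tick_work, tick_fn) followed by
 * schedule_delayed_work(&tick_work, 0). Teardown must use
 * cancel_delayed_work_sync(), which is safe even against a work item
 * that re-queues itself, as be_shutdown() below relies on.
 */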
5572 
5573 static void be_unmap_pci_bars(struct be_adapter *adapter)
5574 {
5575 	if (adapter->csr)
5576 		pci_iounmap(adapter->pdev, adapter->csr);
5577 	if (adapter->db)
5578 		pci_iounmap(adapter->pdev, adapter->db);
5579 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5580 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5581 }
5582 
5583 static int db_bar(struct be_adapter *adapter)
5584 {
	if (lancer_chip(adapter) || be_virtfn(adapter))
		return 0;

	return 4;
5589 }
5590 
5591 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5592 {
5593 	if (skyhawk_chip(adapter)) {
5594 		adapter->roce_db.size = 4096;
5595 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5596 							      db_bar(adapter));
5597 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5598 							       db_bar(adapter));
5599 	}
5600 	return 0;
5601 }
5602 
5603 static int be_map_pci_bars(struct be_adapter *adapter)
5604 {
5605 	struct pci_dev *pdev = adapter->pdev;
5606 	u8 __iomem *addr;
5607 	u32 sli_intf;
5608 
5609 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5610 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5611 				SLI_INTF_FAMILY_SHIFT;
5612 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5613 
5614 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5615 		adapter->csr = pci_iomap(pdev, 2, 0);
5616 		if (!adapter->csr)
5617 			return -ENOMEM;
5618 	}
5619 
5620 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5621 	if (!addr)
5622 		goto pci_map_err;
5623 	adapter->db = addr;
5624 
5625 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5626 		if (be_physfn(adapter)) {
5627 			/* PCICFG is the 2nd BAR in BE2 */
5628 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5629 			if (!addr)
5630 				goto pci_map_err;
5631 			adapter->pcicfg = addr;
5632 			adapter->pcicfg_mapped = true;
5633 		} else {
5634 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5635 			adapter->pcicfg_mapped = false;
5636 		}
5637 	}
5638 
5639 	be_roce_map_pci_bars(adapter);
5640 	return 0;
5641 
5642 pci_map_err:
5643 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5644 	be_unmap_pci_bars(adapter);
5645 	return -ENOMEM;
5646 }
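
/* be_map_pci_bars() above follows the usual pci_iomap()/pci_iounmap()
 * discipline: map BARs by index, and unwind every mapping on any
 * failure. A minimal sketch against a single BAR (index 0 is an
 * assumption; a length of 0 maps the whole BAR):
 */
static void __iomem *foo_map_bar0(struct pci_dev *pdev)
{
	void __iomem *regs = pci_iomap(pdev, 0, 0);

	if (!regs)
		dev_err(&pdev->dev, "failed to map BAR 0\n");
	return regs;	/* unmap later with pci_iounmap(pdev, regs) */
}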
5647 
5648 static void be_drv_cleanup(struct be_adapter *adapter)
5649 {
5650 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5651 	struct device *dev = &adapter->pdev->dev;
5652 
5653 	if (mem->va)
5654 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5655 
5656 	mem = &adapter->rx_filter;
5657 	if (mem->va)
5658 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5659 
5660 	mem = &adapter->stats_cmd;
5661 	if (mem->va)
5662 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5663 }
5664 
5665 /* Allocate and initialize various fields in be_adapter struct */
5666 static int be_drv_init(struct be_adapter *adapter)
5667 {
5668 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5669 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5670 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5671 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5672 	struct device *dev = &adapter->pdev->dev;
5673 	int status = 0;
5674 
5675 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5676 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5677 						 &mbox_mem_alloc->dma,
5678 						 GFP_KERNEL);
5679 	if (!mbox_mem_alloc->va)
5680 		return -ENOMEM;
5681 
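	/* The mailbox must be 16-byte aligned; the allocation above is
	 * sized sizeof(mailbox) + 16, so rounding both the CPU and DMA
	 * addresses up with PTR_ALIGN() is guaranteed to yield an aligned
	 * mailbox window inside it (e.g. a buffer at ...0x08 gives an
	 * aligned view at ...0x10).
	 */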
5682 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5683 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5684 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5685 
5686 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5687 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5688 					    &rx_filter->dma, GFP_KERNEL);
5689 	if (!rx_filter->va) {
5690 		status = -ENOMEM;
5691 		goto free_mbox;
5692 	}
5693 
5694 	if (lancer_chip(adapter))
5695 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5696 	else if (BE2_chip(adapter))
5697 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5698 	else if (BE3_chip(adapter))
5699 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5700 	else
5701 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5702 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5703 					    &stats_cmd->dma, GFP_KERNEL);
5704 	if (!stats_cmd->va) {
5705 		status = -ENOMEM;
5706 		goto free_rx_filter;
5707 	}
5708 
5709 	mutex_init(&adapter->mbox_lock);
5710 	mutex_init(&adapter->mcc_lock);
5711 	mutex_init(&adapter->rx_filter_lock);
5712 	spin_lock_init(&adapter->mcc_cq_lock);
5713 	init_completion(&adapter->et_cmd_compl);
5714 
5715 	pci_save_state(adapter->pdev);
5716 
5717 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5718 
5719 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5720 	adapter->error_recovery.resched_delay = 0;
5721 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5722 			  be_err_detection_task);
5723 
5724 	adapter->rx_fc = true;
5725 	adapter->tx_fc = true;
5726 
5727 	/* Must be a power of 2 or else MODULO will BUG_ON */
5728 	adapter->be_get_temp_freq = 64;
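	/* For a power-of-two n, x % n reduces to the cheap bitwise
	 * x & (n - 1); e.g. work_counter % 64 == work_counter & 63.
	 */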
5729 
5730 	return 0;
5731 
5732 free_rx_filter:
5733 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5734 free_mbox:
5735 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5736 			  mbox_mem_alloc->dma);
5737 	return status;
5738 }
5739 
5740 static void be_remove(struct pci_dev *pdev)
5741 {
5742 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5743 
5744 	if (!adapter)
5745 		return;
5746 
5747 	be_roce_dev_remove(adapter);
5748 	be_intr_set(adapter, false);
5749 
5750 	be_cancel_err_detection(adapter);
5751 
5752 	unregister_netdev(adapter->netdev);
5753 
5754 	be_clear(adapter);
5755 
5756 	if (!pci_vfs_assigned(adapter->pdev))
5757 		be_cmd_reset_function(adapter);
5758 
	/* Tell the FW we're done firing cmds */
5760 	be_cmd_fw_clean(adapter);
5761 
5762 	be_unmap_pci_bars(adapter);
5763 	be_drv_cleanup(adapter);
5764 
5765 	pci_disable_pcie_error_reporting(pdev);
5766 
5767 	pci_release_regions(pdev);
5768 	pci_disable_device(pdev);
5769 
5770 	free_netdev(adapter->netdev);
5771 }
5772 
5773 static ssize_t be_hwmon_show_temp(struct device *dev,
5774 				  struct device_attribute *dev_attr,
5775 				  char *buf)
5776 {
5777 	struct be_adapter *adapter = dev_get_drvdata(dev);
5778 
5779 	/* Unit: millidegree Celsius */
5780 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
		return -EIO;

	return sprintf(buf, "%u\n",
		       adapter->hwmon_info.be_on_die_temp * 1000);
5786 
5787 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5788 			  be_hwmon_show_temp, NULL, 1);
5789 
5790 static struct attribute *be_hwmon_attrs[] = {
5791 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5792 	NULL
5793 };
5794 
5795 ATTRIBUTE_GROUPS(be_hwmon);
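
/* SENSOR_DEVICE_ATTR(temp1_input, ...) defines the attribute object
 * sensor_dev_attr_temp1_input used above, and ATTRIBUTE_GROUPS(be_hwmon)
 * generates the be_hwmon_groups[] array that be_probe() below hands to
 * devm_hwmon_device_register_with_groups().
 */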
5796 
5797 static char *mc_name(struct be_adapter *adapter)
5798 {
5799 	char *str = "";	/* default */
5800 
5801 	switch (adapter->mc_type) {
5802 	case UMC:
5803 		str = "UMC";
5804 		break;
5805 	case FLEX10:
5806 		str = "FLEX10";
5807 		break;
5808 	case vNIC1:
5809 		str = "vNIC-1";
5810 		break;
5811 	case nPAR:
5812 		str = "nPAR";
5813 		break;
5814 	case UFP:
5815 		str = "UFP";
5816 		break;
5817 	case vNIC2:
5818 		str = "vNIC-2";
5819 		break;
5820 	default:
5821 		str = "";
5822 	}
5823 
5824 	return str;
5825 }
5826 
5827 static inline char *func_name(struct be_adapter *adapter)
5828 {
5829 	return be_physfn(adapter) ? "PF" : "VF";
5830 }
5831 
5832 static inline char *nic_name(struct pci_dev *pdev)
5833 {
5834 	switch (pdev->device) {
5835 	case OC_DEVICE_ID1:
5836 		return OC_NAME;
5837 	case OC_DEVICE_ID2:
5838 		return OC_NAME_BE;
5839 	case OC_DEVICE_ID3:
5840 	case OC_DEVICE_ID4:
5841 		return OC_NAME_LANCER;
5842 	case BE_DEVICE_ID2:
5843 		return BE3_NAME;
5844 	case OC_DEVICE_ID5:
5845 	case OC_DEVICE_ID6:
5846 		return OC_NAME_SH;
5847 	default:
5848 		return BE_NAME;
5849 	}
5850 }
5851 
5852 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5853 {
5854 	struct be_adapter *adapter;
5855 	struct net_device *netdev;
5856 	int status = 0;
5857 
5858 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5859 
5860 	status = pci_enable_device(pdev);
5861 	if (status)
5862 		goto do_none;
5863 
5864 	status = pci_request_regions(pdev, DRV_NAME);
5865 	if (status)
5866 		goto disable_dev;
5867 	pci_set_master(pdev);
5868 
5869 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5870 	if (!netdev) {
5871 		status = -ENOMEM;
5872 		goto rel_reg;
5873 	}
5874 	adapter = netdev_priv(netdev);
5875 	adapter->pdev = pdev;
5876 	pci_set_drvdata(pdev, adapter);
5877 	adapter->netdev = netdev;
5878 	SET_NETDEV_DEV(netdev, &pdev->dev);
5879 
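	/* Standard DMA-mask negotiation: try a 64-bit mask first (and
	 * advertise NETIF_F_HIGHDMA so the stack may hand us high-memory
	 * buffers), then fall back to 32-bit before giving up.
	 */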
5880 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5881 	if (!status) {
5882 		netdev->features |= NETIF_F_HIGHDMA;
5883 	} else {
5884 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5885 		if (status) {
5886 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5887 			goto free_netdev;
5888 		}
5889 	}
5890 
5891 	status = pci_enable_pcie_error_reporting(pdev);
5892 	if (!status)
5893 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5894 
5895 	status = be_map_pci_bars(adapter);
5896 	if (status)
5897 		goto free_netdev;
5898 
5899 	status = be_drv_init(adapter);
5900 	if (status)
5901 		goto unmap_bars;
5902 
5903 	status = be_setup(adapter);
5904 	if (status)
5905 		goto drv_cleanup;
5906 
5907 	be_netdev_init(netdev);
5908 	status = register_netdev(netdev);
5909 	if (status != 0)
5910 		goto unsetup;
5911 
5912 	be_roce_dev_add(adapter);
5913 
5914 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5915 	adapter->error_recovery.probe_time = jiffies;
5916 
	/* On-die temperature is not supported on VFs. */
5918 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5919 		adapter->hwmon_info.hwmon_dev =
5920 			devm_hwmon_device_register_with_groups(&pdev->dev,
5921 							       DRV_NAME,
5922 							       adapter,
5923 							       be_hwmon_groups);
5924 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5925 	}
5926 
5927 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5928 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5929 
5930 	return 0;
5931 
5932 unsetup:
5933 	be_clear(adapter);
5934 drv_cleanup:
5935 	be_drv_cleanup(adapter);
5936 unmap_bars:
5937 	be_unmap_pci_bars(adapter);
5938 free_netdev:
5939 	free_netdev(netdev);
5940 rel_reg:
5941 	pci_release_regions(pdev);
5942 disable_dev:
5943 	pci_disable_device(pdev);
5944 do_none:
5945 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5946 	return status;
5947 }
5948 
5949 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5950 {
5951 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5952 
5953 	be_intr_set(adapter, false);
5954 	be_cancel_err_detection(adapter);
5955 
5956 	be_cleanup(adapter);
5957 
5958 	pci_save_state(pdev);
5959 	pci_disable_device(pdev);
5960 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5961 	return 0;
5962 }
5963 
5964 static int be_pci_resume(struct pci_dev *pdev)
5965 {
5966 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5967 	int status = 0;
5968 
5969 	status = pci_enable_device(pdev);
5970 	if (status)
5971 		return status;
5972 
5973 	pci_restore_state(pdev);
5974 
5975 	status = be_resume(adapter);
5976 	if (status)
5977 		return status;
5978 
5979 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5980 
5981 	return 0;
5982 }
5983 
/*
 * An FLR (Function Level Reset) will stop the BE from DMAing any data.
 */
5987 static void be_shutdown(struct pci_dev *pdev)
5988 {
5989 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5990 
5991 	if (!adapter)
5992 		return;
5993 
5994 	be_roce_dev_shutdown(adapter);
5995 	cancel_delayed_work_sync(&adapter->work);
5996 	be_cancel_err_detection(adapter);
5997 
5998 	netif_device_detach(adapter->netdev);
5999 
6000 	be_cmd_reset_function(adapter);
6001 
6002 	pci_disable_device(pdev);
6003 }
6004 
6005 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6006 					    pci_channel_state_t state)
6007 {
6008 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6009 
6010 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6011 
6012 	be_roce_dev_remove(adapter);
6013 
6014 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6015 		be_set_error(adapter, BE_ERROR_EEH);
6016 
6017 		be_cancel_err_detection(adapter);
6018 
6019 		be_cleanup(adapter);
6020 	}
6021 
6022 	if (state == pci_channel_io_perm_failure)
6023 		return PCI_ERS_RESULT_DISCONNECT;
6024 
6025 	pci_disable_device(pdev);
6026 
	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while a flash dump is in progress can
	 * prevent it from recovering, so wait for the dump to finish.
	 * Only the first function needs to wait, as once per adapter
	 * is enough.
	 */
6033 	if (pdev->devfn == 0)
6034 		ssleep(30);
6035 
6036 	return PCI_ERS_RESULT_NEED_RESET;
6037 }
6038 
6039 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6040 {
6041 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6042 	int status;
6043 
6044 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6045 
6046 	status = pci_enable_device(pdev);
6047 	if (status)
6048 		return PCI_ERS_RESULT_DISCONNECT;
6049 
6050 	pci_set_master(pdev);
6051 	pci_restore_state(pdev);
6052 
6053 	/* Check if card is ok and fw is ready */
6054 	dev_info(&adapter->pdev->dev,
6055 		 "Waiting for FW to be ready after EEH reset\n");
6056 	status = be_fw_wait_ready(adapter);
6057 	if (status)
6058 		return PCI_ERS_RESULT_DISCONNECT;
6059 
6060 	pci_cleanup_aer_uncorrect_error_status(pdev);
6061 	be_clear_error(adapter, BE_CLEAR_ALL);
6062 	return PCI_ERS_RESULT_RECOVERED;
6063 }
6064 
6065 static void be_eeh_resume(struct pci_dev *pdev)
6066 {
6067 	int status = 0;
6068 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6069 
6070 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6071 
6072 	pci_save_state(pdev);
6073 
6074 	status = be_resume(adapter);
6075 	if (status)
6076 		goto err;
6077 
6078 	be_roce_dev_add(adapter);
6079 
6080 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6081 	return;
6082 err:
6083 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6084 }
6085 
6086 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6087 {
6088 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6089 	struct be_resources vft_res = {0};
6090 	int status;
6091 
6092 	if (!num_vfs)
6093 		be_vf_clear(adapter);
6094 
6095 	adapter->num_vfs = num_vfs;
6096 
6097 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6098 		dev_warn(&pdev->dev,
6099 			 "Cannot disable VFs while they are assigned\n");
6100 		return -EBUSY;
6101 	}
6102 
	/* When the HW is in an SRIOV-capable configuration, the PF-pool
	 * resources are distributed equally across the maximum number of
	 * VFs. The user may request that only a subset of the max VFs be
	 * enabled; based on num_vfs, redistribute the resources across
	 * num_vfs so that each enabled VF gets access to more resources.
	 * This facility is not available in BE3 FW, and on Lancer chips
	 * the FW performs this redistribution itself.
	 */
6111 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6112 		be_calculate_vf_res(adapter, adapter->num_vfs,
6113 				    &vft_res);
6114 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6115 						 adapter->num_vfs, &vft_res);
6116 		if (status)
6117 			dev_err(&pdev->dev,
6118 				"Failed to optimize SR-IOV resources\n");
6119 	}
6120 
6121 	status = be_get_resources(adapter);
6122 	if (status)
6123 		return be_cmd_status(status);
6124 
6125 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6126 	rtnl_lock();
6127 	status = be_update_queues(adapter);
6128 	rtnl_unlock();
6129 	if (status)
6130 		return be_cmd_status(status);
6131 
6132 	if (adapter->num_vfs)
6133 		status = be_vf_setup(adapter);
6134 
6135 	if (!status)
6136 		return adapter->num_vfs;
6137 
6138 	return 0;
6139 }
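
/* A .sriov_configure() hook runs when user space writes to
 * /sys/bus/pci/devices/<dev>/sriov_numvfs and must return the number of
 * VFs actually enabled or a negative errno. The smallest conforming
 * sketch, without this driver's resource redistribution (foo_* is a
 * hypothetical name):
 */
static int foo_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	int rc;

	if (num_vfs == 0) {
		if (pci_vfs_assigned(pdev))
			return -EBUSY;	/* VFs still in use by guests */
		pci_disable_sriov(pdev);
		return 0;
	}

	rc = pci_enable_sriov(pdev, num_vfs);
	return rc ? rc : num_vfs;
}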
6140 
6141 static const struct pci_error_handlers be_eeh_handlers = {
6142 	.error_detected = be_eeh_err_detected,
6143 	.slot_reset = be_eeh_reset,
6144 	.resume = be_eeh_resume,
6145 };
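
/* EEH/AER recovery flow: the PCI core calls .error_detected() first;
 * returning PCI_ERS_RESULT_NEED_RESET asks the core to reset the slot
 * and invoke .slot_reset(), and once that reports
 * PCI_ERS_RESULT_RECOVERED the core calls .resume() to restart traffic.
 */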
6146 
6147 static struct pci_driver be_driver = {
6148 	.name = DRV_NAME,
6149 	.id_table = be_dev_ids,
6150 	.probe = be_probe,
6151 	.remove = be_remove,
6152 	.suspend = be_suspend,
6153 	.resume = be_pci_resume,
6154 	.shutdown = be_shutdown,
6155 	.sriov_configure = be_pci_sriov_configure,
6156 	.err_handler = &be_eeh_handlers
6157 };
6158 
6159 static int __init be_init_module(void)
6160 {
6161 	int status;
6162 
6163 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6164 	    rx_frag_size != 2048) {
		pr_warn(DRV_NAME " : Module param rx_frag_size must be 2048/4096/8192. Using 2048\n");
6168 		rx_frag_size = 2048;
6169 	}
6170 
6171 	if (num_vfs > 0) {
		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6173 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6174 	}
6175 
6176 	be_wq = create_singlethread_workqueue("be_wq");
6177 	if (!be_wq) {
		pr_warn(DRV_NAME " : workqueue creation failed\n");
		return -ENOMEM;
6180 	}
6181 
6182 	be_err_recovery_workq =
6183 		create_singlethread_workqueue("be_err_recover");
6184 	if (!be_err_recovery_workq)
		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6186 
6187 	status = pci_register_driver(&be_driver);
6188 	if (status) {
6189 		destroy_workqueue(be_wq);
6190 		be_destroy_err_recovery_workq();
6191 	}
6192 	return status;
6193 }
6194 module_init(be_init_module);
6195 
6196 static void __exit be_exit_module(void)
6197 {
6198 	pci_unregister_driver(&be_driver);
6199 
6200 	be_destroy_err_recovery_workq();
6201 
6202 	if (be_wq)
6203 		destroy_workqueue(be_wq);
6204 }
6205 module_exit(be_exit_module);
6206