1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
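/* Free the DMA-coherent memory backing a queue's ring, if it was allocated */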
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
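/* Allocate zeroed DMA-coherent memory for a ring of 'len' entries of
 * 'entry_size' bytes each.
 */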
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
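/* Enable/disable host interrupts by toggling the HOSTINTR bit of the MEMBAR
 * interrupt-control register in PCI config space.
 */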
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On Lancer, interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
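/* Notify HW of newly posted RX buffers by ringing the RQ doorbell */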
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
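/* Notify HW of newly posted TX WRBs by ringing the TX doorbell */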
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
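/* Ring the EQ doorbell: optionally re-arm the EQ and clear the interrupt,
 * ack 'num_popped' events and program the EQ-delay multiplier encoding.
 */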
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
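/* Ring the CQ doorbell: optionally re-arm the CQ and ack 'num_popped' entries */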
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279 			/* mac already added, skip addition */
280 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281 			return 0;
282 		}
283 	}
284 
285 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286 			       &adapter->pmac_id[0], 0);
287 }
288 
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291 	int i;
292 
293 	/* Skip deletion if the programmed mac is
294 	 * being used in uc-list
295 	 */
296 	for (i = 0; i < adapter->uc_macs; i++) {
297 		if (adapter->pmac_id[i + 1] == pmac_id)
298 			return;
299 	}
300 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302 
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305 	struct be_adapter *adapter = netdev_priv(netdev);
306 	struct device *dev = &adapter->pdev->dev;
307 	struct sockaddr *addr = p;
308 	int status;
309 	u8 mac[ETH_ALEN];
310 	u32 old_pmac_id = adapter->pmac_id[0];
311 
312 	if (!is_valid_ether_addr(addr->sa_data))
313 		return -EADDRNOTAVAIL;
314 
315 	/* Proceed further only if the user-provided MAC is different
316 	 * from the active MAC
317 	 */
318 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319 		return 0;
320 
321 	/* if device is not running, copy MAC to netdev->dev_addr */
322 	if (!netif_running(netdev))
323 		goto done;
324 
325 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
326 	 * privilege or if the PF did not provision the new MAC address.
327 	 * On BE3, this cmd will always fail if the VF doesn't have the
328 	 * FILTMGMT privilege. This failure is OK only if the PF has
329 	 * programmed the MAC for the VF.
330 	 */
331 	mutex_lock(&adapter->rx_filter_lock);
332 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
333 	if (!status) {
335 		/* Delete the old programmed MAC. This call may fail if the
336 		 * old MAC was already deleted by the PF driver.
337 		 */
338 		if (adapter->pmac_id[0] != old_pmac_id)
339 			be_dev_mac_del(adapter, old_pmac_id);
340 	}
341 
342 	mutex_unlock(&adapter->rx_filter_lock);
343 	/* Decide whether the new MAC was successfully activated only after
344 	 * querying the FW
345 	 */
346 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
347 				       adapter->if_handle, true, 0);
348 	if (status)
349 		goto err;
350 
351 	/* The MAC change did not happen, either due to lack of privilege
352 	 * or because the PF didn't pre-provision the MAC.
353 	 */
354 	if (!ether_addr_equal(addr->sa_data, mac)) {
355 		status = -EPERM;
356 		goto err;
357 	}
358 done:
359 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
360 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
361 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
362 	return 0;
363 err:
364 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
365 	return status;
366 }
367 
368 /* BE2 supports only v0 cmd */
369 static void *hw_stats_from_cmd(struct be_adapter *adapter)
370 {
371 	if (BE2_chip(adapter)) {
372 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
373 
374 		return &cmd->hw_stats;
375 	} else if (BE3_chip(adapter)) {
376 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
377 
378 		return &cmd->hw_stats;
379 	} else {
380 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
381 
382 		return &cmd->hw_stats;
383 	}
384 }
385 
386 /* BE2 supports only v0 cmd */
387 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
388 {
389 	if (BE2_chip(adapter)) {
390 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
391 
392 		return &hw_stats->erx;
393 	} else if (BE3_chip(adapter)) {
394 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
395 
396 		return &hw_stats->erx;
397 	} else {
398 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
399 
400 		return &hw_stats->erx;
401 	}
402 }
403 
404 static void populate_be_v0_stats(struct be_adapter *adapter)
405 {
406 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
407 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
408 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
409 	struct be_port_rxf_stats_v0 *port_stats =
410 					&rxf_stats->port[adapter->port_num];
411 	struct be_drv_stats *drvs = &adapter->drv_stats;
412 
413 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
414 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
415 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
416 	drvs->rx_control_frames = port_stats->rx_control_frames;
417 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
418 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
419 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
420 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
421 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
422 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
423 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
424 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
425 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
426 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
427 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
428 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
429 	drvs->rx_dropped_header_too_small =
430 		port_stats->rx_dropped_header_too_small;
431 	drvs->rx_address_filtered =
432 					port_stats->rx_address_filtered +
433 					port_stats->rx_vlan_filtered;
434 	drvs->rx_alignment_symbol_errors =
435 		port_stats->rx_alignment_symbol_errors;
436 
437 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
438 	drvs->tx_controlframes = port_stats->tx_controlframes;
439 
440 	if (adapter->port_num)
441 		drvs->jabber_events = rxf_stats->port1_jabber_events;
442 	else
443 		drvs->jabber_events = rxf_stats->port0_jabber_events;
444 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
445 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
446 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
447 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
448 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
449 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
450 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
451 }
452 
453 static void populate_be_v1_stats(struct be_adapter *adapter)
454 {
455 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
456 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
457 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
458 	struct be_port_rxf_stats_v1 *port_stats =
459 					&rxf_stats->port[adapter->port_num];
460 	struct be_drv_stats *drvs = &adapter->drv_stats;
461 
462 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
463 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
464 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
465 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
466 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
467 	drvs->rx_control_frames = port_stats->rx_control_frames;
468 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
469 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
470 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
471 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
472 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
473 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
474 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
475 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
476 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
477 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
478 	drvs->rx_dropped_header_too_small =
479 		port_stats->rx_dropped_header_too_small;
480 	drvs->rx_input_fifo_overflow_drop =
481 		port_stats->rx_input_fifo_overflow_drop;
482 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
483 	drvs->rx_alignment_symbol_errors =
484 		port_stats->rx_alignment_symbol_errors;
485 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
486 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
487 	drvs->tx_controlframes = port_stats->tx_controlframes;
488 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
489 	drvs->jabber_events = port_stats->jabber_events;
490 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
491 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
492 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
493 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
494 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
495 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
496 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
497 }
498 
499 static void populate_be_v2_stats(struct be_adapter *adapter)
500 {
501 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
502 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
503 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
504 	struct be_port_rxf_stats_v2 *port_stats =
505 					&rxf_stats->port[adapter->port_num];
506 	struct be_drv_stats *drvs = &adapter->drv_stats;
507 
508 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
509 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
510 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
511 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
512 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
513 	drvs->rx_control_frames = port_stats->rx_control_frames;
514 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
515 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
516 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
517 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
518 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
519 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
520 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
521 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
522 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
523 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
524 	drvs->rx_dropped_header_too_small =
525 		port_stats->rx_dropped_header_too_small;
526 	drvs->rx_input_fifo_overflow_drop =
527 		port_stats->rx_input_fifo_overflow_drop;
528 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
529 	drvs->rx_alignment_symbol_errors =
530 		port_stats->rx_alignment_symbol_errors;
531 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
532 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
533 	drvs->tx_controlframes = port_stats->tx_controlframes;
534 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
535 	drvs->jabber_events = port_stats->jabber_events;
536 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
537 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
538 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
539 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
540 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
541 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
542 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
543 	if (be_roce_supported(adapter)) {
544 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
545 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
546 		drvs->rx_roce_frames = port_stats->roce_frames_received;
547 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
548 		drvs->roce_drops_payload_len =
549 			port_stats->roce_drops_payload_len;
550 	}
551 }
552 
553 static void populate_lancer_stats(struct be_adapter *adapter)
554 {
555 	struct be_drv_stats *drvs = &adapter->drv_stats;
556 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
557 
558 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
559 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
560 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
561 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
562 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
563 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
564 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
565 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
566 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
567 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
568 	drvs->rx_dropped_tcp_length =
569 				pport_stats->rx_dropped_invalid_tcp_length;
570 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
571 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
572 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
573 	drvs->rx_dropped_header_too_small =
574 				pport_stats->rx_dropped_header_too_small;
575 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
576 	drvs->rx_address_filtered =
577 					pport_stats->rx_address_filtered +
578 					pport_stats->rx_vlan_filtered;
579 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
580 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
581 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
582 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
583 	drvs->jabber_events = pport_stats->rx_jabbers;
584 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
585 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
586 	drvs->rx_drops_too_many_frags =
587 				pport_stats->rx_drops_too_many_frags_lo;
588 }
589 
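/* Accumulate a 16-bit HW counter, which wraps at 65535, into a 32-bit
 * driver-maintained counter.
 */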
590 static void accumulate_16bit_val(u32 *acc, u16 val)
591 {
592 #define lo(x)			(x & 0xFFFF)
593 #define hi(x)			(x & 0xFFFF0000)
594 	bool wrapped = val < lo(*acc);
595 	u32 newacc = hi(*acc) + val;
596 
597 	if (wrapped)
598 		newacc += 65536;
599 	ACCESS_ONCE(*acc) = newacc;
600 }
601 
602 static void populate_erx_stats(struct be_adapter *adapter,
603 			       struct be_rx_obj *rxo, u32 erx_stat)
604 {
605 	if (!BEx_chip(adapter))
606 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
607 	else
608 		/* The erx HW counter below can wrap around after 65535.
609 		 * The driver accumulates it into a 32-bit value.
610 		 */
611 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
612 				     (u16)erx_stat);
613 }
614 
615 void be_parse_stats(struct be_adapter *adapter)
616 {
617 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
618 	struct be_rx_obj *rxo;
619 	int i;
620 	u32 erx_stat;
621 
622 	if (lancer_chip(adapter)) {
623 		populate_lancer_stats(adapter);
624 	} else {
625 		if (BE2_chip(adapter))
626 			populate_be_v0_stats(adapter);
627 		else if (BE3_chip(adapter))
628 			/* for BE3 */
629 			populate_be_v1_stats(adapter);
630 		else
631 			populate_be_v2_stats(adapter);
632 
633 		/* erx_v2 is longer than v0 and v1; use v2 for v0/v1 access as well */
634 		for_all_rx_queues(adapter, rxo, i) {
635 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
636 			populate_erx_stats(adapter, rxo, erx_stat);
637 		}
638 	}
639 }
640 
641 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
642 						struct rtnl_link_stats64 *stats)
643 {
644 	struct be_adapter *adapter = netdev_priv(netdev);
645 	struct be_drv_stats *drvs = &adapter->drv_stats;
646 	struct be_rx_obj *rxo;
647 	struct be_tx_obj *txo;
648 	u64 pkts, bytes;
649 	unsigned int start;
650 	int i;
651 
652 	for_all_rx_queues(adapter, rxo, i) {
653 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
654 
655 		do {
656 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
657 			pkts = rx_stats(rxo)->rx_pkts;
658 			bytes = rx_stats(rxo)->rx_bytes;
659 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
660 		stats->rx_packets += pkts;
661 		stats->rx_bytes += bytes;
662 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
663 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
664 					rx_stats(rxo)->rx_drops_no_frags;
665 	}
666 
667 	for_all_tx_queues(adapter, txo, i) {
668 		const struct be_tx_stats *tx_stats = tx_stats(txo);
669 
670 		do {
671 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
672 			pkts = tx_stats(txo)->tx_pkts;
673 			bytes = tx_stats(txo)->tx_bytes;
674 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
675 		stats->tx_packets += pkts;
676 		stats->tx_bytes += bytes;
677 	}
678 
679 	/* bad pkts received */
680 	stats->rx_errors = drvs->rx_crc_errors +
681 		drvs->rx_alignment_symbol_errors +
682 		drvs->rx_in_range_errors +
683 		drvs->rx_out_range_errors +
684 		drvs->rx_frame_too_long +
685 		drvs->rx_dropped_too_small +
686 		drvs->rx_dropped_too_short +
687 		drvs->rx_dropped_header_too_small +
688 		drvs->rx_dropped_tcp_length +
689 		drvs->rx_dropped_runt;
690 
691 	/* detailed rx errors */
692 	stats->rx_length_errors = drvs->rx_in_range_errors +
693 		drvs->rx_out_range_errors +
694 		drvs->rx_frame_too_long;
695 
696 	stats->rx_crc_errors = drvs->rx_crc_errors;
697 
698 	/* frame alignment errors */
699 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
700 
701 	/* receiver fifo overrun */
702 	/* drops_no_pbuf is not per i/f; it's per BE card */
703 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
704 				drvs->rx_input_fifo_overflow_drop +
705 				drvs->rx_drops_no_pbuf;
706 	return stats;
707 }
708 
709 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
710 {
711 	struct net_device *netdev = adapter->netdev;
712 
713 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
714 		netif_carrier_off(netdev);
715 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
716 	}
717 
718 	if (link_status)
719 		netif_carrier_on(netdev);
720 	else
721 		netif_carrier_off(netdev);
722 
723 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
724 }
725 
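/* Length of the protocol headers that get duplicated in each TSO segment */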
726 static int be_gso_hdr_len(struct sk_buff *skb)
727 {
728 	if (skb->encapsulation)
729 		return skb_inner_transport_offset(skb) +
730 		       inner_tcp_hdrlen(skb);
731 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
732 }
733 
734 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
735 {
736 	struct be_tx_stats *stats = tx_stats(txo);
737 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
738 	/* Account for headers which get duplicated in TSO pkt */
739 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
740 
741 	u64_stats_update_begin(&stats->sync);
742 	stats->tx_reqs++;
743 	stats->tx_bytes += skb->len + dup_hdr_len;
744 	stats->tx_pkts += tx_pkts;
745 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
746 		stats->tx_vxlan_offload_pkts += tx_pkts;
747 	u64_stats_update_end(&stats->sync);
748 }
749 
750 /* Returns number of WRBs needed for the skb */
751 static u32 skb_wrb_cnt(struct sk_buff *skb)
752 {
753 	/* +1 for the header wrb */
754 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
755 }
756 
757 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
758 {
759 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
760 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
761 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
762 	wrb->rsvd0 = 0;
763 }
764 
765 /* A dummy wrb is just all zeros. A separate routine is used for the dummy
766  * wrb to avoid the swap and shift/mask operations in wrb_fill().
767  */
768 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
769 {
770 	wrb->frag_pa_hi = 0;
771 	wrb->frag_pa_lo = 0;
772 	wrb->frag_len = 0;
773 	wrb->rsvd0 = 0;
774 }
775 
776 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
777 				     struct sk_buff *skb)
778 {
779 	u8 vlan_prio;
780 	u16 vlan_tag;
781 
782 	vlan_tag = skb_vlan_tag_get(skb);
783 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
784 	/* If the vlan priority provided by the OS is NOT in the available bmap */
785 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
786 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
787 				adapter->recommended_prio_bits;
788 
789 	return vlan_tag;
790 }
791 
792 /* Used only for IP tunnel packets */
793 static u16 skb_inner_ip_proto(struct sk_buff *skb)
794 {
795 	return (inner_ip_hdr(skb)->version == 4) ?
796 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
797 }
798 
799 static u16 skb_ip_proto(struct sk_buff *skb)
800 {
801 	return (ip_hdr(skb)->version == 4) ?
802 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
803 }
804 
805 static inline bool be_is_txq_full(struct be_tx_obj *txo)
806 {
807 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
808 }
809 
810 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
811 {
812 	return atomic_read(&txo->q.used) < txo->q.len / 2;
813 }
814 
815 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
816 {
817 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
818 }
819 
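/* Derive the TX WRB offload parameters (LSO, checksum, VLAN) from the skb */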
820 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
821 				       struct sk_buff *skb,
822 				       struct be_wrb_params *wrb_params)
823 {
824 	u16 proto;
825 
826 	if (skb_is_gso(skb)) {
827 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
828 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
829 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
830 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
831 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
832 		if (skb->encapsulation) {
833 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
834 			proto = skb_inner_ip_proto(skb);
835 		} else {
836 			proto = skb_ip_proto(skb);
837 		}
838 		if (proto == IPPROTO_TCP)
839 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
840 		else if (proto == IPPROTO_UDP)
841 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
842 	}
843 
844 	if (skb_vlan_tag_present(skb)) {
845 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
846 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
847 	}
848 
849 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
850 }
851 
852 static void wrb_fill_hdr(struct be_adapter *adapter,
853 			 struct be_eth_hdr_wrb *hdr,
854 			 struct be_wrb_params *wrb_params,
855 			 struct sk_buff *skb)
856 {
857 	memset(hdr, 0, sizeof(*hdr));
858 
859 	SET_TX_WRB_HDR_BITS(crc, hdr,
860 			    BE_WRB_F_GET(wrb_params->features, CRC));
861 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
862 			    BE_WRB_F_GET(wrb_params->features, IPCS));
863 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
864 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
865 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
866 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
867 
868 	SET_TX_WRB_HDR_BITS(lso, hdr,
869 			    BE_WRB_F_GET(wrb_params->features, LSO));
870 	SET_TX_WRB_HDR_BITS(lso6, hdr,
871 			    BE_WRB_F_GET(wrb_params->features, LSO6));
872 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
873 
874 	/* The hack to skip HW VLAN tagging needs evt = 1 and compl = 0. When
875 	 * this hack is not needed, the evt bit is set while ringing the DB.
876 	 */
877 	SET_TX_WRB_HDR_BITS(event, hdr,
878 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
879 	SET_TX_WRB_HDR_BITS(vlan, hdr,
880 			    BE_WRB_F_GET(wrb_params->features, VLAN));
881 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
882 
883 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
884 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
885 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
886 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
887 }
888 
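/* DMA unmap the TX fragment buffer described by the given WRB */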
889 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
890 			  bool unmap_single)
891 {
892 	dma_addr_t dma;
893 	u32 frag_len = le32_to_cpu(wrb->frag_len);
894 
896 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
897 		(u64)le32_to_cpu(wrb->frag_pa_lo);
898 	if (frag_len) {
899 		if (unmap_single)
900 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
901 		else
902 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
903 	}
904 }
905 
906 /* Grab a WRB header for xmit */
907 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
908 {
909 	u32 head = txo->q.head;
910 
911 	queue_head_inc(&txo->q);
912 	return head;
913 }
914 
915 /* Set up the WRB header for xmit */
916 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
917 				struct be_tx_obj *txo,
918 				struct be_wrb_params *wrb_params,
919 				struct sk_buff *skb, u16 head)
920 {
921 	u32 num_frags = skb_wrb_cnt(skb);
922 	struct be_queue_info *txq = &txo->q;
923 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
924 
925 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
926 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
927 
928 	BUG_ON(txo->sent_skb_list[head]);
929 	txo->sent_skb_list[head] = skb;
930 	txo->last_req_hdr = head;
931 	atomic_add(num_frags, &txq->used);
932 	txo->last_req_wrb_cnt = num_frags;
933 	txo->pend_wrb_cnt += num_frags;
934 }
935 
936 /* Setup a WRB fragment (buffer descriptor) for xmit */
937 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
938 				 int len)
939 {
940 	struct be_eth_wrb *wrb;
941 	struct be_queue_info *txq = &txo->q;
942 
943 	wrb = queue_head_node(txq);
944 	wrb_fill(wrb, busaddr, len);
945 	queue_head_inc(txq);
946 }
947 
948 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
949  * was invoked. The producer index is restored to the previous packet and the
950  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
951  */
952 static void be_xmit_restore(struct be_adapter *adapter,
953 			    struct be_tx_obj *txo, u32 head, bool map_single,
954 			    u32 copied)
955 {
956 	struct device *dev;
957 	struct be_eth_wrb *wrb;
958 	struct be_queue_info *txq = &txo->q;
959 
960 	dev = &adapter->pdev->dev;
961 	txq->head = head;
962 
963 	/* skip the first wrb (hdr); it's not mapped */
964 	queue_head_inc(txq);
965 	while (copied) {
966 		wrb = queue_head_node(txq);
967 		unmap_tx_frag(dev, wrb, map_single);
968 		map_single = false;
969 		copied -= le32_to_cpu(wrb->frag_len);
970 		queue_head_inc(txq);
971 	}
972 
973 	txq->head = head;
974 }
975 
976 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
977  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
978  * of WRBs used up by the packet.
979  */
980 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
981 			   struct sk_buff *skb,
982 			   struct be_wrb_params *wrb_params)
983 {
984 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
985 	struct device *dev = &adapter->pdev->dev;
986 	struct be_queue_info *txq = &txo->q;
987 	bool map_single = false;
988 	u32 head = txq->head;
989 	dma_addr_t busaddr;
990 	int len;
991 
992 	head = be_tx_get_wrb_hdr(txo);
993 
994 	if (skb->len > skb->data_len) {
995 		len = skb_headlen(skb);
996 
997 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
998 		if (dma_mapping_error(dev, busaddr))
999 			goto dma_err;
1000 		map_single = true;
1001 		be_tx_setup_wrb_frag(txo, busaddr, len);
1002 		copied += len;
1003 	}
1004 
1005 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1006 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1007 		len = skb_frag_size(frag);
1008 
1009 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1010 		if (dma_mapping_error(dev, busaddr))
1011 			goto dma_err;
1012 		be_tx_setup_wrb_frag(txo, busaddr, len);
1013 		copied += len;
1014 	}
1015 
1016 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1017 
1018 	be_tx_stats_update(txo, skb);
1019 	return wrb_cnt;
1020 
1021 dma_err:
1022 	adapter->drv_stats.dma_map_errors++;
1023 	be_xmit_restore(adapter, txo, head, map_single, copied);
1024 	return 0;
1025 }
1026 
1027 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1028 {
1029 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1030 }
1031 
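/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet
 * itself, for cases where HW VLAN tagging must be skipped.
 */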
1032 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1033 					     struct sk_buff *skb,
1034 					     struct be_wrb_params
1035 					     *wrb_params)
1036 {
1037 	u16 vlan_tag = 0;
1038 
1039 	skb = skb_share_check(skb, GFP_ATOMIC);
1040 	if (unlikely(!skb))
1041 		return skb;
1042 
1043 	if (skb_vlan_tag_present(skb))
1044 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1045 
1046 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1047 		if (!vlan_tag)
1048 			vlan_tag = adapter->pvid;
1049 		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1050 		 * skip VLAN insertion
1051 		 */
1052 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1053 	}
1054 
1055 	if (vlan_tag) {
1056 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1057 						vlan_tag);
1058 		if (unlikely(!skb))
1059 			return skb;
1060 		skb->vlan_tci = 0;
1061 	}
1062 
1063 	/* Insert the outer VLAN, if any */
1064 	if (adapter->qnq_vid) {
1065 		vlan_tag = adapter->qnq_vid;
1066 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1067 						vlan_tag);
1068 		if (unlikely(!skb))
1069 			return skb;
1070 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1071 	}
1072 
1073 	return skb;
1074 }
1075 
1076 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1077 {
1078 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1079 	u16 offset = ETH_HLEN;
1080 
1081 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1082 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1083 
1084 		offset += sizeof(struct ipv6hdr);
1085 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1086 		    ip6h->nexthdr != NEXTHDR_UDP) {
1087 			struct ipv6_opt_hdr *ehdr =
1088 				(struct ipv6_opt_hdr *)(skb->data + offset);
1089 
1090 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1091 			if (ehdr->hdrlen == 0xff)
1092 				return true;
1093 		}
1094 	}
1095 	return false;
1096 }
1097 
1098 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1099 {
1100 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1101 }
1102 
1103 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1104 {
1105 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1106 }
1107 
1108 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1109 						  struct sk_buff *skb,
1110 						  struct be_wrb_params
1111 						  *wrb_params)
1112 {
1113 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1114 	unsigned int eth_hdr_len;
1115 	struct iphdr *ip;
1116 
1117 	/* For padded packets, BE HW modifies the tot_len field in the IP header
1118 	 * incorrectly when a VLAN tag is inserted by HW.
1119 	 * For padded packets, Lancer computes an incorrect checksum.
1120 	 */
1121 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1122 						VLAN_ETH_HLEN : ETH_HLEN;
1123 	if (skb->len <= 60 &&
1124 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1125 	    is_ipv4_pkt(skb)) {
1126 		ip = (struct iphdr *)ip_hdr(skb);
1127 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1128 	}
1129 
1130 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1131 	 * tagging in pvid-tagging mode
1132 	 */
1133 	if (be_pvid_tagging_enabled(adapter) &&
1134 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1135 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1136 
1137 	/* HW has a bug wherein it will calculate the CSUM for VLAN
1138 	 * pkts even when CSUM offload is disabled.
1139 	 * Manually insert the VLAN tag in the pkt.
1140 	 */
1141 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1142 	    skb_vlan_tag_present(skb)) {
1143 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1144 		if (unlikely(!skb))
1145 			goto err;
1146 	}
1147 
1148 	/* HW may lock up when VLAN HW tagging is requested on
1149 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1150 	 * skip HW tagging is not enabled by FW.
1151 	 */
1152 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1153 		     (adapter->pvid || adapter->qnq_vid) &&
1154 		     !qnq_async_evt_rcvd(adapter)))
1155 		goto tx_drop;
1156 
1157 	/* Manual VLAN tag insertion to prevent an ASIC lockup
1158 	 * when the ASIC inserts a VLAN tag into
1159 	 * certain ipv6 packets. Insert VLAN tags in the driver,
1160 	 * and set the event, completion and vlan bits accordingly
1161 	 * in the Tx WRB.
1162 	 */
1163 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1164 	    be_vlan_tag_tx_chk(adapter, skb)) {
1165 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1166 		if (unlikely(!skb))
1167 			goto err;
1168 	}
1169 
1170 	return skb;
1171 tx_drop:
1172 	dev_kfree_skb_any(skb);
1173 err:
1174 	return NULL;
1175 }
1176 
1177 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1178 					   struct sk_buff *skb,
1179 					   struct be_wrb_params *wrb_params)
1180 {
1181 	int err;
1182 
1183 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1184 	 * packets that are 32 bytes or less may cause a transmit stall
1185 	 * on that port. The workaround is to pad such packets
1186 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1187 	 */
1188 	if (skb->len <= 32) {
1189 		if (skb_put_padto(skb, 36))
1190 			return NULL;
1191 	}
1192 
1193 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1194 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1195 		if (!skb)
1196 			return NULL;
1197 	}
1198 
1199 	/* The stack can send us skbs with length greater than
1200 	 * what the HW can handle. Trim the extra bytes.
1201 	 */
1202 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1203 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1204 	WARN_ON(err);
1205 
1206 	return skb;
1207 }
1208 
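/* Notify HW of all WRBs pending on the TX queue. On non-Lancer chips a dummy
 * WRB is added first if an odd number of WRBs is pending.
 */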
1209 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1210 {
1211 	struct be_queue_info *txq = &txo->q;
1212 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1213 
1214 	/* Mark the last request eventable if it hasn't been marked already */
1215 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1216 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1217 
1218 	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1219 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1220 		wrb_fill_dummy(queue_head_node(txq));
1221 		queue_head_inc(txq);
1222 		atomic_inc(&txq->used);
1223 		txo->pend_wrb_cnt++;
1224 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1225 					   TX_HDR_WRB_NUM_SHIFT);
1226 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1227 					  TX_HDR_WRB_NUM_SHIFT);
1228 	}
1229 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1230 	txo->pend_wrb_cnt = 0;
1231 }
1232 
1233 /* OS2BMC related */
1234 
1235 #define DHCP_CLIENT_PORT	68
1236 #define DHCP_SERVER_PORT	67
1237 #define NET_BIOS_PORT1		137
1238 #define NET_BIOS_PORT2		138
1239 #define DHCPV6_RAS_PORT		547
1240 
1241 #define is_mc_allowed_on_bmc(adapter, eh)	\
1242 	(!is_multicast_filt_enabled(adapter) &&	\
1243 	 is_multicast_ether_addr(eh->h_dest) &&	\
1244 	 !is_broadcast_ether_addr(eh->h_dest))
1245 
1246 #define is_bc_allowed_on_bmc(adapter, eh)	\
1247 	(!is_broadcast_filt_enabled(adapter) &&	\
1248 	 is_broadcast_ether_addr(eh->h_dest))
1249 
1250 #define is_arp_allowed_on_bmc(adapter, skb)	\
1251 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1252 
1253 #define is_broadcast_packet(eh, adapter)	\
1254 		(is_multicast_ether_addr(eh->h_dest) && \
1255 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1256 
1257 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1258 
1259 #define is_arp_filt_enabled(adapter)	\
1260 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1261 
1262 #define is_dhcp_client_filt_enabled(adapter)	\
1263 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1264 
1265 #define is_dhcp_srvr_filt_enabled(adapter)	\
1266 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1267 
1268 #define is_nbios_filt_enabled(adapter)	\
1269 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1270 
1271 #define is_ipv6_na_filt_enabled(adapter)	\
1272 		(adapter->bmc_filt_mask &	\
1273 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1274 
1275 #define is_ipv6_ra_filt_enabled(adapter)	\
1276 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1277 
1278 #define is_ipv6_ras_filt_enabled(adapter)	\
1279 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1280 
1281 #define is_broadcast_filt_enabled(adapter)	\
1282 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1283 
1284 #define is_multicast_filt_enabled(adapter)	\
1285 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1286 
1287 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1288 			       struct sk_buff **skb)
1289 {
1290 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1291 	bool os2bmc = false;
1292 
1293 	if (!be_is_os2bmc_enabled(adapter))
1294 		goto done;
1295 
1296 	if (!is_multicast_ether_addr(eh->h_dest))
1297 		goto done;
1298 
1299 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1300 	    is_bc_allowed_on_bmc(adapter, eh) ||
1301 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1302 		os2bmc = true;
1303 		goto done;
1304 	}
1305 
1306 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1307 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1308 		u8 nexthdr = hdr->nexthdr;
1309 
1310 		if (nexthdr == IPPROTO_ICMPV6) {
1311 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1312 
1313 			switch (icmp6->icmp6_type) {
1314 			case NDISC_ROUTER_ADVERTISEMENT:
1315 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1316 				goto done;
1317 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1318 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1319 				goto done;
1320 			default:
1321 				break;
1322 			}
1323 		}
1324 	}
1325 
1326 	if (is_udp_pkt((*skb))) {
1327 		struct udphdr *udp = udp_hdr((*skb));
1328 
1329 		switch (ntohs(udp->dest)) {
1330 		case DHCP_CLIENT_PORT:
1331 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1332 			goto done;
1333 		case DHCP_SERVER_PORT:
1334 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1335 			goto done;
1336 		case NET_BIOS_PORT1:
1337 		case NET_BIOS_PORT2:
1338 			os2bmc = is_nbios_filt_enabled(adapter);
1339 			goto done;
1340 		case DHCPV6_RAS_PORT:
1341 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1342 			goto done;
1343 		default:
1344 			break;
1345 		}
1346 	}
1347 done:
1348 	/* For VLAN packets destined to the BMC, the asic expects
1349 	 * the vlan tag to be inline in the packet.
1350 	 */
1351 	if (os2bmc)
1352 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1353 
1354 	return os2bmc;
1355 }
1356 
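/* Main transmit routine: applies xmit workarounds, enqueues the skb's WRBs
 * (and enqueues the pkt a 2nd time with the mgmt bit set for OS2BMC traffic),
 * and rings the TX doorbell when flushing.
 */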
1357 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1358 {
1359 	struct be_adapter *adapter = netdev_priv(netdev);
1360 	u16 q_idx = skb_get_queue_mapping(skb);
1361 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1362 	struct be_wrb_params wrb_params = { 0 };
1363 	bool flush = !skb->xmit_more;
1364 	u16 wrb_cnt;
1365 
1366 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1367 	if (unlikely(!skb))
1368 		goto drop;
1369 
1370 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1371 
1372 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1373 	if (unlikely(!wrb_cnt)) {
1374 		dev_kfree_skb_any(skb);
1375 		goto drop;
1376 	}
1377 
1378 	/* If os2bmc is enabled and the pkt is destined to the BMC,
1379 	 * enqueue the pkt a 2nd time with the mgmt bit set.
1380 	 */
1381 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1382 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1383 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384 		if (unlikely(!wrb_cnt))
1385 			goto drop;
1386 		else
1387 			skb_get(skb);
1388 	}
1389 
1390 	if (be_is_txq_full(txo)) {
1391 		netif_stop_subqueue(netdev, q_idx);
1392 		tx_stats(txo)->tx_stops++;
1393 	}
1394 
1395 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1396 		be_xmit_flush(adapter, txo);
1397 
1398 	return NETDEV_TX_OK;
1399 drop:
1400 	tx_stats(txo)->tx_drv_drops++;
1401 	/* Flush the already enqueued tx requests */
1402 	if (flush && txo->pend_wrb_cnt)
1403 		be_xmit_flush(adapter, txo);
1404 
1405 	return NETDEV_TX_OK;
1406 }
1407 
1408 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1409 {
1410 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1411 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1412 }
1413 
1414 static int be_set_vlan_promisc(struct be_adapter *adapter)
1415 {
1416 	struct device *dev = &adapter->pdev->dev;
1417 	int status;
1418 
1419 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1420 		return 0;
1421 
1422 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1423 	if (!status) {
1424 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1425 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1426 	} else {
1427 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1428 	}
1429 	return status;
1430 }
1431 
1432 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1433 {
1434 	struct device *dev = &adapter->pdev->dev;
1435 	int status;
1436 
1437 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1438 	if (!status) {
1439 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1440 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1441 	}
1442 	return status;
1443 }
1444 
1445 /*
1446  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1447  * If the user configures more, place BE in vlan promiscuous mode.
1448  */
1449 static int be_vid_config(struct be_adapter *adapter)
1450 {
1451 	struct device *dev = &adapter->pdev->dev;
1452 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1453 	u16 num = 0, i = 0;
1454 	int status = 0;
1455 
1456 	/* No need to change the VLAN state if the I/F is in promiscuous mode */
1457 	if (adapter->netdev->flags & IFF_PROMISC)
1458 		return 0;
1459 
1460 	if (adapter->vlans_added > be_max_vlans(adapter))
1461 		return be_set_vlan_promisc(adapter);
1462 
1463 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1464 		status = be_clear_vlan_promisc(adapter);
1465 		if (status)
1466 			return status;
1467 	}
1468 	/* Construct VLAN Table to give to HW */
1469 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1470 		vids[num++] = cpu_to_le16(i);
1471 
1472 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1473 	if (status) {
1474 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1475 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1476 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1477 		    addl_status(status) ==
1478 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1479 			return be_set_vlan_promisc(adapter);
1480 	}
1481 	return status;
1482 }
1483 
1484 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1485 {
1486 	struct be_adapter *adapter = netdev_priv(netdev);
1487 	int status = 0;
1488 
1489 	mutex_lock(&adapter->rx_filter_lock);
1490 
1491 	/* Packets with VID 0 are always received by Lancer by default */
1492 	if (lancer_chip(adapter) && vid == 0)
1493 		goto done;
1494 
1495 	if (test_bit(vid, adapter->vids))
1496 		goto done;
1497 
1498 	set_bit(vid, adapter->vids);
1499 	adapter->vlans_added++;
1500 
1501 	status = be_vid_config(adapter);
1502 done:
1503 	mutex_unlock(&adapter->rx_filter_lock);
1504 	return status;
1505 }
1506 
1507 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1508 {
1509 	struct be_adapter *adapter = netdev_priv(netdev);
1510 	int status = 0;
1511 
1512 	mutex_lock(&adapter->rx_filter_lock);
1513 
1514 	/* Packets with VID 0 are always received by Lancer by default */
1515 	if (lancer_chip(adapter) && vid == 0)
1516 		goto done;
1517 
1518 	if (!test_bit(vid, adapter->vids))
1519 		goto done;
1520 
1521 	clear_bit(vid, adapter->vids);
1522 	adapter->vlans_added--;
1523 
1524 	status = be_vid_config(adapter);
1525 done:
1526 	mutex_unlock(&adapter->rx_filter_lock);
1527 	return status;
1528 }
1529 
1530 static void be_set_all_promisc(struct be_adapter *adapter)
1531 {
1532 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1533 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1534 }
1535 
1536 static void be_set_mc_promisc(struct be_adapter *adapter)
1537 {
1538 	int status;
1539 
1540 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1541 		return;
1542 
1543 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1544 	if (!status)
1545 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1546 }
1547 
1548 static void be_set_uc_promisc(struct be_adapter *adapter)
1549 {
1550 	int status;
1551 
1552 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1553 		return;
1554 
1555 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1556 	if (!status)
1557 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1558 }
1559 
1560 static void be_clear_uc_promisc(struct be_adapter *adapter)
1561 {
1562 	int status;
1563 
1564 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1565 		return;
1566 
1567 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1568 	if (!status)
1569 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1570 }
1571 
1572 /* The below 2 functions are the callback args for __dev_mc_sync()/__dev_uc_sync().
1573  * We use a single callback function for both sync and unsync. We don't really
1574  * add/remove addresses through this callback; we only use it to detect changes
1575  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1576  */
1577 static int be_uc_list_update(struct net_device *netdev,
1578 			     const unsigned char *addr)
1579 {
1580 	struct be_adapter *adapter = netdev_priv(netdev);
1581 
1582 	adapter->update_uc_list = true;
1583 	return 0;
1584 }
1585 
1586 static int be_mc_list_update(struct net_device *netdev,
1587 			     const unsigned char *addr)
1588 {
1589 	struct be_adapter *adapter = netdev_priv(netdev);
1590 
1591 	adapter->update_mc_list = true;
1592 	return 0;
1593 }
1594 
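/* Sync the netdev mc-list to the adapter and program the HW multicast filter,
 * falling back to mc-promisc mode if the list exceeds what the HW supports.
 */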
1595 static void be_set_mc_list(struct be_adapter *adapter)
1596 {
1597 	struct net_device *netdev = adapter->netdev;
1598 	struct netdev_hw_addr *ha;
1599 	bool mc_promisc = false;
1600 	int status;
1601 
1602 	netif_addr_lock_bh(netdev);
1603 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1604 
1605 	if (netdev->flags & IFF_PROMISC) {
1606 		adapter->update_mc_list = false;
1607 	} else if (netdev->flags & IFF_ALLMULTI ||
1608 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1609 		/* Enable multicast promisc if num configured exceeds
1610 		 * what we support
1611 		 */
1612 		mc_promisc = true;
1613 		adapter->update_mc_list = false;
1614 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1615 		/* Update mc-list unconditionally if the iface was previously
1616 		 * in mc-promisc mode and now is out of that mode.
1617 		 */
1618 		adapter->update_mc_list = true;
1619 	}
1620 
1621 	if (adapter->update_mc_list) {
1622 		int i = 0;
1623 
1624 		/* cache the mc-list in adapter */
1625 		netdev_for_each_mc_addr(ha, netdev) {
1626 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1627 			i++;
1628 		}
1629 		adapter->mc_count = netdev_mc_count(netdev);
1630 	}
1631 	netif_addr_unlock_bh(netdev);
1632 
1633 	if (mc_promisc) {
1634 		be_set_mc_promisc(adapter);
1635 	} else if (adapter->update_mc_list) {
1636 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1637 		if (!status)
1638 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1639 		else
1640 			be_set_mc_promisc(adapter);
1641 
1642 		adapter->update_mc_list = false;
1643 	}
1644 }
1645 
1646 static void be_clear_mc_list(struct be_adapter *adapter)
1647 {
1648 	struct net_device *netdev = adapter->netdev;
1649 
1650 	__dev_mc_unsync(netdev, NULL);
1651 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1652 	adapter->mc_count = 0;
1653 }
1654 
1655 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1656 {
1657 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1658 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1659 		return 0;
1660 	}
1661 
1662 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1663 			       adapter->if_handle,
1664 			       &adapter->pmac_id[uc_idx + 1], 0);
1665 }
1666 
1667 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1668 {
1669 	if (pmac_id == adapter->pmac_id[0])
1670 		return;
1671 
1672 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1673 }
1674 
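/* Sync the netdev uc-list to the adapter and program the unicast MACs,
 * falling back to uc-promisc mode if the list exceeds what the HW supports.
 */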
1675 static void be_set_uc_list(struct be_adapter *adapter)
1676 {
1677 	struct net_device *netdev = adapter->netdev;
1678 	struct netdev_hw_addr *ha;
1679 	bool uc_promisc = false;
1680 	int curr_uc_macs = 0, i;
1681 
1682 	netif_addr_lock_bh(netdev);
1683 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1684 
1685 	if (netdev->flags & IFF_PROMISC) {
1686 		adapter->update_uc_list = false;
1687 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1688 		uc_promisc = true;
1689 		adapter->update_uc_list = false;
1690 	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1691 		/* Update uc-list unconditionally if the iface was previously
1692 		 * in uc-promisc mode and now is out of that mode.
1693 		 */
1694 		adapter->update_uc_list = true;
1695 	}
1696 
1697 	if (adapter->update_uc_list) {
1698 		/* cache the uc-list in adapter array */
1699 		i = 0;
1700 		netdev_for_each_uc_addr(ha, netdev) {
1701 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1702 			i++;
1703 		}
1704 		curr_uc_macs = netdev_uc_count(netdev);
1705 	}
1706 	netif_addr_unlock_bh(netdev);
1707 
1708 	if (uc_promisc) {
1709 		be_set_uc_promisc(adapter);
1710 	} else if (adapter->update_uc_list) {
1711 		be_clear_uc_promisc(adapter);
1712 
1713 		for (i = 0; i < adapter->uc_macs; i++)
1714 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1715 
1716 		for (i = 0; i < curr_uc_macs; i++)
1717 			be_uc_mac_add(adapter, i);
1718 		adapter->uc_macs = curr_uc_macs;
1719 		adapter->update_uc_list = false;
1720 	}
1721 }
1722 
1723 static void be_clear_uc_list(struct be_adapter *adapter)
1724 {
1725 	struct net_device *netdev = adapter->netdev;
1726 	int i;
1727 
1728 	__dev_uc_unsync(netdev, NULL);
1729 	for (i = 0; i < adapter->uc_macs; i++)
1730 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1731 
1732 	adapter->uc_macs = 0;
1733 }
1734 
1735 static void __be_set_rx_mode(struct be_adapter *adapter)
1736 {
1737 	struct net_device *netdev = adapter->netdev;
1738 
1739 	mutex_lock(&adapter->rx_filter_lock);
1740 
1741 	if (netdev->flags & IFF_PROMISC) {
1742 		if (!be_in_all_promisc(adapter))
1743 			be_set_all_promisc(adapter);
1744 	} else if (be_in_all_promisc(adapter)) {
1745 		/* We need to re-program the vlan-list or clear
1746 		 * vlan-promisc mode (if needed) when the interface
1747 		 * comes out of promisc mode.
1748 		 */
1749 		be_vid_config(adapter);
1750 	}
1751 
1752 	be_set_uc_list(adapter);
1753 	be_set_mc_list(adapter);
1754 
1755 	mutex_unlock(&adapter->rx_filter_lock);
1756 }
1757 
1758 static void be_work_set_rx_mode(struct work_struct *work)
1759 {
1760 	struct be_cmd_work *cmd_work =
1761 				container_of(work, struct be_cmd_work, work);
1762 
1763 	__be_set_rx_mode(cmd_work->adapter);
1764 	kfree(cmd_work);
1765 }
1766 
1767 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1768 {
1769 	struct be_adapter *adapter = netdev_priv(netdev);
1770 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1771 	int status;
1772 
1773 	if (!sriov_enabled(adapter))
1774 		return -EPERM;
1775 
1776 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1777 		return -EINVAL;
1778 
1779 	/* Proceed further only if the user-provided MAC is different
1780 	 * from the active MAC
1781 	 */
1782 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1783 		return 0;
1784 
1785 	if (BEx_chip(adapter)) {
1786 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1787 				vf + 1);
1788 
1789 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1790 					 &vf_cfg->pmac_id, vf + 1);
1791 	} else {
1792 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1793 					vf + 1);
1794 	}
1795 
1796 	if (status) {
1797 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1798 			mac, vf, status);
1799 		return be_cmd_status(status);
1800 	}
1801 
1802 	ether_addr_copy(vf_cfg->mac_addr, mac);
1803 
1804 	return 0;
1805 }
1806 
1807 static int be_get_vf_config(struct net_device *netdev, int vf,
1808 			    struct ifla_vf_info *vi)
1809 {
1810 	struct be_adapter *adapter = netdev_priv(netdev);
1811 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1812 
1813 	if (!sriov_enabled(adapter))
1814 		return -EPERM;
1815 
1816 	if (vf >= adapter->num_vfs)
1817 		return -EINVAL;
1818 
1819 	vi->vf = vf;
1820 	vi->max_tx_rate = vf_cfg->tx_rate;
1821 	vi->min_tx_rate = 0;
1822 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1823 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1824 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1825 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1826 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1827 
1828 	return 0;
1829 }
1830 
1831 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1832 {
1833 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1834 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1835 	int vf_if_id = vf_cfg->if_handle;
1836 	int status;
1837 
1838 	/* Enable Transparent VLAN Tagging */
1839 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1840 	if (status)
1841 		return status;
1842 
1843 	/* When TVT is enabled, clear any pre-programmed VLAN filters on the VF */
1844 	vids[0] = 0;
1845 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1846 	if (!status)
1847 		dev_info(&adapter->pdev->dev,
1848 			 "Cleared guest VLANs on VF%d", vf);
1849 
1850 	/* After TVT is enabled, disallow the VF from programming VLAN filters */
1851 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1852 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1853 						  ~BE_PRIV_FILTMGMT, vf + 1);
1854 		if (!status)
1855 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1856 	}
1857 	return 0;
1858 }
1859 
1860 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1861 {
1862 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1863 	struct device *dev = &adapter->pdev->dev;
1864 	int status;
1865 
1866 	/* Reset Transparent VLAN Tagging. */
1867 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1868 				       vf_cfg->if_handle, 0, 0);
1869 	if (status)
1870 		return status;
1871 
1872 	/* Allow VFs to program VLAN filtering */
1873 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1874 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1875 						  BE_PRIV_FILTMGMT, vf + 1);
1876 		if (!status) {
1877 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1878 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1879 		}
1880 	}
1881 
1882 	dev_info(dev,
1883 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1884 	return 0;
1885 }
1886 
1887 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1888 			  __be16 vlan_proto)
1889 {
1890 	struct be_adapter *adapter = netdev_priv(netdev);
1891 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1892 	int status;
1893 
1894 	if (!sriov_enabled(adapter))
1895 		return -EPERM;
1896 
1897 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1898 		return -EINVAL;
1899 
1900 	if (vlan_proto != htons(ETH_P_8021Q))
1901 		return -EPROTONOSUPPORT;
1902 
1903 	if (vlan || qos) {
1904 		vlan |= qos << VLAN_PRIO_SHIFT;
1905 		status = be_set_vf_tvt(adapter, vf, vlan);
1906 	} else {
1907 		status = be_clear_vf_tvt(adapter, vf);
1908 	}
1909 
1910 	if (status) {
1911 		dev_err(&adapter->pdev->dev,
1912 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1913 			status);
1914 		return be_cmd_status(status);
1915 	}
1916 
1917 	vf_cfg->vlan_tag = vlan;
1918 	return 0;
1919 }
1920 
1921 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1922 			     int min_tx_rate, int max_tx_rate)
1923 {
1924 	struct be_adapter *adapter = netdev_priv(netdev);
1925 	struct device *dev = &adapter->pdev->dev;
1926 	int percent_rate, status = 0;
1927 	u16 link_speed = 0;
1928 	u8 link_status;
1929 
1930 	if (!sriov_enabled(adapter))
1931 		return -EPERM;
1932 
1933 	if (vf >= adapter->num_vfs)
1934 		return -EINVAL;
1935 
1936 	if (min_tx_rate)
1937 		return -EINVAL;
1938 
1939 	if (!max_tx_rate)
1940 		goto config_qos;
1941 
1942 	status = be_cmd_link_status_query(adapter, &link_speed,
1943 					  &link_status, 0);
1944 	if (status)
1945 		goto err;
1946 
1947 	if (!link_status) {
1948 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1949 		status = -ENETDOWN;
1950 		goto err;
1951 	}
1952 
1953 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1954 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1955 			link_speed);
1956 		status = -EINVAL;
1957 		goto err;
1958 	}
1959 
1960 	/* On Skyhawk the QoS setting must be done only as a % value */
1961 	percent_rate = link_speed / 100;
1962 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1963 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1964 			percent_rate);
1965 		status = -EINVAL;
1966 		goto err;
1967 	}
1968 
1969 config_qos:
1970 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1971 	if (status)
1972 		goto err;
1973 
1974 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1975 	return 0;
1976 
1977 err:
1978 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1979 		max_tx_rate, vf);
1980 	return be_cmd_status(status);
1981 }
1982 
1983 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1984 				int link_state)
1985 {
1986 	struct be_adapter *adapter = netdev_priv(netdev);
1987 	int status;
1988 
1989 	if (!sriov_enabled(adapter))
1990 		return -EPERM;
1991 
1992 	if (vf >= adapter->num_vfs)
1993 		return -EINVAL;
1994 
1995 	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
1996 	if (status) {
1997 		dev_err(&adapter->pdev->dev,
1998 			"Link state change on VF %d failed: %#x\n", vf, status);
1999 		return be_cmd_status(status);
2000 	}
2001 
2002 	adapter->vf_cfg[vf].plink_tracking = link_state;
2003 
2004 	return 0;
2005 }
2006 
2007 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2008 {
2009 	struct be_adapter *adapter = netdev_priv(netdev);
2010 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2011 	u8 spoofchk;
2012 	int status;
2013 
2014 	if (!sriov_enabled(adapter))
2015 		return -EPERM;
2016 
2017 	if (vf >= adapter->num_vfs)
2018 		return -EINVAL;
2019 
2020 	if (BEx_chip(adapter))
2021 		return -EOPNOTSUPP;
2022 
2023 	if (enable == vf_cfg->spoofchk)
2024 		return 0;
2025 
2026 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2027 
2028 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2029 				       0, spoofchk);
2030 	if (status) {
2031 		dev_err(&adapter->pdev->dev,
2032 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2033 		return be_cmd_status(status);
2034 	}
2035 
2036 	vf_cfg->spoofchk = enable;
2037 	return 0;
2038 }
2039 
2040 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2041 			  ulong now)
2042 {
2043 	aic->rx_pkts_prev = rx_pkts;
2044 	aic->tx_reqs_prev = tx_pkts;
2045 	aic->jiffies = now;
2046 }
2047 
2048 static int be_get_new_eqd(struct be_eq_obj *eqo)
2049 {
2050 	struct be_adapter *adapter = eqo->adapter;
2051 	int eqd, start;
2052 	struct be_aic_obj *aic;
2053 	struct be_rx_obj *rxo;
2054 	struct be_tx_obj *txo;
2055 	u64 rx_pkts = 0, tx_pkts = 0;
2056 	ulong now;
2057 	u32 pps, delta;
2058 	int i;
2059 
2060 	aic = &adapter->aic_obj[eqo->idx];
2061 	if (!aic->enable) {
2062 		if (aic->jiffies)
2063 			aic->jiffies = 0;
2064 		eqd = aic->et_eqd;
2065 		return eqd;
2066 	}
2067 
2068 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2069 		do {
2070 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2071 			rx_pkts += rxo->stats.rx_pkts;
2072 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2073 	}
2074 
2075 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2076 		do {
2077 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2078 			tx_pkts += txo->stats.tx_reqs;
2079 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2080 	}
2081 
2082 	/* Skip if the counters wrapped around or this is the first calculation */
2083 	now = jiffies;
2084 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2085 	    rx_pkts < aic->rx_pkts_prev ||
2086 	    tx_pkts < aic->tx_reqs_prev) {
2087 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2088 		return aic->prev_eqd;
2089 	}
2090 
2091 	delta = jiffies_to_msecs(now - aic->jiffies);
2092 	if (delta == 0)
2093 		return aic->prev_eqd;
2094 
2095 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2096 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2097 	eqd = (pps / 15000) << 2;
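	/* Worked example (illustrative): at ~150,000 pkts/s the formula above
	 * gives eqd = (150000 / 15000) << 2 = 40; rates below ~30,000 pkts/s
	 * yield eqd < 8 and are forced to 0 (no interrupt delay) before the
	 * clamping to [min_eqd, max_eqd] below.
	 */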
2098 
2099 	if (eqd < 8)
2100 		eqd = 0;
2101 	eqd = min_t(u32, eqd, aic->max_eqd);
2102 	eqd = max_t(u32, eqd, aic->min_eqd);
2103 
2104 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2105 
2106 	return eqd;
2107 }
2108 
2109 /* For Skyhawk-R only */
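/* Maps the delay computed by be_get_new_eqd() to one of the R2I_DLY_ENC_*
 * multiplier encodings, which is later passed to be_eq_notify() when
 * re-arming the EQ.
 */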
2110 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2111 {
2112 	struct be_adapter *adapter = eqo->adapter;
2113 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2114 	ulong now = jiffies;
2115 	int eqd;
2116 	u32 mult_enc;
2117 
2118 	if (!aic->enable)
2119 		return 0;
2120 
2121 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2122 		eqd = aic->prev_eqd;
2123 	else
2124 		eqd = be_get_new_eqd(eqo);
2125 
2126 	if (eqd > 100)
2127 		mult_enc = R2I_DLY_ENC_1;
2128 	else if (eqd > 60)
2129 		mult_enc = R2I_DLY_ENC_2;
2130 	else if (eqd > 20)
2131 		mult_enc = R2I_DLY_ENC_3;
2132 	else
2133 		mult_enc = R2I_DLY_ENC_0;
2134 
2135 	aic->prev_eqd = eqd;
2136 
2137 	return mult_enc;
2138 }
2139 
2140 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2141 {
2142 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2143 	struct be_aic_obj *aic;
2144 	struct be_eq_obj *eqo;
2145 	int i, num = 0, eqd;
2146 
2147 	for_all_evt_queues(adapter, eqo, i) {
2148 		aic = &adapter->aic_obj[eqo->idx];
2149 		eqd = be_get_new_eqd(eqo);
2150 		if (force_update || eqd != aic->prev_eqd) {
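			/* The (eqd * 65) / 100 scaling below converts the
			 * delay in usecs into the multiplier units consumed
			 * by be_cmd_modify_eqd() (assumption: the FW expects
			 * roughly 0.65 multiplier units per usec).
			 */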
2151 			set_eqd[num].delay_multiplier = (eqd * 65) / 100;
2152 			set_eqd[num].eq_id = eqo->q.id;
2153 			aic->prev_eqd = eqd;
2154 			num++;
2155 		}
2156 	}
2157 
2158 	if (num)
2159 		be_cmd_modify_eqd(adapter, set_eqd, num);
2160 }
2161 
2162 static void be_rx_stats_update(struct be_rx_obj *rxo,
2163 			       struct be_rx_compl_info *rxcp)
2164 {
2165 	struct be_rx_stats *stats = rx_stats(rxo);
2166 
2167 	u64_stats_update_begin(&stats->sync);
2168 	stats->rx_compl++;
2169 	stats->rx_bytes += rxcp->pkt_size;
2170 	stats->rx_pkts++;
2171 	if (rxcp->tunneled)
2172 		stats->rx_vxlan_offload_pkts++;
2173 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2174 		stats->rx_mcast_pkts++;
2175 	if (rxcp->err)
2176 		stats->rx_compl_err++;
2177 	u64_stats_update_end(&stats->sync);
2178 }
2179 
2180 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2181 {
2182 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2183 	 * Also ignore ipcksm for IPv6 packets
2184 	 */
2185 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2186 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2187 }
2188 
2189 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2190 {
2191 	struct be_adapter *adapter = rxo->adapter;
2192 	struct be_rx_page_info *rx_page_info;
2193 	struct be_queue_info *rxq = &rxo->q;
2194 	u32 frag_idx = rxq->tail;
2195 
2196 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2197 	BUG_ON(!rx_page_info->page);
2198 
2199 	if (rx_page_info->last_frag) {
2200 		dma_unmap_page(&adapter->pdev->dev,
2201 			       dma_unmap_addr(rx_page_info, bus),
2202 			       adapter->big_page_size, DMA_FROM_DEVICE);
2203 		rx_page_info->last_frag = false;
2204 	} else {
2205 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2206 					dma_unmap_addr(rx_page_info, bus),
2207 					rx_frag_size, DMA_FROM_DEVICE);
2208 	}
2209 
2210 	queue_tail_inc(rxq);
2211 	atomic_dec(&rxq->used);
2212 	return rx_page_info;
2213 }
2214 
2215 /* Throw away the data in the Rx completion */
2216 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2217 				struct be_rx_compl_info *rxcp)
2218 {
2219 	struct be_rx_page_info *page_info;
2220 	u16 i, num_rcvd = rxcp->num_rcvd;
2221 
2222 	for (i = 0; i < num_rcvd; i++) {
2223 		page_info = get_rx_page_info(rxo);
2224 		put_page(page_info->page);
2225 		memset(page_info, 0, sizeof(*page_info));
2226 	}
2227 }
2228 
2229 /*
2230  * skb_fill_rx_data forms a complete skb for an ether frame
2231  * indicated by rxcp.
2232  */
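/* Frames of up to BE_HDR_LEN bytes are copied entirely into the skb linear
 * area; for larger frames only the Ethernet header is copied and the rest
 * of the data is attached as page fragments.
 */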
2233 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2234 			     struct be_rx_compl_info *rxcp)
2235 {
2236 	struct be_rx_page_info *page_info;
2237 	u16 i, j;
2238 	u16 hdr_len, curr_frag_len, remaining;
2239 	u8 *start;
2240 
2241 	page_info = get_rx_page_info(rxo);
2242 	start = page_address(page_info->page) + page_info->page_offset;
2243 	prefetch(start);
2244 
2245 	/* Copy data in the first descriptor of this completion */
2246 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2247 
2248 	skb->len = curr_frag_len;
2249 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2250 		memcpy(skb->data, start, curr_frag_len);
2251 		/* Complete packet has now been moved to data */
2252 		put_page(page_info->page);
2253 		skb->data_len = 0;
2254 		skb->tail += curr_frag_len;
2255 	} else {
2256 		hdr_len = ETH_HLEN;
2257 		memcpy(skb->data, start, hdr_len);
2258 		skb_shinfo(skb)->nr_frags = 1;
2259 		skb_frag_set_page(skb, 0, page_info->page);
2260 		skb_shinfo(skb)->frags[0].page_offset =
2261 					page_info->page_offset + hdr_len;
2262 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2263 				  curr_frag_len - hdr_len);
2264 		skb->data_len = curr_frag_len - hdr_len;
2265 		skb->truesize += rx_frag_size;
2266 		skb->tail += hdr_len;
2267 	}
2268 	page_info->page = NULL;
2269 
2270 	if (rxcp->pkt_size <= rx_frag_size) {
2271 		BUG_ON(rxcp->num_rcvd != 1);
2272 		return;
2273 	}
2274 
2275 	/* More frags present for this completion */
2276 	remaining = rxcp->pkt_size - curr_frag_len;
2277 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2278 		page_info = get_rx_page_info(rxo);
2279 		curr_frag_len = min(remaining, rx_frag_size);
2280 
2281 		/* Coalesce all frags from the same physical page in one slot */
2282 		if (page_info->page_offset == 0) {
2283 			/* Fresh page */
2284 			j++;
2285 			skb_frag_set_page(skb, j, page_info->page);
2286 			skb_shinfo(skb)->frags[j].page_offset =
2287 							page_info->page_offset;
2288 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2289 			skb_shinfo(skb)->nr_frags++;
2290 		} else {
2291 			put_page(page_info->page);
2292 		}
2293 
2294 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2295 		skb->len += curr_frag_len;
2296 		skb->data_len += curr_frag_len;
2297 		skb->truesize += rx_frag_size;
2298 		remaining -= curr_frag_len;
2299 		page_info->page = NULL;
2300 	}
2301 	BUG_ON(j > MAX_SKB_FRAGS);
2302 }
2303 
2304 /* Process the RX completion indicated by rxcp when GRO is disabled */
2305 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2306 				struct be_rx_compl_info *rxcp)
2307 {
2308 	struct be_adapter *adapter = rxo->adapter;
2309 	struct net_device *netdev = adapter->netdev;
2310 	struct sk_buff *skb;
2311 
2312 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2313 	if (unlikely(!skb)) {
2314 		rx_stats(rxo)->rx_drops_no_skbs++;
2315 		be_rx_compl_discard(rxo, rxcp);
2316 		return;
2317 	}
2318 
2319 	skb_fill_rx_data(rxo, skb, rxcp);
2320 
2321 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2322 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2323 	else
2324 		skb_checksum_none_assert(skb);
2325 
2326 	skb->protocol = eth_type_trans(skb, netdev);
2327 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2328 	if (netdev->features & NETIF_F_RXHASH)
2329 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2330 
2331 	skb->csum_level = rxcp->tunneled;
2332 	skb_mark_napi_id(skb, napi);
2333 
2334 	if (rxcp->vlanf)
2335 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2336 
2337 	netif_receive_skb(skb);
2338 }
2339 
2340 /* Process the RX completion indicated by rxcp when GRO is enabled */
2341 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2342 				    struct napi_struct *napi,
2343 				    struct be_rx_compl_info *rxcp)
2344 {
2345 	struct be_adapter *adapter = rxo->adapter;
2346 	struct be_rx_page_info *page_info;
2347 	struct sk_buff *skb = NULL;
2348 	u16 remaining, curr_frag_len;
2349 	u16 i, j;
2350 
2351 	skb = napi_get_frags(napi);
2352 	if (!skb) {
2353 		be_rx_compl_discard(rxo, rxcp);
2354 		return;
2355 	}
2356 
2357 	remaining = rxcp->pkt_size;
2358 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2359 		page_info = get_rx_page_info(rxo);
2360 
2361 		curr_frag_len = min(remaining, rx_frag_size);
2362 
2363 		/* Coalesce all frags from the same physical page in one slot */
2364 		if (i == 0 || page_info->page_offset == 0) {
2365 			/* First frag or Fresh page */
2366 			j++;
2367 			skb_frag_set_page(skb, j, page_info->page);
2368 			skb_shinfo(skb)->frags[j].page_offset =
2369 							page_info->page_offset;
2370 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2371 		} else {
2372 			put_page(page_info->page);
2373 		}
2374 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2375 		skb->truesize += rx_frag_size;
2376 		remaining -= curr_frag_len;
2377 		memset(page_info, 0, sizeof(*page_info));
2378 	}
2379 	BUG_ON(j > MAX_SKB_FRAGS);
2380 
2381 	skb_shinfo(skb)->nr_frags = j + 1;
2382 	skb->len = rxcp->pkt_size;
2383 	skb->data_len = rxcp->pkt_size;
2384 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2385 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2386 	if (adapter->netdev->features & NETIF_F_RXHASH)
2387 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2388 
2389 	skb->csum_level = rxcp->tunneled;
2390 
2391 	if (rxcp->vlanf)
2392 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2393 
2394 	napi_gro_frags(napi);
2395 }
2396 
2397 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2398 				 struct be_rx_compl_info *rxcp)
2399 {
2400 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2401 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2402 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2403 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2404 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2405 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2406 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2407 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2408 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2409 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2410 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2411 	if (rxcp->vlanf) {
2412 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2413 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2414 	}
2415 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2416 	rxcp->tunneled =
2417 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2418 }
2419 
2420 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2421 				 struct be_rx_compl_info *rxcp)
2422 {
2423 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2424 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2425 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2426 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2427 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2428 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2429 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2430 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2431 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2432 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2433 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2434 	if (rxcp->vlanf) {
2435 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2436 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2437 	}
2438 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2439 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2440 }
2441 
2442 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2443 {
2444 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2445 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2446 	struct be_adapter *adapter = rxo->adapter;
2447 
2448 	/* For checking the valid bit it is Ok to use either definition as the
2449 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2450 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2451 		return NULL;
2452 
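	/* Ensure the valid bit is read before the rest of the completion
	 * dwords (same load-ordering requirement as in be_tx_compl_get()).
	 */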
2453 	rmb();
2454 	be_dws_le_to_cpu(compl, sizeof(*compl));
2455 
2456 	if (adapter->be3_native)
2457 		be_parse_rx_compl_v1(compl, rxcp);
2458 	else
2459 		be_parse_rx_compl_v0(compl, rxcp);
2460 
2461 	if (rxcp->ip_frag)
2462 		rxcp->l4_csum = 0;
2463 
2464 	if (rxcp->vlanf) {
2465 		/* In QNQ modes, if qnq bit is not set, then the packet was
2466 		 * tagged only with the transparent outer vlan-tag and must
2467 		 * not be treated as a vlan packet by the host
2468 		 */
2469 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2470 			rxcp->vlanf = 0;
2471 
2472 		if (!lancer_chip(adapter))
2473 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2474 
2475 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2476 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2477 			rxcp->vlanf = 0;
2478 	}
2479 
2480 	/* As the compl has been parsed, reset it; we won't touch it again */
2481 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2482 
2483 	queue_tail_inc(&rxo->cq);
2484 	return rxcp;
2485 }
2486 
2487 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2488 {
2489 	u32 order = get_order(size);
2490 
2491 	if (order > 0)
2492 		gfp |= __GFP_COMP;
2493 	return  alloc_pages(gfp, order);
2494 }
2495 
2496 /*
2497  * Allocate a page, split it into fragments of size rx_frag_size and post
2498  * them as receive buffers to BE
2499  */
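/* Each big page (adapter->big_page_size) is carved into rx_frag_size chunks;
 * only the page_info entry holding a page's last chunk stores the page DMA
 * address (last_frag) so that the page is unmapped exactly once when its
 * final fragment is consumed.
 */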
2500 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2501 {
2502 	struct be_adapter *adapter = rxo->adapter;
2503 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2504 	struct be_queue_info *rxq = &rxo->q;
2505 	struct page *pagep = NULL;
2506 	struct device *dev = &adapter->pdev->dev;
2507 	struct be_eth_rx_d *rxd;
2508 	u64 page_dmaaddr = 0, frag_dmaaddr;
2509 	u32 posted, page_offset = 0, notify = 0;
2510 
2511 	page_info = &rxo->page_info_tbl[rxq->head];
2512 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2513 		if (!pagep) {
2514 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2515 			if (unlikely(!pagep)) {
2516 				rx_stats(rxo)->rx_post_fail++;
2517 				break;
2518 			}
2519 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2520 						    adapter->big_page_size,
2521 						    DMA_FROM_DEVICE);
2522 			if (dma_mapping_error(dev, page_dmaaddr)) {
2523 				put_page(pagep);
2524 				pagep = NULL;
2525 				adapter->drv_stats.dma_map_errors++;
2526 				break;
2527 			}
2528 			page_offset = 0;
2529 		} else {
2530 			get_page(pagep);
2531 			page_offset += rx_frag_size;
2532 		}
2533 		page_info->page_offset = page_offset;
2534 		page_info->page = pagep;
2535 
2536 		rxd = queue_head_node(rxq);
2537 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2538 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2539 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2540 
2541 		/* Any space left in the current big page for another frag? */
2542 		if ((page_offset + rx_frag_size + rx_frag_size) >
2543 					adapter->big_page_size) {
2544 			pagep = NULL;
2545 			page_info->last_frag = true;
2546 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2547 		} else {
2548 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2549 		}
2550 
2551 		prev_page_info = page_info;
2552 		queue_head_inc(rxq);
2553 		page_info = &rxo->page_info_tbl[rxq->head];
2554 	}
2555 
2556 	/* Mark the last frag of a page when we break out of the above loop
2557 	 * with no more slots available in the RXQ
2558 	 */
2559 	if (pagep) {
2560 		prev_page_info->last_frag = true;
2561 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2562 	}
2563 
2564 	if (posted) {
2565 		atomic_add(posted, &rxq->used);
2566 		if (rxo->rx_post_starved)
2567 			rxo->rx_post_starved = false;
2568 		do {
2569 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2570 			be_rxq_notify(adapter, rxq->id, notify);
2571 			posted -= notify;
2572 		} while (posted);
2573 	} else if (atomic_read(&rxq->used) == 0) {
2574 		/* Let be_worker replenish when memory is available */
2575 		rxo->rx_post_starved = true;
2576 	}
2577 }
2578 
2579 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2580 {
2581 	struct be_queue_info *tx_cq = &txo->cq;
2582 	struct be_tx_compl_info *txcp = &txo->txcp;
2583 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2584 
2585 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2586 		return NULL;
2587 
2588 	/* Ensure load ordering of valid bit dword and other dwords below */
2589 	rmb();
2590 	be_dws_le_to_cpu(compl, sizeof(*compl));
2591 
2592 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2593 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2594 
2595 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2596 	queue_tail_inc(tx_cq);
2597 	return txcp;
2598 }
2599 
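/* Unmaps the WRBs of the TX request(s) completed up to last_index and frees
 * the associated skb(s); returns the number of WRBs processed so the caller
 * can adjust txq->used.
 */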
2600 static u16 be_tx_compl_process(struct be_adapter *adapter,
2601 			       struct be_tx_obj *txo, u16 last_index)
2602 {
2603 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2604 	struct be_queue_info *txq = &txo->q;
2605 	struct sk_buff *skb = NULL;
2606 	bool unmap_skb_hdr = false;
2607 	struct be_eth_wrb *wrb;
2608 	u16 num_wrbs = 0;
2609 	u32 frag_index;
2610 
2611 	do {
2612 		if (sent_skbs[txq->tail]) {
2613 			/* Free skb from prev req */
2614 			if (skb)
2615 				dev_consume_skb_any(skb);
2616 			skb = sent_skbs[txq->tail];
2617 			sent_skbs[txq->tail] = NULL;
2618 			queue_tail_inc(txq);  /* skip hdr wrb */
2619 			num_wrbs++;
2620 			unmap_skb_hdr = true;
2621 		}
2622 		wrb = queue_tail_node(txq);
2623 		frag_index = txq->tail;
2624 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2625 			      (unmap_skb_hdr && skb_headlen(skb)));
2626 		unmap_skb_hdr = false;
2627 		queue_tail_inc(txq);
2628 		num_wrbs++;
2629 	} while (frag_index != last_index);
2630 	dev_consume_skb_any(skb);
2631 
2632 	return num_wrbs;
2633 }
2634 
2635 /* Return the number of events in the event queue */
2636 static inline int events_get(struct be_eq_obj *eqo)
2637 {
2638 	struct be_eq_entry *eqe;
2639 	int num = 0;
2640 
2641 	do {
2642 		eqe = queue_tail_node(&eqo->q);
2643 		if (eqe->evt == 0)
2644 			break;
2645 
2646 		rmb();
2647 		eqe->evt = 0;
2648 		num++;
2649 		queue_tail_inc(&eqo->q);
2650 	} while (true);
2651 
2652 	return num;
2653 }
2654 
2655 /* Leaves the EQ in a disarmed state */
2656 static void be_eq_clean(struct be_eq_obj *eqo)
2657 {
2658 	int num = events_get(eqo);
2659 
2660 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2661 }
2662 
2663 /* Free posted rx buffers that were not used */
2664 static void be_rxq_clean(struct be_rx_obj *rxo)
2665 {
2666 	struct be_queue_info *rxq = &rxo->q;
2667 	struct be_rx_page_info *page_info;
2668 
2669 	while (atomic_read(&rxq->used) > 0) {
2670 		page_info = get_rx_page_info(rxo);
2671 		put_page(page_info->page);
2672 		memset(page_info, 0, sizeof(*page_info));
2673 	}
2674 	BUG_ON(atomic_read(&rxq->used));
2675 	rxq->tail = 0;
2676 	rxq->head = 0;
2677 }
2678 
2679 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2680 {
2681 	struct be_queue_info *rx_cq = &rxo->cq;
2682 	struct be_rx_compl_info *rxcp;
2683 	struct be_adapter *adapter = rxo->adapter;
2684 	int flush_wait = 0;
2685 
2686 	/* Consume pending rx completions.
2687 	 * Wait for the flush completion (identified by zero num_rcvd)
2688 	 * to arrive. Notify CQ even when there are no more CQ entries
2689 	 * for HW to flush partially coalesced CQ entries.
2690 	 * In Lancer, there is no need to wait for flush compl.
2691 	 */
2692 	for (;;) {
2693 		rxcp = be_rx_compl_get(rxo);
2694 		if (!rxcp) {
2695 			if (lancer_chip(adapter))
2696 				break;
2697 
2698 			if (flush_wait++ > 50 ||
2699 			    be_check_error(adapter,
2700 					   BE_ERROR_HW)) {
2701 				dev_warn(&adapter->pdev->dev,
2702 					 "did not receive flush compl\n");
2703 				break;
2704 			}
2705 			be_cq_notify(adapter, rx_cq->id, true, 0);
2706 			mdelay(1);
2707 		} else {
2708 			be_rx_compl_discard(rxo, rxcp);
2709 			be_cq_notify(adapter, rx_cq->id, false, 1);
2710 			if (rxcp->num_rcvd == 0)
2711 				break;
2712 		}
2713 	}
2714 
2715 	/* After cleanup, leave the CQ in unarmed state */
2716 	be_cq_notify(adapter, rx_cq->id, false, 0);
2717 }
2718 
2719 static void be_tx_compl_clean(struct be_adapter *adapter)
2720 {
2721 	struct device *dev = &adapter->pdev->dev;
2722 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2723 	struct be_tx_compl_info *txcp;
2724 	struct be_queue_info *txq;
2725 	u32 end_idx, notified_idx;
2726 	struct be_tx_obj *txo;
2727 	int i, pending_txqs;
2728 
2729 	/* Stop polling for compls when HW has been silent for 10ms */
2730 	do {
2731 		pending_txqs = adapter->num_tx_qs;
2732 
2733 		for_all_tx_queues(adapter, txo, i) {
2734 			cmpl = 0;
2735 			num_wrbs = 0;
2736 			txq = &txo->q;
2737 			while ((txcp = be_tx_compl_get(txo))) {
2738 				num_wrbs +=
2739 					be_tx_compl_process(adapter, txo,
2740 							    txcp->end_index);
2741 				cmpl++;
2742 			}
2743 			if (cmpl) {
2744 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2745 				atomic_sub(num_wrbs, &txq->used);
2746 				timeo = 0;
2747 			}
2748 			if (!be_is_tx_compl_pending(txo))
2749 				pending_txqs--;
2750 		}
2751 
2752 		if (pending_txqs == 0 || ++timeo > 10 ||
2753 		    be_check_error(adapter, BE_ERROR_HW))
2754 			break;
2755 
2756 		mdelay(1);
2757 	} while (true);
2758 
2759 	/* Free enqueued TX that was never notified to HW */
2760 	for_all_tx_queues(adapter, txo, i) {
2761 		txq = &txo->q;
2762 
2763 		if (atomic_read(&txq->used)) {
2764 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2765 				 i, atomic_read(&txq->used));
2766 			notified_idx = txq->tail;
2767 			end_idx = txq->tail;
2768 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2769 				  txq->len);
2770 			/* Use the tx-compl process logic to handle requests
2771 			 * that were not sent to the HW.
2772 			 */
2773 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2774 			atomic_sub(num_wrbs, &txq->used);
2775 			BUG_ON(atomic_read(&txq->used));
2776 			txo->pend_wrb_cnt = 0;
2777 			/* Since hw was never notified of these requests,
2778 			 * reset TXQ indices
2779 			 */
2780 			txq->head = notified_idx;
2781 			txq->tail = notified_idx;
2782 		}
2783 	}
2784 }
2785 
2786 static void be_evt_queues_destroy(struct be_adapter *adapter)
2787 {
2788 	struct be_eq_obj *eqo;
2789 	int i;
2790 
2791 	for_all_evt_queues(adapter, eqo, i) {
2792 		if (eqo->q.created) {
2793 			be_eq_clean(eqo);
2794 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2795 			netif_napi_del(&eqo->napi);
2796 			free_cpumask_var(eqo->affinity_mask);
2797 		}
2798 		be_queue_free(adapter, &eqo->q);
2799 	}
2800 }
2801 
2802 static int be_evt_queues_create(struct be_adapter *adapter)
2803 {
2804 	struct be_queue_info *eq;
2805 	struct be_eq_obj *eqo;
2806 	struct be_aic_obj *aic;
2807 	int i, rc;
2808 
2809 	/* need enough EQs to service both RX and TX queues */
2810 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2811 				    max(adapter->cfg_num_rx_irqs,
2812 					adapter->cfg_num_tx_irqs));
2813 
2814 	for_all_evt_queues(adapter, eqo, i) {
2815 		int numa_node = dev_to_node(&adapter->pdev->dev);
2816 
2817 		aic = &adapter->aic_obj[i];
2818 		eqo->adapter = adapter;
2819 		eqo->idx = i;
2820 		aic->max_eqd = BE_MAX_EQD;
2821 		aic->enable = true;
2822 
2823 		eq = &eqo->q;
2824 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2825 				    sizeof(struct be_eq_entry));
2826 		if (rc)
2827 			return rc;
2828 
2829 		rc = be_cmd_eq_create(adapter, eqo);
2830 		if (rc)
2831 			return rc;
2832 
2833 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2834 			return -ENOMEM;
2835 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2836 				eqo->affinity_mask);
2837 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2838 			       BE_NAPI_WEIGHT);
2839 	}
2840 	return 0;
2841 }
2842 
2843 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2844 {
2845 	struct be_queue_info *q;
2846 
2847 	q = &adapter->mcc_obj.q;
2848 	if (q->created)
2849 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2850 	be_queue_free(adapter, q);
2851 
2852 	q = &adapter->mcc_obj.cq;
2853 	if (q->created)
2854 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2855 	be_queue_free(adapter, q);
2856 }
2857 
2858 /* Must be called only after TX qs are created as MCC shares TX EQ */
2859 static int be_mcc_queues_create(struct be_adapter *adapter)
2860 {
2861 	struct be_queue_info *q, *cq;
2862 
2863 	cq = &adapter->mcc_obj.cq;
2864 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2865 			   sizeof(struct be_mcc_compl)))
2866 		goto err;
2867 
2868 	/* Use the default EQ for MCC completions */
2869 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2870 		goto mcc_cq_free;
2871 
2872 	q = &adapter->mcc_obj.q;
2873 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2874 		goto mcc_cq_destroy;
2875 
2876 	if (be_cmd_mccq_create(adapter, q, cq))
2877 		goto mcc_q_free;
2878 
2879 	return 0;
2880 
2881 mcc_q_free:
2882 	be_queue_free(adapter, q);
2883 mcc_cq_destroy:
2884 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2885 mcc_cq_free:
2886 	be_queue_free(adapter, cq);
2887 err:
2888 	return -1;
2889 }
2890 
2891 static void be_tx_queues_destroy(struct be_adapter *adapter)
2892 {
2893 	struct be_queue_info *q;
2894 	struct be_tx_obj *txo;
2895 	u8 i;
2896 
2897 	for_all_tx_queues(adapter, txo, i) {
2898 		q = &txo->q;
2899 		if (q->created)
2900 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2901 		be_queue_free(adapter, q);
2902 
2903 		q = &txo->cq;
2904 		if (q->created)
2905 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2906 		be_queue_free(adapter, q);
2907 	}
2908 }
2909 
2910 static int be_tx_qs_create(struct be_adapter *adapter)
2911 {
2912 	struct be_queue_info *cq;
2913 	struct be_tx_obj *txo;
2914 	struct be_eq_obj *eqo;
2915 	int status, i;
2916 
2917 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2918 
2919 	for_all_tx_queues(adapter, txo, i) {
2920 		cq = &txo->cq;
2921 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2922 					sizeof(struct be_eth_tx_compl));
2923 		if (status)
2924 			return status;
2925 
2926 		u64_stats_init(&txo->stats.sync);
2927 		u64_stats_init(&txo->stats.sync_compl);
2928 
2929 		/* If num_evt_qs is less than num_tx_qs, then more than
2930 		 * one TXQ shares an EQ
2931 		 */
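		/* For example, with 8 TX queues on 4 EQs, txq0 and txq4 map
		 * to eq0, txq1 and txq5 to eq1, and so on.
		 */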
2932 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2933 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2934 		if (status)
2935 			return status;
2936 
2937 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2938 					sizeof(struct be_eth_wrb));
2939 		if (status)
2940 			return status;
2941 
2942 		status = be_cmd_txq_create(adapter, txo);
2943 		if (status)
2944 			return status;
2945 
2946 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2947 				    eqo->idx);
2948 	}
2949 
2950 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2951 		 adapter->num_tx_qs);
2952 	return 0;
2953 }
2954 
2955 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2956 {
2957 	struct be_queue_info *q;
2958 	struct be_rx_obj *rxo;
2959 	int i;
2960 
2961 	for_all_rx_queues(adapter, rxo, i) {
2962 		q = &rxo->cq;
2963 		if (q->created)
2964 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2965 		be_queue_free(adapter, q);
2966 	}
2967 }
2968 
2969 static int be_rx_cqs_create(struct be_adapter *adapter)
2970 {
2971 	struct be_queue_info *eq, *cq;
2972 	struct be_rx_obj *rxo;
2973 	int rc, i;
2974 
2975 	adapter->num_rss_qs =
2976 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2977 
2978 	/* We'll use RSS only if at least 2 RSS rings are supported. */
2979 	if (adapter->num_rss_qs < 2)
2980 		adapter->num_rss_qs = 0;
2981 
2982 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2983 
2984 	/* When the interface is not capable of RSS rings (and there is no
2985 	 * need to create a default RXQ) we'll still need one RXQ
2986 	 */
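	/* e.g. with a single RX IRQ, num_rss_qs ends up 0 and a single
	 * non-RSS RXQ is used.
	 */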
2987 	if (adapter->num_rx_qs == 0)
2988 		adapter->num_rx_qs = 1;
2989 
2990 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2991 	for_all_rx_queues(adapter, rxo, i) {
2992 		rxo->adapter = adapter;
2993 		cq = &rxo->cq;
2994 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2995 				    sizeof(struct be_eth_rx_compl));
2996 		if (rc)
2997 			return rc;
2998 
2999 		u64_stats_init(&rxo->stats.sync);
3000 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3001 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3002 		if (rc)
3003 			return rc;
3004 	}
3005 
3006 	dev_info(&adapter->pdev->dev,
3007 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3008 	return 0;
3009 }
3010 
3011 static irqreturn_t be_intx(int irq, void *dev)
3012 {
3013 	struct be_eq_obj *eqo = dev;
3014 	struct be_adapter *adapter = eqo->adapter;
3015 	int num_evts = 0;
3016 
3017 	/* IRQ is not expected when NAPI is scheduled as the EQ
3018 	 * will not be armed.
3019 	 * But, this can happen on Lancer INTx where it takes
3020 	 * a while to de-assert INTx or in BE2 where occasionally
3021 	 * an interrupt may be raised even when EQ is unarmed.
3022 	 * If NAPI is already scheduled, then counting & notifying
3023 	 * events will orphan them.
3024 	 */
3025 	if (napi_schedule_prep(&eqo->napi)) {
3026 		num_evts = events_get(eqo);
3027 		__napi_schedule(&eqo->napi);
3028 		if (num_evts)
3029 			eqo->spurious_intr = 0;
3030 	}
3031 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3032 
3033 	/* Return IRQ_HANDLED only for the first spurious intr
3034 	 * after a valid intr to stop the kernel from branding
3035 	 * this irq as a bad one!
3036 	 */
3037 	if (num_evts || eqo->spurious_intr++ == 0)
3038 		return IRQ_HANDLED;
3039 	else
3040 		return IRQ_NONE;
3041 }
3042 
3043 static irqreturn_t be_msix(int irq, void *dev)
3044 {
3045 	struct be_eq_obj *eqo = dev;
3046 
3047 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3048 	napi_schedule(&eqo->napi);
3049 	return IRQ_HANDLED;
3050 }
3051 
3052 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3053 {
3054 	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3055 }
3056 
3057 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3058 			 int budget, int polling)
3059 {
3060 	struct be_adapter *adapter = rxo->adapter;
3061 	struct be_queue_info *rx_cq = &rxo->cq;
3062 	struct be_rx_compl_info *rxcp;
3063 	u32 work_done;
3064 	u32 frags_consumed = 0;
3065 
3066 	for (work_done = 0; work_done < budget; work_done++) {
3067 		rxcp = be_rx_compl_get(rxo);
3068 		if (!rxcp)
3069 			break;
3070 
3071 		/* Is it a flush compl that has no data */
3072 		if (unlikely(rxcp->num_rcvd == 0))
3073 			goto loop_continue;
3074 
3075 		/* Discard compls with partial DMA (Lancer B0) */
3076 		if (unlikely(!rxcp->pkt_size)) {
3077 			be_rx_compl_discard(rxo, rxcp);
3078 			goto loop_continue;
3079 		}
3080 
3081 		/* On BE drop pkts that arrive due to imperfect filtering in
3082 		 * promiscuous mode on some SKUs
3083 		 */
3084 		if (unlikely(rxcp->port != adapter->port_num &&
3085 			     !lancer_chip(adapter))) {
3086 			be_rx_compl_discard(rxo, rxcp);
3087 			goto loop_continue;
3088 		}
3089 
3090 		/* Don't do GRO when we're busy-polling */
3091 		if (do_gro(rxcp) && polling != BUSY_POLLING)
3092 			be_rx_compl_process_gro(rxo, napi, rxcp);
3093 		else
3094 			be_rx_compl_process(rxo, napi, rxcp);
3095 
3096 loop_continue:
3097 		frags_consumed += rxcp->num_rcvd;
3098 		be_rx_stats_update(rxo, rxcp);
3099 	}
3100 
3101 	if (work_done) {
3102 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3103 
3104 		/* When an rx-obj gets into post_starved state, just
3105 		 * let be_worker do the posting.
3106 		 */
3107 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3108 		    !rxo->rx_post_starved)
3109 			be_post_rx_frags(rxo, GFP_ATOMIC,
3110 					 max_t(u32, MAX_RX_POST,
3111 					       frags_consumed));
3112 	}
3113 
3114 	return work_done;
3115 }
3116 
3117 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3118 {
3119 	switch (status) {
3120 	case BE_TX_COMP_HDR_PARSE_ERR:
3121 		tx_stats(txo)->tx_hdr_parse_err++;
3122 		break;
3123 	case BE_TX_COMP_NDMA_ERR:
3124 		tx_stats(txo)->tx_dma_err++;
3125 		break;
3126 	case BE_TX_COMP_ACL_ERR:
3127 		tx_stats(txo)->tx_spoof_check_err++;
3128 		break;
3129 	}
3130 }
3131 
3132 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3133 {
3134 	switch (status) {
3135 	case LANCER_TX_COMP_LSO_ERR:
3136 		tx_stats(txo)->tx_tso_err++;
3137 		break;
3138 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3139 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3140 		tx_stats(txo)->tx_spoof_check_err++;
3141 		break;
3142 	case LANCER_TX_COMP_QINQ_ERR:
3143 		tx_stats(txo)->tx_qinq_err++;
3144 		break;
3145 	case LANCER_TX_COMP_PARITY_ERR:
3146 		tx_stats(txo)->tx_internal_parity_err++;
3147 		break;
3148 	case LANCER_TX_COMP_DMA_ERR:
3149 		tx_stats(txo)->tx_dma_err++;
3150 		break;
3151 	}
3152 }
3153 
3154 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3155 			  int idx)
3156 {
3157 	int num_wrbs = 0, work_done = 0;
3158 	struct be_tx_compl_info *txcp;
3159 
3160 	while ((txcp = be_tx_compl_get(txo))) {
3161 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3162 		work_done++;
3163 
3164 		if (txcp->status) {
3165 			if (lancer_chip(adapter))
3166 				lancer_update_tx_err(txo, txcp->status);
3167 			else
3168 				be_update_tx_err(txo, txcp->status);
3169 		}
3170 	}
3171 
3172 	if (work_done) {
3173 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3174 		atomic_sub(num_wrbs, &txo->q.used);
3175 
3176 		/* As Tx wrbs have been freed up, wake up netdev queue
3177 		 * if it was stopped due to lack of tx wrbs. */
3178 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3179 		    be_can_txq_wake(txo)) {
3180 			netif_wake_subqueue(adapter->netdev, idx);
3181 		}
3182 
3183 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3184 		tx_stats(txo)->tx_compl += work_done;
3185 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3186 	}
3187 }
3188 
3189 #ifdef CONFIG_NET_RX_BUSY_POLL
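/* NAPI and busy-poll contexts coordinate through eqo->lock and eqo->state:
 * whichever path grabs the EQ first marks it BE_EQ_NAPI or BE_EQ_POLL, and
 * a contending path records a *_YIELD flag and backs off instead of
 * processing the queues.
 */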
3190 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3191 {
3192 	bool status = true;
3193 
3194 	spin_lock(&eqo->lock); /* BH is already disabled */
3195 	if (eqo->state & BE_EQ_LOCKED) {
3196 		WARN_ON(eqo->state & BE_EQ_NAPI);
3197 		eqo->state |= BE_EQ_NAPI_YIELD;
3198 		status = false;
3199 	} else {
3200 		eqo->state = BE_EQ_NAPI;
3201 	}
3202 	spin_unlock(&eqo->lock);
3203 	return status;
3204 }
3205 
3206 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3207 {
3208 	spin_lock(&eqo->lock); /* BH is already disabled */
3209 
3210 	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3211 	eqo->state = BE_EQ_IDLE;
3212 
3213 	spin_unlock(&eqo->lock);
3214 }
3215 
3216 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3217 {
3218 	bool status = true;
3219 
3220 	spin_lock_bh(&eqo->lock);
3221 	if (eqo->state & BE_EQ_LOCKED) {
3222 		eqo->state |= BE_EQ_POLL_YIELD;
3223 		status = false;
3224 	} else {
3225 		eqo->state |= BE_EQ_POLL;
3226 	}
3227 	spin_unlock_bh(&eqo->lock);
3228 	return status;
3229 }
3230 
3231 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3232 {
3233 	spin_lock_bh(&eqo->lock);
3234 
3235 	WARN_ON(eqo->state & (BE_EQ_NAPI));
3236 	eqo->state = BE_EQ_IDLE;
3237 
3238 	spin_unlock_bh(&eqo->lock);
3239 }
3240 
3241 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3242 {
3243 	spin_lock_init(&eqo->lock);
3244 	eqo->state = BE_EQ_IDLE;
3245 }
3246 
3247 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3248 {
3249 	local_bh_disable();
3250 
3251 	/* It's enough to just acquire napi lock on the eqo to stop
3252 	 * be_busy_poll() from processing any queues.
3253 	 */
3254 	while (!be_lock_napi(eqo))
3255 		mdelay(1);
3256 
3257 	local_bh_enable();
3258 }
3259 
3260 #else /* CONFIG_NET_RX_BUSY_POLL */
3261 
3262 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3263 {
3264 	return true;
3265 }
3266 
3267 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3268 {
3269 }
3270 
3271 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3272 {
3273 	return false;
3274 }
3275 
3276 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3277 {
3278 }
3279 
3280 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3281 {
3282 }
3283 
3284 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3285 {
3286 }
3287 #endif /* CONFIG_NET_RX_BUSY_POLL */
3288 
3289 int be_poll(struct napi_struct *napi, int budget)
3290 {
3291 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3292 	struct be_adapter *adapter = eqo->adapter;
3293 	int max_work = 0, work, i, num_evts;
3294 	struct be_rx_obj *rxo;
3295 	struct be_tx_obj *txo;
3296 	u32 mult_enc = 0;
3297 
3298 	num_evts = events_get(eqo);
3299 
3300 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3301 		be_process_tx(adapter, txo, i);
3302 
3303 	if (be_lock_napi(eqo)) {
3304 		/* This loop will iterate twice for EQ0 in which
3305 		 * completions of the last RXQ (default one) are also processed.
3306 		 * For other EQs the loop iterates only once.
3307 		 */
3308 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3309 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3310 			max_work = max(work, max_work);
3311 		}
3312 		be_unlock_napi(eqo);
3313 	} else {
3314 		max_work = budget;
3315 	}
3316 
3317 	if (is_mcc_eqo(eqo))
3318 		be_process_mcc(adapter);
3319 
3320 	if (max_work < budget) {
3321 		napi_complete(napi);
3322 
3323 		/* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3324 		 * delay via a delay multiplier encoding value
3325 		 */
3326 		if (skyhawk_chip(adapter))
3327 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3328 
3329 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330 			     mult_enc);
3331 	} else {
3332 		/* As we'll continue in polling mode, count and clear events */
3333 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334 	}
3335 	return max_work;
3336 }
3337 
3338 #ifdef CONFIG_NET_RX_BUSY_POLL
3339 static int be_busy_poll(struct napi_struct *napi)
3340 {
3341 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3342 	struct be_adapter *adapter = eqo->adapter;
3343 	struct be_rx_obj *rxo;
3344 	int i, work = 0;
3345 
3346 	if (!be_lock_busy_poll(eqo))
3347 		return LL_FLUSH_BUSY;
3348 
3349 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3350 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3351 		if (work)
3352 			break;
3353 	}
3354 
3355 	be_unlock_busy_poll(eqo);
3356 	return work;
3357 }
3358 #endif
3359 
3360 void be_detect_error(struct be_adapter *adapter)
3361 {
3362 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3363 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3364 	u32 i;
3365 	struct device *dev = &adapter->pdev->dev;
3366 
3367 	if (be_check_error(adapter, BE_ERROR_HW))
3368 		return;
3369 
3370 	if (lancer_chip(adapter)) {
3371 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3372 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3373 			be_set_error(adapter, BE_ERROR_UE);
3374 			sliport_err1 = ioread32(adapter->db +
3375 						SLIPORT_ERROR1_OFFSET);
3376 			sliport_err2 = ioread32(adapter->db +
3377 						SLIPORT_ERROR2_OFFSET);
3378 			/* Do not log error messages if it's a FW reset */
3379 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3380 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3381 				dev_info(dev, "Firmware update in progress\n");
3382 			} else {
3383 				dev_err(dev, "Error detected in the card\n");
3384 				dev_err(dev, "ERR: sliport status 0x%x\n",
3385 					sliport_status);
3386 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3387 					sliport_err1);
3388 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3389 					sliport_err2);
3390 			}
3391 		}
3392 	} else {
3393 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3394 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3395 		ue_lo_mask = ioread32(adapter->pcicfg +
3396 				      PCICFG_UE_STATUS_LOW_MASK);
3397 		ue_hi_mask = ioread32(adapter->pcicfg +
3398 				      PCICFG_UE_STATUS_HI_MASK);
3399 
3400 		ue_lo = (ue_lo & ~ue_lo_mask);
3401 		ue_hi = (ue_hi & ~ue_hi_mask);
3402 
3403 		/* On certain platforms BE hardware can indicate spurious UEs.
3404 		 * Allow the HW to stop working completely in case of a real
3405 		 * UE; hence we do not set the hw_error state on UE detection.
3406 		 */
3407 
3408 		if (ue_lo || ue_hi) {
3409 			dev_err(dev, "Error detected in the adapter");
3410 			if (skyhawk_chip(adapter))
3411 				be_set_error(adapter, BE_ERROR_UE);
3412 
3413 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3414 				if (ue_lo & 1)
3415 					dev_err(dev, "UE: %s bit set\n",
3416 						ue_status_low_desc[i]);
3417 			}
3418 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3419 				if (ue_hi & 1)
3420 					dev_err(dev, "UE: %s bit set\n",
3421 						ue_status_hi_desc[i]);
3422 			}
3423 		}
3424 	}
3425 }
3426 
3427 static void be_msix_disable(struct be_adapter *adapter)
3428 {
3429 	if (msix_enabled(adapter)) {
3430 		pci_disable_msix(adapter->pdev);
3431 		adapter->num_msix_vec = 0;
3432 		adapter->num_msix_roce_vec = 0;
3433 	}
3434 }
3435 
3436 static int be_msix_enable(struct be_adapter *adapter)
3437 {
3438 	unsigned int i, max_roce_eqs;
3439 	struct device *dev = &adapter->pdev->dev;
3440 	int num_vec;
3441 
3442 	/* If RoCE is supported, program the max number of vectors that
3443 	 * could be used for NIC and RoCE, else, just program the number
3444 	 * we'll use initially.
3445 	 */
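	/* Note: pci_enable_msix_range() below may grant fewer vectors than
	 * requested, down to MIN_MSIX_VECTORS.
	 */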
3446 	if (be_roce_supported(adapter)) {
3447 		max_roce_eqs =
3448 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3449 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3450 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3451 	} else {
3452 		num_vec = max(adapter->cfg_num_rx_irqs,
3453 			      adapter->cfg_num_tx_irqs);
3454 	}
3455 
3456 	for (i = 0; i < num_vec; i++)
3457 		adapter->msix_entries[i].entry = i;
3458 
3459 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3460 					MIN_MSIX_VECTORS, num_vec);
3461 	if (num_vec < 0)
3462 		goto fail;
3463 
3464 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3465 		adapter->num_msix_roce_vec = num_vec / 2;
3466 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3467 			 adapter->num_msix_roce_vec);
3468 	}
3469 
3470 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3471 
3472 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3473 		 adapter->num_msix_vec);
3474 	return 0;
3475 
3476 fail:
3477 	dev_warn(dev, "MSIx enable failed\n");
3478 
3479 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3480 	if (be_virtfn(adapter))
3481 		return num_vec;
3482 	return 0;
3483 }
3484 
3485 static inline int be_msix_vec_get(struct be_adapter *adapter,
3486 				  struct be_eq_obj *eqo)
3487 {
3488 	return adapter->msix_entries[eqo->msix_idx].vector;
3489 }
3490 
3491 static int be_msix_register(struct be_adapter *adapter)
3492 {
3493 	struct net_device *netdev = adapter->netdev;
3494 	struct be_eq_obj *eqo;
3495 	int status, i, vec;
3496 
3497 	for_all_evt_queues(adapter, eqo, i) {
3498 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3499 		vec = be_msix_vec_get(adapter, eqo);
3500 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3501 		if (status)
3502 			goto err_msix;
3503 
3504 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3505 	}
3506 
3507 	return 0;
3508 err_msix:
3509 	for (i--; i >= 0; i--) {
3510 		eqo = &adapter->eq_obj[i];
3511 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3512 	}
3513 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3514 		 status);
3515 	be_msix_disable(adapter);
3516 	return status;
3517 }
3518 
3519 static int be_irq_register(struct be_adapter *adapter)
3520 {
3521 	struct net_device *netdev = adapter->netdev;
3522 	int status;
3523 
3524 	if (msix_enabled(adapter)) {
3525 		status = be_msix_register(adapter);
3526 		if (status == 0)
3527 			goto done;
3528 		/* INTx is not supported for VF */
3529 		if (be_virtfn(adapter))
3530 			return status;
3531 	}
3532 
3533 	/* INTx: only the first EQ is used */
3534 	netdev->irq = adapter->pdev->irq;
3535 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3536 			     &adapter->eq_obj[0]);
3537 	if (status) {
3538 		dev_err(&adapter->pdev->dev,
3539 			"INTx request IRQ failed - err %d\n", status);
3540 		return status;
3541 	}
3542 done:
3543 	adapter->isr_registered = true;
3544 	return 0;
3545 }
3546 
3547 static void be_irq_unregister(struct be_adapter *adapter)
3548 {
3549 	struct net_device *netdev = adapter->netdev;
3550 	struct be_eq_obj *eqo;
3551 	int i, vec;
3552 
3553 	if (!adapter->isr_registered)
3554 		return;
3555 
3556 	/* INTx */
3557 	if (!msix_enabled(adapter)) {
3558 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3559 		goto done;
3560 	}
3561 
3562 	/* MSIx */
3563 	for_all_evt_queues(adapter, eqo, i) {
3564 		vec = be_msix_vec_get(adapter, eqo);
3565 		irq_set_affinity_hint(vec, NULL);
3566 		free_irq(vec, eqo);
3567 	}
3568 
3569 done:
3570 	adapter->isr_registered = false;
3571 }
3572 
3573 static void be_rx_qs_destroy(struct be_adapter *adapter)
3574 {
3575 	struct rss_info *rss = &adapter->rss_info;
3576 	struct be_queue_info *q;
3577 	struct be_rx_obj *rxo;
3578 	int i;
3579 
3580 	for_all_rx_queues(adapter, rxo, i) {
3581 		q = &rxo->q;
3582 		if (q->created) {
3583 			/* If RXQs are destroyed while in an "out of buffer"
3584 			 * state, there is a possibility of an HW stall on
3585 			 * Lancer. So, post 64 buffers to each queue to relieve
3586 			 * the "out of buffer" condition.
3587 			 * Make sure there's space in the RXQ before posting.
3588 			 */
3589 			if (lancer_chip(adapter)) {
3590 				be_rx_cq_clean(rxo);
3591 				if (atomic_read(&q->used) == 0)
3592 					be_post_rx_frags(rxo, GFP_KERNEL,
3593 							 MAX_RX_POST);
3594 			}
3595 
3596 			be_cmd_rxq_destroy(adapter, q);
3597 			be_rx_cq_clean(rxo);
3598 			be_rxq_clean(rxo);
3599 		}
3600 		be_queue_free(adapter, q);
3601 	}
3602 
3603 	if (rss->rss_flags) {
3604 		rss->rss_flags = RSS_ENABLE_NONE;
3605 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3606 				  128, rss->rss_hkey);
3607 	}
3608 }
3609 
3610 static void be_disable_if_filters(struct be_adapter *adapter)
3611 {
3612 	be_dev_mac_del(adapter, adapter->pmac_id[0]);
3613 	be_clear_uc_list(adapter);
3614 	be_clear_mc_list(adapter);
3615 
3616 	/* The IFACE flags are enabled in the open path and cleared
3617 	 * in the close path. When a VF gets detached from the host and
3618 	 * assigned to a VM the following happens:
3619 	 *	- VF's IFACE flags get cleared in the detach path
3620 	 *	- IFACE create is issued by the VF in the attach path
3621 	 * Due to a bug in the BE3/Skyhawk-R FW
3622 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3623 	 * specified along with the IFACE create cmd issued by a VF are not
3624 	 * honoured by FW.  As a consequence, if a *new* driver
3625 	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is used by a VM/VF,
3627 	 * the IFACE gets created *without* the needed flags.
3628 	 * To avoid this, disable RX-filter flags only for Lancer.
3629 	 */
3630 	if (lancer_chip(adapter)) {
3631 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3632 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3633 	}
3634 }
3635 
3636 static int be_close(struct net_device *netdev)
3637 {
3638 	struct be_adapter *adapter = netdev_priv(netdev);
3639 	struct be_eq_obj *eqo;
3640 	int i;
3641 
3642 	/* This protection is needed as be_close() may be called even when the
	 * adapter is in a cleared state (after an EEH permanent failure)
3644 	 */
3645 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3646 		return 0;
3647 
3648 	/* Before attempting cleanup ensure all the pending cmds in the
3649 	 * config_wq have finished execution
3650 	 */
3651 	flush_workqueue(be_wq);
3652 
3653 	be_disable_if_filters(adapter);
3654 
3655 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3656 		for_all_evt_queues(adapter, eqo, i) {
3657 			napi_disable(&eqo->napi);
3658 			be_disable_busy_poll(eqo);
3659 		}
3660 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3661 	}
3662 
3663 	be_async_mcc_disable(adapter);
3664 
3665 	/* Wait for all pending tx completions to arrive so that
3666 	 * all tx skbs are freed.
3667 	 */
3668 	netif_tx_disable(netdev);
3669 	be_tx_compl_clean(adapter);
3670 
3671 	be_rx_qs_destroy(adapter);
3672 
3673 	for_all_evt_queues(adapter, eqo, i) {
3674 		if (msix_enabled(adapter))
3675 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3676 		else
3677 			synchronize_irq(netdev->irq);
3678 		be_eq_clean(eqo);
3679 	}
3680 
3681 	be_irq_unregister(adapter);
3682 
3683 	return 0;
3684 }
3685 
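/* Create the default and RSS RX queues, program the RSS indirection table,
 * flags and hash key when more than one RX queue exists, and post receive
 * buffers to all queues.
 */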
3686 static int be_rx_qs_create(struct be_adapter *adapter)
3687 {
3688 	struct rss_info *rss = &adapter->rss_info;
3689 	u8 rss_key[RSS_HASH_KEY_LEN];
3690 	struct be_rx_obj *rxo;
3691 	int rc, i, j;
3692 
3693 	for_all_rx_queues(adapter, rxo, i) {
3694 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3695 				    sizeof(struct be_eth_rx_d));
3696 		if (rc)
3697 			return rc;
3698 	}
3699 
3700 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3701 		rxo = default_rxo(adapter);
3702 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3703 				       rx_frag_size, adapter->if_handle,
3704 				       false, &rxo->rss_id);
3705 		if (rc)
3706 			return rc;
3707 	}
3708 
3709 	for_all_rss_queues(adapter, rxo, i) {
3710 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3711 				       rx_frag_size, adapter->if_handle,
3712 				       true, &rxo->rss_id);
3713 		if (rc)
3714 			return rc;
3715 	}
3716 
3717 	if (be_multi_rxq(adapter)) {
3718 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3719 			for_all_rss_queues(adapter, rxo, i) {
3720 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3721 					break;
3722 				rss->rsstable[j + i] = rxo->rss_id;
3723 				rss->rss_queue[j + i] = i;
3724 			}
3725 		}
3726 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3727 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3728 
3729 		if (!BEx_chip(adapter))
3730 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3731 				RSS_ENABLE_UDP_IPV6;
3732 
3733 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3734 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3735 				       RSS_INDIR_TABLE_LEN, rss_key);
3736 		if (rc) {
3737 			rss->rss_flags = RSS_ENABLE_NONE;
3738 			return rc;
3739 		}
3740 
3741 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3742 	} else {
3743 		/* Disable RSS, if only default RX Q is created */
3744 		rss->rss_flags = RSS_ENABLE_NONE;
3745 	}
3746 
3748 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3749 	 * which is a queue empty condition
3750 	 */
3751 	for_all_rx_queues(adapter, rxo, i)
3752 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3753 
3754 	return 0;
3755 }
3756 
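/* Enable the basic RX filter flags, program the primary MAC (except on BE3
 * VFs, where the PF programs it), restore VLAN filters and the RX mode;
 * called from the open path.
 */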
3757 static int be_enable_if_filters(struct be_adapter *adapter)
3758 {
3759 	int status;
3760 
3761 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3762 	if (status)
3763 		return status;
3764 
3765 	/* For BE3 VFs, the PF programs the initial MAC address */
3766 	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3767 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3768 		if (status)
3769 			return status;
3770 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3771 	}
3772 
3773 	if (adapter->vlans_added)
3774 		be_vid_config(adapter);
3775 
3776 	__be_set_rx_mode(adapter);
3777 
3778 	return 0;
3779 }
3780 
3781 static int be_open(struct net_device *netdev)
3782 {
3783 	struct be_adapter *adapter = netdev_priv(netdev);
3784 	struct be_eq_obj *eqo;
3785 	struct be_rx_obj *rxo;
3786 	struct be_tx_obj *txo;
3787 	u8 link_status;
3788 	int status, i;
3789 
3790 	status = be_rx_qs_create(adapter);
3791 	if (status)
3792 		goto err;
3793 
3794 	status = be_enable_if_filters(adapter);
3795 	if (status)
3796 		goto err;
3797 
3798 	status = be_irq_register(adapter);
3799 	if (status)
3800 		goto err;
3801 
3802 	for_all_rx_queues(adapter, rxo, i)
3803 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3804 
3805 	for_all_tx_queues(adapter, txo, i)
3806 		be_cq_notify(adapter, txo->cq.id, true, 0);
3807 
3808 	be_async_mcc_enable(adapter);
3809 
3810 	for_all_evt_queues(adapter, eqo, i) {
3811 		napi_enable(&eqo->napi);
3812 		be_enable_busy_poll(eqo);
3813 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3814 	}
3815 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3816 
3817 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3818 	if (!status)
3819 		be_link_status_update(adapter, link_status);
3820 
3821 	netif_tx_start_all_queues(netdev);
3822 	if (skyhawk_chip(adapter))
3823 		udp_tunnel_get_rx_info(netdev);
3824 
3825 	return 0;
3826 err:
3827 	be_close(adapter->netdev);
3828 	return -EIO;
3829 }
3830 
3831 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3832 {
3833 	u32 addr;
3834 
3835 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3836 
3837 	mac[5] = (u8)(addr & 0xFF);
3838 	mac[4] = (u8)((addr >> 8) & 0xFF);
3839 	mac[3] = (u8)((addr >> 16) & 0xFF);
3840 	/* Use the OUI from the current MAC address */
3841 	memcpy(mac, adapter->netdev->dev_addr, 3);
3842 }
3843 
3844 /*
3845  * Generate a seed MAC address from the PF MAC Address using jhash.
 * MAC addresses for VFs are assigned incrementally starting from the seed.
3847  * These addresses are programmed in the ASIC by the PF and the VF driver
3848  * queries for the MAC address during its probe.
3849  */
3850 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3851 {
3852 	u32 vf;
3853 	int status = 0;
3854 	u8 mac[ETH_ALEN];
3855 	struct be_vf_cfg *vf_cfg;
3856 
3857 	be_vf_eth_addr_generate(adapter, mac);
3858 
3859 	for_all_vfs(adapter, vf_cfg, vf) {
3860 		if (BEx_chip(adapter))
3861 			status = be_cmd_pmac_add(adapter, mac,
3862 						 vf_cfg->if_handle,
3863 						 &vf_cfg->pmac_id, vf + 1);
3864 		else
3865 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3866 						vf + 1);
3867 
3868 		if (status)
3869 			dev_err(&adapter->pdev->dev,
3870 				"Mac address assignment failed for VF %d\n",
3871 				vf);
3872 		else
3873 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3874 
3875 		mac[5] += 1;
3876 	}
3877 	return status;
3878 }
3879 
3880 static int be_vfs_mac_query(struct be_adapter *adapter)
3881 {
3882 	int status, vf;
3883 	u8 mac[ETH_ALEN];
3884 	struct be_vf_cfg *vf_cfg;
3885 
3886 	for_all_vfs(adapter, vf_cfg, vf) {
3887 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3888 					       mac, vf_cfg->if_handle,
3889 					       false, vf+1);
3890 		if (status)
3891 			return status;
3892 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3893 	}
3894 	return 0;
3895 }
3896 
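/* Tear down SR-IOV: unless VFs are still assigned to VMs, disable SR-IOV,
 * remove the VF MAC entries and destroy the VF interfaces; always free the
 * per-VF config state.
 */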
3897 static void be_vf_clear(struct be_adapter *adapter)
3898 {
3899 	struct be_vf_cfg *vf_cfg;
3900 	u32 vf;
3901 
3902 	if (pci_vfs_assigned(adapter->pdev)) {
3903 		dev_warn(&adapter->pdev->dev,
3904 			 "VFs are assigned to VMs: not disabling VFs\n");
3905 		goto done;
3906 	}
3907 
3908 	pci_disable_sriov(adapter->pdev);
3909 
3910 	for_all_vfs(adapter, vf_cfg, vf) {
3911 		if (BEx_chip(adapter))
3912 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3913 					vf_cfg->pmac_id, vf + 1);
3914 		else
3915 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3916 				       vf + 1);
3917 
3918 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3919 	}
3920 
3921 	if (BE3_chip(adapter))
3922 		be_cmd_set_hsw_config(adapter, 0, 0,
3923 				      adapter->if_handle,
3924 				      PORT_FWD_TYPE_PASSTHRU, 0);
3925 done:
3926 	kfree(adapter->vf_cfg);
3927 	adapter->num_vfs = 0;
3928 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3929 }
3930 
3931 static void be_clear_queues(struct be_adapter *adapter)
3932 {
3933 	be_mcc_queues_destroy(adapter);
3934 	be_rx_cqs_destroy(adapter);
3935 	be_tx_queues_destroy(adapter);
3936 	be_evt_queues_destroy(adapter);
3937 }
3938 
3939 static void be_cancel_worker(struct be_adapter *adapter)
3940 {
3941 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3942 		cancel_delayed_work_sync(&adapter->work);
3943 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3944 	}
3945 }
3946 
3947 static void be_cancel_err_detection(struct be_adapter *adapter)
3948 {
3949 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3950 
3951 	if (!be_err_recovery_workq)
3952 		return;
3953 
3954 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3955 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3956 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3957 	}
3958 }
3959 
3960 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3961 {
3962 	struct net_device *netdev = adapter->netdev;
3963 
3964 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3965 		be_cmd_manage_iface(adapter, adapter->if_handle,
3966 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3967 
3968 	if (adapter->vxlan_port)
3969 		be_cmd_set_vxlan_port(adapter, 0);
3970 
3971 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3972 	adapter->vxlan_port = 0;
3973 
3974 	netdev->hw_enc_features = 0;
3975 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3976 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3977 }
3978 
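/* Compute the per-VF share of queue, MAC, VLAN, IFACE and MCCQ resources
 * when the PF pool is split between the PF and num_vfs VFs.
 */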
3979 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3980 				struct be_resources *vft_res)
3981 {
3982 	struct be_resources res = adapter->pool_res;
3983 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3984 	struct be_resources res_mod = {0};
3985 	u16 num_vf_qs = 1;
3986 
	/* Distribute the queue resources among the PF and its VFs */
3988 	if (num_vfs) {
3989 		/* Divide the rx queues evenly among the VFs and the PF, capped
3990 		 * at VF-EQ-count. Any remainder queues belong to the PF.
3991 		 */
3992 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3993 				res.max_rss_qs / (num_vfs + 1));
3994 
3995 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
		 * RSS Tables per port. Provide RSS on VFs only if the number of
		 * VFs requested is less than its PF pool's RSS Tables limit.
3998 		 */
3999 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4000 			num_vf_qs = 1;
4001 	}
4002 
	/* GET_PROFILE_CONFIG returns a resource with fields set to all '1's
	 * if those fields are modifiable using the SET_PROFILE_CONFIG cmd.
	 */
4006 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4007 				  RESOURCE_MODIFIABLE, 0);
4008 
4009 	/* If RSS IFACE capability flags are modifiable for a VF, set the
4010 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4011 	 * more than 1 RSSQ is available for a VF.
4012 	 * Otherwise, provision only 1 queue pair for VF.
4013 	 */
4014 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4015 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4016 		if (num_vf_qs > 1) {
4017 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4018 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4019 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4020 		} else {
4021 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4022 					     BE_IF_FLAGS_DEFQ_RSS);
4023 		}
4024 	} else {
4025 		num_vf_qs = 1;
4026 	}
4027 
4028 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4029 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4030 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4031 	}
4032 
4033 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4034 	vft_res->max_rx_qs = num_vf_qs;
4035 	vft_res->max_rss_qs = num_vf_qs;
4036 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4037 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4038 
4039 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
	 * among the PF and its VFs, if the fields are changeable
4041 	 */
4042 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4043 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4044 
4045 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4046 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4047 
4048 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4049 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4050 
4051 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4052 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4053 }
4054 
4055 static void be_if_destroy(struct be_adapter *adapter)
4056 {
	be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4058 
4059 	kfree(adapter->pmac_id);
4060 	adapter->pmac_id = NULL;
4061 
4062 	kfree(adapter->mc_list);
4063 	adapter->mc_list = NULL;
4064 
4065 	kfree(adapter->uc_list);
4066 	adapter->uc_list = NULL;
4067 }
4068 
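/* Undo be_setup(): clear VFs, re-distribute SR-IOV resources across the
 * maximum supported VFs, disable VxLAN offloads, destroy the interface and
 * all queues, and disable MSI-X.
 */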
4069 static int be_clear(struct be_adapter *adapter)
4070 {
4071 	struct pci_dev *pdev = adapter->pdev;
	struct be_resources vft_res = {0};
4073 
4074 	be_cancel_worker(adapter);
4075 
4076 	flush_workqueue(be_wq);
4077 
4078 	if (sriov_enabled(adapter))
4079 		be_vf_clear(adapter);
4080 
4081 	/* Re-configure FW to distribute resources evenly across max-supported
4082 	 * number of VFs, only when VFs are not already enabled.
4083 	 */
4084 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4085 	    !pci_vfs_assigned(pdev)) {
4086 		be_calculate_vf_res(adapter,
4087 				    pci_sriov_get_totalvfs(pdev),
4088 				    &vft_res);
4089 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4090 					pci_sriov_get_totalvfs(pdev),
4091 					&vft_res);
4092 	}
4093 
4094 	be_disable_vxlan_offloads(adapter);
4095 
4096 	be_if_destroy(adapter);
4097 
4098 	be_clear_queues(adapter);
4099 
4100 	be_msix_disable(adapter);
4101 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4102 	return 0;
4103 }
4104 
4105 static int be_vfs_if_create(struct be_adapter *adapter)
4106 {
4107 	struct be_resources res = {0};
4108 	u32 cap_flags, en_flags, vf;
4109 	struct be_vf_cfg *vf_cfg;
4110 	int status;
4111 
4112 	/* If a FW profile exists, then cap_flags are updated */
4113 	cap_flags = BE_VF_IF_EN_FLAGS;
4114 
4115 	for_all_vfs(adapter, vf_cfg, vf) {
4116 		if (!BE3_chip(adapter)) {
4117 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4118 							   ACTIVE_PROFILE_TYPE,
4119 							   RESOURCE_LIMITS,
4120 							   vf + 1);
4121 			if (!status) {
4122 				cap_flags = res.if_cap_flags;
4123 				/* Prevent VFs from enabling VLAN promiscuous
4124 				 * mode
4125 				 */
4126 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4127 			}
4128 		}
4129 
4130 		/* PF should enable IF flags during proxy if_create call */
4131 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4132 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4133 					  &vf_cfg->if_handle, vf + 1);
4134 		if (status)
4135 			return status;
4136 	}
4137 
4138 	return 0;
4139 }
4140 
4141 static int be_vf_setup_init(struct be_adapter *adapter)
4142 {
4143 	struct be_vf_cfg *vf_cfg;
4144 	int vf;
4145 
4146 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4147 				  GFP_KERNEL);
4148 	if (!adapter->vf_cfg)
4149 		return -ENOMEM;
4150 
4151 	for_all_vfs(adapter, vf_cfg, vf) {
4152 		vf_cfg->if_handle = -1;
4153 		vf_cfg->pmac_id = -1;
4154 	}
4155 	return 0;
4156 }
4157 
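/* Configure SR-IOV VFs: create (or query) their interfaces and MAC addresses,
 * grant FILTMGMT privileges where missing, apply QoS and link-state defaults
 * for newly created VFs, and enable SR-IOV if not already enabled.
 */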
4158 static int be_vf_setup(struct be_adapter *adapter)
4159 {
4160 	struct device *dev = &adapter->pdev->dev;
4161 	struct be_vf_cfg *vf_cfg;
4162 	int status, old_vfs, vf;
4163 	bool spoofchk;
4164 
4165 	old_vfs = pci_num_vf(adapter->pdev);
4166 
4167 	status = be_vf_setup_init(adapter);
4168 	if (status)
4169 		goto err;
4170 
4171 	if (old_vfs) {
4172 		for_all_vfs(adapter, vf_cfg, vf) {
4173 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4174 			if (status)
4175 				goto err;
4176 		}
4177 
4178 		status = be_vfs_mac_query(adapter);
4179 		if (status)
4180 			goto err;
4181 	} else {
4182 		status = be_vfs_if_create(adapter);
4183 		if (status)
4184 			goto err;
4185 
4186 		status = be_vf_eth_addr_config(adapter);
4187 		if (status)
4188 			goto err;
4189 	}
4190 
4191 	for_all_vfs(adapter, vf_cfg, vf) {
		/* Allow VFs to program MAC/VLAN filters */
4193 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4194 						  vf + 1);
4195 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4196 			status = be_cmd_set_fn_privileges(adapter,
4197 							  vf_cfg->privileges |
4198 							  BE_PRIV_FILTMGMT,
4199 							  vf + 1);
4200 			if (!status) {
4201 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4202 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4203 					 vf);
4204 			}
4205 		}
4206 
4207 		/* Allow full available bandwidth */
4208 		if (!old_vfs)
4209 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4210 
4211 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4212 					       vf_cfg->if_handle, NULL,
4213 					       &spoofchk);
4214 		if (!status)
4215 			vf_cfg->spoofchk = spoofchk;
4216 
4217 		if (!old_vfs) {
4218 			be_cmd_enable_vf(adapter, vf + 1);
4219 			be_cmd_set_logical_link_config(adapter,
4220 						       IFLA_VF_LINK_STATE_AUTO,
4221 						       vf+1);
4222 		}
4223 	}
4224 
4225 	if (!old_vfs) {
4226 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4227 		if (status) {
4228 			dev_err(dev, "SRIOV enable failed\n");
4229 			adapter->num_vfs = 0;
4230 			goto err;
4231 		}
4232 	}
4233 
4234 	if (BE3_chip(adapter)) {
4235 		/* On BE3, enable VEB only when SRIOV is enabled */
4236 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4237 					       adapter->if_handle,
4238 					       PORT_FWD_TYPE_VEB, 0);
4239 		if (status)
4240 			goto err;
4241 	}
4242 
4243 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4244 	return 0;
4245 err:
4246 	dev_err(dev, "VF setup failed\n");
4247 	be_vf_clear(adapter);
4248 	return status;
4249 }
4250 
4251 /* Converting function_mode bits on BE3 to SH mc_type enums */
4252 
4253 static u8 be_convert_mc_type(u32 function_mode)
4254 {
4255 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4256 		return vNIC1;
4257 	else if (function_mode & QNQ_MODE)
4258 		return FLEX10;
4259 	else if (function_mode & VNIC_MODE)
4260 		return vNIC2;
4261 	else if (function_mode & UMC_ENABLED)
4262 		return UMC;
4263 	else
4264 		return MC_NONE;
4265 }
4266 
/* On BE2/BE3, the FW does not report the supported limits */
4268 static void BEx_get_resources(struct be_adapter *adapter,
4269 			      struct be_resources *res)
4270 {
4271 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4272 
4273 	if (be_physfn(adapter))
4274 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4275 	else
4276 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4277 
4278 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4279 
4280 	if (be_is_mc(adapter)) {
		/* Assuming that there are 4 channels per port
4282 		 * when multi-channel is enabled
4283 		 */
4284 		if (be_is_qnq_mode(adapter))
4285 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4286 		else
4287 			/* In a non-qnq multichannel mode, the pvid
4288 			 * takes up one vlan entry
4289 			 */
4290 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4291 	} else {
4292 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4293 	}
4294 
4295 	res->max_mcast_mac = BE_MAX_MC;
4296 
4297 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4298 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4299 	 *    *only* if it is RSS-capable.
4300 	 */
	if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4302 	    be_virtfn(adapter) ||
4303 	    (be_is_mc(adapter) &&
4304 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4305 		res->max_tx_qs = 1;
4306 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4307 		struct be_resources super_nic_res = {0};
4308 
4309 		/* On a SuperNIC profile, the driver needs to use the
4310 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4311 		 */
4312 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4313 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4314 					  0);
4315 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4316 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4317 	} else {
4318 		res->max_tx_qs = BE3_MAX_TX_QS;
4319 	}
4320 
4321 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4322 	    !use_sriov && be_physfn(adapter))
4323 		res->max_rss_qs = (adapter->be3_native) ?
4324 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4325 	res->max_rx_qs = res->max_rss_qs + 1;
4326 
4327 	if (be_physfn(adapter))
4328 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4329 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4330 	else
4331 		res->max_evt_qs = 1;
4332 
4333 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4334 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4335 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4336 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4337 }
4338 
4339 static void be_setup_init(struct be_adapter *adapter)
4340 {
4341 	adapter->vlan_prio_bmap = 0xff;
4342 	adapter->phy.link_speed = -1;
4343 	adapter->if_handle = -1;
4344 	adapter->be3_native = false;
4345 	adapter->if_flags = 0;
4346 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4347 	if (be_physfn(adapter))
4348 		adapter->cmd_privileges = MAX_PRIVILEGES;
4349 	else
4350 		adapter->cmd_privileges = MIN_PRIVILEGES;
4351 }
4352 
4353 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4354  * However, this HW limitation is not exposed to the host via any SLI cmd.
 * As a result, in the case of SRIOV and in particular multi-partition configs,
 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
 * for distribution between the VFs. This self-imposed limit will determine the
 * number of VFs for which RSS can be enabled.
4359  */
4360 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4361 {
4362 	struct be_port_resources port_res = {0};
4363 	u8 rss_tables_on_port;
4364 	u16 max_vfs = be_max_vfs(adapter);
4365 
4366 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4367 				  RESOURCE_LIMITS, 0);
4368 
4369 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4370 
4371 	/* Each PF Pool's RSS Tables limit =
4372 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4373 	 */
4374 	adapter->pool_res.max_rss_tables =
4375 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4376 }
4377 
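/* Read the SR-IOV PF-pool limits from FW and account for any VFs left
 * enabled by a previous driver load.
 */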
4378 static int be_get_sriov_config(struct be_adapter *adapter)
4379 {
4380 	struct be_resources res = {0};
4381 	int max_vfs, old_vfs;
4382 
4383 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4384 				  RESOURCE_LIMITS, 0);
4385 
4386 	/* Some old versions of BE3 FW don't report max_vfs value */
4387 	if (BE3_chip(adapter) && !res.max_vfs) {
4388 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4389 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4390 	}
4391 
4392 	adapter->pool_res = res;
4393 
4394 	/* If during previous unload of the driver, the VFs were not disabled,
4395 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4396 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4397 	 */
4398 	old_vfs = pci_num_vf(adapter->pdev);
4399 	if (old_vfs) {
4400 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4401 			 old_vfs);
4402 
4403 		adapter->pool_res.max_vfs =
4404 			pci_sriov_get_totalvfs(adapter->pdev);
4405 		adapter->num_vfs = old_vfs;
4406 	}
4407 
4408 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4409 		be_calculate_pf_pool_rss_tables(adapter);
4410 		dev_info(&adapter->pdev->dev,
4411 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4412 			 be_max_pf_pool_rss_tables(adapter));
4413 	}
4414 	return 0;
4415 }
4416 
4417 static void be_alloc_sriov_res(struct be_adapter *adapter)
4418 {
4419 	int old_vfs = pci_num_vf(adapter->pdev);
	struct be_resources vft_res = {0};
4421 	int status;
4422 
4423 	be_get_sriov_config(adapter);
4424 
4425 	if (!old_vfs)
4426 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4427 
	/* When the HW is in an SRIOV capable configuration, the PF-pool
	 * resources are given to the PF during driver load, if there are no
	 * old VFs. This facility is not available in BE3 FW.
	 * Also, this is done by the FW on the Lancer chip.
4432 	 */
4433 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4434 		be_calculate_vf_res(adapter, 0, &vft_res);
4435 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4436 						 &vft_res);
4437 		if (status)
4438 			dev_err(&adapter->pdev->dev,
4439 				"Failed to optimize SRIOV resources\n");
4440 	}
4441 }
4442 
4443 static int be_get_resources(struct be_adapter *adapter)
4444 {
4445 	struct device *dev = &adapter->pdev->dev;
4446 	struct be_resources res = {0};
4447 	int status;
4448 
	/* For Lancer, SH etc. read per-function resource limits from FW.
	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
	 * GET_PROFILE_CONFIG returns PCI-E related PF-pool limits.
4452 	 */
4453 	if (BEx_chip(adapter)) {
4454 		BEx_get_resources(adapter, &res);
4455 	} else {
4456 		status = be_cmd_get_func_config(adapter, &res);
4457 		if (status)
4458 			return status;
4459 
		/* If a default RXQ must be created, we'll use up one RSSQ */
4461 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4462 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4463 			res.max_rss_qs -= 1;
4464 	}
4465 
	/* If RoCE is supported, stash away half the EQs for RoCE */
4467 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4468 				res.max_evt_qs / 2 : res.max_evt_qs;
4469 	adapter->res = res;
4470 
4471 	/* If FW supports RSS default queue, then skip creating non-RSS
4472 	 * queue for non-IP traffic.
4473 	 */
4474 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4475 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4476 
4477 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4478 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4479 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4480 		 be_max_vfs(adapter));
4481 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4482 		 be_max_uc(adapter), be_max_mc(adapter),
4483 		 be_max_vlans(adapter));
4484 
4485 	/* Ensure RX and TX queues are created in pairs at init time */
4486 	adapter->cfg_num_rx_irqs =
4487 				min_t(u16, netif_get_num_default_rss_queues(),
4488 				      be_max_qp_irqs(adapter));
4489 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4490 	return 0;
4491 }
4492 
4493 static int be_get_config(struct be_adapter *adapter)
4494 {
4495 	int status, level;
4496 	u16 profile_id;
4497 
4498 	status = be_cmd_get_cntl_attributes(adapter);
4499 	if (status)
4500 		return status;
4501 
4502 	status = be_cmd_query_fw_cfg(adapter);
4503 	if (status)
4504 		return status;
4505 
4506 	if (!lancer_chip(adapter) && be_physfn(adapter))
4507 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4508 
4509 	if (BEx_chip(adapter)) {
4510 		level = be_cmd_get_fw_log_level(adapter);
4511 		adapter->msg_enable =
4512 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4513 	}
4514 
4515 	be_cmd_get_acpi_wol_cap(adapter);
4516 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4517 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4518 
4519 	be_cmd_query_port_name(adapter);
4520 
4521 	if (be_physfn(adapter)) {
4522 		status = be_cmd_get_active_profile(adapter, &profile_id);
4523 		if (!status)
4524 			dev_info(&adapter->pdev->dev,
4525 				 "Using profile 0x%x\n", profile_id);
4526 	}
4527 
4528 	return 0;
4529 }
4530 
4531 static int be_mac_setup(struct be_adapter *adapter)
4532 {
4533 	u8 mac[ETH_ALEN];
4534 	int status;
4535 
4536 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4537 		status = be_cmd_get_perm_mac(adapter, mac);
4538 		if (status)
4539 			return status;
4540 
4541 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4542 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4543 	}
4544 
4545 	return 0;
4546 }
4547 
4548 static void be_schedule_worker(struct be_adapter *adapter)
4549 {
4550 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4551 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4552 }
4553 
4554 static void be_destroy_err_recovery_workq(void)
4555 {
4556 	if (!be_err_recovery_workq)
4557 		return;
4558 
4559 	flush_workqueue(be_err_recovery_workq);
4560 	destroy_workqueue(be_err_recovery_workq);
4561 	be_err_recovery_workq = NULL;
4562 }
4563 
4564 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4565 {
4566 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4567 
4568 	if (!be_err_recovery_workq)
4569 		return;
4570 
4571 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4572 			   msecs_to_jiffies(delay));
4573 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4574 }
4575 
4576 static int be_setup_queues(struct be_adapter *adapter)
4577 {
4578 	struct net_device *netdev = adapter->netdev;
4579 	int status;
4580 
4581 	status = be_evt_queues_create(adapter);
4582 	if (status)
4583 		goto err;
4584 
4585 	status = be_tx_qs_create(adapter);
4586 	if (status)
4587 		goto err;
4588 
4589 	status = be_rx_cqs_create(adapter);
4590 	if (status)
4591 		goto err;
4592 
4593 	status = be_mcc_queues_create(adapter);
4594 	if (status)
4595 		goto err;
4596 
4597 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4598 	if (status)
4599 		goto err;
4600 
4601 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4602 	if (status)
4603 		goto err;
4604 
4605 	return 0;
4606 err:
4607 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4608 	return status;
4609 }
4610 
4611 static int be_if_create(struct be_adapter *adapter)
4612 {
4613 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4614 	u32 cap_flags = be_if_cap_flags(adapter);
4615 	int status;
4616 
4617 	/* alloc required memory for other filtering fields */
4618 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4619 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4620 	if (!adapter->pmac_id)
4621 		return -ENOMEM;
4622 
4623 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4624 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4625 	if (!adapter->mc_list)
4626 		return -ENOMEM;
4627 
4628 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4629 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4630 	if (!adapter->uc_list)
4631 		return -ENOMEM;
4632 
4633 	if (adapter->cfg_num_rx_irqs == 1)
4634 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4635 
4636 	en_flags &= cap_flags;
4637 	/* will enable all the needed filter flags in be_open() */
4638 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4639 				  &adapter->if_handle, 0);
4640 
4641 	if (status)
4642 		return status;
4643 
4644 	return 0;
4645 }
4646 
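/* Destroy and re-create the interface and all queues using the current
 * ring/vector configuration, restarting the netdev if it was running.
 */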
4647 int be_update_queues(struct be_adapter *adapter)
4648 {
4649 	struct net_device *netdev = adapter->netdev;
4650 	int status;
4651 
4652 	if (netif_running(netdev))
4653 		be_close(netdev);
4654 
4655 	be_cancel_worker(adapter);
4656 
4657 	/* If any vectors have been shared with RoCE we cannot re-program
4658 	 * the MSIx table.
4659 	 */
4660 	if (!adapter->num_msix_roce_vec)
4661 		be_msix_disable(adapter);
4662 
4663 	be_clear_queues(adapter);
	status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4665 	if (status)
4666 		return status;
4667 
4668 	if (!msix_enabled(adapter)) {
4669 		status = be_msix_enable(adapter);
4670 		if (status)
4671 			return status;
4672 	}
4673 
4674 	status = be_if_create(adapter);
4675 	if (status)
4676 		return status;
4677 
4678 	status = be_setup_queues(adapter);
4679 	if (status)
4680 		return status;
4681 
4682 	be_schedule_worker(adapter);
4683 
4684 	if (netif_running(netdev))
4685 		status = be_open(netdev);
4686 
4687 	return status;
4688 }
4689 
4690 static inline int fw_major_num(const char *fw_ver)
4691 {
4692 	int fw_major = 0, i;
4693 
4694 	i = sscanf(fw_ver, "%d.", &fw_major);
4695 	if (i != 1)
4696 		return 0;
4697 
4698 	return fw_major;
4699 }
4700 
/* During error recovery, FLR the PF.
 * Otherwise, don't FLR the PF if any VFs are already enabled.
4703  */
4704 static bool be_reset_required(struct be_adapter *adapter)
4705 {
4706 	if (be_error_recovering(adapter))
4707 		return true;
4708 	else
4709 		return pci_num_vf(adapter->pdev) == 0;
4710 }
4711 
4712 /* Wait for the FW to be ready and perform the required initialization */
4713 static int be_func_init(struct be_adapter *adapter)
4714 {
4715 	int status;
4716 
4717 	status = be_fw_wait_ready(adapter);
4718 	if (status)
4719 		return status;
4720 
4721 	/* FW is now ready; clear errors to allow cmds/doorbell */
4722 	be_clear_error(adapter, BE_CLEAR_ALL);
4723 
4724 	if (be_reset_required(adapter)) {
4725 		status = be_cmd_reset_function(adapter);
4726 		if (status)
4727 			return status;
4728 
4729 		/* Wait for interrupts to quiesce after an FLR */
4730 		msleep(100);
4731 	}
4732 
4733 	/* Tell FW we're ready to fire cmds */
4734 	status = be_cmd_fw_init(adapter);
4735 	if (status)
4736 		return status;
4737 
4738 	/* Allow interrupts for other ULPs running on NIC function */
4739 	be_intr_set(adapter, true);
4740 
4741 	return 0;
4742 }
4743 
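/* Bring the function to a fully configured state: query FW resources, enable
 * MSI-X, create the interface and queues, set up the MAC, flow control and
 * SR-IOV, and schedule the periodic worker.
 */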
4744 static int be_setup(struct be_adapter *adapter)
4745 {
4746 	struct device *dev = &adapter->pdev->dev;
4747 	int status;
4748 
4749 	status = be_func_init(adapter);
4750 	if (status)
4751 		return status;
4752 
4753 	be_setup_init(adapter);
4754 
4755 	if (!lancer_chip(adapter))
4756 		be_cmd_req_native_mode(adapter);
4757 
	/* Invoke this cmd first to get pf_num and vf_num, which are needed
	 * for issuing profile-related cmds
4760 	 */
4761 	if (!BEx_chip(adapter)) {
4762 		status = be_cmd_get_func_config(adapter, NULL);
4763 		if (status)
4764 			return status;
4765 	}
4766 
4767 	status = be_get_config(adapter);
4768 	if (status)
4769 		goto err;
4770 
4771 	if (!BE2_chip(adapter) && be_physfn(adapter))
4772 		be_alloc_sriov_res(adapter);
4773 
4774 	status = be_get_resources(adapter);
4775 	if (status)
4776 		goto err;
4777 
4778 	status = be_msix_enable(adapter);
4779 	if (status)
4780 		goto err;
4781 
4782 	/* will enable all the needed filter flags in be_open() */
4783 	status = be_if_create(adapter);
4784 	if (status)
4785 		goto err;
4786 
4787 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4788 	rtnl_lock();
4789 	status = be_setup_queues(adapter);
4790 	rtnl_unlock();
4791 	if (status)
4792 		goto err;
4793 
4794 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4795 
4796 	status = be_mac_setup(adapter);
4797 	if (status)
4798 		goto err;
4799 
4800 	be_cmd_get_fw_ver(adapter);
4801 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4802 
4803 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4804 		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4805 			adapter->fw_ver);
4806 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4807 	}
4808 
4809 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4810 					 adapter->rx_fc);
4811 	if (status)
4812 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4813 					&adapter->rx_fc);
4814 
4815 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4816 		 adapter->tx_fc, adapter->rx_fc);
4817 
4818 	if (be_physfn(adapter))
4819 		be_cmd_set_logical_link_config(adapter,
4820 					       IFLA_VF_LINK_STATE_AUTO, 0);
4821 
	/* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
	 * confusing any Linux bridge or OVS it might be connected to.
	 * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
	 * when SRIOV is not enabled.
4826 	 */
4827 	if (BE3_chip(adapter))
4828 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4829 				      PORT_FWD_TYPE_PASSTHRU, 0);
4830 
4831 	if (adapter->num_vfs)
4832 		be_vf_setup(adapter);
4833 
4834 	status = be_cmd_get_phy_info(adapter);
4835 	if (!status && be_pause_supported(adapter))
4836 		adapter->phy.fc_autoneg = 1;
4837 
4838 	if (be_physfn(adapter) && !lancer_chip(adapter))
4839 		be_cmd_set_features(adapter);
4840 
4841 	be_schedule_worker(adapter);
4842 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4843 	return 0;
4844 err:
4845 	be_clear(adapter);
4846 	return status;
4847 }
4848 
4849 #ifdef CONFIG_NET_POLL_CONTROLLER
4850 static void be_netpoll(struct net_device *netdev)
4851 {
4852 	struct be_adapter *adapter = netdev_priv(netdev);
4853 	struct be_eq_obj *eqo;
4854 	int i;
4855 
4856 	for_all_evt_queues(adapter, eqo, i) {
4857 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4858 		napi_schedule(&eqo->napi);
4859 	}
4860 }
4861 #endif
4862 
4863 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4864 {
4865 	const struct firmware *fw;
4866 	int status;
4867 
4868 	if (!netif_running(adapter->netdev)) {
4869 		dev_err(&adapter->pdev->dev,
4870 			"Firmware load not allowed (interface is down)\n");
4871 		return -ENETDOWN;
4872 	}
4873 
4874 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4875 	if (status)
4876 		goto fw_exit;
4877 
4878 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4879 
4880 	if (lancer_chip(adapter))
4881 		status = lancer_fw_download(adapter, fw);
4882 	else
4883 		status = be_fw_download(adapter, fw);
4884 
4885 	if (!status)
4886 		be_cmd_get_fw_ver(adapter);
4887 
4888 fw_exit:
4889 	release_firmware(fw);
4890 	return status;
4891 }
4892 
4893 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4894 				 u16 flags)
4895 {
4896 	struct be_adapter *adapter = netdev_priv(dev);
4897 	struct nlattr *attr, *br_spec;
4898 	int rem;
4899 	int status = 0;
4900 	u16 mode = 0;
4901 
4902 	if (!sriov_enabled(adapter))
4903 		return -EOPNOTSUPP;
4904 
4905 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4906 	if (!br_spec)
4907 		return -EINVAL;
4908 
4909 	nla_for_each_nested(attr, br_spec, rem) {
4910 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4911 			continue;
4912 
4913 		if (nla_len(attr) < sizeof(mode))
4914 			return -EINVAL;
4915 
4916 		mode = nla_get_u16(attr);
4917 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4918 			return -EOPNOTSUPP;
4919 
4920 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4921 			return -EINVAL;
4922 
4923 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4924 					       adapter->if_handle,
4925 					       mode == BRIDGE_MODE_VEPA ?
4926 					       PORT_FWD_TYPE_VEPA :
4927 					       PORT_FWD_TYPE_VEB, 0);
4928 		if (status)
4929 			goto err;
4930 
4931 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4932 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4933 
4934 		return status;
4935 	}
4936 err:
4937 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4938 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4939 
4940 	return status;
4941 }
4942 
4943 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4944 				 struct net_device *dev, u32 filter_mask,
4945 				 int nlflags)
4946 {
4947 	struct be_adapter *adapter = netdev_priv(dev);
4948 	int status = 0;
4949 	u8 hsw_mode;
4950 
4951 	/* BE and Lancer chips support VEB mode only */
4952 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4953 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4954 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4955 			return 0;
4956 		hsw_mode = PORT_FWD_TYPE_VEB;
4957 	} else {
4958 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4959 					       adapter->if_handle, &hsw_mode,
4960 					       NULL);
4961 		if (status)
4962 			return 0;
4963 
4964 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4965 			return 0;
4966 	}
4967 
4968 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4969 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4970 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4971 				       0, 0, nlflags, filter_mask, NULL);
4972 }
4973 
4974 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4975 					 void (*func)(struct work_struct *))
4976 {
4977 	struct be_cmd_work *work;
4978 
4979 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4980 	if (!work) {
4981 		dev_err(&adapter->pdev->dev,
4982 			"be_work memory allocation failed\n");
4983 		return NULL;
4984 	}
4985 
4986 	INIT_WORK(&work->work, func);
4987 	work->adapter = adapter;
4988 	return work;
4989 }
4990 
4991 /* VxLAN offload Notes:
4992  *
4993  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4994  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4995  * is expected to work across all types of IP tunnels once exported. Skyhawk
4996  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4997  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4998  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4999  * those other tunnels are unexported on the fly through ndo_features_check().
5000  *
5001  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
 * adds more than one port, disable offloads and don't re-enable them
5003  * until after all the tunnels are removed.
5004  */
5005 static void be_work_add_vxlan_port(struct work_struct *work)
5006 {
5007 	struct be_cmd_work *cmd_work =
5008 				container_of(work, struct be_cmd_work, work);
5009 	struct be_adapter *adapter = cmd_work->adapter;
5010 	struct net_device *netdev = adapter->netdev;
5011 	struct device *dev = &adapter->pdev->dev;
5012 	__be16 port = cmd_work->info.vxlan_port;
5013 	int status;
5014 
5015 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5016 		adapter->vxlan_port_aliases++;
5017 		goto done;
5018 	}
5019 
5020 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5021 		dev_info(dev,
5022 			 "Only one UDP port supported for VxLAN offloads\n");
5023 		dev_info(dev, "Disabling VxLAN offloads\n");
5024 		adapter->vxlan_port_count++;
5025 		goto err;
5026 	}
5027 
5028 	if (adapter->vxlan_port_count++ >= 1)
5029 		goto done;
5030 
5031 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
5032 				     OP_CONVERT_NORMAL_TO_TUNNEL);
5033 	if (status) {
5034 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5035 		goto err;
5036 	}
5037 
5038 	status = be_cmd_set_vxlan_port(adapter, port);
5039 	if (status) {
5040 		dev_warn(dev, "Failed to add VxLAN port\n");
5041 		goto err;
5042 	}
5043 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5044 	adapter->vxlan_port = port;
5045 
5046 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5047 				   NETIF_F_TSO | NETIF_F_TSO6 |
5048 				   NETIF_F_GSO_UDP_TUNNEL;
5049 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5050 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5051 
5052 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5053 		 be16_to_cpu(port));
5054 	goto done;
5055 err:
5056 	be_disable_vxlan_offloads(adapter);
5057 done:
5058 	kfree(cmd_work);
5059 }
5060 
5061 static void be_work_del_vxlan_port(struct work_struct *work)
5062 {
5063 	struct be_cmd_work *cmd_work =
5064 				container_of(work, struct be_cmd_work, work);
5065 	struct be_adapter *adapter = cmd_work->adapter;
5066 	__be16 port = cmd_work->info.vxlan_port;
5067 
5068 	if (adapter->vxlan_port != port)
5069 		goto done;
5070 
5071 	if (adapter->vxlan_port_aliases) {
5072 		adapter->vxlan_port_aliases--;
5073 		goto out;
5074 	}
5075 
5076 	be_disable_vxlan_offloads(adapter);
5077 
5078 	dev_info(&adapter->pdev->dev,
5079 		 "Disabled VxLAN offloads for UDP port %d\n",
5080 		 be16_to_cpu(port));
5081 done:
5082 	adapter->vxlan_port_count--;
5083 out:
5084 	kfree(cmd_work);
5085 }
5086 
5087 static void be_cfg_vxlan_port(struct net_device *netdev,
5088 			      struct udp_tunnel_info *ti,
5089 			      void (*func)(struct work_struct *))
5090 {
5091 	struct be_adapter *adapter = netdev_priv(netdev);
5092 	struct be_cmd_work *cmd_work;
5093 
5094 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5095 		return;
5096 
5097 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5098 		return;
5099 
5100 	cmd_work = be_alloc_work(adapter, func);
5101 	if (cmd_work) {
5102 		cmd_work->info.vxlan_port = ti->port;
5103 		queue_work(be_wq, &cmd_work->work);
5104 	}
5105 }
5106 
5107 static void be_del_vxlan_port(struct net_device *netdev,
5108 			      struct udp_tunnel_info *ti)
5109 {
5110 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5111 }
5112 
5113 static void be_add_vxlan_port(struct net_device *netdev,
5114 			      struct udp_tunnel_info *ti)
5115 {
5116 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5117 }
5118 
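/* ndo_features_check(): when VxLAN offloads are enabled, retain checksum/GSO
 * offloads only for VxLAN packets destined to the configured VxLAN port;
 * strip them for other tunneled packets.
 */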
5119 static netdev_features_t be_features_check(struct sk_buff *skb,
5120 					   struct net_device *dev,
5121 					   netdev_features_t features)
5122 {
5123 	struct be_adapter *adapter = netdev_priv(dev);
5124 	u8 l4_hdr = 0;
5125 
5126 	/* The code below restricts offload features for some tunneled packets.
5127 	 * Offload features for normal (non tunnel) packets are unchanged.
5128 	 */
5129 	if (!skb->encapsulation ||
5130 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5131 		return features;
5132 
5133 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5134 	 * should disable tunnel offload features if it's not a VxLAN packet,
5135 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
	 * allow other tunneled traffic like GRE to work fine while VxLAN
5137 	 * offloads are configured in Skyhawk-R.
5138 	 */
5139 	switch (vlan_get_protocol(skb)) {
5140 	case htons(ETH_P_IP):
5141 		l4_hdr = ip_hdr(skb)->protocol;
5142 		break;
5143 	case htons(ETH_P_IPV6):
5144 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5145 		break;
5146 	default:
5147 		return features;
5148 	}
5149 
5150 	if (l4_hdr != IPPROTO_UDP ||
5151 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5152 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5153 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5154 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5155 	    !adapter->vxlan_port ||
5156 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5157 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5158 
5159 	return features;
5160 }
5161 
5162 static int be_get_phys_port_id(struct net_device *dev,
5163 			       struct netdev_phys_item_id *ppid)
5164 {
5165 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5166 	struct be_adapter *adapter = netdev_priv(dev);
5167 	u8 *id;
5168 
5169 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5170 		return -ENOSPC;
5171 
5172 	ppid->id[0] = adapter->hba_port_num + 1;
5173 	id = &ppid->id[1];
5174 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5175 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5176 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5177 
5178 	ppid->id_len = id_len;
5179 
5180 	return 0;
5181 }
5182 
5183 static void be_set_rx_mode(struct net_device *dev)
5184 {
5185 	struct be_adapter *adapter = netdev_priv(dev);
5186 	struct be_cmd_work *work;
5187 
5188 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5189 	if (work)
5190 		queue_work(be_wq, &work->work);
5191 }
5192 
5193 static const struct net_device_ops be_netdev_ops = {
5194 	.ndo_open		= be_open,
5195 	.ndo_stop		= be_close,
5196 	.ndo_start_xmit		= be_xmit,
5197 	.ndo_set_rx_mode	= be_set_rx_mode,
5198 	.ndo_set_mac_address	= be_mac_addr_set,
5199 	.ndo_get_stats64	= be_get_stats64,
5200 	.ndo_validate_addr	= eth_validate_addr,
5201 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5202 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5203 	.ndo_set_vf_mac		= be_set_vf_mac,
5204 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5205 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5206 	.ndo_get_vf_config	= be_get_vf_config,
5207 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5208 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5209 #ifdef CONFIG_NET_POLL_CONTROLLER
5210 	.ndo_poll_controller	= be_netpoll,
5211 #endif
5212 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5213 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5214 #ifdef CONFIG_NET_RX_BUSY_POLL
5215 	.ndo_busy_poll		= be_busy_poll,
5216 #endif
5217 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5218 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5219 	.ndo_features_check	= be_features_check,
5220 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5221 };
5222 
5223 static void be_netdev_init(struct net_device *netdev)
5224 {
5225 	struct be_adapter *adapter = netdev_priv(netdev);
5226 
5227 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5228 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5229 		NETIF_F_HW_VLAN_CTAG_TX;
5230 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5231 		netdev->hw_features |= NETIF_F_RXHASH;
5232 
5233 	netdev->features |= netdev->hw_features |
5234 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5235 
5236 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5237 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5238 
5239 	netdev->priv_flags |= IFF_UNICAST_FLT;
5240 
5241 	netdev->flags |= IFF_MULTICAST;
5242 
5243 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5244 
5245 	netdev->netdev_ops = &be_netdev_ops;
5246 
5247 	netdev->ethtool_ops = &be_ethtool_ops;
5248 
5249 	/* MTU range: 256 - 9000 */
5250 	netdev->min_mtu = BE_MIN_MTU;
5251 	netdev->max_mtu = BE_MAX_MTU;
5252 }
5253 
5254 static void be_cleanup(struct be_adapter *adapter)
5255 {
5256 	struct net_device *netdev = adapter->netdev;
5257 
5258 	rtnl_lock();
5259 	netif_device_detach(netdev);
5260 	if (netif_running(netdev))
5261 		be_close(netdev);
5262 	rtnl_unlock();
5263 
5264 	be_clear(adapter);
5265 }
5266 
5267 static int be_resume(struct be_adapter *adapter)
5268 {
5269 	struct net_device *netdev = adapter->netdev;
5270 	int status;
5271 
5272 	status = be_setup(adapter);
5273 	if (status)
5274 		return status;
5275 
5276 	rtnl_lock();
5277 	if (netif_running(netdev))
5278 		status = be_open(netdev);
5279 	rtnl_unlock();
5280 
5281 	if (status)
5282 		return status;
5283 
5284 	netif_device_attach(netdev);
5285 
5286 	return 0;
5287 }
5288 
5289 static void be_soft_reset(struct be_adapter *adapter)
5290 {
5291 	u32 val;
5292 
5293 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5294 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5295 	val |= SLIPORT_SOFTRESET_SR_MASK;
5296 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5297 }
5298 
5299 static bool be_err_is_recoverable(struct be_adapter *adapter)
5300 {
5301 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5302 	unsigned long initial_idle_time =
5303 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5304 	unsigned long recovery_interval =
5305 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5306 	u16 ue_err_code;
5307 	u32 val;
5308 
5309 	val = be_POST_stage_get(adapter);
5310 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5311 		return false;
5312 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5313 	if (ue_err_code == 0)
5314 		return false;
5315 
5316 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5317 		ue_err_code);
5318 
5319 	if (jiffies - err_rec->probe_time <= initial_idle_time) {
5320 		dev_err(&adapter->pdev->dev,
5321 			"Cannot recover within %lu sec from driver load\n",
5322 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5323 		return false;
5324 	}
5325 
5326 	if (err_rec->last_recovery_time &&
5327 	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5328 		dev_err(&adapter->pdev->dev,
5329 			"Cannot recover within %lu sec from last recovery\n",
5330 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5331 		return false;
5332 	}
5333 
5334 	if (ue_err_code == err_rec->last_err_code) {
5335 		dev_err(&adapter->pdev->dev,
5336 			"Cannot recover from a consecutive TPE error\n");
5337 		return false;
5338 	}
5339 
5340 	err_rec->last_recovery_time = jiffies;
5341 	err_rec->last_err_code = ue_err_code;
5342 	return true;
5343 }
5344 
5345 static int be_tpe_recover(struct be_adapter *adapter)
5346 {
5347 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5348 	int status = -EAGAIN;
5349 	u32 val;
5350 
5351 	switch (err_rec->recovery_state) {
5352 	case ERR_RECOVERY_ST_NONE:
5353 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5354 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5355 		break;
5356 
5357 	case ERR_RECOVERY_ST_DETECT:
5358 		val = be_POST_stage_get(adapter);
5359 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5360 		    POST_STAGE_RECOVERABLE_ERR) {
5361 			dev_err(&adapter->pdev->dev,
5362 				"Unrecoverable HW error detected: 0x%x\n", val);
5363 			status = -EINVAL;
5364 			err_rec->resched_delay = 0;
5365 			break;
5366 		}
5367 
5368 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5369 
5370 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5371 		 * milliseconds before it checks for final error status in
5372 		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5373 		 * If it does, then PF0 initiates a Soft Reset.
5374 		 */
5375 		if (adapter->pf_num == 0) {
5376 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5377 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5378 					ERR_RECOVERY_UE_DETECT_DURATION;
5379 			break;
5380 		}
5381 
5382 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5383 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5384 					ERR_RECOVERY_UE_DETECT_DURATION;
5385 		break;
5386 
5387 	case ERR_RECOVERY_ST_RESET:
5388 		if (!be_err_is_recoverable(adapter)) {
5389 			dev_err(&adapter->pdev->dev,
5390 				"Failed to meet recovery criteria\n");
5391 			status = -EIO;
5392 			err_rec->resched_delay = 0;
5393 			break;
5394 		}
5395 		be_soft_reset(adapter);
5396 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5397 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5398 					err_rec->ue_to_reset_time;
5399 		break;
5400 
5401 	case ERR_RECOVERY_ST_PRE_POLL:
5402 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5403 		err_rec->resched_delay = 0;
5404 		status = 0;			/* done */
5405 		break;
5406 
5407 	default:
5408 		status = -EINVAL;
5409 		err_rec->resched_delay = 0;
5410 		break;
5411 	}
5412 
5413 	return status;
5414 }
5415 
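/* Common error-recovery path: on non-Lancer chips first drive the TPE
 * recovery state machine, then wait for the FW to become ready, tear down
 * the existing queues/interface and re-initialize the function.
 */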
5416 static int be_err_recover(struct be_adapter *adapter)
5417 {
5418 	int status;
5419 
5420 	if (!lancer_chip(adapter)) {
5421 		if (!adapter->error_recovery.recovery_supported ||
5422 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5423 			return -EIO;
5424 		status = be_tpe_recover(adapter);
5425 		if (status)
5426 			goto err;
5427 	}
5428 
5429 	/* Wait for adapter to reach quiescent state before
5430 	 * destroying queues
5431 	 */
5432 	status = be_fw_wait_ready(adapter);
5433 	if (status)
5434 		goto err;
5435 
5436 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5437 
5438 	be_cleanup(adapter);
5439 
5440 	status = be_resume(adapter);
5441 	if (status)
5442 		goto err;
5443 
5444 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5445 
5446 err:
5447 	return status;
5448 }
5449 
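/* Periodic error-detection work item: polls the adapter for HW errors and
 * attempts recovery when one is found. The work re-arms itself unless
 * recovery has permanently failed; the reschedule delay depends on the
 * chip type and the current recovery state.
 */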
5450 static void be_err_detection_task(struct work_struct *work)
5451 {
5452 	struct be_error_recovery *err_rec =
5453 			container_of(work, struct be_error_recovery,
5454 				     err_detection_work.work);
5455 	struct be_adapter *adapter =
5456 			container_of(err_rec, struct be_adapter,
5457 				     error_recovery);
5458 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5459 	struct device *dev = &adapter->pdev->dev;
5460 	int recovery_status;
5461 
5462 	be_detect_error(adapter);
5463 	if (!be_check_error(adapter, BE_ERROR_HW))
5464 		goto reschedule_task;
5465 
5466 	recovery_status = be_err_recover(adapter);
5467 	if (!recovery_status) {
5468 		err_rec->recovery_retries = 0;
5469 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5470 		dev_info(dev, "Adapter recovery successful\n");
5471 		goto reschedule_task;
5472 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5473 		/* BEx/SH recovery state machine */
		if (adapter->pf_num == 0 &&
		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
			dev_err(dev, "Adapter recovery in progress\n");
5478 		resched_delay = err_rec->resched_delay;
5479 		goto reschedule_task;
5480 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
		/* For VFs, check every second whether the PF has
		 * allocated resources.
		 */
5484 		dev_err(dev, "Re-trying adapter recovery\n");
5485 		goto reschedule_task;
5486 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5487 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
		/* If another error occurs during recovery, the adapter takes
		 * 30 seconds to come out of the error state. Retry error
		 * recovery after this interval.
		 */
		dev_err(dev, "Re-trying adapter recovery\n");
5493 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5494 		goto reschedule_task;
5495 	} else {
5496 		dev_err(dev, "Adapter recovery failed\n");
5497 		dev_err(dev, "Please reboot server to recover\n");
5498 	}
5499 
5500 	return;
5501 
5502 reschedule_task:
5503 	be_schedule_err_detection(adapter, resched_delay);
5504 }
5505 
5506 static void be_log_sfp_info(struct be_adapter *adapter)
5507 {
5508 	int status;
5509 
5510 	status = be_cmd_query_sfp_info(adapter);
5511 	if (!status) {
5512 		dev_err(&adapter->pdev->dev,
			"Port %c: %s Vendor: %s part no: %s\n",
5514 			adapter->port_name,
5515 			be_misconfig_evt_port_state[adapter->phy_state],
5516 			adapter->phy.vendor_name,
5517 			adapter->phy.vendor_pn);
5518 	}
5519 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5520 }
5521 
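/* Periodic (1 second) housekeeping: read the die temperature, reap MCC
 * completions when the interface is down, refresh HW stats, replenish
 * starved RX queues, update EQ delays (except on Skyhawk) and log SFP
 * info on a PHY misconfiguration event.
 */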
5522 static void be_worker(struct work_struct *work)
5523 {
5524 	struct be_adapter *adapter =
5525 		container_of(work, struct be_adapter, work.work);
5526 	struct be_rx_obj *rxo;
5527 	int i;
5528 
5529 	if (be_physfn(adapter) &&
5530 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5531 		be_cmd_get_die_temperature(adapter);
5532 
5533 	/* when interrupts are not yet enabled, just reap any pending
5534 	 * mcc completions
5535 	 */
5536 	if (!netif_running(adapter->netdev)) {
5537 		local_bh_disable();
5538 		be_process_mcc(adapter);
5539 		local_bh_enable();
5540 		goto reschedule;
5541 	}
5542 
5543 	if (!adapter->stats_cmd_sent) {
5544 		if (lancer_chip(adapter))
5545 			lancer_cmd_get_pport_stats(adapter,
5546 						   &adapter->stats_cmd);
5547 		else
5548 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5549 	}
5550 
5551 	for_all_rx_queues(adapter, rxo, i) {
5552 		/* Replenish RX-queues starved due to memory
5553 		 * allocation failures.
5554 		 */
5555 		if (rxo->rx_post_starved)
5556 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5557 	}
5558 
5559 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5560 	if (!skyhawk_chip(adapter))
5561 		be_eqd_update(adapter, false);
5562 
5563 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5564 		be_log_sfp_info(adapter);
5565 
5566 reschedule:
5567 	adapter->work_counter++;
5568 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5569 }
5570 
5571 static void be_unmap_pci_bars(struct be_adapter *adapter)
5572 {
5573 	if (adapter->csr)
5574 		pci_iounmap(adapter->pdev, adapter->csr);
5575 	if (adapter->db)
5576 		pci_iounmap(adapter->pdev, adapter->db);
5577 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5578 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5579 }
5580 
5581 static int db_bar(struct be_adapter *adapter)
5582 {
5583 	if (lancer_chip(adapter) || be_virtfn(adapter))
5584 		return 0;
5585 	else
5586 		return 4;
5587 }
5588 
5589 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5590 {
5591 	if (skyhawk_chip(adapter)) {
5592 		adapter->roce_db.size = 4096;
5593 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5594 							      db_bar(adapter));
5595 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5596 							       db_bar(adapter));
5597 	}
5598 	return 0;
5599 }
5600 
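/* BAR layout used below: on BE-x PFs the CSR space is BAR 2; the doorbell
 * BAR is BAR 0 on Lancer and VFs, BAR 4 otherwise; PCICFG is BAR 1 on BE2
 * PFs and BAR 0 on BE3/Skyhawk PFs, while their VFs reach it at a fixed
 * offset within the doorbell BAR.
 */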
5601 static int be_map_pci_bars(struct be_adapter *adapter)
5602 {
5603 	struct pci_dev *pdev = adapter->pdev;
5604 	u8 __iomem *addr;
5605 	u32 sli_intf;
5606 
5607 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5608 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5609 				SLI_INTF_FAMILY_SHIFT;
5610 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5611 
5612 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5613 		adapter->csr = pci_iomap(pdev, 2, 0);
5614 		if (!adapter->csr)
5615 			return -ENOMEM;
5616 	}
5617 
5618 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5619 	if (!addr)
5620 		goto pci_map_err;
5621 	adapter->db = addr;
5622 
5623 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5624 		if (be_physfn(adapter)) {
5625 			/* PCICFG is the 2nd BAR in BE2 */
5626 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5627 			if (!addr)
5628 				goto pci_map_err;
5629 			adapter->pcicfg = addr;
5630 			adapter->pcicfg_mapped = true;
5631 		} else {
5632 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5633 			adapter->pcicfg_mapped = false;
5634 		}
5635 	}
5636 
5637 	be_roce_map_pci_bars(adapter);
5638 	return 0;
5639 
5640 pci_map_err:
5641 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5642 	be_unmap_pci_bars(adapter);
5643 	return -ENOMEM;
5644 }
5645 
5646 static void be_drv_cleanup(struct be_adapter *adapter)
5647 {
5648 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5649 	struct device *dev = &adapter->pdev->dev;
5650 
5651 	if (mem->va)
5652 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5653 
5654 	mem = &adapter->rx_filter;
5655 	if (mem->va)
5656 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5657 
5658 	mem = &adapter->stats_cmd;
5659 	if (mem->va)
5660 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5661 }
5662 
5663 /* Allocate and initialize various fields in be_adapter struct */
5664 static int be_drv_init(struct be_adapter *adapter)
5665 {
5666 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5667 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5668 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5669 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5670 	struct device *dev = &adapter->pdev->dev;
5671 	int status = 0;
5672 
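	/* The FW mailbox must be 16-byte aligned: over-allocate by 16 bytes
	 * and derive an aligned VA/DMA pair from the raw allocation.
	 */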
5673 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5674 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5675 						 &mbox_mem_alloc->dma,
5676 						 GFP_KERNEL);
5677 	if (!mbox_mem_alloc->va)
5678 		return -ENOMEM;
5679 
5680 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5681 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5682 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5683 
5684 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5685 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5686 					    &rx_filter->dma, GFP_KERNEL);
5687 	if (!rx_filter->va) {
5688 		status = -ENOMEM;
5689 		goto free_mbox;
5690 	}
5691 
5692 	if (lancer_chip(adapter))
5693 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5694 	else if (BE2_chip(adapter))
5695 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5696 	else if (BE3_chip(adapter))
5697 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5698 	else
5699 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5700 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5701 					    &stats_cmd->dma, GFP_KERNEL);
5702 	if (!stats_cmd->va) {
5703 		status = -ENOMEM;
5704 		goto free_rx_filter;
5705 	}
5706 
5707 	mutex_init(&adapter->mbox_lock);
5708 	mutex_init(&adapter->mcc_lock);
5709 	mutex_init(&adapter->rx_filter_lock);
5710 	spin_lock_init(&adapter->mcc_cq_lock);
5711 	init_completion(&adapter->et_cmd_compl);
5712 
5713 	pci_save_state(adapter->pdev);
5714 
5715 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5716 
5717 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5718 	adapter->error_recovery.resched_delay = 0;
5719 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5720 			  be_err_detection_task);
5721 
5722 	adapter->rx_fc = true;
5723 	adapter->tx_fc = true;
5724 
5725 	/* Must be a power of 2 or else MODULO will BUG_ON */
5726 	adapter->be_get_temp_freq = 64;
5727 
5728 	return 0;
5729 
5730 free_rx_filter:
5731 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5732 free_mbox:
5733 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5734 			  mbox_mem_alloc->dma);
5735 	return status;
5736 }
5737 
5738 static void be_remove(struct pci_dev *pdev)
5739 {
5740 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5741 
5742 	if (!adapter)
5743 		return;
5744 
5745 	be_roce_dev_remove(adapter);
5746 	be_intr_set(adapter, false);
5747 
5748 	be_cancel_err_detection(adapter);
5749 
5750 	unregister_netdev(adapter->netdev);
5751 
5752 	be_clear(adapter);
5753 
5754 	if (!pci_vfs_assigned(adapter->pdev))
5755 		be_cmd_reset_function(adapter);
5756 
5757 	/* tell fw we're done with firing cmds */
5758 	be_cmd_fw_clean(adapter);
5759 
5760 	be_unmap_pci_bars(adapter);
5761 	be_drv_cleanup(adapter);
5762 
5763 	pci_disable_pcie_error_reporting(pdev);
5764 
5765 	pci_release_regions(pdev);
5766 	pci_disable_device(pdev);
5767 
5768 	free_netdev(adapter->netdev);
5769 }
5770 
5771 static ssize_t be_hwmon_show_temp(struct device *dev,
5772 				  struct device_attribute *dev_attr,
5773 				  char *buf)
5774 {
5775 	struct be_adapter *adapter = dev_get_drvdata(dev);
5776 
5777 	/* Unit: millidegree Celsius */
5778 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5779 		return -EIO;
5780 	else
5781 		return sprintf(buf, "%u\n",
5782 			       adapter->hwmon_info.be_on_die_temp * 1000);
5783 }
5784 
5785 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5786 			  be_hwmon_show_temp, NULL, 1);
5787 
5788 static struct attribute *be_hwmon_attrs[] = {
5789 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5790 	NULL
5791 };
5792 
5793 ATTRIBUTE_GROUPS(be_hwmon);
5794 
5795 static char *mc_name(struct be_adapter *adapter)
5796 {
5797 	char *str = "";	/* default */
5798 
5799 	switch (adapter->mc_type) {
5800 	case UMC:
5801 		str = "UMC";
5802 		break;
5803 	case FLEX10:
5804 		str = "FLEX10";
5805 		break;
5806 	case vNIC1:
5807 		str = "vNIC-1";
5808 		break;
5809 	case nPAR:
5810 		str = "nPAR";
5811 		break;
5812 	case UFP:
5813 		str = "UFP";
5814 		break;
5815 	case vNIC2:
5816 		str = "vNIC-2";
5817 		break;
5818 	default:
5819 		str = "";
5820 	}
5821 
5822 	return str;
5823 }
5824 
5825 static inline char *func_name(struct be_adapter *adapter)
5826 {
5827 	return be_physfn(adapter) ? "PF" : "VF";
5828 }
5829 
5830 static inline char *nic_name(struct pci_dev *pdev)
5831 {
5832 	switch (pdev->device) {
5833 	case OC_DEVICE_ID1:
5834 		return OC_NAME;
5835 	case OC_DEVICE_ID2:
5836 		return OC_NAME_BE;
5837 	case OC_DEVICE_ID3:
5838 	case OC_DEVICE_ID4:
5839 		return OC_NAME_LANCER;
5840 	case BE_DEVICE_ID2:
5841 		return BE3_NAME;
5842 	case OC_DEVICE_ID5:
5843 	case OC_DEVICE_ID6:
5844 		return OC_NAME_SH;
5845 	default:
5846 		return BE_NAME;
5847 	}
5848 }
5849 
5850 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5851 {
5852 	struct be_adapter *adapter;
5853 	struct net_device *netdev;
5854 	int status = 0;
5855 
5856 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5857 
5858 	status = pci_enable_device(pdev);
5859 	if (status)
5860 		goto do_none;
5861 
5862 	status = pci_request_regions(pdev, DRV_NAME);
5863 	if (status)
5864 		goto disable_dev;
5865 	pci_set_master(pdev);
5866 
5867 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5868 	if (!netdev) {
5869 		status = -ENOMEM;
5870 		goto rel_reg;
5871 	}
5872 	adapter = netdev_priv(netdev);
5873 	adapter->pdev = pdev;
5874 	pci_set_drvdata(pdev, adapter);
5875 	adapter->netdev = netdev;
5876 	SET_NETDEV_DEV(netdev, &pdev->dev);
5877 
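	/* Prefer a 64-bit DMA mask and advertise NETIF_F_HIGHDMA when it is
	 * accepted; otherwise fall back to a 32-bit mask.
	 */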
5878 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5879 	if (!status) {
5880 		netdev->features |= NETIF_F_HIGHDMA;
5881 	} else {
5882 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5883 		if (status) {
5884 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5885 			goto free_netdev;
5886 		}
5887 	}
5888 
5889 	status = pci_enable_pcie_error_reporting(pdev);
5890 	if (!status)
5891 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5892 
5893 	status = be_map_pci_bars(adapter);
5894 	if (status)
5895 		goto free_netdev;
5896 
5897 	status = be_drv_init(adapter);
5898 	if (status)
5899 		goto unmap_bars;
5900 
5901 	status = be_setup(adapter);
5902 	if (status)
5903 		goto drv_cleanup;
5904 
5905 	be_netdev_init(netdev);
5906 	status = register_netdev(netdev);
5907 	if (status != 0)
5908 		goto unsetup;
5909 
5910 	be_roce_dev_add(adapter);
5911 
5912 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5913 	adapter->error_recovery.probe_time = jiffies;
5914 
	/* On-die temperature is not supported on VFs. */
5916 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5917 		adapter->hwmon_info.hwmon_dev =
5918 			devm_hwmon_device_register_with_groups(&pdev->dev,
5919 							       DRV_NAME,
5920 							       adapter,
5921 							       be_hwmon_groups);
5922 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5923 	}
5924 
5925 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5926 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5927 
5928 	return 0;
5929 
5930 unsetup:
5931 	be_clear(adapter);
5932 drv_cleanup:
5933 	be_drv_cleanup(adapter);
5934 unmap_bars:
5935 	be_unmap_pci_bars(adapter);
5936 free_netdev:
5937 	free_netdev(netdev);
5938 rel_reg:
5939 	pci_release_regions(pdev);
5940 disable_dev:
5941 	pci_disable_device(pdev);
5942 do_none:
5943 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5944 	return status;
5945 }
5946 
5947 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5948 {
5949 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5950 
5951 	be_intr_set(adapter, false);
5952 	be_cancel_err_detection(adapter);
5953 
5954 	be_cleanup(adapter);
5955 
5956 	pci_save_state(pdev);
5957 	pci_disable_device(pdev);
5958 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5959 	return 0;
5960 }
5961 
5962 static int be_pci_resume(struct pci_dev *pdev)
5963 {
5964 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5965 	int status = 0;
5966 
5967 	status = pci_enable_device(pdev);
5968 	if (status)
5969 		return status;
5970 
5971 	pci_restore_state(pdev);
5972 
5973 	status = be_resume(adapter);
5974 	if (status)
5975 		return status;
5976 
5977 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5978 
5979 	return 0;
5980 }
5981 
/* An FLR will stop BE from DMAing any data. */
5985 static void be_shutdown(struct pci_dev *pdev)
5986 {
5987 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5988 
5989 	if (!adapter)
5990 		return;
5991 
5992 	be_roce_dev_shutdown(adapter);
5993 	cancel_delayed_work_sync(&adapter->work);
5994 	be_cancel_err_detection(adapter);
5995 
5996 	netif_device_detach(adapter->netdev);
5997 
5998 	be_cmd_reset_function(adapter);
5999 
6000 	pci_disable_device(pdev);
6001 }
6002 
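/* EEH/AER error handling: error_detected() quiesces the function and asks
 * for a slot reset, slot_reset() re-enables the device and waits for FW
 * readiness, and resume() restores the interface (see be_eeh_handlers).
 */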
6003 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6004 					    pci_channel_state_t state)
6005 {
6006 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6007 
6008 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6009 
6010 	be_roce_dev_remove(adapter);
6011 
6012 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6013 		be_set_error(adapter, BE_ERROR_EEH);
6014 
6015 		be_cancel_err_detection(adapter);
6016 
6017 		be_cleanup(adapter);
6018 	}
6019 
6020 	if (state == pci_channel_io_perm_failure)
6021 		return PCI_ERS_RESULT_DISCONNECT;
6022 
6023 	pci_disable_device(pdev);
6024 
	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while the flash dump is in progress can
	 * cause it not to recover; wait for the dump to finish.
	 * Wait only on the first function, as this is needed only once
	 * per adapter.
	 */
6031 	if (pdev->devfn == 0)
6032 		ssleep(30);
6033 
6034 	return PCI_ERS_RESULT_NEED_RESET;
6035 }
6036 
6037 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6038 {
6039 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6040 	int status;
6041 
6042 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6043 
6044 	status = pci_enable_device(pdev);
6045 	if (status)
6046 		return PCI_ERS_RESULT_DISCONNECT;
6047 
6048 	pci_set_master(pdev);
6049 	pci_restore_state(pdev);
6050 
6051 	/* Check if card is ok and fw is ready */
6052 	dev_info(&adapter->pdev->dev,
6053 		 "Waiting for FW to be ready after EEH reset\n");
6054 	status = be_fw_wait_ready(adapter);
6055 	if (status)
6056 		return PCI_ERS_RESULT_DISCONNECT;
6057 
6058 	pci_cleanup_aer_uncorrect_error_status(pdev);
6059 	be_clear_error(adapter, BE_CLEAR_ALL);
6060 	return PCI_ERS_RESULT_RECOVERED;
6061 }
6062 
6063 static void be_eeh_resume(struct pci_dev *pdev)
6064 {
6065 	int status = 0;
6066 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6067 
6068 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6069 
6070 	pci_save_state(pdev);
6071 
6072 	status = be_resume(adapter);
6073 	if (status)
6074 		goto err;
6075 
6076 	be_roce_dev_add(adapter);
6077 
6078 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6079 	return;
6080 err:
6081 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6082 }
6083 
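/* PCI sriov_configure hook (driven via the sriov_numvfs sysfs attribute):
 * num_vfs == 0 disables VFs (refused while VFs are assigned to guests);
 * a non-zero value redistributes PF-pool resources (on Skyhawk), refreshes
 * resource limits and queue counts, and then creates the requested VFs.
 */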
6084 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6085 {
6086 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6087 	struct be_resources vft_res = {0};
6088 	int status;
6089 
6090 	if (!num_vfs)
6091 		be_vf_clear(adapter);
6092 
6093 	adapter->num_vfs = num_vfs;
6094 
6095 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6096 		dev_warn(&pdev->dev,
6097 			 "Cannot disable VFs while they are assigned\n");
6098 		return -EBUSY;
6099 	}
6100 
	/* When the HW is in an SRIOV-capable configuration, the PF-pool
	 * resources are distributed equally across the maximum number of VFs.
	 * The user may request that only a subset of the max VFs be enabled.
	 * Based on num_vfs, redistribute the resources so that each enabled VF
	 * gets a larger share of them.
	 * This facility is not available in BE3 FW.
	 * On Lancer chips, this redistribution is done by the FW.
	 */
6109 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6110 		be_calculate_vf_res(adapter, adapter->num_vfs,
6111 				    &vft_res);
6112 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6113 						 adapter->num_vfs, &vft_res);
6114 		if (status)
6115 			dev_err(&pdev->dev,
6116 				"Failed to optimize SR-IOV resources\n");
6117 	}
6118 
6119 	status = be_get_resources(adapter);
6120 	if (status)
6121 		return be_cmd_status(status);
6122 
6123 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6124 	rtnl_lock();
6125 	status = be_update_queues(adapter);
6126 	rtnl_unlock();
6127 	if (status)
6128 		return be_cmd_status(status);
6129 
6130 	if (adapter->num_vfs)
6131 		status = be_vf_setup(adapter);
6132 
6133 	if (!status)
6134 		return adapter->num_vfs;
6135 
6136 	return 0;
6137 }
6138 
6139 static const struct pci_error_handlers be_eeh_handlers = {
6140 	.error_detected = be_eeh_err_detected,
6141 	.slot_reset = be_eeh_reset,
6142 	.resume = be_eeh_resume,
6143 };
6144 
6145 static struct pci_driver be_driver = {
6146 	.name = DRV_NAME,
6147 	.id_table = be_dev_ids,
6148 	.probe = be_probe,
6149 	.remove = be_remove,
6150 	.suspend = be_suspend,
6151 	.resume = be_pci_resume,
6152 	.shutdown = be_shutdown,
6153 	.sriov_configure = be_pci_sriov_configure,
6154 	.err_handler = &be_eeh_handlers
6155 };
6156 
6157 static int __init be_init_module(void)
6158 {
6159 	int status;
6160 
6161 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6162 	    rx_frag_size != 2048) {
6163 		printk(KERN_WARNING DRV_NAME
6164 			" : Module param rx_frag_size must be 2048/4096/8192."
6165 			" Using 2048\n");
6166 		rx_frag_size = 2048;
6167 	}
6168 
6169 	if (num_vfs > 0) {
		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6171 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6172 	}
6173 
6174 	be_wq = create_singlethread_workqueue("be_wq");
6175 	if (!be_wq) {
		pr_warn(DRV_NAME " : workqueue creation failed\n");
		return -ENOMEM;
6178 	}
6179 
6180 	be_err_recovery_workq =
6181 		create_singlethread_workqueue("be_err_recover");
6182 	if (!be_err_recovery_workq)
		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6184 
6185 	status = pci_register_driver(&be_driver);
6186 	if (status) {
6187 		destroy_workqueue(be_wq);
6188 		be_destroy_err_recovery_workq();
6189 	}
6190 	return status;
6191 }
6192 module_init(be_init_module);
6193 
6194 static void __exit be_exit_module(void)
6195 {
6196 	pci_unregister_driver(&be_driver);
6197 
6198 	be_destroy_err_recovery_workq();
6199 
6200 	if (be_wq)
6201 		destroy_workqueue(be_wq);
6202 }
6203 module_exit(be_exit_module);
6204