1 /*
2  * Copyright (C) 2005 - 2015 Emulex
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 static const struct pci_device_id be_dev_ids[] = {
45 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53 	{ 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58 	"CEV",
59 	"CTX",
60 	"DBUF",
61 	"ERX",
62 	"Host",
63 	"MPU",
64 	"NDMA",
65 	"PTC ",
66 	"RDMA ",
67 	"RXF ",
68 	"RXIPS ",
69 	"RXULP0 ",
70 	"RXULP1 ",
71 	"RXULP2 ",
72 	"TIM ",
73 	"TPOST ",
74 	"TPRE ",
75 	"TXIPS ",
76 	"TXULP0 ",
77 	"TXULP1 ",
78 	"UC ",
79 	"WDMA ",
80 	"TXULP2 ",
81 	"HOST1 ",
82 	"P0_OB_LINK ",
83 	"P1_OB_LINK ",
84 	"HOST_GPIO ",
85 	"MBOX ",
86 	"ERX2 ",
87 	"SPARE ",
88 	"JTAG ",
89 	"MPU_INTPEND "
90 };
91 
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94 	"LPCMEMHOST",
95 	"MGMT_MAC",
96 	"PCS0ONLINE",
97 	"MPU_IRAM",
98 	"PCS1ONLINE",
99 	"PCTL0",
100 	"PCTL1",
101 	"PMEM",
102 	"RR",
103 	"TXPB",
104 	"RXPP",
105 	"XAUI",
106 	"TXP",
107 	"ARM",
108 	"IPC",
109 	"HOST2",
110 	"HOST3",
111 	"HOST4",
112 	"HOST5",
113 	"HOST6",
114 	"HOST7",
115 	"ECRC",
116 	"Poison TLP",
117 	"NETC",
118 	"PERIPH",
119 	"LLTXULP",
120 	"D2P",
121 	"RCON",
122 	"LDMA",
123 	"LLTXP",
124 	"LLTXPB",
125 	"Unknown"
126 };
127 
128 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
129 {
130 	struct be_dma_mem *mem = &q->dma_mem;
131 
132 	if (mem->va) {
133 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
134 				  mem->dma);
135 		mem->va = NULL;
136 	}
137 }
138 
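/* Allocate zeroed DMA-coherent memory for a queue of 'len' entries of
 * 'entry_size' bytes each and record the ring geometry in @q.
 */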
139 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
140 			  u16 len, u16 entry_size)
141 {
142 	struct be_dma_mem *mem = &q->dma_mem;
143 
144 	memset(q, 0, sizeof(*q));
145 	q->len = len;
146 	q->entry_size = entry_size;
147 	mem->size = len * entry_size;
148 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
149 				      GFP_KERNEL);
150 	if (!mem->va)
151 		return -ENOMEM;
152 	return 0;
153 }
154 
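/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR interrupt-control register via PCI config space.
 */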
155 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
156 {
157 	u32 reg, enabled;
158 
159 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
160 			      &reg);
161 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
162 
163 	if (!enabled && enable)
164 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
165 	else if (enabled && !enable)
166 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167 	else
168 		return;
169 
170 	pci_write_config_dword(adapter->pdev,
171 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
172 }
173 
174 static void be_intr_set(struct be_adapter *adapter, bool enable)
175 {
176 	int status = 0;
177 
178 	/* On Lancer, interrupts can't be controlled via this register */
179 	if (lancer_chip(adapter))
180 		return;
181 
182 	if (be_check_error(adapter, BE_ERROR_EEH))
183 		return;
184 
185 	status = be_cmd_intr_set(adapter, enable);
186 	if (status)
187 		be_reg_intr_set(adapter, enable);
188 }
189 
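/* Ring the RX queue doorbell with the number of receive buffers posted */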
190 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
191 {
192 	u32 val = 0;
193 
194 	if (be_check_error(adapter, BE_ERROR_HW))
195 		return;
196 
197 	val |= qid & DB_RQ_RING_ID_MASK;
198 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
199 
200 	wmb();
201 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
202 }
203 
204 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
205 			  u16 posted)
206 {
207 	u32 val = 0;
208 
209 	if (be_check_error(adapter, BE_ERROR_HW))
210 		return;
211 
212 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
213 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
214 
215 	wmb();
216 	iowrite32(val, adapter->db + txo->db_offset);
217 }
218 
219 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
220 			 bool arm, bool clear_int, u16 num_popped,
221 			 u32 eq_delay_mult_enc)
222 {
223 	u32 val = 0;
224 
225 	val |= qid & DB_EQ_RING_ID_MASK;
226 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
227 
228 	if (be_check_error(adapter, BE_ERROR_HW))
229 		return;
230 
231 	if (arm)
232 		val |= 1 << DB_EQ_REARM_SHIFT;
233 	if (clear_int)
234 		val |= 1 << DB_EQ_CLR_SHIFT;
235 	val |= 1 << DB_EQ_EVNT_SHIFT;
236 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
237 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
238 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
239 }
240 
241 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
242 {
243 	u32 val = 0;
244 
245 	val |= qid & DB_CQ_RING_ID_MASK;
246 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
247 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
248 
249 	if (be_check_error(adapter, BE_ERROR_HW))
250 		return;
251 
252 	if (arm)
253 		val |= 1 << DB_CQ_REARM_SHIFT;
254 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
255 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
256 }
257 
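/* Handler for changing the interface MAC address (ndo_set_mac_address).
 * The new MAC is programmed via PMAC_ADD and confirmed against the FW's
 * active MAC before netdev->dev_addr is updated.
 */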
258 static int be_mac_addr_set(struct net_device *netdev, void *p)
259 {
260 	struct be_adapter *adapter = netdev_priv(netdev);
261 	struct device *dev = &adapter->pdev->dev;
262 	struct sockaddr *addr = p;
263 	int status;
264 	u8 mac[ETH_ALEN];
265 	u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
266 
267 	if (!is_valid_ether_addr(addr->sa_data))
268 		return -EADDRNOTAVAIL;
269 
270 	/* Proceed further only if the user-provided MAC is different
271 	 * from the active MAC
272 	 */
273 	if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
274 		return 0;
275 
276 	/* if device is not running, copy MAC to netdev->dev_addr */
277 	if (!netif_running(netdev))
278 		goto done;
279 
280 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
281 	 * privilege or if PF did not provision the new MAC address.
282 	 * On BE3, this cmd will always fail if the VF doesn't have the
283 	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
284 	 * the MAC for the VF.
285 	 */
286 	status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
287 				 adapter->if_handle, &adapter->pmac_id[0], 0);
288 	if (!status) {
289 		curr_pmac_id = adapter->pmac_id[0];
290 
291 		/* Delete the old programmed MAC. This call may fail if the
292 		 * old MAC was already deleted by the PF driver.
293 		 */
294 		if (adapter->pmac_id[0] != old_pmac_id)
295 			be_cmd_pmac_del(adapter, adapter->if_handle,
296 					old_pmac_id, 0);
297 	}
298 
299 	/* Decide if the new MAC is successfully activated only after
300 	 * querying the FW
301 	 */
302 	status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
303 				       adapter->if_handle, true, 0);
304 	if (status)
305 		goto err;
306 
307 	/* The MAC change did not happen, either due to lack of privilege
308 	 * or because the PF didn't pre-provision the new MAC.
309 	 */
310 	if (!ether_addr_equal(addr->sa_data, mac)) {
311 		status = -EPERM;
312 		goto err;
313 	}
314 done:
315 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
316 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
317 	return 0;
318 err:
319 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
320 	return status;
321 }
322 
323 /* BE2 supports only v0 cmd */
324 static void *hw_stats_from_cmd(struct be_adapter *adapter)
325 {
326 	if (BE2_chip(adapter)) {
327 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
328 
329 		return &cmd->hw_stats;
330 	} else if (BE3_chip(adapter)) {
331 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
332 
333 		return &cmd->hw_stats;
334 	} else {
335 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
336 
337 		return &cmd->hw_stats;
338 	}
339 }
340 
341 /* BE2 supports only v0 cmd */
342 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
343 {
344 	if (BE2_chip(adapter)) {
345 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
346 
347 		return &hw_stats->erx;
348 	} else if (BE3_chip(adapter)) {
349 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
350 
351 		return &hw_stats->erx;
352 	} else {
353 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
354 
355 		return &hw_stats->erx;
356 	}
357 }
358 
359 static void populate_be_v0_stats(struct be_adapter *adapter)
360 {
361 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
362 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
363 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
364 	struct be_port_rxf_stats_v0 *port_stats =
365 					&rxf_stats->port[adapter->port_num];
366 	struct be_drv_stats *drvs = &adapter->drv_stats;
367 
368 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
369 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
370 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
371 	drvs->rx_control_frames = port_stats->rx_control_frames;
372 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
373 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
374 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
375 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
376 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
377 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
378 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
379 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
380 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
381 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
382 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
383 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
384 	drvs->rx_dropped_header_too_small =
385 		port_stats->rx_dropped_header_too_small;
386 	drvs->rx_address_filtered =
387 					port_stats->rx_address_filtered +
388 					port_stats->rx_vlan_filtered;
389 	drvs->rx_alignment_symbol_errors =
390 		port_stats->rx_alignment_symbol_errors;
391 
392 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
393 	drvs->tx_controlframes = port_stats->tx_controlframes;
394 
395 	if (adapter->port_num)
396 		drvs->jabber_events = rxf_stats->port1_jabber_events;
397 	else
398 		drvs->jabber_events = rxf_stats->port0_jabber_events;
399 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
400 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
401 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
402 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
403 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
404 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
405 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
406 }
407 
408 static void populate_be_v1_stats(struct be_adapter *adapter)
409 {
410 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
411 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
412 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
413 	struct be_port_rxf_stats_v1 *port_stats =
414 					&rxf_stats->port[adapter->port_num];
415 	struct be_drv_stats *drvs = &adapter->drv_stats;
416 
417 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
418 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
419 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
420 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
421 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
422 	drvs->rx_control_frames = port_stats->rx_control_frames;
423 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
424 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
425 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
426 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
427 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
428 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
429 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
430 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
431 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
432 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
433 	drvs->rx_dropped_header_too_small =
434 		port_stats->rx_dropped_header_too_small;
435 	drvs->rx_input_fifo_overflow_drop =
436 		port_stats->rx_input_fifo_overflow_drop;
437 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
438 	drvs->rx_alignment_symbol_errors =
439 		port_stats->rx_alignment_symbol_errors;
440 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
441 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
442 	drvs->tx_controlframes = port_stats->tx_controlframes;
443 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
444 	drvs->jabber_events = port_stats->jabber_events;
445 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
448 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453 
454 static void populate_be_v2_stats(struct be_adapter *adapter)
455 {
456 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
457 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
459 	struct be_port_rxf_stats_v2 *port_stats =
460 					&rxf_stats->port[adapter->port_num];
461 	struct be_drv_stats *drvs = &adapter->drv_stats;
462 
463 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
467 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
468 	drvs->rx_control_frames = port_stats->rx_control_frames;
469 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479 	drvs->rx_dropped_header_too_small =
480 		port_stats->rx_dropped_header_too_small;
481 	drvs->rx_input_fifo_overflow_drop =
482 		port_stats->rx_input_fifo_overflow_drop;
483 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
484 	drvs->rx_alignment_symbol_errors =
485 		port_stats->rx_alignment_symbol_errors;
486 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
488 	drvs->tx_controlframes = port_stats->tx_controlframes;
489 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490 	drvs->jabber_events = port_stats->jabber_events;
491 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
494 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 	if (be_roce_supported(adapter)) {
499 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
500 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
501 		drvs->rx_roce_frames = port_stats->roce_frames_received;
502 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
503 		drvs->roce_drops_payload_len =
504 			port_stats->roce_drops_payload_len;
505 	}
506 }
507 
508 static void populate_lancer_stats(struct be_adapter *adapter)
509 {
510 	struct be_drv_stats *drvs = &adapter->drv_stats;
511 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
512 
513 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
514 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
515 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
516 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
517 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
518 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
519 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
520 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
521 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
522 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
523 	drvs->rx_dropped_tcp_length =
524 				pport_stats->rx_dropped_invalid_tcp_length;
525 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
526 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
527 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
528 	drvs->rx_dropped_header_too_small =
529 				pport_stats->rx_dropped_header_too_small;
530 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
531 	drvs->rx_address_filtered =
532 					pport_stats->rx_address_filtered +
533 					pport_stats->rx_vlan_filtered;
534 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
535 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
537 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
538 	drvs->jabber_events = pport_stats->rx_jabbers;
539 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
540 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
541 	drvs->rx_drops_too_many_frags =
542 				pport_stats->rx_drops_too_many_frags_lo;
543 }
544 
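/* Accumulate a wrapping 16-bit HW counter into a 32-bit driver counter:
 * the low 16 bits track the current HW value and the high 16 bits advance
 * by one each time a wrap is detected.
 */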
545 static void accumulate_16bit_val(u32 *acc, u16 val)
546 {
547 #define lo(x)			(x & 0xFFFF)
548 #define hi(x)			(x & 0xFFFF0000)
549 	bool wrapped = val < lo(*acc);
550 	u32 newacc = hi(*acc) + val;
551 
552 	if (wrapped)
553 		newacc += 65536;
554 	ACCESS_ONCE(*acc) = newacc;
555 }
556 
557 static void populate_erx_stats(struct be_adapter *adapter,
558 			       struct be_rx_obj *rxo, u32 erx_stat)
559 {
560 	if (!BEx_chip(adapter))
561 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
562 	else
563 		/* the erx HW counter below can wrap around after 65535.
564 		 * The driver accumulates it into a 32-bit value.
565 		 */
566 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
567 				     (u16)erx_stat);
568 }
569 
570 void be_parse_stats(struct be_adapter *adapter)
571 {
572 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
573 	struct be_rx_obj *rxo;
574 	int i;
575 	u32 erx_stat;
576 
577 	if (lancer_chip(adapter)) {
578 		populate_lancer_stats(adapter);
579 	} else {
580 		if (BE2_chip(adapter))
581 			populate_be_v0_stats(adapter);
582 		else if (BE3_chip(adapter))
583 			/* for BE3 */
584 			populate_be_v1_stats(adapter);
585 		else
586 			populate_be_v2_stats(adapter);
587 
588 		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
589 		for_all_rx_queues(adapter, rxo, i) {
590 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
591 			populate_erx_stats(adapter, rxo, erx_stat);
592 		}
593 	}
594 }
595 
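/* ndo_get_stats64 handler: aggregates the per-queue SW counters (read under
 * u64_stats sync) and the FW-reported error counters kept in drv_stats.
 */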
596 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
597 						struct rtnl_link_stats64 *stats)
598 {
599 	struct be_adapter *adapter = netdev_priv(netdev);
600 	struct be_drv_stats *drvs = &adapter->drv_stats;
601 	struct be_rx_obj *rxo;
602 	struct be_tx_obj *txo;
603 	u64 pkts, bytes;
604 	unsigned int start;
605 	int i;
606 
607 	for_all_rx_queues(adapter, rxo, i) {
608 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
609 
610 		do {
611 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
612 			pkts = rx_stats(rxo)->rx_pkts;
613 			bytes = rx_stats(rxo)->rx_bytes;
614 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
615 		stats->rx_packets += pkts;
616 		stats->rx_bytes += bytes;
617 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
618 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
619 					rx_stats(rxo)->rx_drops_no_frags;
620 	}
621 
622 	for_all_tx_queues(adapter, txo, i) {
623 		const struct be_tx_stats *tx_stats = tx_stats(txo);
624 
625 		do {
626 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
627 			pkts = tx_stats(txo)->tx_pkts;
628 			bytes = tx_stats(txo)->tx_bytes;
629 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
630 		stats->tx_packets += pkts;
631 		stats->tx_bytes += bytes;
632 	}
633 
634 	/* bad pkts received */
635 	stats->rx_errors = drvs->rx_crc_errors +
636 		drvs->rx_alignment_symbol_errors +
637 		drvs->rx_in_range_errors +
638 		drvs->rx_out_range_errors +
639 		drvs->rx_frame_too_long +
640 		drvs->rx_dropped_too_small +
641 		drvs->rx_dropped_too_short +
642 		drvs->rx_dropped_header_too_small +
643 		drvs->rx_dropped_tcp_length +
644 		drvs->rx_dropped_runt;
645 
646 	/* detailed rx errors */
647 	stats->rx_length_errors = drvs->rx_in_range_errors +
648 		drvs->rx_out_range_errors +
649 		drvs->rx_frame_too_long;
650 
651 	stats->rx_crc_errors = drvs->rx_crc_errors;
652 
653 	/* frame alignment errors */
654 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
655 
656 	/* receiver fifo overrun */
657 	/* drops_no_pbuf is not per i/f, it's per BE card */
658 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
659 				drvs->rx_input_fifo_overflow_drop +
660 				drvs->rx_drops_no_pbuf;
661 	return stats;
662 }
663 
664 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
665 {
666 	struct net_device *netdev = adapter->netdev;
667 
668 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
669 		netif_carrier_off(netdev);
670 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
671 	}
672 
673 	if (link_status)
674 		netif_carrier_on(netdev);
675 	else
676 		netif_carrier_off(netdev);
677 
678 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
679 }
680 
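/* Update per-TX-queue SW stats (requests, bytes, packets) for an skb that is
 * being queued for transmit.
 */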
681 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
682 {
683 	struct be_tx_stats *stats = tx_stats(txo);
684 	u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
685 
686 	u64_stats_update_begin(&stats->sync);
687 	stats->tx_reqs++;
688 	stats->tx_bytes += skb->len;
689 	stats->tx_pkts += tx_pkts;
690 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
691 		stats->tx_vxlan_offload_pkts += tx_pkts;
692 	u64_stats_update_end(&stats->sync);
693 }
694 
695 /* Returns number of WRBs needed for the skb */
696 static u32 skb_wrb_cnt(struct sk_buff *skb)
697 {
698 	/* +1 for the header wrb */
699 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
700 }
701 
702 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
703 {
704 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
705 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
706 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
707 	wrb->rsvd0 = 0;
708 }
709 
710 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
711  * to avoid the swap and shift/mask operations in wrb_fill().
712  */
713 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
714 {
715 	wrb->frag_pa_hi = 0;
716 	wrb->frag_pa_lo = 0;
717 	wrb->frag_len = 0;
718 	wrb->rsvd0 = 0;
719 }
720 
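/* Return the VLAN tag to place in the TX WRB; if the skb's priority is not
 * in the available priority bitmap, substitute the recommended priority bits.
 */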
721 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
722 				     struct sk_buff *skb)
723 {
724 	u8 vlan_prio;
725 	u16 vlan_tag;
726 
727 	vlan_tag = skb_vlan_tag_get(skb);
728 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
729 	/* If vlan priority provided by OS is NOT in available bmap */
730 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
731 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
732 				adapter->recommended_prio_bits;
733 
734 	return vlan_tag;
735 }
736 
737 /* Used only for IP tunnel packets */
738 static u16 skb_inner_ip_proto(struct sk_buff *skb)
739 {
740 	return (inner_ip_hdr(skb)->version == 4) ?
741 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
742 }
743 
744 static u16 skb_ip_proto(struct sk_buff *skb)
745 {
746 	return (ip_hdr(skb)->version == 4) ?
747 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
748 }
749 
750 static inline bool be_is_txq_full(struct be_tx_obj *txo)
751 {
752 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
753 }
754 
755 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
756 {
757 	return atomic_read(&txo->q.used) < txo->q.len / 2;
758 }
759 
760 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
761 {
762 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
763 }
764 
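/* Translate skb offload state (GSO, checksum offload, VLAN tag) into the
 * TX WRB feature flags carried in wrb_params.
 */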
765 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
766 				       struct sk_buff *skb,
767 				       struct be_wrb_params *wrb_params)
768 {
769 	u16 proto;
770 
771 	if (skb_is_gso(skb)) {
772 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
773 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
774 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
775 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
776 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
777 		if (skb->encapsulation) {
778 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
779 			proto = skb_inner_ip_proto(skb);
780 		} else {
781 			proto = skb_ip_proto(skb);
782 		}
783 		if (proto == IPPROTO_TCP)
784 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
785 		else if (proto == IPPROTO_UDP)
786 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
787 	}
788 
789 	if (skb_vlan_tag_present(skb)) {
790 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
791 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
792 	}
793 
794 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
795 }
796 
797 static void wrb_fill_hdr(struct be_adapter *adapter,
798 			 struct be_eth_hdr_wrb *hdr,
799 			 struct be_wrb_params *wrb_params,
800 			 struct sk_buff *skb)
801 {
802 	memset(hdr, 0, sizeof(*hdr));
803 
804 	SET_TX_WRB_HDR_BITS(crc, hdr,
805 			    BE_WRB_F_GET(wrb_params->features, CRC));
806 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
807 			    BE_WRB_F_GET(wrb_params->features, IPCS));
808 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
809 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
810 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
811 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
812 
813 	SET_TX_WRB_HDR_BITS(lso, hdr,
814 			    BE_WRB_F_GET(wrb_params->features, LSO));
815 	SET_TX_WRB_HDR_BITS(lso6, hdr,
816 			    BE_WRB_F_GET(wrb_params->features, LSO6));
817 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
818 
819 	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
820 	 * hack is not needed, the evt bit is set while ringing DB.
821 	 */
822 	SET_TX_WRB_HDR_BITS(event, hdr,
823 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
824 	SET_TX_WRB_HDR_BITS(vlan, hdr,
825 			    BE_WRB_F_GET(wrb_params->features, VLAN));
826 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
827 
828 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
829 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
830 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
831 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
832 }
833 
834 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
835 			  bool unmap_single)
836 {
837 	dma_addr_t dma;
838 	u32 frag_len = le32_to_cpu(wrb->frag_len);
839 
840 
841 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
842 		(u64)le32_to_cpu(wrb->frag_pa_lo);
843 	if (frag_len) {
844 		if (unmap_single)
845 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
846 		else
847 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
848 	}
849 }
850 
851 /* Grab a WRB header for xmit */
852 static u16 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
853 {
854 	u16 head = txo->q.head;
855 
856 	queue_head_inc(&txo->q);
857 	return head;
858 }
859 
860 /* Set up the WRB header for xmit */
861 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
862 				struct be_tx_obj *txo,
863 				struct be_wrb_params *wrb_params,
864 				struct sk_buff *skb, u16 head)
865 {
866 	u32 num_frags = skb_wrb_cnt(skb);
867 	struct be_queue_info *txq = &txo->q;
868 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
869 
870 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
871 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
872 
873 	BUG_ON(txo->sent_skb_list[head]);
874 	txo->sent_skb_list[head] = skb;
875 	txo->last_req_hdr = head;
876 	atomic_add(num_frags, &txq->used);
877 	txo->last_req_wrb_cnt = num_frags;
878 	txo->pend_wrb_cnt += num_frags;
879 }
880 
881 /* Setup a WRB fragment (buffer descriptor) for xmit */
882 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
883 				 int len)
884 {
885 	struct be_eth_wrb *wrb;
886 	struct be_queue_info *txq = &txo->q;
887 
888 	wrb = queue_head_node(txq);
889 	wrb_fill(wrb, busaddr, len);
890 	queue_head_inc(txq);
891 }
892 
893 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
894  * was invoked. The producer index is restored to the previous packet and the
895  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
896  */
897 static void be_xmit_restore(struct be_adapter *adapter,
898 			    struct be_tx_obj *txo, u16 head, bool map_single,
899 			    u32 copied)
900 {
901 	struct device *dev;
902 	struct be_eth_wrb *wrb;
903 	struct be_queue_info *txq = &txo->q;
904 
905 	dev = &adapter->pdev->dev;
906 	txq->head = head;
907 
908 	/* skip the first wrb (hdr); it's not mapped */
909 	queue_head_inc(txq);
910 	while (copied) {
911 		wrb = queue_head_node(txq);
912 		unmap_tx_frag(dev, wrb, map_single);
913 		map_single = false;
914 		copied -= le32_to_cpu(wrb->frag_len);
915 		queue_head_inc(txq);
916 	}
917 
918 	txq->head = head;
919 }
920 
921 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
922  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
923  * of WRBs used up by the packet.
924  */
925 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
926 			   struct sk_buff *skb,
927 			   struct be_wrb_params *wrb_params)
928 {
929 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
930 	struct device *dev = &adapter->pdev->dev;
931 	struct be_queue_info *txq = &txo->q;
932 	bool map_single = false;
933 	u16 head;
934 	dma_addr_t busaddr;
935 	int len;
936 
937 	head = be_tx_get_wrb_hdr(txo);
938 
939 	if (skb->len > skb->data_len) {
940 		len = skb_headlen(skb);
941 
942 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
943 		if (dma_mapping_error(dev, busaddr))
944 			goto dma_err;
945 		map_single = true;
946 		be_tx_setup_wrb_frag(txo, busaddr, len);
947 		copied += len;
948 	}
949 
950 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
951 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
952 		len = skb_frag_size(frag);
953 
954 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
955 		if (dma_mapping_error(dev, busaddr))
956 			goto dma_err;
957 		be_tx_setup_wrb_frag(txo, busaddr, len);
958 		copied += len;
959 	}
960 
961 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
962 
963 	be_tx_stats_update(txo, skb);
964 	return wrb_cnt;
965 
966 dma_err:
967 	adapter->drv_stats.dma_map_errors++;
968 	be_xmit_restore(adapter, txo, head, map_single, copied);
969 	return 0;
970 }
971 
972 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
973 {
974 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
975 }
976 
977 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
978 					     struct sk_buff *skb,
979 					     struct be_wrb_params
980 					     *wrb_params)
981 {
982 	u16 vlan_tag = 0;
983 
984 	skb = skb_share_check(skb, GFP_ATOMIC);
985 	if (unlikely(!skb))
986 		return skb;
987 
988 	if (skb_vlan_tag_present(skb))
989 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
990 
991 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
992 		if (!vlan_tag)
993 			vlan_tag = adapter->pvid;
994 		/* Set skip_hw_vlan = 1 as a f/w workaround; it informs the
995 		 * f/w to skip VLAN insertion
996 		 */
997 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
998 	}
999 
1000 	if (vlan_tag) {
1001 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1002 						vlan_tag);
1003 		if (unlikely(!skb))
1004 			return skb;
1005 		skb->vlan_tci = 0;
1006 	}
1007 
1008 	/* Insert the outer VLAN, if any */
1009 	if (adapter->qnq_vid) {
1010 		vlan_tag = adapter->qnq_vid;
1011 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1012 						vlan_tag);
1013 		if (unlikely(!skb))
1014 			return skb;
1015 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1016 	}
1017 
1018 	return skb;
1019 }
1020 
1021 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1022 {
1023 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1024 	u16 offset = ETH_HLEN;
1025 
1026 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1027 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1028 
1029 		offset += sizeof(struct ipv6hdr);
1030 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1031 		    ip6h->nexthdr != NEXTHDR_UDP) {
1032 			struct ipv6_opt_hdr *ehdr =
1033 				(struct ipv6_opt_hdr *)(skb->data + offset);
1034 
1035 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1036 			if (ehdr->hdrlen == 0xff)
1037 				return true;
1038 		}
1039 	}
1040 	return false;
1041 }
1042 
1043 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1044 {
1045 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1046 }
1047 
1048 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1051 }
1052 
1053 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1054 						  struct sk_buff *skb,
1055 						  struct be_wrb_params
1056 						  *wrb_params)
1057 {
1058 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1059 	unsigned int eth_hdr_len;
1060 	struct iphdr *ip;
1061 
1062 	/* For padded packets, BE HW modifies the tot_len field in the IP
1063 	 * header incorrectly when the VLAN tag is inserted by HW.
1064 	 * For padded packets, Lancer computes an incorrect checksum.
1065 	 */
1066 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1067 						VLAN_ETH_HLEN : ETH_HLEN;
1068 	if (skb->len <= 60 &&
1069 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1070 	    is_ipv4_pkt(skb)) {
1071 		ip = (struct iphdr *)ip_hdr(skb);
1072 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1073 	}
1074 
1075 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1076 	 * tagging in pvid-tagging mode
1077 	 */
1078 	if (be_pvid_tagging_enabled(adapter) &&
1079 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1080 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1081 
1082 	/* HW has a bug wherein it will calculate CSUM for VLAN
1083 	 * pkts even though it is disabled.
1084 	 * Manually insert VLAN in pkt.
1085 	 */
1086 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1087 	    skb_vlan_tag_present(skb)) {
1088 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1089 		if (unlikely(!skb))
1090 			goto err;
1091 	}
1092 
1093 	/* HW may lock up when VLAN HW tagging is requested on
1094 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1095 	 * skip HW tagging is not enabled by FW.
1096 	 */
1097 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1098 		     (adapter->pvid || adapter->qnq_vid) &&
1099 		     !qnq_async_evt_rcvd(adapter)))
1100 		goto tx_drop;
1101 
1102 	/* Manual VLAN tag insertion to prevent:
1103 	 * ASIC lockup when the ASIC inserts VLAN tag into
1104 	 * certain ipv6 packets. Insert VLAN tags in driver,
1105 	 * and set event, completion, vlan bits accordingly
1106 	 * in the Tx WRB.
1107 	 */
1108 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1109 	    be_vlan_tag_tx_chk(adapter, skb)) {
1110 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1111 		if (unlikely(!skb))
1112 			goto err;
1113 	}
1114 
1115 	return skb;
1116 tx_drop:
1117 	dev_kfree_skb_any(skb);
1118 err:
1119 	return NULL;
1120 }
1121 
1122 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1123 					   struct sk_buff *skb,
1124 					   struct be_wrb_params *wrb_params)
1125 {
1126 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1127 	 * packets that are 32 bytes or less may cause a transmit stall
1128 	 * on that port. The workaround is to pad such packets
1129 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1130 	 */
1131 	if (skb->len <= 32) {
1132 		if (skb_put_padto(skb, 36))
1133 			return NULL;
1134 	}
1135 
1136 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1137 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1138 		if (!skb)
1139 			return NULL;
1140 	}
1141 
1142 	return skb;
1143 }
1144 
1145 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1146 {
1147 	struct be_queue_info *txq = &txo->q;
1148 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1149 
1150 	/* Mark the last request eventable if it hasn't been marked already */
1151 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1152 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1153 
1154 	/* compose a dummy wrb if there are odd set of wrbs to notify */
1155 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1156 		wrb_fill_dummy(queue_head_node(txq));
1157 		queue_head_inc(txq);
1158 		atomic_inc(&txq->used);
1159 		txo->pend_wrb_cnt++;
1160 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1161 					   TX_HDR_WRB_NUM_SHIFT);
1162 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1163 					  TX_HDR_WRB_NUM_SHIFT);
1164 	}
1165 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1166 	txo->pend_wrb_cnt = 0;
1167 }
1168 
1169 /* OS2BMC related */
1170 
1171 #define DHCP_CLIENT_PORT	68
1172 #define DHCP_SERVER_PORT	67
1173 #define NET_BIOS_PORT1		137
1174 #define NET_BIOS_PORT2		138
1175 #define DHCPV6_RAS_PORT		547
1176 
1177 #define is_mc_allowed_on_bmc(adapter, eh)	\
1178 	(!is_multicast_filt_enabled(adapter) &&	\
1179 	 is_multicast_ether_addr(eh->h_dest) &&	\
1180 	 !is_broadcast_ether_addr(eh->h_dest))
1181 
1182 #define is_bc_allowed_on_bmc(adapter, eh)	\
1183 	(!is_broadcast_filt_enabled(adapter) &&	\
1184 	 is_broadcast_ether_addr(eh->h_dest))
1185 
1186 #define is_arp_allowed_on_bmc(adapter, skb)	\
1187 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1188 
1189 #define is_broadcast_packet(eh, adapter)	\
1190 		(is_multicast_ether_addr(eh->h_dest) && \
1191 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1192 
1193 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1194 
1195 #define is_arp_filt_enabled(adapter)	\
1196 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1197 
1198 #define is_dhcp_client_filt_enabled(adapter)	\
1199 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1200 
1201 #define is_dhcp_srvr_filt_enabled(adapter)	\
1202 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1203 
1204 #define is_nbios_filt_enabled(adapter)	\
1205 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1206 
1207 #define is_ipv6_na_filt_enabled(adapter)	\
1208 		(adapter->bmc_filt_mask &	\
1209 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1210 
1211 #define is_ipv6_ra_filt_enabled(adapter)	\
1212 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1213 
1214 #define is_ipv6_ras_filt_enabled(adapter)	\
1215 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1216 
1217 #define is_broadcast_filt_enabled(adapter)	\
1218 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1219 
1220 #define is_multicast_filt_enabled(adapter)	\
1221 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1222 
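/* Decide whether a copy of this TX packet must also be sent to the BMC
 * (OS2BMC), based on the packet type and the BMC filtering capabilities
 * reported in bmc_filt_mask.
 */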
1223 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1224 			       struct sk_buff **skb)
1225 {
1226 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1227 	bool os2bmc = false;
1228 
1229 	if (!be_is_os2bmc_enabled(adapter))
1230 		goto done;
1231 
1232 	if (!is_multicast_ether_addr(eh->h_dest))
1233 		goto done;
1234 
1235 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1236 	    is_bc_allowed_on_bmc(adapter, eh) ||
1237 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1238 		os2bmc = true;
1239 		goto done;
1240 	}
1241 
1242 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1243 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1244 		u8 nexthdr = hdr->nexthdr;
1245 
1246 		if (nexthdr == IPPROTO_ICMPV6) {
1247 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1248 
1249 			switch (icmp6->icmp6_type) {
1250 			case NDISC_ROUTER_ADVERTISEMENT:
1251 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1252 				goto done;
1253 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1254 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1255 				goto done;
1256 			default:
1257 				break;
1258 			}
1259 		}
1260 	}
1261 
1262 	if (is_udp_pkt((*skb))) {
1263 		struct udphdr *udp = udp_hdr((*skb));
1264 
1265 		switch (ntohs(udp->dest)) {
1266 		case DHCP_CLIENT_PORT:
1267 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1268 			goto done;
1269 		case DHCP_SERVER_PORT:
1270 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1271 			goto done;
1272 		case NET_BIOS_PORT1:
1273 		case NET_BIOS_PORT2:
1274 			os2bmc = is_nbios_filt_enabled(adapter);
1275 			goto done;
1276 		case DHCPV6_RAS_PORT:
1277 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1278 			goto done;
1279 		default:
1280 			break;
1281 		}
1282 	}
1283 done:
1284 	/* For VLAN packets destined to the BMC, the ASIC expects the
1285 	 * VLAN tag to be inline in the packet.
1286 	 */
1287 	if (os2bmc)
1288 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1289 
1290 	return os2bmc;
1291 }
1292 
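/* ndo_start_xmit handler: applies the HW workarounds, maps and enqueues the
 * skb's WRBs, and rings the TX doorbell unless more packets are pending
 * (xmit_more).
 */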
1293 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1294 {
1295 	struct be_adapter *adapter = netdev_priv(netdev);
1296 	u16 q_idx = skb_get_queue_mapping(skb);
1297 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1298 	struct be_wrb_params wrb_params = { 0 };
1299 	bool flush = !skb->xmit_more;
1300 	u16 wrb_cnt;
1301 
1302 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1303 	if (unlikely(!skb))
1304 		goto drop;
1305 
1306 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1307 
1308 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1309 	if (unlikely(!wrb_cnt)) {
1310 		dev_kfree_skb_any(skb);
1311 		goto drop;
1312 	}
1313 
1314 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1315 	 * enqueue the pkt a 2nd time with mgmt bit set.
1316 	 */
1317 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1318 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1319 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1320 		if (unlikely(!wrb_cnt))
1321 			goto drop;
1322 		else
1323 			skb_get(skb);
1324 	}
1325 
1326 	if (be_is_txq_full(txo)) {
1327 		netif_stop_subqueue(netdev, q_idx);
1328 		tx_stats(txo)->tx_stops++;
1329 	}
1330 
1331 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1332 		be_xmit_flush(adapter, txo);
1333 
1334 	return NETDEV_TX_OK;
1335 drop:
1336 	tx_stats(txo)->tx_drv_drops++;
1337 	/* Flush the already enqueued tx requests */
1338 	if (flush && txo->pend_wrb_cnt)
1339 		be_xmit_flush(adapter, txo);
1340 
1341 	return NETDEV_TX_OK;
1342 }
1343 
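/* ndo_change_mtu handler: validates the requested MTU against the HW limits */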
1344 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1345 {
1346 	struct be_adapter *adapter = netdev_priv(netdev);
1347 	struct device *dev = &adapter->pdev->dev;
1348 
1349 	if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1350 		dev_info(dev, "MTU must be between %d and %d bytes\n",
1351 			 BE_MIN_MTU, BE_MAX_MTU);
1352 		return -EINVAL;
1353 	}
1354 
1355 	dev_info(dev, "MTU changed from %d to %d bytes\n",
1356 		 netdev->mtu, new_mtu);
1357 	netdev->mtu = new_mtu;
1358 	return 0;
1359 }
1360 
1361 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1362 {
1363 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1364 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1365 }
1366 
1367 static int be_set_vlan_promisc(struct be_adapter *adapter)
1368 {
1369 	struct device *dev = &adapter->pdev->dev;
1370 	int status;
1371 
1372 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1373 		return 0;
1374 
1375 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1376 	if (!status) {
1377 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1378 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1379 	} else {
1380 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1381 	}
1382 	return status;
1383 }
1384 
1385 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1386 {
1387 	struct device *dev = &adapter->pdev->dev;
1388 	int status;
1389 
1390 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1391 	if (!status) {
1392 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1393 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1394 	}
1395 	return status;
1396 }
1397 
1398 /*
1399  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1400  * If the user configures more, place BE in vlan promiscuous mode.
1401  */
1402 static int be_vid_config(struct be_adapter *adapter)
1403 {
1404 	struct device *dev = &adapter->pdev->dev;
1405 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1406 	u16 num = 0, i = 0;
1407 	int status = 0;
1408 
1409 	/* No need to further configure vids if in promiscuous mode */
1410 	if (be_in_all_promisc(adapter))
1411 		return 0;
1412 
1413 	if (adapter->vlans_added > be_max_vlans(adapter))
1414 		return be_set_vlan_promisc(adapter);
1415 
1416 	/* Construct VLAN Table to give to HW */
1417 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1418 		vids[num++] = cpu_to_le16(i);
1419 
1420 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1421 	if (status) {
1422 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1423 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1424 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1425 		    addl_status(status) ==
1426 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1427 			return be_set_vlan_promisc(adapter);
1428 	} else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1429 		status = be_clear_vlan_promisc(adapter);
1430 	}
1431 	return status;
1432 }
1433 
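/* ndo_vlan_rx_add_vid handler: adds the VID to the driver's bitmap and
 * re-programs the HW VLAN filter table, rolling back on failure.
 */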
1434 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1435 {
1436 	struct be_adapter *adapter = netdev_priv(netdev);
1437 	int status = 0;
1438 
1439 	/* Packets with VID 0 are always received by Lancer by default */
1440 	if (lancer_chip(adapter) && vid == 0)
1441 		return status;
1442 
1443 	if (test_bit(vid, adapter->vids))
1444 		return status;
1445 
1446 	set_bit(vid, adapter->vids);
1447 	adapter->vlans_added++;
1448 
1449 	status = be_vid_config(adapter);
1450 	if (status) {
1451 		adapter->vlans_added--;
1452 		clear_bit(vid, adapter->vids);
1453 	}
1454 
1455 	return status;
1456 }
1457 
1458 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1459 {
1460 	struct be_adapter *adapter = netdev_priv(netdev);
1461 
1462 	/* Packets with VID 0 are always received by Lancer by default */
1463 	if (lancer_chip(adapter) && vid == 0)
1464 		return 0;
1465 
1466 	clear_bit(vid, adapter->vids);
1467 	adapter->vlans_added--;
1468 
1469 	return be_vid_config(adapter);
1470 }
1471 
1472 static void be_clear_all_promisc(struct be_adapter *adapter)
1473 {
1474 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
1475 	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
1476 }
1477 
1478 static void be_set_all_promisc(struct be_adapter *adapter)
1479 {
1480 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1481 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1482 }
1483 
1484 static void be_set_mc_promisc(struct be_adapter *adapter)
1485 {
1486 	int status;
1487 
1488 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1489 		return;
1490 
1491 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1492 	if (!status)
1493 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1494 }
1495 
1496 static void be_set_mc_list(struct be_adapter *adapter)
1497 {
1498 	int status;
1499 
1500 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1501 	if (!status)
1502 		adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1503 	else
1504 		be_set_mc_promisc(adapter);
1505 }
1506 
1507 static void be_set_uc_list(struct be_adapter *adapter)
1508 {
1509 	struct netdev_hw_addr *ha;
1510 	int i = 1; /* First slot is claimed by the Primary MAC */
1511 
1512 	for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1513 		be_cmd_pmac_del(adapter, adapter->if_handle,
1514 				adapter->pmac_id[i], 0);
1515 
1516 	if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
1517 		be_set_all_promisc(adapter);
1518 		return;
1519 	}
1520 
1521 	netdev_for_each_uc_addr(ha, adapter->netdev) {
1522 		adapter->uc_macs++; /* First slot is for Primary MAC */
1523 		be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
1524 				&adapter->pmac_id[adapter->uc_macs], 0);
1525 	}
1526 }
1527 
1528 static void be_clear_uc_list(struct be_adapter *adapter)
1529 {
1530 	int i;
1531 
1532 	for (i = 1; i < (adapter->uc_macs + 1); i++)
1533 		be_cmd_pmac_del(adapter, adapter->if_handle,
1534 				adapter->pmac_id[i], 0);
1535 	adapter->uc_macs = 0;
1536 }
1537 
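/* ndo_set_rx_mode handler: programs the promiscuous, unicast and multicast
 * filters to match the current netdev state.
 */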
1538 static void be_set_rx_mode(struct net_device *netdev)
1539 {
1540 	struct be_adapter *adapter = netdev_priv(netdev);
1541 
1542 	if (netdev->flags & IFF_PROMISC) {
1543 		be_set_all_promisc(adapter);
1544 		return;
1545 	}
1546 
1547 	/* Interface was previously in promiscuous mode; disable it */
1548 	if (be_in_all_promisc(adapter)) {
1549 		be_clear_all_promisc(adapter);
1550 		if (adapter->vlans_added)
1551 			be_vid_config(adapter);
1552 	}
1553 
1554 	/* Enable multicast promisc if num configured exceeds what we support */
1555 	if (netdev->flags & IFF_ALLMULTI ||
1556 	    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1557 		be_set_mc_promisc(adapter);
1558 		return;
1559 	}
1560 
1561 	if (netdev_uc_count(netdev) != adapter->uc_macs)
1562 		be_set_uc_list(adapter);
1563 
1564 	be_set_mc_list(adapter);
1565 }
1566 
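/* ndo_set_vf_mac handler: programs a new MAC address for the given VF */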
1567 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1568 {
1569 	struct be_adapter *adapter = netdev_priv(netdev);
1570 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1571 	int status;
1572 
1573 	if (!sriov_enabled(adapter))
1574 		return -EPERM;
1575 
1576 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1577 		return -EINVAL;
1578 
1579 	/* Proceed further only if the user-provided MAC is different
1580 	 * from the active MAC
1581 	 */
1582 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1583 		return 0;
1584 
1585 	if (BEx_chip(adapter)) {
1586 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1587 				vf + 1);
1588 
1589 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1590 					 &vf_cfg->pmac_id, vf + 1);
1591 	} else {
1592 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1593 					vf + 1);
1594 	}
1595 
1596 	if (status) {
1597 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1598 			mac, vf, status);
1599 		return be_cmd_status(status);
1600 	}
1601 
1602 	ether_addr_copy(vf_cfg->mac_addr, mac);
1603 
1604 	return 0;
1605 }
1606 
1607 static int be_get_vf_config(struct net_device *netdev, int vf,
1608 			    struct ifla_vf_info *vi)
1609 {
1610 	struct be_adapter *adapter = netdev_priv(netdev);
1611 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1612 
1613 	if (!sriov_enabled(adapter))
1614 		return -EPERM;
1615 
1616 	if (vf >= adapter->num_vfs)
1617 		return -EINVAL;
1618 
1619 	vi->vf = vf;
1620 	vi->max_tx_rate = vf_cfg->tx_rate;
1621 	vi->min_tx_rate = 0;
1622 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1623 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1624 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1625 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1626 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1627 
1628 	return 0;
1629 }
1630 
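/* Enable Transparent VLAN Tagging (TVT) for a VF: program the VLAN in the
 * HW switch config, clear any guest-programmed VLAN filters and revoke the
 * VF's FILTMGMT privilege so it cannot override the setting.
 */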
1631 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1632 {
1633 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1634 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1635 	int vf_if_id = vf_cfg->if_handle;
1636 	int status;
1637 
1638 	/* Enable Transparent VLAN Tagging */
1639 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1640 	if (status)
1641 		return status;
1642 
1643 	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1644 	vids[0] = 0;
1645 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1646 	if (!status)
1647 		dev_info(&adapter->pdev->dev,
1648 			 "Cleared guest VLANs on VF%d", vf);
1649 
1650 	/* After TVT is enabled, disallow VFs to program VLAN filters */
1651 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1652 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1653 						  ~BE_PRIV_FILTMGMT, vf + 1);
1654 		if (!status)
1655 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1656 	}
1657 	return 0;
1658 }
1659 
1660 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1661 {
1662 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1663 	struct device *dev = &adapter->pdev->dev;
1664 	int status;
1665 
1666 	/* Reset Transparent VLAN Tagging. */
1667 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1668 				       vf_cfg->if_handle, 0, 0);
1669 	if (status)
1670 		return status;
1671 
1672 	/* Allow VFs to program VLAN filtering */
1673 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1674 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1675 						  BE_PRIV_FILTMGMT, vf + 1);
1676 		if (!status) {
1677 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1678 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1679 		}
1680 	}
1681 
1682 	dev_info(dev,
1683 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1684 	return 0;
1685 }
1686 
1687 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1688 {
1689 	struct be_adapter *adapter = netdev_priv(netdev);
1690 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1691 	int status;
1692 
1693 	if (!sriov_enabled(adapter))
1694 		return -EPERM;
1695 
1696 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1697 		return -EINVAL;
1698 
1699 	if (vlan || qos) {
1700 		vlan |= qos << VLAN_PRIO_SHIFT;
1701 		status = be_set_vf_tvt(adapter, vf, vlan);
1702 	} else {
1703 		status = be_clear_vf_tvt(adapter, vf);
1704 	}
1705 
1706 	if (status) {
1707 		dev_err(&adapter->pdev->dev,
1708 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1709 			status);
1710 		return be_cmd_status(status);
1711 	}
1712 
1713 	vf_cfg->vlan_tag = vlan;
1714 	return 0;
1715 }
1716 
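/* ndo_set_vf_rate handler: only max_tx_rate is supported; the rate must lie
 * between 100 Mbps and the current link speed (and, on Skyhawk, be a
 * multiple of 1% of the link speed).
 */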
1717 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1718 			     int min_tx_rate, int max_tx_rate)
1719 {
1720 	struct be_adapter *adapter = netdev_priv(netdev);
1721 	struct device *dev = &adapter->pdev->dev;
1722 	int percent_rate, status = 0;
1723 	u16 link_speed = 0;
1724 	u8 link_status;
1725 
1726 	if (!sriov_enabled(adapter))
1727 		return -EPERM;
1728 
1729 	if (vf >= adapter->num_vfs)
1730 		return -EINVAL;
1731 
1732 	if (min_tx_rate)
1733 		return -EINVAL;
1734 
1735 	if (!max_tx_rate)
1736 		goto config_qos;
1737 
1738 	status = be_cmd_link_status_query(adapter, &link_speed,
1739 					  &link_status, 0);
1740 	if (status)
1741 		goto err;
1742 
1743 	if (!link_status) {
1744 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1745 		status = -ENETDOWN;
1746 		goto err;
1747 	}
1748 
1749 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1750 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1751 			link_speed);
1752 		status = -EINVAL;
1753 		goto err;
1754 	}
1755 
	/* On Skyhawk the QoS setting must be done only as a % value */
1757 	percent_rate = link_speed / 100;
1758 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1759 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1760 			percent_rate);
1761 		status = -EINVAL;
1762 		goto err;
1763 	}
1764 
1765 config_qos:
1766 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1767 	if (status)
1768 		goto err;
1769 
1770 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1771 	return 0;
1772 
1773 err:
1774 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1775 		max_tx_rate, vf);
1776 	return be_cmd_status(status);
1777 }
1778 
1779 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1780 				int link_state)
1781 {
1782 	struct be_adapter *adapter = netdev_priv(netdev);
1783 	int status;
1784 
1785 	if (!sriov_enabled(adapter))
1786 		return -EPERM;
1787 
1788 	if (vf >= adapter->num_vfs)
1789 		return -EINVAL;
1790 
1791 	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1792 	if (status) {
1793 		dev_err(&adapter->pdev->dev,
1794 			"Link state change on VF %d failed: %#x\n", vf, status);
1795 		return be_cmd_status(status);
1796 	}
1797 
1798 	adapter->vf_cfg[vf].plink_tracking = link_state;
1799 
1800 	return 0;
1801 }
1802 
1803 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1804 {
1805 	struct be_adapter *adapter = netdev_priv(netdev);
1806 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1807 	u8 spoofchk;
1808 	int status;
1809 
1810 	if (!sriov_enabled(adapter))
1811 		return -EPERM;
1812 
1813 	if (vf >= adapter->num_vfs)
1814 		return -EINVAL;
1815 
1816 	if (BEx_chip(adapter))
1817 		return -EOPNOTSUPP;
1818 
1819 	if (enable == vf_cfg->spoofchk)
1820 		return 0;
1821 
1822 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1823 
1824 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1825 				       0, spoofchk);
1826 	if (status) {
1827 		dev_err(&adapter->pdev->dev,
1828 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
1829 		return be_cmd_status(status);
1830 	}
1831 
1832 	vf_cfg->spoofchk = enable;
1833 	return 0;
1834 }
1835 
1836 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1837 			  ulong now)
1838 {
1839 	aic->rx_pkts_prev = rx_pkts;
1840 	aic->tx_reqs_prev = tx_pkts;
1841 	aic->jiffies = now;
1842 }
1843 
1844 static int be_get_new_eqd(struct be_eq_obj *eqo)
1845 {
1846 	struct be_adapter *adapter = eqo->adapter;
1847 	int eqd, start;
1848 	struct be_aic_obj *aic;
1849 	struct be_rx_obj *rxo;
1850 	struct be_tx_obj *txo;
1851 	u64 rx_pkts = 0, tx_pkts = 0;
1852 	ulong now;
1853 	u32 pps, delta;
1854 	int i;
1855 
1856 	aic = &adapter->aic_obj[eqo->idx];
1857 	if (!aic->enable) {
1858 		if (aic->jiffies)
1859 			aic->jiffies = 0;
1860 		eqd = aic->et_eqd;
1861 		return eqd;
1862 	}
1863 
1864 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1865 		do {
1866 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1867 			rx_pkts += rxo->stats.rx_pkts;
1868 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1869 	}
1870 
1871 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1872 		do {
1873 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1874 			tx_pkts += txo->stats.tx_reqs;
1875 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1876 	}
1877 
	/* Skip if the counters wrapped around or this is the first calculation */
1879 	now = jiffies;
1880 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
1881 	    rx_pkts < aic->rx_pkts_prev ||
1882 	    tx_pkts < aic->tx_reqs_prev) {
1883 		be_aic_update(aic, rx_pkts, tx_pkts, now);
1884 		return aic->prev_eqd;
1885 	}
1886 
1887 	delta = jiffies_to_msecs(now - aic->jiffies);
1888 	if (delta == 0)
1889 		return aic->prev_eqd;
1890 
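	/* Heuristic: request roughly 4 usecs of EQ delay for every 15K pkts/s
	 * seen on this EQ. As an illustrative example (numbers assumed, not
	 * taken from the spec): at ~300K combined RX+TX pkts/s,
	 * eqd = (300000 / 15000) << 2 = 80 usecs, before being clamped to the
	 * [min_eqd, max_eqd] range below.
	 */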
1891 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
1892 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1893 	eqd = (pps / 15000) << 2;
1894 
1895 	if (eqd < 8)
1896 		eqd = 0;
1897 	eqd = min_t(u32, eqd, aic->max_eqd);
1898 	eqd = max_t(u32, eqd, aic->min_eqd);
1899 
1900 	be_aic_update(aic, rx_pkts, tx_pkts, now);
1901 
1902 	return eqd;
1903 }
1904 
1905 /* For Skyhawk-R only */
1906 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
1907 {
1908 	struct be_adapter *adapter = eqo->adapter;
1909 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
1910 	ulong now = jiffies;
1911 	int eqd;
1912 	u32 mult_enc;
1913 
1914 	if (!aic->enable)
1915 		return 0;
1916 
1917 	if (time_before_eq(now, aic->jiffies) ||
1918 	    jiffies_to_msecs(now - aic->jiffies) < 1)
1919 		eqd = aic->prev_eqd;
1920 	else
1921 		eqd = be_get_new_eqd(eqo);
1922 
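	/* Map the computed delay to one of the four coarse R2I delay-multiplier
	 * encodings accepted by Skyhawk's EQ doorbell.
	 */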
1923 	if (eqd > 100)
1924 		mult_enc = R2I_DLY_ENC_1;
1925 	else if (eqd > 60)
1926 		mult_enc = R2I_DLY_ENC_2;
1927 	else if (eqd > 20)
1928 		mult_enc = R2I_DLY_ENC_3;
1929 	else
1930 		mult_enc = R2I_DLY_ENC_0;
1931 
1932 	aic->prev_eqd = eqd;
1933 
1934 	return mult_enc;
1935 }
1936 
1937 void be_eqd_update(struct be_adapter *adapter, bool force_update)
1938 {
1939 	struct be_set_eqd set_eqd[MAX_EVT_QS];
1940 	struct be_aic_obj *aic;
1941 	struct be_eq_obj *eqo;
1942 	int i, num = 0, eqd;
1943 
1944 	for_all_evt_queues(adapter, eqo, i) {
1945 		aic = &adapter->aic_obj[eqo->idx];
1946 		eqd = be_get_new_eqd(eqo);
1947 		if (force_update || eqd != aic->prev_eqd) {
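			/* The delay is programmed as a multiplier rather than
			 * raw usecs; the 65/100 scaling converts the computed
			 * eqd (e.g. 96 usecs -> 62) into that encoding.
			 */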
1948 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
1949 			set_eqd[num].eq_id = eqo->q.id;
1950 			aic->prev_eqd = eqd;
1951 			num++;
1952 		}
1953 	}
1954 
1955 	if (num)
1956 		be_cmd_modify_eqd(adapter, set_eqd, num);
1957 }
1958 
1959 static void be_rx_stats_update(struct be_rx_obj *rxo,
1960 			       struct be_rx_compl_info *rxcp)
1961 {
1962 	struct be_rx_stats *stats = rx_stats(rxo);
1963 
1964 	u64_stats_update_begin(&stats->sync);
1965 	stats->rx_compl++;
1966 	stats->rx_bytes += rxcp->pkt_size;
1967 	stats->rx_pkts++;
1968 	if (rxcp->tunneled)
1969 		stats->rx_vxlan_offload_pkts++;
1970 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
1971 		stats->rx_mcast_pkts++;
1972 	if (rxcp->err)
1973 		stats->rx_compl_err++;
1974 	u64_stats_update_end(&stats->sync);
1975 }
1976 
1977 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
1978 {
	/* L4 checksum is not reliable for non-TCP/UDP packets.
	 * Also ignore ipcksm for IPv6 pkts
1981 	 */
1982 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
1983 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
1984 }
1985 
1986 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
1987 {
1988 	struct be_adapter *adapter = rxo->adapter;
1989 	struct be_rx_page_info *rx_page_info;
1990 	struct be_queue_info *rxq = &rxo->q;
1991 	u16 frag_idx = rxq->tail;
1992 
1993 	rx_page_info = &rxo->page_info_tbl[frag_idx];
1994 	BUG_ON(!rx_page_info->page);
1995 
1996 	if (rx_page_info->last_frag) {
1997 		dma_unmap_page(&adapter->pdev->dev,
1998 			       dma_unmap_addr(rx_page_info, bus),
1999 			       adapter->big_page_size, DMA_FROM_DEVICE);
2000 		rx_page_info->last_frag = false;
2001 	} else {
2002 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2003 					dma_unmap_addr(rx_page_info, bus),
2004 					rx_frag_size, DMA_FROM_DEVICE);
2005 	}
2006 
2007 	queue_tail_inc(rxq);
2008 	atomic_dec(&rxq->used);
2009 	return rx_page_info;
2010 }
2011 
/* Throw away the data in the Rx completion */
2013 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2014 				struct be_rx_compl_info *rxcp)
2015 {
2016 	struct be_rx_page_info *page_info;
2017 	u16 i, num_rcvd = rxcp->num_rcvd;
2018 
2019 	for (i = 0; i < num_rcvd; i++) {
2020 		page_info = get_rx_page_info(rxo);
2021 		put_page(page_info->page);
2022 		memset(page_info, 0, sizeof(*page_info));
2023 	}
2024 }
2025 
2026 /*
2027  * skb_fill_rx_data forms a complete skb for an ether frame
2028  * indicated by rxcp.
2029  */
2030 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2031 			     struct be_rx_compl_info *rxcp)
2032 {
2033 	struct be_rx_page_info *page_info;
2034 	u16 i, j;
2035 	u16 hdr_len, curr_frag_len, remaining;
2036 	u8 *start;
2037 
2038 	page_info = get_rx_page_info(rxo);
2039 	start = page_address(page_info->page) + page_info->page_offset;
2040 	prefetch(start);
2041 
2042 	/* Copy data in the first descriptor of this completion */
2043 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2044 
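	/* Frames of up to BE_HDR_LEN bytes are copied entirely into the skb's
	 * linear area; for larger frames only the Ethernet header is copied
	 * and the rest of the first fragment is attached as a page fragment,
	 * avoiding a full copy of the payload.
	 */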
2045 	skb->len = curr_frag_len;
2046 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2047 		memcpy(skb->data, start, curr_frag_len);
2048 		/* Complete packet has now been moved to data */
2049 		put_page(page_info->page);
2050 		skb->data_len = 0;
2051 		skb->tail += curr_frag_len;
2052 	} else {
2053 		hdr_len = ETH_HLEN;
2054 		memcpy(skb->data, start, hdr_len);
2055 		skb_shinfo(skb)->nr_frags = 1;
2056 		skb_frag_set_page(skb, 0, page_info->page);
2057 		skb_shinfo(skb)->frags[0].page_offset =
2058 					page_info->page_offset + hdr_len;
2059 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2060 				  curr_frag_len - hdr_len);
2061 		skb->data_len = curr_frag_len - hdr_len;
2062 		skb->truesize += rx_frag_size;
2063 		skb->tail += hdr_len;
2064 	}
2065 	page_info->page = NULL;
2066 
2067 	if (rxcp->pkt_size <= rx_frag_size) {
2068 		BUG_ON(rxcp->num_rcvd != 1);
2069 		return;
2070 	}
2071 
2072 	/* More frags present for this completion */
2073 	remaining = rxcp->pkt_size - curr_frag_len;
2074 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2075 		page_info = get_rx_page_info(rxo);
2076 		curr_frag_len = min(remaining, rx_frag_size);
2077 
2078 		/* Coalesce all frags from the same physical page in one slot */
2079 		if (page_info->page_offset == 0) {
2080 			/* Fresh page */
2081 			j++;
2082 			skb_frag_set_page(skb, j, page_info->page);
2083 			skb_shinfo(skb)->frags[j].page_offset =
2084 							page_info->page_offset;
2085 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2086 			skb_shinfo(skb)->nr_frags++;
2087 		} else {
2088 			put_page(page_info->page);
2089 		}
2090 
2091 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2092 		skb->len += curr_frag_len;
2093 		skb->data_len += curr_frag_len;
2094 		skb->truesize += rx_frag_size;
2095 		remaining -= curr_frag_len;
2096 		page_info->page = NULL;
2097 	}
2098 	BUG_ON(j > MAX_SKB_FRAGS);
2099 }
2100 
2101 /* Process the RX completion indicated by rxcp when GRO is disabled */
2102 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2103 				struct be_rx_compl_info *rxcp)
2104 {
2105 	struct be_adapter *adapter = rxo->adapter;
2106 	struct net_device *netdev = adapter->netdev;
2107 	struct sk_buff *skb;
2108 
2109 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2110 	if (unlikely(!skb)) {
2111 		rx_stats(rxo)->rx_drops_no_skbs++;
2112 		be_rx_compl_discard(rxo, rxcp);
2113 		return;
2114 	}
2115 
2116 	skb_fill_rx_data(rxo, skb, rxcp);
2117 
2118 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2119 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2120 	else
2121 		skb_checksum_none_assert(skb);
2122 
2123 	skb->protocol = eth_type_trans(skb, netdev);
2124 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2125 	if (netdev->features & NETIF_F_RXHASH)
2126 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2127 
2128 	skb->csum_level = rxcp->tunneled;
2129 	skb_mark_napi_id(skb, napi);
2130 
2131 	if (rxcp->vlanf)
2132 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2133 
2134 	netif_receive_skb(skb);
2135 }
2136 
2137 /* Process the RX completion indicated by rxcp when GRO is enabled */
2138 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2139 				    struct napi_struct *napi,
2140 				    struct be_rx_compl_info *rxcp)
2141 {
2142 	struct be_adapter *adapter = rxo->adapter;
2143 	struct be_rx_page_info *page_info;
2144 	struct sk_buff *skb = NULL;
2145 	u16 remaining, curr_frag_len;
2146 	u16 i, j;
2147 
2148 	skb = napi_get_frags(napi);
2149 	if (!skb) {
2150 		be_rx_compl_discard(rxo, rxcp);
2151 		return;
2152 	}
2153 
2154 	remaining = rxcp->pkt_size;
2155 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2156 		page_info = get_rx_page_info(rxo);
2157 
2158 		curr_frag_len = min(remaining, rx_frag_size);
2159 
2160 		/* Coalesce all frags from the same physical page in one slot */
2161 		if (i == 0 || page_info->page_offset == 0) {
2162 			/* First frag or Fresh page */
2163 			j++;
2164 			skb_frag_set_page(skb, j, page_info->page);
2165 			skb_shinfo(skb)->frags[j].page_offset =
2166 							page_info->page_offset;
2167 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2168 		} else {
2169 			put_page(page_info->page);
2170 		}
2171 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2172 		skb->truesize += rx_frag_size;
2173 		remaining -= curr_frag_len;
2174 		memset(page_info, 0, sizeof(*page_info));
2175 	}
2176 	BUG_ON(j > MAX_SKB_FRAGS);
2177 
2178 	skb_shinfo(skb)->nr_frags = j + 1;
2179 	skb->len = rxcp->pkt_size;
2180 	skb->data_len = rxcp->pkt_size;
2181 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2182 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2183 	if (adapter->netdev->features & NETIF_F_RXHASH)
2184 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2185 
2186 	skb->csum_level = rxcp->tunneled;
2187 
2188 	if (rxcp->vlanf)
2189 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2190 
2191 	napi_gro_frags(napi);
2192 }
2193 
2194 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2195 				 struct be_rx_compl_info *rxcp)
2196 {
2197 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2198 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2199 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2200 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2201 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2202 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2203 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2204 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2205 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2206 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2207 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2208 	if (rxcp->vlanf) {
2209 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2210 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2211 	}
2212 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2213 	rxcp->tunneled =
2214 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2215 }
2216 
2217 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2218 				 struct be_rx_compl_info *rxcp)
2219 {
2220 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2221 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2222 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2223 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2224 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2225 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2226 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2227 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2228 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2229 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2230 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2231 	if (rxcp->vlanf) {
2232 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2233 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2234 	}
2235 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2236 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2237 }
2238 
2239 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2240 {
2241 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2242 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2243 	struct be_adapter *adapter = rxo->adapter;
2244 
	/* For checking the valid bit it is OK to use either definition, as the
	 * valid bit is at the same position in both v0 and v1 Rx compls */
2247 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2248 		return NULL;
2249 
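	/* Ensure the valid bit is read before the other compl dwords below */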
2250 	rmb();
2251 	be_dws_le_to_cpu(compl, sizeof(*compl));
2252 
2253 	if (adapter->be3_native)
2254 		be_parse_rx_compl_v1(compl, rxcp);
2255 	else
2256 		be_parse_rx_compl_v0(compl, rxcp);
2257 
2258 	if (rxcp->ip_frag)
2259 		rxcp->l4_csum = 0;
2260 
2261 	if (rxcp->vlanf) {
2262 		/* In QNQ modes, if qnq bit is not set, then the packet was
2263 		 * tagged only with the transparent outer vlan-tag and must
2264 		 * not be treated as a vlan packet by host
2265 		 */
2266 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2267 			rxcp->vlanf = 0;
2268 
2269 		if (!lancer_chip(adapter))
2270 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2271 
2272 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2273 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2274 			rxcp->vlanf = 0;
2275 	}
2276 
	/* As the compl has been parsed, reset it; we won't touch it again */
2278 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2279 
2280 	queue_tail_inc(&rxo->cq);
2281 	return rxcp;
2282 }
2283 
2284 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2285 {
2286 	u32 order = get_order(size);
2287 
2288 	if (order > 0)
2289 		gfp |= __GFP_COMP;
2290 	return  alloc_pages(gfp, order);
2291 }
2292 
2293 /*
 * Allocate a page, split it into fragments of size rx_frag_size and post
 * them as receive buffers to BE
2296  */
2297 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2298 {
2299 	struct be_adapter *adapter = rxo->adapter;
2300 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2301 	struct be_queue_info *rxq = &rxo->q;
2302 	struct page *pagep = NULL;
2303 	struct device *dev = &adapter->pdev->dev;
2304 	struct be_eth_rx_d *rxd;
2305 	u64 page_dmaaddr = 0, frag_dmaaddr;
2306 	u32 posted, page_offset = 0, notify = 0;
2307 
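	/* Each allocated "big" page is carved into rx_frag_size chunks; with
	 * the default rx_frag_size of 2048 on a 4K-page system, one page
	 * yields two RX fragments. The page is DMA-mapped once, and the last
	 * fragment of a page records the page's DMA address so the whole
	 * mapping can be released when that fragment is consumed.
	 */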
2308 	page_info = &rxo->page_info_tbl[rxq->head];
2309 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2310 		if (!pagep) {
2311 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2312 			if (unlikely(!pagep)) {
2313 				rx_stats(rxo)->rx_post_fail++;
2314 				break;
2315 			}
2316 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2317 						    adapter->big_page_size,
2318 						    DMA_FROM_DEVICE);
2319 			if (dma_mapping_error(dev, page_dmaaddr)) {
2320 				put_page(pagep);
2321 				pagep = NULL;
2322 				adapter->drv_stats.dma_map_errors++;
2323 				break;
2324 			}
2325 			page_offset = 0;
2326 		} else {
2327 			get_page(pagep);
2328 			page_offset += rx_frag_size;
2329 		}
2330 		page_info->page_offset = page_offset;
2331 		page_info->page = pagep;
2332 
2333 		rxd = queue_head_node(rxq);
2334 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2335 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2336 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2337 
2338 		/* Any space left in the current big page for another frag? */
2339 		if ((page_offset + rx_frag_size + rx_frag_size) >
2340 					adapter->big_page_size) {
2341 			pagep = NULL;
2342 			page_info->last_frag = true;
2343 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2344 		} else {
2345 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2346 		}
2347 
2348 		prev_page_info = page_info;
2349 		queue_head_inc(rxq);
2350 		page_info = &rxo->page_info_tbl[rxq->head];
2351 	}
2352 
2353 	/* Mark the last frag of a page when we break out of the above loop
2354 	 * with no more slots available in the RXQ
2355 	 */
2356 	if (pagep) {
2357 		prev_page_info->last_frag = true;
2358 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2359 	}
2360 
2361 	if (posted) {
2362 		atomic_add(posted, &rxq->used);
2363 		if (rxo->rx_post_starved)
2364 			rxo->rx_post_starved = false;
2365 		do {
2366 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2367 			be_rxq_notify(adapter, rxq->id, notify);
2368 			posted -= notify;
2369 		} while (posted);
2370 	} else if (atomic_read(&rxq->used) == 0) {
2371 		/* Let be_worker replenish when memory is available */
2372 		rxo->rx_post_starved = true;
2373 	}
2374 }
2375 
2376 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2377 {
2378 	struct be_queue_info *tx_cq = &txo->cq;
2379 	struct be_tx_compl_info *txcp = &txo->txcp;
2380 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2381 
2382 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2383 		return NULL;
2384 
2385 	/* Ensure load ordering of valid bit dword and other dwords below */
2386 	rmb();
2387 	be_dws_le_to_cpu(compl, sizeof(*compl));
2388 
2389 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2390 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2391 
2392 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2393 	queue_tail_inc(tx_cq);
2394 	return txcp;
2395 }
2396 
2397 static u16 be_tx_compl_process(struct be_adapter *adapter,
2398 			       struct be_tx_obj *txo, u16 last_index)
2399 {
2400 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2401 	struct be_queue_info *txq = &txo->q;
2402 	u16 frag_index, num_wrbs = 0;
2403 	struct sk_buff *skb = NULL;
2404 	bool unmap_skb_hdr = false;
2405 	struct be_eth_wrb *wrb;
2406 
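	/* Each TX request occupies one header WRB followed by one WRB per
	 * fragment; sent_skbs[] holds the skb pointer at the header WRB's
	 * index, so a non-NULL entry marks the start of the next request.
	 */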
2407 	do {
2408 		if (sent_skbs[txq->tail]) {
2409 			/* Free skb from prev req */
2410 			if (skb)
2411 				dev_consume_skb_any(skb);
2412 			skb = sent_skbs[txq->tail];
2413 			sent_skbs[txq->tail] = NULL;
2414 			queue_tail_inc(txq);  /* skip hdr wrb */
2415 			num_wrbs++;
2416 			unmap_skb_hdr = true;
2417 		}
2418 		wrb = queue_tail_node(txq);
2419 		frag_index = txq->tail;
2420 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2421 			      (unmap_skb_hdr && skb_headlen(skb)));
2422 		unmap_skb_hdr = false;
2423 		queue_tail_inc(txq);
2424 		num_wrbs++;
2425 	} while (frag_index != last_index);
2426 	dev_consume_skb_any(skb);
2427 
2428 	return num_wrbs;
2429 }
2430 
2431 /* Return the number of events in the event queue */
2432 static inline int events_get(struct be_eq_obj *eqo)
2433 {
2434 	struct be_eq_entry *eqe;
2435 	int num = 0;
2436 
2437 	do {
2438 		eqe = queue_tail_node(&eqo->q);
2439 		if (eqe->evt == 0)
2440 			break;
2441 
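		/* Ensure the event word is read before it is cleared below */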
2442 		rmb();
2443 		eqe->evt = 0;
2444 		num++;
2445 		queue_tail_inc(&eqo->q);
2446 	} while (true);
2447 
2448 	return num;
2449 }
2450 
/* Leaves the EQ in a disarmed state */
2452 static void be_eq_clean(struct be_eq_obj *eqo)
2453 {
2454 	int num = events_get(eqo);
2455 
2456 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2457 }
2458 
2459 /* Free posted rx buffers that were not used */
2460 static void be_rxq_clean(struct be_rx_obj *rxo)
2461 {
2462 	struct be_queue_info *rxq = &rxo->q;
2463 	struct be_rx_page_info *page_info;
2464 
2465 	while (atomic_read(&rxq->used) > 0) {
2466 		page_info = get_rx_page_info(rxo);
2467 		put_page(page_info->page);
2468 		memset(page_info, 0, sizeof(*page_info));
2469 	}
2470 	BUG_ON(atomic_read(&rxq->used));
2471 	rxq->tail = 0;
2472 	rxq->head = 0;
2473 }
2474 
2475 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2476 {
2477 	struct be_queue_info *rx_cq = &rxo->cq;
2478 	struct be_rx_compl_info *rxcp;
2479 	struct be_adapter *adapter = rxo->adapter;
2480 	int flush_wait = 0;
2481 
2482 	/* Consume pending rx completions.
2483 	 * Wait for the flush completion (identified by zero num_rcvd)
	 * to arrive. Notify the CQ even when there are no more CQ entries,
	 * so that HW can flush partially coalesced CQ entries.
2486 	 * In Lancer, there is no need to wait for flush compl.
2487 	 */
2488 	for (;;) {
2489 		rxcp = be_rx_compl_get(rxo);
2490 		if (!rxcp) {
2491 			if (lancer_chip(adapter))
2492 				break;
2493 
2494 			if (flush_wait++ > 50 ||
2495 			    be_check_error(adapter,
2496 					   BE_ERROR_HW)) {
2497 				dev_warn(&adapter->pdev->dev,
2498 					 "did not receive flush compl\n");
2499 				break;
2500 			}
2501 			be_cq_notify(adapter, rx_cq->id, true, 0);
2502 			mdelay(1);
2503 		} else {
2504 			be_rx_compl_discard(rxo, rxcp);
2505 			be_cq_notify(adapter, rx_cq->id, false, 1);
2506 			if (rxcp->num_rcvd == 0)
2507 				break;
2508 		}
2509 	}
2510 
2511 	/* After cleanup, leave the CQ in unarmed state */
2512 	be_cq_notify(adapter, rx_cq->id, false, 0);
2513 }
2514 
2515 static void be_tx_compl_clean(struct be_adapter *adapter)
2516 {
2517 	u16 end_idx, notified_idx, cmpl = 0, timeo = 0, num_wrbs = 0;
2518 	struct device *dev = &adapter->pdev->dev;
2519 	struct be_tx_compl_info *txcp;
2520 	struct be_queue_info *txq;
2521 	struct be_tx_obj *txo;
2522 	int i, pending_txqs;
2523 
2524 	/* Stop polling for compls when HW has been silent for 10ms */
2525 	do {
2526 		pending_txqs = adapter->num_tx_qs;
2527 
2528 		for_all_tx_queues(adapter, txo, i) {
2529 			cmpl = 0;
2530 			num_wrbs = 0;
2531 			txq = &txo->q;
2532 			while ((txcp = be_tx_compl_get(txo))) {
2533 				num_wrbs +=
2534 					be_tx_compl_process(adapter, txo,
2535 							    txcp->end_index);
2536 				cmpl++;
2537 			}
2538 			if (cmpl) {
2539 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2540 				atomic_sub(num_wrbs, &txq->used);
2541 				timeo = 0;
2542 			}
2543 			if (!be_is_tx_compl_pending(txo))
2544 				pending_txqs--;
2545 		}
2546 
2547 		if (pending_txqs == 0 || ++timeo > 10 ||
2548 		    be_check_error(adapter, BE_ERROR_HW))
2549 			break;
2550 
2551 		mdelay(1);
2552 	} while (true);
2553 
2554 	/* Free enqueued TX that was never notified to HW */
2555 	for_all_tx_queues(adapter, txo, i) {
2556 		txq = &txo->q;
2557 
2558 		if (atomic_read(&txq->used)) {
2559 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2560 				 i, atomic_read(&txq->used));
2561 			notified_idx = txq->tail;
2562 			end_idx = txq->tail;
2563 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2564 				  txq->len);
2565 			/* Use the tx-compl process logic to handle requests
2566 			 * that were not sent to the HW.
2567 			 */
2568 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2569 			atomic_sub(num_wrbs, &txq->used);
2570 			BUG_ON(atomic_read(&txq->used));
2571 			txo->pend_wrb_cnt = 0;
2572 			/* Since hw was never notified of these requests,
2573 			 * reset TXQ indices
2574 			 */
2575 			txq->head = notified_idx;
2576 			txq->tail = notified_idx;
2577 		}
2578 	}
2579 }
2580 
2581 static void be_evt_queues_destroy(struct be_adapter *adapter)
2582 {
2583 	struct be_eq_obj *eqo;
2584 	int i;
2585 
2586 	for_all_evt_queues(adapter, eqo, i) {
2587 		if (eqo->q.created) {
2588 			be_eq_clean(eqo);
2589 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2590 			napi_hash_del(&eqo->napi);
2591 			netif_napi_del(&eqo->napi);
2592 			free_cpumask_var(eqo->affinity_mask);
2593 		}
2594 		be_queue_free(adapter, &eqo->q);
2595 	}
2596 }
2597 
2598 static int be_evt_queues_create(struct be_adapter *adapter)
2599 {
2600 	struct be_queue_info *eq;
2601 	struct be_eq_obj *eqo;
2602 	struct be_aic_obj *aic;
2603 	int i, rc;
2604 
2605 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2606 				    adapter->cfg_num_qs);
2607 
2608 	for_all_evt_queues(adapter, eqo, i) {
2609 		int numa_node = dev_to_node(&adapter->pdev->dev);
2610 
2611 		aic = &adapter->aic_obj[i];
2612 		eqo->adapter = adapter;
2613 		eqo->idx = i;
2614 		aic->max_eqd = BE_MAX_EQD;
2615 		aic->enable = true;
2616 
2617 		eq = &eqo->q;
2618 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2619 				    sizeof(struct be_eq_entry));
2620 		if (rc)
2621 			return rc;
2622 
2623 		rc = be_cmd_eq_create(adapter, eqo);
2624 		if (rc)
2625 			return rc;
2626 
2627 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2628 			return -ENOMEM;
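		/* Build each EQ's affinity mask by spreading CPUs across EQs,
		 * preferring CPUs local to the adapter's NUMA node; the mask
		 * is later used as the IRQ affinity hint and for XPS.
		 */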
2629 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2630 				eqo->affinity_mask);
2631 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2632 			       BE_NAPI_WEIGHT);
2633 	}
2634 	return 0;
2635 }
2636 
2637 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2638 {
2639 	struct be_queue_info *q;
2640 
2641 	q = &adapter->mcc_obj.q;
2642 	if (q->created)
2643 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2644 	be_queue_free(adapter, q);
2645 
2646 	q = &adapter->mcc_obj.cq;
2647 	if (q->created)
2648 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2649 	be_queue_free(adapter, q);
2650 }
2651 
/* Must be called only after TX qs are created, as MCC shares the TX EQ */
2653 static int be_mcc_queues_create(struct be_adapter *adapter)
2654 {
2655 	struct be_queue_info *q, *cq;
2656 
2657 	cq = &adapter->mcc_obj.cq;
2658 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2659 			   sizeof(struct be_mcc_compl)))
2660 		goto err;
2661 
2662 	/* Use the default EQ for MCC completions */
2663 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2664 		goto mcc_cq_free;
2665 
2666 	q = &adapter->mcc_obj.q;
2667 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2668 		goto mcc_cq_destroy;
2669 
2670 	if (be_cmd_mccq_create(adapter, q, cq))
2671 		goto mcc_q_free;
2672 
2673 	return 0;
2674 
2675 mcc_q_free:
2676 	be_queue_free(adapter, q);
2677 mcc_cq_destroy:
2678 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2679 mcc_cq_free:
2680 	be_queue_free(adapter, cq);
2681 err:
2682 	return -1;
2683 }
2684 
2685 static void be_tx_queues_destroy(struct be_adapter *adapter)
2686 {
2687 	struct be_queue_info *q;
2688 	struct be_tx_obj *txo;
2689 	u8 i;
2690 
2691 	for_all_tx_queues(adapter, txo, i) {
2692 		q = &txo->q;
2693 		if (q->created)
2694 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2695 		be_queue_free(adapter, q);
2696 
2697 		q = &txo->cq;
2698 		if (q->created)
2699 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2700 		be_queue_free(adapter, q);
2701 	}
2702 }
2703 
2704 static int be_tx_qs_create(struct be_adapter *adapter)
2705 {
2706 	struct be_queue_info *cq;
2707 	struct be_tx_obj *txo;
2708 	struct be_eq_obj *eqo;
2709 	int status, i;
2710 
2711 	adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
2712 
2713 	for_all_tx_queues(adapter, txo, i) {
2714 		cq = &txo->cq;
2715 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2716 					sizeof(struct be_eth_tx_compl));
2717 		if (status)
2718 			return status;
2719 
2720 		u64_stats_init(&txo->stats.sync);
2721 		u64_stats_init(&txo->stats.sync_compl);
2722 
2723 		/* If num_evt_qs is less than num_tx_qs, then more than
		 * one TXQ shares an EQ
2725 		 */
2726 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2727 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2728 		if (status)
2729 			return status;
2730 
2731 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2732 					sizeof(struct be_eth_wrb));
2733 		if (status)
2734 			return status;
2735 
2736 		status = be_cmd_txq_create(adapter, txo);
2737 		if (status)
2738 			return status;
2739 
2740 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2741 				    eqo->idx);
2742 	}
2743 
2744 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2745 		 adapter->num_tx_qs);
2746 	return 0;
2747 }
2748 
2749 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2750 {
2751 	struct be_queue_info *q;
2752 	struct be_rx_obj *rxo;
2753 	int i;
2754 
2755 	for_all_rx_queues(adapter, rxo, i) {
2756 		q = &rxo->cq;
2757 		if (q->created)
2758 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2759 		be_queue_free(adapter, q);
2760 	}
2761 }
2762 
2763 static int be_rx_cqs_create(struct be_adapter *adapter)
2764 {
2765 	struct be_queue_info *eq, *cq;
2766 	struct be_rx_obj *rxo;
2767 	int rc, i;
2768 
2769 	/* We can create as many RSS rings as there are EQs. */
2770 	adapter->num_rss_qs = adapter->num_evt_qs;
2771 
	/* We'll use RSS only if at least 2 RSS rings are supported. */
2773 	if (adapter->num_rss_qs <= 1)
2774 		adapter->num_rss_qs = 0;
2775 
2776 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2777 
2778 	/* When the interface is not capable of RSS rings (and there is no
	 * need to create a default RXQ), we'll still need one RXQ
2780 	 */
2781 	if (adapter->num_rx_qs == 0)
2782 		adapter->num_rx_qs = 1;
2783 
2784 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2785 	for_all_rx_queues(adapter, rxo, i) {
2786 		rxo->adapter = adapter;
2787 		cq = &rxo->cq;
2788 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2789 				    sizeof(struct be_eth_rx_compl));
2790 		if (rc)
2791 			return rc;
2792 
2793 		u64_stats_init(&rxo->stats.sync);
2794 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2795 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2796 		if (rc)
2797 			return rc;
2798 	}
2799 
2800 	dev_info(&adapter->pdev->dev,
2801 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
2802 	return 0;
2803 }
2804 
2805 static irqreturn_t be_intx(int irq, void *dev)
2806 {
2807 	struct be_eq_obj *eqo = dev;
2808 	struct be_adapter *adapter = eqo->adapter;
2809 	int num_evts = 0;
2810 
2811 	/* IRQ is not expected when NAPI is scheduled as the EQ
2812 	 * will not be armed.
2813 	 * But, this can happen on Lancer INTx where it takes
	 * a while to de-assert INTx, or in BE2 where occasionally
2815 	 * an interrupt may be raised even when EQ is unarmed.
2816 	 * If NAPI is already scheduled, then counting & notifying
2817 	 * events will orphan them.
2818 	 */
2819 	if (napi_schedule_prep(&eqo->napi)) {
2820 		num_evts = events_get(eqo);
2821 		__napi_schedule(&eqo->napi);
2822 		if (num_evts)
2823 			eqo->spurious_intr = 0;
2824 	}
2825 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2826 
	/* Return IRQ_HANDLED only for the first spurious intr
2828 	 * after a valid intr to stop the kernel from branding
2829 	 * this irq as a bad one!
2830 	 */
2831 	if (num_evts || eqo->spurious_intr++ == 0)
2832 		return IRQ_HANDLED;
2833 	else
2834 		return IRQ_NONE;
2835 }
2836 
2837 static irqreturn_t be_msix(int irq, void *dev)
2838 {
2839 	struct be_eq_obj *eqo = dev;
2840 
2841 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2842 	napi_schedule(&eqo->napi);
2843 	return IRQ_HANDLED;
2844 }
2845 
2846 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2847 {
2848 	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
2849 }
2850 
2851 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2852 			 int budget, int polling)
2853 {
2854 	struct be_adapter *adapter = rxo->adapter;
2855 	struct be_queue_info *rx_cq = &rxo->cq;
2856 	struct be_rx_compl_info *rxcp;
2857 	u32 work_done;
2858 	u32 frags_consumed = 0;
2859 
2860 	for (work_done = 0; work_done < budget; work_done++) {
2861 		rxcp = be_rx_compl_get(rxo);
2862 		if (!rxcp)
2863 			break;
2864 
		/* Is it a flush compl that has no data? */
2866 		if (unlikely(rxcp->num_rcvd == 0))
2867 			goto loop_continue;
2868 
2869 		/* Discard compl with partial DMA Lancer B0 */
2870 		if (unlikely(!rxcp->pkt_size)) {
2871 			be_rx_compl_discard(rxo, rxcp);
2872 			goto loop_continue;
2873 		}
2874 
2875 		/* On BE drop pkts that arrive due to imperfect filtering in
		 * promiscuous mode on some SKUs
2877 		 */
2878 		if (unlikely(rxcp->port != adapter->port_num &&
2879 			     !lancer_chip(adapter))) {
2880 			be_rx_compl_discard(rxo, rxcp);
2881 			goto loop_continue;
2882 		}
2883 
		/* Don't do GRO when we're busy-polling */
2885 		if (do_gro(rxcp) && polling != BUSY_POLLING)
2886 			be_rx_compl_process_gro(rxo, napi, rxcp);
2887 		else
2888 			be_rx_compl_process(rxo, napi, rxcp);
2889 
2890 loop_continue:
2891 		frags_consumed += rxcp->num_rcvd;
2892 		be_rx_stats_update(rxo, rxcp);
2893 	}
2894 
2895 	if (work_done) {
2896 		be_cq_notify(adapter, rx_cq->id, true, work_done);
2897 
2898 		/* When an rx-obj gets into post_starved state, just
2899 		 * let be_worker do the posting.
2900 		 */
2901 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
2902 		    !rxo->rx_post_starved)
2903 			be_post_rx_frags(rxo, GFP_ATOMIC,
2904 					 max_t(u32, MAX_RX_POST,
2905 					       frags_consumed));
2906 	}
2907 
2908 	return work_done;
2909 }
2910 
2911 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2912 {
2913 	switch (status) {
2914 	case BE_TX_COMP_HDR_PARSE_ERR:
2915 		tx_stats(txo)->tx_hdr_parse_err++;
2916 		break;
2917 	case BE_TX_COMP_NDMA_ERR:
2918 		tx_stats(txo)->tx_dma_err++;
2919 		break;
2920 	case BE_TX_COMP_ACL_ERR:
2921 		tx_stats(txo)->tx_spoof_check_err++;
2922 		break;
2923 	}
2924 }
2925 
2926 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2927 {
2928 	switch (status) {
2929 	case LANCER_TX_COMP_LSO_ERR:
2930 		tx_stats(txo)->tx_tso_err++;
2931 		break;
2932 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2933 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2934 		tx_stats(txo)->tx_spoof_check_err++;
2935 		break;
2936 	case LANCER_TX_COMP_QINQ_ERR:
2937 		tx_stats(txo)->tx_qinq_err++;
2938 		break;
2939 	case LANCER_TX_COMP_PARITY_ERR:
2940 		tx_stats(txo)->tx_internal_parity_err++;
2941 		break;
2942 	case LANCER_TX_COMP_DMA_ERR:
2943 		tx_stats(txo)->tx_dma_err++;
2944 		break;
2945 	}
2946 }
2947 
2948 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
2949 			  int idx)
2950 {
2951 	int num_wrbs = 0, work_done = 0;
2952 	struct be_tx_compl_info *txcp;
2953 
2954 	while ((txcp = be_tx_compl_get(txo))) {
2955 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
2956 		work_done++;
2957 
2958 		if (txcp->status) {
2959 			if (lancer_chip(adapter))
2960 				lancer_update_tx_err(txo, txcp->status);
2961 			else
2962 				be_update_tx_err(txo, txcp->status);
2963 		}
2964 	}
2965 
2966 	if (work_done) {
2967 		be_cq_notify(adapter, txo->cq.id, true, work_done);
2968 		atomic_sub(num_wrbs, &txo->q.used);
2969 
		/* As Tx wrbs have been freed up, wake up netdev queue
		 * if it was stopped due to lack of tx wrbs.
		 */
2972 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
2973 		    be_can_txq_wake(txo)) {
2974 			netif_wake_subqueue(adapter->netdev, idx);
2975 		}
2976 
2977 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
2978 		tx_stats(txo)->tx_compl += work_done;
2979 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
2980 	}
2981 }
2982 
2983 #ifdef CONFIG_NET_RX_BUSY_POLL
2984 static inline bool be_lock_napi(struct be_eq_obj *eqo)
2985 {
2986 	bool status = true;
2987 
2988 	spin_lock(&eqo->lock); /* BH is already disabled */
2989 	if (eqo->state & BE_EQ_LOCKED) {
2990 		WARN_ON(eqo->state & BE_EQ_NAPI);
2991 		eqo->state |= BE_EQ_NAPI_YIELD;
2992 		status = false;
2993 	} else {
2994 		eqo->state = BE_EQ_NAPI;
2995 	}
2996 	spin_unlock(&eqo->lock);
2997 	return status;
2998 }
2999 
3000 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3001 {
3002 	spin_lock(&eqo->lock); /* BH is already disabled */
3003 
3004 	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3005 	eqo->state = BE_EQ_IDLE;
3006 
3007 	spin_unlock(&eqo->lock);
3008 }
3009 
3010 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3011 {
3012 	bool status = true;
3013 
3014 	spin_lock_bh(&eqo->lock);
3015 	if (eqo->state & BE_EQ_LOCKED) {
3016 		eqo->state |= BE_EQ_POLL_YIELD;
3017 		status = false;
3018 	} else {
3019 		eqo->state |= BE_EQ_POLL;
3020 	}
3021 	spin_unlock_bh(&eqo->lock);
3022 	return status;
3023 }
3024 
3025 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3026 {
3027 	spin_lock_bh(&eqo->lock);
3028 
3029 	WARN_ON(eqo->state & (BE_EQ_NAPI));
3030 	eqo->state = BE_EQ_IDLE;
3031 
3032 	spin_unlock_bh(&eqo->lock);
3033 }
3034 
3035 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3036 {
3037 	spin_lock_init(&eqo->lock);
3038 	eqo->state = BE_EQ_IDLE;
3039 }
3040 
3041 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3042 {
3043 	local_bh_disable();
3044 
3045 	/* It's enough to just acquire napi lock on the eqo to stop
	 * be_busy_poll() from processing any queues.
3047 	 */
3048 	while (!be_lock_napi(eqo))
3049 		mdelay(1);
3050 
3051 	local_bh_enable();
3052 }
3053 
3054 #else /* CONFIG_NET_RX_BUSY_POLL */
3055 
3056 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3057 {
3058 	return true;
3059 }
3060 
3061 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3062 {
3063 }
3064 
3065 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3066 {
3067 	return false;
3068 }
3069 
3070 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3071 {
3072 }
3073 
3074 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3075 {
3076 }
3077 
3078 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3079 {
3080 }
3081 #endif /* CONFIG_NET_RX_BUSY_POLL */
3082 
3083 int be_poll(struct napi_struct *napi, int budget)
3084 {
3085 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3086 	struct be_adapter *adapter = eqo->adapter;
3087 	int max_work = 0, work, i, num_evts;
3088 	struct be_rx_obj *rxo;
3089 	struct be_tx_obj *txo;
3090 	u32 mult_enc = 0;
3091 
3092 	num_evts = events_get(eqo);
3093 
3094 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3095 		be_process_tx(adapter, txo, i);
3096 
3097 	if (be_lock_napi(eqo)) {
		/* This loop iterates twice for EQ0, on which completions of
		 * the last RXQ (the default one) are also processed. For all
		 * other EQs the loop iterates only once.
		 */
3102 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3103 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3104 			max_work = max(work, max_work);
3105 		}
3106 		be_unlock_napi(eqo);
3107 	} else {
3108 		max_work = budget;
3109 	}
3110 
3111 	if (is_mcc_eqo(eqo))
3112 		be_process_mcc(adapter);
3113 
3114 	if (max_work < budget) {
3115 		napi_complete(napi);
3116 
		/* Skyhawk EQ_DB has a provision to set the rearm-to-interrupt
		 * delay via a delay-multiplier encoding value
		 */
3120 		if (skyhawk_chip(adapter))
3121 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3122 
3123 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3124 			     mult_enc);
3125 	} else {
3126 		/* As we'll continue in polling mode, count and clear events */
3127 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3128 	}
3129 	return max_work;
3130 }
3131 
3132 #ifdef CONFIG_NET_RX_BUSY_POLL
3133 static int be_busy_poll(struct napi_struct *napi)
3134 {
3135 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3136 	struct be_adapter *adapter = eqo->adapter;
3137 	struct be_rx_obj *rxo;
3138 	int i, work = 0;
3139 
3140 	if (!be_lock_busy_poll(eqo))
3141 		return LL_FLUSH_BUSY;
3142 
3143 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3144 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3145 		if (work)
3146 			break;
3147 	}
3148 
3149 	be_unlock_busy_poll(eqo);
3150 	return work;
3151 }
3152 #endif
3153 
3154 void be_detect_error(struct be_adapter *adapter)
3155 {
3156 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3157 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3158 	u32 i;
3159 	struct device *dev = &adapter->pdev->dev;
3160 
3161 	if (be_check_error(adapter, BE_ERROR_HW))
3162 		return;
3163 
3164 	if (lancer_chip(adapter)) {
3165 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3166 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3167 			be_set_error(adapter, BE_ERROR_UE);
3168 			sliport_err1 = ioread32(adapter->db +
3169 						SLIPORT_ERROR1_OFFSET);
3170 			sliport_err2 = ioread32(adapter->db +
3171 						SLIPORT_ERROR2_OFFSET);
			/* Do not log error messages if it's a FW reset */
3173 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3174 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3175 				dev_info(dev, "Firmware update in progress\n");
3176 			} else {
3177 				dev_err(dev, "Error detected in the card\n");
3178 				dev_err(dev, "ERR: sliport status 0x%x\n",
3179 					sliport_status);
3180 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3181 					sliport_err1);
3182 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3183 					sliport_err2);
3184 			}
3185 		}
3186 	} else {
3187 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3188 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3189 		ue_lo_mask = ioread32(adapter->pcicfg +
3190 				      PCICFG_UE_STATUS_LOW_MASK);
3191 		ue_hi_mask = ioread32(adapter->pcicfg +
3192 				      PCICFG_UE_STATUS_HI_MASK);
3193 
3194 		ue_lo = (ue_lo & ~ue_lo_mask);
3195 		ue_hi = (ue_hi & ~ue_hi_mask);
3196 
		/* On certain platforms BE hardware can indicate spurious UEs.
		 * So don't flag hw_error when a UE is detected; in case of a
		 * real UE, the HW is allowed to stop working completely.
		 */
3201 
3202 		if (ue_lo || ue_hi) {
3203 			dev_err(dev,
				"Unrecoverable Error detected in the adapter\n");
			dev_err(dev, "Please reboot server to recover\n");
3206 			if (skyhawk_chip(adapter))
3207 				be_set_error(adapter, BE_ERROR_UE);
3208 
3209 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3210 				if (ue_lo & 1)
3211 					dev_err(dev, "UE: %s bit set\n",
3212 						ue_status_low_desc[i]);
3213 			}
3214 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3215 				if (ue_hi & 1)
3216 					dev_err(dev, "UE: %s bit set\n",
3217 						ue_status_hi_desc[i]);
3218 			}
3219 		}
3220 	}
3221 }
3222 
3223 static void be_msix_disable(struct be_adapter *adapter)
3224 {
3225 	if (msix_enabled(adapter)) {
3226 		pci_disable_msix(adapter->pdev);
3227 		adapter->num_msix_vec = 0;
3228 		adapter->num_msix_roce_vec = 0;
3229 	}
3230 }
3231 
3232 static int be_msix_enable(struct be_adapter *adapter)
3233 {
3234 	int i, num_vec;
3235 	struct device *dev = &adapter->pdev->dev;
3236 
3237 	/* If RoCE is supported, program the max number of NIC vectors that
3238 	 * may be configured via set-channels, along with vectors needed for
	 * RoCE. Else, just program the number we'll use initially.
3240 	 */
3241 	if (be_roce_supported(adapter))
3242 		num_vec = min_t(int, 2 * be_max_eqs(adapter),
3243 				2 * num_online_cpus());
3244 	else
3245 		num_vec = adapter->cfg_num_qs;
3246 
3247 	for (i = 0; i < num_vec; i++)
3248 		adapter->msix_entries[i].entry = i;
3249 
3250 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3251 					MIN_MSIX_VECTORS, num_vec);
3252 	if (num_vec < 0)
3253 		goto fail;
3254 
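	/* Split the granted vectors between RoCE and the NIC. For example
	 * (illustrative only), if 16 vectors were granted on a RoCE-capable
	 * adapter, 8 are reserved for RoCE and the remaining 8 drive the NIC
	 * event queues.
	 */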
3255 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3256 		adapter->num_msix_roce_vec = num_vec / 2;
3257 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3258 			 adapter->num_msix_roce_vec);
3259 	}
3260 
3261 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3262 
3263 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3264 		 adapter->num_msix_vec);
3265 	return 0;
3266 
3267 fail:
3268 	dev_warn(dev, "MSIx enable failed\n");
3269 
3270 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3271 	if (be_virtfn(adapter))
3272 		return num_vec;
3273 	return 0;
3274 }
3275 
3276 static inline int be_msix_vec_get(struct be_adapter *adapter,
3277 				  struct be_eq_obj *eqo)
3278 {
3279 	return adapter->msix_entries[eqo->msix_idx].vector;
3280 }
3281 
3282 static int be_msix_register(struct be_adapter *adapter)
3283 {
3284 	struct net_device *netdev = adapter->netdev;
3285 	struct be_eq_obj *eqo;
3286 	int status, i, vec;
3287 
3288 	for_all_evt_queues(adapter, eqo, i) {
3289 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3290 		vec = be_msix_vec_get(adapter, eqo);
3291 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3292 		if (status)
3293 			goto err_msix;
3294 
3295 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3296 	}
3297 
3298 	return 0;
3299 err_msix:
3300 	for (i--; i >= 0; i--) {
3301 		eqo = &adapter->eq_obj[i];
3302 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3303 	}
3304 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3305 		 status);
3306 	be_msix_disable(adapter);
3307 	return status;
3308 }
3309 
3310 static int be_irq_register(struct be_adapter *adapter)
3311 {
3312 	struct net_device *netdev = adapter->netdev;
3313 	int status;
3314 
3315 	if (msix_enabled(adapter)) {
3316 		status = be_msix_register(adapter);
3317 		if (status == 0)
3318 			goto done;
3319 		/* INTx is not supported for VF */
3320 		if (be_virtfn(adapter))
3321 			return status;
3322 	}
3323 
3324 	/* INTx: only the first EQ is used */
3325 	netdev->irq = adapter->pdev->irq;
3326 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3327 			     &adapter->eq_obj[0]);
3328 	if (status) {
3329 		dev_err(&adapter->pdev->dev,
3330 			"INTx request IRQ failed - err %d\n", status);
3331 		return status;
3332 	}
3333 done:
3334 	adapter->isr_registered = true;
3335 	return 0;
3336 }
3337 
3338 static void be_irq_unregister(struct be_adapter *adapter)
3339 {
3340 	struct net_device *netdev = adapter->netdev;
3341 	struct be_eq_obj *eqo;
3342 	int i, vec;
3343 
3344 	if (!adapter->isr_registered)
3345 		return;
3346 
3347 	/* INTx */
3348 	if (!msix_enabled(adapter)) {
3349 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3350 		goto done;
3351 	}
3352 
3353 	/* MSIx */
3354 	for_all_evt_queues(adapter, eqo, i) {
3355 		vec = be_msix_vec_get(adapter, eqo);
3356 		irq_set_affinity_hint(vec, NULL);
3357 		free_irq(vec, eqo);
3358 	}
3359 
3360 done:
3361 	adapter->isr_registered = false;
3362 }
3363 
3364 static void be_rx_qs_destroy(struct be_adapter *adapter)
3365 {
3366 	struct be_queue_info *q;
3367 	struct be_rx_obj *rxo;
3368 	int i;
3369 
3370 	for_all_rx_queues(adapter, rxo, i) {
3371 		q = &rxo->q;
3372 		if (q->created) {
3373 			/* If RXQs are destroyed while in an "out of buffer"
3374 			 * state, there is a possibility of an HW stall on
3375 			 * Lancer. So, post 64 buffers to each queue to relieve
3376 			 * the "out of buffer" condition.
3377 			 * Make sure there's space in the RXQ before posting.
3378 			 */
3379 			if (lancer_chip(adapter)) {
3380 				be_rx_cq_clean(rxo);
3381 				if (atomic_read(&q->used) == 0)
3382 					be_post_rx_frags(rxo, GFP_KERNEL,
3383 							 MAX_RX_POST);
3384 			}
3385 
3386 			be_cmd_rxq_destroy(adapter, q);
3387 			be_rx_cq_clean(rxo);
3388 			be_rxq_clean(rxo);
3389 		}
3390 		be_queue_free(adapter, q);
3391 	}
3392 }
3393 
3394 static void be_disable_if_filters(struct be_adapter *adapter)
3395 {
3396 	be_cmd_pmac_del(adapter, adapter->if_handle,
3397 			adapter->pmac_id[0], 0);
3398 
3399 	be_clear_uc_list(adapter);
3400 
3401 	/* The IFACE flags are enabled in the open path and cleared
3402 	 * in the close path. When a VF gets detached from the host and
3403 	 * assigned to a VM the following happens:
3404 	 *	- VF's IFACE flags get cleared in the detach path
3405 	 *	- IFACE create is issued by the VF in the attach path
3406 	 * Due to a bug in the BE3/Skyhawk-R FW
3407 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3408 	 * specified along with the IFACE create cmd issued by a VF are not
3409 	 * honoured by FW.  As a consequence, if a *new* driver
3410 	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is used by a VM/VF,
3412 	 * the IFACE gets created *without* the needed flags.
3413 	 * To avoid this, disable RX-filter flags only for Lancer.
3414 	 */
3415 	if (lancer_chip(adapter)) {
3416 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3417 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3418 	}
3419 }
3420 
3421 static int be_close(struct net_device *netdev)
3422 {
3423 	struct be_adapter *adapter = netdev_priv(netdev);
3424 	struct be_eq_obj *eqo;
3425 	int i;
3426 
3427 	/* This protection is needed as be_close() may be called even when the
	 * adapter is in a cleared state (after an EEH permanent failure)
3429 	 */
3430 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3431 		return 0;
3432 
3433 	be_disable_if_filters(adapter);
3434 
3435 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3436 		for_all_evt_queues(adapter, eqo, i) {
3437 			napi_disable(&eqo->napi);
3438 			be_disable_busy_poll(eqo);
3439 		}
3440 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3441 	}
3442 
3443 	be_async_mcc_disable(adapter);
3444 
3445 	/* Wait for all pending tx completions to arrive so that
3446 	 * all tx skbs are freed.
3447 	 */
3448 	netif_tx_disable(netdev);
3449 	be_tx_compl_clean(adapter);
3450 
3451 	be_rx_qs_destroy(adapter);
3452 
3453 	for_all_evt_queues(adapter, eqo, i) {
3454 		if (msix_enabled(adapter))
3455 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3456 		else
3457 			synchronize_irq(netdev->irq);
3458 		be_eq_clean(eqo);
3459 	}
3460 
3461 	be_irq_unregister(adapter);
3462 
3463 	return 0;
3464 }
3465 
3466 static int be_rx_qs_create(struct be_adapter *adapter)
3467 {
3468 	struct rss_info *rss = &adapter->rss_info;
3469 	u8 rss_key[RSS_HASH_KEY_LEN];
3470 	struct be_rx_obj *rxo;
3471 	int rc, i, j;
3472 
3473 	for_all_rx_queues(adapter, rxo, i) {
3474 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3475 				    sizeof(struct be_eth_rx_d));
3476 		if (rc)
3477 			return rc;
3478 	}
3479 
3480 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3481 		rxo = default_rxo(adapter);
3482 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3483 				       rx_frag_size, adapter->if_handle,
3484 				       false, &rxo->rss_id);
3485 		if (rc)
3486 			return rc;
3487 	}
3488 
3489 	for_all_rss_queues(adapter, rxo, i) {
3490 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3491 				       rx_frag_size, adapter->if_handle,
3492 				       true, &rxo->rss_id);
3493 		if (rc)
3494 			return rc;
3495 	}
3496 
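	/* Populate the RSS indirection table by striping the RSS ring ids
	 * across all of its entries; e.g. with 4 RSS rings the table repeats
	 * the pattern rss_id0, rss_id1, rss_id2, rss_id3, ...
	 */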
3497 	if (be_multi_rxq(adapter)) {
3498 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3499 			for_all_rss_queues(adapter, rxo, i) {
3500 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3501 					break;
3502 				rss->rsstable[j + i] = rxo->rss_id;
3503 				rss->rss_queue[j + i] = i;
3504 			}
3505 		}
3506 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3507 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3508 
3509 		if (!BEx_chip(adapter))
3510 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3511 				RSS_ENABLE_UDP_IPV6;
3512 	} else {
3513 		/* Disable RSS, if only default RX Q is created */
3514 		rss->rss_flags = RSS_ENABLE_NONE;
3515 	}
3516 
3517 	netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3518 	rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3519 			       RSS_INDIR_TABLE_LEN, rss_key);
3520 	if (rc) {
3521 		rss->rss_flags = RSS_ENABLE_NONE;
3522 		return rc;
3523 	}
3524 
3525 	memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3526 
3527 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3528 	 * which is a queue empty condition
3529 	 */
3530 	for_all_rx_queues(adapter, rxo, i)
3531 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3532 
3533 	return 0;
3534 }
3535 
3536 static int be_enable_if_filters(struct be_adapter *adapter)
3537 {
3538 	int status;
3539 
3540 	status = be_cmd_rx_filter(adapter, BE_IF_EN_FLAGS, ON);
3541 	if (status)
3542 		return status;
3543 
3544 	/* For BE3 VFs, the PF programs the initial MAC address */
3545 	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3546 		status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3547 					 adapter->if_handle,
3548 					 &adapter->pmac_id[0], 0);
3549 		if (status)
3550 			return status;
3551 	}
3552 
3553 	if (adapter->vlans_added)
3554 		be_vid_config(adapter);
3555 
3556 	be_set_rx_mode(adapter->netdev);
3557 
3558 	return 0;
3559 }
3560 
3561 static int be_open(struct net_device *netdev)
3562 {
3563 	struct be_adapter *adapter = netdev_priv(netdev);
3564 	struct be_eq_obj *eqo;
3565 	struct be_rx_obj *rxo;
3566 	struct be_tx_obj *txo;
3567 	u8 link_status;
3568 	int status, i;
3569 
3570 	status = be_rx_qs_create(adapter);
3571 	if (status)
3572 		goto err;
3573 
3574 	status = be_enable_if_filters(adapter);
3575 	if (status)
3576 		goto err;
3577 
3578 	status = be_irq_register(adapter);
3579 	if (status)
3580 		goto err;
3581 
3582 	for_all_rx_queues(adapter, rxo, i)
3583 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3584 
3585 	for_all_tx_queues(adapter, txo, i)
3586 		be_cq_notify(adapter, txo->cq.id, true, 0);
3587 
3588 	be_async_mcc_enable(adapter);
3589 
3590 	for_all_evt_queues(adapter, eqo, i) {
3591 		napi_enable(&eqo->napi);
3592 		be_enable_busy_poll(eqo);
3593 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3594 	}
3595 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3596 
3597 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3598 	if (!status)
3599 		be_link_status_update(adapter, link_status);
3600 
3601 	netif_tx_start_all_queues(netdev);
3602 #ifdef CONFIG_BE2NET_VXLAN
3603 	if (skyhawk_chip(adapter))
3604 		vxlan_get_rx_port(netdev);
3605 #endif
3606 
3607 	return 0;
3608 err:
3609 	be_close(adapter->netdev);
3610 	return -EIO;
3611 }
3612 
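/* Configure magic-packet Wake-on-LAN: issue the ACPI magic-WoL FW command and
 * arm or disarm PCI wake from D3hot/D3cold; the PCICFG PM control bits are
 * programmed only when enabling.
 */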
3613 static int be_setup_wol(struct be_adapter *adapter, bool enable)
3614 {
3615 	struct device *dev = &adapter->pdev->dev;
3616 	struct be_dma_mem cmd;
3617 	u8 mac[ETH_ALEN];
3618 	int status;
3619 
3620 	eth_zero_addr(mac);
3621 
3622 	cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
3623 	cmd.va = dma_zalloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL);
3624 	if (!cmd.va)
3625 		return -ENOMEM;
3626 
3627 	if (enable) {
3628 		status = pci_write_config_dword(adapter->pdev,
3629 						PCICFG_PM_CONTROL_OFFSET,
3630 						PCICFG_PM_CONTROL_MASK);
3631 		if (status) {
3632 			dev_err(dev, "Could not enable Wake-on-LAN\n");
3633 			goto err;
3634 		}
3635 	} else {
3636 		ether_addr_copy(mac, adapter->netdev->dev_addr);
3637 	}
3638 
3639 	status = be_cmd_enable_magic_wol(adapter, mac, &cmd);
3640 	pci_enable_wake(adapter->pdev, PCI_D3hot, enable);
3641 	pci_enable_wake(adapter->pdev, PCI_D3cold, enable);
3642 err:
3643 	dma_free_coherent(dev, cmd.size, cmd.va, cmd.dma);
3644 	return status;
3645 }
3646 
3647 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3648 {
3649 	u32 addr;
3650 
3651 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3652 
3653 	mac[5] = (u8)(addr & 0xFF);
3654 	mac[4] = (u8)((addr >> 8) & 0xFF);
3655 	mac[3] = (u8)((addr >> 16) & 0xFF);
3656 	/* Use the OUI from the current MAC address */
3657 	memcpy(mac, adapter->netdev->dev_addr, 3);
3658 }
3659 
3660 /*
3661  * Generate a seed MAC address from the PF MAC address using jhash.
3662  * MAC addresses for VFs are assigned incrementally starting from the seed.
3663  * These addresses are programmed in the ASIC by the PF and the VF driver
3664  * queries for the MAC address during its probe.
3665  */
3666 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3667 {
3668 	u32 vf;
3669 	int status = 0;
3670 	u8 mac[ETH_ALEN];
3671 	struct be_vf_cfg *vf_cfg;
3672 
3673 	be_vf_eth_addr_generate(adapter, mac);
3674 
3675 	for_all_vfs(adapter, vf_cfg, vf) {
3676 		if (BEx_chip(adapter))
3677 			status = be_cmd_pmac_add(adapter, mac,
3678 						 vf_cfg->if_handle,
3679 						 &vf_cfg->pmac_id, vf + 1);
3680 		else
3681 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3682 						vf + 1);
3683 
3684 		if (status)
3685 			dev_err(&adapter->pdev->dev,
3686 				"MAC address assignment failed for VF %d\n",
3687 				vf);
3688 		else
3689 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3690 
3691 		mac[5] += 1;
3692 	}
3693 	return status;
3694 }
3695 
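/* Query the MAC addresses already programmed for each VF; used when VFs were
 * left enabled by a previous driver load.
 */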
3696 static int be_vfs_mac_query(struct be_adapter *adapter)
3697 {
3698 	int status, vf;
3699 	u8 mac[ETH_ALEN];
3700 	struct be_vf_cfg *vf_cfg;
3701 
3702 	for_all_vfs(adapter, vf_cfg, vf) {
3703 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3704 					       mac, vf_cfg->if_handle,
3705 					       false, vf+1);
3706 		if (status)
3707 			return status;
3708 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3709 	}
3710 	return 0;
3711 }
3712 
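/* Undo SR-IOV setup: disable SR-IOV (unless VFs are assigned to VMs), remove
 * the per-VF MAC filters and interfaces, and free the VF config array.
 */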
3713 static void be_vf_clear(struct be_adapter *adapter)
3714 {
3715 	struct be_vf_cfg *vf_cfg;
3716 	u32 vf;
3717 
3718 	if (pci_vfs_assigned(adapter->pdev)) {
3719 		dev_warn(&adapter->pdev->dev,
3720 			 "VFs are assigned to VMs: not disabling VFs\n");
3721 		goto done;
3722 	}
3723 
3724 	pci_disable_sriov(adapter->pdev);
3725 
3726 	for_all_vfs(adapter, vf_cfg, vf) {
3727 		if (BEx_chip(adapter))
3728 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3729 					vf_cfg->pmac_id, vf + 1);
3730 		else
3731 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3732 				       vf + 1);
3733 
3734 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3735 	}
3736 done:
3737 	kfree(adapter->vf_cfg);
3738 	adapter->num_vfs = 0;
3739 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3740 }
3741 
3742 static void be_clear_queues(struct be_adapter *adapter)
3743 {
3744 	be_mcc_queues_destroy(adapter);
3745 	be_rx_cqs_destroy(adapter);
3746 	be_tx_queues_destroy(adapter);
3747 	be_evt_queues_destroy(adapter);
3748 }
3749 
3750 static void be_cancel_worker(struct be_adapter *adapter)
3751 {
3752 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3753 		cancel_delayed_work_sync(&adapter->work);
3754 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3755 	}
3756 }
3757 
3758 static void be_cancel_err_detection(struct be_adapter *adapter)
3759 {
3760 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3761 		cancel_delayed_work_sync(&adapter->be_err_detection_work);
3762 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3763 	}
3764 }
3765 
3766 #ifdef CONFIG_BE2NET_VXLAN
3767 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3768 {
3769 	struct net_device *netdev = adapter->netdev;
3770 
3771 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3772 		be_cmd_manage_iface(adapter, adapter->if_handle,
3773 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3774 
3775 	if (adapter->vxlan_port)
3776 		be_cmd_set_vxlan_port(adapter, 0);
3777 
3778 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3779 	adapter->vxlan_port = 0;
3780 
3781 	netdev->hw_enc_features = 0;
3782 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3783 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3784 }
3785 #endif
3786 
3787 static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs)
3788 {
3789 	struct be_resources res = adapter->pool_res;
3790 	u16 num_vf_qs = 1;
3791 
3792 	/* Distribute the queue resources equally among the PF and its VFs.
3793 	 * Do not distribute queue resources in a multi-channel configuration.
3794 	 */
3795 	if (num_vfs && !be_is_mc(adapter)) {
3796 		/* If the number of VFs requested is 8 less than the max
3797 		 * supported, assign 8 queue pairs to the PF and divide the
3798 		 * remaining resources evenly among the VFs
3799 		 */
3800 		if (num_vfs < (be_max_vfs(adapter) - 8))
3801 			num_vf_qs = (res.max_rss_qs - 8) / num_vfs;
3802 		else
3803 			num_vf_qs = res.max_rss_qs / num_vfs;
3804 
3805 		/* The Skyhawk-R chip supports only MAX_RSS_IFACES RSS-capable
3806 		 * interfaces per port. Provide RSS on VFs only if the number
3807 		 * of VFs requested is less than the MAX_RSS_IFACES limit.
3808 		 */
3809 		if (num_vfs >= MAX_RSS_IFACES)
3810 			num_vf_qs = 1;
3811 	}
3812 	return num_vf_qs;
3813 }
3814 
3815 static int be_clear(struct be_adapter *adapter)
3816 {
3817 	struct pci_dev *pdev = adapter->pdev;
3818 	u16 num_vf_qs;
3819 
3820 	be_cancel_worker(adapter);
3821 
3822 	if (sriov_enabled(adapter))
3823 		be_vf_clear(adapter);
3824 
3825 	/* Re-configure FW to distribute resources evenly across the
3826 	 * max-supported number of VFs, only when VFs are not assigned to VMs.
3827 	 */
3828 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3829 	    !pci_vfs_assigned(pdev)) {
3830 		num_vf_qs = be_calculate_vf_qs(adapter,
3831 					       pci_sriov_get_totalvfs(pdev));
3832 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
3833 					pci_sriov_get_totalvfs(pdev),
3834 					num_vf_qs);
3835 	}
3836 
3837 #ifdef CONFIG_BE2NET_VXLAN
3838 	be_disable_vxlan_offloads(adapter);
3839 #endif
3840 	kfree(adapter->pmac_id);
3841 	adapter->pmac_id = NULL;
3842 
3843 	be_cmd_if_destroy(adapter, adapter->if_handle, 0);
3844 
3845 	be_clear_queues(adapter);
3846 
3847 	be_msix_disable(adapter);
3848 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3849 	return 0;
3850 }
3851 
3852 static int be_vfs_if_create(struct be_adapter *adapter)
3853 {
3854 	struct be_resources res = {0};
3855 	u32 cap_flags, en_flags, vf;
3856 	struct be_vf_cfg *vf_cfg;
3857 	int status;
3858 
3859 	/* If a FW profile exists, then cap_flags are updated */
3860 	cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
3861 		    BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
3862 
3863 	for_all_vfs(adapter, vf_cfg, vf) {
3864 		if (!BE3_chip(adapter)) {
3865 			status = be_cmd_get_profile_config(adapter, &res,
3866 							   RESOURCE_LIMITS,
3867 							   vf + 1);
3868 			if (!status) {
3869 				cap_flags = res.if_cap_flags;
3870 				/* Prevent VFs from enabling VLAN promiscuous
3871 				 * mode
3872 				 */
3873 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3874 			}
3875 		}
3876 
3877 		en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED |
3878 					BE_IF_FLAGS_BROADCAST |
3879 					BE_IF_FLAGS_MULTICAST |
3880 					BE_IF_FLAGS_PASS_L3L4_ERRORS);
3881 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
3882 					  &vf_cfg->if_handle, vf + 1);
3883 		if (status)
3884 			return status;
3885 	}
3886 
3887 	return 0;
3888 }
3889 
3890 static int be_vf_setup_init(struct be_adapter *adapter)
3891 {
3892 	struct be_vf_cfg *vf_cfg;
3893 	int vf;
3894 
3895 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
3896 				  GFP_KERNEL);
3897 	if (!adapter->vf_cfg)
3898 		return -ENOMEM;
3899 
3900 	for_all_vfs(adapter, vf_cfg, vf) {
3901 		vf_cfg->if_handle = -1;
3902 		vf_cfg->pmac_id = -1;
3903 	}
3904 	return 0;
3905 }
3906 
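/* Bring up SR-IOV: reuse existing VF interfaces and MACs if VFs were already
 * enabled, otherwise create them; then grant filtering privileges, query
 * spoof-check state and finally enable SR-IOV in the PCI layer.
 */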
3907 static int be_vf_setup(struct be_adapter *adapter)
3908 {
3909 	struct device *dev = &adapter->pdev->dev;
3910 	struct be_vf_cfg *vf_cfg;
3911 	int status, old_vfs, vf;
3912 	bool spoofchk;
3913 
3914 	old_vfs = pci_num_vf(adapter->pdev);
3915 
3916 	status = be_vf_setup_init(adapter);
3917 	if (status)
3918 		goto err;
3919 
3920 	if (old_vfs) {
3921 		for_all_vfs(adapter, vf_cfg, vf) {
3922 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
3923 			if (status)
3924 				goto err;
3925 		}
3926 
3927 		status = be_vfs_mac_query(adapter);
3928 		if (status)
3929 			goto err;
3930 	} else {
3931 		status = be_vfs_if_create(adapter);
3932 		if (status)
3933 			goto err;
3934 
3935 		status = be_vf_eth_addr_config(adapter);
3936 		if (status)
3937 			goto err;
3938 	}
3939 
3940 	for_all_vfs(adapter, vf_cfg, vf) {
3941 		/* Allow VFs to program MAC/VLAN filters */
3942 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
3943 						  vf + 1);
3944 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
3945 			status = be_cmd_set_fn_privileges(adapter,
3946 							  vf_cfg->privileges |
3947 							  BE_PRIV_FILTMGMT,
3948 							  vf + 1);
3949 			if (!status) {
3950 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
3951 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
3952 					 vf);
3953 			}
3954 		}
3955 
3956 		/* Allow full available bandwidth */
3957 		if (!old_vfs)
3958 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
3959 
3960 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
3961 					       vf_cfg->if_handle, NULL,
3962 					       &spoofchk);
3963 		if (!status)
3964 			vf_cfg->spoofchk = spoofchk;
3965 
3966 		if (!old_vfs) {
3967 			be_cmd_enable_vf(adapter, vf + 1);
3968 			be_cmd_set_logical_link_config(adapter,
3969 						       IFLA_VF_LINK_STATE_AUTO,
3970 						       vf+1);
3971 		}
3972 	}
3973 
3974 	if (!old_vfs) {
3975 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
3976 		if (status) {
3977 			dev_err(dev, "SRIOV enable failed\n");
3978 			adapter->num_vfs = 0;
3979 			goto err;
3980 		}
3981 	}
3982 
3983 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
3984 	return 0;
3985 err:
3986 	dev_err(dev, "VF setup failed\n");
3987 	be_vf_clear(adapter);
3988 	return status;
3989 }
3990 
3991 /* Converting function_mode bits on BE3 to SH mc_type enums */
3992 
3993 static u8 be_convert_mc_type(u32 function_mode)
3994 {
3995 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
3996 		return vNIC1;
3997 	else if (function_mode & QNQ_MODE)
3998 		return FLEX10;
3999 	else if (function_mode & VNIC_MODE)
4000 		return vNIC2;
4001 	else if (function_mode & UMC_ENABLED)
4002 		return UMC;
4003 	else
4004 		return MC_NONE;
4005 }
4006 
4007 /* On BE2/BE3, FW does not report the supported resource limits */
4008 static void BEx_get_resources(struct be_adapter *adapter,
4009 			      struct be_resources *res)
4010 {
4011 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4012 
4013 	if (be_physfn(adapter))
4014 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4015 	else
4016 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4017 
4018 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4019 
4020 	if (be_is_mc(adapter)) {
4021 		/* Assume that there are 4 channels per port
4022 		 * when multi-channel is enabled
4023 		 */
4024 		if (be_is_qnq_mode(adapter))
4025 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4026 		else
4027 			/* In non-QnQ multi-channel mode, the PVID
4028 			 * takes up one VLAN entry
4029 			 */
4030 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4031 	} else {
4032 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4033 	}
4034 
4035 	res->max_mcast_mac = BE_MAX_MC;
4036 
4037 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4038 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4039 	 *    *only* if it is RSS-capable.
4040 	 */
4041 	if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4042 	    be_virtfn(adapter) ||
4043 	    (be_is_mc(adapter) &&
4044 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4045 		res->max_tx_qs = 1;
4046 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4047 		struct be_resources super_nic_res = {0};
4048 
4049 		/* On a SuperNIC profile, the driver needs to use the
4050 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4051 		 */
4052 		be_cmd_get_profile_config(adapter, &super_nic_res,
4053 					  RESOURCE_LIMITS, 0);
4054 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4055 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4056 	} else {
4057 		res->max_tx_qs = BE3_MAX_TX_QS;
4058 	}
4059 
4060 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4061 	    !use_sriov && be_physfn(adapter))
4062 		res->max_rss_qs = (adapter->be3_native) ?
4063 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4064 	res->max_rx_qs = res->max_rss_qs + 1;
4065 
4066 	if (be_physfn(adapter))
4067 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4068 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4069 	else
4070 		res->max_evt_qs = 1;
4071 
4072 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4073 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4074 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4075 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4076 }
4077 
4078 static void be_setup_init(struct be_adapter *adapter)
4079 {
4080 	adapter->vlan_prio_bmap = 0xff;
4081 	adapter->phy.link_speed = -1;
4082 	adapter->if_handle = -1;
4083 	adapter->be3_native = false;
4084 	adapter->if_flags = 0;
4085 	if (be_physfn(adapter))
4086 		adapter->cmd_privileges = MAX_PRIVILEGES;
4087 	else
4088 		adapter->cmd_privileges = MIN_PRIVILEGES;
4089 }
4090 
4091 static int be_get_sriov_config(struct be_adapter *adapter)
4092 {
4093 	struct be_resources res = {0};
4094 	int max_vfs, old_vfs;
4095 
4096 	be_cmd_get_profile_config(adapter, &res, RESOURCE_LIMITS, 0);
4097 
4098 	/* Some old versions of BE3 FW don't report max_vfs value */
4099 	if (BE3_chip(adapter) && !res.max_vfs) {
4100 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4101 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4102 	}
4103 
4104 	adapter->pool_res = res;
4105 
4106 	/* If the VFs were not disabled during a previous unload of the driver,
4107 	 * we cannot rely on the PF-pool limits for the TotalVFs value.
4108 	 * Instead, use the TotalVFs value stored in the pci_dev struct.
4109 	 */
4110 	old_vfs = pci_num_vf(adapter->pdev);
4111 	if (old_vfs) {
4112 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4113 			 old_vfs);
4114 
4115 		adapter->pool_res.max_vfs =
4116 			pci_sriov_get_totalvfs(adapter->pdev);
4117 		adapter->num_vfs = old_vfs;
4118 	}
4119 
4120 	return 0;
4121 }
4122 
4123 static void be_alloc_sriov_res(struct be_adapter *adapter)
4124 {
4125 	int old_vfs = pci_num_vf(adapter->pdev);
4126 	u16 num_vf_qs;
4127 	int status;
4128 
4129 	be_get_sriov_config(adapter);
4130 
4131 	if (!old_vfs)
4132 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4133 
4134 	/* When the HW is in an SR-IOV capable configuration, the PF-pool
4135 	 * resources are given to the PF during driver load if there are no
4136 	 * old VFs. This facility is not available in BE3 FW; on Lancer it is
4137 	 * done by FW.
4138 	 */
4139 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4140 		num_vf_qs = be_calculate_vf_qs(adapter, 0);
4141 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4142 						 num_vf_qs);
4143 		if (status)
4144 			dev_err(&adapter->pdev->dev,
4145 				"Failed to optimize SRIOV resources\n");
4146 	}
4147 }
4148 
4149 static int be_get_resources(struct be_adapter *adapter)
4150 {
4151 	struct device *dev = &adapter->pdev->dev;
4152 	struct be_resources res = {0};
4153 	int status;
4154 
4155 	if (BEx_chip(adapter)) {
4156 		BEx_get_resources(adapter, &res);
4157 		adapter->res = res;
4158 	}
4159 
4160 	/* For Lancer, SH, etc., read per-function resource limits from FW.
4161 	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4162 	 * GET_PROFILE_CONFIG returns the PCIe-related PF-pool limits.
4163 	 */
4164 	if (!BEx_chip(adapter)) {
4165 		status = be_cmd_get_func_config(adapter, &res);
4166 		if (status)
4167 			return status;
4168 
4169 		/* If a default RXQ must be created, we'll use up one RSS queue */
4170 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4171 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4172 			res.max_rss_qs -= 1;
4173 
4174 		/* If RoCE may be enabled, stash away half the EQs for RoCE */
4175 		if (be_roce_supported(adapter))
4176 			res.max_evt_qs /= 2;
4177 		adapter->res = res;
4178 	}
4179 
4180 	/* If FW supports RSS default queue, then skip creating non-RSS
4181 	 * queue for non-IP traffic.
4182 	 */
4183 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4184 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4185 
4186 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4187 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4188 		 be_max_rss(adapter), be_max_eqs(adapter),
4189 		 be_max_vfs(adapter));
4190 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4191 		 be_max_uc(adapter), be_max_mc(adapter),
4192 		 be_max_vlans(adapter));
4193 
4194 	/* Sanitize cfg_num_qs based on HW and platform limits */
4195 	adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(),
4196 				    be_max_qs(adapter));
4197 	return 0;
4198 }
4199 
4200 static int be_get_config(struct be_adapter *adapter)
4201 {
4202 	int status, level;
4203 	u16 profile_id;
4204 
4205 	status = be_cmd_get_cntl_attributes(adapter);
4206 	if (status)
4207 		return status;
4208 
4209 	status = be_cmd_query_fw_cfg(adapter);
4210 	if (status)
4211 		return status;
4212 
4213 	if (!lancer_chip(adapter) && be_physfn(adapter))
4214 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4215 
4216 	if (BEx_chip(adapter)) {
4217 		level = be_cmd_get_fw_log_level(adapter);
4218 		adapter->msg_enable =
4219 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4220 	}
4221 
4222 	be_cmd_get_acpi_wol_cap(adapter);
4223 
4224 	be_cmd_query_port_name(adapter);
4225 
4226 	if (be_physfn(adapter)) {
4227 		status = be_cmd_get_active_profile(adapter, &profile_id);
4228 		if (!status)
4229 			dev_info(&adapter->pdev->dev,
4230 				 "Using profile 0x%x\n", profile_id);
4231 	}
4232 
4233 	status = be_get_resources(adapter);
4234 	if (status)
4235 		return status;
4236 
4237 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4238 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4239 	if (!adapter->pmac_id)
4240 		return -ENOMEM;
4241 
4242 	return 0;
4243 }
4244 
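/* If no MAC address has been configured yet, read the permanent MAC from FW
 * and use it as both the current and permanent netdev address.
 */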
4245 static int be_mac_setup(struct be_adapter *adapter)
4246 {
4247 	u8 mac[ETH_ALEN];
4248 	int status;
4249 
4250 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4251 		status = be_cmd_get_perm_mac(adapter, mac);
4252 		if (status)
4253 			return status;
4254 
4255 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4256 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4257 	}
4258 
4259 	return 0;
4260 }
4261 
4262 static void be_schedule_worker(struct be_adapter *adapter)
4263 {
4264 	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4265 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4266 }
4267 
4268 static void be_schedule_err_detection(struct be_adapter *adapter)
4269 {
4270 	schedule_delayed_work(&adapter->be_err_detection_work,
4271 			      msecs_to_jiffies(1000));
4272 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4273 }
4274 
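/* Create the event queues, TX queues, RX CQs and MCC queues, then publish the
 * actual RX/TX queue counts to the network stack.
 */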
4275 static int be_setup_queues(struct be_adapter *adapter)
4276 {
4277 	struct net_device *netdev = adapter->netdev;
4278 	int status;
4279 
4280 	status = be_evt_queues_create(adapter);
4281 	if (status)
4282 		goto err;
4283 
4284 	status = be_tx_qs_create(adapter);
4285 	if (status)
4286 		goto err;
4287 
4288 	status = be_rx_cqs_create(adapter);
4289 	if (status)
4290 		goto err;
4291 
4292 	status = be_mcc_queues_create(adapter);
4293 	if (status)
4294 		goto err;
4295 
4296 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4297 	if (status)
4298 		goto err;
4299 
4300 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4301 	if (status)
4302 		goto err;
4303 
4304 	return 0;
4305 err:
4306 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4307 	return status;
4308 }
4309 
4310 int be_update_queues(struct be_adapter *adapter)
4311 {
4312 	struct net_device *netdev = adapter->netdev;
4313 	int status;
4314 
4315 	if (netif_running(netdev))
4316 		be_close(netdev);
4317 
4318 	be_cancel_worker(adapter);
4319 
4320 	/* If any vectors have been shared with RoCE, we cannot re-program
4321 	 * the MSI-X table.
4322 	 */
4323 	if (!adapter->num_msix_roce_vec)
4324 		be_msix_disable(adapter);
4325 
4326 	be_clear_queues(adapter);
4327 
4328 	if (!msix_enabled(adapter)) {
4329 		status = be_msix_enable(adapter);
4330 		if (status)
4331 			return status;
4332 	}
4333 
4334 	status = be_setup_queues(adapter);
4335 	if (status)
4336 		return status;
4337 
4338 	be_schedule_worker(adapter);
4339 
4340 	if (netif_running(netdev))
4341 		status = be_open(netdev);
4342 
4343 	return status;
4344 }
4345 
4346 static inline int fw_major_num(const char *fw_ver)
4347 {
4348 	int fw_major = 0, i;
4349 
4350 	i = sscanf(fw_ver, "%d.", &fw_major);
4351 	if (i != 1)
4352 		return 0;
4353 
4354 	return fw_major;
4355 }
4356 
4357 /* If any VFs are already enabled, don't FLR the PF */
4358 static bool be_reset_required(struct be_adapter *adapter)
4359 {
4360 	return pci_num_vf(adapter->pdev) ? false : true;
4361 }
4362 
4363 /* Wait for the FW to be ready and perform the required initialization */
4364 static int be_func_init(struct be_adapter *adapter)
4365 {
4366 	int status;
4367 
4368 	status = be_fw_wait_ready(adapter);
4369 	if (status)
4370 		return status;
4371 
4372 	if (be_reset_required(adapter)) {
4373 		status = be_cmd_reset_function(adapter);
4374 		if (status)
4375 			return status;
4376 
4377 		/* Wait for interrupts to quiesce after an FLR */
4378 		msleep(100);
4379 
4380 		/* We can clear all errors when function reset succeeds */
4381 		be_clear_error(adapter, BE_CLEAR_ALL);
4382 	}
4383 
4384 	/* Tell FW we're ready to fire cmds */
4385 	status = be_cmd_fw_init(adapter);
4386 	if (status)
4387 		return status;
4388 
4389 	/* Allow interrupts for other ULPs running on NIC function */
4390 	be_intr_set(adapter, true);
4391 
4392 	return 0;
4393 }
4394 
4395 static int be_setup(struct be_adapter *adapter)
4396 {
4397 	struct device *dev = &adapter->pdev->dev;
4398 	u32 en_flags;
4399 	int status;
4400 
4401 	status = be_func_init(adapter);
4402 	if (status)
4403 		return status;
4404 
4405 	be_setup_init(adapter);
4406 
4407 	if (!lancer_chip(adapter))
4408 		be_cmd_req_native_mode(adapter);
4409 
4410 	/* Invoke this cmd first to get pf_num and vf_num, which are needed
4411 	 * for issuing profile-related cmds
4412 	 */
4413 	if (!BEx_chip(adapter)) {
4414 		status = be_cmd_get_func_config(adapter, NULL);
4415 		if (status)
4416 			return status;
4417 	}
4418 
4419 	if (!BE2_chip(adapter) && be_physfn(adapter))
4420 		be_alloc_sriov_res(adapter);
4421 
4422 	status = be_get_config(adapter);
4423 	if (status)
4424 		goto err;
4425 
4426 	status = be_msix_enable(adapter);
4427 	if (status)
4428 		goto err;
4429 
4430 	/* All the needed filter flags are enabled later in be_open() */
4431 	en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4432 	en_flags = en_flags & be_if_cap_flags(adapter);
4433 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4434 				  &adapter->if_handle, 0);
4435 	if (status)
4436 		goto err;
4437 
4438 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4439 	rtnl_lock();
4440 	status = be_setup_queues(adapter);
4441 	rtnl_unlock();
4442 	if (status)
4443 		goto err;
4444 
4445 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4446 
4447 	status = be_mac_setup(adapter);
4448 	if (status)
4449 		goto err;
4450 
4451 	be_cmd_get_fw_ver(adapter);
4452 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4453 
4454 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4455 		dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4456 			adapter->fw_ver);
4457 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4458 	}
4459 
4460 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4461 					 adapter->rx_fc);
4462 	if (status)
4463 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4464 					&adapter->rx_fc);
4465 
4466 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4467 		 adapter->tx_fc, adapter->rx_fc);
4468 
4469 	if (be_physfn(adapter))
4470 		be_cmd_set_logical_link_config(adapter,
4471 					       IFLA_VF_LINK_STATE_AUTO, 0);
4472 
4473 	if (adapter->num_vfs)
4474 		be_vf_setup(adapter);
4475 
4476 	status = be_cmd_get_phy_info(adapter);
4477 	if (!status && be_pause_supported(adapter))
4478 		adapter->phy.fc_autoneg = 1;
4479 
4480 	be_schedule_worker(adapter);
4481 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4482 	return 0;
4483 err:
4484 	be_clear(adapter);
4485 	return status;
4486 }
4487 
4488 #ifdef CONFIG_NET_POLL_CONTROLLER
4489 static void be_netpoll(struct net_device *netdev)
4490 {
4491 	struct be_adapter *adapter = netdev_priv(netdev);
4492 	struct be_eq_obj *eqo;
4493 	int i;
4494 
4495 	for_all_evt_queues(adapter, eqo, i) {
4496 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4497 		napi_schedule(&eqo->napi);
4498 	}
4499 }
4500 #endif
4501 
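/* Flash a firmware image supplied from user space; Lancer and BEx/Skyhawk use
 * different download paths. The interface must be up for flashing to be
 * allowed.
 */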
4502 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4503 {
4504 	const struct firmware *fw;
4505 	int status;
4506 
4507 	if (!netif_running(adapter->netdev)) {
4508 		dev_err(&adapter->pdev->dev,
4509 			"Firmware load not allowed (interface is down)\n");
4510 		return -ENETDOWN;
4511 	}
4512 
4513 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4514 	if (status)
4515 		goto fw_exit;
4516 
4517 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4518 
4519 	if (lancer_chip(adapter))
4520 		status = lancer_fw_download(adapter, fw);
4521 	else
4522 		status = be_fw_download(adapter, fw);
4523 
4524 	if (!status)
4525 		be_cmd_get_fw_ver(adapter);
4526 
4527 fw_exit:
4528 	release_firmware(fw);
4529 	return status;
4530 }
4531 
4532 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4533 				 u16 flags)
4534 {
4535 	struct be_adapter *adapter = netdev_priv(dev);
4536 	struct nlattr *attr, *br_spec;
4537 	int rem;
4538 	int status = 0;
4539 	u16 mode = 0;
4540 
4541 	if (!sriov_enabled(adapter))
4542 		return -EOPNOTSUPP;
4543 
4544 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4545 	if (!br_spec)
4546 		return -EINVAL;
4547 
4548 	nla_for_each_nested(attr, br_spec, rem) {
4549 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4550 			continue;
4551 
4552 		if (nla_len(attr) < sizeof(mode))
4553 			return -EINVAL;
4554 
4555 		mode = nla_get_u16(attr);
4556 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4557 			return -EOPNOTSUPP;
4558 
4559 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4560 			return -EINVAL;
4561 
4562 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4563 					       adapter->if_handle,
4564 					       mode == BRIDGE_MODE_VEPA ?
4565 					       PORT_FWD_TYPE_VEPA :
4566 					       PORT_FWD_TYPE_VEB, 0);
4567 		if (status)
4568 			goto err;
4569 
4570 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4571 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4572 
4573 		return status;
4574 	}
4575 err:
4576 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4577 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4578 
4579 	return status;
4580 }
4581 
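/* Switch the embedded port between VEB and VEPA forwarding modes via
 * SET_HSW_CONFIG; VEPA is not supported on BE3.
 */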
4582 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4583 				 struct net_device *dev, u32 filter_mask,
4584 				 int nlflags)
4585 {
4586 	struct be_adapter *adapter = netdev_priv(dev);
4587 	int status = 0;
4588 	u8 hsw_mode;
4589 
4590 	/* BE and Lancer chips support VEB mode only */
4591 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4592 		hsw_mode = PORT_FWD_TYPE_VEB;
4593 	} else {
4594 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4595 					       adapter->if_handle, &hsw_mode,
4596 					       NULL);
4597 		if (status)
4598 			return 0;
4599 
4600 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4601 			return 0;
4602 	}
4603 
4604 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4605 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4606 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4607 				       0, 0, nlflags, filter_mask, NULL);
4608 }
4609 
4610 #ifdef CONFIG_BE2NET_VXLAN
4611 /* VxLAN offload Notes:
4612  *
4613  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4614  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4615  * is expected to work across all types of IP tunnels once exported. Skyhawk
4616  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4617  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4618  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4619  * those other tunnels are unexported on the fly through ndo_features_check().
4620  *
4621  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4622  * adds more than one port, disable offloads and don't re-enable them again
4623  * until after all the tunnels are removed.
4624  */
4625 static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
4626 			      __be16 port)
4627 {
4628 	struct be_adapter *adapter = netdev_priv(netdev);
4629 	struct device *dev = &adapter->pdev->dev;
4630 	int status;
4631 
4632 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4633 		return;
4634 
4635 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4636 		adapter->vxlan_port_aliases++;
4637 		return;
4638 	}
4639 
4640 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4641 		dev_info(dev,
4642 			 "Only one UDP port supported for VxLAN offloads\n");
4643 		dev_info(dev, "Disabling VxLAN offloads\n");
4644 		adapter->vxlan_port_count++;
4645 		goto err;
4646 	}
4647 
4648 	if (adapter->vxlan_port_count++ >= 1)
4649 		return;
4650 
4651 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
4652 				     OP_CONVERT_NORMAL_TO_TUNNEL);
4653 	if (status) {
4654 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4655 		goto err;
4656 	}
4657 
4658 	status = be_cmd_set_vxlan_port(adapter, port);
4659 	if (status) {
4660 		dev_warn(dev, "Failed to add VxLAN port\n");
4661 		goto err;
4662 	}
4663 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4664 	adapter->vxlan_port = port;
4665 
4666 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4667 				   NETIF_F_TSO | NETIF_F_TSO6 |
4668 				   NETIF_F_GSO_UDP_TUNNEL;
4669 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4670 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4671 
4672 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4673 		 be16_to_cpu(port));
4674 	return;
4675 err:
4676 	be_disable_vxlan_offloads(adapter);
4677 }
4678 
4679 static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
4680 			      __be16 port)
4681 {
4682 	struct be_adapter *adapter = netdev_priv(netdev);
4683 
4684 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4685 		return;
4686 
4687 	if (adapter->vxlan_port != port)
4688 		goto done;
4689 
4690 	if (adapter->vxlan_port_aliases) {
4691 		adapter->vxlan_port_aliases--;
4692 		return;
4693 	}
4694 
4695 	be_disable_vxlan_offloads(adapter);
4696 
4697 	dev_info(&adapter->pdev->dev,
4698 		 "Disabled VxLAN offloads for UDP port %d\n",
4699 		 be16_to_cpu(port));
4700 done:
4701 	adapter->vxlan_port_count--;
4702 }
4703 
4704 static netdev_features_t be_features_check(struct sk_buff *skb,
4705 					   struct net_device *dev,
4706 					   netdev_features_t features)
4707 {
4708 	struct be_adapter *adapter = netdev_priv(dev);
4709 	u8 l4_hdr = 0;
4710 
4711 	/* The code below restricts offload features for some tunneled packets.
4712 	 * Offload features for normal (non tunnel) packets are unchanged.
4713 	 */
4714 	if (!skb->encapsulation ||
4715 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4716 		return features;
4717 
4718 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
4719 	 * should disable tunnel offload features if it's not a VxLAN packet,
4720 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
4721 	 * allow other tunneled traffic like GRE to work fine while VxLAN
4722 	 * offloads are configured in Skyhawk-R.
4723 	 */
4724 	switch (vlan_get_protocol(skb)) {
4725 	case htons(ETH_P_IP):
4726 		l4_hdr = ip_hdr(skb)->protocol;
4727 		break;
4728 	case htons(ETH_P_IPV6):
4729 		l4_hdr = ipv6_hdr(skb)->nexthdr;
4730 		break;
4731 	default:
4732 		return features;
4733 	}
4734 
4735 	if (l4_hdr != IPPROTO_UDP ||
4736 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4737 	    skb->inner_protocol != htons(ETH_P_TEB) ||
4738 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4739 	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4740 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4741 
4742 	return features;
4743 }
4744 #endif
4745 
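/* Build a physical port id from the HBA port number followed by the
 * controller serial number words (copied in reverse order).
 */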
4746 static int be_get_phys_port_id(struct net_device *dev,
4747 			       struct netdev_phys_item_id *ppid)
4748 {
4749 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4750 	struct be_adapter *adapter = netdev_priv(dev);
4751 	u8 *id;
4752 
4753 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
4754 		return -ENOSPC;
4755 
4756 	ppid->id[0] = adapter->hba_port_num + 1;
4757 	id = &ppid->id[1];
4758 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4759 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4760 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4761 
4762 	ppid->id_len = id_len;
4763 
4764 	return 0;
4765 }
4766 
4767 static const struct net_device_ops be_netdev_ops = {
4768 	.ndo_open		= be_open,
4769 	.ndo_stop		= be_close,
4770 	.ndo_start_xmit		= be_xmit,
4771 	.ndo_set_rx_mode	= be_set_rx_mode,
4772 	.ndo_set_mac_address	= be_mac_addr_set,
4773 	.ndo_change_mtu		= be_change_mtu,
4774 	.ndo_get_stats64	= be_get_stats64,
4775 	.ndo_validate_addr	= eth_validate_addr,
4776 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
4777 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
4778 	.ndo_set_vf_mac		= be_set_vf_mac,
4779 	.ndo_set_vf_vlan	= be_set_vf_vlan,
4780 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
4781 	.ndo_get_vf_config	= be_get_vf_config,
4782 	.ndo_set_vf_link_state  = be_set_vf_link_state,
4783 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4784 #ifdef CONFIG_NET_POLL_CONTROLLER
4785 	.ndo_poll_controller	= be_netpoll,
4786 #endif
4787 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
4788 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
4789 #ifdef CONFIG_NET_RX_BUSY_POLL
4790 	.ndo_busy_poll		= be_busy_poll,
4791 #endif
4792 #ifdef CONFIG_BE2NET_VXLAN
4793 	.ndo_add_vxlan_port	= be_add_vxlan_port,
4794 	.ndo_del_vxlan_port	= be_del_vxlan_port,
4795 	.ndo_features_check	= be_features_check,
4796 #endif
4797 	.ndo_get_phys_port_id   = be_get_phys_port_id,
4798 };
4799 
4800 static void be_netdev_init(struct net_device *netdev)
4801 {
4802 	struct be_adapter *adapter = netdev_priv(netdev);
4803 
4804 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4805 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
4806 		NETIF_F_HW_VLAN_CTAG_TX;
4807 	if (be_multi_rxq(adapter))
4808 		netdev->hw_features |= NETIF_F_RXHASH;
4809 
4810 	netdev->features |= netdev->hw_features |
4811 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
4812 
4813 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4814 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4815 
4816 	netdev->priv_flags |= IFF_UNICAST_FLT;
4817 
4818 	netdev->flags |= IFF_MULTICAST;
4819 
4820 	netif_set_gso_max_size(netdev, 65535 - ETH_HLEN);
4821 
4822 	netdev->netdev_ops = &be_netdev_ops;
4823 
4824 	netdev->ethtool_ops = &be_ethtool_ops;
4825 }
4826 
4827 static void be_cleanup(struct be_adapter *adapter)
4828 {
4829 	struct net_device *netdev = adapter->netdev;
4830 
4831 	rtnl_lock();
4832 	netif_device_detach(netdev);
4833 	if (netif_running(netdev))
4834 		be_close(netdev);
4835 	rtnl_unlock();
4836 
4837 	be_clear(adapter);
4838 }
4839 
4840 static int be_resume(struct be_adapter *adapter)
4841 {
4842 	struct net_device *netdev = adapter->netdev;
4843 	int status;
4844 
4845 	status = be_setup(adapter);
4846 	if (status)
4847 		return status;
4848 
4849 	if (netif_running(netdev)) {
4850 		status = be_open(netdev);
4851 		if (status)
4852 			return status;
4853 	}
4854 
4855 	netif_device_attach(netdev);
4856 
4857 	return 0;
4858 }
4859 
4860 static int be_err_recover(struct be_adapter *adapter)
4861 {
4862 	struct device *dev = &adapter->pdev->dev;
4863 	int status;
4864 
4865 	status = be_resume(adapter);
4866 	if (status)
4867 		goto err;
4868 
4869 	dev_info(dev, "Adapter recovery successful\n");
4870 	return 0;
4871 err:
4872 	if (be_physfn(adapter))
4873 		dev_err(dev, "Adapter recovery failed\n");
4874 	else
4875 		dev_err(dev, "Re-trying adapter recovery\n");
4876 
4877 	return status;
4878 }
4879 
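/* Periodic error-detection worker: check for HW errors and, when one is
 * found, tear the adapter down and (on Lancer only) attempt recovery.
 */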
4880 static void be_err_detection_task(struct work_struct *work)
4881 {
4882 	struct be_adapter *adapter =
4883 				container_of(work, struct be_adapter,
4884 					     be_err_detection_work.work);
4885 	int status = 0;
4886 
4887 	be_detect_error(adapter);
4888 
4889 	if (be_check_error(adapter, BE_ERROR_HW)) {
4890 		be_cleanup(adapter);
4891 
4892 		/* As of now, error recovery is supported only on Lancer */
4893 		if (lancer_chip(adapter))
4894 			status = be_err_recover(adapter);
4895 	}
4896 
4897 	/* Always attempt recovery on VFs */
4898 	if (!status || be_virtfn(adapter))
4899 		be_schedule_err_detection(adapter);
4900 }
4901 
4902 static void be_log_sfp_info(struct be_adapter *adapter)
4903 {
4904 	int status;
4905 
4906 	status = be_cmd_query_sfp_info(adapter);
4907 	if (!status) {
4908 		dev_err(&adapter->pdev->dev,
4909 			"Unqualified SFP+ detected on %c from %s part no: %s",
4910 			adapter->port_name, adapter->phy.vendor_name,
4911 			adapter->phy.vendor_pn);
4912 	}
4913 	adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP;
4914 }
4915 
4916 static void be_worker(struct work_struct *work)
4917 {
4918 	struct be_adapter *adapter =
4919 		container_of(work, struct be_adapter, work.work);
4920 	struct be_rx_obj *rxo;
4921 	int i;
4922 
4923 	/* when interrupts are not yet enabled, just reap any pending
4924 	 * mcc completions
4925 	 */
4926 	if (!netif_running(adapter->netdev)) {
4927 		local_bh_disable();
4928 		be_process_mcc(adapter);
4929 		local_bh_enable();
4930 		goto reschedule;
4931 	}
4932 
4933 	if (!adapter->stats_cmd_sent) {
4934 		if (lancer_chip(adapter))
4935 			lancer_cmd_get_pport_stats(adapter,
4936 						   &adapter->stats_cmd);
4937 		else
4938 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
4939 	}
4940 
4941 	if (be_physfn(adapter) &&
4942 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
4943 		be_cmd_get_die_temperature(adapter);
4944 
4945 	for_all_rx_queues(adapter, rxo, i) {
4946 		/* Replenish RX-queues starved due to memory
4947 		 * allocation failures.
4948 		 */
4949 		if (rxo->rx_post_starved)
4950 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
4951 	}
4952 
4953 	/* EQ-delay update for Skyhawk is done while notifying EQ */
4954 	if (!skyhawk_chip(adapter))
4955 		be_eqd_update(adapter, false);
4956 
4957 	if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP)
4958 		be_log_sfp_info(adapter);
4959 
4960 reschedule:
4961 	adapter->work_counter++;
4962 	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4963 }
4964 
4965 static void be_unmap_pci_bars(struct be_adapter *adapter)
4966 {
4967 	if (adapter->csr)
4968 		pci_iounmap(adapter->pdev, adapter->csr);
4969 	if (adapter->db)
4970 		pci_iounmap(adapter->pdev, adapter->db);
4971 }
4972 
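/* The doorbell BAR is BAR 0 on Lancer and on VFs, and BAR 4 on BEx/Skyhawk
 * PFs.
 */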
4973 static int db_bar(struct be_adapter *adapter)
4974 {
4975 	if (lancer_chip(adapter) || be_virtfn(adapter))
4976 		return 0;
4977 	else
4978 		return 4;
4979 }
4980 
4981 static int be_roce_map_pci_bars(struct be_adapter *adapter)
4982 {
4983 	if (skyhawk_chip(adapter)) {
4984 		adapter->roce_db.size = 4096;
4985 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
4986 							      db_bar(adapter));
4987 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
4988 							       db_bar(adapter));
4989 	}
4990 	return 0;
4991 }
4992 
4993 static int be_map_pci_bars(struct be_adapter *adapter)
4994 {
4995 	struct pci_dev *pdev = adapter->pdev;
4996 	u8 __iomem *addr;
4997 	u32 sli_intf;
4998 
4999 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5000 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5001 				SLI_INTF_FAMILY_SHIFT;
5002 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5003 
5004 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5005 		adapter->csr = pci_iomap(pdev, 2, 0);
5006 		if (!adapter->csr)
5007 			return -ENOMEM;
5008 	}
5009 
5010 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5011 	if (!addr)
5012 		goto pci_map_err;
5013 	adapter->db = addr;
5014 
5015 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5016 		if (be_physfn(adapter)) {
5017 			/* PCICFG is the 2nd BAR in BE2 */
5018 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5019 			if (!addr)
5020 				goto pci_map_err;
5021 			adapter->pcicfg = addr;
5022 		} else {
5023 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5024 		}
5025 	}
5026 
5027 	be_roce_map_pci_bars(adapter);
5028 	return 0;
5029 
5030 pci_map_err:
5031 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5032 	be_unmap_pci_bars(adapter);
5033 	return -ENOMEM;
5034 }
5035 
5036 static void be_drv_cleanup(struct be_adapter *adapter)
5037 {
5038 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5039 	struct device *dev = &adapter->pdev->dev;
5040 
5041 	if (mem->va)
5042 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5043 
5044 	mem = &adapter->rx_filter;
5045 	if (mem->va)
5046 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5047 
5048 	mem = &adapter->stats_cmd;
5049 	if (mem->va)
5050 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5051 }
5052 
5053 /* Allocate and initialize various fields in be_adapter struct */
5054 static int be_drv_init(struct be_adapter *adapter)
5055 {
5056 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5057 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5058 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5059 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5060 	struct device *dev = &adapter->pdev->dev;
5061 	int status = 0;
5062 
5063 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5064 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5065 						 &mbox_mem_alloc->dma,
5066 						 GFP_KERNEL);
5067 	if (!mbox_mem_alloc->va)
5068 		return -ENOMEM;
5069 
5070 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5071 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5072 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5073 
5074 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5075 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5076 					    &rx_filter->dma, GFP_KERNEL);
5077 	if (!rx_filter->va) {
5078 		status = -ENOMEM;
5079 		goto free_mbox;
5080 	}
5081 
5082 	if (lancer_chip(adapter))
5083 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5084 	else if (BE2_chip(adapter))
5085 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5086 	else if (BE3_chip(adapter))
5087 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5088 	else
5089 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5090 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5091 					    &stats_cmd->dma, GFP_KERNEL);
5092 	if (!stats_cmd->va) {
5093 		status = -ENOMEM;
5094 		goto free_rx_filter;
5095 	}
5096 
5097 	mutex_init(&adapter->mbox_lock);
5098 	spin_lock_init(&adapter->mcc_lock);
5099 	spin_lock_init(&adapter->mcc_cq_lock);
5100 	init_completion(&adapter->et_cmd_compl);
5101 
5102 	pci_save_state(adapter->pdev);
5103 
5104 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5105 	INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5106 			  be_err_detection_task);
5107 
5108 	adapter->rx_fc = true;
5109 	adapter->tx_fc = true;
5110 
5111 	/* Must be a power of 2 or else MODULO will BUG_ON */
5112 	adapter->be_get_temp_freq = 64;
5113 
5114 	return 0;
5115 
5116 free_rx_filter:
5117 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5118 free_mbox:
5119 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5120 			  mbox_mem_alloc->dma);
5121 	return status;
5122 }
5123 
5124 static void be_remove(struct pci_dev *pdev)
5125 {
5126 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5127 
5128 	if (!adapter)
5129 		return;
5130 
5131 	be_roce_dev_remove(adapter);
5132 	be_intr_set(adapter, false);
5133 
5134 	be_cancel_err_detection(adapter);
5135 
5136 	unregister_netdev(adapter->netdev);
5137 
5138 	be_clear(adapter);
5139 
5140 	/* tell fw we're done with firing cmds */
5141 	be_cmd_fw_clean(adapter);
5142 
5143 	be_unmap_pci_bars(adapter);
5144 	be_drv_cleanup(adapter);
5145 
5146 	pci_disable_pcie_error_reporting(pdev);
5147 
5148 	pci_release_regions(pdev);
5149 	pci_disable_device(pdev);
5150 
5151 	free_netdev(adapter->netdev);
5152 }
5153 
5154 static ssize_t be_hwmon_show_temp(struct device *dev,
5155 				  struct device_attribute *dev_attr,
5156 				  char *buf)
5157 {
5158 	struct be_adapter *adapter = dev_get_drvdata(dev);
5159 
5160 	/* Unit: millidegree Celsius */
5161 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5162 		return -EIO;
5163 	else
5164 		return sprintf(buf, "%u\n",
5165 			       adapter->hwmon_info.be_on_die_temp * 1000);
5166 }
5167 
5168 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5169 			  be_hwmon_show_temp, NULL, 1);
5170 
5171 static struct attribute *be_hwmon_attrs[] = {
5172 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5173 	NULL
5174 };
5175 
5176 ATTRIBUTE_GROUPS(be_hwmon);
5177 
5178 static char *mc_name(struct be_adapter *adapter)
5179 {
5180 	char *str = "";	/* default */
5181 
5182 	switch (adapter->mc_type) {
5183 	case UMC:
5184 		str = "UMC";
5185 		break;
5186 	case FLEX10:
5187 		str = "FLEX10";
5188 		break;
5189 	case vNIC1:
5190 		str = "vNIC-1";
5191 		break;
5192 	case nPAR:
5193 		str = "nPAR";
5194 		break;
5195 	case UFP:
5196 		str = "UFP";
5197 		break;
5198 	case vNIC2:
5199 		str = "vNIC-2";
5200 		break;
5201 	default:
5202 		str = "";
5203 	}
5204 
5205 	return str;
5206 }
5207 
5208 static inline char *func_name(struct be_adapter *adapter)
5209 {
5210 	return be_physfn(adapter) ? "PF" : "VF";
5211 }
5212 
5213 static inline char *nic_name(struct pci_dev *pdev)
5214 {
5215 	switch (pdev->device) {
5216 	case OC_DEVICE_ID1:
5217 		return OC_NAME;
5218 	case OC_DEVICE_ID2:
5219 		return OC_NAME_BE;
5220 	case OC_DEVICE_ID3:
5221 	case OC_DEVICE_ID4:
5222 		return OC_NAME_LANCER;
5223 	case BE_DEVICE_ID2:
5224 		return BE3_NAME;
5225 	case OC_DEVICE_ID5:
5226 	case OC_DEVICE_ID6:
5227 		return OC_NAME_SH;
5228 	default:
5229 		return BE_NAME;
5230 	}
5231 }
5232 
5233 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5234 {
5235 	struct be_adapter *adapter;
5236 	struct net_device *netdev;
5237 	int status = 0;
5238 
5239 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5240 
5241 	status = pci_enable_device(pdev);
5242 	if (status)
5243 		goto do_none;
5244 
5245 	status = pci_request_regions(pdev, DRV_NAME);
5246 	if (status)
5247 		goto disable_dev;
5248 	pci_set_master(pdev);
5249 
5250 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5251 	if (!netdev) {
5252 		status = -ENOMEM;
5253 		goto rel_reg;
5254 	}
5255 	adapter = netdev_priv(netdev);
5256 	adapter->pdev = pdev;
5257 	pci_set_drvdata(pdev, adapter);
5258 	adapter->netdev = netdev;
5259 	SET_NETDEV_DEV(netdev, &pdev->dev);
5260 
5261 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5262 	if (!status) {
5263 		netdev->features |= NETIF_F_HIGHDMA;
5264 	} else {
5265 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5266 		if (status) {
5267 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5268 			goto free_netdev;
5269 		}
5270 	}
5271 
5272 	status = pci_enable_pcie_error_reporting(pdev);
5273 	if (!status)
5274 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5275 
5276 	status = be_map_pci_bars(adapter);
5277 	if (status)
5278 		goto free_netdev;
5279 
5280 	status = be_drv_init(adapter);
5281 	if (status)
5282 		goto unmap_bars;
5283 
5284 	status = be_setup(adapter);
5285 	if (status)
5286 		goto drv_cleanup;
5287 
5288 	be_netdev_init(netdev);
5289 	status = register_netdev(netdev);
5290 	if (status != 0)
5291 		goto unsetup;
5292 
5293 	be_roce_dev_add(adapter);
5294 
5295 	be_schedule_err_detection(adapter);
5296 
5297 	/* On-die temperature is not supported on VFs */
5298 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5299 		adapter->hwmon_info.hwmon_dev =
5300 			devm_hwmon_device_register_with_groups(&pdev->dev,
5301 							       DRV_NAME,
5302 							       adapter,
5303 							       be_hwmon_groups);
5304 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5305 	}
5306 
5307 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5308 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5309 
5310 	return 0;
5311 
5312 unsetup:
5313 	be_clear(adapter);
5314 drv_cleanup:
5315 	be_drv_cleanup(adapter);
5316 unmap_bars:
5317 	be_unmap_pci_bars(adapter);
5318 free_netdev:
5319 	free_netdev(netdev);
5320 rel_reg:
5321 	pci_release_regions(pdev);
5322 disable_dev:
5323 	pci_disable_device(pdev);
5324 do_none:
5325 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5326 	return status;
5327 }
5328 
5329 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5330 {
5331 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5332 
5333 	if (adapter->wol_en)
5334 		be_setup_wol(adapter, true);
5335 
5336 	be_intr_set(adapter, false);
5337 	be_cancel_err_detection(adapter);
5338 
5339 	be_cleanup(adapter);
5340 
5341 	pci_save_state(pdev);
5342 	pci_disable_device(pdev);
5343 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5344 	return 0;
5345 }
5346 
5347 static int be_pci_resume(struct pci_dev *pdev)
5348 {
5349 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5350 	int status = 0;
5351 
5352 	status = pci_enable_device(pdev);
5353 	if (status)
5354 		return status;
5355 
5356 	pci_restore_state(pdev);
5357 
5358 	status = be_resume(adapter);
5359 	if (status)
5360 		return status;
5361 
5362 	be_schedule_err_detection(adapter);
5363 
5364 	if (adapter->wol_en)
5365 		be_setup_wol(adapter, false);
5366 
5367 	return 0;
5368 }
5369 
5370 /*
5371  * An FLR will stop BE from DMAing any data.
5372  */
5373 static void be_shutdown(struct pci_dev *pdev)
5374 {
5375 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5376 
5377 	if (!adapter)
5378 		return;
5379 
5380 	be_roce_dev_shutdown(adapter);
5381 	cancel_delayed_work_sync(&adapter->work);
5382 	be_cancel_err_detection(adapter);
5383 
5384 	netif_device_detach(adapter->netdev);
5385 
5386 	be_cmd_reset_function(adapter);
5387 
5388 	pci_disable_device(pdev);
5389 }
5390 
5391 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5392 					    pci_channel_state_t state)
5393 {
5394 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5395 
5396 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5397 
5398 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5399 		be_set_error(adapter, BE_ERROR_EEH);
5400 
5401 		be_cancel_err_detection(adapter);
5402 
5403 		be_cleanup(adapter);
5404 	}
5405 
5406 	if (state == pci_channel_io_perm_failure)
5407 		return PCI_ERS_RESULT_DISCONNECT;
5408 
5409 	pci_disable_device(pdev);
5410 
5411 	/* The error could cause the FW to trigger a flash debug dump.
5412 	 * Resetting the card while flash dump is in progress
5413 	 * can cause it not to recover; wait for it to finish.
5414 	 * Wait only for first function as it is needed only once per
5415 	 * adapter.
5416 	 */
5417 	if (pdev->devfn == 0)
5418 		ssleep(30);
5419 
5420 	return PCI_ERS_RESULT_NEED_RESET;
5421 }
5422 
5423 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5424 {
5425 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5426 	int status;
5427 
5428 	dev_info(&adapter->pdev->dev, "EEH reset\n");
5429 
5430 	status = pci_enable_device(pdev);
5431 	if (status)
5432 		return PCI_ERS_RESULT_DISCONNECT;
5433 
5434 	pci_set_master(pdev);
5435 	pci_restore_state(pdev);
5436 
5437 	/* Check if card is ok and fw is ready */
5438 	dev_info(&adapter->pdev->dev,
5439 		 "Waiting for FW to be ready after EEH reset\n");
5440 	status = be_fw_wait_ready(adapter);
5441 	if (status)
5442 		return PCI_ERS_RESULT_DISCONNECT;
5443 
5444 	pci_cleanup_aer_uncorrect_error_status(pdev);
5445 	be_clear_error(adapter, BE_CLEAR_ALL);
5446 	return PCI_ERS_RESULT_RECOVERED;
5447 }
5448 
5449 static void be_eeh_resume(struct pci_dev *pdev)
5450 {
5451 	int status = 0;
5452 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5453 
5454 	dev_info(&adapter->pdev->dev, "EEH resume\n");
5455 
5456 	pci_save_state(pdev);
5457 
5458 	status = be_resume(adapter);
5459 	if (status)
5460 		goto err;
5461 
5462 	be_schedule_err_detection(adapter);
5463 	return;
5464 err:
5465 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5466 }
5467 
5468 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5469 {
5470 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5471 	u16 num_vf_qs;
5472 	int status;
5473 
5474 	if (!num_vfs)
5475 		be_vf_clear(adapter);
5476 
5477 	adapter->num_vfs = num_vfs;
5478 
5479 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5480 		dev_warn(&pdev->dev,
5481 			 "Cannot disable VFs while they are assigned\n");
5482 		return -EBUSY;
5483 	}
5484 
5485 	/* When the HW is in SRIOV capable configuration, the PF-pool resources
5486 	 * are equally distributed across the max-number of VFs. The user may
5487 	 * request only a subset of the max-vfs to be enabled.
5488 	 * Based on num_vfs, redistribute the resources across num_vfs so that
5489 	 * each VF will have access to more resources.
5490 	 * This facility is not available in BE3 FW.
5491 	 * Also, this is done by FW in Lancer chip.
5492 	 */
5493 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5494 		num_vf_qs = be_calculate_vf_qs(adapter, adapter->num_vfs);
5495 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5496 						 adapter->num_vfs, num_vf_qs);
5497 		if (status)
5498 			dev_err(&pdev->dev,
5499 				"Failed to optimize SR-IOV resources\n");
5500 	}
5501 
5502 	status = be_get_resources(adapter);
5503 	if (status)
5504 		return be_cmd_status(status);
5505 
5506 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5507 	rtnl_lock();
5508 	status = be_update_queues(adapter);
5509 	rtnl_unlock();
5510 	if (status)
5511 		return be_cmd_status(status);
5512 
5513 	if (adapter->num_vfs)
5514 		status = be_vf_setup(adapter);
5515 
5516 	if (!status)
5517 		return adapter->num_vfs;
5518 
5519 	return 0;
5520 }
5521 
5522 static const struct pci_error_handlers be_eeh_handlers = {
5523 	.error_detected = be_eeh_err_detected,
5524 	.slot_reset = be_eeh_reset,
5525 	.resume = be_eeh_resume,
5526 };
5527 
5528 static struct pci_driver be_driver = {
5529 	.name = DRV_NAME,
5530 	.id_table = be_dev_ids,
5531 	.probe = be_probe,
5532 	.remove = be_remove,
5533 	.suspend = be_suspend,
5534 	.resume = be_pci_resume,
5535 	.shutdown = be_shutdown,
5536 	.sriov_configure = be_pci_sriov_configure,
5537 	.err_handler = &be_eeh_handlers
5538 };
5539 
5540 static int __init be_init_module(void)
5541 {
5542 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5543 	    rx_frag_size != 2048) {
5544 		printk(KERN_WARNING DRV_NAME
5545 			" : Module param rx_frag_size must be 2048/4096/8192."
5546 			" Using 2048\n");
5547 		rx_frag_size = 2048;
5548 	}
5549 
5550 	if (num_vfs > 0) {
5551 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5552 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5553 	}
5554 
5555 	return pci_register_driver(&be_driver);
5556 }
5557 module_init(be_init_module);
5558 
5559 static void __exit be_exit_module(void)
5560 {
5561 	pci_unregister_driver(&be_driver);
5562 }
5563 module_exit(be_exit_module);
5564