1 /*
2  * Linux driver for VMware's vmxnet3 ethernet NIC.
3  *
4  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; version 2 of the License and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13  * NON INFRINGEMENT. See the GNU General Public License for more
14  * details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * The full GNU General Public License is included in this distribution in
21  * the file called "COPYING".
22  *
23  * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24  *
25  */
26 
27 #include <linux/module.h>
28 #include <net/ip6_checksum.h>
29 
30 #include "vmxnet3_int.h"
31 
32 char vmxnet3_driver_name[] = "vmxnet3";
33 #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34 
35 /*
36  * PCI Device ID Table
37  * Last entry must be all 0s
38  */
39 static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40 	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41 	{0}
42 };
43 
44 MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45 
46 static int enable_mq = 1;
47 
48 static void
49 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50 
51 /*
52  *    Enable/Disable the given intr
53  */
54 static void
55 vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56 {
57 	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58 }
59 
60 
61 static void
62 vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63 {
64 	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65 }
66 
67 
68 /*
69  *    Enable/Disable all intrs used by the device
70  */
71 static void
72 vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73 {
74 	int i;
75 
76 	for (i = 0; i < adapter->intr.num_intrs; i++)
77 		vmxnet3_enable_intr(adapter, i);
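	/* also clear the device-level "disable all" bit in the shared area */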
78 	adapter->shared->devRead.intrConf.intrCtrl &=
79 					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80 }
81 
82 
83 static void
84 vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85 {
86 	int i;
87 
88 	adapter->shared->devRead.intrConf.intrCtrl |=
89 					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90 	for (i = 0; i < adapter->intr.num_intrs; i++)
91 		vmxnet3_disable_intr(adapter, i);
92 }
93 
94 
95 static void
96 vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97 {
98 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99 }
100 
101 
102 static bool
103 vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104 {
105 	return tq->stopped;
106 }
107 
108 
109 static void
110 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111 {
112 	tq->stopped = false;
113 	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114 }
115 
116 
117 static void
118 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119 {
120 	tq->stopped = false;
121 	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122 }
123 
124 
125 static void
126 vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127 {
128 	tq->stopped = true;
129 	tq->num_stop++;
130 	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131 }
132 
133 
134 /*
135  * Check the link state. This may start or stop the tx queue.
136  */
137 static void
138 vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139 {
140 	u32 ret;
141 	int i;
142 	unsigned long flags;
143 
144 	spin_lock_irqsave(&adapter->cmd_lock, flags);
145 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146 	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148 
149 	adapter->link_speed = ret >> 16;
150 	if (ret & 1) { /* Link is up. */
151 		netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152 			    adapter->link_speed);
153 		netif_carrier_on(adapter->netdev);
154 
155 		if (affectTxQueue) {
156 			for (i = 0; i < adapter->num_tx_queues; i++)
157 				vmxnet3_tq_start(&adapter->tx_queue[i],
158 						 adapter);
159 		}
160 	} else {
161 		netdev_info(adapter->netdev, "NIC Link is Down\n");
162 		netif_carrier_off(adapter->netdev);
163 
164 		if (affectTxQueue) {
165 			for (i = 0; i < adapter->num_tx_queues; i++)
166 				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167 		}
168 	}
169 }
170 
171 static void
172 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173 {
174 	int i;
175 	unsigned long flags;
176 	u32 events = le32_to_cpu(adapter->shared->ecr);
177 	if (!events)
178 		return;
179 
180 	vmxnet3_ack_events(adapter, events);
181 
182 	/* Check if link state has changed */
183 	if (events & VMXNET3_ECR_LINK)
184 		vmxnet3_check_link(adapter, true);
185 
186 	/* Check if there is an error on xmit/recv queues */
187 	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188 		spin_lock_irqsave(&adapter->cmd_lock, flags);
189 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190 				       VMXNET3_CMD_GET_QUEUE_STATUS);
191 		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192 
193 		for (i = 0; i < adapter->num_tx_queues; i++)
194 			if (adapter->tqd_start[i].status.stopped)
195 				dev_err(&adapter->netdev->dev,
196 					"%s: tq[%d] error 0x%x\n",
197 					adapter->netdev->name, i, le32_to_cpu(
198 					adapter->tqd_start[i].status.error));
199 		for (i = 0; i < adapter->num_rx_queues; i++)
200 			if (adapter->rqd_start[i].status.stopped)
201 				dev_err(&adapter->netdev->dev,
202 					"%s: rq[%d] error 0x%x\n",
203 					adapter->netdev->name, i,
204 					adapter->rqd_start[i].status.error);
205 
206 		schedule_work(&adapter->work);
207 	}
208 }
209 
210 #ifdef __BIG_ENDIAN_BITFIELD
211 /*
212  * The device expects the bitfields in shared structures to be written in
213  * little endian. When CPU is big endian, the following routines are used to
214  * correctly read and write into ABI.
215  * The general technique used here is : double word bitfields are defined in
216  * opposite order for big endian architecture. Then before reading them in
217  * driver the complete double word is translated using le32_to_cpu. Similarly
218  * After the driver writes into bitfields, cpu_to_le32 is used to translate the
219  * double words into required format.
220  * In order to avoid touching bits in shared structure more than once, temporary
221  * descriptors are used. These are passed as srcDesc to following functions.
222  */
223 static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224 				struct Vmxnet3_RxDesc *dstDesc)
225 {
226 	u32 *src = (u32 *)srcDesc + 2;
227 	u32 *dst = (u32 *)dstDesc + 2;
228 	dstDesc->addr = le64_to_cpu(srcDesc->addr);
229 	*dst = le32_to_cpu(*src);
230 	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231 }
232 
233 static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234 			       struct Vmxnet3_TxDesc *dstDesc)
235 {
236 	int i;
237 	u32 *src = (u32 *)(srcDesc + 1);
238 	u32 *dst = (u32 *)(dstDesc + 1);
239 
240 	/* Working backwards so that the gen bit is set at the end. */
241 	for (i = 2; i > 0; i--) {
242 		src--;
243 		dst--;
244 		*dst = cpu_to_le32(*src);
245 	}
246 }
247 
248 
249 static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250 				struct Vmxnet3_RxCompDesc *dstDesc)
251 {
252 	int i = 0;
253 	u32 *src = (u32 *)srcDesc;
254 	u32 *dst = (u32 *)dstDesc;
255 	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256 		*dst = le32_to_cpu(*src);
257 		src++;
258 		dst++;
259 	}
260 }
261 
262 
263 /* Used to read bitfield values from double words. */
264 static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265 {
266 	u32 temp = le32_to_cpu(*bitfield);
267 	u32 mask = ((1 << size) - 1) << pos;
268 	temp &= mask;
269 	temp >>= pos;
270 	return temp;
271 }
272 
273 
274 
275 #endif  /* __BIG_ENDIAN_BITFIELD */
276 
277 #ifdef __BIG_ENDIAN_BITFIELD
278 
279 #   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280 			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281 			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282 #   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283 			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284 			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285 #   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286 			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287 			VMXNET3_TCD_GEN_SIZE)
288 #   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289 			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291 			(dstrcd) = (tmp); \
292 			vmxnet3_RxCompToCPU((rcd), (tmp)); \
293 		} while (0)
294 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295 			(dstrxd) = (tmp); \
296 			vmxnet3_RxDescToCPU((rxd), (tmp)); \
297 		} while (0)
298 
299 #else
300 
301 #   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302 #   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303 #   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304 #   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307 
308 #endif /* __BIG_ENDIAN_BITFIELD  */
309 
310 
311 static void
312 vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313 		     struct pci_dev *pdev)
314 {
315 	if (tbi->map_type == VMXNET3_MAP_SINGLE)
316 		pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
317 				 PCI_DMA_TODEVICE);
318 	else if (tbi->map_type == VMXNET3_MAP_PAGE)
319 		pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
320 			       PCI_DMA_TODEVICE);
321 	else
322 		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323 
324 	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325 }
326 
327 
328 static int
329 vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330 		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
331 {
332 	struct sk_buff *skb;
333 	int entries = 0;
334 
335 	/* no out of order completion */
336 	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337 	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338 
339 	skb = tq->buf_info[eop_idx].skb;
340 	BUG_ON(skb == NULL);
341 	tq->buf_info[eop_idx].skb = NULL;
342 
343 	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
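	/* eop_idx now points just past the EOP desc, so the loop below
	 * unmaps every buffer of the pkt, from its SOP up to and
	 * including its EOP
	 */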
344 
345 	while (tq->tx_ring.next2comp != eop_idx) {
346 		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347 				     pdev);
348 
		/* update next2comp w/o tx_lock. Since we only make more,
		 * never fewer, tx ring entries available here, the worst
		 * case is that the tx routine reads a stale count and
		 * needlessly re-queues a pkt due to seemingly insufficient
		 * tx ring entries.
		 */
354 		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355 		entries++;
356 	}
357 
358 	dev_kfree_skb_any(skb);
359 	return entries;
360 }
361 
362 
363 static int
364 vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365 			struct vmxnet3_adapter *adapter)
366 {
367 	int completed = 0;
368 	union Vmxnet3_GenericDesc *gdesc;
369 
370 	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
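	/* a completion entry is valid only while its gen bit matches the
	 * ring's current gen; the bit toggles each time the ring wraps
	 */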
371 	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372 		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373 					       &gdesc->tcd), tq, adapter->pdev,
374 					       adapter);
375 
376 		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377 		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378 	}
379 
380 	if (completed) {
381 		spin_lock(&tq->tx_lock);
382 		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383 			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384 			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385 			     netif_carrier_ok(adapter->netdev))) {
386 			vmxnet3_tq_wake(tq, adapter);
387 		}
388 		spin_unlock(&tq->tx_lock);
389 	}
390 	return completed;
391 }
392 
393 
394 static void
395 vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396 		   struct vmxnet3_adapter *adapter)
397 {
398 	int i;
399 
400 	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401 		struct vmxnet3_tx_buf_info *tbi;
402 
403 		tbi = tq->buf_info + tq->tx_ring.next2comp;
404 
405 		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406 		if (tbi->skb) {
407 			dev_kfree_skb_any(tbi->skb);
408 			tbi->skb = NULL;
409 		}
410 		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411 	}
412 
413 	/* sanity check, verify all buffers are indeed unmapped and freed */
414 	for (i = 0; i < tq->tx_ring.size; i++) {
415 		BUG_ON(tq->buf_info[i].skb != NULL ||
416 		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417 	}
418 
419 	tq->tx_ring.gen = VMXNET3_INIT_GEN;
420 	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421 
422 	tq->comp_ring.gen = VMXNET3_INIT_GEN;
423 	tq->comp_ring.next2proc = 0;
424 }
425 
426 
427 static void
428 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429 		   struct vmxnet3_adapter *adapter)
430 {
431 	if (tq->tx_ring.base) {
432 		pci_free_consistent(adapter->pdev, tq->tx_ring.size *
433 				    sizeof(struct Vmxnet3_TxDesc),
434 				    tq->tx_ring.base, tq->tx_ring.basePA);
435 		tq->tx_ring.base = NULL;
436 	}
437 	if (tq->data_ring.base) {
438 		pci_free_consistent(adapter->pdev, tq->data_ring.size *
439 				    sizeof(struct Vmxnet3_TxDataDesc),
440 				    tq->data_ring.base, tq->data_ring.basePA);
441 		tq->data_ring.base = NULL;
442 	}
443 	if (tq->comp_ring.base) {
444 		pci_free_consistent(adapter->pdev, tq->comp_ring.size *
445 				    sizeof(struct Vmxnet3_TxCompDesc),
446 				    tq->comp_ring.base, tq->comp_ring.basePA);
447 		tq->comp_ring.base = NULL;
448 	}
449 	kfree(tq->buf_info);
450 	tq->buf_info = NULL;
451 }
452 
453 
454 /* Destroy all tx queues */
455 void
456 vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
457 {
458 	int i;
459 
460 	for (i = 0; i < adapter->num_tx_queues; i++)
461 		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
462 }
463 
464 
465 static void
466 vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
467 		struct vmxnet3_adapter *adapter)
468 {
469 	int i;
470 
471 	/* reset the tx ring contents to 0 and reset the tx ring states */
472 	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
473 	       sizeof(struct Vmxnet3_TxDesc));
474 	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
475 	tq->tx_ring.gen = VMXNET3_INIT_GEN;
476 
477 	memset(tq->data_ring.base, 0, tq->data_ring.size *
478 	       sizeof(struct Vmxnet3_TxDataDesc));
479 
480 	/* reset the tx comp ring contents to 0 and reset comp ring states */
481 	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
482 	       sizeof(struct Vmxnet3_TxCompDesc));
483 	tq->comp_ring.next2proc = 0;
484 	tq->comp_ring.gen = VMXNET3_INIT_GEN;
485 
486 	/* reset the bookkeeping data */
487 	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
488 	for (i = 0; i < tq->tx_ring.size; i++)
489 		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
490 
491 	/* stats are not reset */
492 }
493 
494 
495 static int
496 vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
497 		  struct vmxnet3_adapter *adapter)
498 {
499 	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
500 	       tq->comp_ring.base || tq->buf_info);
501 
502 	tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
503 			   * sizeof(struct Vmxnet3_TxDesc),
504 			   &tq->tx_ring.basePA);
505 	if (!tq->tx_ring.base) {
506 		netdev_err(adapter->netdev, "failed to allocate tx ring\n");
507 		goto err;
508 	}
509 
510 	tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
511 			     tq->data_ring.size *
512 			     sizeof(struct Vmxnet3_TxDataDesc),
513 			     &tq->data_ring.basePA);
514 	if (!tq->data_ring.base) {
515 		netdev_err(adapter->netdev, "failed to allocate data ring\n");
516 		goto err;
517 	}
518 
519 	tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
520 			     tq->comp_ring.size *
521 			     sizeof(struct Vmxnet3_TxCompDesc),
522 			     &tq->comp_ring.basePA);
523 	if (!tq->comp_ring.base) {
524 		netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
525 		goto err;
526 	}
527 
528 	tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
529 			       GFP_KERNEL);
530 	if (!tq->buf_info)
531 		goto err;
532 
533 	return 0;
534 
535 err:
536 	vmxnet3_tq_destroy(tq, adapter);
537 	return -ENOMEM;
538 }
539 
540 static void
541 vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
542 {
543 	int i;
544 
545 	for (i = 0; i < adapter->num_tx_queues; i++)
546 		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
547 }
548 
549 /*
 *    starting from ring->next2fill, allocate rx buffers for the given ring
 *    of the rx queue and update the rx desc. Stop after @num_to_alloc buffers
 *    are allocated or allocation fails.
553  */
554 
555 static int
556 vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
557 			int num_to_alloc, struct vmxnet3_adapter *adapter)
558 {
559 	int num_allocated = 0;
560 	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
561 	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
562 	u32 val;
563 
564 	while (num_allocated <= num_to_alloc) {
565 		struct vmxnet3_rx_buf_info *rbi;
566 		union Vmxnet3_GenericDesc *gd;
567 
568 		rbi = rbi_base + ring->next2fill;
569 		gd = ring->base + ring->next2fill;
570 
571 		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
572 			if (rbi->skb == NULL) {
573 				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
574 								       rbi->len,
575 								       GFP_KERNEL);
576 				if (unlikely(rbi->skb == NULL)) {
577 					rq->stats.rx_buf_alloc_failure++;
578 					break;
579 				}
580 
581 				rbi->dma_addr = pci_map_single(adapter->pdev,
582 						rbi->skb->data, rbi->len,
583 						PCI_DMA_FROMDEVICE);
584 			} else {
585 				/* rx buffer skipped by the device */
586 			}
587 			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
588 		} else {
589 			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
590 			       rbi->len  != PAGE_SIZE);
591 
592 			if (rbi->page == NULL) {
593 				rbi->page = alloc_page(GFP_ATOMIC);
594 				if (unlikely(rbi->page == NULL)) {
595 					rq->stats.rx_buf_alloc_failure++;
596 					break;
597 				}
598 				rbi->dma_addr = pci_map_page(adapter->pdev,
599 						rbi->page, 0, PAGE_SIZE,
600 						PCI_DMA_FROMDEVICE);
601 			} else {
602 				/* rx buffers skipped by the device */
603 			}
604 			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
605 		}
606 
607 		BUG_ON(rbi->dma_addr == 0);
608 		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
609 		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
610 					   | val | rbi->len);
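		/* at this point only the gen bit is left unset; OR'ing it in
		 * below is what hands the descriptor to the device
		 */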
611 
		/* Fill the last buffer but don't mark it ready, or else the
		 * device will think that the queue is full */
614 		if (num_allocated == num_to_alloc)
615 			break;
616 
617 		gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
618 		num_allocated++;
619 		vmxnet3_cmd_ring_adv_next2fill(ring);
620 	}
621 
622 	netdev_dbg(adapter->netdev,
623 		"alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
624 		num_allocated, ring->next2fill, ring->next2comp);
625 
	/* so that the device can distinguish a full ring from an empty ring */
627 	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
628 
629 	return num_allocated;
630 }
631 
632 
633 static void
634 vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
635 		    struct vmxnet3_rx_buf_info *rbi)
636 {
637 	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
638 		skb_shinfo(skb)->nr_frags;
639 
640 	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
641 
642 	__skb_frag_set_page(frag, rbi->page);
643 	frag->page_offset = 0;
644 	skb_frag_size_set(frag, rcd->len);
645 	skb->data_len += rcd->len;
646 	skb->truesize += PAGE_SIZE;
647 	skb_shinfo(skb)->nr_frags++;
648 }
649 
650 
651 static void
652 vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
653 		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
654 		struct vmxnet3_adapter *adapter)
655 {
656 	u32 dw2, len;
657 	unsigned long buf_offset;
658 	int i;
659 	union Vmxnet3_GenericDesc *gdesc;
660 	struct vmxnet3_tx_buf_info *tbi = NULL;
661 
662 	BUG_ON(ctx->copy_size > skb_headlen(skb));
663 
664 	/* use the previous gen bit for the SOP desc */
665 	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
666 
667 	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
668 	gdesc = ctx->sop_txd; /* both loops below can be skipped */
669 
670 	/* no need to map the buffer if headers are copied */
671 	if (ctx->copy_size) {
672 		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
673 					tq->tx_ring.next2fill *
674 					sizeof(struct Vmxnet3_TxDataDesc));
675 		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
676 		ctx->sop_txd->dword[3] = 0;
677 
678 		tbi = tq->buf_info + tq->tx_ring.next2fill;
679 		tbi->map_type = VMXNET3_MAP_NONE;
680 
681 		netdev_dbg(adapter->netdev,
682 			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
683 			tq->tx_ring.next2fill,
684 			le64_to_cpu(ctx->sop_txd->txd.addr),
685 			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
686 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
687 
688 		/* use the right gen for non-SOP desc */
689 		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
690 	}
691 
692 	/* linear part can use multiple tx desc if it's big */
693 	len = skb_headlen(skb) - ctx->copy_size;
694 	buf_offset = ctx->copy_size;
695 	while (len) {
696 		u32 buf_size;
697 
698 		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
699 			buf_size = len;
700 			dw2 |= len;
701 		} else {
702 			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
703 			/* spec says that for TxDesc.len, 0 == 2^14 */
704 		}
705 
706 		tbi = tq->buf_info + tq->tx_ring.next2fill;
707 		tbi->map_type = VMXNET3_MAP_SINGLE;
708 		tbi->dma_addr = pci_map_single(adapter->pdev,
709 				skb->data + buf_offset, buf_size,
710 				PCI_DMA_TODEVICE);
711 
712 		tbi->len = buf_size;
713 
714 		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
715 		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
716 
717 		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
718 		gdesc->dword[2] = cpu_to_le32(dw2);
719 		gdesc->dword[3] = 0;
720 
721 		netdev_dbg(adapter->netdev,
722 			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
723 			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
724 			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
725 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
726 		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
727 
728 		len -= buf_size;
729 		buf_offset += buf_size;
730 	}
731 
732 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
733 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
734 		u32 buf_size;
735 
736 		buf_offset = 0;
737 		len = skb_frag_size(frag);
738 		while (len) {
739 			tbi = tq->buf_info + tq->tx_ring.next2fill;
740 			if (len < VMXNET3_MAX_TX_BUF_SIZE) {
741 				buf_size = len;
742 				dw2 |= len;
743 			} else {
744 				buf_size = VMXNET3_MAX_TX_BUF_SIZE;
745 				/* spec says that for TxDesc.len, 0 == 2^14 */
746 			}
747 			tbi->map_type = VMXNET3_MAP_PAGE;
748 			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
749 							 buf_offset, buf_size,
750 							 DMA_TO_DEVICE);
751 
752 			tbi->len = buf_size;
753 
754 			gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
755 			BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
756 
757 			gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
758 			gdesc->dword[2] = cpu_to_le32(dw2);
759 			gdesc->dword[3] = 0;
760 
761 			netdev_dbg(adapter->netdev,
762 				"txd[%u]: 0x%llu %u %u\n",
763 				tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
764 				le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
765 			vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
766 			dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
767 
768 			len -= buf_size;
769 			buf_offset += buf_size;
770 		}
771 	}
772 
773 	ctx->eop_txd = gdesc;
774 
775 	/* set the last buf_info for the pkt */
776 	tbi->skb = skb;
777 	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
778 }
779 
780 
781 /* Init all tx queues */
782 static void
783 vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
784 {
785 	int i;
786 
787 	for (i = 0; i < adapter->num_tx_queues; i++)
788 		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
789 }
790 
791 
792 /*
793  *    parse and copy relevant protocol headers:
794  *      For a tso pkt, relevant headers are L2/3/4 including options
795  *      For a pkt requesting csum offloading, they are L2/3 and may include L4
796  *      if it's a TCP/UDP pkt
797  *
798  * Returns:
799  *    -1:  error happens during parsing
800  *     0:  protocol headers parsed, but too big to be copied
801  *     1:  protocol headers parsed and copied
802  *
803  * Other effects:
804  *    1. related *ctx fields are updated.
805  *    2. ctx->copy_size is # of bytes copied
806  *    3. the portion copied is guaranteed to be in the linear part
807  *
808  */
809 static int
810 vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
811 			   struct vmxnet3_tx_ctx *ctx,
812 			   struct vmxnet3_adapter *adapter)
813 {
814 	struct Vmxnet3_TxDataDesc *tdd;
815 
816 	if (ctx->mss) {	/* TSO */
817 		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
818 		ctx->l4_hdr_size = tcp_hdrlen(skb);
819 		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
820 	} else {
821 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
822 			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
823 
824 			if (ctx->ipv4) {
825 				const struct iphdr *iph = ip_hdr(skb);
826 
827 				if (iph->protocol == IPPROTO_TCP)
828 					ctx->l4_hdr_size = tcp_hdrlen(skb);
829 				else if (iph->protocol == IPPROTO_UDP)
830 					ctx->l4_hdr_size = sizeof(struct udphdr);
831 				else
832 					ctx->l4_hdr_size = 0;
833 			} else {
834 				/* for simplicity, don't copy L4 headers */
835 				ctx->l4_hdr_size = 0;
836 			}
837 			ctx->copy_size = min(ctx->eth_ip_hdr_size +
838 					 ctx->l4_hdr_size, skb->len);
839 		} else {
840 			ctx->eth_ip_hdr_size = 0;
841 			ctx->l4_hdr_size = 0;
842 			/* copy as much as allowed */
			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
					     skb_headlen(skb));
845 		}
846 
847 		/* make sure headers are accessible directly */
848 		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
849 			goto err;
850 	}
851 
852 	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
853 		tq->stats.oversized_hdr++;
854 		ctx->copy_size = 0;
855 		return 0;
856 	}
857 
858 	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
859 
860 	memcpy(tdd->data, skb->data, ctx->copy_size);
861 	netdev_dbg(adapter->netdev,
862 		"copy %u bytes to dataRing[%u]\n",
863 		ctx->copy_size, tq->tx_ring.next2fill);
864 	return 1;
865 
866 err:
867 	return -1;
868 }
869 
870 
871 static void
872 vmxnet3_prepare_tso(struct sk_buff *skb,
873 		    struct vmxnet3_tx_ctx *ctx)
874 {
875 	struct tcphdr *tcph = tcp_hdr(skb);
876 
877 	if (ctx->ipv4) {
878 		struct iphdr *iph = ip_hdr(skb);
879 
880 		iph->check = 0;
881 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
882 						 IPPROTO_TCP, 0);
883 	} else {
884 		struct ipv6hdr *iph = ipv6_hdr(skb);
885 
886 		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
887 					       IPPROTO_TCP, 0);
888 	}
889 }
890 
891 static int txd_estimate(const struct sk_buff *skb)
892 {
893 	int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
894 	int i;
895 
896 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
897 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
898 
899 		count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
900 	}
901 	return count;
902 }
903 
904 /*
905  * Transmits a pkt thru a given tq
906  * Returns:
 *    NETDEV_TX_OK:      descriptors are set up successfully
908  *    NETDEV_TX_OK:      error occurred, the pkt is dropped
909  *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
910  *
911  * Side-effects:
912  *    1. tx ring may be changed
913  *    2. tq stats may be updated accordingly
914  *    3. shared->txNumDeferred may be updated
915  */
916 
917 static int
918 vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
919 		struct vmxnet3_adapter *adapter, struct net_device *netdev)
920 {
921 	int ret;
922 	u32 count;
923 	unsigned long flags;
924 	struct vmxnet3_tx_ctx ctx;
925 	union Vmxnet3_GenericDesc *gdesc;
926 #ifdef __BIG_ENDIAN_BITFIELD
927 	/* Use temporary descriptor to avoid touching bits multiple times */
928 	union Vmxnet3_GenericDesc tempTxDesc;
929 #endif
930 
931 	count = txd_estimate(skb);
932 
933 	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
934 
935 	ctx.mss = skb_shinfo(skb)->gso_size;
936 	if (ctx.mss) {
937 		if (skb_header_cloned(skb)) {
938 			if (unlikely(pskb_expand_head(skb, 0, 0,
939 						      GFP_ATOMIC) != 0)) {
940 				tq->stats.drop_tso++;
941 				goto drop_pkt;
942 			}
943 			tq->stats.copy_skb_header++;
944 		}
945 		vmxnet3_prepare_tso(skb, &ctx);
946 	} else {
947 		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
948 
949 			/* non-tso pkts must not use more than
950 			 * VMXNET3_MAX_TXD_PER_PKT entries
951 			 */
952 			if (skb_linearize(skb) != 0) {
953 				tq->stats.drop_too_many_frags++;
954 				goto drop_pkt;
955 			}
956 			tq->stats.linearized++;
957 
958 			/* recalculate the # of descriptors to use */
959 			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
960 		}
961 	}
962 
963 	spin_lock_irqsave(&tq->tx_lock, flags);
964 
965 	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
966 		tq->stats.tx_ring_full++;
967 		netdev_dbg(adapter->netdev,
968 			"tx queue stopped on %s, next2comp %u"
969 			" next2fill %u\n", adapter->netdev->name,
970 			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
971 
972 		vmxnet3_tq_stop(tq, adapter);
973 		spin_unlock_irqrestore(&tq->tx_lock, flags);
974 		return NETDEV_TX_BUSY;
975 	}
976 
977 
978 	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
979 	if (ret >= 0) {
980 		BUG_ON(ret <= 0 && ctx.copy_size != 0);
981 		/* hdrs parsed, check against other limits */
982 		if (ctx.mss) {
983 			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
984 				     VMXNET3_MAX_TX_BUF_SIZE)) {
985 				goto hdr_too_big;
986 			}
987 		} else {
988 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
989 				if (unlikely(ctx.eth_ip_hdr_size +
990 					     skb->csum_offset >
991 					     VMXNET3_MAX_CSUM_OFFSET)) {
992 					goto hdr_too_big;
993 				}
994 			}
995 		}
996 	} else {
997 		tq->stats.drop_hdr_inspect_err++;
998 		goto unlock_drop_pkt;
999 	}
1000 
1001 	/* fill tx descs related to addr & len */
1002 	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1003 
1004 	/* setup the EOP desc */
1005 	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1006 
1007 	/* setup the SOP desc */
1008 #ifdef __BIG_ENDIAN_BITFIELD
1009 	gdesc = &tempTxDesc;
1010 	gdesc->dword[2] = ctx.sop_txd->dword[2];
1011 	gdesc->dword[3] = ctx.sop_txd->dword[3];
1012 #else
1013 	gdesc = ctx.sop_txd;
1014 #endif
1015 	if (ctx.mss) {
1016 		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1017 		gdesc->txd.om = VMXNET3_OM_TSO;
1018 		gdesc->txd.msscof = ctx.mss;
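		/* every segment the device will carve out of this TSO pkt
		 * counts toward txNumDeferred, which gates the doorbell
		 * write further below
		 */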
1019 		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1020 			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1021 	} else {
1022 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1023 			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1024 			gdesc->txd.om = VMXNET3_OM_CSUM;
1025 			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1026 					    skb->csum_offset;
1027 		} else {
1028 			gdesc->txd.om = 0;
1029 			gdesc->txd.msscof = 0;
1030 		}
1031 		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1032 	}
1033 
1034 	if (vlan_tx_tag_present(skb)) {
1035 		gdesc->txd.ti = 1;
1036 		gdesc->txd.tci = vlan_tx_tag_get(skb);
1037 	}
1038 
	/* finally flip the GEN bit of the SOP desc. The device only consumes
	 * descriptors whose gen matches the current ring gen, so doing this
	 * last publishes the whole multi-descriptor packet at once.
	 */
1040 	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1041 						  VMXNET3_TXD_GEN);
1042 #ifdef __BIG_ENDIAN_BITFIELD
1043 	/* Finished updating in bitfields of Tx Desc, so write them in original
1044 	 * place.
1045 	 */
1046 	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1047 			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1048 	gdesc = ctx.sop_txd;
1049 #endif
1050 	netdev_dbg(adapter->netdev,
1051 		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1052 		(u32)(ctx.sop_txd -
1053 		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1054 		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1055 
1056 	spin_unlock_irqrestore(&tq->tx_lock, flags);
1057 
1058 	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1059 					le32_to_cpu(tq->shared->txThreshold)) {
1060 		tq->shared->txNumDeferred = 0;
1061 		VMXNET3_WRITE_BAR0_REG(adapter,
1062 				       VMXNET3_REG_TXPROD + tq->qid * 8,
1063 				       tq->tx_ring.next2fill);
1064 	}
1065 
1066 	return NETDEV_TX_OK;
1067 
1068 hdr_too_big:
1069 	tq->stats.drop_oversized_hdr++;
1070 unlock_drop_pkt:
1071 	spin_unlock_irqrestore(&tq->tx_lock, flags);
1072 drop_pkt:
1073 	tq->stats.drop_total++;
1074 	dev_kfree_skb(skb);
1075 	return NETDEV_TX_OK;
1076 }
1077 
1078 
1079 static netdev_tx_t
1080 vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1081 {
1082 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1083 
1084 	BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1085 	return vmxnet3_tq_xmit(skb,
1086 			       &adapter->tx_queue[skb->queue_mapping],
1087 			       adapter, netdev);
1088 }
1089 
1090 
1091 static void
1092 vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1093 		struct sk_buff *skb,
1094 		union Vmxnet3_GenericDesc *gdesc)
1095 {
1096 	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1097 		/* typical case: TCP/UDP over IP and both csums are correct */
1098 		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1099 							VMXNET3_RCD_CSUM_OK) {
1100 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1101 			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1102 			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1103 			BUG_ON(gdesc->rcd.frg);
1104 		} else {
1105 			if (gdesc->rcd.csum) {
1106 				skb->csum = htons(gdesc->rcd.csum);
1107 				skb->ip_summed = CHECKSUM_PARTIAL;
1108 			} else {
1109 				skb_checksum_none_assert(skb);
1110 			}
1111 		}
1112 	} else {
1113 		skb_checksum_none_assert(skb);
1114 	}
1115 }
1116 
1117 
1118 static void
1119 vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1120 		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1121 {
1122 	rq->stats.drop_err++;
1123 	if (!rcd->fcs)
1124 		rq->stats.drop_fcs++;
1125 
1126 	rq->stats.drop_total++;
1127 
1128 	/*
1129 	 * We do not unmap and chain the rx buffer to the skb.
1130 	 * We basically pretend this buffer is not used and will be recycled
1131 	 * by vmxnet3_rq_alloc_rx_buf()
1132 	 */
1133 
1134 	/*
1135 	 * ctx->skb may be NULL if this is the first and the only one
1136 	 * desc for the pkt
1137 	 */
1138 	if (ctx->skb)
1139 		dev_kfree_skb_irq(ctx->skb);
1140 
1141 	ctx->skb = NULL;
1142 }
1143 
1144 
1145 static int
1146 vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1147 		       struct vmxnet3_adapter *adapter, int quota)
1148 {
1149 	static const u32 rxprod_reg[2] = {
1150 		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1151 	};
1152 	u32 num_rxd = 0;
1153 	bool skip_page_frags = false;
1154 	struct Vmxnet3_RxCompDesc *rcd;
1155 	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1156 #ifdef __BIG_ENDIAN_BITFIELD
1157 	struct Vmxnet3_RxDesc rxCmdDesc;
1158 	struct Vmxnet3_RxCompDesc rxComp;
1159 #endif
1160 	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1161 			  &rxComp);
1162 	while (rcd->gen == rq->comp_ring.gen) {
1163 		struct vmxnet3_rx_buf_info *rbi;
1164 		struct sk_buff *skb, *new_skb = NULL;
1165 		struct page *new_page = NULL;
1166 		int num_to_alloc;
1167 		struct Vmxnet3_RxDesc *rxd;
1168 		u32 idx, ring_idx;
1169 		struct vmxnet3_cmd_ring	*ring = NULL;
1170 		if (num_rxd >= quota) {
1171 			/* we may stop even before we see the EOP desc of
1172 			 * the current pkt
1173 			 */
1174 			break;
1175 		}
1176 		num_rxd++;
1177 		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1178 		idx = rcd->rxdIdx;
1179 		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
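		/* rqID equals qid for completions from ring 0 and qid2
		 * (qid + num_rx_queues) for completions from ring 1
		 */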
1180 		ring = rq->rx_ring + ring_idx;
1181 		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1182 				  &rxCmdDesc);
1183 		rbi = rq->buf_info[ring_idx] + idx;
1184 
1185 		BUG_ON(rxd->addr != rbi->dma_addr ||
1186 		       rxd->len != rbi->len);
1187 
1188 		if (unlikely(rcd->eop && rcd->err)) {
1189 			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1190 			goto rcd_done;
1191 		}
1192 
1193 		if (rcd->sop) { /* first buf of the pkt */
1194 			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1195 			       rcd->rqID != rq->qid);
1196 
1197 			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1198 			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1199 
1200 			if (unlikely(rcd->len == 0)) {
1201 				/* Pretend the rx buffer is skipped. */
1202 				BUG_ON(!(rcd->sop && rcd->eop));
1203 				netdev_dbg(adapter->netdev,
1204 					"rxRing[%u][%u] 0 length\n",
1205 					ring_idx, idx);
1206 				goto rcd_done;
1207 			}
1208 
1209 			skip_page_frags = false;
1210 			ctx->skb = rbi->skb;
1211 			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1212 							    rbi->len);
1213 			if (new_skb == NULL) {
				/* Skb allocation failed, do not hand this skb
				 * over to the stack. Reuse it. Drop the
				 * existing pkt.
				 */
1217 				rq->stats.rx_buf_alloc_failure++;
1218 				ctx->skb = NULL;
1219 				rq->stats.drop_total++;
1220 				skip_page_frags = true;
1221 				goto rcd_done;
1222 			}
1223 
1224 			pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1225 					 PCI_DMA_FROMDEVICE);
1226 
1227 #ifdef VMXNET3_RSS
1228 			if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1229 			    (adapter->netdev->features & NETIF_F_RXHASH))
1230 				ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1231 #endif
1232 			skb_put(ctx->skb, rcd->len);
1233 
1234 			/* Immediate refill */
1235 			rbi->skb = new_skb;
1236 			rbi->dma_addr = pci_map_single(adapter->pdev,
1237 						       rbi->skb->data, rbi->len,
1238 						       PCI_DMA_FROMDEVICE);
1239 			rxd->addr = cpu_to_le64(rbi->dma_addr);
1240 			rxd->len = rbi->len;
1241 
1242 		} else {
1243 			BUG_ON(ctx->skb == NULL && !skip_page_frags);
1244 
1245 			/* non SOP buffer must be type 1 in most cases */
1246 			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1247 			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1248 
1249 			/* If an sop buffer was dropped, skip all
1250 			 * following non-sop fragments. They will be reused.
1251 			 */
1252 			if (skip_page_frags)
1253 				goto rcd_done;
1254 
1255 			new_page = alloc_page(GFP_ATOMIC);
1256 			if (unlikely(new_page == NULL)) {
1257 				/* Replacement page frag could not be allocated.
1258 				 * Reuse this page. Drop the pkt and free the
1259 				 * skb which contained this page as a frag. Skip
1260 				 * processing all the following non-sop frags.
1261 				 */
1262 				rq->stats.rx_buf_alloc_failure++;
1263 				dev_kfree_skb(ctx->skb);
1264 				ctx->skb = NULL;
1265 				skip_page_frags = true;
1266 				goto rcd_done;
1267 			}
1268 
1269 			if (rcd->len) {
1270 				pci_unmap_page(adapter->pdev,
1271 					       rbi->dma_addr, rbi->len,
1272 					       PCI_DMA_FROMDEVICE);
1273 
1274 				vmxnet3_append_frag(ctx->skb, rcd, rbi);
1275 			}
1276 
1277 			/* Immediate refill */
1278 			rbi->page = new_page;
1279 			rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
1280 						     0, PAGE_SIZE,
1281 						     PCI_DMA_FROMDEVICE);
1282 			rxd->addr = cpu_to_le64(rbi->dma_addr);
1283 			rxd->len = rbi->len;
1284 		}
1285 
1286 
1287 		skb = ctx->skb;
1288 		if (rcd->eop) {
1289 			skb->len += skb->data_len;
1290 
1291 			vmxnet3_rx_csum(adapter, skb,
1292 					(union Vmxnet3_GenericDesc *)rcd);
1293 			skb->protocol = eth_type_trans(skb, adapter->netdev);
1294 
1295 			if (unlikely(rcd->ts))
1296 				__vlan_hwaccel_put_tag(skb, rcd->tci);
1297 
1298 			if (adapter->netdev->features & NETIF_F_LRO)
1299 				netif_receive_skb(skb);
1300 			else
1301 				napi_gro_receive(&rq->napi, skb);
1302 
1303 			ctx->skb = NULL;
1304 		}
1305 
1306 rcd_done:
1307 		/* device may have skipped some rx descs */
1308 		ring->next2comp = idx;
1309 		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1310 		ring = rq->rx_ring + ring_idx;
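		/* hand descriptors back to the device; their addr/len already
		 * point at valid buffers (refreshed above, or untouched for
		 * descs the device skipped), so only the gen bit needs to be
		 * set to the current value
		 */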
1311 		while (num_to_alloc) {
1312 			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1313 					  &rxCmdDesc);
1314 			BUG_ON(!rxd->addr);
1315 
1316 			/* Recv desc is ready to be used by the device */
1317 			rxd->gen = ring->gen;
1318 			vmxnet3_cmd_ring_adv_next2fill(ring);
1319 			num_to_alloc--;
1320 		}
1321 
1322 		/* if needed, update the register */
1323 		if (unlikely(rq->shared->updateRxProd)) {
1324 			VMXNET3_WRITE_BAR0_REG(adapter,
1325 					       rxprod_reg[ring_idx] + rq->qid * 8,
1326 					       ring->next2fill);
1327 		}
1328 
1329 		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1330 		vmxnet3_getRxComp(rcd,
1331 				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1332 	}
1333 
1334 	return num_rxd;
1335 }
1336 
1337 
1338 static void
1339 vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1340 		   struct vmxnet3_adapter *adapter)
1341 {
1342 	u32 i, ring_idx;
1343 	struct Vmxnet3_RxDesc *rxd;
1344 
1345 	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1346 		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1347 #ifdef __BIG_ENDIAN_BITFIELD
1348 			struct Vmxnet3_RxDesc rxDesc;
1349 #endif
1350 			vmxnet3_getRxDesc(rxd,
1351 				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1352 
1353 			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1354 					rq->buf_info[ring_idx][i].skb) {
1355 				pci_unmap_single(adapter->pdev, rxd->addr,
1356 						 rxd->len, PCI_DMA_FROMDEVICE);
1357 				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1358 				rq->buf_info[ring_idx][i].skb = NULL;
1359 			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1360 					rq->buf_info[ring_idx][i].page) {
1361 				pci_unmap_page(adapter->pdev, rxd->addr,
1362 					       rxd->len, PCI_DMA_FROMDEVICE);
1363 				put_page(rq->buf_info[ring_idx][i].page);
1364 				rq->buf_info[ring_idx][i].page = NULL;
1365 			}
1366 		}
1367 
1368 		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1369 		rq->rx_ring[ring_idx].next2fill =
1370 					rq->rx_ring[ring_idx].next2comp = 0;
1371 	}
1372 
1373 	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1374 	rq->comp_ring.next2proc = 0;
1375 }
1376 
1377 
1378 static void
1379 vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1380 {
1381 	int i;
1382 
1383 	for (i = 0; i < adapter->num_rx_queues; i++)
1384 		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1385 }
1386 
1387 
1388 void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1389 			struct vmxnet3_adapter *adapter)
1390 {
1391 	int i;
1392 	int j;
1393 
1394 	/* all rx buffers must have already been freed */
1395 	for (i = 0; i < 2; i++) {
1396 		if (rq->buf_info[i]) {
1397 			for (j = 0; j < rq->rx_ring[i].size; j++)
1398 				BUG_ON(rq->buf_info[i][j].page != NULL);
1399 		}
1400 	}
1401 
1402 
1403 	kfree(rq->buf_info[0]);
1404 
1405 	for (i = 0; i < 2; i++) {
1406 		if (rq->rx_ring[i].base) {
1407 			pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1408 					    * sizeof(struct Vmxnet3_RxDesc),
1409 					    rq->rx_ring[i].base,
1410 					    rq->rx_ring[i].basePA);
1411 			rq->rx_ring[i].base = NULL;
1412 		}
1413 		rq->buf_info[i] = NULL;
1414 	}
1415 
1416 	if (rq->comp_ring.base) {
1417 		pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1418 				    sizeof(struct Vmxnet3_RxCompDesc),
1419 				    rq->comp_ring.base, rq->comp_ring.basePA);
1420 		rq->comp_ring.base = NULL;
1421 	}
1422 }
1423 
1424 
1425 static int
1426 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1427 		struct vmxnet3_adapter  *adapter)
1428 {
1429 	int i;
1430 
1431 	/* initialize buf_info */
1432 	for (i = 0; i < rq->rx_ring[0].size; i++) {
1433 
1434 		/* 1st buf for a pkt is skbuff */
1435 		if (i % adapter->rx_buf_per_pkt == 0) {
1436 			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1437 			rq->buf_info[0][i].len = adapter->skb_buf_size;
		} else { /* subsequent bufs for a pkt are frags */
1439 			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1440 			rq->buf_info[0][i].len = PAGE_SIZE;
1441 		}
1442 	}
1443 	for (i = 0; i < rq->rx_ring[1].size; i++) {
1444 		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1445 		rq->buf_info[1][i].len = PAGE_SIZE;
1446 	}
1447 
1448 	/* reset internal state and allocate buffers for both rings */
1449 	for (i = 0; i < 2; i++) {
1450 		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1451 
1452 		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1453 		       sizeof(struct Vmxnet3_RxDesc));
1454 		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1455 	}
1456 	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1457 				    adapter) == 0) {
		/* the 1st ring must have at least 1 rx buffer */
1459 		return -ENOMEM;
1460 	}
1461 	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1462 
1463 	/* reset the comp ring */
1464 	rq->comp_ring.next2proc = 0;
1465 	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1466 	       sizeof(struct Vmxnet3_RxCompDesc));
1467 	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1468 
1469 	/* reset rxctx */
1470 	rq->rx_ctx.skb = NULL;
1471 
1472 	/* stats are not reset */
1473 	return 0;
1474 }
1475 
1476 
1477 static int
1478 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1479 {
1480 	int i, err = 0;
1481 
1482 	for (i = 0; i < adapter->num_rx_queues; i++) {
1483 		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1484 		if (unlikely(err)) {
			dev_err(&adapter->netdev->dev, "%s: failed to "
				"initialize rx queue %i\n",
1487 				adapter->netdev->name, i);
1488 			break;
1489 		}
1490 	}
1491 	return err;
1492 
1493 }
1494 
1495 
1496 static int
1497 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1498 {
1499 	int i;
1500 	size_t sz;
1501 	struct vmxnet3_rx_buf_info *bi;
1502 
1503 	for (i = 0; i < 2; i++) {
1504 
1505 		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1506 		rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1507 							&rq->rx_ring[i].basePA);
1508 		if (!rq->rx_ring[i].base) {
1509 			netdev_err(adapter->netdev,
1510 				   "failed to allocate rx ring %d\n", i);
1511 			goto err;
1512 		}
1513 	}
1514 
1515 	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1516 	rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1517 						  &rq->comp_ring.basePA);
1518 	if (!rq->comp_ring.base) {
1519 		netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1520 		goto err;
1521 	}
1522 
1523 	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1524 						   rq->rx_ring[1].size);
1525 	bi = kzalloc(sz, GFP_KERNEL);
1526 	if (!bi)
1527 		goto err;
1528 
1529 	rq->buf_info[0] = bi;
1530 	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1531 
1532 	return 0;
1533 
1534 err:
1535 	vmxnet3_rq_destroy(rq, adapter);
1536 	return -ENOMEM;
1537 }
1538 
1539 
1540 static int
1541 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1542 {
1543 	int i, err = 0;
1544 
1545 	for (i = 0; i < adapter->num_rx_queues; i++) {
1546 		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1547 		if (unlikely(err)) {
1548 			dev_err(&adapter->netdev->dev,
1549 				"%s: failed to create rx queue%i\n",
1550 				adapter->netdev->name, i);
1551 			goto err_out;
1552 		}
1553 	}
1554 	return err;
1555 err_out:
1556 	vmxnet3_rq_destroy_all(adapter);
1557 	return err;
1558 
1559 }
1560 
1561 /* Multiple queue aware polling function for tx and rx */
1562 
1563 static int
1564 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1565 {
1566 	int rcd_done = 0, i;
1567 	if (unlikely(adapter->shared->ecr))
1568 		vmxnet3_process_events(adapter);
1569 	for (i = 0; i < adapter->num_tx_queues; i++)
1570 		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1571 
1572 	for (i = 0; i < adapter->num_rx_queues; i++)
1573 		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1574 						   adapter, budget);
1575 	return rcd_done;
1576 }
1577 
1578 
1579 static int
1580 vmxnet3_poll(struct napi_struct *napi, int budget)
1581 {
1582 	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1583 					  struct vmxnet3_rx_queue, napi);
1584 	int rxd_done;
1585 
1586 	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1587 
1588 	if (rxd_done < budget) {
1589 		napi_complete(napi);
1590 		vmxnet3_enable_all_intrs(rx_queue->adapter);
1591 	}
1592 	return rxd_done;
1593 }
1594 
1595 /*
1596  * NAPI polling function for MSI-X mode with multiple Rx queues
 * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1598  */
1599 
1600 static int
1601 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1602 {
1603 	struct vmxnet3_rx_queue *rq = container_of(napi,
1604 						struct vmxnet3_rx_queue, napi);
1605 	struct vmxnet3_adapter *adapter = rq->adapter;
1606 	int rxd_done;
1607 
1608 	/* When sharing interrupt with corresponding tx queue, process
1609 	 * tx completions in that queue as well
1610 	 */
1611 	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1612 		struct vmxnet3_tx_queue *tq =
1613 				&adapter->tx_queue[rq - adapter->rx_queue];
1614 		vmxnet3_tq_tx_complete(tq, adapter);
1615 	}
1616 
1617 	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1618 
1619 	if (rxd_done < budget) {
1620 		napi_complete(napi);
1621 		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1622 	}
1623 	return rxd_done;
1624 }
1625 
1626 
1627 #ifdef CONFIG_PCI_MSI
1628 
1629 /*
1630  * Handle completion interrupts on tx queues
1631  * Returns whether or not the intr is handled
1632  */
1633 
1634 static irqreturn_t
1635 vmxnet3_msix_tx(int irq, void *data)
1636 {
1637 	struct vmxnet3_tx_queue *tq = data;
1638 	struct vmxnet3_adapter *adapter = tq->adapter;
1639 
1640 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1641 		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1642 
	/* Handle the case where only one irq is allocated for all tx queues */
1644 	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1645 		int i;
1646 		for (i = 0; i < adapter->num_tx_queues; i++) {
1647 			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1648 			vmxnet3_tq_tx_complete(txq, adapter);
1649 		}
1650 	} else {
1651 		vmxnet3_tq_tx_complete(tq, adapter);
1652 	}
1653 	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1654 
1655 	return IRQ_HANDLED;
1656 }
1657 
1658 
1659 /*
1660  * Handle completion interrupts on rx queues. Returns whether or not the
1661  * intr is handled
1662  */
1663 
1664 static irqreturn_t
1665 vmxnet3_msix_rx(int irq, void *data)
1666 {
1667 	struct vmxnet3_rx_queue *rq = data;
1668 	struct vmxnet3_adapter *adapter = rq->adapter;
1669 
1670 	/* disable intr if needed */
1671 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1672 		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1673 	napi_schedule(&rq->napi);
1674 
1675 	return IRQ_HANDLED;
1676 }
1677 
1678 /*
1679  *----------------------------------------------------------------------------
1680  *
1681  * vmxnet3_msix_event --
1682  *
1683  *    vmxnet3 msix event intr handler
1684  *
1685  * Result:
1686  *    whether or not the intr is handled
1687  *
1688  *----------------------------------------------------------------------------
1689  */
1690 
1691 static irqreturn_t
1692 vmxnet3_msix_event(int irq, void *data)
1693 {
1694 	struct net_device *dev = data;
1695 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1696 
1697 	/* disable intr if needed */
1698 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1699 		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1700 
1701 	if (adapter->shared->ecr)
1702 		vmxnet3_process_events(adapter);
1703 
1704 	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1705 
1706 	return IRQ_HANDLED;
1707 }
1708 
1709 #endif /* CONFIG_PCI_MSI  */
1710 
1711 
1712 /* Interrupt handler for vmxnet3  */
1713 static irqreturn_t
1714 vmxnet3_intr(int irq, void *dev_id)
1715 {
1716 	struct net_device *dev = dev_id;
1717 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1718 
1719 	if (adapter->intr.type == VMXNET3_IT_INTX) {
1720 		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1721 		if (unlikely(icr == 0))
1722 			/* not ours */
1723 			return IRQ_NONE;
1724 	}
1725 
1726 
1727 	/* disable intr if needed */
1728 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1729 		vmxnet3_disable_all_intrs(adapter);
1730 
1731 	napi_schedule(&adapter->rx_queue[0].napi);
1732 
1733 	return IRQ_HANDLED;
1734 }
1735 
1736 #ifdef CONFIG_NET_POLL_CONTROLLER
1737 
1738 /* netpoll callback. */
1739 static void
1740 vmxnet3_netpoll(struct net_device *netdev)
1741 {
1742 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1743 
1744 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1745 		vmxnet3_disable_all_intrs(adapter);
1746 
1747 	vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1748 	vmxnet3_enable_all_intrs(adapter);
1749 
1750 }
1751 #endif	/* CONFIG_NET_POLL_CONTROLLER */
1752 
1753 static int
1754 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1755 {
1756 	struct vmxnet3_intr *intr = &adapter->intr;
1757 	int err = 0, i;
1758 	int vector = 0;
1759 
1760 #ifdef CONFIG_PCI_MSI
1761 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1762 		for (i = 0; i < adapter->num_tx_queues; i++) {
1763 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1764 				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1765 					adapter->netdev->name, vector);
1766 				err = request_irq(
1767 					      intr->msix_entries[vector].vector,
1768 					      vmxnet3_msix_tx, 0,
1769 					      adapter->tx_queue[i].name,
1770 					      &adapter->tx_queue[i]);
1771 			} else {
1772 				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1773 					adapter->netdev->name, vector);
1774 			}
1775 			if (err) {
1776 				dev_err(&adapter->netdev->dev,
1777 					"Failed to request irq for MSIX, %s, "
1778 					"error %d\n",
1779 					adapter->tx_queue[i].name, err);
1780 				return err;
1781 			}
1782 
1783 			/* Handle the case where only 1 MSIx was allocated for
1784 			 * all tx queues */
1785 			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1786 				for (; i < adapter->num_tx_queues; i++)
1787 					adapter->tx_queue[i].comp_ring.intr_idx
1788 								= vector;
1789 				vector++;
1790 				break;
1791 			} else {
1792 				adapter->tx_queue[i].comp_ring.intr_idx
1793 								= vector++;
1794 			}
1795 		}
1796 		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1797 			vector = 0;
1798 
1799 		for (i = 0; i < adapter->num_rx_queues; i++) {
1800 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1801 				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1802 					adapter->netdev->name, vector);
1803 			else
1804 				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1805 					adapter->netdev->name, vector);
1806 			err = request_irq(intr->msix_entries[vector].vector,
1807 					  vmxnet3_msix_rx, 0,
1808 					  adapter->rx_queue[i].name,
1809 					  &(adapter->rx_queue[i]));
1810 			if (err) {
1811 				netdev_err(adapter->netdev,
1812 					   "Failed to request irq for MSIX, "
1813 					   "%s, error %d\n",
1814 					   adapter->rx_queue[i].name, err);
1815 				return err;
1816 			}
1817 
1818 			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1819 		}
1820 
1821 		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1822 			adapter->netdev->name, vector);
1823 		err = request_irq(intr->msix_entries[vector].vector,
1824 				  vmxnet3_msix_event, 0,
1825 				  intr->event_msi_vector_name, adapter->netdev);
1826 		intr->event_intr_idx = vector;
1827 
1828 	} else if (intr->type == VMXNET3_IT_MSI) {
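		/* a single MSI vector services everything, so only one rx
		 * queue can be used
		 */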
1829 		adapter->num_rx_queues = 1;
1830 		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1831 				  adapter->netdev->name, adapter->netdev);
1832 	} else {
1833 #endif
1834 		adapter->num_rx_queues = 1;
1835 		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1836 				  IRQF_SHARED, adapter->netdev->name,
1837 				  adapter->netdev);
1838 #ifdef CONFIG_PCI_MSI
1839 	}
1840 #endif
1841 	intr->num_intrs = vector + 1;
1842 	if (err) {
1843 		netdev_err(adapter->netdev,
1844 			   "Failed to request irq (intr type:%d), error %d\n",
1845 			   intr->type, err);
1846 	} else {
1847 		/* Number of rx queues will not change after this */
1848 		for (i = 0; i < adapter->num_rx_queues; i++) {
1849 			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
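			/* qid addresses rx ring 0 and qid2 rx ring 1 of this
			 * queue; rx completion descriptors carry one of these
			 * ids to indicate which ring a buffer came from.
			 */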
1850 			rq->qid = i;
1851 			rq->qid2 = i + adapter->num_rx_queues;
1852 		}
1853 
1854 
1855 
1856 		/* init our intr settings */
1857 		for (i = 0; i < intr->num_intrs; i++)
1858 			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1859 		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1860 			adapter->intr.event_intr_idx = 0;
1861 			for (i = 0; i < adapter->num_tx_queues; i++)
1862 				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1863 			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1864 		}
1865 
1866 		netdev_info(adapter->netdev,
1867 			    "intr type %u, mode %u, %u vectors allocated\n",
1868 			    intr->type, intr->mask_mode, intr->num_intrs);
1869 	}
1870 
1871 	return err;
1872 }
1873 
1874 
1875 static void
1876 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1877 {
1878 	struct vmxnet3_intr *intr = &adapter->intr;
1879 	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1880 
1881 	switch (intr->type) {
1882 #ifdef CONFIG_PCI_MSI
1883 	case VMXNET3_IT_MSIX:
1884 	{
1885 		int i, vector = 0;
1886 
1887 		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1888 			for (i = 0; i < adapter->num_tx_queues; i++) {
1889 				free_irq(intr->msix_entries[vector++].vector,
1890 					 &(adapter->tx_queue[i]));
1891 				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1892 					break;
1893 			}
1894 		}
1895 
1896 		for (i = 0; i < adapter->num_rx_queues; i++) {
1897 			free_irq(intr->msix_entries[vector++].vector,
1898 				 &(adapter->rx_queue[i]));
1899 		}
1900 
1901 		free_irq(intr->msix_entries[vector].vector,
1902 			 adapter->netdev);
1903 		BUG_ON(vector >= intr->num_intrs);
1904 		break;
1905 	}
1906 #endif
1907 	case VMXNET3_IT_MSI:
1908 		free_irq(adapter->pdev->irq, adapter->netdev);
1909 		break;
1910 	case VMXNET3_IT_INTX:
1911 		free_irq(adapter->pdev->irq, adapter->netdev);
1912 		break;
1913 	default:
1914 		BUG();
1915 	}
1916 }
1917 
1918 
1919 static void
1920 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1921 {
1922 	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1923 	u16 vid;
1924 
1925 	/* allow untagged pkts */
1926 	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1927 
1928 	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1929 		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1930 }
1931 
1932 
1933 static int
1934 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
1935 {
1936 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1937 
1938 	if (!(netdev->flags & IFF_PROMISC)) {
1939 		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1940 		unsigned long flags;
1941 
1942 		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1943 		spin_lock_irqsave(&adapter->cmd_lock, flags);
1944 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1945 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1946 		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1947 	}
1948 
1949 	set_bit(vid, adapter->active_vlans);
1950 
1951 	return 0;
1952 }
1953 
1954 
1955 static int
1956 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
1957 {
1958 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1959 
1960 	if (!(netdev->flags & IFF_PROMISC)) {
1961 		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1962 		unsigned long flags;
1963 
1964 		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1965 		spin_lock_irqsave(&adapter->cmd_lock, flags);
1966 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1967 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1968 		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1969 	}
1970 
1971 	clear_bit(vid, adapter->active_vlans);
1972 
1973 	return 0;
1974 }
1975 
1976 
1977 static u8 *
1978 vmxnet3_copy_mc(struct net_device *netdev)
1979 {
1980 	u8 *buf = NULL;
1981 	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1982 
1983 	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1984 	if (sz <= 0xffff) {
1985 		/* We may be called with BH disabled */
1986 		buf = kmalloc(sz, GFP_ATOMIC);
1987 		if (buf) {
1988 			struct netdev_hw_addr *ha;
1989 			int i = 0;
1990 
1991 			netdev_for_each_mc_addr(ha, netdev)
1992 				memcpy(buf + i++ * ETH_ALEN, ha->addr,
1993 				       ETH_ALEN);
1994 		}
1995 	}
1996 	return buf;
1997 }
1998 
1999 
2000 static void
2001 vmxnet3_set_mc(struct net_device *netdev)
2002 {
2003 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2004 	unsigned long flags;
2005 	struct Vmxnet3_RxFilterConf *rxConf =
2006 					&adapter->shared->devRead.rxFilterConf;
2007 	u8 *new_table = NULL;
2008 	u32 new_mode = VMXNET3_RXM_UCAST;
2009 
2010 	if (netdev->flags & IFF_PROMISC) {
2011 		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2012 		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2013 
2014 		new_mode |= VMXNET3_RXM_PROMISC;
2015 	} else {
2016 		vmxnet3_restore_vlan(adapter);
2017 	}
2018 
2019 	if (netdev->flags & IFF_BROADCAST)
2020 		new_mode |= VMXNET3_RXM_BCAST;
2021 
2022 	if (netdev->flags & IFF_ALLMULTI)
2023 		new_mode |= VMXNET3_RXM_ALL_MULTI;
2024 	else
2025 		if (!netdev_mc_empty(netdev)) {
2026 			new_table = vmxnet3_copy_mc(netdev);
2027 			if (new_table) {
2028 				new_mode |= VMXNET3_RXM_MCAST;
2029 				rxConf->mfTableLen = cpu_to_le16(
2030 					netdev_mc_count(netdev) * ETH_ALEN);
2031 				rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
2032 						    new_table));
2033 			} else {
2034 				netdev_info(netdev, "failed to copy mcast list"
2035 					    ", setting ALL_MULTI\n");
2036 				new_mode |= VMXNET3_RXM_ALL_MULTI;
2037 			}
2038 		}
2039 
2040 
2041 	if (!(new_mode & VMXNET3_RXM_MCAST)) {
2042 		rxConf->mfTableLen = 0;
2043 		rxConf->mfTablePA = 0;
2044 	}
2045 
2046 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2047 	if (new_mode != rxConf->rxMode) {
2048 		rxConf->rxMode = cpu_to_le32(new_mode);
2049 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2050 				       VMXNET3_CMD_UPDATE_RX_MODE);
2051 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2052 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2053 	}
2054 
2055 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2056 			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
2057 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2058 
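	/* The device reads the mcast table via mfTablePA while handling the
	 * commands above, so the table can only be freed afterwards.
	 */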
2059 	kfree(new_table);
2060 }
2061 
2062 void
2063 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2064 {
2065 	int i;
2066 
2067 	for (i = 0; i < adapter->num_rx_queues; i++)
2068 		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2069 }
2070 
2071 
2072 /*
2073  *   Set up driver_shared based on settings in adapter.
2074  */
2075 
2076 static void
2077 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2078 {
2079 	struct Vmxnet3_DriverShared *shared = adapter->shared;
2080 	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2081 	struct Vmxnet3_TxQueueConf *tqc;
2082 	struct Vmxnet3_RxQueueConf *rqc;
2083 	int i;
2084 
2085 	memset(shared, 0, sizeof(*shared));
2086 
2087 	/* driver settings */
2088 	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2089 	devRead->misc.driverInfo.version = cpu_to_le32(
2090 						VMXNET3_DRIVER_VERSION_NUM);
2091 	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2092 				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2093 	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
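	/* The gos* members are bit-fields packed into a single 32-bit word;
	 * convert the assembled word to little endian in one go.
	 */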
2094 	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2095 				*((u32 *)&devRead->misc.driverInfo.gos));
2096 	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2097 	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2098 
2099 	devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2100 	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2101 
2102 	/* set up feature flags */
2103 	if (adapter->netdev->features & NETIF_F_RXCSUM)
2104 		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2105 
2106 	if (adapter->netdev->features & NETIF_F_LRO) {
2107 		devRead->misc.uptFeatures |= UPT1_F_LRO;
2108 		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2109 	}
2110 	if (adapter->netdev->features & NETIF_F_HW_VLAN_RX)
2111 		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2112 
2113 	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2114 	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2115 	devRead->misc.queueDescLen = cpu_to_le32(
2116 		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2117 		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2118 
2119 	/* tx queue settings */
2120 	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2121 	for (i = 0; i < adapter->num_tx_queues; i++) {
2122 		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2123 		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2124 		tqc = &adapter->tqd_start[i].conf;
2125 		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2126 		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2127 		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2128 		tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2129 		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2130 		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2131 		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2132 		tqc->ddLen          = cpu_to_le32(
2133 					sizeof(struct vmxnet3_tx_buf_info) *
2134 					tqc->txRingSize);
2135 		tqc->intrIdx        = tq->comp_ring.intr_idx;
2136 	}
2137 
2138 	/* rx queue settings */
2139 	devRead->misc.numRxQueues = adapter->num_rx_queues;
2140 	for (i = 0; i < adapter->num_rx_queues; i++) {
2141 		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2142 		rqc = &adapter->rqd_start[i].conf;
2143 		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2144 		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2145 		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2146 		rqc->ddPA            = cpu_to_le64(virt_to_phys(
2147 							rq->buf_info));
2148 		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2149 		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2150 		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2151 		rqc->ddLen           = cpu_to_le32(
2152 					sizeof(struct vmxnet3_rx_buf_info) *
2153 					(rqc->rxRingSize[0] +
2154 					 rqc->rxRingSize[1]));
2155 		rqc->intrIdx         = rq->comp_ring.intr_idx;
2156 	}
2157 
2158 #ifdef VMXNET3_RSS
2159 	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2160 
2161 	if (adapter->rss) {
2162 		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2163 		static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2164 			0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2165 			0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2166 			0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2167 			0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2168 			0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2169 		};
2170 
2171 		devRead->misc.uptFeatures |= UPT1_F_RSS;
2172 		devRead->misc.numRxQueues = adapter->num_rx_queues;
2173 		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2174 				    UPT1_RSS_HASH_TYPE_IPV4 |
2175 				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2176 				    UPT1_RSS_HASH_TYPE_IPV6;
2177 		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2178 		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2179 		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2180 		memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2181 
2182 		for (i = 0; i < rssConf->indTableSize; i++)
2183 			rssConf->indTable[i] = ethtool_rxfh_indir_default(
2184 				i, adapter->num_rx_queues);
2185 
		devRead->rssConfDesc.confVer = cpu_to_le32(1);
		devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
		devRead->rssConfDesc.confPA  =
				cpu_to_le64(virt_to_phys(rssConf));
2189 	}
2190 
2191 #endif /* VMXNET3_RSS */
2192 
2193 	/* intr settings */
2194 	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2195 				     VMXNET3_IMM_AUTO;
2196 	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2197 	for (i = 0; i < adapter->intr.num_intrs; i++)
2198 		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2199 
2200 	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2201 	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2202 
2203 	/* rx filter settings */
2204 	devRead->rxFilterConf.rxMode = 0;
2205 	vmxnet3_restore_vlan(adapter);
2206 	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2207 
2208 	/* the rest are already zeroed */
2209 }
2210 
2211 
2212 int
2213 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2214 {
2215 	int err, i;
2216 	u32 ret;
2217 	unsigned long flags;
2218 
2219 	netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2220 		" ring sizes %u %u %u\n", adapter->netdev->name,
2221 		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2222 		adapter->tx_queue[0].tx_ring.size,
2223 		adapter->rx_queue[0].rx_ring[0].size,
2224 		adapter->rx_queue[0].rx_ring[1].size);
2225 
2226 	vmxnet3_tq_init_all(adapter);
2227 	err = vmxnet3_rq_init_all(adapter);
2228 	if (err) {
2229 		netdev_err(adapter->netdev,
2230 			   "Failed to init rx queue error %d\n", err);
2231 		goto rq_err;
2232 	}
2233 
2234 	err = vmxnet3_request_irqs(adapter);
2235 	if (err) {
2236 		netdev_err(adapter->netdev,
2237 			   "Failed to setup irq for error %d\n", err);
2238 		goto irq_err;
2239 	}
2240 
2241 	vmxnet3_setup_driver_shared(adapter);
2242 
2243 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2244 			       adapter->shared_pa));
2245 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2246 			       adapter->shared_pa));
2247 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2248 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2249 			       VMXNET3_CMD_ACTIVATE_DEV);
2250 	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2251 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2252 
2253 	if (ret != 0) {
2254 		netdev_err(adapter->netdev,
2255 			   "Failed to activate dev: error %u\n", ret);
2256 		err = -EINVAL;
2257 		goto activate_err;
2258 	}
2259 
2260 	for (i = 0; i < adapter->num_rx_queues; i++) {
2261 		VMXNET3_WRITE_BAR0_REG(adapter,
2262 				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2263 				adapter->rx_queue[i].rx_ring[0].next2fill);
2264 		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2265 				(i * VMXNET3_REG_ALIGN)),
2266 				adapter->rx_queue[i].rx_ring[1].next2fill);
2267 	}
2268 
	/* Apply the rx filter settings last. */
2270 	vmxnet3_set_mc(adapter->netdev);
2271 
2272 	/*
2273 	 * Check link state when first activating device. It will start the
2274 	 * tx queue if the link is up.
2275 	 */
2276 	vmxnet3_check_link(adapter, true);
2277 	for (i = 0; i < adapter->num_rx_queues; i++)
2278 		napi_enable(&adapter->rx_queue[i].napi);
2279 	vmxnet3_enable_all_intrs(adapter);
2280 	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2281 	return 0;
2282 
2283 activate_err:
2284 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2285 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2286 	vmxnet3_free_irqs(adapter);
2287 irq_err:
2288 rq_err:
2289 	/* free up buffers we allocated */
2290 	vmxnet3_rq_cleanup_all(adapter);
2291 	return err;
2292 }
2293 
2294 
2295 void
2296 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2297 {
2298 	unsigned long flags;
2299 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2300 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2301 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2302 }
2303 
2304 
2305 int
2306 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2307 {
2308 	int i;
2309 	unsigned long flags;
2310 	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
		return 0;

2314 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2315 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2316 			       VMXNET3_CMD_QUIESCE_DEV);
2317 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2318 	vmxnet3_disable_all_intrs(adapter);
2319 
2320 	for (i = 0; i < adapter->num_rx_queues; i++)
2321 		napi_disable(&adapter->rx_queue[i].napi);
2322 	netif_tx_disable(adapter->netdev);
2323 	adapter->link_speed = 0;
2324 	netif_carrier_off(adapter->netdev);
2325 
2326 	vmxnet3_tq_cleanup_all(adapter);
2327 	vmxnet3_rq_cleanup_all(adapter);
2328 	vmxnet3_free_irqs(adapter);
2329 	return 0;
2330 }
2331 
2332 
2333 static void
2334 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2335 {
2336 	u32 tmp;
2337 
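	/* MACL takes the low four bytes of the address, MACH the last two. */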
2338 	tmp = *(u32 *)mac;
2339 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2340 
2341 	tmp = (mac[5] << 8) | mac[4];
2342 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2343 }
2344 
2345 
2346 static int
2347 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2348 {
2349 	struct sockaddr *addr = p;
2350 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2351 
2352 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2353 	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2354 
2355 	return 0;
2356 }
2357 
2358 
2359 /* ==================== initialization and cleanup routines ============ */
2360 
2361 static int
2362 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2363 {
2364 	int err;
2365 	unsigned long mmio_start, mmio_len;
2366 	struct pci_dev *pdev = adapter->pdev;
2367 
2368 	err = pci_enable_device(pdev);
2369 	if (err) {
2370 		dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2371 		return err;
2372 	}
2373 
2374 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2375 		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2376 			dev_err(&pdev->dev,
2377 				"pci_set_consistent_dma_mask failed\n");
2378 			err = -EIO;
2379 			goto err_set_mask;
2380 		}
2381 		*dma64 = true;
2382 	} else {
2383 		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2384 			dev_err(&pdev->dev,
2385 				"pci_set_dma_mask failed\n");
2386 			err = -EIO;
2387 			goto err_set_mask;
2388 		}
2389 		*dma64 = false;
2390 	}
2391 
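	/* Request BARs 0 and 1 only: (1 << 2) - 1 == 0x3. */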
2392 	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2393 					   vmxnet3_driver_name);
2394 	if (err) {
2395 		dev_err(&pdev->dev,
2396 			"Failed to request region for adapter: error %d\n", err);
2397 		goto err_set_mask;
2398 	}
2399 
2400 	pci_set_master(pdev);
2401 
2402 	mmio_start = pci_resource_start(pdev, 0);
2403 	mmio_len = pci_resource_len(pdev, 0);
2404 	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2405 	if (!adapter->hw_addr0) {
2406 		dev_err(&pdev->dev, "Failed to map bar0\n");
2407 		err = -EIO;
2408 		goto err_ioremap;
2409 	}
2410 
2411 	mmio_start = pci_resource_start(pdev, 1);
2412 	mmio_len = pci_resource_len(pdev, 1);
2413 	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2414 	if (!adapter->hw_addr1) {
2415 		dev_err(&pdev->dev, "Failed to map bar1\n");
2416 		err = -EIO;
2417 		goto err_bar1;
2418 	}
2419 	return 0;
2420 
2421 err_bar1:
2422 	iounmap(adapter->hw_addr0);
2423 err_ioremap:
2424 	pci_release_selected_regions(pdev, (1 << 2) - 1);
2425 err_set_mask:
2426 	pci_disable_device(pdev);
2427 	return err;
2428 }
2429 
2430 
2431 static void
2432 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2433 {
2434 	BUG_ON(!adapter->pdev);
2435 
2436 	iounmap(adapter->hw_addr0);
2437 	iounmap(adapter->hw_addr1);
2438 	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2439 	pci_disable_device(adapter->pdev);
2440 }
2441 
2442 
2443 static void
2444 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2445 {
2446 	size_t sz, i, ring0_size, ring1_size, comp_size;
	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];

2450 	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2451 				    VMXNET3_MAX_ETH_HDR_SIZE) {
2452 		adapter->skb_buf_size = adapter->netdev->mtu +
2453 					VMXNET3_MAX_ETH_HDR_SIZE;
2454 		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2455 			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2456 
2457 		adapter->rx_buf_per_pkt = 1;
2458 	} else {
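		/* The frame does not fit in one skb buffer: use a single skb
		 * buffer of VMXNET3_MAX_SKB_BUF_SIZE bytes plus enough
		 * page-sized buffers to hold the remainder.
		 */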
2459 		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2460 		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2461 					    VMXNET3_MAX_ETH_HDR_SIZE;
2462 		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2463 	}
2464 
2465 	/*
2466 	 * for simplicity, force the ring0 size to be a multiple of
2467 	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2468 	 */
2469 	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2470 	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2471 	ring0_size = (ring0_size + sz - 1) / sz * sz;
2472 	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2473 			   sz * sz);
2474 	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2475 	comp_size = ring0_size + ring1_size;
2476 
2477 	for (i = 0; i < adapter->num_rx_queues; i++) {
2478 		rq = &adapter->rx_queue[i];
2479 		rq->rx_ring[0].size = ring0_size;
2480 		rq->rx_ring[1].size = ring1_size;
2481 		rq->comp_ring.size = comp_size;
2482 	}
2483 }
2484 
2485 
2486 int
2487 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2488 		      u32 rx_ring_size, u32 rx_ring2_size)
2489 {
2490 	int err = 0, i;
2491 
2492 	for (i = 0; i < adapter->num_tx_queues; i++) {
2493 		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2494 		tq->tx_ring.size   = tx_ring_size;
2495 		tq->data_ring.size = tx_ring_size;
2496 		tq->comp_ring.size = tx_ring_size;
2497 		tq->shared = &adapter->tqd_start[i].ctrl;
2498 		tq->stopped = true;
2499 		tq->adapter = adapter;
2500 		tq->qid = i;
2501 		err = vmxnet3_tq_create(tq, adapter);
2502 		/*
2503 		 * Too late to change num_tx_queues. We cannot do away with
2504 		 * lesser number of queues than what we asked for
2505 		 */
2506 		if (err)
2507 			goto queue_err;
2508 	}
2509 
2510 	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2511 	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2512 	vmxnet3_adjust_rx_ring_size(adapter);
2513 	for (i = 0; i < adapter->num_rx_queues; i++) {
2514 		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2515 		/* qid and qid2 for rx queues will be assigned later when num
2516 		 * of rx queues is finalized after allocating intrs */
2517 		rq->shared = &adapter->rqd_start[i].ctrl;
2518 		rq->adapter = adapter;
2519 		err = vmxnet3_rq_create(rq, adapter);
2520 		if (err) {
2521 			if (i == 0) {
2522 				netdev_err(adapter->netdev,
2523 					   "Could not allocate any rx queues. "
2524 					   "Aborting.\n");
2525 				goto queue_err;
2526 			} else {
2527 				netdev_info(adapter->netdev,
2528 					    "Number of rx queues changed "
2529 					    "to : %d.\n", i);
2530 				adapter->num_rx_queues = i;
2531 				err = 0;
2532 				break;
2533 			}
2534 		}
2535 	}
2536 	return err;
2537 queue_err:
2538 	vmxnet3_tq_destroy_all(adapter);
2539 	return err;
2540 }
2541 
2542 static int
2543 vmxnet3_open(struct net_device *netdev)
2544 {
2545 	struct vmxnet3_adapter *adapter;
2546 	int err, i;
2547 
2548 	adapter = netdev_priv(netdev);
2549 
2550 	for (i = 0; i < adapter->num_tx_queues; i++)
2551 		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2552 
2553 	err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2554 				    VMXNET3_DEF_RX_RING_SIZE,
2555 				    VMXNET3_DEF_RX_RING_SIZE);
2556 	if (err)
2557 		goto queue_err;
2558 
2559 	err = vmxnet3_activate_dev(adapter);
2560 	if (err)
2561 		goto activate_err;
2562 
2563 	return 0;
2564 
2565 activate_err:
2566 	vmxnet3_rq_destroy_all(adapter);
2567 	vmxnet3_tq_destroy_all(adapter);
2568 queue_err:
2569 	return err;
2570 }
2571 
2572 
2573 static int
2574 vmxnet3_close(struct net_device *netdev)
2575 {
2576 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2577 
2578 	/*
2579 	 * Reset_work may be in the middle of resetting the device, wait for its
2580 	 * completion.
2581 	 */
2582 	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2583 		msleep(1);
2584 
2585 	vmxnet3_quiesce_dev(adapter);
2586 
2587 	vmxnet3_rq_destroy_all(adapter);
2588 	vmxnet3_tq_destroy_all(adapter);
2589 
	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);

2593 	return 0;
2594 }
2595 
2596 
2597 void
2598 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2599 {
2600 	int i;
2601 
2602 	/*
2603 	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2604 	 * vmxnet3_close() will deadlock.
2605 	 */
2606 	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2607 
2608 	/* we need to enable NAPI, otherwise dev_close will deadlock */
2609 	for (i = 0; i < adapter->num_rx_queues; i++)
2610 		napi_enable(&adapter->rx_queue[i].napi);
2611 	dev_close(adapter->netdev);
2612 }
2613 
2614 
2615 static int
2616 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2617 {
2618 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2619 	int err = 0;
2620 
2621 	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2622 		return -EINVAL;
2623 
2624 	netdev->mtu = new_mtu;
2625 
2626 	/*
2627 	 * Reset_work may be in the middle of resetting the device, wait for its
2628 	 * completion.
2629 	 */
2630 	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2631 		msleep(1);
2632 
2633 	if (netif_running(netdev)) {
2634 		vmxnet3_quiesce_dev(adapter);
2635 		vmxnet3_reset_dev(adapter);
2636 
2637 		/* we need to re-create the rx queue based on the new mtu */
2638 		vmxnet3_rq_destroy_all(adapter);
2639 		vmxnet3_adjust_rx_ring_size(adapter);
2640 		err = vmxnet3_rq_create_all(adapter);
2641 		if (err) {
2642 			netdev_err(netdev,
2643 				   "failed to re-create rx queues, "
2644 				   " error %d. Closing it.\n", err);
2645 			goto out;
2646 		}
2647 
2648 		err = vmxnet3_activate_dev(adapter);
2649 		if (err) {
2650 			netdev_err(netdev,
2651 				   "failed to re-activate, error %d. "
2652 				   "Closing it\n", err);
2653 			goto out;
2654 		}
2655 	}
2656 
2657 out:
2658 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2659 	if (err)
2660 		vmxnet3_force_close(adapter);
2661 
2662 	return err;
2663 }
2664 
2665 
2666 static void
2667 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2668 {
2669 	struct net_device *netdev = adapter->netdev;
2670 
2671 	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2672 		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX |
2673 		NETIF_F_HW_VLAN_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2674 		NETIF_F_LRO;
2675 	if (dma64)
2676 		netdev->hw_features |= NETIF_F_HIGHDMA;
2677 	netdev->vlan_features = netdev->hw_features &
2678 				~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
2679 	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_FILTER;
2680 }
2681 
2682 
2683 static void
2684 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2685 {
2686 	u32 tmp;
2687 
2688 	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2689 	*(u32 *)mac = tmp;
2690 
2691 	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2692 	mac[4] = tmp & 0xff;
2693 	mac[5] = (tmp >> 8) & 0xff;
2694 }
2695 
2696 #ifdef CONFIG_PCI_MSI
2697 
2698 /*
2699  * Enable MSIx vectors.
2700  * Returns :
2701  *	0 on successful enabling of required vectors,
2702  *	VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
2703  *	 could be enabled.
2704  *	number of vectors which can be enabled otherwise (this number is smaller
2705  *	 than VMXNET3_LINUX_MIN_MSIX_VECT)
2706  */
2707 
2708 static int
2709 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2710 			     int vectors)
2711 {
2712 	int err = 0, vector_threshold;
2713 	vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2714 
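	/* pci_enable_msix() returns 0 on success, a negative errno on
	 * failure, or, when fewer vectors are available, the number of
	 * vectors that could have been allocated.
	 */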
2715 	while (vectors >= vector_threshold) {
2716 		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2717 				      vectors);
2718 		if (!err) {
2719 			adapter->intr.num_intrs = vectors;
2720 			return 0;
2721 		} else if (err < 0) {
2722 			dev_err(&adapter->netdev->dev,
2723 				   "Failed to enable MSI-X, error: %d\n", err);
2724 			vectors = 0;
2725 		} else if (err < vector_threshold) {
2726 			break;
2727 		} else {
			/* If we fail to enable the required number of MSI-X
			 * vectors, try enabling the minimum number required.
			 */
2731 			dev_err(&adapter->netdev->dev,
2732 				"Failed to enable %d MSI-X, trying %d instead\n",
2733 				    vectors, vector_threshold);
2734 			vectors = vector_threshold;
2735 		}
2736 	}
2737 
2738 	dev_info(&adapter->pdev->dev,
2739 		 "Number of MSI-X interrupts which can be allocated "
2740 		 "is lower than min threshold required.\n");
2741 	return err;
2742 }
2743 
2744 
2745 #endif /* CONFIG_PCI_MSI */
2746 
2747 static void
2748 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2749 {
2750 	u32 cfg;
2751 	unsigned long flags;
2752 
2753 	/* intr settings */
2754 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2755 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2756 			       VMXNET3_CMD_GET_CONF_INTR);
2757 	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2758 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2759 	adapter->intr.type = cfg & 0x3;
2760 	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2761 
2762 	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2763 		adapter->intr.type = VMXNET3_IT_MSIX;
2764 	}
2765 
2766 #ifdef CONFIG_PCI_MSI
2767 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2768 		int vector, err = 0;
2769 
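		/* One vector per tx queue (or a single shared one with
		 * TXSHARE), one per rx queue unless rx buddy-shares the tx
		 * vectors, plus one vector for events.
		 */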
2770 		adapter->intr.num_intrs = (adapter->share_intr ==
2771 					   VMXNET3_INTR_TXSHARE) ? 1 :
2772 					   adapter->num_tx_queues;
2773 		adapter->intr.num_intrs += (adapter->share_intr ==
2774 					   VMXNET3_INTR_BUDDYSHARE) ? 0 :
2775 					   adapter->num_rx_queues;
2776 		adapter->intr.num_intrs += 1;		/* for link event */
2777 
2778 		adapter->intr.num_intrs = (adapter->intr.num_intrs >
2779 					   VMXNET3_LINUX_MIN_MSIX_VECT
2780 					   ? adapter->intr.num_intrs :
2781 					   VMXNET3_LINUX_MIN_MSIX_VECT);
2782 
2783 		for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2784 			adapter->intr.msix_entries[vector].entry = vector;
2785 
2786 		err = vmxnet3_acquire_msix_vectors(adapter,
2787 						   adapter->intr.num_intrs);
2788 		/* If we cannot allocate one MSIx vector per queue
2789 		 * then limit the number of rx queues to 1
2790 		 */
2791 		if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2792 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2793 			    || adapter->num_rx_queues != 1) {
2794 				adapter->share_intr = VMXNET3_INTR_TXSHARE;
2795 				netdev_err(adapter->netdev,
2796 					   "Number of rx queues : 1\n");
2797 				adapter->num_rx_queues = 1;
2798 				adapter->intr.num_intrs =
2799 						VMXNET3_LINUX_MIN_MSIX_VECT;
2800 			}
2801 			return;
2802 		}
2803 		if (!err)
2804 			return;
2805 
2806 		/* If we cannot allocate MSIx vectors use only one rx queue */
2807 		dev_info(&adapter->pdev->dev,
2808 			 "Failed to enable MSI-X, error %d. "
2809 			 "Limiting #rx queues to 1, try MSI.\n", err);
2810 
2811 		adapter->intr.type = VMXNET3_IT_MSI;
2812 	}
2813 
2814 	if (adapter->intr.type == VMXNET3_IT_MSI) {
2815 		int err;
2816 		err = pci_enable_msi(adapter->pdev);
2817 		if (!err) {
2818 			adapter->num_rx_queues = 1;
2819 			adapter->intr.num_intrs = 1;
2820 			return;
2821 		}
2822 	}
2823 #endif /* CONFIG_PCI_MSI */
2824 
2825 	adapter->num_rx_queues = 1;
2826 	dev_info(&adapter->netdev->dev,
2827 		 "Using INTx interrupt, #Rx queues: 1.\n");
2828 	adapter->intr.type = VMXNET3_IT_INTX;
2829 
2830 	/* INT-X related setting */
2831 	adapter->intr.num_intrs = 1;
2832 }
2833 
2834 
2835 static void
2836 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2837 {
2838 	if (adapter->intr.type == VMXNET3_IT_MSIX)
2839 		pci_disable_msix(adapter->pdev);
2840 	else if (adapter->intr.type == VMXNET3_IT_MSI)
2841 		pci_disable_msi(adapter->pdev);
2842 	else
2843 		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2844 }
2845 
2846 
2847 static void
2848 vmxnet3_tx_timeout(struct net_device *netdev)
2849 {
2850 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2851 	adapter->tx_timeout_count++;
2852 
2853 	netdev_err(adapter->netdev, "tx hang\n");
2854 	schedule_work(&adapter->work);
2855 	netif_wake_queue(adapter->netdev);
2856 }
2857 
2858 
2859 static void
2860 vmxnet3_reset_work(struct work_struct *data)
2861 {
2862 	struct vmxnet3_adapter *adapter;
2863 
2864 	adapter = container_of(data, struct vmxnet3_adapter, work);
2865 
2866 	/* if another thread is resetting the device, no need to proceed */
2867 	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2868 		return;
2869 
2870 	/* if the device is closed, we must leave it alone */
2871 	rtnl_lock();
2872 	if (netif_running(adapter->netdev)) {
2873 		netdev_notice(adapter->netdev, "resetting\n");
2874 		vmxnet3_quiesce_dev(adapter);
2875 		vmxnet3_reset_dev(adapter);
2876 		vmxnet3_activate_dev(adapter);
2877 	} else {
2878 		netdev_info(adapter->netdev, "already closed\n");
2879 	}
2880 	rtnl_unlock();
2881 
2882 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2883 }
2884 
2885 
2886 static int
2887 vmxnet3_probe_device(struct pci_dev *pdev,
2888 		     const struct pci_device_id *id)
2889 {
2890 	static const struct net_device_ops vmxnet3_netdev_ops = {
2891 		.ndo_open = vmxnet3_open,
2892 		.ndo_stop = vmxnet3_close,
2893 		.ndo_start_xmit = vmxnet3_xmit_frame,
2894 		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2895 		.ndo_change_mtu = vmxnet3_change_mtu,
2896 		.ndo_set_features = vmxnet3_set_features,
2897 		.ndo_get_stats64 = vmxnet3_get_stats64,
2898 		.ndo_tx_timeout = vmxnet3_tx_timeout,
2899 		.ndo_set_rx_mode = vmxnet3_set_mc,
2900 		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2901 		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2902 #ifdef CONFIG_NET_POLL_CONTROLLER
2903 		.ndo_poll_controller = vmxnet3_netpoll,
2904 #endif
2905 	};
2906 	int err;
	bool dma64 = false; /* quiet gcc "may be used uninitialized" */
2908 	u32 ver;
2909 	struct net_device *netdev;
2910 	struct vmxnet3_adapter *adapter;
2911 	u8 mac[ETH_ALEN];
2912 	int size;
2913 	int num_tx_queues;
2914 	int num_rx_queues;
2915 
2916 	if (!pci_msi_enabled())
2917 		enable_mq = 0;
2918 
2919 #ifdef VMXNET3_RSS
2920 	if (enable_mq)
2921 		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2922 				    (int)num_online_cpus());
2923 	else
2924 #endif
2925 		num_rx_queues = 1;
2926 	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2927 
2928 	if (enable_mq)
2929 		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2930 				    (int)num_online_cpus());
2931 	else
2932 		num_tx_queues = 1;
2933 
2934 	num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2935 	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2936 				   max(num_tx_queues, num_rx_queues));
2937 	dev_info(&pdev->dev,
2938 		 "# of Tx queues : %d, # of Rx queues : %d\n",
2939 		 num_tx_queues, num_rx_queues);
2940 
2941 	if (!netdev)
2942 		return -ENOMEM;
2943 
2944 	pci_set_drvdata(pdev, netdev);
2945 	adapter = netdev_priv(netdev);
2946 	adapter->netdev = netdev;
2947 	adapter->pdev = pdev;
2948 
2949 	spin_lock_init(&adapter->cmd_lock);
2950 	adapter->shared = pci_alloc_consistent(adapter->pdev,
2951 					       sizeof(struct Vmxnet3_DriverShared),
2952 					       &adapter->shared_pa);
2953 	if (!adapter->shared) {
2954 		dev_err(&pdev->dev, "Failed to allocate memory\n");
2955 		err = -ENOMEM;
2956 		goto err_alloc_shared;
2957 	}
2958 
2959 	adapter->num_rx_queues = num_rx_queues;
2960 	adapter->num_tx_queues = num_tx_queues;
2961 
2962 	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2963 	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2964 	adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2965 						  &adapter->queue_desc_pa);
2966 
2967 	if (!adapter->tqd_start) {
2968 		dev_err(&pdev->dev, "Failed to allocate memory\n");
2969 		err = -ENOMEM;
2970 		goto err_alloc_queue_desc;
2971 	}
2972 	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2973 							    adapter->num_tx_queues);
2974 
2975 	adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2976 	if (adapter->pm_conf == NULL) {
2977 		err = -ENOMEM;
2978 		goto err_alloc_pm;
2979 	}
2980 
2981 #ifdef VMXNET3_RSS
2982 
2983 	adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2984 	if (adapter->rss_conf == NULL) {
2985 		err = -ENOMEM;
2986 		goto err_alloc_rss;
2987 	}
2988 #endif /* VMXNET3_RSS */
2989 
2990 	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
2991 	if (err < 0)
2992 		goto err_alloc_pci;
2993 
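	/* Bit 0 of VRRS indicates that the device supports revision 1;
	 * select it by writing 1 back.  The UPT version (UVRS) is
	 * negotiated the same way below.
	 */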
2994 	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
2995 	if (ver & 1) {
2996 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
2997 	} else {
2998 		dev_err(&pdev->dev,
2999 			"Incompatible h/w version (0x%x) for adapter\n", ver);
3000 		err = -EBUSY;
3001 		goto err_ver;
3002 	}
3003 
3004 	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3005 	if (ver & 1) {
3006 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3007 	} else {
3008 		dev_err(&pdev->dev,
3009 			"Incompatible upt version (0x%x) for adapter\n", ver);
3010 		err = -EBUSY;
3011 		goto err_ver;
3012 	}
3013 
3014 	SET_NETDEV_DEV(netdev, &pdev->dev);
3015 	vmxnet3_declare_features(adapter, dma64);
3016 
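	/* Tx and rx queues can buddy-share MSI-X vectors only when there
	 * are equally many of each.
	 */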
3017 	if (adapter->num_tx_queues == adapter->num_rx_queues)
3018 		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3019 	else
3020 		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3021 
3022 	vmxnet3_alloc_intr_resources(adapter);
3023 
3024 #ifdef VMXNET3_RSS
3025 	if (adapter->num_rx_queues > 1 &&
3026 	    adapter->intr.type == VMXNET3_IT_MSIX) {
3027 		adapter->rss = true;
3028 		netdev->hw_features |= NETIF_F_RXHASH;
3029 		netdev->features |= NETIF_F_RXHASH;
3030 		dev_dbg(&pdev->dev, "RSS is enabled.\n");
3031 	} else {
3032 		adapter->rss = false;
3033 	}
3034 #endif
3035 
3036 	vmxnet3_read_mac_addr(adapter, mac);
3037 	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3038 
3039 	netdev->netdev_ops = &vmxnet3_netdev_ops;
3040 	vmxnet3_set_ethtool_ops(netdev);
3041 	netdev->watchdog_timeo = 5 * HZ;
3042 
3043 	INIT_WORK(&adapter->work, vmxnet3_reset_work);
3044 	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3045 
3046 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
3047 		int i;
3048 		for (i = 0; i < adapter->num_rx_queues; i++) {
3049 			netif_napi_add(adapter->netdev,
3050 				       &adapter->rx_queue[i].napi,
3051 				       vmxnet3_poll_rx_only, 64);
3052 		}
3053 	} else {
3054 		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3055 			       vmxnet3_poll, 64);
3056 	}
3057 
3058 	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3059 	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3060 
3061 	netif_carrier_off(netdev);
3062 	err = register_netdev(netdev);
3063 
3064 	if (err) {
3065 		dev_err(&pdev->dev, "Failed to register adapter\n");
3066 		goto err_register;
3067 	}
3068 
3069 	vmxnet3_check_link(adapter, false);
3070 	return 0;
3071 
3072 err_register:
3073 	vmxnet3_free_intr_resources(adapter);
3074 err_ver:
3075 	vmxnet3_free_pci_resources(adapter);
3076 err_alloc_pci:
3077 #ifdef VMXNET3_RSS
3078 	kfree(adapter->rss_conf);
3079 err_alloc_rss:
3080 #endif
3081 	kfree(adapter->pm_conf);
3082 err_alloc_pm:
3083 	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3084 			    adapter->queue_desc_pa);
3085 err_alloc_queue_desc:
3086 	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3087 			    adapter->shared, adapter->shared_pa);
3088 err_alloc_shared:
3089 	pci_set_drvdata(pdev, NULL);
3090 	free_netdev(netdev);
3091 	return err;
3092 }
3093 
3094 
3095 static void
3096 vmxnet3_remove_device(struct pci_dev *pdev)
3097 {
3098 	struct net_device *netdev = pci_get_drvdata(pdev);
3099 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3100 	int size = 0;
3101 	int num_rx_queues;
3102 
3103 #ifdef VMXNET3_RSS
3104 	if (enable_mq)
3105 		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3106 				    (int)num_online_cpus());
3107 	else
3108 #endif
3109 		num_rx_queues = 1;
3110 	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3111 
3112 	cancel_work_sync(&adapter->work);
3113 
3114 	unregister_netdev(netdev);
3115 
3116 	vmxnet3_free_intr_resources(adapter);
3117 	vmxnet3_free_pci_resources(adapter);
3118 #ifdef VMXNET3_RSS
3119 	kfree(adapter->rss_conf);
3120 #endif
3121 	kfree(adapter->pm_conf);
3122 
3123 	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3124 	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3125 	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3126 			    adapter->queue_desc_pa);
3127 	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3128 			    adapter->shared, adapter->shared_pa);
3129 	free_netdev(netdev);
3130 }
3131 
3132 
3133 #ifdef CONFIG_PM
3134 
3135 static int
3136 vmxnet3_suspend(struct device *device)
3137 {
3138 	struct pci_dev *pdev = to_pci_dev(device);
3139 	struct net_device *netdev = pci_get_drvdata(pdev);
3140 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3141 	struct Vmxnet3_PMConf *pmConf;
3142 	struct ethhdr *ehdr;
3143 	struct arphdr *ahdr;
3144 	u8 *arpreq;
3145 	struct in_device *in_dev;
3146 	struct in_ifaddr *ifa;
3147 	unsigned long flags;
3148 	int i = 0;
3149 
3150 	if (!netif_running(netdev))
3151 		return 0;
3152 
3153 	for (i = 0; i < adapter->num_rx_queues; i++)
3154 		napi_disable(&adapter->rx_queue[i].napi);
3155 
3156 	vmxnet3_disable_all_intrs(adapter);
3157 	vmxnet3_free_irqs(adapter);
3158 	vmxnet3_free_intr_resources(adapter);
3159 
3160 	netif_device_detach(netdev);
3161 	netif_tx_stop_all_queues(netdev);
3162 
3163 	/* Create wake-up filters. */
3164 	pmConf = adapter->pm_conf;
3165 	memset(pmConf, 0, sizeof(*pmConf));
3166 
3167 	if (adapter->wol & WAKE_UCAST) {
3168 		pmConf->filters[i].patternSize = ETH_ALEN;
3169 		pmConf->filters[i].maskSize = 1;
3170 		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3171 		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3172 
3173 		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3174 		i++;
3175 	}
3176 
3177 	if (adapter->wol & WAKE_ARP) {
3178 		in_dev = in_dev_get(netdev);
3179 		if (!in_dev)
3180 			goto skip_arp;
3181 
3182 		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3183 		if (!ifa)
3184 			goto skip_arp;
3185 
3186 		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3187 			sizeof(struct arphdr) +		/* ARP header */
3188 			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3189 			2 * sizeof(u32);	/*2 IPv4 addresses */
3190 		pmConf->filters[i].maskSize =
3191 			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3192 
3193 		/* ETH_P_ARP in Ethernet header. */
3194 		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3195 		ehdr->h_proto = htons(ETH_P_ARP);
3196 
3197 		/* ARPOP_REQUEST in ARP header. */
3198 		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3199 		ahdr->ar_op = htons(ARPOP_REQUEST);
3200 		arpreq = (u8 *)(ahdr + 1);
3201 
3202 		/* The Unicast IPv4 address in 'tip' field. */
3203 		arpreq += 2 * ETH_ALEN + sizeof(u32);
3204 		*(u32 *)arpreq = ifa->ifa_address;
3205 
		/* The mask selects pattern bytes, one mask bit per byte:
		 * mask[1] bits 4-5 cover the EtherType (bytes 12-13),
		 * mask[2] bits 4-5 the ARP opcode (bytes 20-21), and
		 * mask[4]/mask[5] the target IP (bytes 38-41).
		 */
3207 		pmConf->filters[i].mask[0] = 0x00;
3208 		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3209 		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3210 		pmConf->filters[i].mask[3] = 0x00;
3211 		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3212 		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3213 		in_dev_put(in_dev);
3214 
3215 		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3216 		i++;
3217 	}
3218 
3219 skip_arp:
3220 	if (adapter->wol & WAKE_MAGIC)
3221 		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3222 
3223 	pmConf->numFilters = i;
3224 
3225 	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3226 	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3227 								  *pmConf));
3228 	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3229 								 pmConf));
3230 
3231 	spin_lock_irqsave(&adapter->cmd_lock, flags);
3232 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3233 			       VMXNET3_CMD_UPDATE_PMCFG);
3234 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3235 
3236 	pci_save_state(pdev);
3237 	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3238 			adapter->wol);
3239 	pci_disable_device(pdev);
3240 	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3241 
3242 	return 0;
3243 }
3244 
3245 
3246 static int
3247 vmxnet3_resume(struct device *device)
3248 {
3249 	int err, i = 0;
3250 	unsigned long flags;
3251 	struct pci_dev *pdev = to_pci_dev(device);
3252 	struct net_device *netdev = pci_get_drvdata(pdev);
3253 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3254 	struct Vmxnet3_PMConf *pmConf;
3255 
3256 	if (!netif_running(netdev))
3257 		return 0;
3258 
3259 	/* Destroy wake-up filters. */
3260 	pmConf = adapter->pm_conf;
3261 	memset(pmConf, 0, sizeof(*pmConf));
3262 
3263 	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3264 	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3265 								  *pmConf));
3266 	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3267 								 pmConf));
3268 
3269 	netif_device_attach(netdev);
3270 	pci_set_power_state(pdev, PCI_D0);
3271 	pci_restore_state(pdev);
3272 	err = pci_enable_device_mem(pdev);
3273 	if (err != 0)
3274 		return err;
3275 
3276 	pci_enable_wake(pdev, PCI_D0, 0);
3277 
3278 	spin_lock_irqsave(&adapter->cmd_lock, flags);
3279 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3280 			       VMXNET3_CMD_UPDATE_PMCFG);
3281 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3282 	vmxnet3_alloc_intr_resources(adapter);
3283 	vmxnet3_request_irqs(adapter);
3284 	for (i = 0; i < adapter->num_rx_queues; i++)
3285 		napi_enable(&adapter->rx_queue[i].napi);
3286 	vmxnet3_enable_all_intrs(adapter);
3287 
3288 	return 0;
3289 }
3290 
3291 static const struct dev_pm_ops vmxnet3_pm_ops = {
3292 	.suspend = vmxnet3_suspend,
3293 	.resume = vmxnet3_resume,
3294 };
3295 #endif
3296 
3297 static struct pci_driver vmxnet3_driver = {
3298 	.name		= vmxnet3_driver_name,
3299 	.id_table	= vmxnet3_pciid_table,
3300 	.probe		= vmxnet3_probe_device,
3301 	.remove		= vmxnet3_remove_device,
3302 #ifdef CONFIG_PM
3303 	.driver.pm	= &vmxnet3_pm_ops,
3304 #endif
3305 };
3306 
3307 
3308 static int __init
3309 vmxnet3_init_module(void)
3310 {
3311 	pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3312 		VMXNET3_DRIVER_VERSION_REPORT);
3313 	return pci_register_driver(&vmxnet3_driver);
3314 }
3315 
3316 module_init(vmxnet3_init_module);
3317 
3318 
3319 static void
3320 vmxnet3_exit_module(void)
3321 {
3322 	pci_unregister_driver(&vmxnet3_driver);
3323 }
3324 
3325 module_exit(vmxnet3_exit_module);
3326 
3327 MODULE_AUTHOR("VMware, Inc.");
3328 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3329 MODULE_LICENSE("GPL v2");
3330 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3331