1 /*
2  * Linux driver for VMware's vmxnet3 ethernet NIC.
3  *
4  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; version 2 of the License and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13  * NON INFRINGEMENT. See the GNU General Public License for more
14  * details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * The full GNU General Public License is included in this distribution in
21  * the file called "COPYING".
22  *
23  * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24  *
25  */
26 
27 #include <linux/module.h>
28 #include <net/ip6_checksum.h>
29 
30 #include "vmxnet3_int.h"
31 
32 char vmxnet3_driver_name[] = "vmxnet3";
33 #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34 
35 /*
36  * PCI Device ID Table
37  * Last entry must be all 0s
38  */
39 static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40 	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41 	{0}
42 };
43 
44 MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45 
46 static int enable_mq = 1;
47 
48 static void
49 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50 
51 /*
52  *    Enable/Disable the given intr
53  */
54 static void
55 vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56 {
57 	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58 }
59 
60 
61 static void
62 vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63 {
64 	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65 }
66 
67 
68 /*
69  *    Enable/Disable all intrs used by the device
70  */
71 static void
72 vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73 {
74 	int i;
75 
76 	for (i = 0; i < adapter->intr.num_intrs; i++)
77 		vmxnet3_enable_intr(adapter, i);
78 	adapter->shared->devRead.intrConf.intrCtrl &=
79 					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80 }
81 
82 
83 static void
84 vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85 {
86 	int i;
87 
88 	adapter->shared->devRead.intrConf.intrCtrl |=
89 					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90 	for (i = 0; i < adapter->intr.num_intrs; i++)
91 		vmxnet3_disable_intr(adapter, i);
92 }
93 
94 
95 static void
96 vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97 {
98 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99 }
100 
101 
102 static bool
103 vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104 {
105 	return tq->stopped;
106 }
107 
108 
109 static void
110 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111 {
112 	tq->stopped = false;
113 	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114 }
115 
116 
117 static void
118 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119 {
120 	tq->stopped = false;
121 	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122 }
123 
124 
125 static void
126 vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127 {
128 	tq->stopped = true;
129 	tq->num_stop++;
130 	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131 }
132 
133 
134 /*
135  * Check the link state. This may start or stop the tx queue.
136  */
137 static void
138 vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139 {
140 	u32 ret;
141 	int i;
142 	unsigned long flags;
143 
144 	spin_lock_irqsave(&adapter->cmd_lock, flags);
145 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146 	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148 
149 	adapter->link_speed = ret >> 16;
150 	if (ret & 1) { /* Link is up. */
151 		netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152 			    adapter->link_speed);
153 		netif_carrier_on(adapter->netdev);
154 
155 		if (affectTxQueue) {
156 			for (i = 0; i < adapter->num_tx_queues; i++)
157 				vmxnet3_tq_start(&adapter->tx_queue[i],
158 						 adapter);
159 		}
160 	} else {
161 		netdev_info(adapter->netdev, "NIC Link is Down\n");
162 		netif_carrier_off(adapter->netdev);
163 
164 		if (affectTxQueue) {
165 			for (i = 0; i < adapter->num_tx_queues; i++)
166 				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167 		}
168 	}
169 }
170 
171 static void
172 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173 {
174 	int i;
175 	unsigned long flags;
176 	u32 events = le32_to_cpu(adapter->shared->ecr);
177 	if (!events)
178 		return;
179 
180 	vmxnet3_ack_events(adapter, events);
181 
182 	/* Check if link state has changed */
183 	if (events & VMXNET3_ECR_LINK)
184 		vmxnet3_check_link(adapter, true);
185 
186 	/* Check if there is an error on xmit/recv queues */
187 	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188 		spin_lock_irqsave(&adapter->cmd_lock, flags);
189 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190 				       VMXNET3_CMD_GET_QUEUE_STATUS);
191 		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192 
193 		for (i = 0; i < adapter->num_tx_queues; i++)
194 			if (adapter->tqd_start[i].status.stopped)
195 				dev_err(&adapter->netdev->dev,
196 					"%s: tq[%d] error 0x%x\n",
197 					adapter->netdev->name, i, le32_to_cpu(
198 					adapter->tqd_start[i].status.error));
199 		for (i = 0; i < adapter->num_rx_queues; i++)
200 			if (adapter->rqd_start[i].status.stopped)
201 				dev_err(&adapter->netdev->dev,
202 					"%s: rq[%d] error 0x%x\n",
203 					adapter->netdev->name, i,
204 					adapter->rqd_start[i].status.error);
205 
206 		schedule_work(&adapter->work);
207 	}
208 }
209 
210 #ifdef __BIG_ENDIAN_BITFIELD
211 /*
212  * The device expects the bitfields in shared structures to be written in
213  * little endian. When CPU is big endian, the following routines are used to
214  * correctly read and write into ABI.
 * The general technique used here is: double word bitfields are defined in
 * the opposite order for big endian architectures. Before reading them in the
 * driver, the complete double word is translated using le32_to_cpu. Similarly,
 * after the driver writes into the bitfields, cpu_to_le32 is used to translate
 * the double words into the required format.
 * To avoid touching bits in the shared structures more than once, temporary
 * descriptors are used. These are passed as srcDesc to the following functions.
222  */
223 static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224 				struct Vmxnet3_RxDesc *dstDesc)
225 {
226 	u32 *src = (u32 *)srcDesc + 2;
227 	u32 *dst = (u32 *)dstDesc + 2;
228 	dstDesc->addr = le64_to_cpu(srcDesc->addr);
229 	*dst = le32_to_cpu(*src);
230 	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231 }
232 
233 static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234 			       struct Vmxnet3_TxDesc *dstDesc)
235 {
236 	int i;
237 	u32 *src = (u32 *)(srcDesc + 1);
238 	u32 *dst = (u32 *)(dstDesc + 1);
239 
240 	/* Working backwards so that the gen bit is set at the end. */
241 	for (i = 2; i > 0; i--) {
242 		src--;
243 		dst--;
244 		*dst = cpu_to_le32(*src);
245 	}
246 }
247 
248 
249 static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250 				struct Vmxnet3_RxCompDesc *dstDesc)
251 {
252 	int i = 0;
253 	u32 *src = (u32 *)srcDesc;
254 	u32 *dst = (u32 *)dstDesc;
255 	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256 		*dst = le32_to_cpu(*src);
257 		src++;
258 		dst++;
259 	}
260 }
261 
262 
263 /* Used to read bitfield values from double words. */
264 static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265 {
266 	u32 temp = le32_to_cpu(*bitfield);
267 	u32 mask = ((1 << size) - 1) << pos;
268 	temp &= mask;
269 	temp >>= pos;
270 	return temp;
271 }
272 
273 
274 
275 #endif  /* __BIG_ENDIAN_BITFIELD */
276 
277 #ifdef __BIG_ENDIAN_BITFIELD
278 
279 #   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280 			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281 			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282 #   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283 			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284 			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285 #   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286 			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287 			VMXNET3_TCD_GEN_SIZE)
288 #   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289 			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291 			(dstrcd) = (tmp); \
292 			vmxnet3_RxCompToCPU((rcd), (tmp)); \
293 		} while (0)
294 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295 			(dstrxd) = (tmp); \
296 			vmxnet3_RxDescToCPU((rxd), (tmp)); \
297 		} while (0)
298 
299 #else
300 
301 #   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302 #   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303 #   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304 #   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307 
308 #endif /* __BIG_ENDIAN_BITFIELD  */
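
/*
 * For example (illustrative): on a big-endian host, reading the gen bit of a
 * Tx descriptor via VMXNET3_TXDESC_GET_GEN() loads the full little-endian
 * dword and extracts the bit with get_bitfield32(), instead of relying on the
 * compiler's bitfield layout; on a little-endian host it is a plain bitfield
 * access.
 */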
309 
310 
311 static void
312 vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313 		     struct pci_dev *pdev)
314 {
315 	if (tbi->map_type == VMXNET3_MAP_SINGLE)
316 		pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
317 				 PCI_DMA_TODEVICE);
318 	else if (tbi->map_type == VMXNET3_MAP_PAGE)
319 		pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
320 			       PCI_DMA_TODEVICE);
321 	else
322 		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323 
324 	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325 }
326 
327 
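/*
 * Unmap the buffers of a completed packet and free its skb. Releases the tx
 * ring entries from next2comp up to and including the EOP entry at eop_idx,
 * and returns the number of entries released.
 */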
328 static int
329 vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330 		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
331 {
332 	struct sk_buff *skb;
333 	int entries = 0;
334 
335 	/* no out of order completion */
336 	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337 	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338 
339 	skb = tq->buf_info[eop_idx].skb;
340 	BUG_ON(skb == NULL);
341 	tq->buf_info[eop_idx].skb = NULL;
342 
343 	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344 
345 	while (tq->tx_ring.next2comp != eop_idx) {
346 		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347 				     pdev);
348 
349 		/* update next2comp w/o tx_lock. Since we are marking more,
350 		 * instead of less, tx ring entries avail, the worst case is
351 		 * that the tx routine incorrectly re-queues a pkt due to
352 		 * insufficient tx ring entries.
353 		 */
354 		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355 		entries++;
356 	}
357 
358 	dev_kfree_skb_any(skb);
359 	return entries;
360 }
361 
362 
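/*
 * Reap tx completion descriptors for the queue and release the corresponding
 * tx ring entries. Wakes the queue if it was stopped and enough descriptors
 * have become available. Returns the number of packets completed.
 */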
363 static int
364 vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365 			struct vmxnet3_adapter *adapter)
366 {
367 	int completed = 0;
368 	union Vmxnet3_GenericDesc *gdesc;
369 
370 	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371 	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372 		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373 					       &gdesc->tcd), tq, adapter->pdev,
374 					       adapter);
375 
376 		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377 		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378 	}
379 
380 	if (completed) {
381 		spin_lock(&tq->tx_lock);
382 		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383 			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384 			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385 			     netif_carrier_ok(adapter->netdev))) {
386 			vmxnet3_tq_wake(tq, adapter);
387 		}
388 		spin_unlock(&tq->tx_lock);
389 	}
390 	return completed;
391 }
392 
393 
394 static void
395 vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396 		   struct vmxnet3_adapter *adapter)
397 {
398 	int i;
399 
400 	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401 		struct vmxnet3_tx_buf_info *tbi;
402 
403 		tbi = tq->buf_info + tq->tx_ring.next2comp;
404 
405 		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406 		if (tbi->skb) {
407 			dev_kfree_skb_any(tbi->skb);
408 			tbi->skb = NULL;
409 		}
410 		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411 	}
412 
413 	/* sanity check, verify all buffers are indeed unmapped and freed */
414 	for (i = 0; i < tq->tx_ring.size; i++) {
415 		BUG_ON(tq->buf_info[i].skb != NULL ||
416 		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417 	}
418 
419 	tq->tx_ring.gen = VMXNET3_INIT_GEN;
420 	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421 
422 	tq->comp_ring.gen = VMXNET3_INIT_GEN;
423 	tq->comp_ring.next2proc = 0;
424 }
425 
426 
427 static void
428 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429 		   struct vmxnet3_adapter *adapter)
430 {
431 	if (tq->tx_ring.base) {
432 		pci_free_consistent(adapter->pdev, tq->tx_ring.size *
433 				    sizeof(struct Vmxnet3_TxDesc),
434 				    tq->tx_ring.base, tq->tx_ring.basePA);
435 		tq->tx_ring.base = NULL;
436 	}
437 	if (tq->data_ring.base) {
438 		pci_free_consistent(adapter->pdev, tq->data_ring.size *
439 				    sizeof(struct Vmxnet3_TxDataDesc),
440 				    tq->data_ring.base, tq->data_ring.basePA);
441 		tq->data_ring.base = NULL;
442 	}
443 	if (tq->comp_ring.base) {
444 		pci_free_consistent(adapter->pdev, tq->comp_ring.size *
445 				    sizeof(struct Vmxnet3_TxCompDesc),
446 				    tq->comp_ring.base, tq->comp_ring.basePA);
447 		tq->comp_ring.base = NULL;
448 	}
449 	kfree(tq->buf_info);
450 	tq->buf_info = NULL;
451 }
452 
453 
454 /* Destroy all tx queues */
455 void
456 vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
457 {
458 	int i;
459 
460 	for (i = 0; i < adapter->num_tx_queues; i++)
461 		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
462 }
463 
464 
465 static void
466 vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
467 		struct vmxnet3_adapter *adapter)
468 {
469 	int i;
470 
471 	/* reset the tx ring contents to 0 and reset the tx ring states */
472 	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
473 	       sizeof(struct Vmxnet3_TxDesc));
474 	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
475 	tq->tx_ring.gen = VMXNET3_INIT_GEN;
476 
477 	memset(tq->data_ring.base, 0, tq->data_ring.size *
478 	       sizeof(struct Vmxnet3_TxDataDesc));
479 
480 	/* reset the tx comp ring contents to 0 and reset comp ring states */
481 	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
482 	       sizeof(struct Vmxnet3_TxCompDesc));
483 	tq->comp_ring.next2proc = 0;
484 	tq->comp_ring.gen = VMXNET3_INIT_GEN;
485 
486 	/* reset the bookkeeping data */
487 	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
488 	for (i = 0; i < tq->tx_ring.size; i++)
489 		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
490 
491 	/* stats are not reset */
492 }
493 
494 
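/*
 * Allocate the DMA-coherent tx, data and completion rings plus the buf_info
 * array for a tx queue. On failure, anything already allocated is freed via
 * vmxnet3_tq_destroy().
 */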
495 static int
496 vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
497 		  struct vmxnet3_adapter *adapter)
498 {
499 	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
500 	       tq->comp_ring.base || tq->buf_info);
501 
502 	tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
503 			   * sizeof(struct Vmxnet3_TxDesc),
504 			   &tq->tx_ring.basePA);
505 	if (!tq->tx_ring.base) {
506 		netdev_err(adapter->netdev, "failed to allocate tx ring\n");
507 		goto err;
508 	}
509 
510 	tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
511 			     tq->data_ring.size *
512 			     sizeof(struct Vmxnet3_TxDataDesc),
513 			     &tq->data_ring.basePA);
514 	if (!tq->data_ring.base) {
515 		netdev_err(adapter->netdev, "failed to allocate data ring\n");
516 		goto err;
517 	}
518 
519 	tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
520 			     tq->comp_ring.size *
521 			     sizeof(struct Vmxnet3_TxCompDesc),
522 			     &tq->comp_ring.basePA);
523 	if (!tq->comp_ring.base) {
524 		netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
525 		goto err;
526 	}
527 
528 	tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
529 			       GFP_KERNEL);
530 	if (!tq->buf_info)
531 		goto err;
532 
533 	return 0;
534 
535 err:
536 	vmxnet3_tq_destroy(tq, adapter);
537 	return -ENOMEM;
538 }
539 
540 static void
541 vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
542 {
543 	int i;
544 
545 	for (i = 0; i < adapter->num_tx_queues; i++)
546 		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
547 }
548 
549 /*
 *    Starting from ring->next2fill, allocate rx buffers for the given ring
 *    of the rx queue and update the rx desc. Stop after @num_to_alloc buffers
 *    are allocated or allocation fails.
553  */
554 
555 static int
556 vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
557 			int num_to_alloc, struct vmxnet3_adapter *adapter)
558 {
559 	int num_allocated = 0;
560 	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
561 	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
562 	u32 val;
563 
564 	while (num_allocated <= num_to_alloc) {
565 		struct vmxnet3_rx_buf_info *rbi;
566 		union Vmxnet3_GenericDesc *gd;
567 
568 		rbi = rbi_base + ring->next2fill;
569 		gd = ring->base + ring->next2fill;
570 
571 		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
572 			if (rbi->skb == NULL) {
573 				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
574 								       rbi->len,
575 								       GFP_KERNEL);
576 				if (unlikely(rbi->skb == NULL)) {
577 					rq->stats.rx_buf_alloc_failure++;
578 					break;
579 				}
580 
581 				rbi->dma_addr = pci_map_single(adapter->pdev,
582 						rbi->skb->data, rbi->len,
583 						PCI_DMA_FROMDEVICE);
584 			} else {
585 				/* rx buffer skipped by the device */
586 			}
587 			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
588 		} else {
589 			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
590 			       rbi->len  != PAGE_SIZE);
591 
592 			if (rbi->page == NULL) {
593 				rbi->page = alloc_page(GFP_ATOMIC);
594 				if (unlikely(rbi->page == NULL)) {
595 					rq->stats.rx_buf_alloc_failure++;
596 					break;
597 				}
598 				rbi->dma_addr = pci_map_page(adapter->pdev,
599 						rbi->page, 0, PAGE_SIZE,
600 						PCI_DMA_FROMDEVICE);
601 			} else {
602 				/* rx buffers skipped by the device */
603 			}
604 			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
605 		}
606 
607 		BUG_ON(rbi->dma_addr == 0);
608 		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
609 		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
610 					   | val | rbi->len);
611 
		/* Fill the last buffer but don't mark it ready, or else the
		 * device will think that the queue is full */
614 		if (num_allocated == num_to_alloc)
615 			break;
616 
617 		gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
618 		num_allocated++;
619 		vmxnet3_cmd_ring_adv_next2fill(ring);
620 	}
621 
622 	netdev_dbg(adapter->netdev,
623 		"alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
624 		num_allocated, ring->next2fill, ring->next2comp);
625 
	/* so that the device can distinguish a full ring from an empty ring */
627 	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
628 
629 	return num_allocated;
630 }
631 
632 
633 static void
634 vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
635 		    struct vmxnet3_rx_buf_info *rbi)
636 {
637 	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
638 		skb_shinfo(skb)->nr_frags;
639 
640 	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
641 
642 	__skb_frag_set_page(frag, rbi->page);
643 	frag->page_offset = 0;
644 	skb_frag_size_set(frag, rcd->len);
645 	skb->data_len += rcd->len;
646 	skb->truesize += PAGE_SIZE;
647 	skb_shinfo(skb)->nr_frags++;
648 }
649 
650 
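/*
 * Fill tx descriptors for the packet: the copied headers (if any) are
 * referenced from the data ring, the rest of the linear part is mapped with
 * pci_map_single(), and each page fragment with skb_frag_dma_map().
 * ctx->sop_txd and ctx->eop_txd are set up; the caller flips the gen bit of
 * the SOP descriptor once the whole packet has been laid out.
 */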
651 static void
652 vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
653 		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
654 		struct vmxnet3_adapter *adapter)
655 {
656 	u32 dw2, len;
657 	unsigned long buf_offset;
658 	int i;
659 	union Vmxnet3_GenericDesc *gdesc;
660 	struct vmxnet3_tx_buf_info *tbi = NULL;
661 
662 	BUG_ON(ctx->copy_size > skb_headlen(skb));
663 
664 	/* use the previous gen bit for the SOP desc */
665 	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
666 
667 	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
668 	gdesc = ctx->sop_txd; /* both loops below can be skipped */
669 
670 	/* no need to map the buffer if headers are copied */
671 	if (ctx->copy_size) {
672 		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
673 					tq->tx_ring.next2fill *
674 					sizeof(struct Vmxnet3_TxDataDesc));
675 		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
676 		ctx->sop_txd->dword[3] = 0;
677 
678 		tbi = tq->buf_info + tq->tx_ring.next2fill;
679 		tbi->map_type = VMXNET3_MAP_NONE;
680 
681 		netdev_dbg(adapter->netdev,
682 			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
683 			tq->tx_ring.next2fill,
684 			le64_to_cpu(ctx->sop_txd->txd.addr),
685 			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
686 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
687 
688 		/* use the right gen for non-SOP desc */
689 		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
690 	}
691 
692 	/* linear part can use multiple tx desc if it's big */
693 	len = skb_headlen(skb) - ctx->copy_size;
694 	buf_offset = ctx->copy_size;
695 	while (len) {
696 		u32 buf_size;
697 
698 		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
699 			buf_size = len;
700 			dw2 |= len;
701 		} else {
702 			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
703 			/* spec says that for TxDesc.len, 0 == 2^14 */
704 		}
705 
706 		tbi = tq->buf_info + tq->tx_ring.next2fill;
707 		tbi->map_type = VMXNET3_MAP_SINGLE;
708 		tbi->dma_addr = pci_map_single(adapter->pdev,
709 				skb->data + buf_offset, buf_size,
710 				PCI_DMA_TODEVICE);
711 
712 		tbi->len = buf_size;
713 
714 		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
715 		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
716 
717 		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
718 		gdesc->dword[2] = cpu_to_le32(dw2);
719 		gdesc->dword[3] = 0;
720 
721 		netdev_dbg(adapter->netdev,
722 			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
723 			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
724 			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
725 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
726 		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
727 
728 		len -= buf_size;
729 		buf_offset += buf_size;
730 	}
731 
732 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
733 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
734 		u32 buf_size;
735 
736 		buf_offset = 0;
737 		len = skb_frag_size(frag);
738 		while (len) {
739 			tbi = tq->buf_info + tq->tx_ring.next2fill;
740 			if (len < VMXNET3_MAX_TX_BUF_SIZE) {
741 				buf_size = len;
742 				dw2 |= len;
743 			} else {
744 				buf_size = VMXNET3_MAX_TX_BUF_SIZE;
745 				/* spec says that for TxDesc.len, 0 == 2^14 */
746 			}
747 			tbi->map_type = VMXNET3_MAP_PAGE;
748 			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
749 							 buf_offset, buf_size,
750 							 DMA_TO_DEVICE);
751 
752 			tbi->len = buf_size;
753 
754 			gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
755 			BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
756 
757 			gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
758 			gdesc->dword[2] = cpu_to_le32(dw2);
759 			gdesc->dword[3] = 0;
760 
			netdev_dbg(adapter->netdev,
				"txd[%u]: 0x%llx %u %u\n",
				tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
				le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
765 			vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
766 			dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
767 
768 			len -= buf_size;
769 			buf_offset += buf_size;
770 		}
771 	}
772 
773 	ctx->eop_txd = gdesc;
774 
775 	/* set the last buf_info for the pkt */
776 	tbi->skb = skb;
777 	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
778 }
779 
780 
781 /* Init all tx queues */
782 static void
783 vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
784 {
785 	int i;
786 
787 	for (i = 0; i < adapter->num_tx_queues; i++)
788 		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
789 }
790 
791 
792 /*
793  *    parse and copy relevant protocol headers:
794  *      For a tso pkt, relevant headers are L2/3/4 including options
795  *      For a pkt requesting csum offloading, they are L2/3 and may include L4
796  *      if it's a TCP/UDP pkt
797  *
798  * Returns:
 *    -1:  an error occurred during parsing
800  *     0:  protocol headers parsed, but too big to be copied
801  *     1:  protocol headers parsed and copied
802  *
803  * Other effects:
804  *    1. related *ctx fields are updated.
805  *    2. ctx->copy_size is # of bytes copied
806  *    3. the portion copied is guaranteed to be in the linear part
807  *
808  */
809 static int
810 vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
811 			   struct vmxnet3_tx_ctx *ctx,
812 			   struct vmxnet3_adapter *adapter)
813 {
814 	struct Vmxnet3_TxDataDesc *tdd;
815 
816 	if (ctx->mss) {	/* TSO */
817 		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
818 		ctx->l4_hdr_size = tcp_hdrlen(skb);
819 		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
820 	} else {
821 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
822 			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
823 
824 			if (ctx->ipv4) {
825 				const struct iphdr *iph = ip_hdr(skb);
826 
827 				if (iph->protocol == IPPROTO_TCP)
828 					ctx->l4_hdr_size = tcp_hdrlen(skb);
829 				else if (iph->protocol == IPPROTO_UDP)
830 					ctx->l4_hdr_size = sizeof(struct udphdr);
831 				else
832 					ctx->l4_hdr_size = 0;
833 			} else {
834 				/* for simplicity, don't copy L4 headers */
835 				ctx->l4_hdr_size = 0;
836 			}
837 			ctx->copy_size = min(ctx->eth_ip_hdr_size +
838 					 ctx->l4_hdr_size, skb->len);
839 		} else {
840 			ctx->eth_ip_hdr_size = 0;
841 			ctx->l4_hdr_size = 0;
842 			/* copy as much as allowed */
			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
					     skb_headlen(skb));
845 		}
846 
847 		/* make sure headers are accessible directly */
848 		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
849 			goto err;
850 	}
851 
852 	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
853 		tq->stats.oversized_hdr++;
854 		ctx->copy_size = 0;
855 		return 0;
856 	}
857 
858 	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
859 
860 	memcpy(tdd->data, skb->data, ctx->copy_size);
861 	netdev_dbg(adapter->netdev,
862 		"copy %u bytes to dataRing[%u]\n",
863 		ctx->copy_size, tq->tx_ring.next2fill);
864 	return 1;
865 
866 err:
867 	return -1;
868 }
869 
870 
871 static void
872 vmxnet3_prepare_tso(struct sk_buff *skb,
873 		    struct vmxnet3_tx_ctx *ctx)
874 {
875 	struct tcphdr *tcph = tcp_hdr(skb);
876 
877 	if (ctx->ipv4) {
878 		struct iphdr *iph = ip_hdr(skb);
879 
880 		iph->check = 0;
881 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
882 						 IPPROTO_TCP, 0);
883 	} else {
884 		struct ipv6hdr *iph = ipv6_hdr(skb);
885 
886 		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
887 					       IPPROTO_TCP, 0);
888 	}
889 }
890 
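/*
 * Estimate (upper bound) of the number of tx descriptors the skb needs:
 * enough for the linear part, one extra for headers that may be copied into
 * the data ring, and enough for each page fragment.
 */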
891 static int txd_estimate(const struct sk_buff *skb)
892 {
893 	int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
894 	int i;
895 
896 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
897 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
898 
899 		count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
900 	}
901 	return count;
902 }
903 
904 /*
 * Transmits a pkt through a given tq
906  * Returns:
907  *    NETDEV_TX_OK:      descriptors are setup successfully
908  *    NETDEV_TX_OK:      error occurred, the pkt is dropped
909  *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
910  *
911  * Side-effects:
912  *    1. tx ring may be changed
913  *    2. tq stats may be updated accordingly
914  *    3. shared->txNumDeferred may be updated
915  */
916 
917 static int
918 vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
919 		struct vmxnet3_adapter *adapter, struct net_device *netdev)
920 {
921 	int ret;
922 	u32 count;
923 	unsigned long flags;
924 	struct vmxnet3_tx_ctx ctx;
925 	union Vmxnet3_GenericDesc *gdesc;
926 #ifdef __BIG_ENDIAN_BITFIELD
927 	/* Use temporary descriptor to avoid touching bits multiple times */
928 	union Vmxnet3_GenericDesc tempTxDesc;
929 #endif
930 
931 	count = txd_estimate(skb);
932 
933 	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
934 
935 	ctx.mss = skb_shinfo(skb)->gso_size;
936 	if (ctx.mss) {
937 		if (skb_header_cloned(skb)) {
938 			if (unlikely(pskb_expand_head(skb, 0, 0,
939 						      GFP_ATOMIC) != 0)) {
940 				tq->stats.drop_tso++;
941 				goto drop_pkt;
942 			}
943 			tq->stats.copy_skb_header++;
944 		}
945 		vmxnet3_prepare_tso(skb, &ctx);
946 	} else {
947 		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
948 
949 			/* non-tso pkts must not use more than
950 			 * VMXNET3_MAX_TXD_PER_PKT entries
951 			 */
952 			if (skb_linearize(skb) != 0) {
953 				tq->stats.drop_too_many_frags++;
954 				goto drop_pkt;
955 			}
956 			tq->stats.linearized++;
957 
958 			/* recalculate the # of descriptors to use */
959 			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
960 		}
961 	}
962 
963 	spin_lock_irqsave(&tq->tx_lock, flags);
964 
965 	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
966 		tq->stats.tx_ring_full++;
967 		netdev_dbg(adapter->netdev,
968 			"tx queue stopped on %s, next2comp %u"
969 			" next2fill %u\n", adapter->netdev->name,
970 			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
971 
972 		vmxnet3_tq_stop(tq, adapter);
973 		spin_unlock_irqrestore(&tq->tx_lock, flags);
974 		return NETDEV_TX_BUSY;
975 	}
976 
977 
978 	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
979 	if (ret >= 0) {
980 		BUG_ON(ret <= 0 && ctx.copy_size != 0);
981 		/* hdrs parsed, check against other limits */
982 		if (ctx.mss) {
983 			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
984 				     VMXNET3_MAX_TX_BUF_SIZE)) {
985 				goto hdr_too_big;
986 			}
987 		} else {
988 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
989 				if (unlikely(ctx.eth_ip_hdr_size +
990 					     skb->csum_offset >
991 					     VMXNET3_MAX_CSUM_OFFSET)) {
992 					goto hdr_too_big;
993 				}
994 			}
995 		}
996 	} else {
997 		tq->stats.drop_hdr_inspect_err++;
998 		goto unlock_drop_pkt;
999 	}
1000 
1001 	/* fill tx descs related to addr & len */
1002 	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1003 
1004 	/* setup the EOP desc */
1005 	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1006 
1007 	/* setup the SOP desc */
1008 #ifdef __BIG_ENDIAN_BITFIELD
1009 	gdesc = &tempTxDesc;
1010 	gdesc->dword[2] = ctx.sop_txd->dword[2];
1011 	gdesc->dword[3] = ctx.sop_txd->dword[3];
1012 #else
1013 	gdesc = ctx.sop_txd;
1014 #endif
1015 	if (ctx.mss) {
1016 		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1017 		gdesc->txd.om = VMXNET3_OM_TSO;
1018 		gdesc->txd.msscof = ctx.mss;
1019 		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1020 			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1021 	} else {
1022 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1023 			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1024 			gdesc->txd.om = VMXNET3_OM_CSUM;
1025 			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1026 					    skb->csum_offset;
1027 		} else {
1028 			gdesc->txd.om = 0;
1029 			gdesc->txd.msscof = 0;
1030 		}
1031 		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1032 	}
1033 
1034 	if (vlan_tx_tag_present(skb)) {
1035 		gdesc->txd.ti = 1;
1036 		gdesc->txd.tci = vlan_tx_tag_get(skb);
1037 	}
1038 
1039 	/* finally flips the GEN bit of the SOP desc. */
1040 	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1041 						  VMXNET3_TXD_GEN);
1042 #ifdef __BIG_ENDIAN_BITFIELD
1043 	/* Finished updating in bitfields of Tx Desc, so write them in original
1044 	 * place.
1045 	 */
1046 	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1047 			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1048 	gdesc = ctx.sop_txd;
1049 #endif
1050 	netdev_dbg(adapter->netdev,
1051 		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1052 		(u32)(ctx.sop_txd -
1053 		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1054 		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1055 
1056 	spin_unlock_irqrestore(&tq->tx_lock, flags);
1057 
1058 	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1059 					le32_to_cpu(tq->shared->txThreshold)) {
1060 		tq->shared->txNumDeferred = 0;
1061 		VMXNET3_WRITE_BAR0_REG(adapter,
1062 				       VMXNET3_REG_TXPROD + tq->qid * 8,
1063 				       tq->tx_ring.next2fill);
1064 	}
1065 
1066 	return NETDEV_TX_OK;
1067 
1068 hdr_too_big:
1069 	tq->stats.drop_oversized_hdr++;
1070 unlock_drop_pkt:
1071 	spin_unlock_irqrestore(&tq->tx_lock, flags);
1072 drop_pkt:
1073 	tq->stats.drop_total++;
1074 	dev_kfree_skb(skb);
1075 	return NETDEV_TX_OK;
1076 }
1077 
1078 
1079 static netdev_tx_t
1080 vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1081 {
1082 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1083 
1084 	BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1085 	return vmxnet3_tq_xmit(skb,
1086 			       &adapter->tx_queue[skb->queue_mapping],
1087 			       adapter, netdev);
1088 }
1089 
1090 
1091 static void
1092 vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1093 		struct sk_buff *skb,
1094 		union Vmxnet3_GenericDesc *gdesc)
1095 {
1096 	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1097 		/* typical case: TCP/UDP over IP and both csums are correct */
1098 		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1099 							VMXNET3_RCD_CSUM_OK) {
1100 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1101 			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1102 			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1103 			BUG_ON(gdesc->rcd.frg);
1104 		} else {
1105 			if (gdesc->rcd.csum) {
1106 				skb->csum = htons(gdesc->rcd.csum);
1107 				skb->ip_summed = CHECKSUM_PARTIAL;
1108 			} else {
1109 				skb_checksum_none_assert(skb);
1110 			}
1111 		}
1112 	} else {
1113 		skb_checksum_none_assert(skb);
1114 	}
1115 }
1116 
1117 
1118 static void
1119 vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1120 		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1121 {
1122 	rq->stats.drop_err++;
1123 	if (!rcd->fcs)
1124 		rq->stats.drop_fcs++;
1125 
1126 	rq->stats.drop_total++;
1127 
1128 	/*
1129 	 * We do not unmap and chain the rx buffer to the skb.
1130 	 * We basically pretend this buffer is not used and will be recycled
1131 	 * by vmxnet3_rq_alloc_rx_buf()
1132 	 */
1133 
1134 	/*
1135 	 * ctx->skb may be NULL if this is the first and the only one
1136 	 * desc for the pkt
1137 	 */
1138 	if (ctx->skb)
1139 		dev_kfree_skb_irq(ctx->skb);
1140 
1141 	ctx->skb = NULL;
1142 }
1143 
1144 
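/*
 * Process up to @quota rx completion descriptors for the queue: build skbs
 * from the head buffer and any page fragments, apply checksum offload info,
 * hand completed packets to the stack, refill the rx rings with fresh
 * buffers and update the rx producer register when the device requests it.
 * Returns the number of completion descriptors processed.
 */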
1145 static int
1146 vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1147 		       struct vmxnet3_adapter *adapter, int quota)
1148 {
1149 	static const u32 rxprod_reg[2] = {
1150 		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1151 	};
1152 	u32 num_rxd = 0;
1153 	bool skip_page_frags = false;
1154 	struct Vmxnet3_RxCompDesc *rcd;
1155 	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1156 #ifdef __BIG_ENDIAN_BITFIELD
1157 	struct Vmxnet3_RxDesc rxCmdDesc;
1158 	struct Vmxnet3_RxCompDesc rxComp;
1159 #endif
1160 	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1161 			  &rxComp);
1162 	while (rcd->gen == rq->comp_ring.gen) {
1163 		struct vmxnet3_rx_buf_info *rbi;
1164 		struct sk_buff *skb, *new_skb = NULL;
1165 		struct page *new_page = NULL;
1166 		int num_to_alloc;
1167 		struct Vmxnet3_RxDesc *rxd;
1168 		u32 idx, ring_idx;
1169 		struct vmxnet3_cmd_ring	*ring = NULL;
1170 		if (num_rxd >= quota) {
1171 			/* we may stop even before we see the EOP desc of
1172 			 * the current pkt
1173 			 */
1174 			break;
1175 		}
1176 		num_rxd++;
1177 		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1178 		idx = rcd->rxdIdx;
1179 		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1180 		ring = rq->rx_ring + ring_idx;
1181 		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1182 				  &rxCmdDesc);
1183 		rbi = rq->buf_info[ring_idx] + idx;
1184 
1185 		BUG_ON(rxd->addr != rbi->dma_addr ||
1186 		       rxd->len != rbi->len);
1187 
1188 		if (unlikely(rcd->eop && rcd->err)) {
1189 			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1190 			goto rcd_done;
1191 		}
1192 
1193 		if (rcd->sop) { /* first buf of the pkt */
1194 			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1195 			       rcd->rqID != rq->qid);
1196 
1197 			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1198 			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1199 
1200 			if (unlikely(rcd->len == 0)) {
1201 				/* Pretend the rx buffer is skipped. */
1202 				BUG_ON(!(rcd->sop && rcd->eop));
1203 				netdev_dbg(adapter->netdev,
1204 					"rxRing[%u][%u] 0 length\n",
1205 					ring_idx, idx);
1206 				goto rcd_done;
1207 			}
1208 
1209 			skip_page_frags = false;
1210 			ctx->skb = rbi->skb;
1211 			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1212 							    rbi->len);
1213 			if (new_skb == NULL) {
				/* Skb allocation failed, do not hand over this
				 * skb to the stack. Reuse it. Drop the existing
				 * pkt.
				 */
1217 				rq->stats.rx_buf_alloc_failure++;
1218 				ctx->skb = NULL;
1219 				rq->stats.drop_total++;
1220 				skip_page_frags = true;
1221 				goto rcd_done;
1222 			}
1223 
1224 			pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1225 					 PCI_DMA_FROMDEVICE);
1226 
1227 #ifdef VMXNET3_RSS
1228 			if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1229 			    (adapter->netdev->features & NETIF_F_RXHASH))
1230 				ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1231 #endif
1232 			skb_put(ctx->skb, rcd->len);
1233 
1234 			/* Immediate refill */
1235 			rbi->skb = new_skb;
1236 			rbi->dma_addr = pci_map_single(adapter->pdev,
1237 						       rbi->skb->data, rbi->len,
1238 						       PCI_DMA_FROMDEVICE);
1239 			rxd->addr = cpu_to_le64(rbi->dma_addr);
1240 			rxd->len = rbi->len;
1241 
1242 		} else {
1243 			BUG_ON(ctx->skb == NULL && !skip_page_frags);
1244 
1245 			/* non SOP buffer must be type 1 in most cases */
1246 			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1247 			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1248 
1249 			/* If an sop buffer was dropped, skip all
1250 			 * following non-sop fragments. They will be reused.
1251 			 */
1252 			if (skip_page_frags)
1253 				goto rcd_done;
1254 
1255 			new_page = alloc_page(GFP_ATOMIC);
1256 			if (unlikely(new_page == NULL)) {
1257 				/* Replacement page frag could not be allocated.
1258 				 * Reuse this page. Drop the pkt and free the
1259 				 * skb which contained this page as a frag. Skip
1260 				 * processing all the following non-sop frags.
1261 				 */
1262 				rq->stats.rx_buf_alloc_failure++;
1263 				dev_kfree_skb(ctx->skb);
1264 				ctx->skb = NULL;
1265 				skip_page_frags = true;
1266 				goto rcd_done;
1267 			}
1268 
1269 			if (rcd->len) {
1270 				pci_unmap_page(adapter->pdev,
1271 					       rbi->dma_addr, rbi->len,
1272 					       PCI_DMA_FROMDEVICE);
1273 
1274 				vmxnet3_append_frag(ctx->skb, rcd, rbi);
1275 			}
1276 
1277 			/* Immediate refill */
1278 			rbi->page = new_page;
1279 			rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
1280 						     0, PAGE_SIZE,
1281 						     PCI_DMA_FROMDEVICE);
1282 			rxd->addr = cpu_to_le64(rbi->dma_addr);
1283 			rxd->len = rbi->len;
1284 		}
1285 
1286 
1287 		skb = ctx->skb;
1288 		if (rcd->eop) {
1289 			skb->len += skb->data_len;
1290 
1291 			vmxnet3_rx_csum(adapter, skb,
1292 					(union Vmxnet3_GenericDesc *)rcd);
1293 			skb->protocol = eth_type_trans(skb, adapter->netdev);
1294 
1295 			if (unlikely(rcd->ts))
1296 				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1297 
1298 			if (adapter->netdev->features & NETIF_F_LRO)
1299 				netif_receive_skb(skb);
1300 			else
1301 				napi_gro_receive(&rq->napi, skb);
1302 
1303 			ctx->skb = NULL;
1304 		}
1305 
1306 rcd_done:
1307 		/* device may have skipped some rx descs */
1308 		ring->next2comp = idx;
1309 		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1310 		ring = rq->rx_ring + ring_idx;
1311 		while (num_to_alloc) {
1312 			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1313 					  &rxCmdDesc);
1314 			BUG_ON(!rxd->addr);
1315 
1316 			/* Recv desc is ready to be used by the device */
1317 			rxd->gen = ring->gen;
1318 			vmxnet3_cmd_ring_adv_next2fill(ring);
1319 			num_to_alloc--;
1320 		}
1321 
1322 		/* if needed, update the register */
1323 		if (unlikely(rq->shared->updateRxProd)) {
1324 			VMXNET3_WRITE_BAR0_REG(adapter,
1325 					       rxprod_reg[ring_idx] + rq->qid * 8,
1326 					       ring->next2fill);
1327 		}
1328 
1329 		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1330 		vmxnet3_getRxComp(rcd,
1331 				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1332 	}
1333 
1334 	return num_rxd;
1335 }
1336 
1337 
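/*
 * Unmap and free all rx buffers currently posted to both rx rings of the
 * queue and reset the ring state, leaving the queue ready to be
 * re-initialized.
 */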
1338 static void
1339 vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1340 		   struct vmxnet3_adapter *adapter)
1341 {
1342 	u32 i, ring_idx;
1343 	struct Vmxnet3_RxDesc *rxd;
1344 
1345 	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1346 		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1347 #ifdef __BIG_ENDIAN_BITFIELD
1348 			struct Vmxnet3_RxDesc rxDesc;
1349 #endif
1350 			vmxnet3_getRxDesc(rxd,
1351 				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1352 
1353 			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1354 					rq->buf_info[ring_idx][i].skb) {
1355 				pci_unmap_single(adapter->pdev, rxd->addr,
1356 						 rxd->len, PCI_DMA_FROMDEVICE);
1357 				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1358 				rq->buf_info[ring_idx][i].skb = NULL;
1359 			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1360 					rq->buf_info[ring_idx][i].page) {
1361 				pci_unmap_page(adapter->pdev, rxd->addr,
1362 					       rxd->len, PCI_DMA_FROMDEVICE);
1363 				put_page(rq->buf_info[ring_idx][i].page);
1364 				rq->buf_info[ring_idx][i].page = NULL;
1365 			}
1366 		}
1367 
1368 		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1369 		rq->rx_ring[ring_idx].next2fill =
1370 					rq->rx_ring[ring_idx].next2comp = 0;
1371 	}
1372 
1373 	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1374 	rq->comp_ring.next2proc = 0;
1375 }
1376 
1377 
1378 static void
1379 vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1380 {
1381 	int i;
1382 
1383 	for (i = 0; i < adapter->num_rx_queues; i++)
1384 		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1385 }
1386 
1387 
1388 static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1389 			       struct vmxnet3_adapter *adapter)
1390 {
1391 	int i;
1392 	int j;
1393 
1394 	/* all rx buffers must have already been freed */
1395 	for (i = 0; i < 2; i++) {
1396 		if (rq->buf_info[i]) {
1397 			for (j = 0; j < rq->rx_ring[i].size; j++)
1398 				BUG_ON(rq->buf_info[i][j].page != NULL);
1399 		}
1400 	}
1401 
1402 
1403 	kfree(rq->buf_info[0]);
1404 
1405 	for (i = 0; i < 2; i++) {
1406 		if (rq->rx_ring[i].base) {
1407 			pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1408 					    * sizeof(struct Vmxnet3_RxDesc),
1409 					    rq->rx_ring[i].base,
1410 					    rq->rx_ring[i].basePA);
1411 			rq->rx_ring[i].base = NULL;
1412 		}
1413 		rq->buf_info[i] = NULL;
1414 	}
1415 
1416 	if (rq->comp_ring.base) {
1417 		pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1418 				    sizeof(struct Vmxnet3_RxCompDesc),
1419 				    rq->comp_ring.base, rq->comp_ring.basePA);
1420 		rq->comp_ring.base = NULL;
1421 	}
1422 }
1423 
1424 
1425 static int
1426 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1427 		struct vmxnet3_adapter  *adapter)
1428 {
1429 	int i;
1430 
1431 	/* initialize buf_info */
1432 	for (i = 0; i < rq->rx_ring[0].size; i++) {
1433 
1434 		/* 1st buf for a pkt is skbuff */
1435 		if (i % adapter->rx_buf_per_pkt == 0) {
1436 			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1437 			rq->buf_info[0][i].len = adapter->skb_buf_size;
		} else { /* subsequent bufs for a pkt are frags */
1439 			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1440 			rq->buf_info[0][i].len = PAGE_SIZE;
1441 		}
1442 	}
1443 	for (i = 0; i < rq->rx_ring[1].size; i++) {
1444 		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1445 		rq->buf_info[1][i].len = PAGE_SIZE;
1446 	}
1447 
1448 	/* reset internal state and allocate buffers for both rings */
1449 	for (i = 0; i < 2; i++) {
1450 		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1451 
1452 		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1453 		       sizeof(struct Vmxnet3_RxDesc));
1454 		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1455 	}
1456 	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1457 				    adapter) == 0) {
		/* the 1st ring must have at least 1 rx buffer */
1459 		return -ENOMEM;
1460 	}
1461 	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1462 
1463 	/* reset the comp ring */
1464 	rq->comp_ring.next2proc = 0;
1465 	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1466 	       sizeof(struct Vmxnet3_RxCompDesc));
1467 	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1468 
1469 	/* reset rxctx */
1470 	rq->rx_ctx.skb = NULL;
1471 
1472 	/* stats are not reset */
1473 	return 0;
1474 }
1475 
1476 
1477 static int
1478 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1479 {
1480 	int i, err = 0;
1481 
1482 	for (i = 0; i < adapter->num_rx_queues; i++) {
1483 		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1484 		if (unlikely(err)) {
1485 			dev_err(&adapter->netdev->dev, "%s: failed to "
1486 				"initialize rx queue%i\n",
1487 				adapter->netdev->name, i);
1488 			break;
1489 		}
1490 	}
1491 	return err;
1492 
1493 }
1494 
1495 
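/*
 * Allocate the two DMA-coherent rx descriptor rings, the rx completion ring,
 * and a single buf_info array shared by both rings. On failure, anything
 * already allocated is freed via vmxnet3_rq_destroy().
 */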
1496 static int
1497 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1498 {
1499 	int i;
1500 	size_t sz;
1501 	struct vmxnet3_rx_buf_info *bi;
1502 
1503 	for (i = 0; i < 2; i++) {
1504 
1505 		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1506 		rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1507 							&rq->rx_ring[i].basePA);
1508 		if (!rq->rx_ring[i].base) {
1509 			netdev_err(adapter->netdev,
1510 				   "failed to allocate rx ring %d\n", i);
1511 			goto err;
1512 		}
1513 	}
1514 
1515 	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1516 	rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1517 						  &rq->comp_ring.basePA);
1518 	if (!rq->comp_ring.base) {
1519 		netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1520 		goto err;
1521 	}
1522 
1523 	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1524 						   rq->rx_ring[1].size);
1525 	bi = kzalloc(sz, GFP_KERNEL);
1526 	if (!bi)
1527 		goto err;
1528 
1529 	rq->buf_info[0] = bi;
1530 	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1531 
1532 	return 0;
1533 
1534 err:
1535 	vmxnet3_rq_destroy(rq, adapter);
1536 	return -ENOMEM;
1537 }
1538 
1539 
1540 static int
1541 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1542 {
1543 	int i, err = 0;
1544 
1545 	for (i = 0; i < adapter->num_rx_queues; i++) {
1546 		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1547 		if (unlikely(err)) {
1548 			dev_err(&adapter->netdev->dev,
1549 				"%s: failed to create rx queue%i\n",
1550 				adapter->netdev->name, i);
1551 			goto err_out;
1552 		}
1553 	}
1554 	return err;
1555 err_out:
1556 	vmxnet3_rq_destroy_all(adapter);
1557 	return err;
1558 
1559 }
1560 
1561 /* Multiple queue aware polling function for tx and rx */
1562 
1563 static int
1564 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1565 {
1566 	int rcd_done = 0, i;
1567 	if (unlikely(adapter->shared->ecr))
1568 		vmxnet3_process_events(adapter);
1569 	for (i = 0; i < adapter->num_tx_queues; i++)
1570 		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1571 
1572 	for (i = 0; i < adapter->num_rx_queues; i++)
1573 		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1574 						   adapter, budget);
1575 	return rcd_done;
1576 }
1577 
1578 
1579 static int
1580 vmxnet3_poll(struct napi_struct *napi, int budget)
1581 {
1582 	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1583 					  struct vmxnet3_rx_queue, napi);
1584 	int rxd_done;
1585 
1586 	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1587 
1588 	if (rxd_done < budget) {
1589 		napi_complete(napi);
1590 		vmxnet3_enable_all_intrs(rx_queue->adapter);
1591 	}
1592 	return rxd_done;
1593 }
1594 
1595 /*
1596  * NAPI polling function for MSI-X mode with multiple Rx queues
1597  * Returns the # of the NAPI credit consumed (# of rx descriptors processed)
1598  */
1599 
1600 static int
1601 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1602 {
1603 	struct vmxnet3_rx_queue *rq = container_of(napi,
1604 						struct vmxnet3_rx_queue, napi);
1605 	struct vmxnet3_adapter *adapter = rq->adapter;
1606 	int rxd_done;
1607 
1608 	/* When sharing interrupt with corresponding tx queue, process
1609 	 * tx completions in that queue as well
1610 	 */
1611 	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1612 		struct vmxnet3_tx_queue *tq =
1613 				&adapter->tx_queue[rq - adapter->rx_queue];
1614 		vmxnet3_tq_tx_complete(tq, adapter);
1615 	}
1616 
1617 	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1618 
1619 	if (rxd_done < budget) {
1620 		napi_complete(napi);
1621 		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1622 	}
1623 	return rxd_done;
1624 }
1625 
1626 
1627 #ifdef CONFIG_PCI_MSI
1628 
1629 /*
1630  * Handle completion interrupts on tx queues
1631  * Returns whether or not the intr is handled
1632  */
1633 
1634 static irqreturn_t
1635 vmxnet3_msix_tx(int irq, void *data)
1636 {
1637 	struct vmxnet3_tx_queue *tq = data;
1638 	struct vmxnet3_adapter *adapter = tq->adapter;
1639 
1640 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1641 		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1642 
	/* Handle the case where only one irq is allocated for all tx queues */
1644 	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1645 		int i;
1646 		for (i = 0; i < adapter->num_tx_queues; i++) {
1647 			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1648 			vmxnet3_tq_tx_complete(txq, adapter);
1649 		}
1650 	} else {
1651 		vmxnet3_tq_tx_complete(tq, adapter);
1652 	}
1653 	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1654 
1655 	return IRQ_HANDLED;
1656 }
1657 
1658 
1659 /*
1660  * Handle completion interrupts on rx queues. Returns whether or not the
1661  * intr is handled
1662  */
1663 
1664 static irqreturn_t
1665 vmxnet3_msix_rx(int irq, void *data)
1666 {
1667 	struct vmxnet3_rx_queue *rq = data;
1668 	struct vmxnet3_adapter *adapter = rq->adapter;
1669 
1670 	/* disable intr if needed */
1671 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1672 		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1673 	napi_schedule(&rq->napi);
1674 
1675 	return IRQ_HANDLED;
1676 }
1677 
1678 /*
1679  *----------------------------------------------------------------------------
1680  *
1681  * vmxnet3_msix_event --
1682  *
1683  *    vmxnet3 msix event intr handler
1684  *
1685  * Result:
1686  *    whether or not the intr is handled
1687  *
1688  *----------------------------------------------------------------------------
1689  */
1690 
1691 static irqreturn_t
1692 vmxnet3_msix_event(int irq, void *data)
1693 {
1694 	struct net_device *dev = data;
1695 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1696 
1697 	/* disable intr if needed */
1698 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1699 		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1700 
1701 	if (adapter->shared->ecr)
1702 		vmxnet3_process_events(adapter);
1703 
1704 	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1705 
1706 	return IRQ_HANDLED;
1707 }
1708 
1709 #endif /* CONFIG_PCI_MSI  */
1710 
1711 
1712 /* Interrupt handler for vmxnet3  */
1713 static irqreturn_t
1714 vmxnet3_intr(int irq, void *dev_id)
1715 {
1716 	struct net_device *dev = dev_id;
1717 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1718 
1719 	if (adapter->intr.type == VMXNET3_IT_INTX) {
1720 		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1721 		if (unlikely(icr == 0))
1722 			/* not ours */
1723 			return IRQ_NONE;
1724 	}
1725 
1726 
1727 	/* disable intr if needed */
1728 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1729 		vmxnet3_disable_all_intrs(adapter);
1730 
1731 	napi_schedule(&adapter->rx_queue[0].napi);
1732 
1733 	return IRQ_HANDLED;
1734 }
1735 
1736 #ifdef CONFIG_NET_POLL_CONTROLLER
1737 
1738 /* netpoll callback. */
1739 static void
1740 vmxnet3_netpoll(struct net_device *netdev)
1741 {
1742 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1743 
1744 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1745 		vmxnet3_disable_all_intrs(adapter);
1746 
1747 	vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1748 	vmxnet3_enable_all_intrs(adapter);
1749 
1750 }
1751 #endif	/* CONFIG_NET_POLL_CONTROLLER */
1752 
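/*
 * Request the device's interrupt(s): per-queue MSI-X vectors (shared between
 * tx and rx queues according to share_intr), a single MSI, or a shared INTx
 * line. Records the intr_idx assigned to each queue's completion ring and to
 * the event interrupt, and sets the default moderation levels.
 */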
1753 static int
1754 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1755 {
1756 	struct vmxnet3_intr *intr = &adapter->intr;
1757 	int err = 0, i;
1758 	int vector = 0;
1759 
1760 #ifdef CONFIG_PCI_MSI
1761 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1762 		for (i = 0; i < adapter->num_tx_queues; i++) {
1763 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1764 				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1765 					adapter->netdev->name, vector);
1766 				err = request_irq(
1767 					      intr->msix_entries[vector].vector,
1768 					      vmxnet3_msix_tx, 0,
1769 					      adapter->tx_queue[i].name,
1770 					      &adapter->tx_queue[i]);
1771 			} else {
1772 				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1773 					adapter->netdev->name, vector);
1774 			}
1775 			if (err) {
1776 				dev_err(&adapter->netdev->dev,
1777 					"Failed to request irq for MSIX, %s, "
1778 					"error %d\n",
1779 					adapter->tx_queue[i].name, err);
1780 				return err;
1781 			}
1782 
1783 			/* Handle the case where only 1 MSIx was allocated for
1784 			 * all tx queues */
1785 			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1786 				for (; i < adapter->num_tx_queues; i++)
1787 					adapter->tx_queue[i].comp_ring.intr_idx
1788 								= vector;
1789 				vector++;
1790 				break;
1791 			} else {
1792 				adapter->tx_queue[i].comp_ring.intr_idx
1793 								= vector++;
1794 			}
1795 		}
1796 		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1797 			vector = 0;
1798 
1799 		for (i = 0; i < adapter->num_rx_queues; i++) {
1800 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1801 				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1802 					adapter->netdev->name, vector);
1803 			else
1804 				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1805 					adapter->netdev->name, vector);
1806 			err = request_irq(intr->msix_entries[vector].vector,
1807 					  vmxnet3_msix_rx, 0,
1808 					  adapter->rx_queue[i].name,
1809 					  &(adapter->rx_queue[i]));
1810 			if (err) {
1811 				netdev_err(adapter->netdev,
1812 					   "Failed to request irq for MSIX, "
1813 					   "%s, error %d\n",
1814 					   adapter->rx_queue[i].name, err);
1815 				return err;
1816 			}
1817 
1818 			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1819 		}
1820 
1821 		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1822 			adapter->netdev->name, vector);
1823 		err = request_irq(intr->msix_entries[vector].vector,
1824 				  vmxnet3_msix_event, 0,
1825 				  intr->event_msi_vector_name, adapter->netdev);
1826 		intr->event_intr_idx = vector;
1827 
1828 	} else if (intr->type == VMXNET3_IT_MSI) {
1829 		adapter->num_rx_queues = 1;
1830 		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1831 				  adapter->netdev->name, adapter->netdev);
1832 	} else {
1833 #endif
1834 		adapter->num_rx_queues = 1;
1835 		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1836 				  IRQF_SHARED, adapter->netdev->name,
1837 				  adapter->netdev);
1838 #ifdef CONFIG_PCI_MSI
1839 	}
1840 #endif
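	/* In MSI-X mode, vector indexes the event interrupt, so the total is
	 * vector + 1; for MSI/INTx, vector is still 0 and one interrupt is
	 * used. */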
1841 	intr->num_intrs = vector + 1;
1842 	if (err) {
1843 		netdev_err(adapter->netdev,
1844 			   "Failed to request irq (intr type:%d), error %d\n",
1845 			   intr->type, err);
1846 	} else {
1847 		/* Number of rx queues will not change after this */
1848 		for (i = 0; i < adapter->num_rx_queues; i++) {
1849 			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1850 			rq->qid = i;
1851 			rq->qid2 = i + adapter->num_rx_queues;
1852 		}
1853 
1856 		/* init our intr settings */
1857 		for (i = 0; i < intr->num_intrs; i++)
1858 			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1859 		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1860 			adapter->intr.event_intr_idx = 0;
1861 			for (i = 0; i < adapter->num_tx_queues; i++)
1862 				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1863 			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1864 		}
1865 
1866 		netdev_info(adapter->netdev,
1867 			    "intr type %u, mode %u, %u vectors allocated\n",
1868 			    intr->type, intr->mask_mode, intr->num_intrs);
1869 	}
1870 
1871 	return err;
1872 }
1873 
1874 
1875 static void
1876 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1877 {
1878 	struct vmxnet3_intr *intr = &adapter->intr;
1879 	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1880 
1881 	switch (intr->type) {
1882 #ifdef CONFIG_PCI_MSI
1883 	case VMXNET3_IT_MSIX:
1884 	{
1885 		int i, vector = 0;
1886 
1887 		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1888 			for (i = 0; i < adapter->num_tx_queues; i++) {
1889 				free_irq(intr->msix_entries[vector++].vector,
1890 					 &(adapter->tx_queue[i]));
1891 				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1892 					break;
1893 			}
1894 		}
1895 
1896 		for (i = 0; i < adapter->num_rx_queues; i++) {
1897 			free_irq(intr->msix_entries[vector++].vector,
1898 				 &(adapter->rx_queue[i]));
1899 		}
1900 
1901 		free_irq(intr->msix_entries[vector].vector,
1902 			 adapter->netdev);
1903 		BUG_ON(vector >= intr->num_intrs);
1904 		break;
1905 	}
1906 #endif
1907 	case VMXNET3_IT_MSI:
1908 		free_irq(adapter->pdev->irq, adapter->netdev);
1909 		break;
1910 	case VMXNET3_IT_INTX:
1911 		free_irq(adapter->pdev->irq, adapter->netdev);
1912 		break;
1913 	default:
1914 		BUG();
1915 	}
1916 }
1917 
1918 
1919 static void
1920 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1921 {
1922 	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1923 	u16 vid;
1924 
1925 	/* allow untagged pkts */
1926 	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1927 
1928 	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1929 		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1930 }
1931 
1932 
1933 static int
1934 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1935 {
1936 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1937 
1938 	if (!(netdev->flags & IFF_PROMISC)) {
1939 		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1940 		unsigned long flags;
1941 
1942 		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1943 		spin_lock_irqsave(&adapter->cmd_lock, flags);
1944 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1945 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1946 		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1947 	}
1948 
1949 	set_bit(vid, adapter->active_vlans);
1950 
1951 	return 0;
1952 }
1953 
1954 
1955 static int
1956 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1957 {
1958 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1959 
1960 	if (!(netdev->flags & IFF_PROMISC)) {
1961 		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1962 		unsigned long flags;
1963 
1964 		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1965 		spin_lock_irqsave(&adapter->cmd_lock, flags);
1966 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1967 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1968 		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1969 	}
1970 
1971 	clear_bit(vid, adapter->active_vlans);
1972 
1973 	return 0;
1974 }
1975 
1976 
1977 static u8 *
1978 vmxnet3_copy_mc(struct net_device *netdev)
1979 {
1980 	u8 *buf = NULL;
1981 	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1982 
1983 	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1984 	if (sz <= 0xffff) {
1985 		/* We may be called with BH disabled */
1986 		buf = kmalloc(sz, GFP_ATOMIC);
1987 		if (buf) {
1988 			struct netdev_hw_addr *ha;
1989 			int i = 0;
1990 
1991 			netdev_for_each_mc_addr(ha, netdev)
1992 				memcpy(buf + i++ * ETH_ALEN, ha->addr,
1993 				       ETH_ALEN);
1994 		}
1995 	}
1996 	return buf;
1997 }
1998 
1999 
2000 static void
2001 vmxnet3_set_mc(struct net_device *netdev)
2002 {
2003 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2004 	unsigned long flags;
2005 	struct Vmxnet3_RxFilterConf *rxConf =
2006 					&adapter->shared->devRead.rxFilterConf;
2007 	u8 *new_table = NULL;
2008 	u32 new_mode = VMXNET3_RXM_UCAST;
2009 
2010 	if (netdev->flags & IFF_PROMISC) {
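		/* Promiscuous mode: clear the VLAN filter table; it is
		 * repopulated from active_vlans in the else branch below
		 * once promiscuous mode is turned off. */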
2011 		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2012 		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2013 
2014 		new_mode |= VMXNET3_RXM_PROMISC;
2015 	} else {
2016 		vmxnet3_restore_vlan(adapter);
2017 	}
2018 
2019 	if (netdev->flags & IFF_BROADCAST)
2020 		new_mode |= VMXNET3_RXM_BCAST;
2021 
2022 	if (netdev->flags & IFF_ALLMULTI)
2023 		new_mode |= VMXNET3_RXM_ALL_MULTI;
2024 	else
2025 		if (!netdev_mc_empty(netdev)) {
2026 			new_table = vmxnet3_copy_mc(netdev);
2027 			if (new_table) {
2028 				new_mode |= VMXNET3_RXM_MCAST;
2029 				rxConf->mfTableLen = cpu_to_le16(
2030 					netdev_mc_count(netdev) * ETH_ALEN);
2031 				rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
2032 						    new_table));
2033 			} else {
2034 				netdev_info(netdev, "failed to copy mcast list"
2035 					    ", setting ALL_MULTI\n");
2036 				new_mode |= VMXNET3_RXM_ALL_MULTI;
2037 			}
2038 		}
2039 
2041 	if (!(new_mode & VMXNET3_RXM_MCAST)) {
2042 		rxConf->mfTableLen = 0;
2043 		rxConf->mfTablePA = 0;
2044 	}
2045 
2046 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2047 	if (new_mode != rxConf->rxMode) {
2048 		rxConf->rxMode = cpu_to_le32(new_mode);
2049 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2050 				       VMXNET3_CMD_UPDATE_RX_MODE);
2051 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2052 				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2053 	}
2054 
2055 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2056 			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
2057 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2058 
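	/* Free the temporary multicast list copied by vmxnet3_copy_mc();
	 * the device has already been asked to update its MAC filters. */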
2059 	kfree(new_table);
2060 }
2061 
2062 void
2063 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2064 {
2065 	int i;
2066 
2067 	for (i = 0; i < adapter->num_rx_queues; i++)
2068 		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2069 }
2070 
2071 
2072 /*
2073  *   Set up driver_shared based on settings in adapter.
2074  */
2075 
2076 static void
2077 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2078 {
2079 	struct Vmxnet3_DriverShared *shared = adapter->shared;
2080 	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2081 	struct Vmxnet3_TxQueueConf *tqc;
2082 	struct Vmxnet3_RxQueueConf *rqc;
2083 	int i;
2084 
2085 	memset(shared, 0, sizeof(*shared));
2086 
2087 	/* driver settings */
2088 	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2089 	devRead->misc.driverInfo.version = cpu_to_le32(
2090 						VMXNET3_DRIVER_VERSION_NUM);
2091 	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2092 				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2093 	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
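	/* gos is a packed bit-field struct; convert it to little endian as
	 * one 32-bit word. */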
2094 	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2095 				*((u32 *)&devRead->misc.driverInfo.gos));
2096 	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2097 	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2098 
2099 	devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2100 	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2101 
2102 	/* set up feature flags */
2103 	if (adapter->netdev->features & NETIF_F_RXCSUM)
2104 		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2105 
2106 	if (adapter->netdev->features & NETIF_F_LRO) {
2107 		devRead->misc.uptFeatures |= UPT1_F_LRO;
2108 		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2109 	}
2110 	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2111 		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2112 
2113 	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2114 	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2115 	devRead->misc.queueDescLen = cpu_to_le32(
2116 		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2117 		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2118 
2119 	/* tx queue settings */
2120 	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2121 	for (i = 0; i < adapter->num_tx_queues; i++) {
2122 		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2123 		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2124 		tqc = &adapter->tqd_start[i].conf;
2125 		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2126 		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2127 		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2128 		tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2129 		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2130 		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2131 		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2132 		tqc->ddLen          = cpu_to_le32(
2133 					sizeof(struct vmxnet3_tx_buf_info) *
2134 					tqc->txRingSize);
2135 		tqc->intrIdx        = tq->comp_ring.intr_idx;
2136 	}
2137 
2138 	/* rx queue settings */
2139 	devRead->misc.numRxQueues = adapter->num_rx_queues;
2140 	for (i = 0; i < adapter->num_rx_queues; i++) {
2141 		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2142 		rqc = &adapter->rqd_start[i].conf;
2143 		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2144 		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2145 		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2146 		rqc->ddPA            = cpu_to_le64(virt_to_phys(
2147 							rq->buf_info));
2148 		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2149 		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2150 		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2151 		rqc->ddLen           = cpu_to_le32(
2152 					sizeof(struct vmxnet3_rx_buf_info) *
2153 					(rqc->rxRingSize[0] +
2154 					 rqc->rxRingSize[1]));
2155 		rqc->intrIdx         = rq->comp_ring.intr_idx;
2156 	}
2157 
2158 #ifdef VMXNET3_RSS
2159 	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2160 
2161 	if (adapter->rss) {
2162 		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2163 		static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2164 			0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2165 			0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2166 			0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2167 			0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2168 			0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2169 		};
2170 
2171 		devRead->misc.uptFeatures |= UPT1_F_RSS;
2172 		devRead->misc.numRxQueues = adapter->num_rx_queues;
2173 		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2174 				    UPT1_RSS_HASH_TYPE_IPV4 |
2175 				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2176 				    UPT1_RSS_HASH_TYPE_IPV6;
2177 		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2178 		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2179 		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2180 		memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2181 
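		/* Fill the indirection table with the default round-robin
		 * mapping so receive traffic is spread across all rx queues. */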
2182 		for (i = 0; i < rssConf->indTableSize; i++)
2183 			rssConf->indTable[i] = ethtool_rxfh_indir_default(
2184 				i, adapter->num_rx_queues);
2185 
2186 		devRead->rssConfDesc.confVer = 1;
2187 		devRead->rssConfDesc.confLen = sizeof(*rssConf);
2188 		devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
2189 	}
2190 
2191 #endif /* VMXNET3_RSS */
2192 
2193 	/* intr settings */
2194 	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2195 				     VMXNET3_IMM_AUTO;
2196 	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2197 	for (i = 0; i < adapter->intr.num_intrs; i++)
2198 		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2199 
2200 	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2201 	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2202 
2203 	/* rx filter settings */
2204 	devRead->rxFilterConf.rxMode = 0;
2205 	vmxnet3_restore_vlan(adapter);
2206 	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2207 
2208 	/* the rest are already zeroed */
2209 }
2210 
2211 
2212 int
2213 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2214 {
2215 	int err, i;
2216 	u32 ret;
2217 	unsigned long flags;
2218 
2219 	netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2220 		" ring sizes %u %u %u\n", adapter->netdev->name,
2221 		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2222 		adapter->tx_queue[0].tx_ring.size,
2223 		adapter->rx_queue[0].rx_ring[0].size,
2224 		adapter->rx_queue[0].rx_ring[1].size);
2225 
2226 	vmxnet3_tq_init_all(adapter);
2227 	err = vmxnet3_rq_init_all(adapter);
2228 	if (err) {
2229 		netdev_err(adapter->netdev,
2230 			   "Failed to init rx queue, error %d\n", err);
2231 		goto rq_err;
2232 	}
2233 
2234 	err = vmxnet3_request_irqs(adapter);
2235 	if (err) {
2236 		netdev_err(adapter->netdev,
2237 			   "Failed to set up irqs, error %d\n", err);
2238 		goto irq_err;
2239 	}
2240 
2241 	vmxnet3_setup_driver_shared(adapter);
2242 
2243 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2244 			       adapter->shared_pa));
2245 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2246 			       adapter->shared_pa));
2247 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2248 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2249 			       VMXNET3_CMD_ACTIVATE_DEV);
2250 	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2251 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2252 
2253 	if (ret != 0) {
2254 		netdev_err(adapter->netdev,
2255 			   "Failed to activate dev: error %u\n", ret);
2256 		err = -EINVAL;
2257 		goto activate_err;
2258 	}
2259 
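	/* Write each rx ring's fill index (next2fill) to its RXPROD
	 * register so the device sees the initially available rx buffers. */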
2260 	for (i = 0; i < adapter->num_rx_queues; i++) {
2261 		VMXNET3_WRITE_BAR0_REG(adapter,
2262 				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2263 				adapter->rx_queue[i].rx_ring[0].next2fill);
2264 		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2265 				(i * VMXNET3_REG_ALIGN)),
2266 				adapter->rx_queue[i].rx_ring[1].next2fill);
2267 	}
2268 
2269 	/* Apply the rx filter settings last. */
2270 	vmxnet3_set_mc(adapter->netdev);
2271 
2272 	/*
2273 	 * Check link state when first activating device. It will start the
2274 	 * tx queue if the link is up.
2275 	 */
2276 	vmxnet3_check_link(adapter, true);
2277 	for (i = 0; i < adapter->num_rx_queues; i++)
2278 		napi_enable(&adapter->rx_queue[i].napi);
2279 	vmxnet3_enable_all_intrs(adapter);
2280 	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2281 	return 0;
2282 
2283 activate_err:
2284 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2285 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2286 	vmxnet3_free_irqs(adapter);
2287 irq_err:
2288 rq_err:
2289 	/* free up buffers we allocated */
2290 	vmxnet3_rq_cleanup_all(adapter);
2291 	return err;
2292 }
2293 
2294 
2295 void
2296 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2297 {
2298 	unsigned long flags;
2299 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2300 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2301 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2302 }
2303 
2304 
2305 int
2306 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2307 {
2308 	int i;
2309 	unsigned long flags;
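	/* Nothing more to do if the device is already quiesced. */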
2310 	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2311 		return 0;
2312 
2314 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2315 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2316 			       VMXNET3_CMD_QUIESCE_DEV);
2317 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2318 	vmxnet3_disable_all_intrs(adapter);
2319 
2320 	for (i = 0; i < adapter->num_rx_queues; i++)
2321 		napi_disable(&adapter->rx_queue[i].napi);
2322 	netif_tx_disable(adapter->netdev);
2323 	adapter->link_speed = 0;
2324 	netif_carrier_off(adapter->netdev);
2325 
2326 	vmxnet3_tq_cleanup_all(adapter);
2327 	vmxnet3_rq_cleanup_all(adapter);
2328 	vmxnet3_free_irqs(adapter);
2329 	return 0;
2330 }
2331 
2332 
2333 static void
2334 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2335 {
2336 	u32 tmp;
2337 
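	/* The first four bytes of the MAC address go into MACL, the
	 * remaining two into the low 16 bits of MACH. */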
2338 	tmp = *(u32 *)mac;
2339 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2340 
2341 	tmp = (mac[5] << 8) | mac[4];
2342 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2343 }
2344 
2345 
2346 static int
2347 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2348 {
2349 	struct sockaddr *addr = p;
2350 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2351 
2352 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2353 	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2354 
2355 	return 0;
2356 }
2357 
2358 
2359 /* ==================== initialization and cleanup routines ============ */
2360 
2361 static int
2362 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2363 {
2364 	int err;
2365 	unsigned long mmio_start, mmio_len;
2366 	struct pci_dev *pdev = adapter->pdev;
2367 
2368 	err = pci_enable_device(pdev);
2369 	if (err) {
2370 		dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2371 		return err;
2372 	}
2373 
2374 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2375 		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2376 			dev_err(&pdev->dev,
2377 				"pci_set_consistent_dma_mask failed\n");
2378 			err = -EIO;
2379 			goto err_set_mask;
2380 		}
2381 		*dma64 = true;
2382 	} else {
2383 		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2384 			dev_err(&pdev->dev,
2385 				"pci_set_dma_mask failed\n");
2386 			err = -EIO;
2387 			goto err_set_mask;
2388 		}
2389 		*dma64 = false;
2390 	}
2391 
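	/* Request BARs 0 and 1 only (mask 0x3): BAR0 carries the interrupt
	 * mask and producer registers, BAR1 the command and configuration
	 * registers. */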
2392 	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2393 					   vmxnet3_driver_name);
2394 	if (err) {
2395 		dev_err(&pdev->dev,
2396 			"Failed to request region for adapter: error %d\n", err);
2397 		goto err_set_mask;
2398 	}
2399 
2400 	pci_set_master(pdev);
2401 
2402 	mmio_start = pci_resource_start(pdev, 0);
2403 	mmio_len = pci_resource_len(pdev, 0);
2404 	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2405 	if (!adapter->hw_addr0) {
2406 		dev_err(&pdev->dev, "Failed to map bar0\n");
2407 		err = -EIO;
2408 		goto err_ioremap;
2409 	}
2410 
2411 	mmio_start = pci_resource_start(pdev, 1);
2412 	mmio_len = pci_resource_len(pdev, 1);
2413 	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2414 	if (!adapter->hw_addr1) {
2415 		dev_err(&pdev->dev, "Failed to map bar1\n");
2416 		err = -EIO;
2417 		goto err_bar1;
2418 	}
2419 	return 0;
2420 
2421 err_bar1:
2422 	iounmap(adapter->hw_addr0);
2423 err_ioremap:
2424 	pci_release_selected_regions(pdev, (1 << 2) - 1);
2425 err_set_mask:
2426 	pci_disable_device(pdev);
2427 	return err;
2428 }
2429 
2430 
2431 static void
2432 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2433 {
2434 	BUG_ON(!adapter->pdev);
2435 
2436 	iounmap(adapter->hw_addr0);
2437 	iounmap(adapter->hw_addr1);
2438 	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2439 	pci_disable_device(adapter->pdev);
2440 }
2441 
2442 
2443 static void
2444 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2445 {
2446 	size_t sz, i, ring0_size, ring1_size, comp_size;
2447 	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
2448 
2450 	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2451 				    VMXNET3_MAX_ETH_HDR_SIZE) {
2452 		adapter->skb_buf_size = adapter->netdev->mtu +
2453 					VMXNET3_MAX_ETH_HDR_SIZE;
2454 		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2455 			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2456 
2457 		adapter->rx_buf_per_pkt = 1;
2458 	} else {
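		/* MTU does not fit in a single skb buffer: the first buffer
		 * holds up to VMXNET3_MAX_SKB_BUF_SIZE bytes and the rest of
		 * the frame spills into additional page-sized buffers. */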
2459 		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2460 		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2461 					    VMXNET3_MAX_ETH_HDR_SIZE;
2462 		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2463 	}
2464 
2465 	/*
2466 	 * for simplicity, force the ring0 size to be a multiple of
2467 	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2468 	 */
2469 	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2470 	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2471 	ring0_size = (ring0_size + sz - 1) / sz * sz;
2472 	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2473 			   sz * sz);
2474 	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2475 	comp_size = ring0_size + ring1_size;
2476 
2477 	for (i = 0; i < adapter->num_rx_queues; i++) {
2478 		rq = &adapter->rx_queue[i];
2479 		rq->rx_ring[0].size = ring0_size;
2480 		rq->rx_ring[1].size = ring1_size;
2481 		rq->comp_ring.size = comp_size;
2482 	}
2483 }
2484 
2485 
2486 int
2487 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2488 		      u32 rx_ring_size, u32 rx_ring2_size)
2489 {
2490 	int err = 0, i;
2491 
2492 	for (i = 0; i < adapter->num_tx_queues; i++) {
2493 		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2494 		tq->tx_ring.size   = tx_ring_size;
2495 		tq->data_ring.size = tx_ring_size;
2496 		tq->comp_ring.size = tx_ring_size;
2497 		tq->shared = &adapter->tqd_start[i].ctrl;
2498 		tq->stopped = true;
2499 		tq->adapter = adapter;
2500 		tq->qid = i;
2501 		err = vmxnet3_tq_create(tq, adapter);
2502 		/*
2503 		 * Too late to change num_tx_queues. We cannot make do with
2504 		 * fewer queues than we asked for.
2505 		 */
2506 		if (err)
2507 			goto queue_err;
2508 	}
2509 
2510 	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2511 	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2512 	vmxnet3_adjust_rx_ring_size(adapter);
2513 	for (i = 0; i < adapter->num_rx_queues; i++) {
2514 		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2515 		/* qid and qid2 for rx queues will be assigned later when num
2516 		 * of rx queues is finalized after allocating intrs */
2517 		rq->shared = &adapter->rqd_start[i].ctrl;
2518 		rq->adapter = adapter;
2519 		err = vmxnet3_rq_create(rq, adapter);
2520 		if (err) {
2521 			if (i == 0) {
2522 				netdev_err(adapter->netdev,
2523 					   "Could not allocate any rx queues. "
2524 					   "Aborting.\n");
2525 				goto queue_err;
2526 			} else {
2527 				netdev_info(adapter->netdev,
2528 					    "Number of rx queues changed "
2529 					    "to %d.\n", i);
2530 				adapter->num_rx_queues = i;
2531 				err = 0;
2532 				break;
2533 			}
2534 		}
2535 	}
2536 	return err;
2537 queue_err:
2538 	vmxnet3_tq_destroy_all(adapter);
2539 	return err;
2540 }
2541 
2542 static int
2543 vmxnet3_open(struct net_device *netdev)
2544 {
2545 	struct vmxnet3_adapter *adapter;
2546 	int err, i;
2547 
2548 	adapter = netdev_priv(netdev);
2549 
2550 	for (i = 0; i < adapter->num_tx_queues; i++)
2551 		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2552 
2553 	err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2554 				    VMXNET3_DEF_RX_RING_SIZE,
2555 				    VMXNET3_DEF_RX_RING_SIZE);
2556 	if (err)
2557 		goto queue_err;
2558 
2559 	err = vmxnet3_activate_dev(adapter);
2560 	if (err)
2561 		goto activate_err;
2562 
2563 	return 0;
2564 
2565 activate_err:
2566 	vmxnet3_rq_destroy_all(adapter);
2567 	vmxnet3_tq_destroy_all(adapter);
2568 queue_err:
2569 	return err;
2570 }
2571 
2572 
2573 static int
2574 vmxnet3_close(struct net_device *netdev)
2575 {
2576 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2577 
2578 	/*
2579 	 * Reset_work may be in the middle of resetting the device, wait for its
2580 	 * completion.
2581 	 */
2582 	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2583 		msleep(1);
2584 
2585 	vmxnet3_quiesce_dev(adapter);
2586 
2587 	vmxnet3_rq_destroy_all(adapter);
2588 	vmxnet3_tq_destroy_all(adapter);
2589 
2590 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2591 
2593 	return 0;
2594 }
2595 
2596 
2597 void
2598 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2599 {
2600 	int i;
2601 
2602 	/*
2603 	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2604 	 * vmxnet3_close() will deadlock.
2605 	 */
2606 	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2607 
2608 	/* we need to enable NAPI, otherwise dev_close will deadlock */
2609 	for (i = 0; i < adapter->num_rx_queues; i++)
2610 		napi_enable(&adapter->rx_queue[i].napi);
2611 	dev_close(adapter->netdev);
2612 }
2613 
2614 
2615 static int
2616 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2617 {
2618 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2619 	int err = 0;
2620 
2621 	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2622 		return -EINVAL;
2623 
2624 	netdev->mtu = new_mtu;
2625 
2626 	/*
2627 	 * Reset_work may be in the middle of resetting the device, wait for its
2628 	 * completion.
2629 	 */
2630 	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2631 		msleep(1);
2632 
2633 	if (netif_running(netdev)) {
2634 		vmxnet3_quiesce_dev(adapter);
2635 		vmxnet3_reset_dev(adapter);
2636 
2637 		/* we need to re-create the rx queue based on the new mtu */
2638 		vmxnet3_rq_destroy_all(adapter);
2639 		vmxnet3_adjust_rx_ring_size(adapter);
2640 		err = vmxnet3_rq_create_all(adapter);
2641 		if (err) {
2642 			netdev_err(netdev,
2643 				   "failed to re-create rx queues, "
2644 				   "error %d. Closing it.\n", err);
2645 			goto out;
2646 		}
2647 
2648 		err = vmxnet3_activate_dev(adapter);
2649 		if (err) {
2650 			netdev_err(netdev,
2651 				   "failed to re-activate, error %d. "
2652 				   "Closing it.\n", err);
2653 			goto out;
2654 		}
2655 	}
2656 
2657 out:
2658 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2659 	if (err)
2660 		vmxnet3_force_close(adapter);
2661 
2662 	return err;
2663 }
2664 
2665 
2666 static void
2667 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2668 {
2669 	struct net_device *netdev = adapter->netdev;
2670 
2671 	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2672 		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2673 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2674 		NETIF_F_LRO;
2675 	if (dma64)
2676 		netdev->hw_features |= NETIF_F_HIGHDMA;
2677 	netdev->vlan_features = netdev->hw_features &
2678 				~(NETIF_F_HW_VLAN_CTAG_TX |
2679 				  NETIF_F_HW_VLAN_CTAG_RX);
2680 	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2681 }
2682 
2683 
2684 static void
2685 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2686 {
2687 	u32 tmp;
2688 
2689 	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2690 	*(u32 *)mac = tmp;
2691 
2692 	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2693 	mac[4] = tmp & 0xff;
2694 	mac[5] = (tmp >> 8) & 0xff;
2695 }
2696 
2697 #ifdef CONFIG_PCI_MSI
2698 
2699 /*
2700  * Enable MSIx vectors.
2701  * Returns :
2702  *	0 on successful enabling of required vectors,
2703  *	VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
2704  *	 could be enabled.
2705  *	number of vectors which can be enabled otherwise (this number is smaller
2706  *	 than VMXNET3_LINUX_MIN_MSIX_VECT)
2707  */
2708 
2709 static int
2710 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2711 			     int vectors)
2712 {
2713 	int err = 0, vector_threshold;
2714 	vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2715 
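	/* pci_enable_msix() returns 0 on success, a negative errno on a
	 * hard failure, or a positive count of the vectors actually
	 * available when fewer than requested can be allocated. */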
2716 	while (vectors >= vector_threshold) {
2717 		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2718 				      vectors);
2719 		if (!err) {
2720 			adapter->intr.num_intrs = vectors;
2721 			return 0;
2722 		} else if (err < 0) {
2723 			dev_err(&adapter->netdev->dev,
2724 				   "Failed to enable MSI-X, error: %d\n", err);
2725 			vectors = 0;
2726 		} else if (err < vector_threshold) {
2727 			break;
2728 		} else {
2729 			/* If the required number of MSI-X vectors cannot be
2730 			 * enabled, retry with the minimum number required.
2731 			 */
2732 			dev_err(&adapter->netdev->dev,
2733 				"Failed to enable %d MSI-X, trying %d instead\n",
2734 				    vectors, vector_threshold);
2735 			vectors = vector_threshold;
2736 		}
2737 	}
2738 
2739 	dev_info(&adapter->pdev->dev,
2740 		 "Number of MSI-X interrupts which can be allocated "
2741 		 "is lower than the minimum required.\n");
2742 	return err;
2743 }
2744 
2745 
2746 #endif /* CONFIG_PCI_MSI */
2747 
2748 static void
2749 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2750 {
2751 	u32 cfg;
2752 	unsigned long flags;
2753 
2754 	/* intr settings */
2755 	spin_lock_irqsave(&adapter->cmd_lock, flags);
2756 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2757 			       VMXNET3_CMD_GET_CONF_INTR);
2758 	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2759 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2760 	adapter->intr.type = cfg & 0x3;
2761 	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2762 
2763 	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2764 		adapter->intr.type = VMXNET3_IT_MSIX;
2765 	}
2766 
2767 #ifdef CONFIG_PCI_MSI
2768 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2769 		int vector, err = 0;
2770 
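		/* One vector per tx queue (or a single shared tx vector),
		 * plus one per rx queue unless rx buddy-shares with tx, plus
		 * one for events; never request fewer than the driver
		 * minimum. */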
2771 		adapter->intr.num_intrs = (adapter->share_intr ==
2772 					   VMXNET3_INTR_TXSHARE) ? 1 :
2773 					   adapter->num_tx_queues;
2774 		adapter->intr.num_intrs += (adapter->share_intr ==
2775 					   VMXNET3_INTR_BUDDYSHARE) ? 0 :
2776 					   adapter->num_rx_queues;
2777 		adapter->intr.num_intrs += 1;		/* for link event */
2778 
2779 		adapter->intr.num_intrs = (adapter->intr.num_intrs >
2780 					   VMXNET3_LINUX_MIN_MSIX_VECT
2781 					   ? adapter->intr.num_intrs :
2782 					   VMXNET3_LINUX_MIN_MSIX_VECT);
2783 
2784 		for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2785 			adapter->intr.msix_entries[vector].entry = vector;
2786 
2787 		err = vmxnet3_acquire_msix_vectors(adapter,
2788 						   adapter->intr.num_intrs);
2789 		/* If we cannot allocate one MSIx vector per queue
2790 		 * then limit the number of rx queues to 1
2791 		 */
2792 		if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2793 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2794 			    || adapter->num_rx_queues != 1) {
2795 				adapter->share_intr = VMXNET3_INTR_TXSHARE;
2796 				netdev_err(adapter->netdev,
2797 					   "Number of rx queues : 1\n");
2798 				adapter->num_rx_queues = 1;
2799 				adapter->intr.num_intrs =
2800 						VMXNET3_LINUX_MIN_MSIX_VECT;
2801 			}
2802 			return;
2803 		}
2804 		if (!err)
2805 			return;
2806 
2807 		/* If we cannot allocate MSIx vectors use only one rx queue */
2808 		dev_info(&adapter->pdev->dev,
2809 			 "Failed to enable MSI-X, error %d. "
2810 			 "Limiting #rx queues to 1, try MSI.\n", err);
2811 
2812 		adapter->intr.type = VMXNET3_IT_MSI;
2813 	}
2814 
2815 	if (adapter->intr.type == VMXNET3_IT_MSI) {
2816 		int err;
2817 		err = pci_enable_msi(adapter->pdev);
2818 		if (!err) {
2819 			adapter->num_rx_queues = 1;
2820 			adapter->intr.num_intrs = 1;
2821 			return;
2822 		}
2823 	}
2824 #endif /* CONFIG_PCI_MSI */
2825 
2826 	adapter->num_rx_queues = 1;
2827 	dev_info(&adapter->netdev->dev,
2828 		 "Using INTx interrupt, #Rx queues: 1.\n");
2829 	adapter->intr.type = VMXNET3_IT_INTX;
2830 
2831 	/* INT-X related setting */
2832 	adapter->intr.num_intrs = 1;
2833 }
2834 
2835 
2836 static void
2837 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2838 {
2839 	if (adapter->intr.type == VMXNET3_IT_MSIX)
2840 		pci_disable_msix(adapter->pdev);
2841 	else if (adapter->intr.type == VMXNET3_IT_MSI)
2842 		pci_disable_msi(adapter->pdev);
2843 	else
2844 		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2845 }
2846 
2847 
2848 static void
2849 vmxnet3_tx_timeout(struct net_device *netdev)
2850 {
2851 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2852 	adapter->tx_timeout_count++;
2853 
2854 	netdev_err(adapter->netdev, "tx hang\n");
2855 	schedule_work(&adapter->work);
2856 	netif_wake_queue(adapter->netdev);
2857 }
2858 
2859 
2860 static void
2861 vmxnet3_reset_work(struct work_struct *data)
2862 {
2863 	struct vmxnet3_adapter *adapter;
2864 
2865 	adapter = container_of(data, struct vmxnet3_adapter, work);
2866 
2867 	/* if another thread is resetting the device, no need to proceed */
2868 	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2869 		return;
2870 
2871 	/* if the device is closed, we must leave it alone */
2872 	rtnl_lock();
2873 	if (netif_running(adapter->netdev)) {
2874 		netdev_notice(adapter->netdev, "resetting\n");
2875 		vmxnet3_quiesce_dev(adapter);
2876 		vmxnet3_reset_dev(adapter);
2877 		vmxnet3_activate_dev(adapter);
2878 	} else {
2879 		netdev_info(adapter->netdev, "already closed\n");
2880 	}
2881 	rtnl_unlock();
2882 
2883 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2884 }
2885 
2886 
2887 static int
2888 vmxnet3_probe_device(struct pci_dev *pdev,
2889 		     const struct pci_device_id *id)
2890 {
2891 	static const struct net_device_ops vmxnet3_netdev_ops = {
2892 		.ndo_open = vmxnet3_open,
2893 		.ndo_stop = vmxnet3_close,
2894 		.ndo_start_xmit = vmxnet3_xmit_frame,
2895 		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2896 		.ndo_change_mtu = vmxnet3_change_mtu,
2897 		.ndo_set_features = vmxnet3_set_features,
2898 		.ndo_get_stats64 = vmxnet3_get_stats64,
2899 		.ndo_tx_timeout = vmxnet3_tx_timeout,
2900 		.ndo_set_rx_mode = vmxnet3_set_mc,
2901 		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2902 		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2903 #ifdef CONFIG_NET_POLL_CONTROLLER
2904 		.ndo_poll_controller = vmxnet3_netpoll,
2905 #endif
2906 	};
2907 	int err;
2908 	bool dma64 = false; /* stupid gcc */
2909 	u32 ver;
2910 	struct net_device *netdev;
2911 	struct vmxnet3_adapter *adapter;
2912 	u8 mac[ETH_ALEN];
2913 	int size;
2914 	int num_tx_queues;
2915 	int num_rx_queues;
2916 
2917 	if (!pci_msi_enabled())
2918 		enable_mq = 0;
2919 
2920 #ifdef VMXNET3_RSS
2921 	if (enable_mq)
2922 		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2923 				    (int)num_online_cpus());
2924 	else
2925 #endif
2926 		num_rx_queues = 1;
2927 	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2928 
2929 	if (enable_mq)
2930 		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2931 				    (int)num_online_cpus());
2932 	else
2933 		num_tx_queues = 1;
2934 
2935 	num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2936 	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2937 				   max(num_tx_queues, num_rx_queues));
2938 	dev_info(&pdev->dev,
2939 		 "# of Tx queues : %d, # of Rx queues : %d\n",
2940 		 num_tx_queues, num_rx_queues);
2941 
2942 	if (!netdev)
2943 		return -ENOMEM;
2944 
2945 	pci_set_drvdata(pdev, netdev);
2946 	adapter = netdev_priv(netdev);
2947 	adapter->netdev = netdev;
2948 	adapter->pdev = pdev;
2949 
2950 	spin_lock_init(&adapter->cmd_lock);
2951 	adapter->shared = pci_alloc_consistent(adapter->pdev,
2952 					       sizeof(struct Vmxnet3_DriverShared),
2953 					       &adapter->shared_pa);
2954 	if (!adapter->shared) {
2955 		dev_err(&pdev->dev, "Failed to allocate memory\n");
2956 		err = -ENOMEM;
2957 		goto err_alloc_shared;
2958 	}
2959 
2960 	adapter->num_rx_queues = num_rx_queues;
2961 	adapter->num_tx_queues = num_tx_queues;
2962 	adapter->rx_buf_per_pkt = 1;
2963 
2964 	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2965 	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2966 	adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2967 						  &adapter->queue_desc_pa);
2968 
2969 	if (!adapter->tqd_start) {
2970 		dev_err(&pdev->dev, "Failed to allocate memory\n");
2971 		err = -ENOMEM;
2972 		goto err_alloc_queue_desc;
2973 	}
2974 	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2975 							    adapter->num_tx_queues);
2976 
2977 	adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2978 	if (adapter->pm_conf == NULL) {
2979 		err = -ENOMEM;
2980 		goto err_alloc_pm;
2981 	}
2982 
2983 #ifdef VMXNET3_RSS
2984 
2985 	adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2986 	if (adapter->rss_conf == NULL) {
2987 		err = -ENOMEM;
2988 		goto err_alloc_rss;
2989 	}
2990 #endif /* VMXNET3_RSS */
2991 
2992 	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
2993 	if (err < 0)
2994 		goto err_alloc_pci;
2995 
2996 	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
2997 	if (ver & 1) {
2998 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
2999 	} else {
3000 		dev_err(&pdev->dev,
3001 			"Incompatible h/w version (0x%x) for adapter\n", ver);
3002 		err = -EBUSY;
3003 		goto err_ver;
3004 	}
3005 
3006 	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3007 	if (ver & 1) {
3008 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3009 	} else {
3010 		dev_err(&pdev->dev,
3011 			"Incompatible upt version (0x%x) for adapter\n", ver);
3012 		err = -EBUSY;
3013 		goto err_ver;
3014 	}
3015 
3016 	SET_NETDEV_DEV(netdev, &pdev->dev);
3017 	vmxnet3_declare_features(adapter, dma64);
3018 
3019 	if (adapter->num_tx_queues == adapter->num_rx_queues)
3020 		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3021 	else
3022 		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3023 
3024 	vmxnet3_alloc_intr_resources(adapter);
3025 
3026 #ifdef VMXNET3_RSS
3027 	if (adapter->num_rx_queues > 1 &&
3028 	    adapter->intr.type == VMXNET3_IT_MSIX) {
3029 		adapter->rss = true;
3030 		netdev->hw_features |= NETIF_F_RXHASH;
3031 		netdev->features |= NETIF_F_RXHASH;
3032 		dev_dbg(&pdev->dev, "RSS is enabled.\n");
3033 	} else {
3034 		adapter->rss = false;
3035 	}
3036 #endif
3037 
3038 	vmxnet3_read_mac_addr(adapter, mac);
3039 	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3040 
3041 	netdev->netdev_ops = &vmxnet3_netdev_ops;
3042 	vmxnet3_set_ethtool_ops(netdev);
3043 	netdev->watchdog_timeo = 5 * HZ;
3044 
3045 	INIT_WORK(&adapter->work, vmxnet3_reset_work);
3046 	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3047 
3048 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
3049 		int i;
3050 		for (i = 0; i < adapter->num_rx_queues; i++) {
3051 			netif_napi_add(adapter->netdev,
3052 				       &adapter->rx_queue[i].napi,
3053 				       vmxnet3_poll_rx_only, 64);
3054 		}
3055 	} else {
3056 		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3057 			       vmxnet3_poll, 64);
3058 	}
3059 
3060 	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3061 	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3062 
3063 	netif_carrier_off(netdev);
3064 	err = register_netdev(netdev);
3065 
3066 	if (err) {
3067 		dev_err(&pdev->dev, "Failed to register adapter\n");
3068 		goto err_register;
3069 	}
3070 
3071 	vmxnet3_check_link(adapter, false);
3072 	return 0;
3073 
3074 err_register:
3075 	vmxnet3_free_intr_resources(adapter);
3076 err_ver:
3077 	vmxnet3_free_pci_resources(adapter);
3078 err_alloc_pci:
3079 #ifdef VMXNET3_RSS
3080 	kfree(adapter->rss_conf);
3081 err_alloc_rss:
3082 #endif
3083 	kfree(adapter->pm_conf);
3084 err_alloc_pm:
3085 	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3086 			    adapter->queue_desc_pa);
3087 err_alloc_queue_desc:
3088 	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3089 			    adapter->shared, adapter->shared_pa);
3090 err_alloc_shared:
3091 	pci_set_drvdata(pdev, NULL);
3092 	free_netdev(netdev);
3093 	return err;
3094 }
3095 
3096 
3097 static void
3098 vmxnet3_remove_device(struct pci_dev *pdev)
3099 {
3100 	struct net_device *netdev = pci_get_drvdata(pdev);
3101 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3102 	int size = 0;
3103 	int num_rx_queues;
3104 
3105 #ifdef VMXNET3_RSS
3106 	if (enable_mq)
3107 		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3108 				    (int)num_online_cpus());
3109 	else
3110 #endif
3111 		num_rx_queues = 1;
3112 	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3113 
3114 	cancel_work_sync(&adapter->work);
3115 
3116 	unregister_netdev(netdev);
3117 
3118 	vmxnet3_free_intr_resources(adapter);
3119 	vmxnet3_free_pci_resources(adapter);
3120 #ifdef VMXNET3_RSS
3121 	kfree(adapter->rss_conf);
3122 #endif
3123 	kfree(adapter->pm_conf);
3124 
3125 	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3126 	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3127 	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3128 			    adapter->queue_desc_pa);
3129 	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3130 			    adapter->shared, adapter->shared_pa);
3131 	free_netdev(netdev);
3132 }
3133 
3134 
3135 #ifdef CONFIG_PM
3136 
3137 static int
3138 vmxnet3_suspend(struct device *device)
3139 {
3140 	struct pci_dev *pdev = to_pci_dev(device);
3141 	struct net_device *netdev = pci_get_drvdata(pdev);
3142 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3143 	struct Vmxnet3_PMConf *pmConf;
3144 	struct ethhdr *ehdr;
3145 	struct arphdr *ahdr;
3146 	u8 *arpreq;
3147 	struct in_device *in_dev;
3148 	struct in_ifaddr *ifa;
3149 	unsigned long flags;
3150 	int i = 0;
3151 
3152 	if (!netif_running(netdev))
3153 		return 0;
3154 
3155 	for (i = 0; i < adapter->num_rx_queues; i++)
3156 		napi_disable(&adapter->rx_queue[i].napi);
3157 
3158 	vmxnet3_disable_all_intrs(adapter);
3159 	vmxnet3_free_irqs(adapter);
3160 	vmxnet3_free_intr_resources(adapter);
3161 
3162 	netif_device_detach(netdev);
3163 	netif_tx_stop_all_queues(netdev);
3164 
3165 	/* Create wake-up filters. */
3166 	pmConf = adapter->pm_conf;
3167 	memset(pmConf, 0, sizeof(*pmConf));
3168 
3169 	if (adapter->wol & WAKE_UCAST) {
3170 		pmConf->filters[i].patternSize = ETH_ALEN;
3171 		pmConf->filters[i].maskSize = 1;
3172 		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3173 		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3174 
3175 		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3176 		i++;
3177 	}
3178 
3179 	if (adapter->wol & WAKE_ARP) {
3180 		in_dev = in_dev_get(netdev);
3181 		if (!in_dev)
3182 			goto skip_arp;
3183 
3184 		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3185 		if (!ifa)
3186 			goto skip_arp;
3187 
3188 		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3189 			sizeof(struct arphdr) +		/* ARP header */
3190 			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3191 			2 * sizeof(u32);	/*2 IPv4 addresses */
3192 		pmConf->filters[i].maskSize =
3193 			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3194 
3195 		/* ETH_P_ARP in Ethernet header. */
3196 		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3197 		ehdr->h_proto = htons(ETH_P_ARP);
3198 
3199 		/* ARPOP_REQUEST in ARP header. */
3200 		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3201 		ahdr->ar_op = htons(ARPOP_REQUEST);
3202 		arpreq = (u8 *)(ahdr + 1);
3203 
3204 		/* The Unicast IPv4 address in 'tip' field. */
3205 		arpreq += 2 * ETH_ALEN + sizeof(u32);
3206 		*(u32 *)arpreq = ifa->ifa_address;
3207 
3208 		/* The mask for the relevant bits. */
3209 		pmConf->filters[i].mask[0] = 0x00;
3210 		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3211 		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3212 		pmConf->filters[i].mask[3] = 0x00;
3213 		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3214 		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3215 		in_dev_put(in_dev);
3216 
3217 		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3218 		i++;
3219 	}
3220 
3221 skip_arp:
3222 	if (adapter->wol & WAKE_MAGIC)
3223 		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3224 
3225 	pmConf->numFilters = i;
3226 
3227 	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3228 	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3229 								  *pmConf));
3230 	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3231 								 pmConf));
3232 
3233 	spin_lock_irqsave(&adapter->cmd_lock, flags);
3234 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3235 			       VMXNET3_CMD_UPDATE_PMCFG);
3236 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3237 
3238 	pci_save_state(pdev);
3239 	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3240 			adapter->wol);
3241 	pci_disable_device(pdev);
3242 	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3243 
3244 	return 0;
3245 }
3246 
3247 
3248 static int
3249 vmxnet3_resume(struct device *device)
3250 {
3251 	int err, i = 0;
3252 	unsigned long flags;
3253 	struct pci_dev *pdev = to_pci_dev(device);
3254 	struct net_device *netdev = pci_get_drvdata(pdev);
3255 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3256 	struct Vmxnet3_PMConf *pmConf;
3257 
3258 	if (!netif_running(netdev))
3259 		return 0;
3260 
3261 	/* Destroy wake-up filters. */
3262 	pmConf = adapter->pm_conf;
3263 	memset(pmConf, 0, sizeof(*pmConf));
3264 
3265 	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3266 	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3267 								  *pmConf));
3268 	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3269 								 pmConf));
3270 
3271 	netif_device_attach(netdev);
3272 	pci_set_power_state(pdev, PCI_D0);
3273 	pci_restore_state(pdev);
3274 	err = pci_enable_device_mem(pdev);
3275 	if (err != 0)
3276 		return err;
3277 
3278 	pci_enable_wake(pdev, PCI_D0, 0);
3279 
3280 	spin_lock_irqsave(&adapter->cmd_lock, flags);
3281 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3282 			       VMXNET3_CMD_UPDATE_PMCFG);
3283 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3284 	vmxnet3_alloc_intr_resources(adapter);
3285 	vmxnet3_request_irqs(adapter);
3286 	for (i = 0; i < adapter->num_rx_queues; i++)
3287 		napi_enable(&adapter->rx_queue[i].napi);
3288 	vmxnet3_enable_all_intrs(adapter);
3289 
3290 	return 0;
3291 }
3292 
3293 static const struct dev_pm_ops vmxnet3_pm_ops = {
3294 	.suspend = vmxnet3_suspend,
3295 	.resume = vmxnet3_resume,
3296 };
3297 #endif
3298 
3299 static struct pci_driver vmxnet3_driver = {
3300 	.name		= vmxnet3_driver_name,
3301 	.id_table	= vmxnet3_pciid_table,
3302 	.probe		= vmxnet3_probe_device,
3303 	.remove		= vmxnet3_remove_device,
3304 #ifdef CONFIG_PM
3305 	.driver.pm	= &vmxnet3_pm_ops,
3306 #endif
3307 };
3308 
3309 
3310 static int __init
3311 vmxnet3_init_module(void)
3312 {
3313 	pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3314 		VMXNET3_DRIVER_VERSION_REPORT);
3315 	return pci_register_driver(&vmxnet3_driver);
3316 }
3317 
3318 module_init(vmxnet3_init_module);
3319 
3320 
3321 static void
3322 vmxnet3_exit_module(void)
3323 {
3324 	pci_unregister_driver(&vmxnet3_driver);
3325 }
3326 
3327 module_exit(vmxnet3_exit_module);
3328 
3329 MODULE_AUTHOR("VMware, Inc.");
3330 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3331 MODULE_LICENSE("GPL v2");
3332 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3333