11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
32a1d9b7fSRoland Dreier  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
42a1d9b7fSRoland Dreier  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
52a1d9b7fSRoland Dreier  * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * This software is available to you under a choice of one of two
81da177e4SLinus Torvalds  * licenses.  You may choose to be licensed under the terms of the GNU
91da177e4SLinus Torvalds  * General Public License (GPL) Version 2, available from the file
101da177e4SLinus Torvalds  * COPYING in the main directory of this source tree, or the
111da177e4SLinus Torvalds  * OpenIB.org BSD license below:
121da177e4SLinus Torvalds  *
131da177e4SLinus Torvalds  *     Redistribution and use in source and binary forms, with or
141da177e4SLinus Torvalds  *     without modification, are permitted provided that the following
151da177e4SLinus Torvalds  *     conditions are met:
161da177e4SLinus Torvalds  *
171da177e4SLinus Torvalds  *      - Redistributions of source code must retain the above
181da177e4SLinus Torvalds  *        copyright notice, this list of conditions and the following
191da177e4SLinus Torvalds  *        disclaimer.
201da177e4SLinus Torvalds  *
211da177e4SLinus Torvalds  *      - Redistributions in binary form must reproduce the above
221da177e4SLinus Torvalds  *        copyright notice, this list of conditions and the following
231da177e4SLinus Torvalds  *        disclaimer in the documentation and/or other materials
241da177e4SLinus Torvalds  *        provided with the distribution.
251da177e4SLinus Torvalds  *
261da177e4SLinus Torvalds  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
271da177e4SLinus Torvalds  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
281da177e4SLinus Torvalds  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
291da177e4SLinus Torvalds  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
301da177e4SLinus Torvalds  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
311da177e4SLinus Torvalds  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
321da177e4SLinus Torvalds  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
331da177e4SLinus Torvalds  * SOFTWARE.
341da177e4SLinus Torvalds  */
351da177e4SLinus Torvalds 
361da177e4SLinus Torvalds #include <linux/delay.h>
37fec14d2fSPaul Gortmaker #include <linux/moduleparam.h>
381da177e4SLinus Torvalds #include <linux/dma-mapping.h>
395a0e3ad6STejun Heo #include <linux/slab.h>
401da177e4SLinus Torvalds 
4140ca1988SEli Cohen #include <linux/ip.h>
4240ca1988SEli Cohen #include <linux/tcp.h>
431dfce294SParav Pandit #include <rdma/ib_cache.h>
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds #include "ipoib.h"
461da177e4SLinus Torvalds 
/*
 * Data-path debug knob, only built when CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
 * is defined.  Exposed as an int module parameter with mode 0644; per its
 * description, data path debug tracing is enabled when the value is > 0.
 */
471da177e4SLinus Torvalds #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
481da177e4SLinus Torvalds static int data_debug_level;
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds module_param(data_debug_level, int, 0644);
511da177e4SLinus Torvalds MODULE_PARM_DESC(data_debug_level,
521da177e4SLinus Torvalds 		 "Enable data path debug tracing if > 0");
531da177e4SLinus Torvalds #endif
541da177e4SLinus Torvalds 
ipoib_create_ah(struct net_device * dev,struct ib_pd * pd,struct rdma_ah_attr * attr)551da177e4SLinus Torvalds struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
5690898850SDasaratharaman Chandramouli 				 struct ib_pd *pd, struct rdma_ah_attr *attr)
571da177e4SLinus Torvalds {
581da177e4SLinus Torvalds 	struct ipoib_ah *ah;
593874397cSMike Marciniszyn 	struct ib_ah *vah;
601da177e4SLinus Torvalds 
61b1b63970SKamal Heib 	ah = kmalloc(sizeof(*ah), GFP_KERNEL);
621da177e4SLinus Torvalds 	if (!ah)
633874397cSMike Marciniszyn 		return ERR_PTR(-ENOMEM);
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds 	ah->dev       = dev;
661da177e4SLinus Torvalds 	ah->last_send = 0;
671da177e4SLinus Torvalds 	kref_init(&ah->ref);
681da177e4SLinus Torvalds 
69b090c4e3SGal Pressman 	vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE);
703874397cSMike Marciniszyn 	if (IS_ERR(vah)) {
711da177e4SLinus Torvalds 		kfree(ah);
723874397cSMike Marciniszyn 		ah = (struct ipoib_ah *)vah;
733874397cSMike Marciniszyn 	} else {
743874397cSMike Marciniszyn 		ah->ah = vah;
75c1048affSErez Shitrit 		ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
763874397cSMike Marciniszyn 	}
771da177e4SLinus Torvalds 
781da177e4SLinus Torvalds 	return ah;
791da177e4SLinus Torvalds }
801da177e4SLinus Torvalds 
ipoib_free_ah(struct kref * kref)811da177e4SLinus Torvalds void ipoib_free_ah(struct kref *kref)
821da177e4SLinus Torvalds {
831da177e4SLinus Torvalds 	struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
84c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
851da177e4SLinus Torvalds 
861da177e4SLinus Torvalds 	unsigned long flags;
871da177e4SLinus Torvalds 
881da177e4SLinus Torvalds 	spin_lock_irqsave(&priv->lock, flags);
891da177e4SLinus Torvalds 	list_add_tail(&ah->list, &priv->dead_ahs);
901da177e4SLinus Torvalds 	spin_unlock_irqrestore(&priv->lock, flags);
911da177e4SLinus Torvalds }
921da177e4SLinus Torvalds 
/*
 * Release the DMA mapping of one UD receive buffer.  Only mapping[0] is
 * unmapped; the length is the UD buffer size derived from max_ib_mtu.
 */
static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
				  u64 mapping[IPOIB_UD_RX_SG])
{
	int buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);

	ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
}
100bc7b3a36SShirley Ma 
ipoib_ib_post_receive(struct net_device * dev,int id)1011da177e4SLinus Torvalds static int ipoib_ib_post_receive(struct net_device *dev, int id)
1021da177e4SLinus Torvalds {
103c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1041da177e4SLinus Torvalds 	int ret;
1051da177e4SLinus Torvalds 
106bc7b3a36SShirley Ma 	priv->rx_wr.wr_id   = id | IPOIB_OP_RECV;
107bc7b3a36SShirley Ma 	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
108bc7b3a36SShirley Ma 	priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
1091da177e4SLinus Torvalds 
1101da177e4SLinus Torvalds 
1114b4671a0SBart Van Assche 	ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL);
1121993d683SRoland Dreier 	if (unlikely(ret)) {
1131993d683SRoland Dreier 		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
114bc7b3a36SShirley Ma 		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
1151993d683SRoland Dreier 		dev_kfree_skb_any(priv->rx_ring[id].skb);
1161da177e4SLinus Torvalds 		priv->rx_ring[id].skb = NULL;
1171da177e4SLinus Torvalds 	}
1181da177e4SLinus Torvalds 
1191da177e4SLinus Torvalds 	return ret;
1201da177e4SLinus Torvalds }
1211da177e4SLinus Torvalds 
ipoib_alloc_rx_skb(struct net_device * dev,int id)122bc7b3a36SShirley Ma static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
1231993d683SRoland Dreier {
124c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1251993d683SRoland Dreier 	struct sk_buff *skb;
126bc7b3a36SShirley Ma 	int buf_size;
127bc7b3a36SShirley Ma 	u64 *mapping;
1281993d683SRoland Dreier 
129bc7b3a36SShirley Ma 	buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
130bc7b3a36SShirley Ma 
131fc791b63SPaolo Abeni 	skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN);
132bc7b3a36SShirley Ma 	if (unlikely(!skb))
133bc7b3a36SShirley Ma 		return NULL;
1341993d683SRoland Dreier 
1351993d683SRoland Dreier 	/*
136fc791b63SPaolo Abeni 	 * the IP header will be at IPOIP_HARD_LEN + IB_GRH_BYTES, that is
137fc791b63SPaolo Abeni 	 * 64 bytes aligned
1381993d683SRoland Dreier 	 */
139fc791b63SPaolo Abeni 	skb_reserve(skb, sizeof(struct ipoib_pseudo_header));
1401993d683SRoland Dreier 
141bc7b3a36SShirley Ma 	mapping = priv->rx_ring[id].mapping;
142bc7b3a36SShirley Ma 	mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
1431993d683SRoland Dreier 				       DMA_FROM_DEVICE);
144bc7b3a36SShirley Ma 	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
145bc7b3a36SShirley Ma 		goto error;
146bc7b3a36SShirley Ma 
1471993d683SRoland Dreier 	priv->rx_ring[id].skb = skb;
148bc7b3a36SShirley Ma 	return skb;
149bc7b3a36SShirley Ma error:
150bc7b3a36SShirley Ma 	dev_kfree_skb_any(skb);
151bc7b3a36SShirley Ma 	return NULL;
1521993d683SRoland Dreier }
1531993d683SRoland Dreier 
ipoib_ib_post_receives(struct net_device * dev)1541da177e4SLinus Torvalds static int ipoib_ib_post_receives(struct net_device *dev)
1551da177e4SLinus Torvalds {
156c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1571da177e4SLinus Torvalds 	int i;
1581da177e4SLinus Torvalds 
1590f485251SShirley Ma 	for (i = 0; i < ipoib_recvq_size; ++i) {
160bc7b3a36SShirley Ma 		if (!ipoib_alloc_rx_skb(dev, i)) {
1611993d683SRoland Dreier 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
1621993d683SRoland Dreier 			return -ENOMEM;
1631993d683SRoland Dreier 		}
1641da177e4SLinus Torvalds 		if (ipoib_ib_post_receive(dev, i)) {
1651da177e4SLinus Torvalds 			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
1661da177e4SLinus Torvalds 			return -EIO;
1671da177e4SLinus Torvalds 		}
1681da177e4SLinus Torvalds 	}
1691da177e4SLinus Torvalds 
1701da177e4SLinus Torvalds 	return 0;
1711da177e4SLinus Torvalds }
1721da177e4SLinus Torvalds 
ipoib_ib_handle_rx_wc(struct net_device * dev,struct ib_wc * wc)1732439a6e6SRoland Dreier static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
1741da177e4SLinus Torvalds {
175c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1762439a6e6SRoland Dreier 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
1772439a6e6SRoland Dreier 	struct sk_buff *skb;
178bc7b3a36SShirley Ma 	u64 mapping[IPOIB_UD_RX_SG];
179fed1db33SChristoph Lameter 	union ib_gid *dgid;
18068996a6eSEli Cohen 	union ib_gid *sgid;
1811da177e4SLinus Torvalds 
182a89875fcSRoland Dreier 	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
183a89875fcSRoland Dreier 		       wr_id, wc->status);
1841da177e4SLinus Torvalds 
1852439a6e6SRoland Dreier 	if (unlikely(wr_id >= ipoib_recvq_size)) {
1862439a6e6SRoland Dreier 		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
1872439a6e6SRoland Dreier 			   wr_id, ipoib_recvq_size);
1882439a6e6SRoland Dreier 		return;
1892439a6e6SRoland Dreier 	}
1901da177e4SLinus Torvalds 
1912439a6e6SRoland Dreier 	skb  = priv->rx_ring[wr_id].skb;
1921da177e4SLinus Torvalds 
1931993d683SRoland Dreier 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1941da177e4SLinus Torvalds 		if (wc->status != IB_WC_WR_FLUSH_ERR)
195b04dc199SAjaykumar Hotchandani 			ipoib_warn(priv,
196b04dc199SAjaykumar Hotchandani 				   "failed recv event (status=%d, wrid=%d vend_err %#x)\n",
1971da177e4SLinus Torvalds 				   wc->status, wr_id, wc->vendor_err);
198bc7b3a36SShirley Ma 		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
1991da177e4SLinus Torvalds 		dev_kfree_skb_any(skb);
2001993d683SRoland Dreier 		priv->rx_ring[wr_id].skb = NULL;
2011da177e4SLinus Torvalds 		return;
2021da177e4SLinus Torvalds 	}
2031da177e4SLinus Torvalds 
204bc7b3a36SShirley Ma 	memcpy(mapping, priv->rx_ring[wr_id].mapping,
205b1b63970SKamal Heib 	       IPOIB_UD_RX_SG * sizeof(*mapping));
206bc7b3a36SShirley Ma 
2071b844afeSRoland Dreier 	/*
2081993d683SRoland Dreier 	 * If we can't allocate a new RX buffer, dump
2091993d683SRoland Dreier 	 * this packet and reuse the old buffer.
2101993d683SRoland Dreier 	 */
211bc7b3a36SShirley Ma 	if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id))) {
212de903512SRoland Dreier 		++dev->stats.rx_dropped;
2131993d683SRoland Dreier 		goto repost;
2141993d683SRoland Dreier 	}
2151993d683SRoland Dreier 
2161da177e4SLinus Torvalds 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
2171da177e4SLinus Torvalds 		       wc->byte_len, wc->slid);
2181da177e4SLinus Torvalds 
219bc7b3a36SShirley Ma 	ipoib_ud_dma_unmap_rx(priv, mapping);
220a44878d1SErez Shitrit 
221a44878d1SErez Shitrit 	skb_put(skb, wc->byte_len);
2221993d683SRoland Dreier 
223fed1db33SChristoph Lameter 	/* First byte of dgid signals multicast when 0xff */
224fed1db33SChristoph Lameter 	dgid = &((struct ib_grh *)skb->data)->dgid;
225fed1db33SChristoph Lameter 
226fed1db33SChristoph Lameter 	if (!(wc->wc_flags & IB_WC_GRH) || dgid->raw[0] != 0xff)
227fed1db33SChristoph Lameter 		skb->pkt_type = PACKET_HOST;
228fed1db33SChristoph Lameter 	else if (memcmp(dgid, dev->broadcast + 4, sizeof(union ib_gid)) == 0)
229fed1db33SChristoph Lameter 		skb->pkt_type = PACKET_BROADCAST;
230fed1db33SChristoph Lameter 	else
231fed1db33SChristoph Lameter 		skb->pkt_type = PACKET_MULTICAST;
232fed1db33SChristoph Lameter 
23368996a6eSEli Cohen 	sgid = &((struct ib_grh *)skb->data)->sgid;
23468996a6eSEli Cohen 
23568996a6eSEli Cohen 	/*
23668996a6eSEli Cohen 	 * Drop packets that this interface sent, ie multicast packets
23768996a6eSEli Cohen 	 * that the HCA has replicated.
23868996a6eSEli Cohen 	 */
23968996a6eSEli Cohen 	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) {
24068996a6eSEli Cohen 		int need_repost = 1;
24168996a6eSEli Cohen 
24268996a6eSEli Cohen 		if ((wc->wc_flags & IB_WC_GRH) &&
24368996a6eSEli Cohen 		    sgid->global.interface_id != priv->local_gid.global.interface_id)
24468996a6eSEli Cohen 			need_repost = 0;
24568996a6eSEli Cohen 
24668996a6eSEli Cohen 		if (need_repost) {
24768996a6eSEli Cohen 			dev_kfree_skb_any(skb);
24868996a6eSEli Cohen 			goto repost;
24968996a6eSEli Cohen 		}
25068996a6eSEli Cohen 	}
25168996a6eSEli Cohen 
2521da177e4SLinus Torvalds 	skb_pull(skb, IB_GRH_BYTES);
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
255fc791b63SPaolo Abeni 	skb_add_pseudo_hdr(skb);
2561da177e4SLinus Torvalds 
257de903512SRoland Dreier 	++dev->stats.rx_packets;
258de903512SRoland Dreier 	dev->stats.rx_bytes += skb->len;
2594829d964SAlex Vesker 	if (skb->pkt_type == PACKET_MULTICAST)
2604829d964SAlex Vesker 		dev->stats.multicast++;
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds 	skb->dev = dev;
263d927d505SOr Gerlitz 	if ((dev->features & NETIF_F_RXCSUM) &&
264d927d505SOr Gerlitz 			likely(wc->wc_flags & IB_WC_IP_CSUM_OK))
2656046136cSEli Cohen 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2666046136cSEli Cohen 
2678966e28dSErez Shitrit 	napi_gro_receive(&priv->recv_napi, skb);
2681da177e4SLinus Torvalds 
2691993d683SRoland Dreier repost:
2701993d683SRoland Dreier 	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
2711da177e4SLinus Torvalds 		ipoib_warn(priv, "ipoib_ib_post_receive failed "
2721da177e4SLinus Torvalds 			   "for buf %d\n", wr_id);
2732439a6e6SRoland Dreier }
2741da177e4SLinus Torvalds 
ipoib_dma_map_tx(struct ib_device * ca,struct ipoib_tx_buf * tx_req)275c4268778SYuval Shaia int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
2767143740dSEli Cohen {
2777143740dSEli Cohen 	struct sk_buff *skb = tx_req->skb;
2787143740dSEli Cohen 	u64 *mapping = tx_req->mapping;
2797143740dSEli Cohen 	int i;
28040ca1988SEli Cohen 	int off;
2817143740dSEli Cohen 
28240ca1988SEli Cohen 	if (skb_headlen(skb)) {
2837143740dSEli Cohen 		mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
2847143740dSEli Cohen 					       DMA_TO_DEVICE);
2857143740dSEli Cohen 		if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
2867143740dSEli Cohen 			return -EIO;
2877143740dSEli Cohen 
28840ca1988SEli Cohen 		off = 1;
28940ca1988SEli Cohen 	} else
29040ca1988SEli Cohen 		off = 0;
29140ca1988SEli Cohen 
2927143740dSEli Cohen 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
2939e903e08SEric Dumazet 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2945581be3bSIan Campbell 		mapping[i + off] = ib_dma_map_page(ca,
2955581be3bSIan Campbell 						 skb_frag_page(frag),
296b54c9d5bSJonathan Lemon 						 skb_frag_off(frag),
297b54c9d5bSJonathan Lemon 						 skb_frag_size(frag),
2987143740dSEli Cohen 						 DMA_TO_DEVICE);
29940ca1988SEli Cohen 		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
3007143740dSEli Cohen 			goto partial_error;
3017143740dSEli Cohen 	}
3027143740dSEli Cohen 	return 0;
3037143740dSEli Cohen 
3047143740dSEli Cohen partial_error:
3057143740dSEli Cohen 	for (; i > 0; --i) {
3069e903e08SEric Dumazet 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
3079e903e08SEric Dumazet 
3089e903e08SEric Dumazet 		ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag), DMA_TO_DEVICE);
3097143740dSEli Cohen 	}
31040ca1988SEli Cohen 
31140ca1988SEli Cohen 	if (off)
31240ca1988SEli Cohen 		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
31340ca1988SEli Cohen 
3147143740dSEli Cohen 	return -EIO;
3157143740dSEli Cohen }
3167143740dSEli Cohen 
ipoib_dma_unmap_tx(struct ipoib_dev_priv * priv,struct ipoib_tx_buf * tx_req)317c4268778SYuval Shaia void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
3187143740dSEli Cohen 			struct ipoib_tx_buf *tx_req)
3197143740dSEli Cohen {
3207143740dSEli Cohen 	struct sk_buff *skb = tx_req->skb;
3217143740dSEli Cohen 	u64 *mapping = tx_req->mapping;
3227143740dSEli Cohen 	int i;
32340ca1988SEli Cohen 	int off;
3247143740dSEli Cohen 
32540ca1988SEli Cohen 	if (skb_headlen(skb)) {
326c4268778SYuval Shaia 		ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb),
327c4268778SYuval Shaia 				    DMA_TO_DEVICE);
32840ca1988SEli Cohen 		off = 1;
32940ca1988SEli Cohen 	} else
33040ca1988SEli Cohen 		off = 0;
3317143740dSEli Cohen 
3327143740dSEli Cohen 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
3339e903e08SEric Dumazet 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3349e903e08SEric Dumazet 
335c4268778SYuval Shaia 		ib_dma_unmap_page(priv->ca, mapping[i + off],
336c4268778SYuval Shaia 				  skb_frag_size(frag), DMA_TO_DEVICE);
3377143740dSEli Cohen 	}
3387143740dSEli Cohen }
3397143740dSEli Cohen 
3402c010730SErez Shitrit /*
3412c010730SErez Shitrit  * As the result of a completion error the QP Can be transferred to SQE states.
3422c010730SErez Shitrit  * The function checks if the (send)QP is in SQE state and
3432c010730SErez Shitrit  * moves it back to RTS state, that in order to have it functional again.
3442c010730SErez Shitrit  */
ipoib_qp_state_validate_work(struct work_struct * work)3452c010730SErez Shitrit static void ipoib_qp_state_validate_work(struct work_struct *work)
3462c010730SErez Shitrit {
3472c010730SErez Shitrit 	struct ipoib_qp_state_validate *qp_work =
3482c010730SErez Shitrit 		container_of(work, struct ipoib_qp_state_validate, work);
3492c010730SErez Shitrit 
3502c010730SErez Shitrit 	struct ipoib_dev_priv *priv = qp_work->priv;
3512c010730SErez Shitrit 	struct ib_qp_attr qp_attr;
3522c010730SErez Shitrit 	struct ib_qp_init_attr query_init_attr;
3532c010730SErez Shitrit 	int ret;
3542c010730SErez Shitrit 
3552c010730SErez Shitrit 	ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
3562c010730SErez Shitrit 	if (ret) {
3572c010730SErez Shitrit 		ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
3582c010730SErez Shitrit 			   __func__, ret);
3592c010730SErez Shitrit 		goto free_res;
3602c010730SErez Shitrit 	}
3612c010730SErez Shitrit 	pr_info("%s: QP: 0x%x is in state: %d\n",
3622c010730SErez Shitrit 		__func__, priv->qp->qp_num, qp_attr.qp_state);
3632c010730SErez Shitrit 
3642c010730SErez Shitrit 	/* currently support only in SQE->RTS transition*/
3652c010730SErez Shitrit 	if (qp_attr.qp_state == IB_QPS_SQE) {
3662c010730SErez Shitrit 		qp_attr.qp_state = IB_QPS_RTS;
3672c010730SErez Shitrit 
3682c010730SErez Shitrit 		ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
3692c010730SErez Shitrit 		if (ret) {
3702c010730SErez Shitrit 			pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
3712c010730SErez Shitrit 				ret, priv->qp->qp_num);
3722c010730SErez Shitrit 			goto free_res;
3732c010730SErez Shitrit 		}
3742c010730SErez Shitrit 		pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
3752c010730SErez Shitrit 			__func__, priv->qp->qp_num);
3762c010730SErez Shitrit 	} else {
3772c010730SErez Shitrit 		pr_warn("QP (%d) will stay in state: %d\n",
3782c010730SErez Shitrit 			priv->qp->qp_num, qp_attr.qp_state);
3792c010730SErez Shitrit 	}
3802c010730SErez Shitrit 
3812c010730SErez Shitrit free_res:
3822c010730SErez Shitrit 	kfree(qp_work);
3832c010730SErez Shitrit }
3842c010730SErez Shitrit 
ipoib_ib_handle_tx_wc(struct net_device * dev,struct ib_wc * wc)3852439a6e6SRoland Dreier static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
3862439a6e6SRoland Dreier {
387c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
3882439a6e6SRoland Dreier 	unsigned int wr_id = wc->wr_id;
3891993d683SRoland Dreier 	struct ipoib_tx_buf *tx_req;
3901da177e4SLinus Torvalds 
391a89875fcSRoland Dreier 	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
392a89875fcSRoland Dreier 		       wr_id, wc->status);
3932439a6e6SRoland Dreier 
3942439a6e6SRoland Dreier 	if (unlikely(wr_id >= ipoib_sendq_size)) {
3952439a6e6SRoland Dreier 		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
3960f485251SShirley Ma 			   wr_id, ipoib_sendq_size);
3971da177e4SLinus Torvalds 		return;
3981da177e4SLinus Torvalds 	}
3991da177e4SLinus Torvalds 
4001da177e4SLinus Torvalds 	tx_req = &priv->tx_ring[wr_id];
4011da177e4SLinus Torvalds 
402c4268778SYuval Shaia 	ipoib_dma_unmap_tx(priv, tx_req);
4031da177e4SLinus Torvalds 
404de903512SRoland Dreier 	++dev->stats.tx_packets;
405de903512SRoland Dreier 	dev->stats.tx_bytes += tx_req->skb->len;
4061da177e4SLinus Torvalds 
4071da177e4SLinus Torvalds 	dev_kfree_skb_any(tx_req->skb);
4081da177e4SLinus Torvalds 
4091da177e4SLinus Torvalds 	++priv->tx_tail;
4101acba6a8SValentine Fatiev 	++priv->global_tx_tail;
4118966e28dSErez Shitrit 
4128966e28dSErez Shitrit 	if (unlikely(netif_queue_stopped(dev) &&
4131acba6a8SValentine Fatiev 		     ((priv->global_tx_head - priv->global_tx_tail) <=
4141acba6a8SValentine Fatiev 		      ipoib_sendq_size >> 1) &&
4158966e28dSErez Shitrit 		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
4161da177e4SLinus Torvalds 		netif_wake_queue(dev);
4171da177e4SLinus Torvalds 
4181da177e4SLinus Torvalds 	if (wc->status != IB_WC_SUCCESS &&
4192c010730SErez Shitrit 	    wc->status != IB_WC_WR_FLUSH_ERR) {
4202c010730SErez Shitrit 		struct ipoib_qp_state_validate *qp_work;
421b04dc199SAjaykumar Hotchandani 		ipoib_warn(priv,
422b04dc199SAjaykumar Hotchandani 			   "failed send event (status=%d, wrid=%d vend_err %#x)\n",
4231da177e4SLinus Torvalds 			   wc->status, wr_id, wc->vendor_err);
4242c010730SErez Shitrit 		qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
42574226649SLeon Romanovsky 		if (!qp_work)
4262c010730SErez Shitrit 			return;
4272c010730SErez Shitrit 
4282c010730SErez Shitrit 		INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
4292c010730SErez Shitrit 		qp_work->priv = priv;
4302c010730SErez Shitrit 		queue_work(priv->wq, &qp_work->work);
4312c010730SErez Shitrit 	}
4321da177e4SLinus Torvalds }
4332439a6e6SRoland Dreier 
poll_tx(struct ipoib_dev_priv * priv)434f56bcd80SEli Cohen static int poll_tx(struct ipoib_dev_priv *priv)
435f56bcd80SEli Cohen {
436f56bcd80SEli Cohen 	int n, i;
4378966e28dSErez Shitrit 	struct ib_wc *wc;
438f56bcd80SEli Cohen 
439f56bcd80SEli Cohen 	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
4408966e28dSErez Shitrit 	for (i = 0; i < n; ++i) {
4418966e28dSErez Shitrit 		wc = priv->send_wc + i;
4428966e28dSErez Shitrit 		if (wc->wr_id & IPOIB_OP_CM)
4438966e28dSErez Shitrit 			ipoib_cm_handle_tx_wc(priv->dev, priv->send_wc + i);
4448966e28dSErez Shitrit 		else
445f56bcd80SEli Cohen 			ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
4468966e28dSErez Shitrit 	}
447f56bcd80SEli Cohen 	return n == MAX_SEND_CQE;
448f56bcd80SEli Cohen }
449f56bcd80SEli Cohen 
ipoib_rx_poll(struct napi_struct * napi,int budget)4508966e28dSErez Shitrit int ipoib_rx_poll(struct napi_struct *napi, int budget)
4512439a6e6SRoland Dreier {
4528966e28dSErez Shitrit 	struct ipoib_dev_priv *priv =
4538966e28dSErez Shitrit 		container_of(napi, struct ipoib_dev_priv, recv_napi);
454bea3348eSStephen Hemminger 	struct net_device *dev = priv->dev;
4558d1cc86aSRoland Dreier 	int done;
4568d1cc86aSRoland Dreier 	int t;
4578d1cc86aSRoland Dreier 	int n, i;
4588d1cc86aSRoland Dreier 
4598d1cc86aSRoland Dreier 	done  = 0;
4608d1cc86aSRoland Dreier 
461bea3348eSStephen Hemminger poll_more:
462bea3348eSStephen Hemminger 	while (done < budget) {
463bea3348eSStephen Hemminger 		int max = (budget - done);
464bea3348eSStephen Hemminger 
4658d1cc86aSRoland Dreier 		t = min(IPOIB_NUM_WC, max);
466f56bcd80SEli Cohen 		n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
4678d1cc86aSRoland Dreier 
468bea3348eSStephen Hemminger 		for (i = 0; i < n; i++) {
4698d1cc86aSRoland Dreier 			struct ib_wc *wc = priv->ibwc + i;
4708d1cc86aSRoland Dreier 
4711b524963SMichael S. Tsirkin 			if (wc->wr_id & IPOIB_OP_RECV) {
4728d1cc86aSRoland Dreier 				++done;
4731b524963SMichael S. Tsirkin 				if (wc->wr_id & IPOIB_OP_CM)
474839fcabaSMichael S. Tsirkin 					ipoib_cm_handle_rx_wc(dev, wc);
4751b524963SMichael S. Tsirkin 				else
4762439a6e6SRoland Dreier 					ipoib_ib_handle_rx_wc(dev, wc);
4778966e28dSErez Shitrit 			} else {
4788966e28dSErez Shitrit 				pr_warn("%s: Got unexpected wqe id\n", __func__);
4798966e28dSErez Shitrit 			}
4801b524963SMichael S. Tsirkin 		}
4811da177e4SLinus Torvalds 
482bea3348eSStephen Hemminger 		if (n != t)
4838d1cc86aSRoland Dreier 			break;
4848d1cc86aSRoland Dreier 	}
4858d1cc86aSRoland Dreier 
486bea3348eSStephen Hemminger 	if (done < budget) {
487288379f0SBen Hutchings 		napi_complete(napi);
488f56bcd80SEli Cohen 		if (unlikely(ib_req_notify_cq(priv->recv_cq,
4898d1cc86aSRoland Dreier 					      IB_CQ_NEXT_COMP |
4908d1cc86aSRoland Dreier 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
491288379f0SBen Hutchings 		    napi_reschedule(napi))
492bea3348eSStephen Hemminger 			goto poll_more;
4938d1cc86aSRoland Dreier 	}
4948d1cc86aSRoland Dreier 
495bea3348eSStephen Hemminger 	return done;
4968d1cc86aSRoland Dreier }
4978d1cc86aSRoland Dreier 
ipoib_tx_poll(struct napi_struct * napi,int budget)4988966e28dSErez Shitrit int ipoib_tx_poll(struct napi_struct *napi, int budget)
4991da177e4SLinus Torvalds {
5008966e28dSErez Shitrit 	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv,
5018966e28dSErez Shitrit 						   send_napi);
5028966e28dSErez Shitrit 	struct net_device *dev = priv->dev;
5038966e28dSErez Shitrit 	int n, i;
5048966e28dSErez Shitrit 	struct ib_wc *wc;
505bea3348eSStephen Hemminger 
5068966e28dSErez Shitrit poll_more:
5078966e28dSErez Shitrit 	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
5088966e28dSErez Shitrit 
5098966e28dSErez Shitrit 	for (i = 0; i < n; i++) {
5108966e28dSErez Shitrit 		wc = priv->send_wc + i;
5118966e28dSErez Shitrit 		if (wc->wr_id & IPOIB_OP_CM)
5128966e28dSErez Shitrit 			ipoib_cm_handle_tx_wc(dev, wc);
5138966e28dSErez Shitrit 		else
5148966e28dSErez Shitrit 			ipoib_ib_handle_tx_wc(dev, wc);
5151da177e4SLinus Torvalds 	}
5161da177e4SLinus Torvalds 
5178966e28dSErez Shitrit 	if (n < budget) {
5188966e28dSErez Shitrit 		napi_complete(napi);
5198966e28dSErez Shitrit 		if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
5208966e28dSErez Shitrit 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
5218966e28dSErez Shitrit 		    napi_reschedule(napi))
5228966e28dSErez Shitrit 			goto poll_more;
5238966e28dSErez Shitrit 	}
5248966e28dSErez Shitrit 	return n < 0 ? 0 : n;
52557ce41d1SEli Cohen }
52657ce41d1SEli Cohen 
ipoib_ib_rx_completion(struct ib_cq * cq,void * ctx_ptr)5278966e28dSErez Shitrit void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr)
52857ce41d1SEli Cohen {
5298966e28dSErez Shitrit 	struct ipoib_dev_priv *priv = ctx_ptr;
530943c246eSRoland Dreier 
5318966e28dSErez Shitrit 	napi_schedule(&priv->recv_napi);
5328966e28dSErez Shitrit }
5338966e28dSErez Shitrit 
ipoib_ib_tx_completion(struct ib_cq * cq,void * ctx_ptr)5348966e28dSErez Shitrit void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr)
5358966e28dSErez Shitrit {
5368966e28dSErez Shitrit 	struct ipoib_dev_priv *priv = ctx_ptr;
5378966e28dSErez Shitrit 
5388966e28dSErez Shitrit 	napi_schedule(&priv->send_napi);
53957ce41d1SEli Cohen }
54057ce41d1SEli Cohen 
/*
 * Fill in the device's shared send work request for @tx_req and post it.
 *
 * @wr_id, @dqpn and @address become the work request id, remote QP number
 * and address handle.  When @head is non-NULL the request is posted as an
 * LSO send with @head/@hlen as the header and the skb's gso_size as the
 * MSS; otherwise it is a plain send.
 *
 * Returns the result of ib_post_send().
 */
static inline int post_send(struct ipoib_dev_priv *priv,
			    unsigned int wr_id,
			    struct ib_ah *address, u32 dqpn,
			    struct ipoib_tx_buf *tx_req,
			    void *head, int hlen)
{
	struct sk_buff *skb = tx_req->skb;

	ipoib_build_sge(priv, tx_req);

	priv->tx_wr.wr.wr_id	= wr_id;
	priv->tx_wr.remote_qpn	= dqpn;
	priv->tx_wr.ah		= address;

	if (!head) {
		priv->tx_wr.wr.opcode	= IB_WR_SEND;
	} else {
		priv->tx_wr.mss		= skb_shinfo(skb)->gso_size;
		priv->tx_wr.header	= head;
		priv->tx_wr.hlen	= hlen;
		priv->tx_wr.wr.opcode	= IB_WR_LSO;
	}

	return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL);
}
5651da177e4SLinus Torvalds 
/*
 * Place one skb on the datagram send ring and hand it to post_send().
 *
 * @dev:     net device the packet goes out on
 * @skb:     packet to transmit
 * @address: address handle forwarded to post_send()
 * @dqpn:    destination QP number forwarded to post_send()
 *
 * Return: -1 when the packet is dropped before it is posted, 0 when
 * post_send() itself fails, otherwise the value priv->tx_head had for
 * this send (i.e. before it is incremented).
 */
int ipoib_send(struct net_device *dev, struct sk_buff *skb,
	       struct ib_ah *address, u32 dqpn)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	/* The fragment budget shrinks by one when the skb has linear data. */
	unsigned int max_frags = priv->max_send_sge - !!skb_headlen(skb);
	struct ipoib_tx_buf *slot;
	void *gso_hdr = NULL;
	int gso_hlen = 0;
	int ret;

	if (!skb_is_gso(skb)) {
		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
			ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
				   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			/* The skb is passed on here; it is not freed by us. */
			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
			return -1;
		}
	} else {
		/* Remember where the headers start, then strip them. */
		gso_hlen = skb_tcp_all_headers(skb);
		gso_hdr = skb->data;
		if (unlikely(!skb_pull(skb, gso_hlen))) {
			ipoib_warn(priv, "linear data too small\n");
			goto drop;
		}
	}

	if (skb_shinfo(skb)->nr_frags > max_frags) {
		if (skb_linearize(skb) < 0) {
			ipoib_warn(priv, "skb could not be linearized\n");
			goto drop;
		}
		/* Guard against linearize succeeding without shrinking nr_frags. */
		if (skb_shinfo(skb)->nr_frags > max_frags) {
			ipoib_warn(priv, "too many frags after skb linearize\n");
			goto drop;
		}
	}

	ipoib_dbg_data(priv,
		       "sending packet, length=%d address=%p dqpn=0x%06x\n",
		       skb->len, address, dqpn);

	/*
	 * The skb is recorded in the tx_ring ahead of post_send(): the
	 * completion handler may run before anything following post_send()
	 * executes, so all bookkeeping has to be consistent by then.
	 */
	slot = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
	slot->skb = skb;
	if (unlikely(ipoib_dma_map_tx(priv->ca, slot))) {
		++dev->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return -1;
	}

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
	else
		priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;

	/* tx_head is bumped only after a successful send, but queue state uses it now */
	if ((priv->global_tx_head - priv->global_tx_tail) ==
	    ipoib_sendq_size - 1) {
		ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
		netif_stop_queue(dev);
	}

	skb_orphan(skb);
	skb_dst_drop(skb);

	/* With the queue stopped, ask for a notification on the send CQ. */
	if (netif_queue_stopped(dev) &&
	    ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
			     IB_CQ_REPORT_MISSED_EVENTS) < 0)
		ipoib_warn(priv, "request notify on send CQ failed\n");

	ret = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
			address, dqpn, slot, gso_hdr, gso_hlen);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post_send failed, error %d\n", ret);
		++dev->stats.tx_errors;
		ipoib_dma_unmap_tx(priv, slot);
		dev_kfree_skb_any(skb);
		if (netif_queue_stopped(dev))
			netif_wake_queue(dev);
		return 0;
	}

	netif_trans_update(dev);

	ret = priv->tx_head;
	++priv->tx_head;
	++priv->global_tx_head;
	return ret;

drop:
	/* Common tail for packets discarded before they reach the ring. */
	++dev->stats.tx_dropped;
	++dev->stats.tx_errors;
	dev_kfree_skb_any(skb);
	return -1;
}
6721da177e4SLinus Torvalds 
ipoib_reap_dead_ahs(struct ipoib_dev_priv * priv)67365936bf2SJason Gunthorpe static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv)
6741da177e4SLinus Torvalds {
6751da177e4SLinus Torvalds 	struct ipoib_ah *ah, *tah;
676943c246eSRoland Dreier 	unsigned long flags;
6771da177e4SLinus Torvalds 
67865936bf2SJason Gunthorpe 	netif_tx_lock_bh(priv->dev);
679943c246eSRoland Dreier 	spin_lock_irqsave(&priv->lock, flags);
680943c246eSRoland Dreier 
6811da177e4SLinus Torvalds 	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
6822181858bSRoland Dreier 		if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
6831da177e4SLinus Torvalds 			list_del(&ah->list);
6842553ba21SGal Pressman 			rdma_destroy_ah(ah->ah, 0);
6851da177e4SLinus Torvalds 			kfree(ah);
6861da177e4SLinus Torvalds 		}
687943c246eSRoland Dreier 
688943c246eSRoland Dreier 	spin_unlock_irqrestore(&priv->lock, flags);
68965936bf2SJason Gunthorpe 	netif_tx_unlock_bh(priv->dev);
6901da177e4SLinus Torvalds }
6911da177e4SLinus Torvalds 
ipoib_reap_ah(struct work_struct * work)692c4028958SDavid Howells void ipoib_reap_ah(struct work_struct *work)
6931da177e4SLinus Torvalds {
694c4028958SDavid Howells 	struct ipoib_dev_priv *priv =
695c4028958SDavid Howells 		container_of(work, struct ipoib_dev_priv, ah_reap_task.work);
6961da177e4SLinus Torvalds 
69765936bf2SJason Gunthorpe 	ipoib_reap_dead_ahs(priv);
6981da177e4SLinus Torvalds 
6991da177e4SLinus Torvalds 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
7000b39578bSDoug Ledford 		queue_delayed_work(priv->wq, &priv->ah_reap_task,
70169fc507aSAnton Blanchard 				   round_jiffies_relative(HZ));
7021da177e4SLinus Torvalds }
7031da177e4SLinus Torvalds 
ipoib_start_ah_reaper(struct ipoib_dev_priv * priv)70465936bf2SJason Gunthorpe static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv)
705e135106fSDoug Ledford {
70665936bf2SJason Gunthorpe 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
70765936bf2SJason Gunthorpe 	queue_delayed_work(priv->wq, &priv->ah_reap_task,
70865936bf2SJason Gunthorpe 			   round_jiffies_relative(HZ));
709e135106fSDoug Ledford }
710e135106fSDoug Ledford 
ipoib_stop_ah_reaper(struct ipoib_dev_priv * priv)71165936bf2SJason Gunthorpe static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv)
712e135106fSDoug Ledford {
713e135106fSDoug Ledford 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
71465936bf2SJason Gunthorpe 	cancel_delayed_work(&priv->ah_reap_task);
71565936bf2SJason Gunthorpe 	/*
71665936bf2SJason Gunthorpe 	 * After ipoib_stop_ah_reaper() we always go through
71765936bf2SJason Gunthorpe 	 * ipoib_reap_dead_ahs() which ensures the work is really stopped and
71865936bf2SJason Gunthorpe 	 * does a final flush out of the dead_ah's list
71965936bf2SJason Gunthorpe 	 */
720e135106fSDoug Ledford }
721e135106fSDoug Ledford 
recvs_pending(struct net_device * dev)7227ce1a3eeSErez Shitrit static int recvs_pending(struct net_device *dev)
7237ce1a3eeSErez Shitrit {
724c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
7257ce1a3eeSErez Shitrit 	int pending = 0;
7267ce1a3eeSErez Shitrit 	int i;
7277ce1a3eeSErez Shitrit 
7287ce1a3eeSErez Shitrit 	for (i = 0; i < ipoib_recvq_size; ++i)
7297ce1a3eeSErez Shitrit 		if (priv->rx_ring[i].skb)
7307ce1a3eeSErez Shitrit 			++pending;
7317ce1a3eeSErez Shitrit 
7327ce1a3eeSErez Shitrit 	return pending;
7337ce1a3eeSErez Shitrit }
7347ce1a3eeSErez Shitrit 
check_qp_movement_and_print(struct ipoib_dev_priv * priv,struct ib_qp * qp,enum ib_qp_state new_state)7355dc78ad1SErez Shitrit static void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
7365dc78ad1SErez Shitrit 					struct ib_qp *qp,
7375dc78ad1SErez Shitrit 					enum ib_qp_state new_state)
7385dc78ad1SErez Shitrit {
7395dc78ad1SErez Shitrit 	struct ib_qp_attr qp_attr;
7405dc78ad1SErez Shitrit 	struct ib_qp_init_attr query_init_attr;
7415dc78ad1SErez Shitrit 	int ret;
7425dc78ad1SErez Shitrit 
7435dc78ad1SErez Shitrit 	ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr);
7445dc78ad1SErez Shitrit 	if (ret) {
7455dc78ad1SErez Shitrit 		ipoib_warn(priv, "%s: Failed to query QP\n", __func__);
7465dc78ad1SErez Shitrit 		return;
7475dc78ad1SErez Shitrit 	}
7485dc78ad1SErez Shitrit 	/* print according to the new-state and the previous state.*/
7495dc78ad1SErez Shitrit 	if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET)
7505dc78ad1SErez Shitrit 		ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n");
7515dc78ad1SErez Shitrit 	else
7525dc78ad1SErez Shitrit 		ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n",
7535dc78ad1SErez Shitrit 			   new_state, qp_attr.qp_state);
7545dc78ad1SErez Shitrit }
7555dc78ad1SErez Shitrit 
ipoib_napi_enable(struct net_device * dev)7568966e28dSErez Shitrit static void ipoib_napi_enable(struct net_device *dev)
7578966e28dSErez Shitrit {
7588966e28dSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
7598966e28dSErez Shitrit 
7608966e28dSErez Shitrit 	napi_enable(&priv->recv_napi);
7618966e28dSErez Shitrit 	napi_enable(&priv->send_napi);
7628966e28dSErez Shitrit }
7638966e28dSErez Shitrit 
ipoib_napi_disable(struct net_device * dev)7648966e28dSErez Shitrit static void ipoib_napi_disable(struct net_device *dev)
7658966e28dSErez Shitrit {
7668966e28dSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
7678966e28dSErez Shitrit 
7688966e28dSErez Shitrit 	napi_disable(&priv->recv_napi);
7698966e28dSErez Shitrit 	napi_disable(&priv->send_napi);
7708966e28dSErez Shitrit }
7718966e28dSErez Shitrit 
ipoib_ib_dev_stop_default(struct net_device * dev)7727ce1a3eeSErez Shitrit int ipoib_ib_dev_stop_default(struct net_device *dev)
7737ce1a3eeSErez Shitrit {
774c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
7757ce1a3eeSErez Shitrit 	struct ib_qp_attr qp_attr;
7767ce1a3eeSErez Shitrit 	unsigned long begin;
7777ce1a3eeSErez Shitrit 	struct ipoib_tx_buf *tx_req;
7787ce1a3eeSErez Shitrit 	int i;
7797ce1a3eeSErez Shitrit 
780cd565b4bSErez Shitrit 	if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
7818966e28dSErez Shitrit 		ipoib_napi_disable(dev);
7827ce1a3eeSErez Shitrit 
7837ce1a3eeSErez Shitrit 	ipoib_cm_dev_stop(dev);
7847ce1a3eeSErez Shitrit 
7857ce1a3eeSErez Shitrit 	/*
7867ce1a3eeSErez Shitrit 	 * Move our QP to the error state and then reinitialize in
7877ce1a3eeSErez Shitrit 	 * when all work requests have completed or have been flushed.
7887ce1a3eeSErez Shitrit 	 */
7897ce1a3eeSErez Shitrit 	qp_attr.qp_state = IB_QPS_ERR;
7907ce1a3eeSErez Shitrit 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
7915dc78ad1SErez Shitrit 		check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR);
7927ce1a3eeSErez Shitrit 
7937ce1a3eeSErez Shitrit 	/* Wait for all sends and receives to complete */
7947ce1a3eeSErez Shitrit 	begin = jiffies;
7957ce1a3eeSErez Shitrit 
7967ce1a3eeSErez Shitrit 	while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
7977ce1a3eeSErez Shitrit 		if (time_after(jiffies, begin + 5 * HZ)) {
7987ce1a3eeSErez Shitrit 			ipoib_warn(priv,
7997ce1a3eeSErez Shitrit 				   "timing out; %d sends %d receives not completed\n",
8007ce1a3eeSErez Shitrit 				   priv->tx_head - priv->tx_tail,
8017ce1a3eeSErez Shitrit 				   recvs_pending(dev));
8027ce1a3eeSErez Shitrit 
8037ce1a3eeSErez Shitrit 			/*
8047ce1a3eeSErez Shitrit 			 * assume the HW is wedged and just free up
8057ce1a3eeSErez Shitrit 			 * all our pending work requests.
8067ce1a3eeSErez Shitrit 			 */
8077ce1a3eeSErez Shitrit 			while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
8087ce1a3eeSErez Shitrit 				tx_req = &priv->tx_ring[priv->tx_tail &
8097ce1a3eeSErez Shitrit 							(ipoib_sendq_size - 1)];
8107ce1a3eeSErez Shitrit 				ipoib_dma_unmap_tx(priv, tx_req);
8117ce1a3eeSErez Shitrit 				dev_kfree_skb_any(tx_req->skb);
8127ce1a3eeSErez Shitrit 				++priv->tx_tail;
8131acba6a8SValentine Fatiev 				++priv->global_tx_tail;
8147ce1a3eeSErez Shitrit 			}
8157ce1a3eeSErez Shitrit 
8167ce1a3eeSErez Shitrit 			for (i = 0; i < ipoib_recvq_size; ++i) {
8177ce1a3eeSErez Shitrit 				struct ipoib_rx_buf *rx_req;
8187ce1a3eeSErez Shitrit 
8197ce1a3eeSErez Shitrit 				rx_req = &priv->rx_ring[i];
8207ce1a3eeSErez Shitrit 				if (!rx_req->skb)
8217ce1a3eeSErez Shitrit 					continue;
8227ce1a3eeSErez Shitrit 				ipoib_ud_dma_unmap_rx(priv,
8237ce1a3eeSErez Shitrit 						      priv->rx_ring[i].mapping);
8247ce1a3eeSErez Shitrit 				dev_kfree_skb_any(rx_req->skb);
8257ce1a3eeSErez Shitrit 				rx_req->skb = NULL;
8267ce1a3eeSErez Shitrit 			}
8277ce1a3eeSErez Shitrit 
8287ce1a3eeSErez Shitrit 			goto timeout;
8297ce1a3eeSErez Shitrit 		}
8307ce1a3eeSErez Shitrit 
8317ce1a3eeSErez Shitrit 		ipoib_drain_cq(dev);
8327ce1a3eeSErez Shitrit 
83398e77d9fSLeon Romanovsky 		usleep_range(1000, 2000);
8347ce1a3eeSErez Shitrit 	}
8357ce1a3eeSErez Shitrit 
8367ce1a3eeSErez Shitrit 	ipoib_dbg(priv, "All sends and receives done.\n");
8377ce1a3eeSErez Shitrit 
8387ce1a3eeSErez Shitrit timeout:
8397ce1a3eeSErez Shitrit 	qp_attr.qp_state = IB_QPS_RESET;
8407ce1a3eeSErez Shitrit 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
8417ce1a3eeSErez Shitrit 		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
8427ce1a3eeSErez Shitrit 
8437ce1a3eeSErez Shitrit 	ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
8447ce1a3eeSErez Shitrit 
8457ce1a3eeSErez Shitrit 	return 0;
8467ce1a3eeSErez Shitrit }
8477ce1a3eeSErez Shitrit 
ipoib_ib_dev_open_default(struct net_device * dev)8487ce1a3eeSErez Shitrit int ipoib_ib_dev_open_default(struct net_device *dev)
8491da177e4SLinus Torvalds {
850c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
8511da177e4SLinus Torvalds 	int ret;
8521da177e4SLinus Torvalds 
8535b6810e0SRoland Dreier 	ret = ipoib_init_qp(dev);
8541da177e4SLinus Torvalds 	if (ret) {
8555b6810e0SRoland Dreier 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
8561da177e4SLinus Torvalds 		return -1;
8571da177e4SLinus Torvalds 	}
8581da177e4SLinus Torvalds 
8591da177e4SLinus Torvalds 	ret = ipoib_ib_post_receives(dev);
8601da177e4SLinus Torvalds 	if (ret) {
8611da177e4SLinus Torvalds 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
862cd565b4bSErez Shitrit 		goto out;
8631da177e4SLinus Torvalds 	}
8641da177e4SLinus Torvalds 
865839fcabaSMichael S. Tsirkin 	ret = ipoib_cm_dev_open(dev);
866839fcabaSMichael S. Tsirkin 	if (ret) {
86724bd1e4eSMichael S. Tsirkin 		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
868cd565b4bSErez Shitrit 		goto out;
869839fcabaSMichael S. Tsirkin 	}
870839fcabaSMichael S. Tsirkin 
871cd565b4bSErez Shitrit 	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
8728966e28dSErez Shitrit 		ipoib_napi_enable(dev);
8737a343d4cSLeonid Arsh 
8741da177e4SLinus Torvalds 	return 0;
875cd565b4bSErez Shitrit out:
876c2bb5628SErez Shitrit 	return -1;
8771da177e4SLinus Torvalds }
8781da177e4SLinus Torvalds 
ipoib_ib_dev_open(struct net_device * dev)8797ce1a3eeSErez Shitrit int ipoib_ib_dev_open(struct net_device *dev)
8807ce1a3eeSErez Shitrit {
881c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
8827ce1a3eeSErez Shitrit 
8837ce1a3eeSErez Shitrit 	ipoib_pkey_dev_check_presence(dev);
8847ce1a3eeSErez Shitrit 
8857ce1a3eeSErez Shitrit 	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
8867ce1a3eeSErez Shitrit 		ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
8877ce1a3eeSErez Shitrit 			   (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
8887ce1a3eeSErez Shitrit 		return -1;
8897ce1a3eeSErez Shitrit 	}
8907ce1a3eeSErez Shitrit 
89165936bf2SJason Gunthorpe 	ipoib_start_ah_reaper(priv);
892cd565b4bSErez Shitrit 	if (priv->rn_ops->ndo_open(dev)) {
8937ce1a3eeSErez Shitrit 		pr_warn("%s: Failed to open dev\n", dev->name);
894cd565b4bSErez Shitrit 		goto dev_stop;
8957ce1a3eeSErez Shitrit 	}
8967ce1a3eeSErez Shitrit 
897cd565b4bSErez Shitrit 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
898cd565b4bSErez Shitrit 
8997ce1a3eeSErez Shitrit 	return 0;
9007ce1a3eeSErez Shitrit 
901cd565b4bSErez Shitrit dev_stop:
90265936bf2SJason Gunthorpe 	ipoib_stop_ah_reaper(priv);
9037ce1a3eeSErez Shitrit 	return -1;
9047ce1a3eeSErez Shitrit }
9057ce1a3eeSErez Shitrit 
ipoib_ib_dev_stop(struct net_device * dev)90665936bf2SJason Gunthorpe void ipoib_ib_dev_stop(struct net_device *dev)
90765936bf2SJason Gunthorpe {
90865936bf2SJason Gunthorpe 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
90965936bf2SJason Gunthorpe 
91065936bf2SJason Gunthorpe 	priv->rn_ops->ndo_stop(dev);
91165936bf2SJason Gunthorpe 
91265936bf2SJason Gunthorpe 	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
91365936bf2SJason Gunthorpe 	ipoib_stop_ah_reaper(priv);
91465936bf2SJason Gunthorpe }
91565936bf2SJason Gunthorpe 
ipoib_pkey_dev_check_presence(struct net_device * dev)916db84f880SErez Shitrit void ipoib_pkey_dev_check_presence(struct net_device *dev)
9177a343d4cSLeonid Arsh {
918c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
919980f91c3SAlex Vesker 	struct rdma_netdev *rn = netdev_priv(dev);
9207a343d4cSLeonid Arsh 
921dd57c930SAlex Estrin 	if (!(priv->pkey & 0x7fff) ||
922dd57c930SAlex Estrin 	    ib_find_pkey(priv->ca, priv->port, priv->pkey,
923980f91c3SAlex Vesker 			 &priv->pkey_index)) {
9247a343d4cSLeonid Arsh 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
925980f91c3SAlex Vesker 	} else {
926980f91c3SAlex Vesker 		if (rn->set_id)
927980f91c3SAlex Vesker 			rn->set_id(dev, priv->pkey_index);
9287a343d4cSLeonid Arsh 		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
9297a343d4cSLeonid Arsh 	}
930980f91c3SAlex Vesker }
9317a343d4cSLeonid Arsh 
ipoib_ib_dev_up(struct net_device * dev)9325c37077fSZhu Yanjun void ipoib_ib_dev_up(struct net_device *dev)
9331da177e4SLinus Torvalds {
934c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
9351da177e4SLinus Torvalds 
9367a343d4cSLeonid Arsh 	ipoib_pkey_dev_check_presence(dev);
9377a343d4cSLeonid Arsh 
9387a343d4cSLeonid Arsh 	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
9397a343d4cSLeonid Arsh 		ipoib_dbg(priv, "PKEY is not assigned.\n");
9405c37077fSZhu Yanjun 		return;
9417a343d4cSLeonid Arsh 	}
9427a343d4cSLeonid Arsh 
9431da177e4SLinus Torvalds 	set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
9441da177e4SLinus Torvalds 
9455c37077fSZhu Yanjun 	ipoib_mcast_start_thread(dev);
9461da177e4SLinus Torvalds }
9471da177e4SLinus Torvalds 
ipoib_ib_dev_down(struct net_device * dev)948dfc0e555SZhu Yanjun void ipoib_ib_dev_down(struct net_device *dev)
9491da177e4SLinus Torvalds {
950c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
9511da177e4SLinus Torvalds 
9521da177e4SLinus Torvalds 	ipoib_dbg(priv, "downing ib_dev\n");
9531da177e4SLinus Torvalds 
9541da177e4SLinus Torvalds 	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
9551da177e4SLinus Torvalds 	netif_carrier_off(dev);
9561da177e4SLinus Torvalds 
957efc82eeeSDoug Ledford 	ipoib_mcast_stop_thread(dev);
9581da177e4SLinus Torvalds 	ipoib_mcast_dev_flush(dev);
9591da177e4SLinus Torvalds 
9601da177e4SLinus Torvalds 	ipoib_flush_paths(dev);
9611da177e4SLinus Torvalds }
9621da177e4SLinus Torvalds 
ipoib_drain_cq(struct net_device * dev)9632dfbfc37SMichael S. Tsirkin void ipoib_drain_cq(struct net_device *dev)
9642dfbfc37SMichael S. Tsirkin {
965c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
9662dfbfc37SMichael S. Tsirkin 	int i, n;
967943c246eSRoland Dreier 
968943c246eSRoland Dreier 	/*
969943c246eSRoland Dreier 	 * We call completion handling routines that expect to be
970943c246eSRoland Dreier 	 * called from the BH-disabled NAPI poll context, so disable
971943c246eSRoland Dreier 	 * BHs here too.
972943c246eSRoland Dreier 	 */
973943c246eSRoland Dreier 	local_bh_disable();
974943c246eSRoland Dreier 
9752dfbfc37SMichael S. Tsirkin 	do {
976f56bcd80SEli Cohen 		n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
9772dfbfc37SMichael S. Tsirkin 		for (i = 0; i < n; ++i) {
978ce423ef5SRoland Dreier 			/*
979ce423ef5SRoland Dreier 			 * Convert any successful completions to flush
980ce423ef5SRoland Dreier 			 * errors to avoid passing packets up the
981ce423ef5SRoland Dreier 			 * stack after bringing the device down.
982ce423ef5SRoland Dreier 			 */
983ce423ef5SRoland Dreier 			if (priv->ibwc[i].status == IB_WC_SUCCESS)
984ce423ef5SRoland Dreier 				priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
985ce423ef5SRoland Dreier 
9861b524963SMichael S. Tsirkin 			if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
9871b524963SMichael S. Tsirkin 				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
9882dfbfc37SMichael S. Tsirkin 					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
9891b524963SMichael S. Tsirkin 				else
9902dfbfc37SMichael S. Tsirkin 					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
9918966e28dSErez Shitrit 			} else {
9928966e28dSErez Shitrit 				pr_warn("%s: Got unexpected wqe id\n", __func__);
9938966e28dSErez Shitrit 			}
9941b524963SMichael S. Tsirkin 		}
9952dfbfc37SMichael S. Tsirkin 	} while (n == IPOIB_NUM_WC);
996f56bcd80SEli Cohen 
997f56bcd80SEli Cohen 	while (poll_tx(priv))
998f56bcd80SEli Cohen 		; /* nothing */
999943c246eSRoland Dreier 
1000943c246eSRoland Dreier 	local_bh_enable();
10012dfbfc37SMichael S. Tsirkin }
10022dfbfc37SMichael S. Tsirkin 
1003c2904141SErez Shitrit /*
1004c2904141SErez Shitrit  * Takes whatever value which is in pkey index 0 and updates priv->pkey
1005c2904141SErez Shitrit  * returns 0 if the pkey value was changed.
1006c2904141SErez Shitrit  */
update_parent_pkey(struct ipoib_dev_priv * priv)1007c2904141SErez Shitrit static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
1008c2904141SErez Shitrit {
1009c2904141SErez Shitrit 	int result;
1010c2904141SErez Shitrit 	u16 prev_pkey;
1011c2904141SErez Shitrit 
1012c2904141SErez Shitrit 	prev_pkey = priv->pkey;
1013c2904141SErez Shitrit 	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
1014c2904141SErez Shitrit 	if (result) {
1015c2904141SErez Shitrit 		ipoib_warn(priv, "ib_query_pkey port %d failed (ret = %d)\n",
1016c2904141SErez Shitrit 			   priv->port, result);
1017c2904141SErez Shitrit 		return result;
1018c2904141SErez Shitrit 	}
1019c2904141SErez Shitrit 
1020c2904141SErez Shitrit 	priv->pkey |= 0x8000;
1021c2904141SErez Shitrit 
1022c2904141SErez Shitrit 	if (prev_pkey != priv->pkey) {
1023c2904141SErez Shitrit 		ipoib_dbg(priv, "pkey changed from 0x%x to 0x%x\n",
1024c2904141SErez Shitrit 			  prev_pkey, priv->pkey);
1025c2904141SErez Shitrit 		/*
1026c2904141SErez Shitrit 		 * Update the pkey in the broadcast address, while making sure to set
1027c2904141SErez Shitrit 		 * the full membership bit, so that we join the right broadcast group.
1028c2904141SErez Shitrit 		 */
1029c2904141SErez Shitrit 		priv->dev->broadcast[8] = priv->pkey >> 8;
1030c2904141SErez Shitrit 		priv->dev->broadcast[9] = priv->pkey & 0xff;
1031c2904141SErez Shitrit 		return 0;
1032c2904141SErez Shitrit 	}
1033c2904141SErez Shitrit 
1034c2904141SErez Shitrit 	return 1;
1035c2904141SErez Shitrit }
1036dd57c930SAlex Estrin /*
1037dd57c930SAlex Estrin  * returns 0 if pkey value was found in a different slot.
1038dd57c930SAlex Estrin  */
update_child_pkey(struct ipoib_dev_priv * priv)1039dd57c930SAlex Estrin static inline int update_child_pkey(struct ipoib_dev_priv *priv)
1040dd57c930SAlex Estrin {
1041dd57c930SAlex Estrin 	u16 old_index = priv->pkey_index;
1042dd57c930SAlex Estrin 
1043dd57c930SAlex Estrin 	priv->pkey_index = 0;
1044dd57c930SAlex Estrin 	ipoib_pkey_dev_check_presence(priv->dev);
1045dd57c930SAlex Estrin 
1046dd57c930SAlex Estrin 	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
1047dd57c930SAlex Estrin 	    (old_index == priv->pkey_index))
1048dd57c930SAlex Estrin 		return 1;
1049dd57c930SAlex Estrin 	return 0;
1050dd57c930SAlex Estrin }
1051c2904141SErez Shitrit 
1052492a7e67SMark Bloch /*
1053492a7e67SMark Bloch  * returns true if the device address of the ipoib interface has changed and the
1054492a7e67SMark Bloch  * new address is a valid one (i.e in the gid table), return false otherwise.
1055492a7e67SMark Bloch  */
ipoib_dev_addr_changed_valid(struct ipoib_dev_priv * priv)1056492a7e67SMark Bloch static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
1057492a7e67SMark Bloch {
1058492a7e67SMark Bloch 	union ib_gid search_gid;
1059492a7e67SMark Bloch 	union ib_gid gid0;
1060492a7e67SMark Bloch 	int err;
1061492a7e67SMark Bloch 	u16 index;
10621fb7f897SMark Bloch 	u32 port;
1063492a7e67SMark Bloch 	bool ret = false;
1064492a7e67SMark Bloch 
10651dfce294SParav Pandit 	if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
1066492a7e67SMark Bloch 		return false;
1067492a7e67SMark Bloch 
10689b29953bSMark Bloch 	netif_addr_lock_bh(priv->dev);
1069492a7e67SMark Bloch 
1070492a7e67SMark Bloch 	/* The subnet prefix may have changed, update it now so we won't have
1071492a7e67SMark Bloch 	 * to do it later
1072492a7e67SMark Bloch 	 */
1073492a7e67SMark Bloch 	priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
107410f7b9bcSJakub Kicinski 	dev_addr_mod(priv->dev, 4, (u8 *)&gid0.global.subnet_prefix,
107510f7b9bcSJakub Kicinski 		     sizeof(gid0.global.subnet_prefix));
1076492a7e67SMark Bloch 	search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
1077492a7e67SMark Bloch 
1078492a7e67SMark Bloch 	search_gid.global.interface_id = priv->local_gid.global.interface_id;
1079492a7e67SMark Bloch 
10809b29953bSMark Bloch 	netif_addr_unlock_bh(priv->dev);
1081492a7e67SMark Bloch 
1082b26c4a11SParav Pandit 	err = ib_find_gid(priv->ca, &search_gid, &port, &index);
1083492a7e67SMark Bloch 
10849b29953bSMark Bloch 	netif_addr_lock_bh(priv->dev);
1085492a7e67SMark Bloch 
1086492a7e67SMark Bloch 	if (search_gid.global.interface_id !=
1087492a7e67SMark Bloch 	    priv->local_gid.global.interface_id)
1088492a7e67SMark Bloch 		/* There was a change while we were looking up the gid, bail
1089492a7e67SMark Bloch 		 * here and let the next work sort this out
1090492a7e67SMark Bloch 		 */
1091492a7e67SMark Bloch 		goto out;
1092492a7e67SMark Bloch 
1093492a7e67SMark Bloch 	/* The next section of code needs some background:
1094492a7e67SMark Bloch 	 * Per IB spec the port GUID can't change if the HCA is powered on.
1095492a7e67SMark Bloch 	 * port GUID is the basis for GID at index 0 which is the basis for
1096492a7e67SMark Bloch 	 * the default device address of a ipoib interface.
1097492a7e67SMark Bloch 	 *
1098492a7e67SMark Bloch 	 * so it seems the flow should be:
1099492a7e67SMark Bloch 	 * if user_changed_dev_addr && gid in gid tbl
1100492a7e67SMark Bloch 	 *	set bit dev_addr_set
1101492a7e67SMark Bloch 	 *	return true
1102492a7e67SMark Bloch 	 * else
1103492a7e67SMark Bloch 	 *	return false
1104492a7e67SMark Bloch 	 *
1105492a7e67SMark Bloch 	 * The issue is that there are devices that don't follow the spec,
1106492a7e67SMark Bloch 	 * they change the port GUID when the HCA is powered, so in order
1107492a7e67SMark Bloch 	 * not to break userspace applications, We need to check if the
1108492a7e67SMark Bloch 	 * user wanted to control the device address and we assume that
1109492a7e67SMark Bloch 	 * if he sets the device address back to be based on GID index 0,
1110492a7e67SMark Bloch 	 * he no longer wishs to control it.
1111492a7e67SMark Bloch 	 *
1112*ca325edbSSlark Xiao 	 * If the user doesn't control the device address,
1113492a7e67SMark Bloch 	 * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
1114492a7e67SMark Bloch 	 * the port GUID has changed and GID at index 0 has changed
1115492a7e67SMark Bloch 	 * so we need to change priv->local_gid and priv->dev->dev_addr
1116492a7e67SMark Bloch 	 * to reflect the new GID.
1117492a7e67SMark Bloch 	 */
1118492a7e67SMark Bloch 	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
1119492a7e67SMark Bloch 		if (!err && port == priv->port) {
1120492a7e67SMark Bloch 			set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1121492a7e67SMark Bloch 			if (index == 0)
1122492a7e67SMark Bloch 				clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
1123492a7e67SMark Bloch 					  &priv->flags);
1124492a7e67SMark Bloch 			else
1125492a7e67SMark Bloch 				set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
1126492a7e67SMark Bloch 			ret = true;
1127492a7e67SMark Bloch 		} else {
1128492a7e67SMark Bloch 			ret = false;
1129492a7e67SMark Bloch 		}
1130492a7e67SMark Bloch 	} else {
1131492a7e67SMark Bloch 		if (!err && port == priv->port) {
1132492a7e67SMark Bloch 			ret = true;
1133492a7e67SMark Bloch 		} else {
1134492a7e67SMark Bloch 			if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
1135492a7e67SMark Bloch 				memcpy(&priv->local_gid, &gid0,
1136492a7e67SMark Bloch 				       sizeof(priv->local_gid));
113710f7b9bcSJakub Kicinski 				dev_addr_mod(priv->dev, 4, (u8 *)&gid0,
1138492a7e67SMark Bloch 					     sizeof(priv->local_gid));
1139492a7e67SMark Bloch 				ret = true;
1140492a7e67SMark Bloch 			}
1141492a7e67SMark Bloch 		}
1142492a7e67SMark Bloch 	}
1143492a7e67SMark Bloch 
1144492a7e67SMark Bloch out:
11459b29953bSMark Bloch 	netif_addr_unlock_bh(priv->dev);
1146492a7e67SMark Bloch 
1147492a7e67SMark Bloch 	return ret;
1148492a7e67SMark Bloch }
1149492a7e67SMark Bloch 
__ipoib_ib_dev_flush(struct ipoib_dev_priv * priv,enum ipoib_flush_level level,int nesting)1150ee1e2c82SMoni Shoua static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
11518b7cce0dSHaggai Eran 				enum ipoib_flush_level level,
11528b7cce0dSHaggai Eran 				int nesting)
11531da177e4SLinus Torvalds {
115426bbf13cSYosef Etigin 	struct ipoib_dev_priv *cpriv;
1155c4028958SDavid Howells 	struct net_device *dev = priv->dev;
1156c2904141SErez Shitrit 	int result;
115726bbf13cSYosef Etigin 
11588b7cce0dSHaggai Eran 	down_read_nested(&priv->vlan_rwsem, nesting);
115926bbf13cSYosef Etigin 
116026bbf13cSYosef Etigin 	/*
116126bbf13cSYosef Etigin 	 * Flush any child interfaces too -- they might be up even if
116226bbf13cSYosef Etigin 	 * the parent is down.
116326bbf13cSYosef Etigin 	 */
116426bbf13cSYosef Etigin 	list_for_each_entry(cpriv, &priv->child_intfs, list)
11658b7cce0dSHaggai Eran 		__ipoib_ib_dev_flush(cpriv, level, nesting + 1);
116626bbf13cSYosef Etigin 
1167f47944ccSErez Shitrit 	up_read(&priv->vlan_rwsem);
11681da177e4SLinus Torvalds 
1169dd57c930SAlex Estrin 	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
1170dd57c930SAlex Estrin 	    level != IPOIB_FLUSH_HEAVY) {
1171492a7e67SMark Bloch 		/* Make sure the dev_addr is set even if not flushing */
1172492a7e67SMark Bloch 		if (level == IPOIB_FLUSH_LIGHT)
1173492a7e67SMark Bloch 			ipoib_dev_addr_changed_valid(priv);
11747a343d4cSLeonid Arsh 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
11751da177e4SLinus Torvalds 		return;
11767a343d4cSLeonid Arsh 	}
11777a343d4cSLeonid Arsh 
11787a343d4cSLeonid Arsh 	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
1179dd57c930SAlex Estrin 		/* interface is down. update pkey and leave. */
1180dd57c930SAlex Estrin 		if (level == IPOIB_FLUSH_HEAVY) {
1181dd57c930SAlex Estrin 			if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
1182dd57c930SAlex Estrin 				update_parent_pkey(priv);
1183dd57c930SAlex Estrin 			else
1184dd57c930SAlex Estrin 				update_child_pkey(priv);
1185492a7e67SMark Bloch 		} else if (level == IPOIB_FLUSH_LIGHT)
1186492a7e67SMark Bloch 			ipoib_dev_addr_changed_valid(priv);
11877a343d4cSLeonid Arsh 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
11887a343d4cSLeonid Arsh 		return;
11897a343d4cSLeonid Arsh 	}
11901da177e4SLinus Torvalds 
1191ee1e2c82SMoni Shoua 	if (level == IPOIB_FLUSH_HEAVY) {
1192c2904141SErez Shitrit 		/* child devices chase their origin pkey value, while non-child
1193c2904141SErez Shitrit 		 * (parent) devices should always takes what present in pkey index 0
1194c2904141SErez Shitrit 		 */
1195c2904141SErez Shitrit 		if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
1196dd57c930SAlex Estrin 			result = update_child_pkey(priv);
1197dd57c930SAlex Estrin 			if (result) {
119826bbf13cSYosef Etigin 				/* restart QP only if P_Key index is changed */
119926bbf13cSYosef Etigin 				ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
120026bbf13cSYosef Etigin 				return;
120126bbf13cSYosef Etigin 			}
1202dd57c930SAlex Estrin 
1203c2904141SErez Shitrit 		} else {
1204c2904141SErez Shitrit 			result = update_parent_pkey(priv);
1205c2904141SErez Shitrit 			/* restart QP only if P_Key value changed */
1206c2904141SErez Shitrit 			if (result) {
1207c2904141SErez Shitrit 				ipoib_dbg(priv, "Not flushing - P_Key value not changed.\n");
1208c2904141SErez Shitrit 				return;
1209c2904141SErez Shitrit 			}
1210c2904141SErez Shitrit 		}
121126bbf13cSYosef Etigin 	}
121226bbf13cSYosef Etigin 
1213ee1e2c82SMoni Shoua 	if (level == IPOIB_FLUSH_LIGHT) {
1214344baccaSAlex Vesker 		int oper_up;
1215ee1e2c82SMoni Shoua 		ipoib_mark_paths_invalid(dev);
1216344baccaSAlex Vesker 		/* Set IPoIB operation as down to prevent races between:
1217344baccaSAlex Vesker 		 * the flush flow which leaves MCG and on the fly joins
1218344baccaSAlex Vesker 		 * which can happen during that time. mcast restart task
1219344baccaSAlex Vesker 		 * should deal with join requests we missed.
1220344baccaSAlex Vesker 		 */
1221344baccaSAlex Vesker 		oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
1222ee1e2c82SMoni Shoua 		ipoib_mcast_dev_flush(dev);
1223344baccaSAlex Vesker 		if (oper_up)
1224344baccaSAlex Vesker 			set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
122565936bf2SJason Gunthorpe 		ipoib_reap_dead_ahs(priv);
1226ee1e2c82SMoni Shoua 	}
12271da177e4SLinus Torvalds 
1228ee1e2c82SMoni Shoua 	if (level >= IPOIB_FLUSH_NORMAL)
1229efc82eeeSDoug Ledford 		ipoib_ib_dev_down(dev);
12301da177e4SLinus Torvalds 
1231ee1e2c82SMoni Shoua 	if (level == IPOIB_FLUSH_HEAVY) {
1232dd57c930SAlex Estrin 		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
1233efc82eeeSDoug Ledford 			ipoib_ib_dev_stop(dev);
1234b4b678b0SAlex Vesker 
12351f80bd6aSAlex Vesker 		if (ipoib_ib_dev_open(dev))
1236dd57c930SAlex Estrin 			return;
1237b4b678b0SAlex Vesker 
1238dd57c930SAlex Estrin 		if (netif_queue_stopped(dev))
1239dd57c930SAlex Estrin 			netif_start_queue(dev);
124026bbf13cSYosef Etigin 	}
124126bbf13cSYosef Etigin 
12421da177e4SLinus Torvalds 	/*
12431da177e4SLinus Torvalds 	 * The device could have been brought down between the start and when
12441da177e4SLinus Torvalds 	 * we get here, don't bring it back up if it's not configured up
12451da177e4SLinus Torvalds 	 */
12465ccd0255SEli Cohen 	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
1247ee1e2c82SMoni Shoua 		if (level >= IPOIB_FLUSH_NORMAL)
12481da177e4SLinus Torvalds 			ipoib_ib_dev_up(dev);
1249492a7e67SMark Bloch 		if (ipoib_dev_addr_changed_valid(priv))
1250c4028958SDavid Howells 			ipoib_mcast_restart_task(&priv->restart_task);
12515ccd0255SEli Cohen 	}
125226bbf13cSYosef Etigin }
12531da177e4SLinus Torvalds 
ipoib_ib_dev_flush_light(struct work_struct * work)1254ee1e2c82SMoni Shoua void ipoib_ib_dev_flush_light(struct work_struct *work)
125526bbf13cSYosef Etigin {
125626bbf13cSYosef Etigin 	struct ipoib_dev_priv *priv =
1257ee1e2c82SMoni Shoua 		container_of(work, struct ipoib_dev_priv, flush_light);
12584f71055aSMichael S. Tsirkin 
12598b7cce0dSHaggai Eran 	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0);
126026bbf13cSYosef Etigin }
12614f71055aSMichael S. Tsirkin 
ipoib_ib_dev_flush_normal(struct work_struct * work)1262ee1e2c82SMoni Shoua void ipoib_ib_dev_flush_normal(struct work_struct *work)
126326bbf13cSYosef Etigin {
126426bbf13cSYosef Etigin 	struct ipoib_dev_priv *priv =
1265ee1e2c82SMoni Shoua 		container_of(work, struct ipoib_dev_priv, flush_normal);
126626bbf13cSYosef Etigin 
12678b7cce0dSHaggai Eran 	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0);
1268ee1e2c82SMoni Shoua }
1269ee1e2c82SMoni Shoua 
ipoib_ib_dev_flush_heavy(struct work_struct * work)1270ee1e2c82SMoni Shoua void ipoib_ib_dev_flush_heavy(struct work_struct *work)
1271ee1e2c82SMoni Shoua {
1272ee1e2c82SMoni Shoua 	struct ipoib_dev_priv *priv =
1273ee1e2c82SMoni Shoua 		container_of(work, struct ipoib_dev_priv, flush_heavy);
1274ee1e2c82SMoni Shoua 
12751f80bd6aSAlex Vesker 	rtnl_lock();
12768b7cce0dSHaggai Eran 	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
12771f80bd6aSAlex Vesker 	rtnl_unlock();
12781da177e4SLinus Torvalds }
12791da177e4SLinus Torvalds 
ipoib_ib_dev_cleanup(struct net_device * dev)12801da177e4SLinus Torvalds void ipoib_ib_dev_cleanup(struct net_device *dev)
12811da177e4SLinus Torvalds {
1282c1048affSErez Shitrit 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
12831da177e4SLinus Torvalds 
12841da177e4SLinus Torvalds 	ipoib_dbg(priv, "cleaning up ib_dev\n");
1285a39c52abSErez Shitrit 	/*
1286a39c52abSErez Shitrit 	 * We must make sure there are no more (path) completions
1287a39c52abSErez Shitrit 	 * that may wish to touch priv fields that are no longer valid
1288a39c52abSErez Shitrit 	 */
1289a39c52abSErez Shitrit 	ipoib_flush_paths(dev);
12901da177e4SLinus Torvalds 
1291efc82eeeSDoug Ledford 	ipoib_mcast_stop_thread(dev);
1292988bd503SEli Cohen 	ipoib_mcast_dev_flush(dev);
12931da177e4SLinus Torvalds 
1294e135106fSDoug Ledford 	/*
1295e135106fSDoug Ledford 	 * All of our ah references aren't free until after
1296e135106fSDoug Ledford 	 * ipoib_mcast_dev_flush(), ipoib_flush_paths, and
1297e135106fSDoug Ledford 	 * the neighbor garbage collection is stopped and reaped.
1298e135106fSDoug Ledford 	 * That should all be done now, so make a final ah flush.
1299e135106fSDoug Ledford 	 */
130065936bf2SJason Gunthorpe 	ipoib_reap_dead_ahs(priv);
1301e135106fSDoug Ledford 
1302515ed4f3SErez Shitrit 	clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
13031da177e4SLinus Torvalds 
1304cd565b4bSErez Shitrit 	priv->rn_ops->ndo_uninit(dev);
1305515ed4f3SErez Shitrit 
1306515ed4f3SErez Shitrit 	if (priv->pd) {
1307515ed4f3SErez Shitrit 		ib_dealloc_pd(priv->pd);
1308515ed4f3SErez Shitrit 		priv->pd = NULL;
1309515ed4f3SErez Shitrit 	}
1310515ed4f3SErez Shitrit }
13111da177e4SLinus Torvalds 
1312