11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (c) 2004 Topspin Communications. All rights reserved. 32a1d9b7fSRoland Dreier * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 42a1d9b7fSRoland Dreier * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This software is available to you under a choice of one of two 71da177e4SLinus Torvalds * licenses. You may choose to be licensed under the terms of the GNU 81da177e4SLinus Torvalds * General Public License (GPL) Version 2, available from the file 91da177e4SLinus Torvalds * COPYING in the main directory of this source tree, or the 101da177e4SLinus Torvalds * OpenIB.org BSD license below: 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Redistribution and use in source and binary forms, with or 131da177e4SLinus Torvalds * without modification, are permitted provided that the following 141da177e4SLinus Torvalds * conditions are met: 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * - Redistributions of source code must retain the above 171da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 181da177e4SLinus Torvalds * disclaimer. 191da177e4SLinus Torvalds * 201da177e4SLinus Torvalds * - Redistributions in binary form must reproduce the above 211da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 221da177e4SLinus Torvalds * disclaimer in the documentation and/or other materials 231da177e4SLinus Torvalds * provided with the distribution. 241da177e4SLinus Torvalds * 251da177e4SLinus Torvalds * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 261da177e4SLinus Torvalds * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 271da177e4SLinus Torvalds * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 281da177e4SLinus Torvalds * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 291da177e4SLinus Torvalds * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 301da177e4SLinus Torvalds * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 311da177e4SLinus Torvalds * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 321da177e4SLinus Torvalds * SOFTWARE. 331da177e4SLinus Torvalds * 341da177e4SLinus Torvalds * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds #include "ipoib.h" 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds #include <linux/version.h> 401da177e4SLinus Torvalds #include <linux/module.h> 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds #include <linux/init.h> 431da177e4SLinus Torvalds #include <linux/slab.h> 441da177e4SLinus Torvalds #include <linux/vmalloc.h> 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds #include <linux/if_arp.h> /* For ARPHRD_xxx */ 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds #include <linux/ip.h> 491da177e4SLinus Torvalds #include <linux/in.h> 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds MODULE_AUTHOR("Roland Dreier"); 521da177e4SLinus Torvalds MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 531da177e4SLinus Torvalds MODULE_LICENSE("Dual BSD/GPL"); 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 561da177e4SLinus Torvalds int ipoib_debug_level; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds module_param_named(debug_level, ipoib_debug_level, int, 0644); 591da177e4SLinus Torvalds MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 601da177e4SLinus Torvalds #endif 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds static const u8 ipv4_bcast_addr[] = { 631da177e4SLinus Torvalds 0x00, 0xff, 0xff, 0xff, 641da177e4SLinus Torvalds 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 651da177e4SLinus Torvalds 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff 661da177e4SLinus Torvalds }; 671da177e4SLinus Torvalds 681da177e4SLinus Torvalds struct workqueue_struct *ipoib_workqueue; 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds static void ipoib_add_one(struct ib_device *device); 711da177e4SLinus Torvalds static void ipoib_remove_one(struct ib_device *device); 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds static struct ib_client ipoib_client = { 741da177e4SLinus Torvalds .name = "ipoib", 751da177e4SLinus Torvalds .add = ipoib_add_one, 761da177e4SLinus Torvalds .remove = ipoib_remove_one 771da177e4SLinus Torvalds }; 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds int ipoib_open(struct net_device *dev) 801da177e4SLinus Torvalds { 811da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 821da177e4SLinus Torvalds 831da177e4SLinus Torvalds ipoib_dbg(priv, "bringing up interface\n"); 841da177e4SLinus Torvalds 851da177e4SLinus Torvalds set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds if (ipoib_pkey_dev_delay_open(dev)) 881da177e4SLinus Torvalds return 0; 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds if (ipoib_ib_dev_open(dev)) 911da177e4SLinus Torvalds return -EINVAL; 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds if (ipoib_ib_dev_up(dev)) 941da177e4SLinus Torvalds return -EINVAL; 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 971da177e4SLinus Torvalds struct ipoib_dev_priv *cpriv; 981da177e4SLinus Torvalds 991da177e4SLinus Torvalds /* Bring up any child interfaces too */ 1001da177e4SLinus Torvalds down(&priv->vlan_mutex); 1011da177e4SLinus Torvalds list_for_each_entry(cpriv, &priv->child_intfs, list) { 1021da177e4SLinus Torvalds int flags; 1031da177e4SLinus Torvalds 1041da177e4SLinus Torvalds flags = cpriv->dev->flags; 1051da177e4SLinus Torvalds if (flags & IFF_UP) 1061da177e4SLinus Torvalds continue; 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds dev_change_flags(cpriv->dev, flags | IFF_UP); 1091da177e4SLinus Torvalds } 1101da177e4SLinus Torvalds up(&priv->vlan_mutex); 1111da177e4SLinus Torvalds } 1121da177e4SLinus Torvalds 1131da177e4SLinus Torvalds netif_start_queue(dev); 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds return 0; 1161da177e4SLinus Torvalds } 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds static int ipoib_stop(struct net_device *dev) 1191da177e4SLinus Torvalds { 1201da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 1211da177e4SLinus Torvalds 1221da177e4SLinus Torvalds ipoib_dbg(priv, "stopping interface\n"); 1231da177e4SLinus Torvalds 1241da177e4SLinus Torvalds clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 1251da177e4SLinus Torvalds 1261da177e4SLinus Torvalds netif_stop_queue(dev); 1271da177e4SLinus Torvalds 1281da177e4SLinus Torvalds ipoib_ib_dev_down(dev); 1291da177e4SLinus Torvalds ipoib_ib_dev_stop(dev); 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 1321da177e4SLinus Torvalds struct ipoib_dev_priv *cpriv; 1331da177e4SLinus Torvalds 1341da177e4SLinus Torvalds /* Bring down any child interfaces too */ 1351da177e4SLinus Torvalds down(&priv->vlan_mutex); 1361da177e4SLinus Torvalds list_for_each_entry(cpriv, &priv->child_intfs, list) { 1371da177e4SLinus Torvalds int flags; 1381da177e4SLinus Torvalds 1391da177e4SLinus Torvalds flags = cpriv->dev->flags; 1401da177e4SLinus Torvalds if (!(flags & IFF_UP)) 1411da177e4SLinus Torvalds continue; 1421da177e4SLinus Torvalds 1431da177e4SLinus Torvalds dev_change_flags(cpriv->dev, flags & ~IFF_UP); 1441da177e4SLinus Torvalds } 1451da177e4SLinus Torvalds up(&priv->vlan_mutex); 1461da177e4SLinus Torvalds } 1471da177e4SLinus Torvalds 1481da177e4SLinus Torvalds return 0; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds 1511da177e4SLinus Torvalds static int ipoib_change_mtu(struct net_device *dev, int new_mtu) 1521da177e4SLinus Torvalds { 1531da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 1541da177e4SLinus Torvalds 1551da177e4SLinus Torvalds if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) 1561da177e4SLinus Torvalds return -EINVAL; 1571da177e4SLinus Torvalds 1581da177e4SLinus Torvalds priv->admin_mtu = new_mtu; 1591da177e4SLinus Torvalds 1601da177e4SLinus Torvalds dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 1611da177e4SLinus Torvalds 1621da177e4SLinus Torvalds return 0; 1631da177e4SLinus Torvalds } 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds static struct ipoib_path *__path_find(struct net_device *dev, 1661da177e4SLinus Torvalds union ib_gid *gid) 1671da177e4SLinus Torvalds { 1681da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 1691da177e4SLinus Torvalds struct rb_node *n = priv->path_tree.rb_node; 1701da177e4SLinus Torvalds struct ipoib_path *path; 1711da177e4SLinus Torvalds int ret; 1721da177e4SLinus Torvalds 1731da177e4SLinus Torvalds while (n) { 1741da177e4SLinus Torvalds path = rb_entry(n, struct ipoib_path, rb_node); 1751da177e4SLinus Torvalds 1761da177e4SLinus Torvalds ret = memcmp(gid->raw, path->pathrec.dgid.raw, 1771da177e4SLinus Torvalds sizeof (union ib_gid)); 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds if (ret < 0) 1801da177e4SLinus Torvalds n = n->rb_left; 1811da177e4SLinus Torvalds else if (ret > 0) 1821da177e4SLinus Torvalds n = n->rb_right; 1831da177e4SLinus Torvalds else 1841da177e4SLinus Torvalds return path; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 1871da177e4SLinus Torvalds return NULL; 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds 1901da177e4SLinus Torvalds static int __path_add(struct net_device *dev, struct ipoib_path *path) 1911da177e4SLinus Torvalds { 1921da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 1931da177e4SLinus Torvalds struct rb_node **n = &priv->path_tree.rb_node; 1941da177e4SLinus Torvalds struct rb_node *pn = NULL; 1951da177e4SLinus Torvalds struct ipoib_path *tpath; 1961da177e4SLinus Torvalds int ret; 1971da177e4SLinus Torvalds 1981da177e4SLinus Torvalds while (*n) { 1991da177e4SLinus Torvalds pn = *n; 2001da177e4SLinus Torvalds tpath = rb_entry(pn, struct ipoib_path, rb_node); 2011da177e4SLinus Torvalds 2021da177e4SLinus Torvalds ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, 2031da177e4SLinus Torvalds sizeof (union ib_gid)); 2041da177e4SLinus Torvalds if (ret < 0) 2051da177e4SLinus Torvalds n = &pn->rb_left; 2061da177e4SLinus Torvalds else if (ret > 0) 2071da177e4SLinus Torvalds n = &pn->rb_right; 2081da177e4SLinus Torvalds else 2091da177e4SLinus Torvalds return -EEXIST; 2101da177e4SLinus Torvalds } 2111da177e4SLinus Torvalds 2121da177e4SLinus Torvalds rb_link_node(&path->rb_node, pn, n); 2131da177e4SLinus Torvalds rb_insert_color(&path->rb_node, &priv->path_tree); 2141da177e4SLinus Torvalds 2151da177e4SLinus Torvalds list_add_tail(&path->list, &priv->path_list); 2161da177e4SLinus Torvalds 2171da177e4SLinus Torvalds return 0; 2181da177e4SLinus Torvalds } 2191da177e4SLinus Torvalds 2201da177e4SLinus Torvalds static void path_free(struct net_device *dev, struct ipoib_path *path) 2211da177e4SLinus Torvalds { 2221da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 2231da177e4SLinus Torvalds struct ipoib_neigh *neigh, *tn; 2241da177e4SLinus Torvalds struct sk_buff *skb; 2251da177e4SLinus Torvalds unsigned long flags; 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds while ((skb = __skb_dequeue(&path->queue))) 2281da177e4SLinus Torvalds dev_kfree_skb_irq(skb); 2291da177e4SLinus Torvalds 2301da177e4SLinus Torvalds spin_lock_irqsave(&priv->lock, flags); 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { 2331da177e4SLinus Torvalds /* 2341da177e4SLinus Torvalds * It's safe to call ipoib_put_ah() inside priv->lock 2351da177e4SLinus Torvalds * here, because we know that path->ah will always 2361da177e4SLinus Torvalds * hold one more reference, so ipoib_put_ah() will 2371da177e4SLinus Torvalds * never do more than decrement the ref count. 2381da177e4SLinus Torvalds */ 2391da177e4SLinus Torvalds if (neigh->ah) 2401da177e4SLinus Torvalds ipoib_put_ah(neigh->ah); 2411da177e4SLinus Torvalds *to_ipoib_neigh(neigh->neighbour) = NULL; 2421da177e4SLinus Torvalds neigh->neighbour->ops->destructor = NULL; 2431da177e4SLinus Torvalds kfree(neigh); 2441da177e4SLinus Torvalds } 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->lock, flags); 2471da177e4SLinus Torvalds 2481da177e4SLinus Torvalds if (path->ah) 2491da177e4SLinus Torvalds ipoib_put_ah(path->ah); 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds kfree(path); 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds void ipoib_flush_paths(struct net_device *dev) 2551da177e4SLinus Torvalds { 2561da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 2571da177e4SLinus Torvalds struct ipoib_path *path, *tp; 2581da177e4SLinus Torvalds LIST_HEAD(remove_list); 2591da177e4SLinus Torvalds unsigned long flags; 2601da177e4SLinus Torvalds 2611da177e4SLinus Torvalds spin_lock_irqsave(&priv->lock, flags); 2621da177e4SLinus Torvalds 2631da177e4SLinus Torvalds list_splice(&priv->path_list, &remove_list); 2641da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->path_list); 2651da177e4SLinus Torvalds 2661da177e4SLinus Torvalds list_for_each_entry(path, &remove_list, list) 2671da177e4SLinus Torvalds rb_erase(&path->rb_node, &priv->path_tree); 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->lock, flags); 2701da177e4SLinus Torvalds 2711da177e4SLinus Torvalds list_for_each_entry_safe(path, tp, &remove_list, list) { 2721da177e4SLinus Torvalds if (path->query) 2731da177e4SLinus Torvalds ib_sa_cancel_query(path->query_id, path->query); 2741da177e4SLinus Torvalds wait_for_completion(&path->done); 2751da177e4SLinus Torvalds path_free(dev, path); 2761da177e4SLinus Torvalds } 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds static void path_rec_completion(int status, 2801da177e4SLinus Torvalds struct ib_sa_path_rec *pathrec, 2811da177e4SLinus Torvalds void *path_ptr) 2821da177e4SLinus Torvalds { 2831da177e4SLinus Torvalds struct ipoib_path *path = path_ptr; 2841da177e4SLinus Torvalds struct net_device *dev = path->dev; 2851da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 2861da177e4SLinus Torvalds struct ipoib_ah *ah = NULL; 2871da177e4SLinus Torvalds struct ipoib_neigh *neigh; 2881da177e4SLinus Torvalds struct sk_buff_head skqueue; 2891da177e4SLinus Torvalds struct sk_buff *skb; 2901da177e4SLinus Torvalds unsigned long flags; 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds if (pathrec) 2931da177e4SLinus Torvalds ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n", 2941da177e4SLinus Torvalds be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid)); 2951da177e4SLinus Torvalds else 2961da177e4SLinus Torvalds ipoib_dbg(priv, "PathRec status %d for GID " IPOIB_GID_FMT "\n", 2971da177e4SLinus Torvalds status, IPOIB_GID_ARG(path->pathrec.dgid)); 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds skb_queue_head_init(&skqueue); 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds if (!status) { 3021da177e4SLinus Torvalds struct ib_ah_attr av = { 3031da177e4SLinus Torvalds .dlid = be16_to_cpu(pathrec->dlid), 3041da177e4SLinus Torvalds .sl = pathrec->sl, 3051da177e4SLinus Torvalds .port_num = priv->port 3061da177e4SLinus Torvalds }; 307e6ded99cSRoland Dreier int path_rate = ib_sa_rate_enum_to_int(pathrec->rate); 3081da177e4SLinus Torvalds 309e6ded99cSRoland Dreier if (path_rate > 0 && priv->local_rate > path_rate) 310e6ded99cSRoland Dreier av.static_rate = (priv->local_rate - 1) / path_rate; 3111da177e4SLinus Torvalds 3121da177e4SLinus Torvalds ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n", 3131da177e4SLinus Torvalds av.static_rate, priv->local_rate, 3141da177e4SLinus Torvalds ib_sa_rate_enum_to_int(pathrec->rate)); 3151da177e4SLinus Torvalds 3161da177e4SLinus Torvalds ah = ipoib_create_ah(dev, priv->pd, &av); 3171da177e4SLinus Torvalds } 3181da177e4SLinus Torvalds 3191da177e4SLinus Torvalds spin_lock_irqsave(&priv->lock, flags); 3201da177e4SLinus Torvalds 3211da177e4SLinus Torvalds path->ah = ah; 3221da177e4SLinus Torvalds 3231da177e4SLinus Torvalds if (ah) { 3241da177e4SLinus Torvalds path->pathrec = *pathrec; 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", 3271da177e4SLinus Torvalds ah, be16_to_cpu(pathrec->dlid), pathrec->sl); 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds while ((skb = __skb_dequeue(&path->queue))) 3301da177e4SLinus Torvalds __skb_queue_tail(&skqueue, skb); 3311da177e4SLinus Torvalds 3321da177e4SLinus Torvalds list_for_each_entry(neigh, &path->neigh_list, list) { 3331da177e4SLinus Torvalds kref_get(&path->ah->ref); 3341da177e4SLinus Torvalds neigh->ah = path->ah; 3351da177e4SLinus Torvalds 3361da177e4SLinus Torvalds while ((skb = __skb_dequeue(&neigh->queue))) 3371da177e4SLinus Torvalds __skb_queue_tail(&skqueue, skb); 3381da177e4SLinus Torvalds } 3391da177e4SLinus Torvalds } else 3401da177e4SLinus Torvalds path->query = NULL; 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds complete(&path->done); 3431da177e4SLinus Torvalds 3441da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->lock, flags); 3451da177e4SLinus Torvalds 3461da177e4SLinus Torvalds while ((skb = __skb_dequeue(&skqueue))) { 3471da177e4SLinus Torvalds skb->dev = dev; 3481da177e4SLinus Torvalds if (dev_queue_xmit(skb)) 3491da177e4SLinus Torvalds ipoib_warn(priv, "dev_queue_xmit failed " 3501da177e4SLinus Torvalds "to requeue packet\n"); 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds } 3531da177e4SLinus Torvalds 3541da177e4SLinus Torvalds static struct ipoib_path *path_rec_create(struct net_device *dev, 3551da177e4SLinus Torvalds union ib_gid *gid) 3561da177e4SLinus Torvalds { 3571da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 3581da177e4SLinus Torvalds struct ipoib_path *path; 3591da177e4SLinus Torvalds 3601da177e4SLinus Torvalds path = kmalloc(sizeof *path, GFP_ATOMIC); 3611da177e4SLinus Torvalds if (!path) 3621da177e4SLinus Torvalds return NULL; 3631da177e4SLinus Torvalds 3641da177e4SLinus Torvalds path->dev = dev; 3651da177e4SLinus Torvalds path->pathrec.dlid = 0; 3661da177e4SLinus Torvalds path->ah = NULL; 3671da177e4SLinus Torvalds 3681da177e4SLinus Torvalds skb_queue_head_init(&path->queue); 3691da177e4SLinus Torvalds 3701da177e4SLinus Torvalds INIT_LIST_HEAD(&path->neigh_list); 3711da177e4SLinus Torvalds path->query = NULL; 3721da177e4SLinus Torvalds init_completion(&path->done); 3731da177e4SLinus Torvalds 3741da177e4SLinus Torvalds memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid)); 3751da177e4SLinus Torvalds path->pathrec.sgid = priv->local_gid; 3761da177e4SLinus Torvalds path->pathrec.pkey = cpu_to_be16(priv->pkey); 3771da177e4SLinus Torvalds path->pathrec.numb_path = 1; 3781da177e4SLinus Torvalds 3791da177e4SLinus Torvalds return path; 3801da177e4SLinus Torvalds } 3811da177e4SLinus Torvalds 3821da177e4SLinus Torvalds static int path_rec_start(struct net_device *dev, 3831da177e4SLinus Torvalds struct ipoib_path *path) 3841da177e4SLinus Torvalds { 3851da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 3861da177e4SLinus Torvalds 3871da177e4SLinus Torvalds ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n", 3881da177e4SLinus Torvalds IPOIB_GID_ARG(path->pathrec.dgid)); 3891da177e4SLinus Torvalds 3901da177e4SLinus Torvalds path->query_id = 3911da177e4SLinus Torvalds ib_sa_path_rec_get(priv->ca, priv->port, 3921da177e4SLinus Torvalds &path->pathrec, 3931da177e4SLinus Torvalds IB_SA_PATH_REC_DGID | 3941da177e4SLinus Torvalds IB_SA_PATH_REC_SGID | 3951da177e4SLinus Torvalds IB_SA_PATH_REC_NUMB_PATH | 3961da177e4SLinus Torvalds IB_SA_PATH_REC_PKEY, 3971da177e4SLinus Torvalds 1000, GFP_ATOMIC, 3981da177e4SLinus Torvalds path_rec_completion, 3991da177e4SLinus Torvalds path, &path->query); 4001da177e4SLinus Torvalds if (path->query_id < 0) { 4011da177e4SLinus Torvalds ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); 4021da177e4SLinus Torvalds path->query = NULL; 4031da177e4SLinus Torvalds return path->query_id; 4041da177e4SLinus Torvalds } 4051da177e4SLinus Torvalds 4061da177e4SLinus Torvalds return 0; 4071da177e4SLinus Torvalds } 4081da177e4SLinus Torvalds 4091da177e4SLinus Torvalds static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) 4101da177e4SLinus Torvalds { 4111da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 4121da177e4SLinus Torvalds struct ipoib_path *path; 4131da177e4SLinus Torvalds struct ipoib_neigh *neigh; 4141da177e4SLinus Torvalds 4151da177e4SLinus Torvalds neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 4161da177e4SLinus Torvalds if (!neigh) { 4171da177e4SLinus Torvalds ++priv->stats.tx_dropped; 4181da177e4SLinus Torvalds dev_kfree_skb_any(skb); 4191da177e4SLinus Torvalds return; 4201da177e4SLinus Torvalds } 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds skb_queue_head_init(&neigh->queue); 4231da177e4SLinus Torvalds neigh->neighbour = skb->dst->neighbour; 4241da177e4SLinus Torvalds *to_ipoib_neigh(skb->dst->neighbour) = neigh; 4251da177e4SLinus Torvalds 4261da177e4SLinus Torvalds /* 4271da177e4SLinus Torvalds * We can only be called from ipoib_start_xmit, so we're 4281da177e4SLinus Torvalds * inside tx_lock -- no need to save/restore flags. 4291da177e4SLinus Torvalds */ 4301da177e4SLinus Torvalds spin_lock(&priv->lock); 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4)); 4331da177e4SLinus Torvalds if (!path) { 4341da177e4SLinus Torvalds path = path_rec_create(dev, 4351da177e4SLinus Torvalds (union ib_gid *) (skb->dst->neighbour->ha + 4)); 4361da177e4SLinus Torvalds if (!path) 4371da177e4SLinus Torvalds goto err; 4381da177e4SLinus Torvalds 4391da177e4SLinus Torvalds __path_add(dev, path); 4401da177e4SLinus Torvalds } 4411da177e4SLinus Torvalds 4421da177e4SLinus Torvalds list_add_tail(&neigh->list, &path->neigh_list); 4431da177e4SLinus Torvalds 4441da177e4SLinus Torvalds if (path->pathrec.dlid) { 4451da177e4SLinus Torvalds kref_get(&path->ah->ref); 4461da177e4SLinus Torvalds neigh->ah = path->ah; 4471da177e4SLinus Torvalds 4481da177e4SLinus Torvalds ipoib_send(dev, skb, path->ah, 4491da177e4SLinus Torvalds be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); 4501da177e4SLinus Torvalds } else { 4511da177e4SLinus Torvalds neigh->ah = NULL; 4521da177e4SLinus Torvalds if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 4531da177e4SLinus Torvalds __skb_queue_tail(&neigh->queue, skb); 4541da177e4SLinus Torvalds } else { 4551da177e4SLinus Torvalds ++priv->stats.tx_dropped; 4561da177e4SLinus Torvalds dev_kfree_skb_any(skb); 4571da177e4SLinus Torvalds } 4581da177e4SLinus Torvalds 4591da177e4SLinus Torvalds if (!path->query && path_rec_start(dev, path)) 4601da177e4SLinus Torvalds goto err; 4611da177e4SLinus Torvalds } 4621da177e4SLinus Torvalds 4631da177e4SLinus Torvalds spin_unlock(&priv->lock); 4641da177e4SLinus Torvalds return; 4651da177e4SLinus Torvalds 4661da177e4SLinus Torvalds err: 4671da177e4SLinus Torvalds *to_ipoib_neigh(skb->dst->neighbour) = NULL; 4681da177e4SLinus Torvalds list_del(&neigh->list); 4691da177e4SLinus Torvalds neigh->neighbour->ops->destructor = NULL; 4701da177e4SLinus Torvalds kfree(neigh); 4711da177e4SLinus Torvalds 4721da177e4SLinus Torvalds ++priv->stats.tx_dropped; 4731da177e4SLinus Torvalds dev_kfree_skb_any(skb); 4741da177e4SLinus Torvalds 4751da177e4SLinus Torvalds spin_unlock(&priv->lock); 4761da177e4SLinus Torvalds } 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds static void path_lookup(struct sk_buff *skb, struct net_device *dev) 4791da177e4SLinus Torvalds { 4801da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(skb->dev); 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds /* Look up path record for unicasts */ 4831da177e4SLinus Torvalds if (skb->dst->neighbour->ha[4] != 0xff) { 4841da177e4SLinus Torvalds neigh_add_path(skb, dev); 4851da177e4SLinus Torvalds return; 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds 4881da177e4SLinus Torvalds /* Add in the P_Key for multicasts */ 4891da177e4SLinus Torvalds skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; 4901da177e4SLinus Torvalds skb->dst->neighbour->ha[9] = priv->pkey & 0xff; 4911da177e4SLinus Torvalds ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb); 4921da177e4SLinus Torvalds } 4931da177e4SLinus Torvalds 4941da177e4SLinus Torvalds static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, 4951da177e4SLinus Torvalds struct ipoib_pseudoheader *phdr) 4961da177e4SLinus Torvalds { 4971da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 4981da177e4SLinus Torvalds struct ipoib_path *path; 4991da177e4SLinus Torvalds 5001da177e4SLinus Torvalds /* 5011da177e4SLinus Torvalds * We can only be called from ipoib_start_xmit, so we're 5021da177e4SLinus Torvalds * inside tx_lock -- no need to save/restore flags. 5031da177e4SLinus Torvalds */ 5041da177e4SLinus Torvalds spin_lock(&priv->lock); 5051da177e4SLinus Torvalds 5061da177e4SLinus Torvalds path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4)); 5071da177e4SLinus Torvalds if (!path) { 5081da177e4SLinus Torvalds path = path_rec_create(dev, 5091da177e4SLinus Torvalds (union ib_gid *) (phdr->hwaddr + 4)); 5101da177e4SLinus Torvalds if (path) { 5111da177e4SLinus Torvalds /* put pseudoheader back on for next time */ 5121da177e4SLinus Torvalds skb_push(skb, sizeof *phdr); 5131da177e4SLinus Torvalds __skb_queue_tail(&path->queue, skb); 5141da177e4SLinus Torvalds 5151da177e4SLinus Torvalds if (path_rec_start(dev, path)) { 5161da177e4SLinus Torvalds spin_unlock(&priv->lock); 5171da177e4SLinus Torvalds path_free(dev, path); 5181da177e4SLinus Torvalds return; 5191da177e4SLinus Torvalds } else 5201da177e4SLinus Torvalds __path_add(dev, path); 5211da177e4SLinus Torvalds } else { 5221da177e4SLinus Torvalds ++priv->stats.tx_dropped; 5231da177e4SLinus Torvalds dev_kfree_skb_any(skb); 5241da177e4SLinus Torvalds } 5251da177e4SLinus Torvalds 5261da177e4SLinus Torvalds spin_unlock(&priv->lock); 5271da177e4SLinus Torvalds return; 5281da177e4SLinus Torvalds } 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds if (path->pathrec.dlid) { 5311da177e4SLinus Torvalds ipoib_dbg(priv, "Send unicast ARP to %04x\n", 5321da177e4SLinus Torvalds be16_to_cpu(path->pathrec.dlid)); 5331da177e4SLinus Torvalds 5341da177e4SLinus Torvalds ipoib_send(dev, skb, path->ah, 5351da177e4SLinus Torvalds be32_to_cpup((__be32 *) phdr->hwaddr)); 5361da177e4SLinus Torvalds } else if ((path->query || !path_rec_start(dev, path)) && 5371da177e4SLinus Torvalds skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 5381da177e4SLinus Torvalds /* put pseudoheader back on for next time */ 5391da177e4SLinus Torvalds skb_push(skb, sizeof *phdr); 5401da177e4SLinus Torvalds __skb_queue_tail(&path->queue, skb); 5411da177e4SLinus Torvalds } else { 5421da177e4SLinus Torvalds ++priv->stats.tx_dropped; 5431da177e4SLinus Torvalds dev_kfree_skb_any(skb); 5441da177e4SLinus Torvalds } 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds spin_unlock(&priv->lock); 5471da177e4SLinus Torvalds } 5481da177e4SLinus Torvalds 5491da177e4SLinus Torvalds static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) 5501da177e4SLinus Torvalds { 5511da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 5521da177e4SLinus Torvalds struct ipoib_neigh *neigh; 5531da177e4SLinus Torvalds unsigned long flags; 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds local_irq_save(flags); 5561da177e4SLinus Torvalds if (!spin_trylock(&priv->tx_lock)) { 5571da177e4SLinus Torvalds local_irq_restore(flags); 5581da177e4SLinus Torvalds return NETDEV_TX_LOCKED; 5591da177e4SLinus Torvalds } 5601da177e4SLinus Torvalds 5611da177e4SLinus Torvalds /* 5621da177e4SLinus Torvalds * Check if our queue is stopped. Since we have the LLTX bit 5631da177e4SLinus Torvalds * set, we can't rely on netif_stop_queue() preventing our 5641da177e4SLinus Torvalds * xmit function from being called with a full queue. 5651da177e4SLinus Torvalds */ 5661da177e4SLinus Torvalds if (unlikely(netif_queue_stopped(dev))) { 5671da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->tx_lock, flags); 5681da177e4SLinus Torvalds return NETDEV_TX_BUSY; 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5711da177e4SLinus Torvalds if (skb->dst && skb->dst->neighbour) { 5721da177e4SLinus Torvalds if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { 5731da177e4SLinus Torvalds path_lookup(skb, dev); 5741da177e4SLinus Torvalds goto out; 5751da177e4SLinus Torvalds } 5761da177e4SLinus Torvalds 5771da177e4SLinus Torvalds neigh = *to_ipoib_neigh(skb->dst->neighbour); 5781da177e4SLinus Torvalds 5791da177e4SLinus Torvalds if (likely(neigh->ah)) { 5801da177e4SLinus Torvalds ipoib_send(dev, skb, neigh->ah, 5811da177e4SLinus Torvalds be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); 5821da177e4SLinus Torvalds goto out; 5831da177e4SLinus Torvalds } 5841da177e4SLinus Torvalds 5851da177e4SLinus Torvalds if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 5861da177e4SLinus Torvalds spin_lock(&priv->lock); 5871da177e4SLinus Torvalds __skb_queue_tail(&neigh->queue, skb); 5881da177e4SLinus Torvalds spin_unlock(&priv->lock); 5891da177e4SLinus Torvalds } else { 5901da177e4SLinus Torvalds ++priv->stats.tx_dropped; 5911da177e4SLinus Torvalds dev_kfree_skb_any(skb); 5921da177e4SLinus Torvalds } 5931da177e4SLinus Torvalds } else { 5941da177e4SLinus Torvalds struct ipoib_pseudoheader *phdr = 5951da177e4SLinus Torvalds (struct ipoib_pseudoheader *) skb->data; 5961da177e4SLinus Torvalds skb_pull(skb, sizeof *phdr); 5971da177e4SLinus Torvalds 5981da177e4SLinus Torvalds if (phdr->hwaddr[4] == 0xff) { 5991da177e4SLinus Torvalds /* Add in the P_Key for multicast*/ 6001da177e4SLinus Torvalds phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; 6011da177e4SLinus Torvalds phdr->hwaddr[9] = priv->pkey & 0xff; 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); 6041da177e4SLinus Torvalds } else { 6050dca0f7bSHal Rosenstock /* unicast GID -- should be ARP or RARP reply */ 6061da177e4SLinus Torvalds 6070dca0f7bSHal Rosenstock if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && 6080dca0f7bSHal Rosenstock (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { 6091da177e4SLinus Torvalds ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " 6101da177e4SLinus Torvalds IPOIB_GID_FMT "\n", 6111da177e4SLinus Torvalds skb->dst ? "neigh" : "dst", 6121da177e4SLinus Torvalds be16_to_cpup((u16 *) skb->data), 6131da177e4SLinus Torvalds be32_to_cpup((u32 *) phdr->hwaddr), 6141da177e4SLinus Torvalds IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); 6151da177e4SLinus Torvalds dev_kfree_skb_any(skb); 6161da177e4SLinus Torvalds ++priv->stats.tx_dropped; 6171da177e4SLinus Torvalds goto out; 6181da177e4SLinus Torvalds } 6191da177e4SLinus Torvalds 6201da177e4SLinus Torvalds unicast_arp_send(skb, dev, phdr); 6211da177e4SLinus Torvalds } 6221da177e4SLinus Torvalds } 6231da177e4SLinus Torvalds 6241da177e4SLinus Torvalds out: 6251da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->tx_lock, flags); 6261da177e4SLinus Torvalds 6271da177e4SLinus Torvalds return NETDEV_TX_OK; 6281da177e4SLinus Torvalds } 6291da177e4SLinus Torvalds 6301da177e4SLinus Torvalds static struct net_device_stats *ipoib_get_stats(struct net_device *dev) 6311da177e4SLinus Torvalds { 6321da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 6331da177e4SLinus Torvalds 6341da177e4SLinus Torvalds return &priv->stats; 6351da177e4SLinus Torvalds } 6361da177e4SLinus Torvalds 6371da177e4SLinus Torvalds static void ipoib_timeout(struct net_device *dev) 6381da177e4SLinus Torvalds { 6391da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 6401da177e4SLinus Torvalds 6411da177e4SLinus Torvalds ipoib_warn(priv, "transmit timeout: latency %ld\n", 6421da177e4SLinus Torvalds jiffies - dev->trans_start); 6431da177e4SLinus Torvalds /* XXX reset QP, etc. */ 6441da177e4SLinus Torvalds } 6451da177e4SLinus Torvalds 6461da177e4SLinus Torvalds static int ipoib_hard_header(struct sk_buff *skb, 6471da177e4SLinus Torvalds struct net_device *dev, 6481da177e4SLinus Torvalds unsigned short type, 6491da177e4SLinus Torvalds void *daddr, void *saddr, unsigned len) 6501da177e4SLinus Torvalds { 6511da177e4SLinus Torvalds struct ipoib_header *header; 6521da177e4SLinus Torvalds 6531da177e4SLinus Torvalds header = (struct ipoib_header *) skb_push(skb, sizeof *header); 6541da177e4SLinus Torvalds 6551da177e4SLinus Torvalds header->proto = htons(type); 6561da177e4SLinus Torvalds header->reserved = 0; 6571da177e4SLinus Torvalds 6581da177e4SLinus Torvalds /* 6591da177e4SLinus Torvalds * If we don't have a neighbour structure, stuff the 6601da177e4SLinus Torvalds * destination address onto the front of the skb so we can 6611da177e4SLinus Torvalds * figure out where to send the packet later. 6621da177e4SLinus Torvalds */ 6631da177e4SLinus Torvalds if (!skb->dst || !skb->dst->neighbour) { 6641da177e4SLinus Torvalds struct ipoib_pseudoheader *phdr = 6651da177e4SLinus Torvalds (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); 6661da177e4SLinus Torvalds memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); 6671da177e4SLinus Torvalds } 6681da177e4SLinus Torvalds 6691da177e4SLinus Torvalds return 0; 6701da177e4SLinus Torvalds } 6711da177e4SLinus Torvalds 6721da177e4SLinus Torvalds static void ipoib_set_mcast_list(struct net_device *dev) 6731da177e4SLinus Torvalds { 6741da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 6751da177e4SLinus Torvalds 6761da177e4SLinus Torvalds schedule_work(&priv->restart_task); 6771da177e4SLinus Torvalds } 6781da177e4SLinus Torvalds 6791da177e4SLinus Torvalds static void ipoib_neigh_destructor(struct neighbour *n) 6801da177e4SLinus Torvalds { 6811da177e4SLinus Torvalds struct ipoib_neigh *neigh; 6821da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(n->dev); 6831da177e4SLinus Torvalds unsigned long flags; 6841da177e4SLinus Torvalds struct ipoib_ah *ah = NULL; 6851da177e4SLinus Torvalds 6861da177e4SLinus Torvalds ipoib_dbg(priv, 6871da177e4SLinus Torvalds "neigh_destructor for %06x " IPOIB_GID_FMT "\n", 6881da177e4SLinus Torvalds be32_to_cpup((__be32 *) n->ha), 6891da177e4SLinus Torvalds IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4)))); 6901da177e4SLinus Torvalds 6911da177e4SLinus Torvalds spin_lock_irqsave(&priv->lock, flags); 6921da177e4SLinus Torvalds 6931da177e4SLinus Torvalds neigh = *to_ipoib_neigh(n); 6941da177e4SLinus Torvalds if (neigh) { 6951da177e4SLinus Torvalds if (neigh->ah) 6961da177e4SLinus Torvalds ah = neigh->ah; 6971da177e4SLinus Torvalds list_del(&neigh->list); 6981da177e4SLinus Torvalds *to_ipoib_neigh(n) = NULL; 6991da177e4SLinus Torvalds kfree(neigh); 7001da177e4SLinus Torvalds } 7011da177e4SLinus Torvalds 7021da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->lock, flags); 7031da177e4SLinus Torvalds 7041da177e4SLinus Torvalds if (ah) 7051da177e4SLinus Torvalds ipoib_put_ah(ah); 7061da177e4SLinus Torvalds } 7071da177e4SLinus Torvalds 7081da177e4SLinus Torvalds static int ipoib_neigh_setup(struct neighbour *neigh) 7091da177e4SLinus Torvalds { 7101da177e4SLinus Torvalds /* 7111da177e4SLinus Torvalds * Is this kosher? I can't find anybody in the kernel that 7121da177e4SLinus Torvalds * sets neigh->destructor, so we should be able to set it here 7131da177e4SLinus Torvalds * without trouble. 7141da177e4SLinus Torvalds */ 7151da177e4SLinus Torvalds neigh->ops->destructor = ipoib_neigh_destructor; 7161da177e4SLinus Torvalds 7171da177e4SLinus Torvalds return 0; 7181da177e4SLinus Torvalds } 7191da177e4SLinus Torvalds 7201da177e4SLinus Torvalds static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) 7211da177e4SLinus Torvalds { 7221da177e4SLinus Torvalds parms->neigh_setup = ipoib_neigh_setup; 7231da177e4SLinus Torvalds 7241da177e4SLinus Torvalds return 0; 7251da177e4SLinus Torvalds } 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) 7281da177e4SLinus Torvalds { 7291da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds /* Allocate RX/TX "rings" to hold queued skbs */ 7321da177e4SLinus Torvalds 7331da177e4SLinus Torvalds priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), 7341da177e4SLinus Torvalds GFP_KERNEL); 7351da177e4SLinus Torvalds if (!priv->rx_ring) { 7361da177e4SLinus Torvalds printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 7371da177e4SLinus Torvalds ca->name, IPOIB_RX_RING_SIZE); 7381da177e4SLinus Torvalds goto out; 7391da177e4SLinus Torvalds } 7401da177e4SLinus Torvalds memset(priv->rx_ring, 0, 7411da177e4SLinus Torvalds IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); 7421da177e4SLinus Torvalds 7431da177e4SLinus Torvalds priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), 7441da177e4SLinus Torvalds GFP_KERNEL); 7451da177e4SLinus Torvalds if (!priv->tx_ring) { 7461da177e4SLinus Torvalds printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 7471da177e4SLinus Torvalds ca->name, IPOIB_TX_RING_SIZE); 7481da177e4SLinus Torvalds goto out_rx_ring_cleanup; 7491da177e4SLinus Torvalds } 7501da177e4SLinus Torvalds memset(priv->tx_ring, 0, 7511da177e4SLinus Torvalds IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); 7521da177e4SLinus Torvalds 7531da177e4SLinus Torvalds /* priv->tx_head & tx_tail are already 0 */ 7541da177e4SLinus Torvalds 7551da177e4SLinus Torvalds if (ipoib_ib_dev_init(dev, ca, port)) 7561da177e4SLinus Torvalds goto out_tx_ring_cleanup; 7571da177e4SLinus Torvalds 7581da177e4SLinus Torvalds return 0; 7591da177e4SLinus Torvalds 7601da177e4SLinus Torvalds out_tx_ring_cleanup: 7611da177e4SLinus Torvalds kfree(priv->tx_ring); 7621da177e4SLinus Torvalds 7631da177e4SLinus Torvalds out_rx_ring_cleanup: 7641da177e4SLinus Torvalds kfree(priv->rx_ring); 7651da177e4SLinus Torvalds 7661da177e4SLinus Torvalds out: 7671da177e4SLinus Torvalds return -ENOMEM; 7681da177e4SLinus Torvalds } 7691da177e4SLinus Torvalds 7701da177e4SLinus Torvalds void ipoib_dev_cleanup(struct net_device *dev) 7711da177e4SLinus Torvalds { 7721da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; 7731da177e4SLinus Torvalds 7741da177e4SLinus Torvalds ipoib_delete_debug_file(dev); 7751da177e4SLinus Torvalds 7761da177e4SLinus Torvalds /* Delete any child interfaces first */ 7771da177e4SLinus Torvalds list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 7781da177e4SLinus Torvalds unregister_netdev(cpriv->dev); 7791da177e4SLinus Torvalds ipoib_dev_cleanup(cpriv->dev); 7801da177e4SLinus Torvalds free_netdev(cpriv->dev); 7811da177e4SLinus Torvalds } 7821da177e4SLinus Torvalds 7831da177e4SLinus Torvalds ipoib_ib_dev_cleanup(dev); 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds kfree(priv->rx_ring); 7861da177e4SLinus Torvalds kfree(priv->tx_ring); 78792a6b34bSHal Rosenstock 78892a6b34bSHal Rosenstock priv->rx_ring = NULL; 7891da177e4SLinus Torvalds priv->tx_ring = NULL; 7901da177e4SLinus Torvalds } 7911da177e4SLinus Torvalds 7921da177e4SLinus Torvalds static void ipoib_setup(struct net_device *dev) 7931da177e4SLinus Torvalds { 7941da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 7951da177e4SLinus Torvalds 7961da177e4SLinus Torvalds dev->open = ipoib_open; 7971da177e4SLinus Torvalds dev->stop = ipoib_stop; 7981da177e4SLinus Torvalds dev->change_mtu = ipoib_change_mtu; 7991da177e4SLinus Torvalds dev->hard_start_xmit = ipoib_start_xmit; 8001da177e4SLinus Torvalds dev->get_stats = ipoib_get_stats; 8011da177e4SLinus Torvalds dev->tx_timeout = ipoib_timeout; 8021da177e4SLinus Torvalds dev->hard_header = ipoib_hard_header; 8031da177e4SLinus Torvalds dev->set_multicast_list = ipoib_set_mcast_list; 8041da177e4SLinus Torvalds dev->neigh_setup = ipoib_neigh_setup_dev; 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds dev->watchdog_timeo = HZ; 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds dev->rebuild_header = NULL; 8091da177e4SLinus Torvalds dev->set_mac_address = NULL; 8101da177e4SLinus Torvalds dev->header_cache_update = NULL; 8111da177e4SLinus Torvalds 8121da177e4SLinus Torvalds dev->flags |= IFF_BROADCAST | IFF_MULTICAST; 8131da177e4SLinus Torvalds 8141da177e4SLinus Torvalds /* 8151da177e4SLinus Torvalds * We add in INFINIBAND_ALEN to allow for the destination 8161da177e4SLinus Torvalds * address "pseudoheader" for skbs without neighbour struct. 8171da177e4SLinus Torvalds */ 8181da177e4SLinus Torvalds dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; 8191da177e4SLinus Torvalds dev->addr_len = INFINIBAND_ALEN; 8201da177e4SLinus Torvalds dev->type = ARPHRD_INFINIBAND; 8211da177e4SLinus Torvalds dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; 8221da177e4SLinus Torvalds dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; 8231da177e4SLinus Torvalds 8241da177e4SLinus Torvalds /* MTU will be reset when mcast join happens */ 8251da177e4SLinus Torvalds dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; 8261da177e4SLinus Torvalds priv->mcast_mtu = priv->admin_mtu = dev->mtu; 8271da177e4SLinus Torvalds 8281da177e4SLinus Torvalds memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); 8291da177e4SLinus Torvalds 8301da177e4SLinus Torvalds netif_carrier_off(dev); 8311da177e4SLinus Torvalds 8321da177e4SLinus Torvalds SET_MODULE_OWNER(dev); 8331da177e4SLinus Torvalds 8341da177e4SLinus Torvalds priv->dev = dev; 8351da177e4SLinus Torvalds 8361da177e4SLinus Torvalds spin_lock_init(&priv->lock); 8371da177e4SLinus Torvalds spin_lock_init(&priv->tx_lock); 8381da177e4SLinus Torvalds 8391da177e4SLinus Torvalds init_MUTEX(&priv->mcast_mutex); 8401da177e4SLinus Torvalds init_MUTEX(&priv->vlan_mutex); 8411da177e4SLinus Torvalds 8421da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->path_list); 8431da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->child_intfs); 8441da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->dead_ahs); 8451da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->multicast_list); 8461da177e4SLinus Torvalds 8471da177e4SLinus Torvalds INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev); 8481da177e4SLinus Torvalds INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev); 8491da177e4SLinus Torvalds INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev); 8501da177e4SLinus Torvalds INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev); 8511da177e4SLinus Torvalds INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev); 8521da177e4SLinus Torvalds } 8531da177e4SLinus Torvalds 8541da177e4SLinus Torvalds struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) 8551da177e4SLinus Torvalds { 8561da177e4SLinus Torvalds struct net_device *dev; 8571da177e4SLinus Torvalds 8581da177e4SLinus Torvalds dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name, 8591da177e4SLinus Torvalds ipoib_setup); 8601da177e4SLinus Torvalds if (!dev) 8611da177e4SLinus Torvalds return NULL; 8621da177e4SLinus Torvalds 8631da177e4SLinus Torvalds return netdev_priv(dev); 8641da177e4SLinus Torvalds } 8651da177e4SLinus Torvalds 8661da177e4SLinus Torvalds static ssize_t show_pkey(struct class_device *cdev, char *buf) 8671da177e4SLinus Torvalds { 8681da177e4SLinus Torvalds struct ipoib_dev_priv *priv = 8691da177e4SLinus Torvalds netdev_priv(container_of(cdev, struct net_device, class_dev)); 8701da177e4SLinus Torvalds 8711da177e4SLinus Torvalds return sprintf(buf, "0x%04x\n", priv->pkey); 8721da177e4SLinus Torvalds } 8731da177e4SLinus Torvalds static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 8741da177e4SLinus Torvalds 8751da177e4SLinus Torvalds static ssize_t create_child(struct class_device *cdev, 8761da177e4SLinus Torvalds const char *buf, size_t count) 8771da177e4SLinus Torvalds { 8781da177e4SLinus Torvalds int pkey; 8791da177e4SLinus Torvalds int ret; 8801da177e4SLinus Torvalds 8811da177e4SLinus Torvalds if (sscanf(buf, "%i", &pkey) != 1) 8821da177e4SLinus Torvalds return -EINVAL; 8831da177e4SLinus Torvalds 8841da177e4SLinus Torvalds if (pkey < 0 || pkey > 0xffff) 8851da177e4SLinus Torvalds return -EINVAL; 8861da177e4SLinus Torvalds 8871da177e4SLinus Torvalds ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), 8881da177e4SLinus Torvalds pkey); 8891da177e4SLinus Torvalds 8901da177e4SLinus Torvalds return ret ? ret : count; 8911da177e4SLinus Torvalds } 8921da177e4SLinus Torvalds static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child); 8931da177e4SLinus Torvalds 8941da177e4SLinus Torvalds static ssize_t delete_child(struct class_device *cdev, 8951da177e4SLinus Torvalds const char *buf, size_t count) 8961da177e4SLinus Torvalds { 8971da177e4SLinus Torvalds int pkey; 8981da177e4SLinus Torvalds int ret; 8991da177e4SLinus Torvalds 9001da177e4SLinus Torvalds if (sscanf(buf, "%i", &pkey) != 1) 9011da177e4SLinus Torvalds return -EINVAL; 9021da177e4SLinus Torvalds 9031da177e4SLinus Torvalds if (pkey < 0 || pkey > 0xffff) 9041da177e4SLinus Torvalds return -EINVAL; 9051da177e4SLinus Torvalds 9061da177e4SLinus Torvalds ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev), 9071da177e4SLinus Torvalds pkey); 9081da177e4SLinus Torvalds 9091da177e4SLinus Torvalds return ret ? ret : count; 9101da177e4SLinus Torvalds 9111da177e4SLinus Torvalds } 9121da177e4SLinus Torvalds static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child); 9131da177e4SLinus Torvalds 9141da177e4SLinus Torvalds int ipoib_add_pkey_attr(struct net_device *dev) 9151da177e4SLinus Torvalds { 9161da177e4SLinus Torvalds return class_device_create_file(&dev->class_dev, 9171da177e4SLinus Torvalds &class_device_attr_pkey); 9181da177e4SLinus Torvalds } 9191da177e4SLinus Torvalds 9201da177e4SLinus Torvalds static struct net_device *ipoib_add_port(const char *format, 9211da177e4SLinus Torvalds struct ib_device *hca, u8 port) 9221da177e4SLinus Torvalds { 9231da177e4SLinus Torvalds struct ipoib_dev_priv *priv; 9241da177e4SLinus Torvalds int result = -ENOMEM; 9251da177e4SLinus Torvalds 9261da177e4SLinus Torvalds priv = ipoib_intf_alloc(format); 9271da177e4SLinus Torvalds if (!priv) 9281da177e4SLinus Torvalds goto alloc_mem_failed; 9291da177e4SLinus Torvalds 9301da177e4SLinus Torvalds SET_NETDEV_DEV(priv->dev, hca->dma_device); 9311da177e4SLinus Torvalds 9321da177e4SLinus Torvalds result = ib_query_pkey(hca, port, 0, &priv->pkey); 9331da177e4SLinus Torvalds if (result) { 9341da177e4SLinus Torvalds printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 9351da177e4SLinus Torvalds hca->name, port, result); 9361da177e4SLinus Torvalds goto alloc_mem_failed; 9371da177e4SLinus Torvalds } 9381da177e4SLinus Torvalds 9391da177e4SLinus Torvalds priv->dev->broadcast[8] = priv->pkey >> 8; 9401da177e4SLinus Torvalds priv->dev->broadcast[9] = priv->pkey & 0xff; 9411da177e4SLinus Torvalds 9421da177e4SLinus Torvalds result = ib_query_gid(hca, port, 0, &priv->local_gid); 9431da177e4SLinus Torvalds if (result) { 9441da177e4SLinus Torvalds printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 9451da177e4SLinus Torvalds hca->name, port, result); 9461da177e4SLinus Torvalds goto alloc_mem_failed; 9471da177e4SLinus Torvalds } else 9481da177e4SLinus Torvalds memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 9491da177e4SLinus Torvalds 9501da177e4SLinus Torvalds 9511da177e4SLinus Torvalds result = ipoib_dev_init(priv->dev, hca, port); 9521da177e4SLinus Torvalds if (result < 0) { 9531da177e4SLinus Torvalds printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", 9541da177e4SLinus Torvalds hca->name, port, result); 9551da177e4SLinus Torvalds goto device_init_failed; 9561da177e4SLinus Torvalds } 9571da177e4SLinus Torvalds 9581da177e4SLinus Torvalds INIT_IB_EVENT_HANDLER(&priv->event_handler, 9591da177e4SLinus Torvalds priv->ca, ipoib_event); 9601da177e4SLinus Torvalds result = ib_register_event_handler(&priv->event_handler); 9611da177e4SLinus Torvalds if (result < 0) { 9621da177e4SLinus Torvalds printk(KERN_WARNING "%s: ib_register_event_handler failed for " 9631da177e4SLinus Torvalds "port %d (ret = %d)\n", 9641da177e4SLinus Torvalds hca->name, port, result); 9651da177e4SLinus Torvalds goto event_failed; 9661da177e4SLinus Torvalds } 9671da177e4SLinus Torvalds 9681da177e4SLinus Torvalds result = register_netdev(priv->dev); 9691da177e4SLinus Torvalds if (result) { 9701da177e4SLinus Torvalds printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n", 9711da177e4SLinus Torvalds hca->name, port, result); 9721da177e4SLinus Torvalds goto register_failed; 9731da177e4SLinus Torvalds } 9741da177e4SLinus Torvalds 9751da177e4SLinus Torvalds if (ipoib_create_debug_file(priv->dev)) 9761da177e4SLinus Torvalds goto debug_failed; 9771da177e4SLinus Torvalds 9781da177e4SLinus Torvalds if (ipoib_add_pkey_attr(priv->dev)) 9791da177e4SLinus Torvalds goto sysfs_failed; 9801da177e4SLinus Torvalds if (class_device_create_file(&priv->dev->class_dev, 9811da177e4SLinus Torvalds &class_device_attr_create_child)) 9821da177e4SLinus Torvalds goto sysfs_failed; 9831da177e4SLinus Torvalds if (class_device_create_file(&priv->dev->class_dev, 9841da177e4SLinus Torvalds &class_device_attr_delete_child)) 9851da177e4SLinus Torvalds goto sysfs_failed; 9861da177e4SLinus Torvalds 9871da177e4SLinus Torvalds return priv->dev; 9881da177e4SLinus Torvalds 9891da177e4SLinus Torvalds sysfs_failed: 9901da177e4SLinus Torvalds ipoib_delete_debug_file(priv->dev); 9911da177e4SLinus Torvalds 9921da177e4SLinus Torvalds debug_failed: 9931da177e4SLinus Torvalds unregister_netdev(priv->dev); 9941da177e4SLinus Torvalds 9951da177e4SLinus Torvalds register_failed: 9961da177e4SLinus Torvalds ib_unregister_event_handler(&priv->event_handler); 9971da177e4SLinus Torvalds 9981da177e4SLinus Torvalds event_failed: 9991da177e4SLinus Torvalds ipoib_dev_cleanup(priv->dev); 10001da177e4SLinus Torvalds 10011da177e4SLinus Torvalds device_init_failed: 10021da177e4SLinus Torvalds free_netdev(priv->dev); 10031da177e4SLinus Torvalds 10041da177e4SLinus Torvalds alloc_mem_failed: 10051da177e4SLinus Torvalds return ERR_PTR(result); 10061da177e4SLinus Torvalds } 10071da177e4SLinus Torvalds 10081da177e4SLinus Torvalds static void ipoib_add_one(struct ib_device *device) 10091da177e4SLinus Torvalds { 10101da177e4SLinus Torvalds struct list_head *dev_list; 10111da177e4SLinus Torvalds struct net_device *dev; 10121da177e4SLinus Torvalds struct ipoib_dev_priv *priv; 10131da177e4SLinus Torvalds int s, e, p; 10141da177e4SLinus Torvalds 10151da177e4SLinus Torvalds dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 10161da177e4SLinus Torvalds if (!dev_list) 10171da177e4SLinus Torvalds return; 10181da177e4SLinus Torvalds 10191da177e4SLinus Torvalds INIT_LIST_HEAD(dev_list); 10201da177e4SLinus Torvalds 10211da177e4SLinus Torvalds if (device->node_type == IB_NODE_SWITCH) { 10221da177e4SLinus Torvalds s = 0; 10231da177e4SLinus Torvalds e = 0; 10241da177e4SLinus Torvalds } else { 10251da177e4SLinus Torvalds s = 1; 10261da177e4SLinus Torvalds e = device->phys_port_cnt; 10271da177e4SLinus Torvalds } 10281da177e4SLinus Torvalds 10291da177e4SLinus Torvalds for (p = s; p <= e; ++p) { 10301da177e4SLinus Torvalds dev = ipoib_add_port("ib%d", device, p); 10311da177e4SLinus Torvalds if (!IS_ERR(dev)) { 10321da177e4SLinus Torvalds priv = netdev_priv(dev); 10331da177e4SLinus Torvalds list_add_tail(&priv->list, dev_list); 10341da177e4SLinus Torvalds } 10351da177e4SLinus Torvalds } 10361da177e4SLinus Torvalds 10371da177e4SLinus Torvalds ib_set_client_data(device, &ipoib_client, dev_list); 10381da177e4SLinus Torvalds } 10391da177e4SLinus Torvalds 10401da177e4SLinus Torvalds static void ipoib_remove_one(struct ib_device *device) 10411da177e4SLinus Torvalds { 10421da177e4SLinus Torvalds struct ipoib_dev_priv *priv, *tmp; 10431da177e4SLinus Torvalds struct list_head *dev_list; 10441da177e4SLinus Torvalds 10451da177e4SLinus Torvalds dev_list = ib_get_client_data(device, &ipoib_client); 10461da177e4SLinus Torvalds 10471da177e4SLinus Torvalds list_for_each_entry_safe(priv, tmp, dev_list, list) { 10481da177e4SLinus Torvalds ib_unregister_event_handler(&priv->event_handler); 10491da177e4SLinus Torvalds 10501da177e4SLinus Torvalds unregister_netdev(priv->dev); 10511da177e4SLinus Torvalds ipoib_dev_cleanup(priv->dev); 10521da177e4SLinus Torvalds free_netdev(priv->dev); 10531da177e4SLinus Torvalds } 10541da177e4SLinus Torvalds } 10551da177e4SLinus Torvalds 10561da177e4SLinus Torvalds static int __init ipoib_init_module(void) 10571da177e4SLinus Torvalds { 10581da177e4SLinus Torvalds int ret; 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds ret = ipoib_register_debugfs(); 10611da177e4SLinus Torvalds if (ret) 10621da177e4SLinus Torvalds return ret; 10631da177e4SLinus Torvalds 10641da177e4SLinus Torvalds /* 10651da177e4SLinus Torvalds * We create our own workqueue mainly because we want to be 10661da177e4SLinus Torvalds * able to flush it when devices are being removed. We can't 10671da177e4SLinus Torvalds * use schedule_work()/flush_scheduled_work() because both 10681da177e4SLinus Torvalds * unregister_netdev() and linkwatch_event take the rtnl lock, 10691da177e4SLinus Torvalds * so flush_scheduled_work() can deadlock during device 10701da177e4SLinus Torvalds * removal. 10711da177e4SLinus Torvalds */ 10721da177e4SLinus Torvalds ipoib_workqueue = create_singlethread_workqueue("ipoib"); 10731da177e4SLinus Torvalds if (!ipoib_workqueue) { 10741da177e4SLinus Torvalds ret = -ENOMEM; 10751da177e4SLinus Torvalds goto err_fs; 10761da177e4SLinus Torvalds } 10771da177e4SLinus Torvalds 10781da177e4SLinus Torvalds ret = ib_register_client(&ipoib_client); 10791da177e4SLinus Torvalds if (ret) 10801da177e4SLinus Torvalds goto err_wq; 10811da177e4SLinus Torvalds 10821da177e4SLinus Torvalds return 0; 10831da177e4SLinus Torvalds 10841da177e4SLinus Torvalds err_wq: 10851da177e4SLinus Torvalds destroy_workqueue(ipoib_workqueue); 10861da177e4SLinus Torvalds 10879adec1a8SRoland Dreier err_fs: 10889adec1a8SRoland Dreier ipoib_unregister_debugfs(); 10899adec1a8SRoland Dreier 10901da177e4SLinus Torvalds return ret; 10911da177e4SLinus Torvalds } 10921da177e4SLinus Torvalds 10931da177e4SLinus Torvalds static void __exit ipoib_cleanup_module(void) 10941da177e4SLinus Torvalds { 10951da177e4SLinus Torvalds ib_unregister_client(&ipoib_client); 10969adec1a8SRoland Dreier ipoib_unregister_debugfs(); 10971da177e4SLinus Torvalds destroy_workqueue(ipoib_workqueue); 10981da177e4SLinus Torvalds } 10991da177e4SLinus Torvalds 11001da177e4SLinus Torvalds module_init(ipoib_init_module); 11011da177e4SLinus Torvalds module_exit(ipoib_cleanup_module); 1102