11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (c) 2004 Topspin Communications. All rights reserved. 32a1d9b7fSRoland Dreier * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 42a1d9b7fSRoland Dreier * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This software is available to you under a choice of one of two 71da177e4SLinus Torvalds * licenses. You may choose to be licensed under the terms of the GNU 81da177e4SLinus Torvalds * General Public License (GPL) Version 2, available from the file 91da177e4SLinus Torvalds * COPYING in the main directory of this source tree, or the 101da177e4SLinus Torvalds * OpenIB.org BSD license below: 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Redistribution and use in source and binary forms, with or 131da177e4SLinus Torvalds * without modification, are permitted provided that the following 141da177e4SLinus Torvalds * conditions are met: 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * - Redistributions of source code must retain the above 171da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 181da177e4SLinus Torvalds * disclaimer. 191da177e4SLinus Torvalds * 201da177e4SLinus Torvalds * - Redistributions in binary form must reproduce the above 211da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 221da177e4SLinus Torvalds * disclaimer in the documentation and/or other materials 231da177e4SLinus Torvalds * provided with the distribution. 241da177e4SLinus Torvalds * 251da177e4SLinus Torvalds * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 261da177e4SLinus Torvalds * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 271da177e4SLinus Torvalds * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 281da177e4SLinus Torvalds * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 291da177e4SLinus Torvalds * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 301da177e4SLinus Torvalds * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 311da177e4SLinus Torvalds * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 321da177e4SLinus Torvalds * SOFTWARE. 331da177e4SLinus Torvalds */ 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds #include "ipoib.h" 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds #include <linux/module.h> 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds #include <linux/init.h> 401da177e4SLinus Torvalds #include <linux/slab.h> 410f485251SShirley Ma #include <linux/kernel.h> 4210313cbbSRoland Dreier #include <linux/vmalloc.h> 431da177e4SLinus Torvalds 441da177e4SLinus Torvalds #include <linux/if_arp.h> /* For ARPHRD_xxx */ 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds #include <linux/ip.h> 471da177e4SLinus Torvalds #include <linux/in.h> 481da177e4SLinus Torvalds 49b63b70d8SShlomo Pongratz #include <linux/jhash.h> 50b63b70d8SShlomo Pongratz #include <net/arp.h> 5114c85021SArnaldo Carvalho de Melo 521da177e4SLinus Torvalds MODULE_AUTHOR("Roland Dreier"); 531da177e4SLinus Torvalds MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 541da177e4SLinus Torvalds MODULE_LICENSE("Dual BSD/GPL"); 551da177e4SLinus Torvalds 560f485251SShirley Ma int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; 570f485251SShirley Ma int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; 580f485251SShirley Ma 590f485251SShirley Ma module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); 600f485251SShirley Ma MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); 610f485251SShirley Ma module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); 620f485251SShirley Ma MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); 630f485251SShirley Ma 641da177e4SLinus Torvalds #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 651da177e4SLinus Torvalds int ipoib_debug_level; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds module_param_named(debug_level, ipoib_debug_level, int, 0644); 681da177e4SLinus Torvalds MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 691da177e4SLinus Torvalds #endif 701da177e4SLinus Torvalds 711732b0efSRoland Dreier struct ipoib_path_iter { 721732b0efSRoland Dreier struct net_device *dev; 731732b0efSRoland Dreier struct ipoib_path path; 741732b0efSRoland Dreier }; 751732b0efSRoland Dreier 761da177e4SLinus Torvalds static const u8 ipv4_bcast_addr[] = { 771da177e4SLinus Torvalds 0x00, 0xff, 0xff, 0xff, 781da177e4SLinus Torvalds 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 791da177e4SLinus Torvalds 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff 801da177e4SLinus Torvalds }; 811da177e4SLinus Torvalds 821da177e4SLinus Torvalds struct workqueue_struct *ipoib_workqueue; 831da177e4SLinus Torvalds 84c1a0b23bSMichael S. Tsirkin struct ib_sa_client ipoib_sa_client; 85c1a0b23bSMichael S. Tsirkin 861da177e4SLinus Torvalds static void ipoib_add_one(struct ib_device *device); 871da177e4SLinus Torvalds static void ipoib_remove_one(struct ib_device *device); 88b63b70d8SShlomo Pongratz static void ipoib_neigh_reclaim(struct rcu_head *rp); 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds static struct ib_client ipoib_client = { 911da177e4SLinus Torvalds .name = "ipoib", 921da177e4SLinus Torvalds .add = ipoib_add_one, 931da177e4SLinus Torvalds .remove = ipoib_remove_one 941da177e4SLinus Torvalds }; 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds int ipoib_open(struct net_device *dev) 971da177e4SLinus Torvalds { 981da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 991da177e4SLinus Torvalds 1001da177e4SLinus Torvalds ipoib_dbg(priv, "bringing up interface\n"); 1011da177e4SLinus Torvalds 102e028cc55SYossi Etigin set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 1031da177e4SLinus Torvalds 1041da177e4SLinus Torvalds if (ipoib_pkey_dev_delay_open(dev)) 1051da177e4SLinus Torvalds return 0; 1061da177e4SLinus Torvalds 107b8a1b1ceSRoland Dreier if (ipoib_ib_dev_open(dev)) 108b8a1b1ceSRoland Dreier goto err_disable; 109fe25c561SYossi Etigin 110b8a1b1ceSRoland Dreier if (ipoib_ib_dev_up(dev)) 111b8a1b1ceSRoland Dreier goto err_stop; 1121da177e4SLinus Torvalds 1131da177e4SLinus Torvalds if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 1141da177e4SLinus Torvalds struct ipoib_dev_priv *cpriv; 1151da177e4SLinus Torvalds 1161da177e4SLinus Torvalds /* Bring up any child interfaces too */ 11795ed644fSIngo Molnar mutex_lock(&priv->vlan_mutex); 1181da177e4SLinus Torvalds list_for_each_entry(cpriv, &priv->child_intfs, list) { 1191da177e4SLinus Torvalds int flags; 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds flags = cpriv->dev->flags; 1221da177e4SLinus Torvalds if (flags & IFF_UP) 1231da177e4SLinus Torvalds continue; 1241da177e4SLinus Torvalds 1251da177e4SLinus Torvalds dev_change_flags(cpriv->dev, flags | IFF_UP); 1261da177e4SLinus Torvalds } 12795ed644fSIngo Molnar mutex_unlock(&priv->vlan_mutex); 1281da177e4SLinus Torvalds } 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds netif_start_queue(dev); 1311da177e4SLinus Torvalds 1321da177e4SLinus Torvalds return 0; 133b8a1b1ceSRoland Dreier 134b8a1b1ceSRoland Dreier err_stop: 135b8a1b1ceSRoland Dreier ipoib_ib_dev_stop(dev, 1); 136b8a1b1ceSRoland Dreier 137b8a1b1ceSRoland Dreier err_disable: 138b8a1b1ceSRoland Dreier clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 139b8a1b1ceSRoland Dreier 140b8a1b1ceSRoland Dreier return -EINVAL; 1411da177e4SLinus Torvalds } 1421da177e4SLinus Torvalds 1431da177e4SLinus Torvalds static int ipoib_stop(struct net_device *dev) 1441da177e4SLinus Torvalds { 1451da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 1461da177e4SLinus Torvalds 1471da177e4SLinus Torvalds ipoib_dbg(priv, "stopping interface\n"); 1481da177e4SLinus Torvalds 1491da177e4SLinus Torvalds clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 1501da177e4SLinus Torvalds 1511da177e4SLinus Torvalds netif_stop_queue(dev); 1521da177e4SLinus Torvalds 153a77a57a1SRoland Dreier ipoib_ib_dev_down(dev, 0); 154a77a57a1SRoland Dreier ipoib_ib_dev_stop(dev, 0); 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 1571da177e4SLinus Torvalds struct ipoib_dev_priv *cpriv; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds /* Bring down any child interfaces too */ 16095ed644fSIngo Molnar mutex_lock(&priv->vlan_mutex); 1611da177e4SLinus Torvalds list_for_each_entry(cpriv, &priv->child_intfs, list) { 1621da177e4SLinus Torvalds int flags; 1631da177e4SLinus Torvalds 1641da177e4SLinus Torvalds flags = cpriv->dev->flags; 1651da177e4SLinus Torvalds if (!(flags & IFF_UP)) 1661da177e4SLinus Torvalds continue; 1671da177e4SLinus Torvalds 1681da177e4SLinus Torvalds dev_change_flags(cpriv->dev, flags & ~IFF_UP); 1691da177e4SLinus Torvalds } 17095ed644fSIngo Molnar mutex_unlock(&priv->vlan_mutex); 1711da177e4SLinus Torvalds } 1721da177e4SLinus Torvalds 1731da177e4SLinus Torvalds return 0; 1741da177e4SLinus Torvalds } 1751da177e4SLinus Torvalds 1769baa0b03SOr Gerlitz static void ipoib_uninit(struct net_device *dev) 1779baa0b03SOr Gerlitz { 1789baa0b03SOr Gerlitz ipoib_dev_cleanup(dev); 1799baa0b03SOr Gerlitz } 1809baa0b03SOr Gerlitz 1819ca36f7dSDavid S. Miller static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) 1823d96c74dSMichał Mirosław { 1833d96c74dSMichał Mirosław struct ipoib_dev_priv *priv = netdev_priv(dev); 1843d96c74dSMichał Mirosław 1853d96c74dSMichał Mirosław if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) 1863d96c74dSMichał Mirosław features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO); 1873d96c74dSMichał Mirosław 1883d96c74dSMichał Mirosław return features; 1893d96c74dSMichał Mirosław } 1903d96c74dSMichał Mirosław 1911da177e4SLinus Torvalds static int ipoib_change_mtu(struct net_device *dev, int new_mtu) 1921da177e4SLinus Torvalds { 1931da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 1941da177e4SLinus Torvalds 195839fcabaSMichael S. Tsirkin /* dev->mtu > 2K ==> connected mode */ 196586a6934SPradeep Satyanarayana if (ipoib_cm_admin_enabled(dev)) { 197586a6934SPradeep Satyanarayana if (new_mtu > ipoib_cm_max_mtu(dev)) 198586a6934SPradeep Satyanarayana return -EINVAL; 199586a6934SPradeep Satyanarayana 200839fcabaSMichael S. Tsirkin if (new_mtu > priv->mcast_mtu) 201839fcabaSMichael S. Tsirkin ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", 202839fcabaSMichael S. Tsirkin priv->mcast_mtu); 203586a6934SPradeep Satyanarayana 204839fcabaSMichael S. Tsirkin dev->mtu = new_mtu; 205839fcabaSMichael S. Tsirkin return 0; 206839fcabaSMichael S. Tsirkin } 207839fcabaSMichael S. Tsirkin 208bc7b3a36SShirley Ma if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) 2091da177e4SLinus Torvalds return -EINVAL; 2101da177e4SLinus Torvalds 2111da177e4SLinus Torvalds priv->admin_mtu = new_mtu; 2121da177e4SLinus Torvalds 2131da177e4SLinus Torvalds dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 2141da177e4SLinus Torvalds 2151da177e4SLinus Torvalds return 0; 2161da177e4SLinus Torvalds } 2171da177e4SLinus Torvalds 21837c22a77SJack Morgenstein static struct ipoib_path *__path_find(struct net_device *dev, void *gid) 2191da177e4SLinus Torvalds { 2201da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 2211da177e4SLinus Torvalds struct rb_node *n = priv->path_tree.rb_node; 2221da177e4SLinus Torvalds struct ipoib_path *path; 2231da177e4SLinus Torvalds int ret; 2241da177e4SLinus Torvalds 2251da177e4SLinus Torvalds while (n) { 2261da177e4SLinus Torvalds path = rb_entry(n, struct ipoib_path, rb_node); 2271da177e4SLinus Torvalds 22837c22a77SJack Morgenstein ret = memcmp(gid, path->pathrec.dgid.raw, 2291da177e4SLinus Torvalds sizeof (union ib_gid)); 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds if (ret < 0) 2321da177e4SLinus Torvalds n = n->rb_left; 2331da177e4SLinus Torvalds else if (ret > 0) 2341da177e4SLinus Torvalds n = n->rb_right; 2351da177e4SLinus Torvalds else 2361da177e4SLinus Torvalds return path; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds return NULL; 2401da177e4SLinus Torvalds } 2411da177e4SLinus Torvalds 2421da177e4SLinus Torvalds static int __path_add(struct net_device *dev, struct ipoib_path *path) 2431da177e4SLinus Torvalds { 2441da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 2451da177e4SLinus Torvalds struct rb_node **n = &priv->path_tree.rb_node; 2461da177e4SLinus Torvalds struct rb_node *pn = NULL; 2471da177e4SLinus Torvalds struct ipoib_path *tpath; 2481da177e4SLinus Torvalds int ret; 2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds while (*n) { 2511da177e4SLinus Torvalds pn = *n; 2521da177e4SLinus Torvalds tpath = rb_entry(pn, struct ipoib_path, rb_node); 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, 2551da177e4SLinus Torvalds sizeof (union ib_gid)); 2561da177e4SLinus Torvalds if (ret < 0) 2571da177e4SLinus Torvalds n = &pn->rb_left; 2581da177e4SLinus Torvalds else if (ret > 0) 2591da177e4SLinus Torvalds n = &pn->rb_right; 2601da177e4SLinus Torvalds else 2611da177e4SLinus Torvalds return -EEXIST; 2621da177e4SLinus Torvalds } 2631da177e4SLinus Torvalds 2641da177e4SLinus Torvalds rb_link_node(&path->rb_node, pn, n); 2651da177e4SLinus Torvalds rb_insert_color(&path->rb_node, &priv->path_tree); 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds list_add_tail(&path->list, &priv->path_list); 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds return 0; 2701da177e4SLinus Torvalds } 2711da177e4SLinus Torvalds 2721da177e4SLinus Torvalds static void path_free(struct net_device *dev, struct ipoib_path *path) 2731da177e4SLinus Torvalds { 2741da177e4SLinus Torvalds struct sk_buff *skb; 2751da177e4SLinus Torvalds 2761da177e4SLinus Torvalds while ((skb = __skb_dequeue(&path->queue))) 2771da177e4SLinus Torvalds dev_kfree_skb_irq(skb); 2781da177e4SLinus Torvalds 279b63b70d8SShlomo Pongratz ipoib_dbg(netdev_priv(dev), "path_free\n"); 2801da177e4SLinus Torvalds 281b63b70d8SShlomo Pongratz /* remove all neigh connected to this path */ 282b63b70d8SShlomo Pongratz ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds if (path->ah) 2851da177e4SLinus Torvalds ipoib_put_ah(path->ah); 2861da177e4SLinus Torvalds 2871da177e4SLinus Torvalds kfree(path); 2881da177e4SLinus Torvalds } 2891da177e4SLinus Torvalds 2901732b0efSRoland Dreier #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 2911732b0efSRoland Dreier 2921732b0efSRoland Dreier struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) 2931732b0efSRoland Dreier { 2941732b0efSRoland Dreier struct ipoib_path_iter *iter; 2951732b0efSRoland Dreier 2961732b0efSRoland Dreier iter = kmalloc(sizeof *iter, GFP_KERNEL); 2971732b0efSRoland Dreier if (!iter) 2981732b0efSRoland Dreier return NULL; 2991732b0efSRoland Dreier 3001732b0efSRoland Dreier iter->dev = dev; 3011732b0efSRoland Dreier memset(iter->path.pathrec.dgid.raw, 0, 16); 3021732b0efSRoland Dreier 3031732b0efSRoland Dreier if (ipoib_path_iter_next(iter)) { 3041732b0efSRoland Dreier kfree(iter); 3051732b0efSRoland Dreier return NULL; 3061732b0efSRoland Dreier } 3071732b0efSRoland Dreier 3081732b0efSRoland Dreier return iter; 3091732b0efSRoland Dreier } 3101732b0efSRoland Dreier 3111732b0efSRoland Dreier int ipoib_path_iter_next(struct ipoib_path_iter *iter) 3121732b0efSRoland Dreier { 3131732b0efSRoland Dreier struct ipoib_dev_priv *priv = netdev_priv(iter->dev); 3141732b0efSRoland Dreier struct rb_node *n; 3151732b0efSRoland Dreier struct ipoib_path *path; 3161732b0efSRoland Dreier int ret = 1; 3171732b0efSRoland Dreier 3181732b0efSRoland Dreier spin_lock_irq(&priv->lock); 3191732b0efSRoland Dreier 3201732b0efSRoland Dreier n = rb_first(&priv->path_tree); 3211732b0efSRoland Dreier 3221732b0efSRoland Dreier while (n) { 3231732b0efSRoland Dreier path = rb_entry(n, struct ipoib_path, rb_node); 3241732b0efSRoland Dreier 3251732b0efSRoland Dreier if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, 3261732b0efSRoland Dreier sizeof (union ib_gid)) < 0) { 3271732b0efSRoland Dreier iter->path = *path; 3281732b0efSRoland Dreier ret = 0; 3291732b0efSRoland Dreier break; 3301732b0efSRoland Dreier } 3311732b0efSRoland Dreier 3321732b0efSRoland Dreier n = rb_next(n); 3331732b0efSRoland Dreier } 3341732b0efSRoland Dreier 3351732b0efSRoland Dreier spin_unlock_irq(&priv->lock); 3361732b0efSRoland Dreier 3371732b0efSRoland Dreier return ret; 3381732b0efSRoland Dreier } 3391732b0efSRoland Dreier 3401732b0efSRoland Dreier void ipoib_path_iter_read(struct ipoib_path_iter *iter, 3411732b0efSRoland Dreier struct ipoib_path *path) 3421732b0efSRoland Dreier { 3431732b0efSRoland Dreier *path = iter->path; 3441732b0efSRoland Dreier } 3451732b0efSRoland Dreier 3461732b0efSRoland Dreier #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 3471732b0efSRoland Dreier 348ee1e2c82SMoni Shoua void ipoib_mark_paths_invalid(struct net_device *dev) 349ee1e2c82SMoni Shoua { 350ee1e2c82SMoni Shoua struct ipoib_dev_priv *priv = netdev_priv(dev); 351ee1e2c82SMoni Shoua struct ipoib_path *path, *tp; 352ee1e2c82SMoni Shoua 353ee1e2c82SMoni Shoua spin_lock_irq(&priv->lock); 354ee1e2c82SMoni Shoua 355ee1e2c82SMoni Shoua list_for_each_entry_safe(path, tp, &priv->path_list, list) { 3565b095d98SHarvey Harrison ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n", 357ee1e2c82SMoni Shoua be16_to_cpu(path->pathrec.dlid), 358fcace2feSHarvey Harrison path->pathrec.dgid.raw); 359ee1e2c82SMoni Shoua path->valid = 0; 360ee1e2c82SMoni Shoua } 361ee1e2c82SMoni Shoua 362ee1e2c82SMoni Shoua spin_unlock_irq(&priv->lock); 363ee1e2c82SMoni Shoua } 364ee1e2c82SMoni Shoua 3651da177e4SLinus Torvalds void ipoib_flush_paths(struct net_device *dev) 3661da177e4SLinus Torvalds { 3671da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 3681da177e4SLinus Torvalds struct ipoib_path *path, *tp; 3691da177e4SLinus Torvalds LIST_HEAD(remove_list); 370943c246eSRoland Dreier unsigned long flags; 3711da177e4SLinus Torvalds 372943c246eSRoland Dreier netif_tx_lock_bh(dev); 373943c246eSRoland Dreier spin_lock_irqsave(&priv->lock, flags); 3741da177e4SLinus Torvalds 375157de229SRobert P. J. Day list_splice_init(&priv->path_list, &remove_list); 3761da177e4SLinus Torvalds 3771da177e4SLinus Torvalds list_for_each_entry(path, &remove_list, list) 3781da177e4SLinus Torvalds rb_erase(&path->rb_node, &priv->path_tree); 3791da177e4SLinus Torvalds 3801da177e4SLinus Torvalds list_for_each_entry_safe(path, tp, &remove_list, list) { 3811da177e4SLinus Torvalds if (path->query) 3821da177e4SLinus Torvalds ib_sa_cancel_query(path->query_id, path->query); 383943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 384943c246eSRoland Dreier netif_tx_unlock_bh(dev); 3851da177e4SLinus Torvalds wait_for_completion(&path->done); 3861da177e4SLinus Torvalds path_free(dev, path); 387943c246eSRoland Dreier netif_tx_lock_bh(dev); 388943c246eSRoland Dreier spin_lock_irqsave(&priv->lock, flags); 3891da177e4SLinus Torvalds } 390943c246eSRoland Dreier 391943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 392943c246eSRoland Dreier netif_tx_unlock_bh(dev); 3931da177e4SLinus Torvalds } 3941da177e4SLinus Torvalds 3951da177e4SLinus Torvalds static void path_rec_completion(int status, 3961da177e4SLinus Torvalds struct ib_sa_path_rec *pathrec, 3971da177e4SLinus Torvalds void *path_ptr) 3981da177e4SLinus Torvalds { 3991da177e4SLinus Torvalds struct ipoib_path *path = path_ptr; 4001da177e4SLinus Torvalds struct net_device *dev = path->dev; 4011da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 4021da177e4SLinus Torvalds struct ipoib_ah *ah = NULL; 403c9da4badSRoland Dreier struct ipoib_ah *old_ah = NULL; 404d04d01b1SMichael S. Tsirkin struct ipoib_neigh *neigh, *tn; 4051da177e4SLinus Torvalds struct sk_buff_head skqueue; 4061da177e4SLinus Torvalds struct sk_buff *skb; 4071da177e4SLinus Torvalds unsigned long flags; 4081da177e4SLinus Torvalds 409843613b0SRoland Dreier if (!status) 4105b095d98SHarvey Harrison ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n", 411fcace2feSHarvey Harrison be16_to_cpu(pathrec->dlid), pathrec->dgid.raw); 4121da177e4SLinus Torvalds else 4135b095d98SHarvey Harrison ipoib_dbg(priv, "PathRec status %d for GID %pI6\n", 414fcace2feSHarvey Harrison status, path->pathrec.dgid.raw); 4151da177e4SLinus Torvalds 4161da177e4SLinus Torvalds skb_queue_head_init(&skqueue); 4171da177e4SLinus Torvalds 4181da177e4SLinus Torvalds if (!status) { 41946f1b3d7SSean Hefty struct ib_ah_attr av; 4201da177e4SLinus Torvalds 42146f1b3d7SSean Hefty if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) 4221da177e4SLinus Torvalds ah = ipoib_create_ah(dev, priv->pd, &av); 4231da177e4SLinus Torvalds } 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds spin_lock_irqsave(&priv->lock, flags); 4261da177e4SLinus Torvalds 4273874397cSMike Marciniszyn if (!IS_ERR_OR_NULL(ah)) { 4281da177e4SLinus Torvalds path->pathrec = *pathrec; 4291da177e4SLinus Torvalds 430c9da4badSRoland Dreier old_ah = path->ah; 431c9da4badSRoland Dreier path->ah = ah; 432c9da4badSRoland Dreier 4331da177e4SLinus Torvalds ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", 4341da177e4SLinus Torvalds ah, be16_to_cpu(pathrec->dlid), pathrec->sl); 4351da177e4SLinus Torvalds 4361da177e4SLinus Torvalds while ((skb = __skb_dequeue(&path->queue))) 4371da177e4SLinus Torvalds __skb_queue_tail(&skqueue, skb); 4381da177e4SLinus Torvalds 439d04d01b1SMichael S. Tsirkin list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { 440ee1e2c82SMoni Shoua if (neigh->ah) { 441ee1e2c82SMoni Shoua WARN_ON(neigh->ah != old_ah); 442ee1e2c82SMoni Shoua /* 443ee1e2c82SMoni Shoua * Dropping the ah reference inside 444ee1e2c82SMoni Shoua * priv->lock is safe here, because we 445ee1e2c82SMoni Shoua * will hold one more reference from 446ee1e2c82SMoni Shoua * the original value of path->ah (ie 447ee1e2c82SMoni Shoua * old_ah). 448ee1e2c82SMoni Shoua */ 449ee1e2c82SMoni Shoua ipoib_put_ah(neigh->ah); 450ee1e2c82SMoni Shoua } 4511da177e4SLinus Torvalds kref_get(&path->ah->ref); 4521da177e4SLinus Torvalds neigh->ah = path->ah; 4531da177e4SLinus Torvalds 454b63b70d8SShlomo Pongratz if (ipoib_cm_enabled(dev, neigh->daddr)) { 455839fcabaSMichael S. Tsirkin if (!ipoib_cm_get(neigh)) 456839fcabaSMichael S. Tsirkin ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, 457839fcabaSMichael S. Tsirkin path, 458839fcabaSMichael S. Tsirkin neigh)); 459839fcabaSMichael S. Tsirkin if (!ipoib_cm_get(neigh)) { 460839fcabaSMichael S. Tsirkin list_del(&neigh->list); 461b63b70d8SShlomo Pongratz ipoib_neigh_free(neigh); 462839fcabaSMichael S. Tsirkin continue; 463839fcabaSMichael S. Tsirkin } 464839fcabaSMichael S. Tsirkin } 465839fcabaSMichael S. Tsirkin 4661da177e4SLinus Torvalds while ((skb = __skb_dequeue(&neigh->queue))) 4671da177e4SLinus Torvalds __skb_queue_tail(&skqueue, skb); 4681da177e4SLinus Torvalds } 469ee1e2c82SMoni Shoua path->valid = 1; 4705872a9fcSRoland Dreier } 4711da177e4SLinus Torvalds 4725872a9fcSRoland Dreier path->query = NULL; 4731da177e4SLinus Torvalds complete(&path->done); 4741da177e4SLinus Torvalds 4751da177e4SLinus Torvalds spin_unlock_irqrestore(&priv->lock, flags); 4761da177e4SLinus Torvalds 477ee1e2c82SMoni Shoua if (old_ah) 478ee1e2c82SMoni Shoua ipoib_put_ah(old_ah); 479ee1e2c82SMoni Shoua 4801da177e4SLinus Torvalds while ((skb = __skb_dequeue(&skqueue))) { 4811da177e4SLinus Torvalds skb->dev = dev; 4821da177e4SLinus Torvalds if (dev_queue_xmit(skb)) 4831da177e4SLinus Torvalds ipoib_warn(priv, "dev_queue_xmit failed " 4841da177e4SLinus Torvalds "to requeue packet\n"); 4851da177e4SLinus Torvalds } 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds 48837c22a77SJack Morgenstein static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) 4891da177e4SLinus Torvalds { 4901da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 4911da177e4SLinus Torvalds struct ipoib_path *path; 4921da177e4SLinus Torvalds 4931401b53aSJack Morgenstein if (!priv->broadcast) 4941401b53aSJack Morgenstein return NULL; 4951401b53aSJack Morgenstein 49621a38489SRoland Dreier path = kzalloc(sizeof *path, GFP_ATOMIC); 4971da177e4SLinus Torvalds if (!path) 4981da177e4SLinus Torvalds return NULL; 4991da177e4SLinus Torvalds 5001da177e4SLinus Torvalds path->dev = dev; 5011da177e4SLinus Torvalds 5021da177e4SLinus Torvalds skb_queue_head_init(&path->queue); 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds INIT_LIST_HEAD(&path->neigh_list); 5051da177e4SLinus Torvalds 50637c22a77SJack Morgenstein memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid)); 5071da177e4SLinus Torvalds path->pathrec.sgid = priv->local_gid; 5081da177e4SLinus Torvalds path->pathrec.pkey = cpu_to_be16(priv->pkey); 5091da177e4SLinus Torvalds path->pathrec.numb_path = 1; 51081668838SSean Hefty path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds return path; 5131da177e4SLinus Torvalds } 5141da177e4SLinus Torvalds 5151da177e4SLinus Torvalds static int path_rec_start(struct net_device *dev, 5161da177e4SLinus Torvalds struct ipoib_path *path) 5171da177e4SLinus Torvalds { 5181da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 5191da177e4SLinus Torvalds 5205b095d98SHarvey Harrison ipoib_dbg(priv, "Start path record lookup for %pI6\n", 521fcace2feSHarvey Harrison path->pathrec.dgid.raw); 5221da177e4SLinus Torvalds 52365c7eddaSRoland Dreier init_completion(&path->done); 52465c7eddaSRoland Dreier 5251da177e4SLinus Torvalds path->query_id = 526c1a0b23bSMichael S. Tsirkin ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, 5271da177e4SLinus Torvalds &path->pathrec, 5281da177e4SLinus Torvalds IB_SA_PATH_REC_DGID | 5291da177e4SLinus Torvalds IB_SA_PATH_REC_SGID | 5301da177e4SLinus Torvalds IB_SA_PATH_REC_NUMB_PATH | 53181668838SSean Hefty IB_SA_PATH_REC_TRAFFIC_CLASS | 5321da177e4SLinus Torvalds IB_SA_PATH_REC_PKEY, 5331da177e4SLinus Torvalds 1000, GFP_ATOMIC, 5341da177e4SLinus Torvalds path_rec_completion, 5351da177e4SLinus Torvalds path, &path->query); 5361da177e4SLinus Torvalds if (path->query_id < 0) { 53701b3fc8bSOr Gerlitz ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); 5381da177e4SLinus Torvalds path->query = NULL; 53993a3ab93SYossi Etigin complete(&path->done); 5401da177e4SLinus Torvalds return path->query_id; 5411da177e4SLinus Torvalds } 5421da177e4SLinus Torvalds 5431da177e4SLinus Torvalds return 0; 5441da177e4SLinus Torvalds } 5451da177e4SLinus Torvalds 546b63b70d8SShlomo Pongratz static void neigh_add_path(struct sk_buff *skb, u8 *daddr, 547b63b70d8SShlomo Pongratz struct net_device *dev) 5481da177e4SLinus Torvalds { 5491da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 5501da177e4SLinus Torvalds struct ipoib_path *path; 5511da177e4SLinus Torvalds struct ipoib_neigh *neigh; 552943c246eSRoland Dreier unsigned long flags; 5531da177e4SLinus Torvalds 554b5120a6eSShlomo Pongratz spin_lock_irqsave(&priv->lock, flags); 555b63b70d8SShlomo Pongratz neigh = ipoib_neigh_alloc(daddr, dev); 5561da177e4SLinus Torvalds if (!neigh) { 557b5120a6eSShlomo Pongratz spin_unlock_irqrestore(&priv->lock, flags); 558de903512SRoland Dreier ++dev->stats.tx_dropped; 5591da177e4SLinus Torvalds dev_kfree_skb_any(skb); 5601da177e4SLinus Torvalds return; 5611da177e4SLinus Torvalds } 5621da177e4SLinus Torvalds 563b63b70d8SShlomo Pongratz path = __path_find(dev, daddr + 4); 5641da177e4SLinus Torvalds if (!path) { 565b63b70d8SShlomo Pongratz path = path_rec_create(dev, daddr + 4); 5661da177e4SLinus Torvalds if (!path) 567d2e0655eSMichael S. Tsirkin goto err_path; 5681da177e4SLinus Torvalds 5691da177e4SLinus Torvalds __path_add(dev, path); 5701da177e4SLinus Torvalds } 5711da177e4SLinus Torvalds 5721da177e4SLinus Torvalds list_add_tail(&neigh->list, &path->neigh_list); 5731da177e4SLinus Torvalds 57447f7a071SMichael S. Tsirkin if (path->ah) { 5751da177e4SLinus Torvalds kref_get(&path->ah->ref); 5761da177e4SLinus Torvalds neigh->ah = path->ah; 5771da177e4SLinus Torvalds 578b63b70d8SShlomo Pongratz if (ipoib_cm_enabled(dev, neigh->daddr)) { 579839fcabaSMichael S. Tsirkin if (!ipoib_cm_get(neigh)) 580839fcabaSMichael S. Tsirkin ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); 581839fcabaSMichael S. Tsirkin if (!ipoib_cm_get(neigh)) { 582839fcabaSMichael S. Tsirkin list_del(&neigh->list); 583b63b70d8SShlomo Pongratz ipoib_neigh_free(neigh); 584839fcabaSMichael S. Tsirkin goto err_drop; 585839fcabaSMichael S. Tsirkin } 586839fcabaSMichael S. Tsirkin if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) 587839fcabaSMichael S. Tsirkin __skb_queue_tail(&neigh->queue, skb); 588839fcabaSMichael S. Tsirkin else { 589839fcabaSMichael S. Tsirkin ipoib_warn(priv, "queue length limit %d. Packet drop.\n", 590839fcabaSMichael S. Tsirkin skb_queue_len(&neigh->queue)); 591839fcabaSMichael S. Tsirkin goto err_drop; 592839fcabaSMichael S. Tsirkin } 593721d67cdSRoland Dreier } else { 594721d67cdSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 595b63b70d8SShlomo Pongratz ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr)); 596b63b70d8SShlomo Pongratz ipoib_neigh_put(neigh); 597721d67cdSRoland Dreier return; 598721d67cdSRoland Dreier } 5991da177e4SLinus Torvalds } else { 6001da177e4SLinus Torvalds neigh->ah = NULL; 6011da177e4SLinus Torvalds 6021da177e4SLinus Torvalds if (!path->query && path_rec_start(dev, path)) 603d2e0655eSMichael S. Tsirkin goto err_list; 6042745b5b7SMichael S. Tsirkin 6052745b5b7SMichael S. Tsirkin __skb_queue_tail(&neigh->queue, skb); 6061da177e4SLinus Torvalds } 6071da177e4SLinus Torvalds 608943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 609b63b70d8SShlomo Pongratz ipoib_neigh_put(neigh); 6101da177e4SLinus Torvalds return; 6111da177e4SLinus Torvalds 612d2e0655eSMichael S. Tsirkin err_list: 6131da177e4SLinus Torvalds list_del(&neigh->list); 6141da177e4SLinus Torvalds 615d2e0655eSMichael S. Tsirkin err_path: 616b63b70d8SShlomo Pongratz ipoib_neigh_free(neigh); 617839fcabaSMichael S. Tsirkin err_drop: 618de903512SRoland Dreier ++dev->stats.tx_dropped; 6191da177e4SLinus Torvalds dev_kfree_skb_any(skb); 6201da177e4SLinus Torvalds 621943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 622b63b70d8SShlomo Pongratz ipoib_neigh_put(neigh); 6231da177e4SLinus Torvalds } 6241da177e4SLinus Torvalds 6251da177e4SLinus Torvalds static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, 626936d7de3SRoland Dreier struct ipoib_cb *cb) 6271da177e4SLinus Torvalds { 6281da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 6291da177e4SLinus Torvalds struct ipoib_path *path; 630943c246eSRoland Dreier unsigned long flags; 6311da177e4SLinus Torvalds 632943c246eSRoland Dreier spin_lock_irqsave(&priv->lock, flags); 6331da177e4SLinus Torvalds 634936d7de3SRoland Dreier path = __path_find(dev, cb->hwaddr + 4); 635ee1e2c82SMoni Shoua if (!path || !path->valid) { 63671d98b46SJack Morgenstein int new_path = 0; 63771d98b46SJack Morgenstein 63871d98b46SJack Morgenstein if (!path) { 639936d7de3SRoland Dreier path = path_rec_create(dev, cb->hwaddr + 4); 64071d98b46SJack Morgenstein new_path = 1; 64171d98b46SJack Morgenstein } 6421da177e4SLinus Torvalds if (path) { 6431da177e4SLinus Torvalds __skb_queue_tail(&path->queue, skb); 6441da177e4SLinus Torvalds 645ff79ae80SYossi Etigin if (!path->query && path_rec_start(dev, path)) { 646943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 64771d98b46SJack Morgenstein if (new_path) 6481da177e4SLinus Torvalds path_free(dev, path); 6491da177e4SLinus Torvalds return; 6501da177e4SLinus Torvalds } else 6511da177e4SLinus Torvalds __path_add(dev, path); 6521da177e4SLinus Torvalds } else { 653de903512SRoland Dreier ++dev->stats.tx_dropped; 6541da177e4SLinus Torvalds dev_kfree_skb_any(skb); 6551da177e4SLinus Torvalds } 6561da177e4SLinus Torvalds 657943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 6581da177e4SLinus Torvalds return; 6591da177e4SLinus Torvalds } 6601da177e4SLinus Torvalds 66147f7a071SMichael S. Tsirkin if (path->ah) { 6621da177e4SLinus Torvalds ipoib_dbg(priv, "Send unicast ARP to %04x\n", 6631da177e4SLinus Torvalds be16_to_cpu(path->pathrec.dlid)); 6641da177e4SLinus Torvalds 665721d67cdSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 666936d7de3SRoland Dreier ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); 667721d67cdSRoland Dreier return; 6681da177e4SLinus Torvalds } else if ((path->query || !path_rec_start(dev, path)) && 6691da177e4SLinus Torvalds skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 6701da177e4SLinus Torvalds __skb_queue_tail(&path->queue, skb); 6711da177e4SLinus Torvalds } else { 672de903512SRoland Dreier ++dev->stats.tx_dropped; 6731da177e4SLinus Torvalds dev_kfree_skb_any(skb); 6741da177e4SLinus Torvalds } 6751da177e4SLinus Torvalds 676943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 6771da177e4SLinus Torvalds } 6781da177e4SLinus Torvalds 6791da177e4SLinus Torvalds static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) 6801da177e4SLinus Torvalds { 6811da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 6821da177e4SLinus Torvalds struct ipoib_neigh *neigh; 683b63b70d8SShlomo Pongratz struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; 684b63b70d8SShlomo Pongratz struct ipoib_header *header; 6851da177e4SLinus Torvalds unsigned long flags; 6861da177e4SLinus Torvalds 687b63b70d8SShlomo Pongratz header = (struct ipoib_header *) skb->data; 688b63b70d8SShlomo Pongratz 689b63b70d8SShlomo Pongratz if (unlikely(cb->hwaddr[4] == 0xff)) { 690b63b70d8SShlomo Pongratz /* multicast, arrange "if" according to probability */ 691b63b70d8SShlomo Pongratz if ((header->proto != htons(ETH_P_IP)) && 692b63b70d8SShlomo Pongratz (header->proto != htons(ETH_P_IPV6)) && 693b63b70d8SShlomo Pongratz (header->proto != htons(ETH_P_ARP)) && 694b63b70d8SShlomo Pongratz (header->proto != htons(ETH_P_RARP))) { 695b63b70d8SShlomo Pongratz /* ethertype not supported by IPoIB */ 69617e6abeeSDavid Miller ++dev->stats.tx_dropped; 69717e6abeeSDavid Miller dev_kfree_skb_any(skb); 698b63b70d8SShlomo Pongratz return NETDEV_TX_OK; 69917e6abeeSDavid Miller } 700b63b70d8SShlomo Pongratz /* Add in the P_Key for multicast*/ 701b63b70d8SShlomo Pongratz cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; 702b63b70d8SShlomo Pongratz cb->hwaddr[9] = priv->pkey & 0xff; 703b63b70d8SShlomo Pongratz 704b63b70d8SShlomo Pongratz neigh = ipoib_neigh_get(dev, cb->hwaddr); 705b63b70d8SShlomo Pongratz if (likely(neigh)) 706b63b70d8SShlomo Pongratz goto send_using_neigh; 707b63b70d8SShlomo Pongratz ipoib_mcast_send(dev, cb->hwaddr, skb); 708b63b70d8SShlomo Pongratz return NETDEV_TX_OK; 7091da177e4SLinus Torvalds } 7101da177e4SLinus Torvalds 711b63b70d8SShlomo Pongratz /* unicast, arrange "switch" according to probability */ 712b63b70d8SShlomo Pongratz switch (header->proto) { 713b63b70d8SShlomo Pongratz case htons(ETH_P_IP): 714b63b70d8SShlomo Pongratz case htons(ETH_P_IPV6): 715b63b70d8SShlomo Pongratz neigh = ipoib_neigh_get(dev, cb->hwaddr); 716b63b70d8SShlomo Pongratz if (unlikely(!neigh)) { 717b63b70d8SShlomo Pongratz neigh_add_path(skb, cb->hwaddr, dev); 718b63b70d8SShlomo Pongratz return NETDEV_TX_OK; 719b63b70d8SShlomo Pongratz } 720b63b70d8SShlomo Pongratz break; 721b63b70d8SShlomo Pongratz case htons(ETH_P_ARP): 722b63b70d8SShlomo Pongratz case htons(ETH_P_RARP): 723b63b70d8SShlomo Pongratz /* for unicast ARP and RARP should always perform path find */ 724b63b70d8SShlomo Pongratz unicast_arp_send(skb, dev, cb); 725b63b70d8SShlomo Pongratz return NETDEV_TX_OK; 726b63b70d8SShlomo Pongratz default: 727b63b70d8SShlomo Pongratz /* ethertype not supported by IPoIB */ 728b63b70d8SShlomo Pongratz ++dev->stats.tx_dropped; 729b63b70d8SShlomo Pongratz dev_kfree_skb_any(skb); 730b63b70d8SShlomo Pongratz return NETDEV_TX_OK; 7318a7f7521SMichael S. Tsirkin } 7328a7f7521SMichael S. Tsirkin 733b63b70d8SShlomo Pongratz send_using_neigh: 734b63b70d8SShlomo Pongratz /* note we now hold a ref to neigh */ 735bafff974SOr Gerlitz if (ipoib_cm_get(neigh)) { 736bafff974SOr Gerlitz if (ipoib_cm_up(neigh)) { 737bafff974SOr Gerlitz ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); 738b63b70d8SShlomo Pongratz goto unref; 739bafff974SOr Gerlitz } 740bafff974SOr Gerlitz } else if (neigh->ah) { 741b63b70d8SShlomo Pongratz ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr)); 742b63b70d8SShlomo Pongratz goto unref; 7431da177e4SLinus Torvalds } 7441da177e4SLinus Torvalds 7451da177e4SLinus Torvalds if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 746943c246eSRoland Dreier spin_lock_irqsave(&priv->lock, flags); 7471da177e4SLinus Torvalds __skb_queue_tail(&neigh->queue, skb); 748943c246eSRoland Dreier spin_unlock_irqrestore(&priv->lock, flags); 7491da177e4SLinus Torvalds } else { 750de903512SRoland Dreier ++dev->stats.tx_dropped; 7511da177e4SLinus Torvalds dev_kfree_skb_any(skb); 7521da177e4SLinus Torvalds } 7531da177e4SLinus Torvalds 754b63b70d8SShlomo Pongratz unref: 755b63b70d8SShlomo Pongratz ipoib_neigh_put(neigh); 7561da177e4SLinus Torvalds 7571da177e4SLinus Torvalds return NETDEV_TX_OK; 7581da177e4SLinus Torvalds } 7591da177e4SLinus Torvalds 7601da177e4SLinus Torvalds static void ipoib_timeout(struct net_device *dev) 7611da177e4SLinus Torvalds { 7621da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 7631da177e4SLinus Torvalds 7644b2d319bSRoland Dreier ipoib_warn(priv, "transmit timeout: latency %d msecs\n", 7654b2d319bSRoland Dreier jiffies_to_msecs(jiffies - dev->trans_start)); 7664b2d319bSRoland Dreier ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", 7674b2d319bSRoland Dreier netif_queue_stopped(dev), 7684b2d319bSRoland Dreier priv->tx_head, priv->tx_tail); 7691da177e4SLinus Torvalds /* XXX reset QP, etc. */ 7701da177e4SLinus Torvalds } 7711da177e4SLinus Torvalds 7721da177e4SLinus Torvalds static int ipoib_hard_header(struct sk_buff *skb, 7731da177e4SLinus Torvalds struct net_device *dev, 7741da177e4SLinus Torvalds unsigned short type, 7753b04dddeSStephen Hemminger const void *daddr, const void *saddr, unsigned len) 7761da177e4SLinus Torvalds { 7771da177e4SLinus Torvalds struct ipoib_header *header; 778b63b70d8SShlomo Pongratz struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; 7791da177e4SLinus Torvalds 7801da177e4SLinus Torvalds header = (struct ipoib_header *) skb_push(skb, sizeof *header); 7811da177e4SLinus Torvalds 7821da177e4SLinus Torvalds header->proto = htons(type); 7831da177e4SLinus Torvalds header->reserved = 0; 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds /* 786b63b70d8SShlomo Pongratz * we don't rely on dst_entry structure, always stuff the 787936d7de3SRoland Dreier * destination address into skb->cb so we can figure out where 788936d7de3SRoland Dreier * to send the packet later. 7891da177e4SLinus Torvalds */ 790936d7de3SRoland Dreier memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); 7911da177e4SLinus Torvalds 7921da177e4SLinus Torvalds return 0; 7931da177e4SLinus Torvalds } 7941da177e4SLinus Torvalds 7951da177e4SLinus Torvalds static void ipoib_set_mcast_list(struct net_device *dev) 7961da177e4SLinus Torvalds { 7971da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 7981da177e4SLinus Torvalds 7997a343d4cSLeonid Arsh if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { 8007a343d4cSLeonid Arsh ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set"); 8017a343d4cSLeonid Arsh return; 8027a343d4cSLeonid Arsh } 8037a343d4cSLeonid Arsh 8041ad62a19SMichael S. Tsirkin queue_work(ipoib_workqueue, &priv->restart_task); 8051da177e4SLinus Torvalds } 8061da177e4SLinus Torvalds 807b63b70d8SShlomo Pongratz static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) 8081da177e4SLinus Torvalds { 809b63b70d8SShlomo Pongratz /* 810b63b70d8SShlomo Pongratz * Use only the address parts that contributes to spreading 811b63b70d8SShlomo Pongratz * The subnet prefix is not used as one can not connect to 812b63b70d8SShlomo Pongratz * same remote port (GUID) using the same remote QPN via two 813b63b70d8SShlomo Pongratz * different subnets. 814b63b70d8SShlomo Pongratz */ 815b63b70d8SShlomo Pongratz /* qpn octets[1:4) & port GUID octets[12:20) */ 816b63b70d8SShlomo Pongratz u32 *daddr_32 = (u32 *) daddr; 817b63b70d8SShlomo Pongratz u32 hv; 8181da177e4SLinus Torvalds 819b63b70d8SShlomo Pongratz hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0); 820b63b70d8SShlomo Pongratz return hv & htbl->mask; 8211da177e4SLinus Torvalds } 8221da177e4SLinus Torvalds 823b63b70d8SShlomo Pongratz struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) 824b63b70d8SShlomo Pongratz { 825b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = netdev_priv(dev); 826b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 827b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 828b63b70d8SShlomo Pongratz struct ipoib_neigh *neigh = NULL; 829b63b70d8SShlomo Pongratz u32 hash_val; 830b63b70d8SShlomo Pongratz 831b63b70d8SShlomo Pongratz rcu_read_lock_bh(); 832b63b70d8SShlomo Pongratz 833b63b70d8SShlomo Pongratz htbl = rcu_dereference_bh(ntbl->htbl); 834b63b70d8SShlomo Pongratz 835b63b70d8SShlomo Pongratz if (!htbl) 836b63b70d8SShlomo Pongratz goto out_unlock; 837b63b70d8SShlomo Pongratz 838b63b70d8SShlomo Pongratz hash_val = ipoib_addr_hash(htbl, daddr); 839b63b70d8SShlomo Pongratz for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]); 840b63b70d8SShlomo Pongratz neigh != NULL; 841b63b70d8SShlomo Pongratz neigh = rcu_dereference_bh(neigh->hnext)) { 842b63b70d8SShlomo Pongratz if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { 843b63b70d8SShlomo Pongratz /* found, take one ref on behalf of the caller */ 844b63b70d8SShlomo Pongratz if (!atomic_inc_not_zero(&neigh->refcnt)) { 845b63b70d8SShlomo Pongratz /* deleted */ 846b63b70d8SShlomo Pongratz neigh = NULL; 847b63b70d8SShlomo Pongratz goto out_unlock; 848b63b70d8SShlomo Pongratz } 849b63b70d8SShlomo Pongratz neigh->alive = jiffies; 850b63b70d8SShlomo Pongratz goto out_unlock; 851b63b70d8SShlomo Pongratz } 852b63b70d8SShlomo Pongratz } 853b63b70d8SShlomo Pongratz 854b63b70d8SShlomo Pongratz out_unlock: 855b63b70d8SShlomo Pongratz rcu_read_unlock_bh(); 856b63b70d8SShlomo Pongratz return neigh; 857b63b70d8SShlomo Pongratz } 858b63b70d8SShlomo Pongratz 859b63b70d8SShlomo Pongratz static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) 860b63b70d8SShlomo Pongratz { 861b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 862b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 863b63b70d8SShlomo Pongratz unsigned long neigh_obsolete; 864b63b70d8SShlomo Pongratz unsigned long dt; 865b63b70d8SShlomo Pongratz unsigned long flags; 866b63b70d8SShlomo Pongratz int i; 867b63b70d8SShlomo Pongratz 868b63b70d8SShlomo Pongratz if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 869b63b70d8SShlomo Pongratz return; 870b63b70d8SShlomo Pongratz 871b5120a6eSShlomo Pongratz spin_lock_irqsave(&priv->lock, flags); 872b63b70d8SShlomo Pongratz 873b63b70d8SShlomo Pongratz htbl = rcu_dereference_protected(ntbl->htbl, 874b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 875b63b70d8SShlomo Pongratz 876b63b70d8SShlomo Pongratz if (!htbl) 877b63b70d8SShlomo Pongratz goto out_unlock; 878b63b70d8SShlomo Pongratz 879b63b70d8SShlomo Pongratz /* neigh is obsolete if it was idle for two GC periods */ 880b63b70d8SShlomo Pongratz dt = 2 * arp_tbl.gc_interval; 881b63b70d8SShlomo Pongratz neigh_obsolete = jiffies - dt; 882b63b70d8SShlomo Pongratz /* handle possible race condition */ 883b63b70d8SShlomo Pongratz if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 884b63b70d8SShlomo Pongratz goto out_unlock; 885b63b70d8SShlomo Pongratz 886b63b70d8SShlomo Pongratz for (i = 0; i < htbl->size; i++) { 887b63b70d8SShlomo Pongratz struct ipoib_neigh *neigh; 888b63b70d8SShlomo Pongratz struct ipoib_neigh __rcu **np = &htbl->buckets[i]; 889b63b70d8SShlomo Pongratz 890b63b70d8SShlomo Pongratz while ((neigh = rcu_dereference_protected(*np, 891b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))) != NULL) { 892b63b70d8SShlomo Pongratz /* was the neigh idle for two GC periods */ 893b63b70d8SShlomo Pongratz if (time_after(neigh_obsolete, neigh->alive)) { 894b63b70d8SShlomo Pongratz rcu_assign_pointer(*np, 895b63b70d8SShlomo Pongratz rcu_dereference_protected(neigh->hnext, 896b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))); 897b63b70d8SShlomo Pongratz /* remove from path/mc list */ 898b63b70d8SShlomo Pongratz list_del(&neigh->list); 899b63b70d8SShlomo Pongratz call_rcu(&neigh->rcu, ipoib_neigh_reclaim); 900b63b70d8SShlomo Pongratz } else { 901b63b70d8SShlomo Pongratz np = &neigh->hnext; 902b63b70d8SShlomo Pongratz } 903b63b70d8SShlomo Pongratz 904b63b70d8SShlomo Pongratz } 905b63b70d8SShlomo Pongratz } 906b63b70d8SShlomo Pongratz 907b63b70d8SShlomo Pongratz out_unlock: 908b5120a6eSShlomo Pongratz spin_unlock_irqrestore(&priv->lock, flags); 909b63b70d8SShlomo Pongratz } 910b63b70d8SShlomo Pongratz 911b63b70d8SShlomo Pongratz static void ipoib_reap_neigh(struct work_struct *work) 912b63b70d8SShlomo Pongratz { 913b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = 914b63b70d8SShlomo Pongratz container_of(work, struct ipoib_dev_priv, neigh_reap_task.work); 915b63b70d8SShlomo Pongratz 916b63b70d8SShlomo Pongratz __ipoib_reap_neigh(priv); 917b63b70d8SShlomo Pongratz 918b63b70d8SShlomo Pongratz if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 919b63b70d8SShlomo Pongratz queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, 920b63b70d8SShlomo Pongratz arp_tbl.gc_interval); 921b63b70d8SShlomo Pongratz } 922b63b70d8SShlomo Pongratz 923b63b70d8SShlomo Pongratz 924b63b70d8SShlomo Pongratz static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, 925732a2170SMoni Shoua struct net_device *dev) 926d2e0655eSMichael S. Tsirkin { 927d2e0655eSMichael S. Tsirkin struct ipoib_neigh *neigh; 928d2e0655eSMichael S. Tsirkin 929b63b70d8SShlomo Pongratz neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); 930d2e0655eSMichael S. Tsirkin if (!neigh) 931d2e0655eSMichael S. Tsirkin return NULL; 932d2e0655eSMichael S. Tsirkin 933732a2170SMoni Shoua neigh->dev = dev; 934b63b70d8SShlomo Pongratz memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr)); 93582b39913SRoland Dreier skb_queue_head_init(&neigh->queue); 936b63b70d8SShlomo Pongratz INIT_LIST_HEAD(&neigh->list); 937839fcabaSMichael S. Tsirkin ipoib_cm_set(neigh, NULL); 938b63b70d8SShlomo Pongratz /* one ref on behalf of the caller */ 939b63b70d8SShlomo Pongratz atomic_set(&neigh->refcnt, 1); 940d2e0655eSMichael S. Tsirkin 941d2e0655eSMichael S. Tsirkin return neigh; 942d2e0655eSMichael S. Tsirkin } 943d2e0655eSMichael S. Tsirkin 944b63b70d8SShlomo Pongratz struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, 945b63b70d8SShlomo Pongratz struct net_device *dev) 946d2e0655eSMichael S. Tsirkin { 947b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = netdev_priv(dev); 948b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 949b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 950b63b70d8SShlomo Pongratz struct ipoib_neigh *neigh; 951b63b70d8SShlomo Pongratz u32 hash_val; 952b63b70d8SShlomo Pongratz 953b63b70d8SShlomo Pongratz htbl = rcu_dereference_protected(ntbl->htbl, 954b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 955b63b70d8SShlomo Pongratz if (!htbl) { 956b63b70d8SShlomo Pongratz neigh = NULL; 957b63b70d8SShlomo Pongratz goto out_unlock; 958b63b70d8SShlomo Pongratz } 959b63b70d8SShlomo Pongratz 960b63b70d8SShlomo Pongratz /* need to add a new neigh, but maybe some other thread succeeded? 961b63b70d8SShlomo Pongratz * recalc hash, maybe hash resize took place so we do a search 962b63b70d8SShlomo Pongratz */ 963b63b70d8SShlomo Pongratz hash_val = ipoib_addr_hash(htbl, daddr); 964b63b70d8SShlomo Pongratz for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], 965b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 966b63b70d8SShlomo Pongratz neigh != NULL; 967b63b70d8SShlomo Pongratz neigh = rcu_dereference_protected(neigh->hnext, 968b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))) { 969b63b70d8SShlomo Pongratz if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { 970b63b70d8SShlomo Pongratz /* found, take one ref on behalf of the caller */ 971b63b70d8SShlomo Pongratz if (!atomic_inc_not_zero(&neigh->refcnt)) { 972b63b70d8SShlomo Pongratz /* deleted */ 973b63b70d8SShlomo Pongratz neigh = NULL; 974b63b70d8SShlomo Pongratz break; 975b63b70d8SShlomo Pongratz } 976b63b70d8SShlomo Pongratz neigh->alive = jiffies; 977b63b70d8SShlomo Pongratz goto out_unlock; 978b63b70d8SShlomo Pongratz } 979b63b70d8SShlomo Pongratz } 980b63b70d8SShlomo Pongratz 981b63b70d8SShlomo Pongratz neigh = ipoib_neigh_ctor(daddr, dev); 982b63b70d8SShlomo Pongratz if (!neigh) 983b63b70d8SShlomo Pongratz goto out_unlock; 984b63b70d8SShlomo Pongratz 985b63b70d8SShlomo Pongratz /* one ref on behalf of the hash table */ 986b63b70d8SShlomo Pongratz atomic_inc(&neigh->refcnt); 987b63b70d8SShlomo Pongratz neigh->alive = jiffies; 988b63b70d8SShlomo Pongratz /* put in hash */ 989b63b70d8SShlomo Pongratz rcu_assign_pointer(neigh->hnext, 990b63b70d8SShlomo Pongratz rcu_dereference_protected(htbl->buckets[hash_val], 991b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))); 992b63b70d8SShlomo Pongratz rcu_assign_pointer(htbl->buckets[hash_val], neigh); 993b63b70d8SShlomo Pongratz atomic_inc(&ntbl->entries); 994b63b70d8SShlomo Pongratz 995b63b70d8SShlomo Pongratz out_unlock: 996b63b70d8SShlomo Pongratz 997b63b70d8SShlomo Pongratz return neigh; 998b63b70d8SShlomo Pongratz } 999b63b70d8SShlomo Pongratz 1000b63b70d8SShlomo Pongratz void ipoib_neigh_dtor(struct ipoib_neigh *neigh) 1001b63b70d8SShlomo Pongratz { 1002b63b70d8SShlomo Pongratz /* neigh reference count was dropprd to zero */ 1003b63b70d8SShlomo Pongratz struct net_device *dev = neigh->dev; 1004b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = netdev_priv(dev); 10052745b5b7SMichael S. Tsirkin struct sk_buff *skb; 1006b63b70d8SShlomo Pongratz if (neigh->ah) 1007b63b70d8SShlomo Pongratz ipoib_put_ah(neigh->ah); 10082745b5b7SMichael S. Tsirkin while ((skb = __skb_dequeue(&neigh->queue))) { 1009de903512SRoland Dreier ++dev->stats.tx_dropped; 10102745b5b7SMichael S. Tsirkin dev_kfree_skb_any(skb); 10112745b5b7SMichael S. Tsirkin } 1012839fcabaSMichael S. Tsirkin if (ipoib_cm_get(neigh)) 1013839fcabaSMichael S. Tsirkin ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); 1014b63b70d8SShlomo Pongratz ipoib_dbg(netdev_priv(dev), 1015b63b70d8SShlomo Pongratz "neigh free for %06x %pI6\n", 1016b63b70d8SShlomo Pongratz IPOIB_QPN(neigh->daddr), 1017b63b70d8SShlomo Pongratz neigh->daddr + 4); 1018d2e0655eSMichael S. Tsirkin kfree(neigh); 1019b63b70d8SShlomo Pongratz if (atomic_dec_and_test(&priv->ntbl.entries)) { 1020b63b70d8SShlomo Pongratz if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags)) 1021b63b70d8SShlomo Pongratz complete(&priv->ntbl.flushed); 1022b63b70d8SShlomo Pongratz } 1023d2e0655eSMichael S. Tsirkin } 1024d2e0655eSMichael S. Tsirkin 1025b63b70d8SShlomo Pongratz static void ipoib_neigh_reclaim(struct rcu_head *rp) 10261da177e4SLinus Torvalds { 1027b63b70d8SShlomo Pongratz /* Called as a result of removal from hash table */ 1028b63b70d8SShlomo Pongratz struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu); 1029b63b70d8SShlomo Pongratz /* note TX context may hold another ref */ 1030b63b70d8SShlomo Pongratz ipoib_neigh_put(neigh); 1031b63b70d8SShlomo Pongratz } 1032b63b70d8SShlomo Pongratz 1033b63b70d8SShlomo Pongratz void ipoib_neigh_free(struct ipoib_neigh *neigh) 1034b63b70d8SShlomo Pongratz { 1035b63b70d8SShlomo Pongratz struct net_device *dev = neigh->dev; 1036b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = netdev_priv(dev); 1037b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 1038b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 1039b63b70d8SShlomo Pongratz struct ipoib_neigh __rcu **np; 1040b63b70d8SShlomo Pongratz struct ipoib_neigh *n; 1041b63b70d8SShlomo Pongratz u32 hash_val; 1042b63b70d8SShlomo Pongratz 1043b63b70d8SShlomo Pongratz htbl = rcu_dereference_protected(ntbl->htbl, 1044b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 1045b63b70d8SShlomo Pongratz if (!htbl) 1046b5120a6eSShlomo Pongratz return; 1047b63b70d8SShlomo Pongratz 1048b63b70d8SShlomo Pongratz hash_val = ipoib_addr_hash(htbl, neigh->daddr); 1049b63b70d8SShlomo Pongratz np = &htbl->buckets[hash_val]; 1050b63b70d8SShlomo Pongratz for (n = rcu_dereference_protected(*np, 1051b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 1052b63b70d8SShlomo Pongratz n != NULL; 10536c723a68SShlomo Pongratz n = rcu_dereference_protected(*np, 1054b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))) { 1055b63b70d8SShlomo Pongratz if (n == neigh) { 1056b63b70d8SShlomo Pongratz /* found */ 1057b63b70d8SShlomo Pongratz rcu_assign_pointer(*np, 1058b63b70d8SShlomo Pongratz rcu_dereference_protected(neigh->hnext, 1059b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))); 1060b63b70d8SShlomo Pongratz call_rcu(&neigh->rcu, ipoib_neigh_reclaim); 1061b5120a6eSShlomo Pongratz return; 1062b63b70d8SShlomo Pongratz } else { 1063b63b70d8SShlomo Pongratz np = &n->hnext; 1064b63b70d8SShlomo Pongratz } 1065b63b70d8SShlomo Pongratz } 1066b63b70d8SShlomo Pongratz } 1067b63b70d8SShlomo Pongratz 1068b63b70d8SShlomo Pongratz static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) 1069b63b70d8SShlomo Pongratz { 1070b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 1071b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 1072b63b70d8SShlomo Pongratz struct ipoib_neigh **buckets; 1073b63b70d8SShlomo Pongratz u32 size; 1074b63b70d8SShlomo Pongratz 1075b63b70d8SShlomo Pongratz clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); 1076b63b70d8SShlomo Pongratz ntbl->htbl = NULL; 1077b63b70d8SShlomo Pongratz htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); 1078b63b70d8SShlomo Pongratz if (!htbl) 1079b63b70d8SShlomo Pongratz return -ENOMEM; 1080b63b70d8SShlomo Pongratz set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1081b63b70d8SShlomo Pongratz size = roundup_pow_of_two(arp_tbl.gc_thresh3); 1082b63b70d8SShlomo Pongratz buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL); 1083b63b70d8SShlomo Pongratz if (!buckets) { 1084b63b70d8SShlomo Pongratz kfree(htbl); 1085b63b70d8SShlomo Pongratz return -ENOMEM; 1086b63b70d8SShlomo Pongratz } 1087b63b70d8SShlomo Pongratz htbl->size = size; 1088b63b70d8SShlomo Pongratz htbl->mask = (size - 1); 1089b63b70d8SShlomo Pongratz htbl->buckets = buckets; 1090b63b70d8SShlomo Pongratz ntbl->htbl = htbl; 109166172c09SShlomo Pongratz htbl->ntbl = ntbl; 1092b63b70d8SShlomo Pongratz atomic_set(&ntbl->entries, 0); 1093b63b70d8SShlomo Pongratz 1094b63b70d8SShlomo Pongratz /* start garbage collection */ 1095b63b70d8SShlomo Pongratz clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1096b63b70d8SShlomo Pongratz queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, 1097b63b70d8SShlomo Pongratz arp_tbl.gc_interval); 10981da177e4SLinus Torvalds 10991da177e4SLinus Torvalds return 0; 11001da177e4SLinus Torvalds } 11011da177e4SLinus Torvalds 1102b63b70d8SShlomo Pongratz static void neigh_hash_free_rcu(struct rcu_head *head) 1103b63b70d8SShlomo Pongratz { 1104b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl = container_of(head, 1105b63b70d8SShlomo Pongratz struct ipoib_neigh_hash, 1106b63b70d8SShlomo Pongratz rcu); 1107b63b70d8SShlomo Pongratz struct ipoib_neigh __rcu **buckets = htbl->buckets; 110866172c09SShlomo Pongratz struct ipoib_neigh_table *ntbl = htbl->ntbl; 1109b63b70d8SShlomo Pongratz 1110b63b70d8SShlomo Pongratz kfree(buckets); 1111b63b70d8SShlomo Pongratz kfree(htbl); 111266172c09SShlomo Pongratz complete(&ntbl->deleted); 1113b63b70d8SShlomo Pongratz } 1114b63b70d8SShlomo Pongratz 1115b63b70d8SShlomo Pongratz void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) 1116b63b70d8SShlomo Pongratz { 1117b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = netdev_priv(dev); 1118b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 1119b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 1120b63b70d8SShlomo Pongratz unsigned long flags; 1121b63b70d8SShlomo Pongratz int i; 1122b63b70d8SShlomo Pongratz 1123b63b70d8SShlomo Pongratz /* remove all neigh connected to a given path or mcast */ 1124b5120a6eSShlomo Pongratz spin_lock_irqsave(&priv->lock, flags); 1125b63b70d8SShlomo Pongratz 1126b63b70d8SShlomo Pongratz htbl = rcu_dereference_protected(ntbl->htbl, 1127b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 1128b63b70d8SShlomo Pongratz 1129b63b70d8SShlomo Pongratz if (!htbl) 1130b63b70d8SShlomo Pongratz goto out_unlock; 1131b63b70d8SShlomo Pongratz 1132b63b70d8SShlomo Pongratz for (i = 0; i < htbl->size; i++) { 1133b63b70d8SShlomo Pongratz struct ipoib_neigh *neigh; 1134b63b70d8SShlomo Pongratz struct ipoib_neigh __rcu **np = &htbl->buckets[i]; 1135b63b70d8SShlomo Pongratz 1136b63b70d8SShlomo Pongratz while ((neigh = rcu_dereference_protected(*np, 1137b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))) != NULL) { 1138b63b70d8SShlomo Pongratz /* delete neighs belong to this parent */ 1139b63b70d8SShlomo Pongratz if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { 1140b63b70d8SShlomo Pongratz rcu_assign_pointer(*np, 1141b63b70d8SShlomo Pongratz rcu_dereference_protected(neigh->hnext, 1142b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))); 1143b63b70d8SShlomo Pongratz /* remove from parent list */ 1144b63b70d8SShlomo Pongratz list_del(&neigh->list); 1145b63b70d8SShlomo Pongratz call_rcu(&neigh->rcu, ipoib_neigh_reclaim); 1146b63b70d8SShlomo Pongratz } else { 1147b63b70d8SShlomo Pongratz np = &neigh->hnext; 1148b63b70d8SShlomo Pongratz } 1149b63b70d8SShlomo Pongratz 1150b63b70d8SShlomo Pongratz } 1151b63b70d8SShlomo Pongratz } 1152b63b70d8SShlomo Pongratz out_unlock: 1153b5120a6eSShlomo Pongratz spin_unlock_irqrestore(&priv->lock, flags); 1154b63b70d8SShlomo Pongratz } 1155b63b70d8SShlomo Pongratz 1156b63b70d8SShlomo Pongratz static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) 1157b63b70d8SShlomo Pongratz { 1158b63b70d8SShlomo Pongratz struct ipoib_neigh_table *ntbl = &priv->ntbl; 1159b63b70d8SShlomo Pongratz struct ipoib_neigh_hash *htbl; 1160b63b70d8SShlomo Pongratz unsigned long flags; 116166172c09SShlomo Pongratz int i, wait_flushed = 0; 1162b63b70d8SShlomo Pongratz 116366172c09SShlomo Pongratz init_completion(&priv->ntbl.flushed); 1164b63b70d8SShlomo Pongratz 1165b5120a6eSShlomo Pongratz spin_lock_irqsave(&priv->lock, flags); 1166b63b70d8SShlomo Pongratz 1167b63b70d8SShlomo Pongratz htbl = rcu_dereference_protected(ntbl->htbl, 1168b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock)); 1169b63b70d8SShlomo Pongratz if (!htbl) 1170b63b70d8SShlomo Pongratz goto out_unlock; 1171b63b70d8SShlomo Pongratz 117266172c09SShlomo Pongratz wait_flushed = atomic_read(&priv->ntbl.entries); 117366172c09SShlomo Pongratz if (!wait_flushed) 117466172c09SShlomo Pongratz goto free_htbl; 117566172c09SShlomo Pongratz 1176b63b70d8SShlomo Pongratz for (i = 0; i < htbl->size; i++) { 1177b63b70d8SShlomo Pongratz struct ipoib_neigh *neigh; 1178b63b70d8SShlomo Pongratz struct ipoib_neigh __rcu **np = &htbl->buckets[i]; 1179b63b70d8SShlomo Pongratz 1180b63b70d8SShlomo Pongratz while ((neigh = rcu_dereference_protected(*np, 1181b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))) != NULL) { 1182b63b70d8SShlomo Pongratz rcu_assign_pointer(*np, 1183b63b70d8SShlomo Pongratz rcu_dereference_protected(neigh->hnext, 1184b5120a6eSShlomo Pongratz lockdep_is_held(&priv->lock))); 1185b63b70d8SShlomo Pongratz /* remove from path/mc list */ 1186b63b70d8SShlomo Pongratz list_del(&neigh->list); 1187b63b70d8SShlomo Pongratz call_rcu(&neigh->rcu, ipoib_neigh_reclaim); 1188b63b70d8SShlomo Pongratz } 1189b63b70d8SShlomo Pongratz } 1190b63b70d8SShlomo Pongratz 119166172c09SShlomo Pongratz free_htbl: 1192b63b70d8SShlomo Pongratz rcu_assign_pointer(ntbl->htbl, NULL); 1193b63b70d8SShlomo Pongratz call_rcu(&htbl->rcu, neigh_hash_free_rcu); 1194b63b70d8SShlomo Pongratz 1195b63b70d8SShlomo Pongratz out_unlock: 1196b5120a6eSShlomo Pongratz spin_unlock_irqrestore(&priv->lock, flags); 119766172c09SShlomo Pongratz if (wait_flushed) 119866172c09SShlomo Pongratz wait_for_completion(&priv->ntbl.flushed); 1199b63b70d8SShlomo Pongratz } 1200b63b70d8SShlomo Pongratz 1201b63b70d8SShlomo Pongratz static void ipoib_neigh_hash_uninit(struct net_device *dev) 1202b63b70d8SShlomo Pongratz { 1203b63b70d8SShlomo Pongratz struct ipoib_dev_priv *priv = netdev_priv(dev); 1204b63b70d8SShlomo Pongratz int stopped; 1205b63b70d8SShlomo Pongratz 1206b63b70d8SShlomo Pongratz ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); 120766172c09SShlomo Pongratz init_completion(&priv->ntbl.deleted); 1208b63b70d8SShlomo Pongratz set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); 1209b63b70d8SShlomo Pongratz 1210b63b70d8SShlomo Pongratz /* Stop GC if called at init fail need to cancel work */ 1211b63b70d8SShlomo Pongratz stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1212b63b70d8SShlomo Pongratz if (!stopped) 1213b63b70d8SShlomo Pongratz cancel_delayed_work(&priv->neigh_reap_task); 1214b63b70d8SShlomo Pongratz 1215b63b70d8SShlomo Pongratz ipoib_flush_neighs(priv); 121666172c09SShlomo Pongratz 121766172c09SShlomo Pongratz wait_for_completion(&priv->ntbl.deleted); 1218b63b70d8SShlomo Pongratz } 1219b63b70d8SShlomo Pongratz 1220b63b70d8SShlomo Pongratz 12211da177e4SLinus Torvalds int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) 12221da177e4SLinus Torvalds { 12231da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 12241da177e4SLinus Torvalds 1225b63b70d8SShlomo Pongratz if (ipoib_neigh_hash_init(priv) < 0) 1226b63b70d8SShlomo Pongratz goto out; 12271da177e4SLinus Torvalds /* Allocate RX/TX "rings" to hold queued skbs */ 12280f485251SShirley Ma priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, 12291da177e4SLinus Torvalds GFP_KERNEL); 12301da177e4SLinus Torvalds if (!priv->rx_ring) { 12311da177e4SLinus Torvalds printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 12320f485251SShirley Ma ca->name, ipoib_recvq_size); 1233b63b70d8SShlomo Pongratz goto out_neigh_hash_cleanup; 12341da177e4SLinus Torvalds } 12351da177e4SLinus Torvalds 1236948579cdSJoe Perches priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); 12371da177e4SLinus Torvalds if (!priv->tx_ring) { 12381da177e4SLinus Torvalds printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 12390f485251SShirley Ma ca->name, ipoib_sendq_size); 12401da177e4SLinus Torvalds goto out_rx_ring_cleanup; 12411da177e4SLinus Torvalds } 12421da177e4SLinus Torvalds 12431b524963SMichael S. Tsirkin /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ 12441da177e4SLinus Torvalds 12451da177e4SLinus Torvalds if (ipoib_ib_dev_init(dev, ca, port)) 12461da177e4SLinus Torvalds goto out_tx_ring_cleanup; 12471da177e4SLinus Torvalds 12481da177e4SLinus Torvalds return 0; 12491da177e4SLinus Torvalds 12501da177e4SLinus Torvalds out_tx_ring_cleanup: 125110313cbbSRoland Dreier vfree(priv->tx_ring); 12521da177e4SLinus Torvalds 12531da177e4SLinus Torvalds out_rx_ring_cleanup: 12541da177e4SLinus Torvalds kfree(priv->rx_ring); 12551da177e4SLinus Torvalds 1256b63b70d8SShlomo Pongratz out_neigh_hash_cleanup: 1257b63b70d8SShlomo Pongratz ipoib_neigh_hash_uninit(dev); 12581da177e4SLinus Torvalds out: 12591da177e4SLinus Torvalds return -ENOMEM; 12601da177e4SLinus Torvalds } 12611da177e4SLinus Torvalds 12621da177e4SLinus Torvalds void ipoib_dev_cleanup(struct net_device *dev) 12631da177e4SLinus Torvalds { 12641da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; 12659baa0b03SOr Gerlitz LIST_HEAD(head); 12669baa0b03SOr Gerlitz 12679baa0b03SOr Gerlitz ASSERT_RTNL(); 12681da177e4SLinus Torvalds 12691732b0efSRoland Dreier ipoib_delete_debug_files(dev); 12701da177e4SLinus Torvalds 12711da177e4SLinus Torvalds /* Delete any child interfaces first */ 12721da177e4SLinus Torvalds list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 1273b63b70d8SShlomo Pongratz /* Stop GC on child */ 1274b63b70d8SShlomo Pongratz set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); 1275b63b70d8SShlomo Pongratz cancel_delayed_work(&cpriv->neigh_reap_task); 12769baa0b03SOr Gerlitz unregister_netdevice_queue(cpriv->dev, &head); 12771da177e4SLinus Torvalds } 12789baa0b03SOr Gerlitz unregister_netdevice_many(&head); 12791da177e4SLinus Torvalds 12801da177e4SLinus Torvalds ipoib_ib_dev_cleanup(dev); 12811da177e4SLinus Torvalds 12821da177e4SLinus Torvalds kfree(priv->rx_ring); 128310313cbbSRoland Dreier vfree(priv->tx_ring); 128492a6b34bSHal Rosenstock 128592a6b34bSHal Rosenstock priv->rx_ring = NULL; 12861da177e4SLinus Torvalds priv->tx_ring = NULL; 1287b63b70d8SShlomo Pongratz 1288b63b70d8SShlomo Pongratz ipoib_neigh_hash_uninit(dev); 12891da177e4SLinus Torvalds } 12901da177e4SLinus Torvalds 12913b04dddeSStephen Hemminger static const struct header_ops ipoib_header_ops = { 12923b04dddeSStephen Hemminger .create = ipoib_hard_header, 12933b04dddeSStephen Hemminger }; 12943b04dddeSStephen Hemminger 1295fe8114e8SStephen Hemminger static const struct net_device_ops ipoib_netdev_ops = { 12969baa0b03SOr Gerlitz .ndo_uninit = ipoib_uninit, 1297fe8114e8SStephen Hemminger .ndo_open = ipoib_open, 1298fe8114e8SStephen Hemminger .ndo_stop = ipoib_stop, 1299fe8114e8SStephen Hemminger .ndo_change_mtu = ipoib_change_mtu, 13003d96c74dSMichał Mirosław .ndo_fix_features = ipoib_fix_features, 1301fe8114e8SStephen Hemminger .ndo_start_xmit = ipoib_start_xmit, 1302fe8114e8SStephen Hemminger .ndo_tx_timeout = ipoib_timeout, 1303afc4b13dSJiri Pirko .ndo_set_rx_mode = ipoib_set_mcast_list, 1304fe8114e8SStephen Hemminger }; 1305fe8114e8SStephen Hemminger 13069baa0b03SOr Gerlitz void ipoib_setup(struct net_device *dev) 13071da177e4SLinus Torvalds { 13081da177e4SLinus Torvalds struct ipoib_dev_priv *priv = netdev_priv(dev); 13091da177e4SLinus Torvalds 1310fe8114e8SStephen Hemminger dev->netdev_ops = &ipoib_netdev_ops; 13113b04dddeSStephen Hemminger dev->header_ops = &ipoib_header_ops; 1312bea3348eSStephen Hemminger 131382c24c18SEli Cohen ipoib_set_ethtool_ops(dev); 131482c24c18SEli Cohen 1315bea3348eSStephen Hemminger netif_napi_add(dev, &priv->napi, ipoib_poll, 100); 13161da177e4SLinus Torvalds 13171da177e4SLinus Torvalds dev->watchdog_timeo = HZ; 13181da177e4SLinus Torvalds 13191da177e4SLinus Torvalds dev->flags |= IFF_BROADCAST | IFF_MULTICAST; 13201da177e4SLinus Torvalds 1321936d7de3SRoland Dreier dev->hard_header_len = IPOIB_ENCAP_LEN; 13221da177e4SLinus Torvalds dev->addr_len = INFINIBAND_ALEN; 13231da177e4SLinus Torvalds dev->type = ARPHRD_INFINIBAND; 13240f485251SShirley Ma dev->tx_queue_len = ipoib_sendq_size * 2; 1325eb14032fSEli Cohen dev->features = (NETIF_F_VLAN_CHALLENGED | 1326eb14032fSEli Cohen NETIF_F_HIGHDMA); 132786d15cd8SEric Dumazet dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 13281da177e4SLinus Torvalds 13291da177e4SLinus Torvalds memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); 13301da177e4SLinus Torvalds 13311da177e4SLinus Torvalds netif_carrier_off(dev); 13321da177e4SLinus Torvalds 13331da177e4SLinus Torvalds priv->dev = dev; 13341da177e4SLinus Torvalds 13351da177e4SLinus Torvalds spin_lock_init(&priv->lock); 13361da177e4SLinus Torvalds 133795ed644fSIngo Molnar mutex_init(&priv->vlan_mutex); 13381da177e4SLinus Torvalds 13391da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->path_list); 13401da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->child_intfs); 13411da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->dead_ahs); 13421da177e4SLinus Torvalds INIT_LIST_HEAD(&priv->multicast_list); 13431da177e4SLinus Torvalds 134426bbf13cSYosef Etigin INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); 1345c4028958SDavid Howells INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 1346e8224e4bSYossi Etigin INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); 1347ee1e2c82SMoni Shoua INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); 1348ee1e2c82SMoni Shoua INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); 1349ee1e2c82SMoni Shoua INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); 1350c4028958SDavid Howells INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 1351c4028958SDavid Howells INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 1352b63b70d8SShlomo Pongratz INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); 13531da177e4SLinus Torvalds } 13541da177e4SLinus Torvalds 13551da177e4SLinus Torvalds struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) 13561da177e4SLinus Torvalds { 13571da177e4SLinus Torvalds struct net_device *dev; 13581da177e4SLinus Torvalds 13591da177e4SLinus Torvalds dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name, 13601da177e4SLinus Torvalds ipoib_setup); 13611da177e4SLinus Torvalds if (!dev) 13621da177e4SLinus Torvalds return NULL; 13631da177e4SLinus Torvalds 13641da177e4SLinus Torvalds return netdev_priv(dev); 13651da177e4SLinus Torvalds } 13661da177e4SLinus Torvalds 136743cb76d9SGreg Kroah-Hartman static ssize_t show_pkey(struct device *dev, 136843cb76d9SGreg Kroah-Hartman struct device_attribute *attr, char *buf) 13691da177e4SLinus Torvalds { 137043cb76d9SGreg Kroah-Hartman struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); 13711da177e4SLinus Torvalds 13721da177e4SLinus Torvalds return sprintf(buf, "0x%04x\n", priv->pkey); 13731da177e4SLinus Torvalds } 137443cb76d9SGreg Kroah-Hartman static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 13751da177e4SLinus Torvalds 1376335a64a5SOr Gerlitz static ssize_t show_umcast(struct device *dev, 1377335a64a5SOr Gerlitz struct device_attribute *attr, char *buf) 1378335a64a5SOr Gerlitz { 1379335a64a5SOr Gerlitz struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); 1380335a64a5SOr Gerlitz 1381335a64a5SOr Gerlitz return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); 1382335a64a5SOr Gerlitz } 1383335a64a5SOr Gerlitz 1384862096a8SOr Gerlitz void ipoib_set_umcast(struct net_device *ndev, int umcast_val) 1385335a64a5SOr Gerlitz { 1386862096a8SOr Gerlitz struct ipoib_dev_priv *priv = netdev_priv(ndev); 1387335a64a5SOr Gerlitz 1388335a64a5SOr Gerlitz if (umcast_val > 0) { 1389335a64a5SOr Gerlitz set_bit(IPOIB_FLAG_UMCAST, &priv->flags); 1390335a64a5SOr Gerlitz ipoib_warn(priv, "ignoring multicast groups joined directly " 1391335a64a5SOr Gerlitz "by userspace\n"); 1392335a64a5SOr Gerlitz } else 1393335a64a5SOr Gerlitz clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); 1394862096a8SOr Gerlitz } 1395862096a8SOr Gerlitz 1396862096a8SOr Gerlitz static ssize_t set_umcast(struct device *dev, 1397862096a8SOr Gerlitz struct device_attribute *attr, 1398862096a8SOr Gerlitz const char *buf, size_t count) 1399862096a8SOr Gerlitz { 1400862096a8SOr Gerlitz unsigned long umcast_val = simple_strtoul(buf, NULL, 0); 1401862096a8SOr Gerlitz 1402862096a8SOr Gerlitz ipoib_set_umcast(to_net_dev(dev), umcast_val); 1403335a64a5SOr Gerlitz 1404335a64a5SOr Gerlitz return count; 1405335a64a5SOr Gerlitz } 1406335a64a5SOr Gerlitz static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast); 1407335a64a5SOr Gerlitz 1408335a64a5SOr Gerlitz int ipoib_add_umcast_attr(struct net_device *dev) 1409335a64a5SOr Gerlitz { 1410335a64a5SOr Gerlitz return device_create_file(&dev->dev, &dev_attr_umcast); 1411335a64a5SOr Gerlitz } 1412335a64a5SOr Gerlitz 141343cb76d9SGreg Kroah-Hartman static ssize_t create_child(struct device *dev, 141443cb76d9SGreg Kroah-Hartman struct device_attribute *attr, 14151da177e4SLinus Torvalds const char *buf, size_t count) 14161da177e4SLinus Torvalds { 14171da177e4SLinus Torvalds int pkey; 14181da177e4SLinus Torvalds int ret; 14191da177e4SLinus Torvalds 14201da177e4SLinus Torvalds if (sscanf(buf, "%i", &pkey) != 1) 14211da177e4SLinus Torvalds return -EINVAL; 14221da177e4SLinus Torvalds 14231da177e4SLinus Torvalds if (pkey < 0 || pkey > 0xffff) 14241da177e4SLinus Torvalds return -EINVAL; 14251da177e4SLinus Torvalds 14264ce05937SRoland Dreier /* 14274ce05937SRoland Dreier * Set the full membership bit, so that we join the right 14284ce05937SRoland Dreier * broadcast group, etc. 14294ce05937SRoland Dreier */ 14304ce05937SRoland Dreier pkey |= 0x8000; 14314ce05937SRoland Dreier 143243cb76d9SGreg Kroah-Hartman ret = ipoib_vlan_add(to_net_dev(dev), pkey); 14331da177e4SLinus Torvalds 14341da177e4SLinus Torvalds return ret ? ret : count; 14351da177e4SLinus Torvalds } 14367a52b34bSOr Gerlitz static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child); 14371da177e4SLinus Torvalds 143843cb76d9SGreg Kroah-Hartman static ssize_t delete_child(struct device *dev, 143943cb76d9SGreg Kroah-Hartman struct device_attribute *attr, 14401da177e4SLinus Torvalds const char *buf, size_t count) 14411da177e4SLinus Torvalds { 14421da177e4SLinus Torvalds int pkey; 14431da177e4SLinus Torvalds int ret; 14441da177e4SLinus Torvalds 14451da177e4SLinus Torvalds if (sscanf(buf, "%i", &pkey) != 1) 14461da177e4SLinus Torvalds return -EINVAL; 14471da177e4SLinus Torvalds 14481da177e4SLinus Torvalds if (pkey < 0 || pkey > 0xffff) 14491da177e4SLinus Torvalds return -EINVAL; 14501da177e4SLinus Torvalds 145143cb76d9SGreg Kroah-Hartman ret = ipoib_vlan_delete(to_net_dev(dev), pkey); 14521da177e4SLinus Torvalds 14531da177e4SLinus Torvalds return ret ? ret : count; 14541da177e4SLinus Torvalds 14551da177e4SLinus Torvalds } 14567a52b34bSOr Gerlitz static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child); 14571da177e4SLinus Torvalds 14581da177e4SLinus Torvalds int ipoib_add_pkey_attr(struct net_device *dev) 14591da177e4SLinus Torvalds { 146043cb76d9SGreg Kroah-Hartman return device_create_file(&dev->dev, &dev_attr_pkey); 14611da177e4SLinus Torvalds } 14621da177e4SLinus Torvalds 146383bb63f6SOr Gerlitz int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) 146483bb63f6SOr Gerlitz { 146583bb63f6SOr Gerlitz struct ib_device_attr *device_attr; 146683bb63f6SOr Gerlitz int result = -ENOMEM; 146783bb63f6SOr Gerlitz 146883bb63f6SOr Gerlitz device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); 146983bb63f6SOr Gerlitz if (!device_attr) { 147083bb63f6SOr Gerlitz printk(KERN_WARNING "%s: allocation of %zu bytes failed\n", 147183bb63f6SOr Gerlitz hca->name, sizeof *device_attr); 147283bb63f6SOr Gerlitz return result; 147383bb63f6SOr Gerlitz } 147483bb63f6SOr Gerlitz 147583bb63f6SOr Gerlitz result = ib_query_device(hca, device_attr); 147683bb63f6SOr Gerlitz if (result) { 147783bb63f6SOr Gerlitz printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n", 147883bb63f6SOr Gerlitz hca->name, result); 147983bb63f6SOr Gerlitz kfree(device_attr); 148083bb63f6SOr Gerlitz return result; 148183bb63f6SOr Gerlitz } 148283bb63f6SOr Gerlitz priv->hca_caps = device_attr->device_cap_flags; 148383bb63f6SOr Gerlitz 148483bb63f6SOr Gerlitz kfree(device_attr); 148583bb63f6SOr Gerlitz 148683bb63f6SOr Gerlitz if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 14873d96c74dSMichał Mirosław priv->dev->hw_features = NETIF_F_SG | 14883d96c74dSMichał Mirosław NETIF_F_IP_CSUM | NETIF_F_RXCSUM; 14893d96c74dSMichał Mirosław 14903d96c74dSMichał Mirosław if (priv->hca_caps & IB_DEVICE_UD_TSO) 14913d96c74dSMichał Mirosław priv->dev->hw_features |= NETIF_F_TSO; 14923d96c74dSMichał Mirosław 14933d96c74dSMichał Mirosław priv->dev->features |= priv->dev->hw_features; 149483bb63f6SOr Gerlitz } 149583bb63f6SOr Gerlitz 149683bb63f6SOr Gerlitz return 0; 149783bb63f6SOr Gerlitz } 149883bb63f6SOr Gerlitz 14991da177e4SLinus Torvalds static struct net_device *ipoib_add_port(const char *format, 15001da177e4SLinus Torvalds struct ib_device *hca, u8 port) 15011da177e4SLinus Torvalds { 15021da177e4SLinus Torvalds struct ipoib_dev_priv *priv; 1503bc7b3a36SShirley Ma struct ib_port_attr attr; 15041da177e4SLinus Torvalds int result = -ENOMEM; 15051da177e4SLinus Torvalds 15061da177e4SLinus Torvalds priv = ipoib_intf_alloc(format); 15071da177e4SLinus Torvalds if (!priv) 15081da177e4SLinus Torvalds goto alloc_mem_failed; 15091da177e4SLinus Torvalds 15101da177e4SLinus Torvalds SET_NETDEV_DEV(priv->dev, hca->dma_device); 1511c3aa9b18SEli Cohen priv->dev->dev_id = port - 1; 15121da177e4SLinus Torvalds 1513bc7b3a36SShirley Ma if (!ib_query_port(hca, port, &attr)) 1514bc7b3a36SShirley Ma priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); 1515bc7b3a36SShirley Ma else { 1516bc7b3a36SShirley Ma printk(KERN_WARNING "%s: ib_query_port %d failed\n", 1517bc7b3a36SShirley Ma hca->name, port); 1518bc7b3a36SShirley Ma goto device_init_failed; 1519bc7b3a36SShirley Ma } 1520bc7b3a36SShirley Ma 1521bc7b3a36SShirley Ma /* MTU will be reset when mcast join happens */ 1522bc7b3a36SShirley Ma priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 1523bc7b3a36SShirley Ma priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; 1524bc7b3a36SShirley Ma 1525596b9b68SDavid Miller priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); 1526596b9b68SDavid Miller 15271da177e4SLinus Torvalds result = ib_query_pkey(hca, port, 0, &priv->pkey); 15281da177e4SLinus Torvalds if (result) { 15291da177e4SLinus Torvalds printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 15301da177e4SLinus Torvalds hca->name, port, result); 1531ca6de177SEli Cohen goto device_init_failed; 15321da177e4SLinus Torvalds } 15331da177e4SLinus Torvalds 153483bb63f6SOr Gerlitz if (ipoib_set_dev_features(priv, hca)) 15356046136cSEli Cohen goto device_init_failed; 1536af40da89SVladimir Sokolovsky 15374ce05937SRoland Dreier /* 15384ce05937SRoland Dreier * Set the full membership bit, so that we join the right 15394ce05937SRoland Dreier * broadcast group, etc. 15404ce05937SRoland Dreier */ 15414ce05937SRoland Dreier priv->pkey |= 0x8000; 15424ce05937SRoland Dreier 15431da177e4SLinus Torvalds priv->dev->broadcast[8] = priv->pkey >> 8; 15441da177e4SLinus Torvalds priv->dev->broadcast[9] = priv->pkey & 0xff; 15451da177e4SLinus Torvalds 15461da177e4SLinus Torvalds result = ib_query_gid(hca, port, 0, &priv->local_gid); 15471da177e4SLinus Torvalds if (result) { 15481da177e4SLinus Torvalds printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 15491da177e4SLinus Torvalds hca->name, port, result); 1550ca6de177SEli Cohen goto device_init_failed; 15511da177e4SLinus Torvalds } else 15521da177e4SLinus Torvalds memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 15531da177e4SLinus Torvalds 15541da177e4SLinus Torvalds result = ipoib_dev_init(priv->dev, hca, port); 15551da177e4SLinus Torvalds if (result < 0) { 15561da177e4SLinus Torvalds printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", 15571da177e4SLinus Torvalds hca->name, port, result); 15581da177e4SLinus Torvalds goto device_init_failed; 15591da177e4SLinus Torvalds } 15601da177e4SLinus Torvalds 15611da177e4SLinus Torvalds INIT_IB_EVENT_HANDLER(&priv->event_handler, 15621da177e4SLinus Torvalds priv->ca, ipoib_event); 15631da177e4SLinus Torvalds result = ib_register_event_handler(&priv->event_handler); 15641da177e4SLinus Torvalds if (result < 0) { 15651da177e4SLinus Torvalds printk(KERN_WARNING "%s: ib_register_event_handler failed for " 15661da177e4SLinus Torvalds "port %d (ret = %d)\n", 15671da177e4SLinus Torvalds hca->name, port, result); 15681da177e4SLinus Torvalds goto event_failed; 15691da177e4SLinus Torvalds } 15701da177e4SLinus Torvalds 15711da177e4SLinus Torvalds result = register_netdev(priv->dev); 15721da177e4SLinus Torvalds if (result) { 15731da177e4SLinus Torvalds printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n", 15741da177e4SLinus Torvalds hca->name, port, result); 15751da177e4SLinus Torvalds goto register_failed; 15761da177e4SLinus Torvalds } 15771da177e4SLinus Torvalds 15781732b0efSRoland Dreier ipoib_create_debug_files(priv->dev); 15791da177e4SLinus Torvalds 1580839fcabaSMichael S. Tsirkin if (ipoib_cm_add_mode_attr(priv->dev)) 1581839fcabaSMichael S. Tsirkin goto sysfs_failed; 15821da177e4SLinus Torvalds if (ipoib_add_pkey_attr(priv->dev)) 15831da177e4SLinus Torvalds goto sysfs_failed; 1584335a64a5SOr Gerlitz if (ipoib_add_umcast_attr(priv->dev)) 1585335a64a5SOr Gerlitz goto sysfs_failed; 158643cb76d9SGreg Kroah-Hartman if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) 15871da177e4SLinus Torvalds goto sysfs_failed; 158843cb76d9SGreg Kroah-Hartman if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) 15891da177e4SLinus Torvalds goto sysfs_failed; 15901da177e4SLinus Torvalds 15911da177e4SLinus Torvalds return priv->dev; 15921da177e4SLinus Torvalds 15931da177e4SLinus Torvalds sysfs_failed: 15941732b0efSRoland Dreier ipoib_delete_debug_files(priv->dev); 15951da177e4SLinus Torvalds unregister_netdev(priv->dev); 15961da177e4SLinus Torvalds 15971da177e4SLinus Torvalds register_failed: 15981da177e4SLinus Torvalds ib_unregister_event_handler(&priv->event_handler); 1599b63b70d8SShlomo Pongratz /* Stop GC if started before flush */ 1600b63b70d8SShlomo Pongratz set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1601b63b70d8SShlomo Pongratz cancel_delayed_work(&priv->neigh_reap_task); 1602a77a57a1SRoland Dreier flush_workqueue(ipoib_workqueue); 16031da177e4SLinus Torvalds 16041da177e4SLinus Torvalds event_failed: 16051da177e4SLinus Torvalds ipoib_dev_cleanup(priv->dev); 16061da177e4SLinus Torvalds 16071da177e4SLinus Torvalds device_init_failed: 16081da177e4SLinus Torvalds free_netdev(priv->dev); 16091da177e4SLinus Torvalds 16101da177e4SLinus Torvalds alloc_mem_failed: 16111da177e4SLinus Torvalds return ERR_PTR(result); 16121da177e4SLinus Torvalds } 16131da177e4SLinus Torvalds 16141da177e4SLinus Torvalds static void ipoib_add_one(struct ib_device *device) 16151da177e4SLinus Torvalds { 16161da177e4SLinus Torvalds struct list_head *dev_list; 16171da177e4SLinus Torvalds struct net_device *dev; 16181da177e4SLinus Torvalds struct ipoib_dev_priv *priv; 16191da177e4SLinus Torvalds int s, e, p; 16201da177e4SLinus Torvalds 162107ebafbaSTom Tucker if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 162207ebafbaSTom Tucker return; 162307ebafbaSTom Tucker 16241da177e4SLinus Torvalds dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 16251da177e4SLinus Torvalds if (!dev_list) 16261da177e4SLinus Torvalds return; 16271da177e4SLinus Torvalds 16281da177e4SLinus Torvalds INIT_LIST_HEAD(dev_list); 16291da177e4SLinus Torvalds 163007ebafbaSTom Tucker if (device->node_type == RDMA_NODE_IB_SWITCH) { 16311da177e4SLinus Torvalds s = 0; 16321da177e4SLinus Torvalds e = 0; 16331da177e4SLinus Torvalds } else { 16341da177e4SLinus Torvalds s = 1; 16351da177e4SLinus Torvalds e = device->phys_port_cnt; 16361da177e4SLinus Torvalds } 16371da177e4SLinus Torvalds 16381da177e4SLinus Torvalds for (p = s; p <= e; ++p) { 16397b4c8769SEli Cohen if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) 16407b4c8769SEli Cohen continue; 16411da177e4SLinus Torvalds dev = ipoib_add_port("ib%d", device, p); 16421da177e4SLinus Torvalds if (!IS_ERR(dev)) { 16431da177e4SLinus Torvalds priv = netdev_priv(dev); 16441da177e4SLinus Torvalds list_add_tail(&priv->list, dev_list); 16451da177e4SLinus Torvalds } 16461da177e4SLinus Torvalds } 16471da177e4SLinus Torvalds 16481da177e4SLinus Torvalds ib_set_client_data(device, &ipoib_client, dev_list); 16491da177e4SLinus Torvalds } 16501da177e4SLinus Torvalds 16511da177e4SLinus Torvalds static void ipoib_remove_one(struct ib_device *device) 16521da177e4SLinus Torvalds { 16531da177e4SLinus Torvalds struct ipoib_dev_priv *priv, *tmp; 16541da177e4SLinus Torvalds struct list_head *dev_list; 16551da177e4SLinus Torvalds 165607ebafbaSTom Tucker if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 165707ebafbaSTom Tucker return; 165807ebafbaSTom Tucker 16591da177e4SLinus Torvalds dev_list = ib_get_client_data(device, &ipoib_client); 16601da177e4SLinus Torvalds 16611da177e4SLinus Torvalds list_for_each_entry_safe(priv, tmp, dev_list, list) { 16621da177e4SLinus Torvalds ib_unregister_event_handler(&priv->event_handler); 1663a77a57a1SRoland Dreier 1664a77a57a1SRoland Dreier rtnl_lock(); 1665a77a57a1SRoland Dreier dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); 1666a77a57a1SRoland Dreier rtnl_unlock(); 1667a77a57a1SRoland Dreier 1668b63b70d8SShlomo Pongratz /* Stop GC */ 1669b63b70d8SShlomo Pongratz set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1670b63b70d8SShlomo Pongratz cancel_delayed_work(&priv->neigh_reap_task); 1671a77a57a1SRoland Dreier flush_workqueue(ipoib_workqueue); 16721da177e4SLinus Torvalds 16731da177e4SLinus Torvalds unregister_netdev(priv->dev); 16741da177e4SLinus Torvalds free_netdev(priv->dev); 16751da177e4SLinus Torvalds } 167606c56e44SMichael S. Tsirkin 167706c56e44SMichael S. Tsirkin kfree(dev_list); 16781da177e4SLinus Torvalds } 16791da177e4SLinus Torvalds 16801da177e4SLinus Torvalds static int __init ipoib_init_module(void) 16811da177e4SLinus Torvalds { 16821da177e4SLinus Torvalds int ret; 16831da177e4SLinus Torvalds 16840f485251SShirley Ma ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); 16850f485251SShirley Ma ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); 16860f485251SShirley Ma ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); 16870f485251SShirley Ma 16880f485251SShirley Ma ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); 16890f485251SShirley Ma ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); 1690732eacc0SHagen Paul Pfeifer ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE); 169168e995a2SPradeep Satyanarayana #ifdef CONFIG_INFINIBAND_IPOIB_CM 169268e995a2SPradeep Satyanarayana ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 169368e995a2SPradeep Satyanarayana #endif 16940f485251SShirley Ma 1695f89271daSEli Cohen /* 1696f89271daSEli Cohen * When copying small received packets, we only copy from the 1697f89271daSEli Cohen * linear data part of the SKB, so we rely on this condition. 1698f89271daSEli Cohen */ 1699f89271daSEli Cohen BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); 1700f89271daSEli Cohen 17011da177e4SLinus Torvalds ret = ipoib_register_debugfs(); 17021da177e4SLinus Torvalds if (ret) 17031da177e4SLinus Torvalds return ret; 17041da177e4SLinus Torvalds 17051da177e4SLinus Torvalds /* 17061da177e4SLinus Torvalds * We create our own workqueue mainly because we want to be 17071da177e4SLinus Torvalds * able to flush it when devices are being removed. We can't 17081da177e4SLinus Torvalds * use schedule_work()/flush_scheduled_work() because both 17091da177e4SLinus Torvalds * unregister_netdev() and linkwatch_event take the rtnl lock, 17101da177e4SLinus Torvalds * so flush_scheduled_work() can deadlock during device 17111da177e4SLinus Torvalds * removal. 17121da177e4SLinus Torvalds */ 17131da177e4SLinus Torvalds ipoib_workqueue = create_singlethread_workqueue("ipoib"); 17141da177e4SLinus Torvalds if (!ipoib_workqueue) { 17151da177e4SLinus Torvalds ret = -ENOMEM; 17161da177e4SLinus Torvalds goto err_fs; 17171da177e4SLinus Torvalds } 17181da177e4SLinus Torvalds 1719c1a0b23bSMichael S. Tsirkin ib_sa_register_client(&ipoib_sa_client); 1720c1a0b23bSMichael S. Tsirkin 17211da177e4SLinus Torvalds ret = ib_register_client(&ipoib_client); 17221da177e4SLinus Torvalds if (ret) 1723c1a0b23bSMichael S. Tsirkin goto err_sa; 17241da177e4SLinus Torvalds 17259baa0b03SOr Gerlitz ret = ipoib_netlink_init(); 17269baa0b03SOr Gerlitz if (ret) 17279baa0b03SOr Gerlitz goto err_client; 17289baa0b03SOr Gerlitz 17291da177e4SLinus Torvalds return 0; 17301da177e4SLinus Torvalds 17319baa0b03SOr Gerlitz err_client: 17329baa0b03SOr Gerlitz ib_unregister_client(&ipoib_client); 17339baa0b03SOr Gerlitz 1734c1a0b23bSMichael S. Tsirkin err_sa: 1735c1a0b23bSMichael S. Tsirkin ib_sa_unregister_client(&ipoib_sa_client); 17361da177e4SLinus Torvalds destroy_workqueue(ipoib_workqueue); 17371da177e4SLinus Torvalds 17389adec1a8SRoland Dreier err_fs: 17399adec1a8SRoland Dreier ipoib_unregister_debugfs(); 17409adec1a8SRoland Dreier 17411da177e4SLinus Torvalds return ret; 17421da177e4SLinus Torvalds } 17431da177e4SLinus Torvalds 17441da177e4SLinus Torvalds static void __exit ipoib_cleanup_module(void) 17451da177e4SLinus Torvalds { 17469baa0b03SOr Gerlitz ipoib_netlink_fini(); 17471da177e4SLinus Torvalds ib_unregister_client(&ipoib_client); 1748c1a0b23bSMichael S. Tsirkin ib_sa_unregister_client(&ipoib_sa_client); 17499adec1a8SRoland Dreier ipoib_unregister_debugfs(); 17501da177e4SLinus Torvalds destroy_workqueue(ipoib_workqueue); 17511da177e4SLinus Torvalds } 17521da177e4SLinus Torvalds 17531da177e4SLinus Torvalds module_init(ipoib_init_module); 17541da177e4SLinus Torvalds module_exit(ipoib_cleanup_module); 1755