11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (c) 2004 Topspin Communications. All rights reserved. 32a1d9b7fSRoland Dreier * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * This software is available to you under a choice of one of two 61da177e4SLinus Torvalds * licenses. You may choose to be licensed under the terms of the GNU 71da177e4SLinus Torvalds * General Public License (GPL) Version 2, available from the file 81da177e4SLinus Torvalds * COPYING in the main directory of this source tree, or the 91da177e4SLinus Torvalds * OpenIB.org BSD license below: 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * Redistribution and use in source and binary forms, with or 121da177e4SLinus Torvalds * without modification, are permitted provided that the following 131da177e4SLinus Torvalds * conditions are met: 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * - Redistributions of source code must retain the above 161da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 171da177e4SLinus Torvalds * disclaimer. 181da177e4SLinus Torvalds * 191da177e4SLinus Torvalds * - Redistributions in binary form must reproduce the above 201da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 211da177e4SLinus Torvalds * disclaimer in the documentation and/or other materials 221da177e4SLinus Torvalds * provided with the distribution. 231da177e4SLinus Torvalds * 241da177e4SLinus Torvalds * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 251da177e4SLinus Torvalds * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 261da177e4SLinus Torvalds * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 271da177e4SLinus Torvalds * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 281da177e4SLinus Torvalds * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 291da177e4SLinus Torvalds * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 301da177e4SLinus Torvalds * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 311da177e4SLinus Torvalds * SOFTWARE. 321da177e4SLinus Torvalds */ 331da177e4SLinus Torvalds 341da177e4SLinus Torvalds #include <linux/module.h> 351da177e4SLinus Torvalds #include <linux/string.h> 361da177e4SLinus Torvalds #include <linux/errno.h> 379a6b090cSAhmed S. Darwish #include <linux/kernel.h> 381da177e4SLinus Torvalds #include <linux/slab.h> 391da177e4SLinus Torvalds #include <linux/init.h> 409268f72dSYotam Kenneth #include <linux/netdevice.h> 414e0f7b90SParav Pandit #include <net/net_namespace.h> 424e0f7b90SParav Pandit #include <net/netns/generic.h> 438f408ab6SDaniel Jurgens #include <linux/security.h> 448f408ab6SDaniel Jurgens #include <linux/notifier.h> 45324e227eSJason Gunthorpe #include <linux/hashtable.h> 46b2cbae2cSRoland Dreier #include <rdma/rdma_netlink.h> 4703db3a2dSMatan Barak #include <rdma/ib_addr.h> 4803db3a2dSMatan Barak #include <rdma/ib_cache.h> 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds #include "core_priv.h" 5141eda65cSLeon Romanovsky #include "restrack.h" 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds MODULE_AUTHOR("Roland Dreier"); 541da177e4SLinus Torvalds MODULE_DESCRIPTION("core kernel InfiniBand API"); 551da177e4SLinus Torvalds MODULE_LICENSE("Dual BSD/GPL"); 561da177e4SLinus Torvalds 5714d3a3b2SChristoph Hellwig struct workqueue_struct *ib_comp_wq; 58f794809aSJack Morgenstein struct workqueue_struct *ib_comp_unbound_wq; 59f0626710STejun Heo struct workqueue_struct *ib_wq; 60f0626710STejun Heo EXPORT_SYMBOL_GPL(ib_wq); 61f0626710STejun Heo 620df91bb6SJason Gunthorpe /* 63921eab11SJason Gunthorpe * Each of the three rwsem locks (devices, clients, client_data) protects the 64921eab11SJason 
* xarray of the same name. Specifically it allows the caller to assert that
 * the MARK will/will not be changing under the lock, and for devices and
 * clients, that the value in the xarray is still a valid pointer. Change of
 * the MARK is linked to the object state, so holding the lock and testing the
 * MARK also asserts that the contained object is in a certain state.
 *
 * This is used to build a two stage register/unregister flow where objects
 * can remain in the xarray while their registration or unregistration is
 * still in progress.
 *
 * The xarray itself provides additional locking, and restartable iteration,
 * which is also relied on.
 *
 * Locks should not be nested, with the exception of client_data, which is
 * allowed to nest under the read side of the other two locks.
 *
 * The devices_rwsem also protects the device name list: any change or
 * assignment of a device name must also hold the write side to guarantee
 * unique names.
 */

/*
 * devices contains devices that have had their names assigned. The
 * devices may not be registered. Users that care about the registration
 * status need to call ib_device_try_get() on the device to ensure it is
 * registered, and keep it registered, for the required duration.
900df91bb6SJason Gunthorpe * 910df91bb6SJason Gunthorpe */ 920df91bb6SJason Gunthorpe static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); 93921eab11SJason Gunthorpe static DECLARE_RWSEM(devices_rwsem); 940df91bb6SJason Gunthorpe #define DEVICE_REGISTERED XA_MARK_1 950df91bb6SJason Gunthorpe 961da177e4SLinus Torvalds static LIST_HEAD(client_list); 97e59178d8SJason Gunthorpe #define CLIENT_REGISTERED XA_MARK_1 98e59178d8SJason Gunthorpe static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); 99921eab11SJason Gunthorpe static DECLARE_RWSEM(clients_rwsem); 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds /* 1020df91bb6SJason Gunthorpe * If client_data is registered then the corresponding client must also still 1030df91bb6SJason Gunthorpe * be registered. 1040df91bb6SJason Gunthorpe */ 1050df91bb6SJason Gunthorpe #define CLIENT_DATA_REGISTERED XA_MARK_1 1064e0f7b90SParav Pandit 1074e0f7b90SParav Pandit /** 1084e0f7b90SParav Pandit * struct rdma_dev_net - rdma net namespace metadata for a net 1094e0f7b90SParav Pandit * @net: Pointer to owner net namespace 1104e0f7b90SParav Pandit * @id: xarray id to identify the net namespace. 1114e0f7b90SParav Pandit */ 1124e0f7b90SParav Pandit struct rdma_dev_net { 1134e0f7b90SParav Pandit possible_net_t net; 1144e0f7b90SParav Pandit u32 id; 1154e0f7b90SParav Pandit }; 1164e0f7b90SParav Pandit 1174e0f7b90SParav Pandit static unsigned int rdma_dev_net_id; 1184e0f7b90SParav Pandit 1194e0f7b90SParav Pandit /* 1204e0f7b90SParav Pandit * A list of net namespaces is maintained in an xarray. This is necessary 1214e0f7b90SParav Pandit * because we can't get the locking right using the existing net ns list. We 1224e0f7b90SParav Pandit * would require a init_net callback after the list is updated. 1234e0f7b90SParav Pandit */ 1244e0f7b90SParav Pandit static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); 1254e0f7b90SParav Pandit /* 1264e0f7b90SParav Pandit * rwsem to protect accessing the rdma_nets xarray entries. 
1274e0f7b90SParav Pandit */ 1284e0f7b90SParav Pandit static DECLARE_RWSEM(rdma_nets_rwsem); 1294e0f7b90SParav Pandit 130cb7e0e13SParav Pandit bool ib_devices_shared_netns = true; 131a56bc45bSParav Pandit module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); 132a56bc45bSParav Pandit MODULE_PARM_DESC(netns_mode, 133a56bc45bSParav Pandit "Share device among net namespaces; default=1 (shared)"); 13441c61401SParav Pandit /** 13541c61401SParav Pandit * rdma_dev_access_netns() - Return whether a rdma device can be accessed 13641c61401SParav Pandit * from a specified net namespace or not. 13741c61401SParav Pandit * @device: Pointer to rdma device which needs to be checked 13841c61401SParav Pandit * @net: Pointer to net namesapce for which access to be checked 13941c61401SParav Pandit * 14041c61401SParav Pandit * rdma_dev_access_netns() - Return whether a rdma device can be accessed 14141c61401SParav Pandit * from a specified net namespace or not. When 14241c61401SParav Pandit * rdma device is in shared mode, it ignores the 14341c61401SParav Pandit * net namespace. When rdma device is exclusive 14441c61401SParav Pandit * to a net namespace, rdma device net namespace is 14541c61401SParav Pandit * checked against the specified one. 14641c61401SParav Pandit */ 14741c61401SParav Pandit bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) 14841c61401SParav Pandit { 14941c61401SParav Pandit return (ib_devices_shared_netns || 15041c61401SParav Pandit net_eq(read_pnet(&dev->coredev.rdma_net), net)); 15141c61401SParav Pandit } 15241c61401SParav Pandit EXPORT_SYMBOL(rdma_dev_access_netns); 15341c61401SParav Pandit 1540df91bb6SJason Gunthorpe /* 1550df91bb6SJason Gunthorpe * xarray has this behavior where it won't iterate over NULL values stored in 1560df91bb6SJason Gunthorpe * allocated arrays. So we need our own iterator to see all values stored in 1570df91bb6SJason Gunthorpe * the array. 
This does the same thing as xa_for_each except that it also 1580df91bb6SJason Gunthorpe * returns NULL valued entries if the array is allocating. Simplified to only 1590df91bb6SJason Gunthorpe * work on simple xarrays. 1600df91bb6SJason Gunthorpe */ 1610df91bb6SJason Gunthorpe static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, 1620df91bb6SJason Gunthorpe xa_mark_t filter) 1630df91bb6SJason Gunthorpe { 1640df91bb6SJason Gunthorpe XA_STATE(xas, xa, *indexp); 1650df91bb6SJason Gunthorpe void *entry; 1660df91bb6SJason Gunthorpe 1670df91bb6SJason Gunthorpe rcu_read_lock(); 1680df91bb6SJason Gunthorpe do { 1690df91bb6SJason Gunthorpe entry = xas_find_marked(&xas, ULONG_MAX, filter); 1700df91bb6SJason Gunthorpe if (xa_is_zero(entry)) 1710df91bb6SJason Gunthorpe break; 1720df91bb6SJason Gunthorpe } while (xas_retry(&xas, entry)); 1730df91bb6SJason Gunthorpe rcu_read_unlock(); 1740df91bb6SJason Gunthorpe 1750df91bb6SJason Gunthorpe if (entry) { 1760df91bb6SJason Gunthorpe *indexp = xas.xa_index; 1770df91bb6SJason Gunthorpe if (xa_is_zero(entry)) 1780df91bb6SJason Gunthorpe return NULL; 1790df91bb6SJason Gunthorpe return entry; 1800df91bb6SJason Gunthorpe } 1810df91bb6SJason Gunthorpe return XA_ERROR(-ENOENT); 1820df91bb6SJason Gunthorpe } 1830df91bb6SJason Gunthorpe #define xan_for_each_marked(xa, index, entry, filter) \ 1840df91bb6SJason Gunthorpe for (index = 0, entry = xan_find_marked(xa, &(index), filter); \ 1850df91bb6SJason Gunthorpe !xa_is_err(entry); \ 1860df91bb6SJason Gunthorpe (index)++, entry = xan_find_marked(xa, &(index), filter)) 1870df91bb6SJason Gunthorpe 188324e227eSJason Gunthorpe /* RCU hash table mapping netdevice pointers to struct ib_port_data */ 189324e227eSJason Gunthorpe static DEFINE_SPINLOCK(ndev_hash_lock); 190324e227eSJason Gunthorpe static DECLARE_HASHTABLE(ndev_hash, 5); 191324e227eSJason Gunthorpe 192c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev); 193d0899892SJason Gunthorpe static void 
ib_unregister_work(struct work_struct *work); 194d0899892SJason Gunthorpe static void __ib_unregister_device(struct ib_device *device); 1958f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event, 1968f408ab6SDaniel Jurgens void *lsm_data); 1978f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work); 1988f408ab6SDaniel Jurgens static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); 1998f408ab6SDaniel Jurgens 200923abb9dSGal Pressman static void __ibdev_printk(const char *level, const struct ib_device *ibdev, 201923abb9dSGal Pressman struct va_format *vaf) 202923abb9dSGal Pressman { 203923abb9dSGal Pressman if (ibdev && ibdev->dev.parent) 204923abb9dSGal Pressman dev_printk_emit(level[1] - '0', 205923abb9dSGal Pressman ibdev->dev.parent, 206923abb9dSGal Pressman "%s %s %s: %pV", 207923abb9dSGal Pressman dev_driver_string(ibdev->dev.parent), 208923abb9dSGal Pressman dev_name(ibdev->dev.parent), 209923abb9dSGal Pressman dev_name(&ibdev->dev), 210923abb9dSGal Pressman vaf); 211923abb9dSGal Pressman else if (ibdev) 212923abb9dSGal Pressman printk("%s%s: %pV", 213923abb9dSGal Pressman level, dev_name(&ibdev->dev), vaf); 214923abb9dSGal Pressman else 215923abb9dSGal Pressman printk("%s(NULL ib_device): %pV", level, vaf); 216923abb9dSGal Pressman } 217923abb9dSGal Pressman 218923abb9dSGal Pressman void ibdev_printk(const char *level, const struct ib_device *ibdev, 219923abb9dSGal Pressman const char *format, ...) 
220923abb9dSGal Pressman { 221923abb9dSGal Pressman struct va_format vaf; 222923abb9dSGal Pressman va_list args; 223923abb9dSGal Pressman 224923abb9dSGal Pressman va_start(args, format); 225923abb9dSGal Pressman 226923abb9dSGal Pressman vaf.fmt = format; 227923abb9dSGal Pressman vaf.va = &args; 228923abb9dSGal Pressman 229923abb9dSGal Pressman __ibdev_printk(level, ibdev, &vaf); 230923abb9dSGal Pressman 231923abb9dSGal Pressman va_end(args); 232923abb9dSGal Pressman } 233923abb9dSGal Pressman EXPORT_SYMBOL(ibdev_printk); 234923abb9dSGal Pressman 235923abb9dSGal Pressman #define define_ibdev_printk_level(func, level) \ 236923abb9dSGal Pressman void func(const struct ib_device *ibdev, const char *fmt, ...) \ 237923abb9dSGal Pressman { \ 238923abb9dSGal Pressman struct va_format vaf; \ 239923abb9dSGal Pressman va_list args; \ 240923abb9dSGal Pressman \ 241923abb9dSGal Pressman va_start(args, fmt); \ 242923abb9dSGal Pressman \ 243923abb9dSGal Pressman vaf.fmt = fmt; \ 244923abb9dSGal Pressman vaf.va = &args; \ 245923abb9dSGal Pressman \ 246923abb9dSGal Pressman __ibdev_printk(level, ibdev, &vaf); \ 247923abb9dSGal Pressman \ 248923abb9dSGal Pressman va_end(args); \ 249923abb9dSGal Pressman } \ 250923abb9dSGal Pressman EXPORT_SYMBOL(func); 251923abb9dSGal Pressman 252923abb9dSGal Pressman define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); 253923abb9dSGal Pressman define_ibdev_printk_level(ibdev_alert, KERN_ALERT); 254923abb9dSGal Pressman define_ibdev_printk_level(ibdev_crit, KERN_CRIT); 255923abb9dSGal Pressman define_ibdev_printk_level(ibdev_err, KERN_ERR); 256923abb9dSGal Pressman define_ibdev_printk_level(ibdev_warn, KERN_WARNING); 257923abb9dSGal Pressman define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); 258923abb9dSGal Pressman define_ibdev_printk_level(ibdev_info, KERN_INFO); 259923abb9dSGal Pressman 2608f408ab6SDaniel Jurgens static struct notifier_block ibdev_lsm_nb = { 2618f408ab6SDaniel Jurgens .notifier_call = ib_security_change, 2628f408ab6SDaniel 
Jurgens }; 2631da177e4SLinus Torvalds 264decbc7a6SParav Pandit static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, 265decbc7a6SParav Pandit struct net *net); 266decbc7a6SParav Pandit 267324e227eSJason Gunthorpe /* Pointer to the RCU head at the start of the ib_port_data array */ 268324e227eSJason Gunthorpe struct ib_port_data_rcu { 269324e227eSJason Gunthorpe struct rcu_head rcu_head; 270324e227eSJason Gunthorpe struct ib_port_data pdata[]; 271324e227eSJason Gunthorpe }; 272324e227eSJason Gunthorpe 2731da177e4SLinus Torvalds static int ib_device_check_mandatory(struct ib_device *device) 2741da177e4SLinus Torvalds { 2753023a1e9SKamal Heib #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } 2761da177e4SLinus Torvalds static const struct { 2771da177e4SLinus Torvalds size_t offset; 2781da177e4SLinus Torvalds char *name; 2791da177e4SLinus Torvalds } mandatory_table[] = { 2801da177e4SLinus Torvalds IB_MANDATORY_FUNC(query_device), 2811da177e4SLinus Torvalds IB_MANDATORY_FUNC(query_port), 2821da177e4SLinus Torvalds IB_MANDATORY_FUNC(query_pkey), 2831da177e4SLinus Torvalds IB_MANDATORY_FUNC(alloc_pd), 2841da177e4SLinus Torvalds IB_MANDATORY_FUNC(dealloc_pd), 2851da177e4SLinus Torvalds IB_MANDATORY_FUNC(create_qp), 2861da177e4SLinus Torvalds IB_MANDATORY_FUNC(modify_qp), 2871da177e4SLinus Torvalds IB_MANDATORY_FUNC(destroy_qp), 2881da177e4SLinus Torvalds IB_MANDATORY_FUNC(post_send), 2891da177e4SLinus Torvalds IB_MANDATORY_FUNC(post_recv), 2901da177e4SLinus Torvalds IB_MANDATORY_FUNC(create_cq), 2911da177e4SLinus Torvalds IB_MANDATORY_FUNC(destroy_cq), 2921da177e4SLinus Torvalds IB_MANDATORY_FUNC(poll_cq), 2931da177e4SLinus Torvalds IB_MANDATORY_FUNC(req_notify_cq), 2941da177e4SLinus Torvalds IB_MANDATORY_FUNC(get_dma_mr), 2957738613eSIra Weiny IB_MANDATORY_FUNC(dereg_mr), 2967738613eSIra Weiny IB_MANDATORY_FUNC(get_port_immutable) 2971da177e4SLinus Torvalds }; 2981da177e4SLinus Torvalds int i; 2991da177e4SLinus Torvalds 
3006780c4faSGal Pressman device->kverbs_provider = true; 3019a6b090cSAhmed S. Darwish for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { 3023023a1e9SKamal Heib if (!*(void **) ((void *) &device->ops + 3033023a1e9SKamal Heib mandatory_table[i].offset)) { 3046780c4faSGal Pressman device->kverbs_provider = false; 3056780c4faSGal Pressman break; 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds } 3081da177e4SLinus Torvalds 3091da177e4SLinus Torvalds return 0; 3101da177e4SLinus Torvalds } 3111da177e4SLinus Torvalds 312f8978bd9SLeon Romanovsky /* 31301b67117SParav Pandit * Caller must perform ib_device_put() to return the device reference count 31401b67117SParav Pandit * when ib_device_get_by_index() returns valid device pointer. 315f8978bd9SLeon Romanovsky */ 31637eeab55SParav Pandit struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) 317f8978bd9SLeon Romanovsky { 318f8978bd9SLeon Romanovsky struct ib_device *device; 319f8978bd9SLeon Romanovsky 320921eab11SJason Gunthorpe down_read(&devices_rwsem); 3210df91bb6SJason Gunthorpe device = xa_load(&devices, index); 32201b67117SParav Pandit if (device) { 32337eeab55SParav Pandit if (!rdma_dev_access_netns(device, net)) { 32437eeab55SParav Pandit device = NULL; 32537eeab55SParav Pandit goto out; 32637eeab55SParav Pandit } 32737eeab55SParav Pandit 328d79af724SJason Gunthorpe if (!ib_device_try_get(device)) 32901b67117SParav Pandit device = NULL; 33001b67117SParav Pandit } 33137eeab55SParav Pandit out: 332921eab11SJason Gunthorpe up_read(&devices_rwsem); 333f8978bd9SLeon Romanovsky return device; 334f8978bd9SLeon Romanovsky } 335f8978bd9SLeon Romanovsky 336d79af724SJason Gunthorpe /** 337d79af724SJason Gunthorpe * ib_device_put - Release IB device reference 338d79af724SJason Gunthorpe * @device: device whose reference to be released 339d79af724SJason Gunthorpe * 340d79af724SJason Gunthorpe * ib_device_put() releases reference to the IB device to allow it to be 341d79af724SJason Gunthorpe * 
unregistered and eventually free. 342d79af724SJason Gunthorpe */ 34301b67117SParav Pandit void ib_device_put(struct ib_device *device) 34401b67117SParav Pandit { 34501b67117SParav Pandit if (refcount_dec_and_test(&device->refcount)) 34601b67117SParav Pandit complete(&device->unreg_completion); 34701b67117SParav Pandit } 348d79af724SJason Gunthorpe EXPORT_SYMBOL(ib_device_put); 34901b67117SParav Pandit 3501da177e4SLinus Torvalds static struct ib_device *__ib_device_get_by_name(const char *name) 3511da177e4SLinus Torvalds { 3521da177e4SLinus Torvalds struct ib_device *device; 3530df91bb6SJason Gunthorpe unsigned long index; 3541da177e4SLinus Torvalds 3550df91bb6SJason Gunthorpe xa_for_each (&devices, index, device) 356896de009SJason Gunthorpe if (!strcmp(name, dev_name(&device->dev))) 3571da177e4SLinus Torvalds return device; 3581da177e4SLinus Torvalds 3591da177e4SLinus Torvalds return NULL; 3601da177e4SLinus Torvalds } 3611da177e4SLinus Torvalds 3626cc2c8e5SJason Gunthorpe /** 3636cc2c8e5SJason Gunthorpe * ib_device_get_by_name - Find an IB device by name 3646cc2c8e5SJason Gunthorpe * @name: The name to look for 3656cc2c8e5SJason Gunthorpe * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) 3666cc2c8e5SJason Gunthorpe * 3676cc2c8e5SJason Gunthorpe * Find and hold an ib_device by its name. The caller must call 3686cc2c8e5SJason Gunthorpe * ib_device_put() on the returned pointer. 
3696cc2c8e5SJason Gunthorpe */ 3706cc2c8e5SJason Gunthorpe struct ib_device *ib_device_get_by_name(const char *name, 3716cc2c8e5SJason Gunthorpe enum rdma_driver_id driver_id) 3726cc2c8e5SJason Gunthorpe { 3736cc2c8e5SJason Gunthorpe struct ib_device *device; 3746cc2c8e5SJason Gunthorpe 3756cc2c8e5SJason Gunthorpe down_read(&devices_rwsem); 3766cc2c8e5SJason Gunthorpe device = __ib_device_get_by_name(name); 3776cc2c8e5SJason Gunthorpe if (device && driver_id != RDMA_DRIVER_UNKNOWN && 3786cc2c8e5SJason Gunthorpe device->driver_id != driver_id) 3796cc2c8e5SJason Gunthorpe device = NULL; 3806cc2c8e5SJason Gunthorpe 3816cc2c8e5SJason Gunthorpe if (device) { 3826cc2c8e5SJason Gunthorpe if (!ib_device_try_get(device)) 3836cc2c8e5SJason Gunthorpe device = NULL; 3846cc2c8e5SJason Gunthorpe } 3856cc2c8e5SJason Gunthorpe up_read(&devices_rwsem); 3866cc2c8e5SJason Gunthorpe return device; 3876cc2c8e5SJason Gunthorpe } 3886cc2c8e5SJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_name); 3896cc2c8e5SJason Gunthorpe 3904e0f7b90SParav Pandit static int rename_compat_devs(struct ib_device *device) 3914e0f7b90SParav Pandit { 3924e0f7b90SParav Pandit struct ib_core_device *cdev; 3934e0f7b90SParav Pandit unsigned long index; 3944e0f7b90SParav Pandit int ret = 0; 3954e0f7b90SParav Pandit 3964e0f7b90SParav Pandit mutex_lock(&device->compat_devs_mutex); 3974e0f7b90SParav Pandit xa_for_each (&device->compat_devs, index, cdev) { 3984e0f7b90SParav Pandit ret = device_rename(&cdev->dev, dev_name(&device->dev)); 3994e0f7b90SParav Pandit if (ret) { 4004e0f7b90SParav Pandit dev_warn(&cdev->dev, 4014e0f7b90SParav Pandit "Fail to rename compatdev to new name %s\n", 4024e0f7b90SParav Pandit dev_name(&device->dev)); 4034e0f7b90SParav Pandit break; 4044e0f7b90SParav Pandit } 4054e0f7b90SParav Pandit } 4064e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 4074e0f7b90SParav Pandit return ret; 4084e0f7b90SParav Pandit } 4094e0f7b90SParav Pandit 410d21943ddSLeon Romanovsky int 
ib_device_rename(struct ib_device *ibdev, const char *name) 411d21943ddSLeon Romanovsky { 412e3593b56SJason Gunthorpe int ret; 413d21943ddSLeon Romanovsky 414921eab11SJason Gunthorpe down_write(&devices_rwsem); 415e3593b56SJason Gunthorpe if (!strcmp(name, dev_name(&ibdev->dev))) { 416e3593b56SJason Gunthorpe ret = 0; 417e3593b56SJason Gunthorpe goto out; 418e3593b56SJason Gunthorpe } 419e3593b56SJason Gunthorpe 420344684e6SJason Gunthorpe if (__ib_device_get_by_name(name)) { 421d21943ddSLeon Romanovsky ret = -EEXIST; 422d21943ddSLeon Romanovsky goto out; 423d21943ddSLeon Romanovsky } 424d21943ddSLeon Romanovsky 425d21943ddSLeon Romanovsky ret = device_rename(&ibdev->dev, name); 426d21943ddSLeon Romanovsky if (ret) 427d21943ddSLeon Romanovsky goto out; 428d21943ddSLeon Romanovsky strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); 4294e0f7b90SParav Pandit ret = rename_compat_devs(ibdev); 430d21943ddSLeon Romanovsky out: 431921eab11SJason Gunthorpe up_write(&devices_rwsem); 432d21943ddSLeon Romanovsky return ret; 433d21943ddSLeon Romanovsky } 434d21943ddSLeon Romanovsky 435e349f858SJason Gunthorpe static int alloc_name(struct ib_device *ibdev, const char *name) 4361da177e4SLinus Torvalds { 4371da177e4SLinus Torvalds struct ib_device *device; 4380df91bb6SJason Gunthorpe unsigned long index; 4393b88afd3SJason Gunthorpe struct ida inuse; 4403b88afd3SJason Gunthorpe int rc; 4411da177e4SLinus Torvalds int i; 4421da177e4SLinus Torvalds 443921eab11SJason Gunthorpe lockdep_assert_held_exclusive(&devices_rwsem); 4443b88afd3SJason Gunthorpe ida_init(&inuse); 4450df91bb6SJason Gunthorpe xa_for_each (&devices, index, device) { 446e349f858SJason Gunthorpe char buf[IB_DEVICE_NAME_MAX]; 447e349f858SJason Gunthorpe 448896de009SJason Gunthorpe if (sscanf(dev_name(&device->dev), name, &i) != 1) 4491da177e4SLinus Torvalds continue; 4503b88afd3SJason Gunthorpe if (i < 0 || i >= INT_MAX) 4511da177e4SLinus Torvalds continue; 4521da177e4SLinus Torvalds snprintf(buf, sizeof buf, name, i); 
4533b88afd3SJason Gunthorpe if (strcmp(buf, dev_name(&device->dev)) != 0) 4543b88afd3SJason Gunthorpe continue; 4553b88afd3SJason Gunthorpe 4563b88afd3SJason Gunthorpe rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL); 4573b88afd3SJason Gunthorpe if (rc < 0) 4583b88afd3SJason Gunthorpe goto out; 4591da177e4SLinus Torvalds } 4601da177e4SLinus Torvalds 4613b88afd3SJason Gunthorpe rc = ida_alloc(&inuse, GFP_KERNEL); 4623b88afd3SJason Gunthorpe if (rc < 0) 4633b88afd3SJason Gunthorpe goto out; 4641da177e4SLinus Torvalds 4653b88afd3SJason Gunthorpe rc = dev_set_name(&ibdev->dev, name, rc); 4663b88afd3SJason Gunthorpe out: 4673b88afd3SJason Gunthorpe ida_destroy(&inuse); 4683b88afd3SJason Gunthorpe return rc; 4691da177e4SLinus Torvalds } 4701da177e4SLinus Torvalds 47155aeed06SJason Gunthorpe static void ib_device_release(struct device *device) 47255aeed06SJason Gunthorpe { 47355aeed06SJason Gunthorpe struct ib_device *dev = container_of(device, struct ib_device, dev); 47455aeed06SJason Gunthorpe 475c2261dd7SJason Gunthorpe free_netdevs(dev); 476652432f3SJason Gunthorpe WARN_ON(refcount_read(&dev->refcount)); 47703db3a2dSMatan Barak ib_cache_release_one(dev); 478b34b269aSJason Gunthorpe ib_security_release_port_pkey_list(dev); 4794e0f7b90SParav Pandit xa_destroy(&dev->compat_devs); 4800df91bb6SJason Gunthorpe xa_destroy(&dev->client_data); 481324e227eSJason Gunthorpe if (dev->port_data) 482324e227eSJason Gunthorpe kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, 483324e227eSJason Gunthorpe pdata[0]), 484324e227eSJason Gunthorpe rcu_head); 485324e227eSJason Gunthorpe kfree_rcu(dev, rcu_head); 48655aeed06SJason Gunthorpe } 48755aeed06SJason Gunthorpe 48855aeed06SJason Gunthorpe static int ib_device_uevent(struct device *device, 48955aeed06SJason Gunthorpe struct kobj_uevent_env *env) 49055aeed06SJason Gunthorpe { 491896de009SJason Gunthorpe if (add_uevent_var(env, "NAME=%s", dev_name(device))) 49255aeed06SJason Gunthorpe return -ENOMEM; 49355aeed06SJason 
Gunthorpe 49455aeed06SJason Gunthorpe /* 49555aeed06SJason Gunthorpe * It would be nice to pass the node GUID with the event... 49655aeed06SJason Gunthorpe */ 49755aeed06SJason Gunthorpe 49855aeed06SJason Gunthorpe return 0; 49955aeed06SJason Gunthorpe } 50055aeed06SJason Gunthorpe 50162dfa795SParav Pandit static const void *net_namespace(struct device *d) 50262dfa795SParav Pandit { 5034e0f7b90SParav Pandit struct ib_core_device *coredev = 5044e0f7b90SParav Pandit container_of(d, struct ib_core_device, dev); 5054e0f7b90SParav Pandit 5064e0f7b90SParav Pandit return read_pnet(&coredev->rdma_net); 50762dfa795SParav Pandit } 50862dfa795SParav Pandit 50955aeed06SJason Gunthorpe static struct class ib_class = { 51055aeed06SJason Gunthorpe .name = "infiniband", 51155aeed06SJason Gunthorpe .dev_release = ib_device_release, 51255aeed06SJason Gunthorpe .dev_uevent = ib_device_uevent, 51362dfa795SParav Pandit .ns_type = &net_ns_type_operations, 51462dfa795SParav Pandit .namespace = net_namespace, 51555aeed06SJason Gunthorpe }; 51655aeed06SJason Gunthorpe 517cebe556bSParav Pandit static void rdma_init_coredev(struct ib_core_device *coredev, 5184e0f7b90SParav Pandit struct ib_device *dev, struct net *net) 519cebe556bSParav Pandit { 520cebe556bSParav Pandit /* This BUILD_BUG_ON is intended to catch layout change 521cebe556bSParav Pandit * of union of ib_core_device and device. 522cebe556bSParav Pandit * dev must be the first element as ib_core and providers 523cebe556bSParav Pandit * driver uses it. Adding anything in ib_core_device before 524cebe556bSParav Pandit * device will break this assumption. 
525cebe556bSParav Pandit */ 526cebe556bSParav Pandit BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != 527cebe556bSParav Pandit offsetof(struct ib_device, dev)); 528cebe556bSParav Pandit 529cebe556bSParav Pandit coredev->dev.class = &ib_class; 530cebe556bSParav Pandit coredev->dev.groups = dev->groups; 531cebe556bSParav Pandit device_initialize(&coredev->dev); 532cebe556bSParav Pandit coredev->owner = dev; 533cebe556bSParav Pandit INIT_LIST_HEAD(&coredev->port_list); 5344e0f7b90SParav Pandit write_pnet(&coredev->rdma_net, net); 535cebe556bSParav Pandit } 536cebe556bSParav Pandit 5371da177e4SLinus Torvalds /** 538459cc69fSLeon Romanovsky * _ib_alloc_device - allocate an IB device struct 5391da177e4SLinus Torvalds * @size:size of structure to allocate 5401da177e4SLinus Torvalds * 5411da177e4SLinus Torvalds * Low-level drivers should use ib_alloc_device() to allocate &struct 5421da177e4SLinus Torvalds * ib_device. @size is the size of the structure to be allocated, 5431da177e4SLinus Torvalds * including any private data used by the low-level driver. 5441da177e4SLinus Torvalds * ib_dealloc_device() must be used to free structures allocated with 5451da177e4SLinus Torvalds * ib_alloc_device(). 
5461da177e4SLinus Torvalds */ 547459cc69fSLeon Romanovsky struct ib_device *_ib_alloc_device(size_t size) 5481da177e4SLinus Torvalds { 54955aeed06SJason Gunthorpe struct ib_device *device; 5501da177e4SLinus Torvalds 55155aeed06SJason Gunthorpe if (WARN_ON(size < sizeof(struct ib_device))) 55255aeed06SJason Gunthorpe return NULL; 55355aeed06SJason Gunthorpe 55455aeed06SJason Gunthorpe device = kzalloc(size, GFP_KERNEL); 55555aeed06SJason Gunthorpe if (!device) 55655aeed06SJason Gunthorpe return NULL; 55755aeed06SJason Gunthorpe 55841eda65cSLeon Romanovsky if (rdma_restrack_init(device)) { 55941eda65cSLeon Romanovsky kfree(device); 56041eda65cSLeon Romanovsky return NULL; 56141eda65cSLeon Romanovsky } 56202d8883fSLeon Romanovsky 5635f8f5499SParav Pandit device->groups[0] = &ib_dev_attr_group; 5644e0f7b90SParav Pandit rdma_init_coredev(&device->coredev, device, &init_net); 56555aeed06SJason Gunthorpe 56655aeed06SJason Gunthorpe INIT_LIST_HEAD(&device->event_handler_list); 56755aeed06SJason Gunthorpe spin_lock_init(&device->event_handler_lock); 568d0899892SJason Gunthorpe mutex_init(&device->unregistration_lock); 5690df91bb6SJason Gunthorpe /* 5700df91bb6SJason Gunthorpe * client_data needs to be alloc because we don't want our mark to be 5710df91bb6SJason Gunthorpe * destroyed if the user stores NULL in the client data. 
5720df91bb6SJason Gunthorpe */ 5730df91bb6SJason Gunthorpe xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); 574921eab11SJason Gunthorpe init_rwsem(&device->client_data_rwsem); 5754e0f7b90SParav Pandit xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); 5764e0f7b90SParav Pandit mutex_init(&device->compat_devs_mutex); 57701b67117SParav Pandit init_completion(&device->unreg_completion); 578d0899892SJason Gunthorpe INIT_WORK(&device->unregistration_work, ib_unregister_work); 57955aeed06SJason Gunthorpe 58055aeed06SJason Gunthorpe return device; 5811da177e4SLinus Torvalds } 582459cc69fSLeon Romanovsky EXPORT_SYMBOL(_ib_alloc_device); 5831da177e4SLinus Torvalds 5841da177e4SLinus Torvalds /** 5851da177e4SLinus Torvalds * ib_dealloc_device - free an IB device struct 5861da177e4SLinus Torvalds * @device:structure to free 5871da177e4SLinus Torvalds * 5881da177e4SLinus Torvalds * Free a structure allocated with ib_alloc_device(). 5891da177e4SLinus Torvalds */ 5901da177e4SLinus Torvalds void ib_dealloc_device(struct ib_device *device) 5911da177e4SLinus Torvalds { 592d0899892SJason Gunthorpe if (device->ops.dealloc_driver) 593d0899892SJason Gunthorpe device->ops.dealloc_driver(device); 594d0899892SJason Gunthorpe 595d0899892SJason Gunthorpe /* 596d0899892SJason Gunthorpe * ib_unregister_driver() requires all devices to remain in the xarray 597d0899892SJason Gunthorpe * while their ops are callable. The last op we call is dealloc_driver 598d0899892SJason Gunthorpe * above. This is needed to create a fence on op callbacks prior to 599d0899892SJason Gunthorpe * allowing the driver module to unload. 
600d0899892SJason Gunthorpe */ 601d0899892SJason Gunthorpe down_write(&devices_rwsem); 602d0899892SJason Gunthorpe if (xa_load(&devices, device->index) == device) 603d0899892SJason Gunthorpe xa_erase(&devices, device->index); 604d0899892SJason Gunthorpe up_write(&devices_rwsem); 605d0899892SJason Gunthorpe 606c2261dd7SJason Gunthorpe /* Expedite releasing netdev references */ 607c2261dd7SJason Gunthorpe free_netdevs(device); 608c2261dd7SJason Gunthorpe 6094e0f7b90SParav Pandit WARN_ON(!xa_empty(&device->compat_devs)); 6100df91bb6SJason Gunthorpe WARN_ON(!xa_empty(&device->client_data)); 611652432f3SJason Gunthorpe WARN_ON(refcount_read(&device->refcount)); 6120ad699c0SLeon Romanovsky rdma_restrack_clean(device); 613e155755eSParav Pandit /* Balances with device_initialize */ 614924b8900SLeon Romanovsky put_device(&device->dev); 6151da177e4SLinus Torvalds } 6161da177e4SLinus Torvalds EXPORT_SYMBOL(ib_dealloc_device); 6171da177e4SLinus Torvalds 618921eab11SJason Gunthorpe /* 619921eab11SJason Gunthorpe * add_client_context() and remove_client_context() must be safe against 620921eab11SJason Gunthorpe * parallel calls on the same device - registration/unregistration of both the 621921eab11SJason Gunthorpe * device and client can be occurring in parallel. 622921eab11SJason Gunthorpe * 623921eab11SJason Gunthorpe * The routines need to be a fence, any caller must not return until the add 624921eab11SJason Gunthorpe * or remove is fully completed. 
625921eab11SJason Gunthorpe */ 626921eab11SJason Gunthorpe static int add_client_context(struct ib_device *device, 627921eab11SJason Gunthorpe struct ib_client *client) 6281da177e4SLinus Torvalds { 629921eab11SJason Gunthorpe int ret = 0; 6301da177e4SLinus Torvalds 6316780c4faSGal Pressman if (!device->kverbs_provider && !client->no_kverbs_req) 632921eab11SJason Gunthorpe return 0; 6336780c4faSGal Pressman 634921eab11SJason Gunthorpe down_write(&device->client_data_rwsem); 635921eab11SJason Gunthorpe /* 636921eab11SJason Gunthorpe * Another caller to add_client_context got here first and has already 637921eab11SJason Gunthorpe * completely initialized context. 638921eab11SJason Gunthorpe */ 639921eab11SJason Gunthorpe if (xa_get_mark(&device->client_data, client->client_id, 640921eab11SJason Gunthorpe CLIENT_DATA_REGISTERED)) 641921eab11SJason Gunthorpe goto out; 642921eab11SJason Gunthorpe 643921eab11SJason Gunthorpe ret = xa_err(xa_store(&device->client_data, client->client_id, NULL, 644921eab11SJason Gunthorpe GFP_KERNEL)); 645921eab11SJason Gunthorpe if (ret) 646921eab11SJason Gunthorpe goto out; 647921eab11SJason Gunthorpe downgrade_write(&device->client_data_rwsem); 648921eab11SJason Gunthorpe if (client->add) 649921eab11SJason Gunthorpe client->add(device); 650921eab11SJason Gunthorpe 651921eab11SJason Gunthorpe /* Readers shall not see a client until add has been completed */ 6520df91bb6SJason Gunthorpe xa_set_mark(&device->client_data, client->client_id, 6530df91bb6SJason Gunthorpe CLIENT_DATA_REGISTERED); 654921eab11SJason Gunthorpe up_read(&device->client_data_rwsem); 655921eab11SJason Gunthorpe return 0; 6561da177e4SLinus Torvalds 657921eab11SJason Gunthorpe out: 658921eab11SJason Gunthorpe up_write(&device->client_data_rwsem); 659921eab11SJason Gunthorpe return ret; 660921eab11SJason Gunthorpe } 661921eab11SJason Gunthorpe 662921eab11SJason Gunthorpe static void remove_client_context(struct ib_device *device, 663921eab11SJason Gunthorpe unsigned int 
client_id) 664921eab11SJason Gunthorpe { 665921eab11SJason Gunthorpe struct ib_client *client; 666921eab11SJason Gunthorpe void *client_data; 667921eab11SJason Gunthorpe 668921eab11SJason Gunthorpe down_write(&device->client_data_rwsem); 669921eab11SJason Gunthorpe if (!xa_get_mark(&device->client_data, client_id, 670921eab11SJason Gunthorpe CLIENT_DATA_REGISTERED)) { 671921eab11SJason Gunthorpe up_write(&device->client_data_rwsem); 672921eab11SJason Gunthorpe return; 673921eab11SJason Gunthorpe } 674921eab11SJason Gunthorpe client_data = xa_load(&device->client_data, client_id); 675921eab11SJason Gunthorpe xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); 676921eab11SJason Gunthorpe client = xa_load(&clients, client_id); 677921eab11SJason Gunthorpe downgrade_write(&device->client_data_rwsem); 678921eab11SJason Gunthorpe 679921eab11SJason Gunthorpe /* 680921eab11SJason Gunthorpe * Notice we cannot be holding any exclusive locks when calling the 681921eab11SJason Gunthorpe * remove callback as the remove callback can recurse back into any 682921eab11SJason Gunthorpe * public functions in this module and thus try for any locks those 683921eab11SJason Gunthorpe * functions take. 684921eab11SJason Gunthorpe * 685921eab11SJason Gunthorpe * For this reason clients and drivers should not call the 686921eab11SJason Gunthorpe * unregistration functions will holdling any locks. 687921eab11SJason Gunthorpe * 688921eab11SJason Gunthorpe * It tempting to drop the client_data_rwsem too, but this is required 689921eab11SJason Gunthorpe * to ensure that unregister_client does not return until all clients 690921eab11SJason Gunthorpe * are completely unregistered, which is required to avoid module 691921eab11SJason Gunthorpe * unloading races. 
692921eab11SJason Gunthorpe */ 693921eab11SJason Gunthorpe if (client->remove) 694921eab11SJason Gunthorpe client->remove(device, client_data); 695921eab11SJason Gunthorpe 696921eab11SJason Gunthorpe xa_erase(&device->client_data, client_id); 697921eab11SJason Gunthorpe up_read(&device->client_data_rwsem); 6981da177e4SLinus Torvalds } 6991da177e4SLinus Torvalds 700c2261dd7SJason Gunthorpe static int alloc_port_data(struct ib_device *device) 7015eb620c8SYosef Etigin { 702324e227eSJason Gunthorpe struct ib_port_data_rcu *pdata_rcu; 703ea1075edSJason Gunthorpe unsigned int port; 704c2261dd7SJason Gunthorpe 705c2261dd7SJason Gunthorpe if (device->port_data) 706c2261dd7SJason Gunthorpe return 0; 707c2261dd7SJason Gunthorpe 708c2261dd7SJason Gunthorpe /* This can only be called once the physical port range is defined */ 709c2261dd7SJason Gunthorpe if (WARN_ON(!device->phys_port_cnt)) 710c2261dd7SJason Gunthorpe return -EINVAL; 7115eb620c8SYosef Etigin 7128ceb1357SJason Gunthorpe /* 7138ceb1357SJason Gunthorpe * device->port_data is indexed directly by the port number to make 7147738613eSIra Weiny * access to this data as efficient as possible. 7157738613eSIra Weiny * 7168ceb1357SJason Gunthorpe * Therefore port_data is declared as a 1 based array with potential 7178ceb1357SJason Gunthorpe * empty slots at the beginning. 7187738613eSIra Weiny */ 719324e227eSJason Gunthorpe pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata, 720324e227eSJason Gunthorpe rdma_end_port(device) + 1), 721324e227eSJason Gunthorpe GFP_KERNEL); 722324e227eSJason Gunthorpe if (!pdata_rcu) 72355aeed06SJason Gunthorpe return -ENOMEM; 724324e227eSJason Gunthorpe /* 725324e227eSJason Gunthorpe * The rcu_head is put in front of the port data array and the stored 726324e227eSJason Gunthorpe * pointer is adjusted since we never need to see that member until 727324e227eSJason Gunthorpe * kfree_rcu. 
728324e227eSJason Gunthorpe */ 729324e227eSJason Gunthorpe device->port_data = pdata_rcu->pdata; 7305eb620c8SYosef Etigin 731ea1075edSJason Gunthorpe rdma_for_each_port (device, port) { 7328ceb1357SJason Gunthorpe struct ib_port_data *pdata = &device->port_data[port]; 7338ceb1357SJason Gunthorpe 734324e227eSJason Gunthorpe pdata->ib_dev = device; 7358ceb1357SJason Gunthorpe spin_lock_init(&pdata->pkey_list_lock); 7368ceb1357SJason Gunthorpe INIT_LIST_HEAD(&pdata->pkey_list); 737c2261dd7SJason Gunthorpe spin_lock_init(&pdata->netdev_lock); 738324e227eSJason Gunthorpe INIT_HLIST_NODE(&pdata->ndev_hash_link); 739c2261dd7SJason Gunthorpe } 740c2261dd7SJason Gunthorpe return 0; 741c2261dd7SJason Gunthorpe } 742c2261dd7SJason Gunthorpe 743c2261dd7SJason Gunthorpe static int verify_immutable(const struct ib_device *dev, u8 port) 744c2261dd7SJason Gunthorpe { 745c2261dd7SJason Gunthorpe return WARN_ON(!rdma_cap_ib_mad(dev, port) && 746c2261dd7SJason Gunthorpe rdma_max_mad_size(dev, port) != 0); 747c2261dd7SJason Gunthorpe } 748c2261dd7SJason Gunthorpe 749c2261dd7SJason Gunthorpe static int setup_port_data(struct ib_device *device) 750c2261dd7SJason Gunthorpe { 751c2261dd7SJason Gunthorpe unsigned int port; 752c2261dd7SJason Gunthorpe int ret; 753c2261dd7SJason Gunthorpe 754c2261dd7SJason Gunthorpe ret = alloc_port_data(device); 755c2261dd7SJason Gunthorpe if (ret) 756c2261dd7SJason Gunthorpe return ret; 757c2261dd7SJason Gunthorpe 758c2261dd7SJason Gunthorpe rdma_for_each_port (device, port) { 759c2261dd7SJason Gunthorpe struct ib_port_data *pdata = &device->port_data[port]; 7608ceb1357SJason Gunthorpe 7618ceb1357SJason Gunthorpe ret = device->ops.get_port_immutable(device, port, 7628ceb1357SJason Gunthorpe &pdata->immutable); 7635eb620c8SYosef Etigin if (ret) 7645eb620c8SYosef Etigin return ret; 76555aeed06SJason Gunthorpe 76655aeed06SJason Gunthorpe if (verify_immutable(device, port)) 76755aeed06SJason Gunthorpe return -EINVAL; 76855aeed06SJason Gunthorpe } 
76955aeed06SJason Gunthorpe return 0; 7705eb620c8SYosef Etigin } 7715eb620c8SYosef Etigin 7729abb0d1bSLeon Romanovsky void ib_get_device_fw_str(struct ib_device *dev, char *str) 7735fa76c20SIra Weiny { 7743023a1e9SKamal Heib if (dev->ops.get_dev_fw_str) 7753023a1e9SKamal Heib dev->ops.get_dev_fw_str(dev, str); 7765fa76c20SIra Weiny else 7775fa76c20SIra Weiny str[0] = '\0'; 7785fa76c20SIra Weiny } 7795fa76c20SIra Weiny EXPORT_SYMBOL(ib_get_device_fw_str); 7805fa76c20SIra Weiny 7818f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work) 7828f408ab6SDaniel Jurgens { 7838f408ab6SDaniel Jurgens struct ib_device *dev; 7840df91bb6SJason Gunthorpe unsigned long index; 7858f408ab6SDaniel Jurgens 786921eab11SJason Gunthorpe down_read(&devices_rwsem); 7870df91bb6SJason Gunthorpe xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 788ea1075edSJason Gunthorpe unsigned int i; 7898f408ab6SDaniel Jurgens 790ea1075edSJason Gunthorpe rdma_for_each_port (dev, i) { 7918f408ab6SDaniel Jurgens u64 sp; 7928f408ab6SDaniel Jurgens int ret = ib_get_cached_subnet_prefix(dev, 7938f408ab6SDaniel Jurgens i, 7948f408ab6SDaniel Jurgens &sp); 7958f408ab6SDaniel Jurgens 7968f408ab6SDaniel Jurgens WARN_ONCE(ret, 7978f408ab6SDaniel Jurgens "ib_get_cached_subnet_prefix err: %d, this should never happen here\n", 7988f408ab6SDaniel Jurgens ret); 799a750cfdeSDaniel Jurgens if (!ret) 8008f408ab6SDaniel Jurgens ib_security_cache_change(dev, i, sp); 8018f408ab6SDaniel Jurgens } 8028f408ab6SDaniel Jurgens } 803921eab11SJason Gunthorpe up_read(&devices_rwsem); 8048f408ab6SDaniel Jurgens } 8058f408ab6SDaniel Jurgens 8068f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event, 8078f408ab6SDaniel Jurgens void *lsm_data) 8088f408ab6SDaniel Jurgens { 8098f408ab6SDaniel Jurgens if (event != LSM_POLICY_CHANGE) 8108f408ab6SDaniel Jurgens return NOTIFY_DONE; 8118f408ab6SDaniel Jurgens 8128f408ab6SDaniel Jurgens 
schedule_work(&ib_policy_change_work); 813c66f6741SDaniel Jurgens ib_mad_agent_security_change(); 8148f408ab6SDaniel Jurgens 8158f408ab6SDaniel Jurgens return NOTIFY_OK; 8168f408ab6SDaniel Jurgens } 8178f408ab6SDaniel Jurgens 8184e0f7b90SParav Pandit static void compatdev_release(struct device *dev) 8194e0f7b90SParav Pandit { 8204e0f7b90SParav Pandit struct ib_core_device *cdev = 8214e0f7b90SParav Pandit container_of(dev, struct ib_core_device, dev); 8224e0f7b90SParav Pandit 8234e0f7b90SParav Pandit kfree(cdev); 8244e0f7b90SParav Pandit } 8254e0f7b90SParav Pandit 8264e0f7b90SParav Pandit static int add_one_compat_dev(struct ib_device *device, 8274e0f7b90SParav Pandit struct rdma_dev_net *rnet) 8284e0f7b90SParav Pandit { 8294e0f7b90SParav Pandit struct ib_core_device *cdev; 8304e0f7b90SParav Pandit int ret; 8314e0f7b90SParav Pandit 8322b34c558SParav Pandit lockdep_assert_held(&rdma_nets_rwsem); 833a56bc45bSParav Pandit if (!ib_devices_shared_netns) 834a56bc45bSParav Pandit return 0; 835a56bc45bSParav Pandit 8364e0f7b90SParav Pandit /* 8374e0f7b90SParav Pandit * Create and add compat device in all namespaces other than where it 8384e0f7b90SParav Pandit * is currently bound to. 8394e0f7b90SParav Pandit */ 8404e0f7b90SParav Pandit if (net_eq(read_pnet(&rnet->net), 8414e0f7b90SParav Pandit read_pnet(&device->coredev.rdma_net))) 8424e0f7b90SParav Pandit return 0; 8434e0f7b90SParav Pandit 8444e0f7b90SParav Pandit /* 8454e0f7b90SParav Pandit * The first of init_net() or ib_register_device() to take the 8464e0f7b90SParav Pandit * compat_devs_mutex wins and gets to add the device. Others will wait 8474e0f7b90SParav Pandit * for completion here. 
8484e0f7b90SParav Pandit */ 8494e0f7b90SParav Pandit mutex_lock(&device->compat_devs_mutex); 8504e0f7b90SParav Pandit cdev = xa_load(&device->compat_devs, rnet->id); 8514e0f7b90SParav Pandit if (cdev) { 8524e0f7b90SParav Pandit ret = 0; 8534e0f7b90SParav Pandit goto done; 8544e0f7b90SParav Pandit } 8554e0f7b90SParav Pandit ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); 8564e0f7b90SParav Pandit if (ret) 8574e0f7b90SParav Pandit goto done; 8584e0f7b90SParav Pandit 8594e0f7b90SParav Pandit cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); 8604e0f7b90SParav Pandit if (!cdev) { 8614e0f7b90SParav Pandit ret = -ENOMEM; 8624e0f7b90SParav Pandit goto cdev_err; 8634e0f7b90SParav Pandit } 8644e0f7b90SParav Pandit 8654e0f7b90SParav Pandit cdev->dev.parent = device->dev.parent; 8664e0f7b90SParav Pandit rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); 8674e0f7b90SParav Pandit cdev->dev.release = compatdev_release; 8684e0f7b90SParav Pandit dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); 8694e0f7b90SParav Pandit 8704e0f7b90SParav Pandit ret = device_add(&cdev->dev); 8714e0f7b90SParav Pandit if (ret) 8724e0f7b90SParav Pandit goto add_err; 873eb15c78bSParav Pandit ret = ib_setup_port_attrs(cdev); 8745417783eSParav Pandit if (ret) 8755417783eSParav Pandit goto port_err; 8764e0f7b90SParav Pandit 8774e0f7b90SParav Pandit ret = xa_err(xa_store(&device->compat_devs, rnet->id, 8784e0f7b90SParav Pandit cdev, GFP_KERNEL)); 8794e0f7b90SParav Pandit if (ret) 8804e0f7b90SParav Pandit goto insert_err; 8814e0f7b90SParav Pandit 8824e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 8834e0f7b90SParav Pandit return 0; 8844e0f7b90SParav Pandit 8854e0f7b90SParav Pandit insert_err: 8865417783eSParav Pandit ib_free_port_attrs(cdev); 8875417783eSParav Pandit port_err: 8884e0f7b90SParav Pandit device_del(&cdev->dev); 8894e0f7b90SParav Pandit add_err: 8904e0f7b90SParav Pandit put_device(&cdev->dev); 8914e0f7b90SParav Pandit cdev_err: 8924e0f7b90SParav Pandit 
xa_release(&device->compat_devs, rnet->id); 8934e0f7b90SParav Pandit done: 8944e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 8954e0f7b90SParav Pandit return ret; 8964e0f7b90SParav Pandit } 8974e0f7b90SParav Pandit 8984e0f7b90SParav Pandit static void remove_one_compat_dev(struct ib_device *device, u32 id) 8994e0f7b90SParav Pandit { 9004e0f7b90SParav Pandit struct ib_core_device *cdev; 9014e0f7b90SParav Pandit 9024e0f7b90SParav Pandit mutex_lock(&device->compat_devs_mutex); 9034e0f7b90SParav Pandit cdev = xa_erase(&device->compat_devs, id); 9044e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 9054e0f7b90SParav Pandit if (cdev) { 9065417783eSParav Pandit ib_free_port_attrs(cdev); 9074e0f7b90SParav Pandit device_del(&cdev->dev); 9084e0f7b90SParav Pandit put_device(&cdev->dev); 9094e0f7b90SParav Pandit } 9104e0f7b90SParav Pandit } 9114e0f7b90SParav Pandit 9124e0f7b90SParav Pandit static void remove_compat_devs(struct ib_device *device) 9134e0f7b90SParav Pandit { 9144e0f7b90SParav Pandit struct ib_core_device *cdev; 9154e0f7b90SParav Pandit unsigned long index; 9164e0f7b90SParav Pandit 9174e0f7b90SParav Pandit xa_for_each (&device->compat_devs, index, cdev) 9184e0f7b90SParav Pandit remove_one_compat_dev(device, index); 9194e0f7b90SParav Pandit } 9204e0f7b90SParav Pandit 9214e0f7b90SParav Pandit static int add_compat_devs(struct ib_device *device) 9224e0f7b90SParav Pandit { 9234e0f7b90SParav Pandit struct rdma_dev_net *rnet; 9244e0f7b90SParav Pandit unsigned long index; 9254e0f7b90SParav Pandit int ret = 0; 9264e0f7b90SParav Pandit 927decbc7a6SParav Pandit lockdep_assert_held(&devices_rwsem); 928decbc7a6SParav Pandit 9294e0f7b90SParav Pandit down_read(&rdma_nets_rwsem); 9304e0f7b90SParav Pandit xa_for_each (&rdma_nets, index, rnet) { 9314e0f7b90SParav Pandit ret = add_one_compat_dev(device, rnet); 9324e0f7b90SParav Pandit if (ret) 9334e0f7b90SParav Pandit break; 9344e0f7b90SParav Pandit } 9354e0f7b90SParav Pandit up_read(&rdma_nets_rwsem); 
9364e0f7b90SParav Pandit return ret; 9374e0f7b90SParav Pandit } 9384e0f7b90SParav Pandit 9392b34c558SParav Pandit static void remove_all_compat_devs(void) 9402b34c558SParav Pandit { 9412b34c558SParav Pandit struct ib_compat_device *cdev; 9422b34c558SParav Pandit struct ib_device *dev; 9432b34c558SParav Pandit unsigned long index; 9442b34c558SParav Pandit 9452b34c558SParav Pandit down_read(&devices_rwsem); 9462b34c558SParav Pandit xa_for_each (&devices, index, dev) { 9472b34c558SParav Pandit unsigned long c_index = 0; 9482b34c558SParav Pandit 9492b34c558SParav Pandit /* Hold nets_rwsem so that any other thread modifying this 9502b34c558SParav Pandit * system param can sync with this thread. 9512b34c558SParav Pandit */ 9522b34c558SParav Pandit down_read(&rdma_nets_rwsem); 9532b34c558SParav Pandit xa_for_each (&dev->compat_devs, c_index, cdev) 9542b34c558SParav Pandit remove_one_compat_dev(dev, c_index); 9552b34c558SParav Pandit up_read(&rdma_nets_rwsem); 9562b34c558SParav Pandit } 9572b34c558SParav Pandit up_read(&devices_rwsem); 9582b34c558SParav Pandit } 9592b34c558SParav Pandit 9602b34c558SParav Pandit static int add_all_compat_devs(void) 9612b34c558SParav Pandit { 9622b34c558SParav Pandit struct rdma_dev_net *rnet; 9632b34c558SParav Pandit struct ib_device *dev; 9642b34c558SParav Pandit unsigned long index; 9652b34c558SParav Pandit int ret = 0; 9662b34c558SParav Pandit 9672b34c558SParav Pandit down_read(&devices_rwsem); 9682b34c558SParav Pandit xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 9692b34c558SParav Pandit unsigned long net_index = 0; 9702b34c558SParav Pandit 9712b34c558SParav Pandit /* Hold nets_rwsem so that any other thread modifying this 9722b34c558SParav Pandit * system param can sync with this thread. 
9732b34c558SParav Pandit */ 9742b34c558SParav Pandit down_read(&rdma_nets_rwsem); 9752b34c558SParav Pandit xa_for_each (&rdma_nets, net_index, rnet) { 9762b34c558SParav Pandit ret = add_one_compat_dev(dev, rnet); 9772b34c558SParav Pandit if (ret) 9782b34c558SParav Pandit break; 9792b34c558SParav Pandit } 9802b34c558SParav Pandit up_read(&rdma_nets_rwsem); 9812b34c558SParav Pandit } 9822b34c558SParav Pandit up_read(&devices_rwsem); 9832b34c558SParav Pandit if (ret) 9842b34c558SParav Pandit remove_all_compat_devs(); 9852b34c558SParav Pandit return ret; 9862b34c558SParav Pandit } 9872b34c558SParav Pandit 9882b34c558SParav Pandit int rdma_compatdev_set(u8 enable) 9892b34c558SParav Pandit { 9902b34c558SParav Pandit struct rdma_dev_net *rnet; 9912b34c558SParav Pandit unsigned long index; 9922b34c558SParav Pandit int ret = 0; 9932b34c558SParav Pandit 9942b34c558SParav Pandit down_write(&rdma_nets_rwsem); 9952b34c558SParav Pandit if (ib_devices_shared_netns == enable) { 9962b34c558SParav Pandit up_write(&rdma_nets_rwsem); 9972b34c558SParav Pandit return 0; 9982b34c558SParav Pandit } 9992b34c558SParav Pandit 10002b34c558SParav Pandit /* enable/disable of compat devices is not supported 10012b34c558SParav Pandit * when more than default init_net exists. 
10022b34c558SParav Pandit */ 10032b34c558SParav Pandit xa_for_each (&rdma_nets, index, rnet) { 10042b34c558SParav Pandit ret++; 10052b34c558SParav Pandit break; 10062b34c558SParav Pandit } 10072b34c558SParav Pandit if (!ret) 10082b34c558SParav Pandit ib_devices_shared_netns = enable; 10092b34c558SParav Pandit up_write(&rdma_nets_rwsem); 10102b34c558SParav Pandit if (ret) 10112b34c558SParav Pandit return -EBUSY; 10122b34c558SParav Pandit 10132b34c558SParav Pandit if (enable) 10142b34c558SParav Pandit ret = add_all_compat_devs(); 10152b34c558SParav Pandit else 10162b34c558SParav Pandit remove_all_compat_devs(); 10172b34c558SParav Pandit return ret; 10182b34c558SParav Pandit } 10192b34c558SParav Pandit 10204e0f7b90SParav Pandit static void rdma_dev_exit_net(struct net *net) 10214e0f7b90SParav Pandit { 10224e0f7b90SParav Pandit struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); 10234e0f7b90SParav Pandit struct ib_device *dev; 10244e0f7b90SParav Pandit unsigned long index; 10254e0f7b90SParav Pandit int ret; 10264e0f7b90SParav Pandit 10274e0f7b90SParav Pandit down_write(&rdma_nets_rwsem); 10284e0f7b90SParav Pandit /* 10294e0f7b90SParav Pandit * Prevent the ID from being re-used and hide the id from xa_for_each. 10304e0f7b90SParav Pandit */ 10314e0f7b90SParav Pandit ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); 10324e0f7b90SParav Pandit WARN_ON(ret); 10334e0f7b90SParav Pandit up_write(&rdma_nets_rwsem); 10344e0f7b90SParav Pandit 10354e0f7b90SParav Pandit down_read(&devices_rwsem); 10364e0f7b90SParav Pandit xa_for_each (&devices, index, dev) { 10374e0f7b90SParav Pandit get_device(&dev->dev); 10384e0f7b90SParav Pandit /* 10394e0f7b90SParav Pandit * Release the devices_rwsem so that pontentially blocking 10404e0f7b90SParav Pandit * device_del, doesn't hold the devices_rwsem for too long. 
10414e0f7b90SParav Pandit */ 10424e0f7b90SParav Pandit up_read(&devices_rwsem); 10434e0f7b90SParav Pandit 10444e0f7b90SParav Pandit remove_one_compat_dev(dev, rnet->id); 10454e0f7b90SParav Pandit 1046decbc7a6SParav Pandit /* 1047decbc7a6SParav Pandit * If the real device is in the NS then move it back to init. 1048decbc7a6SParav Pandit */ 1049decbc7a6SParav Pandit rdma_dev_change_netns(dev, net, &init_net); 1050decbc7a6SParav Pandit 10514e0f7b90SParav Pandit put_device(&dev->dev); 10524e0f7b90SParav Pandit down_read(&devices_rwsem); 10534e0f7b90SParav Pandit } 10544e0f7b90SParav Pandit up_read(&devices_rwsem); 10554e0f7b90SParav Pandit 10564e0f7b90SParav Pandit xa_erase(&rdma_nets, rnet->id); 10574e0f7b90SParav Pandit } 10584e0f7b90SParav Pandit 10594e0f7b90SParav Pandit static __net_init int rdma_dev_init_net(struct net *net) 10604e0f7b90SParav Pandit { 10614e0f7b90SParav Pandit struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); 10624e0f7b90SParav Pandit unsigned long index; 10634e0f7b90SParav Pandit struct ib_device *dev; 10644e0f7b90SParav Pandit int ret; 10654e0f7b90SParav Pandit 10664e0f7b90SParav Pandit /* No need to create any compat devices in default init_net. */ 10674e0f7b90SParav Pandit if (net_eq(net, &init_net)) 10684e0f7b90SParav Pandit return 0; 10694e0f7b90SParav Pandit 10704e0f7b90SParav Pandit write_pnet(&rnet->net, net); 10714e0f7b90SParav Pandit 10724e0f7b90SParav Pandit ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); 10734e0f7b90SParav Pandit if (ret) 10744e0f7b90SParav Pandit return ret; 10754e0f7b90SParav Pandit 10764e0f7b90SParav Pandit down_read(&devices_rwsem); 10774e0f7b90SParav Pandit xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 10782b34c558SParav Pandit /* Hold nets_rwsem so that netlink command cannot change 10792b34c558SParav Pandit * system configuration for device sharing mode. 
10802b34c558SParav Pandit */ 10812b34c558SParav Pandit down_read(&rdma_nets_rwsem); 10824e0f7b90SParav Pandit ret = add_one_compat_dev(dev, rnet); 10832b34c558SParav Pandit up_read(&rdma_nets_rwsem); 10844e0f7b90SParav Pandit if (ret) 10854e0f7b90SParav Pandit break; 10864e0f7b90SParav Pandit } 10874e0f7b90SParav Pandit up_read(&devices_rwsem); 10884e0f7b90SParav Pandit 10894e0f7b90SParav Pandit if (ret) 10904e0f7b90SParav Pandit rdma_dev_exit_net(net); 10914e0f7b90SParav Pandit 10924e0f7b90SParav Pandit return ret; 10934e0f7b90SParav Pandit } 10944e0f7b90SParav Pandit 1095ecc82c53SLeon Romanovsky /* 1096d0899892SJason Gunthorpe * Assign the unique string device name and the unique device index. This is 1097d0899892SJason Gunthorpe * undone by ib_dealloc_device. 1098ecc82c53SLeon Romanovsky */ 10990df91bb6SJason Gunthorpe static int assign_name(struct ib_device *device, const char *name) 11000df91bb6SJason Gunthorpe { 11010df91bb6SJason Gunthorpe static u32 last_id; 11020df91bb6SJason Gunthorpe int ret; 1103ecc82c53SLeon Romanovsky 1104921eab11SJason Gunthorpe down_write(&devices_rwsem); 11050df91bb6SJason Gunthorpe /* Assign a unique name to the device */ 11060df91bb6SJason Gunthorpe if (strchr(name, '%')) 11070df91bb6SJason Gunthorpe ret = alloc_name(device, name); 11080df91bb6SJason Gunthorpe else 11090df91bb6SJason Gunthorpe ret = dev_set_name(&device->dev, name); 11100df91bb6SJason Gunthorpe if (ret) 11110df91bb6SJason Gunthorpe goto out; 1112ecc82c53SLeon Romanovsky 11130df91bb6SJason Gunthorpe if (__ib_device_get_by_name(dev_name(&device->dev))) { 11140df91bb6SJason Gunthorpe ret = -ENFILE; 11150df91bb6SJason Gunthorpe goto out; 1116ecc82c53SLeon Romanovsky } 11170df91bb6SJason Gunthorpe strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); 11180df91bb6SJason Gunthorpe 1119ea295481SLinus Torvalds ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, 1120ea295481SLinus Torvalds &last_id, GFP_KERNEL); 1121ea295481SLinus Torvalds if 
(ret > 0) 11220df91bb6SJason Gunthorpe ret = 0; 1123921eab11SJason Gunthorpe 11240df91bb6SJason Gunthorpe out: 1125921eab11SJason Gunthorpe up_write(&devices_rwsem); 11260df91bb6SJason Gunthorpe return ret; 11270df91bb6SJason Gunthorpe } 11280df91bb6SJason Gunthorpe 1129548cb4fbSParav Pandit static void setup_dma_device(struct ib_device *device) 11301da177e4SLinus Torvalds { 113199db9494SBart Van Assche struct device *parent = device->dev.parent; 11321da177e4SLinus Torvalds 11330957c29fSBart Van Assche WARN_ON_ONCE(device->dma_device); 11340957c29fSBart Van Assche if (device->dev.dma_ops) { 11350957c29fSBart Van Assche /* 11360957c29fSBart Van Assche * The caller provided custom DMA operations. Copy the 11370957c29fSBart Van Assche * DMA-related fields that are used by e.g. dma_alloc_coherent() 11380957c29fSBart Van Assche * into device->dev. 11390957c29fSBart Van Assche */ 11400957c29fSBart Van Assche device->dma_device = &device->dev; 114102ee9da3SBart Van Assche if (!device->dev.dma_mask) { 114202ee9da3SBart Van Assche if (parent) 114399db9494SBart Van Assche device->dev.dma_mask = parent->dma_mask; 114402ee9da3SBart Van Assche else 114502ee9da3SBart Van Assche WARN_ON_ONCE(true); 114602ee9da3SBart Van Assche } 114702ee9da3SBart Van Assche if (!device->dev.coherent_dma_mask) { 114802ee9da3SBart Van Assche if (parent) 11490957c29fSBart Van Assche device->dev.coherent_dma_mask = 11500957c29fSBart Van Assche parent->coherent_dma_mask; 115102ee9da3SBart Van Assche else 115202ee9da3SBart Van Assche WARN_ON_ONCE(true); 115302ee9da3SBart Van Assche } 11540957c29fSBart Van Assche } else { 11550957c29fSBart Van Assche /* 11560957c29fSBart Van Assche * The caller did not provide custom DMA operations. Use the 11570957c29fSBart Van Assche * DMA mapping operations of the parent device. 
11580957c29fSBart Van Assche */ 115902ee9da3SBart Van Assche WARN_ON_ONCE(!parent); 11600957c29fSBart Van Assche device->dma_device = parent; 11610957c29fSBart Van Assche } 1162d10bcf94SShiraz Saleem /* Setup default max segment size for all IB devices */ 1163d10bcf94SShiraz Saleem dma_set_max_seg_size(device->dma_device, SZ_2G); 1164d10bcf94SShiraz Saleem 1165548cb4fbSParav Pandit } 1166548cb4fbSParav Pandit 1167921eab11SJason Gunthorpe /* 1168921eab11SJason Gunthorpe * setup_device() allocates memory and sets up data that requires calling the 1169921eab11SJason Gunthorpe * device ops, this is the only reason these actions are not done during 1170921eab11SJason Gunthorpe * ib_alloc_device. It is undone by ib_dealloc_device(). 1171921eab11SJason Gunthorpe */ 1172548cb4fbSParav Pandit static int setup_device(struct ib_device *device) 1173548cb4fbSParav Pandit { 1174548cb4fbSParav Pandit struct ib_udata uhw = {.outlen = 0, .inlen = 0}; 1175548cb4fbSParav Pandit int ret; 1176548cb4fbSParav Pandit 1177921eab11SJason Gunthorpe setup_dma_device(device); 1178921eab11SJason Gunthorpe 1179548cb4fbSParav Pandit ret = ib_device_check_mandatory(device); 1180548cb4fbSParav Pandit if (ret) 1181548cb4fbSParav Pandit return ret; 1182548cb4fbSParav Pandit 11838ceb1357SJason Gunthorpe ret = setup_port_data(device); 1184548cb4fbSParav Pandit if (ret) { 11858ceb1357SJason Gunthorpe dev_warn(&device->dev, "Couldn't create per-port data\n"); 1186548cb4fbSParav Pandit return ret; 1187548cb4fbSParav Pandit } 1188548cb4fbSParav Pandit 1189548cb4fbSParav Pandit memset(&device->attrs, 0, sizeof(device->attrs)); 11903023a1e9SKamal Heib ret = device->ops.query_device(device, &device->attrs, &uhw); 1191548cb4fbSParav Pandit if (ret) { 1192548cb4fbSParav Pandit dev_warn(&device->dev, 1193548cb4fbSParav Pandit "Couldn't query the device attributes\n"); 1194d45f89d5SJason Gunthorpe return ret; 1195548cb4fbSParav Pandit } 1196548cb4fbSParav Pandit 1197548cb4fbSParav Pandit return 0; 
1198548cb4fbSParav Pandit } 1199548cb4fbSParav Pandit 1200921eab11SJason Gunthorpe static void disable_device(struct ib_device *device) 1201921eab11SJason Gunthorpe { 1202921eab11SJason Gunthorpe struct ib_client *client; 1203921eab11SJason Gunthorpe 1204921eab11SJason Gunthorpe WARN_ON(!refcount_read(&device->refcount)); 1205921eab11SJason Gunthorpe 1206921eab11SJason Gunthorpe down_write(&devices_rwsem); 1207921eab11SJason Gunthorpe xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); 1208921eab11SJason Gunthorpe up_write(&devices_rwsem); 1209921eab11SJason Gunthorpe 1210921eab11SJason Gunthorpe down_read(&clients_rwsem); 1211921eab11SJason Gunthorpe list_for_each_entry_reverse(client, &client_list, list) 1212921eab11SJason Gunthorpe remove_client_context(device, client->client_id); 1213921eab11SJason Gunthorpe up_read(&clients_rwsem); 1214921eab11SJason Gunthorpe 1215921eab11SJason Gunthorpe /* Pairs with refcount_set in enable_device */ 1216921eab11SJason Gunthorpe ib_device_put(device); 1217921eab11SJason Gunthorpe wait_for_completion(&device->unreg_completion); 1218c2261dd7SJason Gunthorpe 12194e0f7b90SParav Pandit /* 12204e0f7b90SParav Pandit * compat devices must be removed after device refcount drops to zero. 12214e0f7b90SParav Pandit * Otherwise init_net() may add more compatdevs after removing compat 12224e0f7b90SParav Pandit * devices and before device is disabled. 12234e0f7b90SParav Pandit */ 12244e0f7b90SParav Pandit remove_compat_devs(device); 1225921eab11SJason Gunthorpe } 1226921eab11SJason Gunthorpe 1227921eab11SJason Gunthorpe /* 1228921eab11SJason Gunthorpe * An enabled device is visible to all clients and to all the public facing 1229d0899892SJason Gunthorpe * APIs that return a device pointer. This always returns with a new get, even 1230d0899892SJason Gunthorpe * if it fails. 
1231921eab11SJason Gunthorpe */ 1232d0899892SJason Gunthorpe static int enable_device_and_get(struct ib_device *device) 1233921eab11SJason Gunthorpe { 1234921eab11SJason Gunthorpe struct ib_client *client; 1235921eab11SJason Gunthorpe unsigned long index; 1236d0899892SJason Gunthorpe int ret = 0; 1237921eab11SJason Gunthorpe 1238d0899892SJason Gunthorpe /* 1239d0899892SJason Gunthorpe * One ref belongs to the xa and the other belongs to this 1240d0899892SJason Gunthorpe * thread. This is needed to guard against parallel unregistration. 1241d0899892SJason Gunthorpe */ 1242d0899892SJason Gunthorpe refcount_set(&device->refcount, 2); 1243921eab11SJason Gunthorpe down_write(&devices_rwsem); 1244921eab11SJason Gunthorpe xa_set_mark(&devices, device->index, DEVICE_REGISTERED); 1245d0899892SJason Gunthorpe 1246d0899892SJason Gunthorpe /* 1247d0899892SJason Gunthorpe * By using downgrade_write() we ensure that no other thread can clear 1248d0899892SJason Gunthorpe * DEVICE_REGISTERED while we are completing the client setup. 
1249d0899892SJason Gunthorpe */ 1250d0899892SJason Gunthorpe downgrade_write(&devices_rwsem); 1251921eab11SJason Gunthorpe 1252ca22354bSJason Gunthorpe if (device->ops.enable_driver) { 1253ca22354bSJason Gunthorpe ret = device->ops.enable_driver(device); 1254ca22354bSJason Gunthorpe if (ret) 1255ca22354bSJason Gunthorpe goto out; 1256ca22354bSJason Gunthorpe } 1257ca22354bSJason Gunthorpe 1258921eab11SJason Gunthorpe down_read(&clients_rwsem); 1259921eab11SJason Gunthorpe xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { 1260921eab11SJason Gunthorpe ret = add_client_context(device, client); 1261d0899892SJason Gunthorpe if (ret) 1262d0899892SJason Gunthorpe break; 1263d0899892SJason Gunthorpe } 1264921eab11SJason Gunthorpe up_read(&clients_rwsem); 12654e0f7b90SParav Pandit if (!ret) 12664e0f7b90SParav Pandit ret = add_compat_devs(device); 1267ca22354bSJason Gunthorpe out: 1268d0899892SJason Gunthorpe up_read(&devices_rwsem); 1269921eab11SJason Gunthorpe return ret; 1270921eab11SJason Gunthorpe } 1271921eab11SJason Gunthorpe 1272548cb4fbSParav Pandit /** 1273548cb4fbSParav Pandit * ib_register_device - Register an IB device with IB core 1274548cb4fbSParav Pandit * @device:Device to register 1275548cb4fbSParav Pandit * 1276548cb4fbSParav Pandit * Low-level drivers use ib_register_device() to register their 1277548cb4fbSParav Pandit * devices with the IB core. All registered clients will receive a 1278548cb4fbSParav Pandit * callback for each device that is added. @device must be allocated 1279548cb4fbSParav Pandit * with ib_alloc_device(). 1280d0899892SJason Gunthorpe * 1281d0899892SJason Gunthorpe * If the driver uses ops.dealloc_driver and calls any ib_unregister_device() 1282d0899892SJason Gunthorpe * asynchronously then the device pointer may become freed as soon as this 1283d0899892SJason Gunthorpe * function returns. 
 */
int ib_register_device(struct ib_device *device, const char *name)
{
	int ret;

	/* Failures up to and including the cache setup need no unwinding here */
	ret = assign_name(device, name);
	if (ret)
		return ret;

	ret = setup_device(device);
	if (ret)
		return ret;

	ret = ib_cache_setup_one(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't set up InfiniBand P_Key/GID cache\n");
		return ret;
	}

	ib_device_register_rdmacg(device);

	ret = device_add(&device->dev);
	if (ret)
		goto cg_cleanup;

	ret = ib_device_register_sysfs(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't register device with driver model\n");
		goto dev_cleanup;
	}

	ret = enable_device_and_get(device);
	if (ret) {
		void (*dealloc_fn)(struct ib_device *);

		/*
		 * If we hit this error flow then we don't want to
		 * automatically dealloc the device since the caller is
		 * expected to call ib_dealloc_device() after
		 * ib_register_device() fails. This is tricky due to the
		 * possibility for a parallel unregistration along with this
		 * error flow. Since we have a refcount here we know any
		 * parallel flow is stopped in disable_device and will see the
		 * NULL pointers, causing the responsibility to
		 * ib_dealloc_device() to revert back to this thread.
		 */
		dealloc_fn = device->ops.dealloc_driver;
		device->ops.dealloc_driver = NULL;
		/* Drop this thread's reference so that disable_device() can finish */
		ib_device_put(device);
		__ib_unregister_device(device);
		/* Restore the hook for the caller's own ib_dealloc_device() */
		device->ops.dealloc_driver = dealloc_fn;
		return ret;
	}
	/* Release the reference enable_device_and_get() took for this thread */
	ib_device_put(device);

	return 0;

	/* Unwind in reverse order of the setup steps above */
dev_cleanup:
	device_del(&device->dev);
cg_cleanup:
	ib_device_unregister_rdmacg(device);
	ib_cache_cleanup_one(device);
	return ret;
}
EXPORT_SYMBOL(ib_register_device);

/*
 * Common unregistration path. Callers must hold a get on the device; for
 * example ib_unregister_device() pins the structure with get_device() around
 * this call.
 */
static void __ib_unregister_device(struct ib_device *ib_dev)
{
	/*
	 * We have a registration lock so that all the calls to unregister are
	 * fully fenced, once any unregister returns the device is truly
	 * unregistered even if multiple callers are unregistering it at the
	 * same time. This also interacts with the registration flow and
	 * provides sane semantics if register and unregister are racing.
	 */
	mutex_lock(&ib_dev->unregistration_lock);
	/* A zero refcount means there is nothing enabled left to tear down */
	if (!refcount_read(&ib_dev->refcount))
		goto out;

	disable_device(ib_dev);

	/* Expedite removing unregistered pointers from the hash table */
	free_netdevs(ib_dev);

	/* Undo the steps of ib_register_device() in reverse order */
	ib_device_unregister_sysfs(ib_dev);
	device_del(&ib_dev->dev);
	ib_device_unregister_rdmacg(ib_dev);
	ib_cache_cleanup_one(ib_dev);

	/*
	 * Drivers using the new flow may not call ib_dealloc_device except
	 * in error unwind prior to registration success.
	 */
	if (ib_dev->ops.dealloc_driver) {
		/* The caller's get must still be outstanding at this point */
		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
		ib_dealloc_device(ib_dev);
	}
out:
	mutex_unlock(&ib_dev->unregistration_lock);
}

/**
 * ib_unregister_device - Unregister an IB device
 * @ib_dev: The device to unregister
 *
 * Unregister an IB device.  All clients will receive a remove callback.
1393d0899892SJason Gunthorpe * 1394d0899892SJason Gunthorpe * Callers should call this routine only once, and protect against races with 1395d0899892SJason Gunthorpe * registration. Typically it should only be called as part of a remove 1396d0899892SJason Gunthorpe * callback in an implementation of driver core's struct device_driver and 1397d0899892SJason Gunthorpe * related. 1398d0899892SJason Gunthorpe * 1399d0899892SJason Gunthorpe * If ops.dealloc_driver is used then ib_dev will be freed upon return from 1400d0899892SJason Gunthorpe * this function. 14011da177e4SLinus Torvalds */ 1402d0899892SJason Gunthorpe void ib_unregister_device(struct ib_device *ib_dev) 14031da177e4SLinus Torvalds { 1404d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1405d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1406d0899892SJason Gunthorpe put_device(&ib_dev->dev); 14071da177e4SLinus Torvalds } 14081da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_device); 14091da177e4SLinus Torvalds 1410d0899892SJason Gunthorpe /** 1411d0899892SJason Gunthorpe * ib_unregister_device_and_put - Unregister a device while holding a 'get' 1412d0899892SJason Gunthorpe * device: The device to unregister 1413d0899892SJason Gunthorpe * 1414d0899892SJason Gunthorpe * This is the same as ib_unregister_device(), except it includes an internal 1415d0899892SJason Gunthorpe * ib_device_put() that should match a 'get' obtained by the caller. 1416d0899892SJason Gunthorpe * 1417d0899892SJason Gunthorpe * It is safe to call this routine concurrently from multiple threads while 1418d0899892SJason Gunthorpe * holding the 'get'. When the function returns the device is fully 1419d0899892SJason Gunthorpe * unregistered. 1420d0899892SJason Gunthorpe * 1421d0899892SJason Gunthorpe * Drivers using this flow MUST use the driver_unregister callback to clean up 1422d0899892SJason Gunthorpe * their resources associated with the device and dealloc it. 
1423d0899892SJason Gunthorpe */ 1424d0899892SJason Gunthorpe void ib_unregister_device_and_put(struct ib_device *ib_dev) 1425d0899892SJason Gunthorpe { 1426d0899892SJason Gunthorpe WARN_ON(!ib_dev->ops.dealloc_driver); 1427d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1428d0899892SJason Gunthorpe ib_device_put(ib_dev); 1429d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1430d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1431d0899892SJason Gunthorpe } 1432d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_device_and_put); 1433d0899892SJason Gunthorpe 1434d0899892SJason Gunthorpe /** 1435d0899892SJason Gunthorpe * ib_unregister_driver - Unregister all IB devices for a driver 1436d0899892SJason Gunthorpe * @driver_id: The driver to unregister 1437d0899892SJason Gunthorpe * 1438d0899892SJason Gunthorpe * This implements a fence for device unregistration. It only returns once all 1439d0899892SJason Gunthorpe * devices associated with the driver_id have fully completed their 1440d0899892SJason Gunthorpe * unregistration and returned from ib_unregister_device*(). 1441d0899892SJason Gunthorpe * 1442d0899892SJason Gunthorpe * If device's are not yet unregistered it goes ahead and starts unregistering 1443d0899892SJason Gunthorpe * them. 1444d0899892SJason Gunthorpe * 1445d0899892SJason Gunthorpe * This does not block creation of new devices with the given driver_id, that 1446d0899892SJason Gunthorpe * is the responsibility of the caller. 
1447d0899892SJason Gunthorpe */ 1448d0899892SJason Gunthorpe void ib_unregister_driver(enum rdma_driver_id driver_id) 1449d0899892SJason Gunthorpe { 1450d0899892SJason Gunthorpe struct ib_device *ib_dev; 1451d0899892SJason Gunthorpe unsigned long index; 1452d0899892SJason Gunthorpe 1453d0899892SJason Gunthorpe down_read(&devices_rwsem); 1454d0899892SJason Gunthorpe xa_for_each (&devices, index, ib_dev) { 1455d0899892SJason Gunthorpe if (ib_dev->driver_id != driver_id) 1456d0899892SJason Gunthorpe continue; 1457d0899892SJason Gunthorpe 1458d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1459d0899892SJason Gunthorpe up_read(&devices_rwsem); 1460d0899892SJason Gunthorpe 1461d0899892SJason Gunthorpe WARN_ON(!ib_dev->ops.dealloc_driver); 1462d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1463d0899892SJason Gunthorpe 1464d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1465d0899892SJason Gunthorpe down_read(&devices_rwsem); 1466d0899892SJason Gunthorpe } 1467d0899892SJason Gunthorpe up_read(&devices_rwsem); 1468d0899892SJason Gunthorpe } 1469d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_driver); 1470d0899892SJason Gunthorpe 1471d0899892SJason Gunthorpe static void ib_unregister_work(struct work_struct *work) 1472d0899892SJason Gunthorpe { 1473d0899892SJason Gunthorpe struct ib_device *ib_dev = 1474d0899892SJason Gunthorpe container_of(work, struct ib_device, unregistration_work); 1475d0899892SJason Gunthorpe 1476d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1477d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1478d0899892SJason Gunthorpe } 1479d0899892SJason Gunthorpe 1480d0899892SJason Gunthorpe /** 1481d0899892SJason Gunthorpe * ib_unregister_device_queued - Unregister a device using a work queue 1482d0899892SJason Gunthorpe * device: The device to unregister 1483d0899892SJason Gunthorpe * 1484d0899892SJason Gunthorpe * This schedules an asynchronous unregistration using a WQ for the device. 
 * A driver should use this to avoid holding locks while doing unregistration,
 * such as holding the RTNL lock.
 *
 * Drivers using this API must use ib_unregister_driver before module unload
 * to ensure that all scheduled unregistrations have completed.
 */
void ib_unregister_device_queued(struct ib_device *ib_dev)
{
	WARN_ON(!refcount_read(&ib_dev->refcount));
	WARN_ON(!ib_dev->ops.dealloc_driver);
	/* This reference travels with the work item; ib_unregister_work() puts it */
	get_device(&ib_dev->dev);
	/* queue_work() returns false if the work was already pending */
	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
		put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);

/*
 * The caller must pass in a device that has the kref held and the refcount
 * released. If the device is in cur_net and still registered then it is moved
 * into net.
 *
 * Returns -ENODEV if the device is no longer registered or no longer in
 * cur_net, otherwise the device_rename() error if there was one, otherwise
 * the result of re-enabling the device.
 */
static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net)
{
	int ret2 = -EINVAL;
	int ret;

	mutex_lock(&device->unregistration_lock);

	/*
	 * If a device is not under ib_device_get() or if the
	 * unregistration_lock is not held, the namespace can be changed, or
	 * it can be unregistered. Check again under the lock.
	 */
	if (refcount_read(&device->refcount) == 0 ||
	    !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
		ret = -ENODEV;
		goto out;
	}

	kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
	disable_device(device);

	/*
	 * At this point no one can be using the device, so it is safe to
	 * change the namespace.
	 */
	write_pnet(&device->coredev.rdma_net, net);

	down_read(&devices_rwsem);
	/*
	 * Currently rdma devices are system wide unique. So the device name
	 * is guaranteed free in the new namespace. Publish the new namespace
	 * at the sysfs level.
	 */
	ret = device_rename(&device->dev, dev_name(&device->dev));
	up_read(&devices_rwsem);
	if (ret) {
		dev_warn(&device->dev,
			 "%s: Couldn't rename device after namespace change\n",
			 __func__);
		/* Try and put things back and re-enable the device */
		write_pnet(&device->coredev.rdma_net, cur_net);
	}

	/* Re-enable in whichever namespace is now recorded in rdma_net */
	ret2 = enable_device_and_get(device);
	if (ret2) {
		/*
		 * This shouldn't really happen, but if it does, let the user
		 * retry at later point. So don't disable the device.
		 */
		dev_warn(&device->dev,
			 "%s: Couldn't re-enable device after namespace change\n",
			 __func__);
	}
	kobject_uevent(&device->dev.kobj, KOBJ_ADD);

	/* Drop the reference enable_device_and_get() took for this thread */
	ib_device_put(device);
out:
	mutex_unlock(&device->unregistration_lock);
	/* A rename failure takes precedence over a re-enable failure */
	if (ret)
		return ret;
	return ret2;
}

/**
 * ib_device_set_netns_put - Move a device into the net namespace of @ns_fd
 * @skb: netlink message used for the CAP_NET_ADMIN capability check
 * @dev: Device to move; the caller's reference is dropped with
 *	 ib_device_put() on every path, success or failure
 * @ns_fd: File descriptor identifying the target net namespace
 *
 * Returns 0 on success, the error from get_net_ns_by_fd(), -EPERM if the
 * sender lacks CAP_NET_ADMIN in the target namespace, -EOPNOTSUPP if the
 * device or the current configuration does not support the move, or the
 * result of rdma_dev_change_netns().
 */
int ib_device_set_netns_put(struct sk_buff *skb,
			    struct ib_device *dev, u32 ns_fd)
{
	struct net *net;
	int ret;

	net = get_net_ns_by_fd(ns_fd);
	if (IS_ERR(net)) {
		ret = PTR_ERR(net);
		goto net_err;
	}

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
		ret = -EPERM;
		goto ns_err;
	}

	/*
	 * Currently supported only for those providers which support
	 * disassociation and don't do port specific sysfs init. Once a
	 * port_cleanup infrastructure is implemented, this limitation will be
	 * removed.
	 */
	if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
	    ib_devices_shared_netns) {
		ret = -EOPNOTSUPP;
		goto ns_err;
	}

	/*
	 * rdma_dev_change_netns() wants the kref held and the refcount
	 * released, so swap the caller's reference for a struct device one.
	 */
	get_device(&dev->dev);
	ib_device_put(dev);
	ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
	put_device(&dev->dev);

	put_net(net);
	return ret;

ns_err:
	put_net(net);
net_err:
	ib_device_put(dev);
	return ret;
}

/*
 * Per net namespace hooks. The per-namespace data is a struct rdma_dev_net
 * identified through rdma_dev_net_id.
 */
static struct pernet_operations rdma_dev_net_ops = {
	.init = rdma_dev_init_net,
	.exit = rdma_dev_exit_net,
	.id = &rdma_dev_net_id,
	.size = sizeof(struct rdma_dev_net),
};

/*
 * Pick the next client_id for @client, insert the client into the clients
 * xarray, mark it CLIENT_REGISTERED and append it to client_list. Returns 0
 * on success or the xa_insert() error, in which case nothing was added.
 */
static int assign_client_id(struct ib_client *client)
{
	int ret;

	down_write(&clients_rwsem);
	/*
	 * The add/remove callbacks must be called in FIFO/LIFO order. To
	 * achieve this we assign client_ids so they are sorted in
	 * registration order, and retain a linked list we can reverse iterate
	 * to get the LIFO order. The extra linked list can go away if xarray
	 * learns to reverse iterate.
	 */
	if (list_empty(&client_list)) {
		client->client_id = 0;
	} else {
		struct ib_client *last;

		/* One past the id of the most recently registered client */
		last = list_last_entry(&client_list, struct ib_client, list);
		client->client_id = last->client_id + 1;
	}
	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
	if (ret)
		goto out;

	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
	list_add_tail(&client->list, &client_list);

out:
	up_write(&clients_rwsem);
	return ret;
}

/**
 * ib_register_client - Register an IB client
 * @client:Client to register
 *
 * Upper level users of the IB drivers can use ib_register_client() to
 * register callbacks for IB device addition and removal.  When an IB
 * device is added, each registered client's add method will be called
 * (in the order the clients were registered), and when a device is
 * removed, each client's remove method will be called (in the reverse
 * order that clients were registered).  In addition, when
 * ib_register_client() is called, the client will receive an add
 * callback for all devices already registered.
16651da177e4SLinus Torvalds */ 16661da177e4SLinus Torvalds int ib_register_client(struct ib_client *client) 16671da177e4SLinus Torvalds { 16681da177e4SLinus Torvalds struct ib_device *device; 16690df91bb6SJason Gunthorpe unsigned long index; 1670e59178d8SJason Gunthorpe int ret; 16711da177e4SLinus Torvalds 1672e59178d8SJason Gunthorpe ret = assign_client_id(client); 1673921eab11SJason Gunthorpe if (ret) 1674921eab11SJason Gunthorpe return ret; 1675921eab11SJason Gunthorpe 1676921eab11SJason Gunthorpe down_read(&devices_rwsem); 1677921eab11SJason Gunthorpe xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { 1678921eab11SJason Gunthorpe ret = add_client_context(device, client); 1679e59178d8SJason Gunthorpe if (ret) { 1680921eab11SJason Gunthorpe up_read(&devices_rwsem); 1681921eab11SJason Gunthorpe ib_unregister_client(client); 1682e59178d8SJason Gunthorpe return ret; 1683e59178d8SJason Gunthorpe } 1684921eab11SJason Gunthorpe } 1685921eab11SJason Gunthorpe up_read(&devices_rwsem); 16861da177e4SLinus Torvalds return 0; 16871da177e4SLinus Torvalds } 16881da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_client); 16891da177e4SLinus Torvalds 16901da177e4SLinus Torvalds /** 16911da177e4SLinus Torvalds * ib_unregister_client - Unregister an IB client 16921da177e4SLinus Torvalds * @client:Client to unregister 16931da177e4SLinus Torvalds * 16941da177e4SLinus Torvalds * Upper level users use ib_unregister_client() to remove their client 16951da177e4SLinus Torvalds * registration. When ib_unregister_client() is called, the client 16961da177e4SLinus Torvalds * will receive a remove callback for each IB device still registered. 1697921eab11SJason Gunthorpe * 1698921eab11SJason Gunthorpe * This is a full fence, once it returns no client callbacks will be called, 1699921eab11SJason Gunthorpe * or are running in another thread. 
17001da177e4SLinus Torvalds */ 17011da177e4SLinus Torvalds void ib_unregister_client(struct ib_client *client) 17021da177e4SLinus Torvalds { 17031da177e4SLinus Torvalds struct ib_device *device; 17040df91bb6SJason Gunthorpe unsigned long index; 17051da177e4SLinus Torvalds 1706921eab11SJason Gunthorpe down_write(&clients_rwsem); 1707e59178d8SJason Gunthorpe xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); 1708921eab11SJason Gunthorpe up_write(&clients_rwsem); 1709921eab11SJason Gunthorpe /* 1710921eab11SJason Gunthorpe * Every device still known must be serialized to make sure we are 1711921eab11SJason Gunthorpe * done with the client callbacks before we return. 1712921eab11SJason Gunthorpe */ 1713921eab11SJason Gunthorpe down_read(&devices_rwsem); 1714921eab11SJason Gunthorpe xa_for_each (&devices, index, device) 1715921eab11SJason Gunthorpe remove_client_context(device, client->client_id); 1716921eab11SJason Gunthorpe up_read(&devices_rwsem); 17175aa44bb9SHaggai Eran 1718921eab11SJason Gunthorpe down_write(&clients_rwsem); 1719e59178d8SJason Gunthorpe list_del(&client->list); 1720e59178d8SJason Gunthorpe xa_erase(&clients, client->client_id); 1721921eab11SJason Gunthorpe up_write(&clients_rwsem); 17221da177e4SLinus Torvalds } 17231da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_client); 17241da177e4SLinus Torvalds 17251da177e4SLinus Torvalds /** 17269cd330d3SKrishna Kumar * ib_set_client_data - Set IB client context 17271da177e4SLinus Torvalds * @device:Device to set context for 17281da177e4SLinus Torvalds * @client:Client to set context for 17291da177e4SLinus Torvalds * @data:Context to set 17301da177e4SLinus Torvalds * 17310df91bb6SJason Gunthorpe * ib_set_client_data() sets client context data that can be retrieved with 17320df91bb6SJason Gunthorpe * ib_get_client_data(). 
This can only be called while the client is 17330df91bb6SJason Gunthorpe * registered to the device, once the ib_client remove() callback returns this 17340df91bb6SJason Gunthorpe * cannot be called. 17351da177e4SLinus Torvalds */ 17361da177e4SLinus Torvalds void ib_set_client_data(struct ib_device *device, struct ib_client *client, 17371da177e4SLinus Torvalds void *data) 17381da177e4SLinus Torvalds { 17390df91bb6SJason Gunthorpe void *rc; 17401da177e4SLinus Torvalds 17410df91bb6SJason Gunthorpe if (WARN_ON(IS_ERR(data))) 17420df91bb6SJason Gunthorpe data = NULL; 17431da177e4SLinus Torvalds 17440df91bb6SJason Gunthorpe rc = xa_store(&device->client_data, client->client_id, data, 17450df91bb6SJason Gunthorpe GFP_KERNEL); 17460df91bb6SJason Gunthorpe WARN_ON(xa_is_err(rc)); 17471da177e4SLinus Torvalds } 17481da177e4SLinus Torvalds EXPORT_SYMBOL(ib_set_client_data); 17491da177e4SLinus Torvalds 17501da177e4SLinus Torvalds /** 17511da177e4SLinus Torvalds * ib_register_event_handler - Register an IB event handler 17521da177e4SLinus Torvalds * @event_handler:Handler to register 17531da177e4SLinus Torvalds * 17541da177e4SLinus Torvalds * ib_register_event_handler() registers an event handler that will be 17551da177e4SLinus Torvalds * called back when asynchronous IB events occur (as defined in 17561da177e4SLinus Torvalds * chapter 11 of the InfiniBand Architecture Specification). This 17571da177e4SLinus Torvalds * callback may occur in interrupt context. 
17581da177e4SLinus Torvalds */ 1759dcc9881eSLeon Romanovsky void ib_register_event_handler(struct ib_event_handler *event_handler) 17601da177e4SLinus Torvalds { 17611da177e4SLinus Torvalds unsigned long flags; 17621da177e4SLinus Torvalds 17631da177e4SLinus Torvalds spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); 17641da177e4SLinus Torvalds list_add_tail(&event_handler->list, 17651da177e4SLinus Torvalds &event_handler->device->event_handler_list); 17661da177e4SLinus Torvalds spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); 17671da177e4SLinus Torvalds } 17681da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_event_handler); 17691da177e4SLinus Torvalds 17701da177e4SLinus Torvalds /** 17711da177e4SLinus Torvalds * ib_unregister_event_handler - Unregister an event handler 17721da177e4SLinus Torvalds * @event_handler:Handler to unregister 17731da177e4SLinus Torvalds * 17741da177e4SLinus Torvalds * Unregister an event handler registered with 17751da177e4SLinus Torvalds * ib_register_event_handler(). 
17761da177e4SLinus Torvalds */ 1777dcc9881eSLeon Romanovsky void ib_unregister_event_handler(struct ib_event_handler *event_handler) 17781da177e4SLinus Torvalds { 17791da177e4SLinus Torvalds unsigned long flags; 17801da177e4SLinus Torvalds 17811da177e4SLinus Torvalds spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); 17821da177e4SLinus Torvalds list_del(&event_handler->list); 17831da177e4SLinus Torvalds spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); 17841da177e4SLinus Torvalds } 17851da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_event_handler); 17861da177e4SLinus Torvalds 17871da177e4SLinus Torvalds /** 17881da177e4SLinus Torvalds * ib_dispatch_event - Dispatch an asynchronous event 17891da177e4SLinus Torvalds * @event:Event to dispatch 17901da177e4SLinus Torvalds * 17911da177e4SLinus Torvalds * Low-level drivers must call ib_dispatch_event() to dispatch the 17921da177e4SLinus Torvalds * event to all registered event handlers when an asynchronous event 17931da177e4SLinus Torvalds * occurs. 
17941da177e4SLinus Torvalds */ 17951da177e4SLinus Torvalds void ib_dispatch_event(struct ib_event *event) 17961da177e4SLinus Torvalds { 17971da177e4SLinus Torvalds unsigned long flags; 17981da177e4SLinus Torvalds struct ib_event_handler *handler; 17991da177e4SLinus Torvalds 18001da177e4SLinus Torvalds spin_lock_irqsave(&event->device->event_handler_lock, flags); 18011da177e4SLinus Torvalds 18021da177e4SLinus Torvalds list_for_each_entry(handler, &event->device->event_handler_list, list) 18031da177e4SLinus Torvalds handler->handler(handler, event); 18041da177e4SLinus Torvalds 18051da177e4SLinus Torvalds spin_unlock_irqrestore(&event->device->event_handler_lock, flags); 18061da177e4SLinus Torvalds } 18071da177e4SLinus Torvalds EXPORT_SYMBOL(ib_dispatch_event); 18081da177e4SLinus Torvalds 18091da177e4SLinus Torvalds /** 18101da177e4SLinus Torvalds * ib_query_port - Query IB port attributes 18111da177e4SLinus Torvalds * @device:Device to query 18121da177e4SLinus Torvalds * @port_num:Port number to query 18131da177e4SLinus Torvalds * @port_attr:Port attributes 18141da177e4SLinus Torvalds * 18151da177e4SLinus Torvalds * ib_query_port() returns the attributes of a port through the 18161da177e4SLinus Torvalds * @port_attr pointer. 
18171da177e4SLinus Torvalds */ 18181da177e4SLinus Torvalds int ib_query_port(struct ib_device *device, 18191da177e4SLinus Torvalds u8 port_num, 18201da177e4SLinus Torvalds struct ib_port_attr *port_attr) 18211da177e4SLinus Torvalds { 1822fad61ad4SEli Cohen union ib_gid gid; 1823fad61ad4SEli Cohen int err; 1824fad61ad4SEli Cohen 182524dc831bSYuval Shaia if (!rdma_is_port_valid(device, port_num)) 1826116c0074SRoland Dreier return -EINVAL; 1827116c0074SRoland Dreier 1828fad61ad4SEli Cohen memset(port_attr, 0, sizeof(*port_attr)); 18293023a1e9SKamal Heib err = device->ops.query_port(device, port_num, port_attr); 1830fad61ad4SEli Cohen if (err || port_attr->subnet_prefix) 1831fad61ad4SEli Cohen return err; 1832fad61ad4SEli Cohen 1833d7012467SEli Cohen if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) 1834d7012467SEli Cohen return 0; 1835d7012467SEli Cohen 18363023a1e9SKamal Heib err = device->ops.query_gid(device, port_num, 0, &gid); 1837fad61ad4SEli Cohen if (err) 1838fad61ad4SEli Cohen return err; 1839fad61ad4SEli Cohen 1840fad61ad4SEli Cohen port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); 1841fad61ad4SEli Cohen return 0; 18421da177e4SLinus Torvalds } 18431da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_port); 18441da177e4SLinus Torvalds 1845324e227eSJason Gunthorpe static void add_ndev_hash(struct ib_port_data *pdata) 1846324e227eSJason Gunthorpe { 1847324e227eSJason Gunthorpe unsigned long flags; 1848324e227eSJason Gunthorpe 1849324e227eSJason Gunthorpe might_sleep(); 1850324e227eSJason Gunthorpe 1851324e227eSJason Gunthorpe spin_lock_irqsave(&ndev_hash_lock, flags); 1852324e227eSJason Gunthorpe if (hash_hashed(&pdata->ndev_hash_link)) { 1853324e227eSJason Gunthorpe hash_del_rcu(&pdata->ndev_hash_link); 1854324e227eSJason Gunthorpe spin_unlock_irqrestore(&ndev_hash_lock, flags); 1855324e227eSJason Gunthorpe /* 1856324e227eSJason Gunthorpe * We cannot do hash_add_rcu after a hash_del_rcu until the 1857324e227eSJason Gunthorpe 
* grace period 1858324e227eSJason Gunthorpe */ 1859324e227eSJason Gunthorpe synchronize_rcu(); 1860324e227eSJason Gunthorpe spin_lock_irqsave(&ndev_hash_lock, flags); 1861324e227eSJason Gunthorpe } 1862324e227eSJason Gunthorpe if (pdata->netdev) 1863324e227eSJason Gunthorpe hash_add_rcu(ndev_hash, &pdata->ndev_hash_link, 1864324e227eSJason Gunthorpe (uintptr_t)pdata->netdev); 1865324e227eSJason Gunthorpe spin_unlock_irqrestore(&ndev_hash_lock, flags); 1866324e227eSJason Gunthorpe } 1867324e227eSJason Gunthorpe 18681da177e4SLinus Torvalds /** 1869c2261dd7SJason Gunthorpe * ib_device_set_netdev - Associate the ib_dev with an underlying net_device 1870c2261dd7SJason Gunthorpe * @ib_dev: Device to modify 1871c2261dd7SJason Gunthorpe * @ndev: net_device to affiliate, may be NULL 1872c2261dd7SJason Gunthorpe * @port: IB port the net_device is connected to 1873c2261dd7SJason Gunthorpe * 1874c2261dd7SJason Gunthorpe * Drivers should use this to link the ib_device to a netdev so the netdev 1875c2261dd7SJason Gunthorpe * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be 1876c2261dd7SJason Gunthorpe * affiliated with any port. 1877c2261dd7SJason Gunthorpe * 1878c2261dd7SJason Gunthorpe * The caller must ensure that the given ndev is not unregistered or 1879c2261dd7SJason Gunthorpe * unregistering, and that either the ib_device is unregistered or 1880c2261dd7SJason Gunthorpe * ib_device_set_netdev() is called with NULL when the ndev sends a 1881c2261dd7SJason Gunthorpe * NETDEV_UNREGISTER event. 
1882c2261dd7SJason Gunthorpe */ 1883c2261dd7SJason Gunthorpe int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, 1884c2261dd7SJason Gunthorpe unsigned int port) 1885c2261dd7SJason Gunthorpe { 1886c2261dd7SJason Gunthorpe struct net_device *old_ndev; 1887c2261dd7SJason Gunthorpe struct ib_port_data *pdata; 1888c2261dd7SJason Gunthorpe unsigned long flags; 1889c2261dd7SJason Gunthorpe int ret; 1890c2261dd7SJason Gunthorpe 1891c2261dd7SJason Gunthorpe /* 1892c2261dd7SJason Gunthorpe * Drivers wish to call this before ib_register_driver, so we have to 1893c2261dd7SJason Gunthorpe * setup the port data early. 1894c2261dd7SJason Gunthorpe */ 1895c2261dd7SJason Gunthorpe ret = alloc_port_data(ib_dev); 1896c2261dd7SJason Gunthorpe if (ret) 1897c2261dd7SJason Gunthorpe return ret; 1898c2261dd7SJason Gunthorpe 1899c2261dd7SJason Gunthorpe if (!rdma_is_port_valid(ib_dev, port)) 1900c2261dd7SJason Gunthorpe return -EINVAL; 1901c2261dd7SJason Gunthorpe 1902c2261dd7SJason Gunthorpe pdata = &ib_dev->port_data[port]; 1903c2261dd7SJason Gunthorpe spin_lock_irqsave(&pdata->netdev_lock, flags); 1904324e227eSJason Gunthorpe old_ndev = rcu_dereference_protected( 1905324e227eSJason Gunthorpe pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); 1906324e227eSJason Gunthorpe if (old_ndev == ndev) { 1907c2261dd7SJason Gunthorpe spin_unlock_irqrestore(&pdata->netdev_lock, flags); 1908c2261dd7SJason Gunthorpe return 0; 1909c2261dd7SJason Gunthorpe } 1910c2261dd7SJason Gunthorpe 1911c2261dd7SJason Gunthorpe if (ndev) 1912c2261dd7SJason Gunthorpe dev_hold(ndev); 1913324e227eSJason Gunthorpe rcu_assign_pointer(pdata->netdev, ndev); 1914c2261dd7SJason Gunthorpe spin_unlock_irqrestore(&pdata->netdev_lock, flags); 1915c2261dd7SJason Gunthorpe 1916324e227eSJason Gunthorpe add_ndev_hash(pdata); 1917c2261dd7SJason Gunthorpe if (old_ndev) 1918c2261dd7SJason Gunthorpe dev_put(old_ndev); 1919c2261dd7SJason Gunthorpe 1920c2261dd7SJason Gunthorpe return 0; 1921c2261dd7SJason 
Gunthorpe } 1922c2261dd7SJason Gunthorpe EXPORT_SYMBOL(ib_device_set_netdev); 1923c2261dd7SJason Gunthorpe 1924c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev) 1925c2261dd7SJason Gunthorpe { 1926c2261dd7SJason Gunthorpe unsigned long flags; 1927c2261dd7SJason Gunthorpe unsigned int port; 1928c2261dd7SJason Gunthorpe 1929c2261dd7SJason Gunthorpe rdma_for_each_port (ib_dev, port) { 1930c2261dd7SJason Gunthorpe struct ib_port_data *pdata = &ib_dev->port_data[port]; 1931324e227eSJason Gunthorpe struct net_device *ndev; 1932c2261dd7SJason Gunthorpe 1933c2261dd7SJason Gunthorpe spin_lock_irqsave(&pdata->netdev_lock, flags); 1934324e227eSJason Gunthorpe ndev = rcu_dereference_protected( 1935324e227eSJason Gunthorpe pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); 1936324e227eSJason Gunthorpe if (ndev) { 1937324e227eSJason Gunthorpe spin_lock(&ndev_hash_lock); 1938324e227eSJason Gunthorpe hash_del_rcu(&pdata->ndev_hash_link); 1939324e227eSJason Gunthorpe spin_unlock(&ndev_hash_lock); 1940324e227eSJason Gunthorpe 1941324e227eSJason Gunthorpe /* 1942324e227eSJason Gunthorpe * If this is the last dev_put there is still a 1943324e227eSJason Gunthorpe * synchronize_rcu before the netdev is kfreed, so we 1944324e227eSJason Gunthorpe * can continue to rely on unlocked pointer 1945324e227eSJason Gunthorpe * comparisons after the put 1946324e227eSJason Gunthorpe */ 1947324e227eSJason Gunthorpe rcu_assign_pointer(pdata->netdev, NULL); 1948324e227eSJason Gunthorpe dev_put(ndev); 1949c2261dd7SJason Gunthorpe } 1950c2261dd7SJason Gunthorpe spin_unlock_irqrestore(&pdata->netdev_lock, flags); 1951c2261dd7SJason Gunthorpe } 1952c2261dd7SJason Gunthorpe } 1953c2261dd7SJason Gunthorpe 1954c2261dd7SJason Gunthorpe struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, 1955c2261dd7SJason Gunthorpe unsigned int port) 1956c2261dd7SJason Gunthorpe { 1957c2261dd7SJason Gunthorpe struct ib_port_data *pdata; 1958c2261dd7SJason Gunthorpe struct net_device 
*res; 1959c2261dd7SJason Gunthorpe 1960c2261dd7SJason Gunthorpe if (!rdma_is_port_valid(ib_dev, port)) 1961c2261dd7SJason Gunthorpe return NULL; 1962c2261dd7SJason Gunthorpe 1963c2261dd7SJason Gunthorpe pdata = &ib_dev->port_data[port]; 1964c2261dd7SJason Gunthorpe 1965c2261dd7SJason Gunthorpe /* 1966c2261dd7SJason Gunthorpe * New drivers should use ib_device_set_netdev() not the legacy 1967c2261dd7SJason Gunthorpe * get_netdev(). 1968c2261dd7SJason Gunthorpe */ 1969c2261dd7SJason Gunthorpe if (ib_dev->ops.get_netdev) 1970c2261dd7SJason Gunthorpe res = ib_dev->ops.get_netdev(ib_dev, port); 1971c2261dd7SJason Gunthorpe else { 1972c2261dd7SJason Gunthorpe spin_lock(&pdata->netdev_lock); 1973324e227eSJason Gunthorpe res = rcu_dereference_protected( 1974324e227eSJason Gunthorpe pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); 1975c2261dd7SJason Gunthorpe if (res) 1976c2261dd7SJason Gunthorpe dev_hold(res); 1977c2261dd7SJason Gunthorpe spin_unlock(&pdata->netdev_lock); 1978c2261dd7SJason Gunthorpe } 1979c2261dd7SJason Gunthorpe 1980c2261dd7SJason Gunthorpe /* 1981c2261dd7SJason Gunthorpe * If we are starting to unregister expedite things by preventing 1982c2261dd7SJason Gunthorpe * propagation of an unregistering netdev. 
1983c2261dd7SJason Gunthorpe */ 1984c2261dd7SJason Gunthorpe if (res && res->reg_state != NETREG_REGISTERED) { 1985c2261dd7SJason Gunthorpe dev_put(res); 1986c2261dd7SJason Gunthorpe return NULL; 1987c2261dd7SJason Gunthorpe } 1988c2261dd7SJason Gunthorpe 1989c2261dd7SJason Gunthorpe return res; 1990c2261dd7SJason Gunthorpe } 1991c2261dd7SJason Gunthorpe 1992c2261dd7SJason Gunthorpe /** 1993324e227eSJason Gunthorpe * ib_device_get_by_netdev - Find an IB device associated with a netdev 1994324e227eSJason Gunthorpe * @ndev: netdev to locate 1995324e227eSJason Gunthorpe * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) 1996324e227eSJason Gunthorpe * 1997324e227eSJason Gunthorpe * Find and hold an ib_device that is associated with a netdev via 1998324e227eSJason Gunthorpe * ib_device_set_netdev(). The caller must call ib_device_put() on the 1999324e227eSJason Gunthorpe * returned pointer. 2000324e227eSJason Gunthorpe */ 2001324e227eSJason Gunthorpe struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, 2002324e227eSJason Gunthorpe enum rdma_driver_id driver_id) 2003324e227eSJason Gunthorpe { 2004324e227eSJason Gunthorpe struct ib_device *res = NULL; 2005324e227eSJason Gunthorpe struct ib_port_data *cur; 2006324e227eSJason Gunthorpe 2007324e227eSJason Gunthorpe rcu_read_lock(); 2008324e227eSJason Gunthorpe hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link, 2009324e227eSJason Gunthorpe (uintptr_t)ndev) { 2010324e227eSJason Gunthorpe if (rcu_access_pointer(cur->netdev) == ndev && 2011324e227eSJason Gunthorpe (driver_id == RDMA_DRIVER_UNKNOWN || 2012324e227eSJason Gunthorpe cur->ib_dev->driver_id == driver_id) && 2013324e227eSJason Gunthorpe ib_device_try_get(cur->ib_dev)) { 2014324e227eSJason Gunthorpe res = cur->ib_dev; 2015324e227eSJason Gunthorpe break; 2016324e227eSJason Gunthorpe } 2017324e227eSJason Gunthorpe } 2018324e227eSJason Gunthorpe rcu_read_unlock(); 2019324e227eSJason Gunthorpe 2020324e227eSJason Gunthorpe 
return res; 2021324e227eSJason Gunthorpe } 2022324e227eSJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_netdev); 2023324e227eSJason Gunthorpe 2024324e227eSJason Gunthorpe /** 202503db3a2dSMatan Barak * ib_enum_roce_netdev - enumerate all RoCE ports 202603db3a2dSMatan Barak * @ib_dev : IB device we want to query 202703db3a2dSMatan Barak * @filter: Should we call the callback? 202803db3a2dSMatan Barak * @filter_cookie: Cookie passed to filter 202903db3a2dSMatan Barak * @cb: Callback to call for each found RoCE ports 203003db3a2dSMatan Barak * @cookie: Cookie passed back to the callback 203103db3a2dSMatan Barak * 203203db3a2dSMatan Barak * Enumerates all of the physical RoCE ports of ib_dev 203303db3a2dSMatan Barak * which are related to netdevice and calls callback() on each 203403db3a2dSMatan Barak * device for which filter() function returns non zero. 203503db3a2dSMatan Barak */ 203603db3a2dSMatan Barak void ib_enum_roce_netdev(struct ib_device *ib_dev, 203703db3a2dSMatan Barak roce_netdev_filter filter, 203803db3a2dSMatan Barak void *filter_cookie, 203903db3a2dSMatan Barak roce_netdev_callback cb, 204003db3a2dSMatan Barak void *cookie) 204103db3a2dSMatan Barak { 2042ea1075edSJason Gunthorpe unsigned int port; 204303db3a2dSMatan Barak 2044ea1075edSJason Gunthorpe rdma_for_each_port (ib_dev, port) 204503db3a2dSMatan Barak if (rdma_protocol_roce(ib_dev, port)) { 2046c2261dd7SJason Gunthorpe struct net_device *idev = 2047c2261dd7SJason Gunthorpe ib_device_get_netdev(ib_dev, port); 204803db3a2dSMatan Barak 204903db3a2dSMatan Barak if (filter(ib_dev, port, idev, filter_cookie)) 205003db3a2dSMatan Barak cb(ib_dev, port, idev, cookie); 205103db3a2dSMatan Barak 205203db3a2dSMatan Barak if (idev) 205303db3a2dSMatan Barak dev_put(idev); 205403db3a2dSMatan Barak } 205503db3a2dSMatan Barak } 205603db3a2dSMatan Barak 205703db3a2dSMatan Barak /** 205803db3a2dSMatan Barak * ib_enum_all_roce_netdevs - enumerate all RoCE devices 205903db3a2dSMatan Barak * @filter: Should we call the 
callback? 206003db3a2dSMatan Barak * @filter_cookie: Cookie passed to filter 206103db3a2dSMatan Barak * @cb: Callback to call for each found RoCE ports 206203db3a2dSMatan Barak * @cookie: Cookie passed back to the callback 206303db3a2dSMatan Barak * 206403db3a2dSMatan Barak * Enumerates all RoCE devices' physical ports which are related 206503db3a2dSMatan Barak * to netdevices and calls callback() on each device for which 206603db3a2dSMatan Barak * filter() function returns non zero. 206703db3a2dSMatan Barak */ 206803db3a2dSMatan Barak void ib_enum_all_roce_netdevs(roce_netdev_filter filter, 206903db3a2dSMatan Barak void *filter_cookie, 207003db3a2dSMatan Barak roce_netdev_callback cb, 207103db3a2dSMatan Barak void *cookie) 207203db3a2dSMatan Barak { 207303db3a2dSMatan Barak struct ib_device *dev; 20740df91bb6SJason Gunthorpe unsigned long index; 207503db3a2dSMatan Barak 2076921eab11SJason Gunthorpe down_read(&devices_rwsem); 20770df91bb6SJason Gunthorpe xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) 207803db3a2dSMatan Barak ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); 2079921eab11SJason Gunthorpe up_read(&devices_rwsem); 208003db3a2dSMatan Barak } 208103db3a2dSMatan Barak 208203db3a2dSMatan Barak /** 20838030c835SLeon Romanovsky * ib_enum_all_devs - enumerate all ib_devices 20848030c835SLeon Romanovsky * @cb: Callback to call for each found ib_device 20858030c835SLeon Romanovsky * 20868030c835SLeon Romanovsky * Enumerates all ib_devices and calls callback() on each device. 
20878030c835SLeon Romanovsky */ 20888030c835SLeon Romanovsky int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, 20898030c835SLeon Romanovsky struct netlink_callback *cb) 20908030c835SLeon Romanovsky { 20910df91bb6SJason Gunthorpe unsigned long index; 20928030c835SLeon Romanovsky struct ib_device *dev; 20938030c835SLeon Romanovsky unsigned int idx = 0; 20948030c835SLeon Romanovsky int ret = 0; 20958030c835SLeon Romanovsky 2096921eab11SJason Gunthorpe down_read(&devices_rwsem); 20970df91bb6SJason Gunthorpe xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 209837eeab55SParav Pandit if (!rdma_dev_access_netns(dev, sock_net(skb->sk))) 209937eeab55SParav Pandit continue; 210037eeab55SParav Pandit 21018030c835SLeon Romanovsky ret = nldev_cb(dev, skb, cb, idx); 21028030c835SLeon Romanovsky if (ret) 21038030c835SLeon Romanovsky break; 21048030c835SLeon Romanovsky idx++; 21058030c835SLeon Romanovsky } 2106921eab11SJason Gunthorpe up_read(&devices_rwsem); 21078030c835SLeon Romanovsky return ret; 21088030c835SLeon Romanovsky } 21098030c835SLeon Romanovsky 21108030c835SLeon Romanovsky /** 21111da177e4SLinus Torvalds * ib_query_pkey - Get P_Key table entry 21121da177e4SLinus Torvalds * @device:Device to query 21131da177e4SLinus Torvalds * @port_num:Port number to query 21141da177e4SLinus Torvalds * @index:P_Key table index to query 21151da177e4SLinus Torvalds * @pkey:Returned P_Key 21161da177e4SLinus Torvalds * 21171da177e4SLinus Torvalds * ib_query_pkey() fetches the specified P_Key table entry. 
21181da177e4SLinus Torvalds */ 21191da177e4SLinus Torvalds int ib_query_pkey(struct ib_device *device, 21201da177e4SLinus Torvalds u8 port_num, u16 index, u16 *pkey) 21211da177e4SLinus Torvalds { 21229af3f5cfSYuval Shaia if (!rdma_is_port_valid(device, port_num)) 21239af3f5cfSYuval Shaia return -EINVAL; 21249af3f5cfSYuval Shaia 21253023a1e9SKamal Heib return device->ops.query_pkey(device, port_num, index, pkey); 21261da177e4SLinus Torvalds } 21271da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_pkey); 21281da177e4SLinus Torvalds 21291da177e4SLinus Torvalds /** 21301da177e4SLinus Torvalds * ib_modify_device - Change IB device attributes 21311da177e4SLinus Torvalds * @device:Device to modify 21321da177e4SLinus Torvalds * @device_modify_mask:Mask of attributes to change 21331da177e4SLinus Torvalds * @device_modify:New attribute values 21341da177e4SLinus Torvalds * 21351da177e4SLinus Torvalds * ib_modify_device() changes a device's attributes as specified by 21361da177e4SLinus Torvalds * the @device_modify_mask and @device_modify structure. 21371da177e4SLinus Torvalds */ 21381da177e4SLinus Torvalds int ib_modify_device(struct ib_device *device, 21391da177e4SLinus Torvalds int device_modify_mask, 21401da177e4SLinus Torvalds struct ib_device_modify *device_modify) 21411da177e4SLinus Torvalds { 21423023a1e9SKamal Heib if (!device->ops.modify_device) 214310e1b54bSBart Van Assche return -ENOSYS; 214410e1b54bSBart Van Assche 21453023a1e9SKamal Heib return device->ops.modify_device(device, device_modify_mask, 21461da177e4SLinus Torvalds device_modify); 21471da177e4SLinus Torvalds } 21481da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_device); 21491da177e4SLinus Torvalds 21501da177e4SLinus Torvalds /** 21511da177e4SLinus Torvalds * ib_modify_port - Modifies the attributes for the specified port. 21521da177e4SLinus Torvalds * @device: The device to modify. 21531da177e4SLinus Torvalds * @port_num: The number of the port to modify. 
21541da177e4SLinus Torvalds * @port_modify_mask: Mask used to specify which attributes of the port 21551da177e4SLinus Torvalds * to change. 21561da177e4SLinus Torvalds * @port_modify: New attribute values for the port. 21571da177e4SLinus Torvalds * 21581da177e4SLinus Torvalds * ib_modify_port() changes a port's attributes as specified by the 21591da177e4SLinus Torvalds * @port_modify_mask and @port_modify structure. 21601da177e4SLinus Torvalds */ 21611da177e4SLinus Torvalds int ib_modify_port(struct ib_device *device, 21621da177e4SLinus Torvalds u8 port_num, int port_modify_mask, 21631da177e4SLinus Torvalds struct ib_port_modify *port_modify) 21641da177e4SLinus Torvalds { 216561e0962dSSelvin Xavier int rc; 216610e1b54bSBart Van Assche 216724dc831bSYuval Shaia if (!rdma_is_port_valid(device, port_num)) 2168116c0074SRoland Dreier return -EINVAL; 2169116c0074SRoland Dreier 21703023a1e9SKamal Heib if (device->ops.modify_port) 21713023a1e9SKamal Heib rc = device->ops.modify_port(device, port_num, 21723023a1e9SKamal Heib port_modify_mask, 21731da177e4SLinus Torvalds port_modify); 217461e0962dSSelvin Xavier else 217561e0962dSSelvin Xavier rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; 217661e0962dSSelvin Xavier return rc; 21771da177e4SLinus Torvalds } 21781da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_port); 21791da177e4SLinus Torvalds 21805eb620c8SYosef Etigin /** 21815eb620c8SYosef Etigin * ib_find_gid - Returns the port number and GID table index where 2182dbb12562SParav Pandit * a specified GID value occurs. Its searches only for IB link layer. 21835eb620c8SYosef Etigin * @device: The device to query. 21845eb620c8SYosef Etigin * @gid: The GID value to search for. 21855eb620c8SYosef Etigin * @port_num: The port number of the device where the GID value was found. 21865eb620c8SYosef Etigin * @index: The index into the GID table where the GID was found. This 21875eb620c8SYosef Etigin * parameter may be NULL. 
21885eb620c8SYosef Etigin */ 21895eb620c8SYosef Etigin int ib_find_gid(struct ib_device *device, union ib_gid *gid, 2190b26c4a11SParav Pandit u8 *port_num, u16 *index) 21915eb620c8SYosef Etigin { 21925eb620c8SYosef Etigin union ib_gid tmp_gid; 2193ea1075edSJason Gunthorpe unsigned int port; 2194ea1075edSJason Gunthorpe int ret, i; 21955eb620c8SYosef Etigin 2196ea1075edSJason Gunthorpe rdma_for_each_port (device, port) { 219722d24f75SParav Pandit if (!rdma_protocol_ib(device, port)) 2198b39ffa1dSMatan Barak continue; 2199b39ffa1dSMatan Barak 22008ceb1357SJason Gunthorpe for (i = 0; i < device->port_data[port].immutable.gid_tbl_len; 22018ceb1357SJason Gunthorpe ++i) { 22021dfce294SParav Pandit ret = rdma_query_gid(device, port, i, &tmp_gid); 22035eb620c8SYosef Etigin if (ret) 22045eb620c8SYosef Etigin return ret; 22055eb620c8SYosef Etigin if (!memcmp(&tmp_gid, gid, sizeof *gid)) { 22065eb620c8SYosef Etigin *port_num = port; 22075eb620c8SYosef Etigin if (index) 22085eb620c8SYosef Etigin *index = i; 22095eb620c8SYosef Etigin return 0; 22105eb620c8SYosef Etigin } 22115eb620c8SYosef Etigin } 22125eb620c8SYosef Etigin } 22135eb620c8SYosef Etigin 22145eb620c8SYosef Etigin return -ENOENT; 22155eb620c8SYosef Etigin } 22165eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_gid); 22175eb620c8SYosef Etigin 22185eb620c8SYosef Etigin /** 22195eb620c8SYosef Etigin * ib_find_pkey - Returns the PKey table index where a specified 22205eb620c8SYosef Etigin * PKey value occurs. 22215eb620c8SYosef Etigin * @device: The device to query. 22225eb620c8SYosef Etigin * @port_num: The port number of the device to search for the PKey. 22235eb620c8SYosef Etigin * @pkey: The PKey value to search for. 22245eb620c8SYosef Etigin * @index: The index into the PKey table where the PKey was found. 
22255eb620c8SYosef Etigin */ 22265eb620c8SYosef Etigin int ib_find_pkey(struct ib_device *device, 22275eb620c8SYosef Etigin u8 port_num, u16 pkey, u16 *index) 22285eb620c8SYosef Etigin { 22295eb620c8SYosef Etigin int ret, i; 22305eb620c8SYosef Etigin u16 tmp_pkey; 2231ff7166c4SJack Morgenstein int partial_ix = -1; 22325eb620c8SYosef Etigin 22338ceb1357SJason Gunthorpe for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len; 22348ceb1357SJason Gunthorpe ++i) { 22355eb620c8SYosef Etigin ret = ib_query_pkey(device, port_num, i, &tmp_pkey); 22365eb620c8SYosef Etigin if (ret) 22375eb620c8SYosef Etigin return ret; 223836026eccSMoni Shoua if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { 2239ff7166c4SJack Morgenstein /* if there is full-member pkey take it.*/ 2240ff7166c4SJack Morgenstein if (tmp_pkey & 0x8000) { 22415eb620c8SYosef Etigin *index = i; 22425eb620c8SYosef Etigin return 0; 22435eb620c8SYosef Etigin } 2244ff7166c4SJack Morgenstein if (partial_ix < 0) 2245ff7166c4SJack Morgenstein partial_ix = i; 2246ff7166c4SJack Morgenstein } 22475eb620c8SYosef Etigin } 22485eb620c8SYosef Etigin 2249ff7166c4SJack Morgenstein /*no full-member, if exists take the limited*/ 2250ff7166c4SJack Morgenstein if (partial_ix >= 0) { 2251ff7166c4SJack Morgenstein *index = partial_ix; 2252ff7166c4SJack Morgenstein return 0; 2253ff7166c4SJack Morgenstein } 22545eb620c8SYosef Etigin return -ENOENT; 22555eb620c8SYosef Etigin } 22565eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_pkey); 22575eb620c8SYosef Etigin 22589268f72dSYotam Kenneth /** 22599268f72dSYotam Kenneth * ib_get_net_dev_by_params() - Return the appropriate net_dev 22609268f72dSYotam Kenneth * for a received CM request 22619268f72dSYotam Kenneth * @dev: An RDMA device on which the request has been received. 22629268f72dSYotam Kenneth * @port: Port number on the RDMA device. 22639268f72dSYotam Kenneth * @pkey: The Pkey the request came on. 22649268f72dSYotam Kenneth * @gid: A GID that the net_dev uses to communicate. 
22659268f72dSYotam Kenneth * @addr: Contains the IP address that the request specified as its 22669268f72dSYotam Kenneth * destination. 2267921eab11SJason Gunthorpe * 22689268f72dSYotam Kenneth */ 22699268f72dSYotam Kenneth struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, 22709268f72dSYotam Kenneth u8 port, 22719268f72dSYotam Kenneth u16 pkey, 22729268f72dSYotam Kenneth const union ib_gid *gid, 22739268f72dSYotam Kenneth const struct sockaddr *addr) 22749268f72dSYotam Kenneth { 22759268f72dSYotam Kenneth struct net_device *net_dev = NULL; 22760df91bb6SJason Gunthorpe unsigned long index; 22770df91bb6SJason Gunthorpe void *client_data; 22789268f72dSYotam Kenneth 22799268f72dSYotam Kenneth if (!rdma_protocol_ib(dev, port)) 22809268f72dSYotam Kenneth return NULL; 22819268f72dSYotam Kenneth 2282921eab11SJason Gunthorpe /* 2283921eab11SJason Gunthorpe * Holding the read side guarantees that the client will not become 2284921eab11SJason Gunthorpe * unregistered while we are calling get_net_dev_by_params() 2285921eab11SJason Gunthorpe */ 2286921eab11SJason Gunthorpe down_read(&dev->client_data_rwsem); 22870df91bb6SJason Gunthorpe xan_for_each_marked (&dev->client_data, index, client_data, 22880df91bb6SJason Gunthorpe CLIENT_DATA_REGISTERED) { 22890df91bb6SJason Gunthorpe struct ib_client *client = xa_load(&clients, index); 22909268f72dSYotam Kenneth 22910df91bb6SJason Gunthorpe if (!client || !client->get_net_dev_by_params) 22929268f72dSYotam Kenneth continue; 22939268f72dSYotam Kenneth 22940df91bb6SJason Gunthorpe net_dev = client->get_net_dev_by_params(dev, port, pkey, gid, 22950df91bb6SJason Gunthorpe addr, client_data); 22969268f72dSYotam Kenneth if (net_dev) 22979268f72dSYotam Kenneth break; 22989268f72dSYotam Kenneth } 2299921eab11SJason Gunthorpe up_read(&dev->client_data_rwsem); 23009268f72dSYotam Kenneth 23019268f72dSYotam Kenneth return net_dev; 23029268f72dSYotam Kenneth } 23039268f72dSYotam Kenneth EXPORT_SYMBOL(ib_get_net_dev_by_params); 
23049268f72dSYotam Kenneth 2305521ed0d9SKamal Heib void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) 2306521ed0d9SKamal Heib { 23073023a1e9SKamal Heib struct ib_device_ops *dev_ops = &dev->ops; 2308521ed0d9SKamal Heib #define SET_DEVICE_OP(ptr, name) \ 2309521ed0d9SKamal Heib do { \ 2310521ed0d9SKamal Heib if (ops->name) \ 2311521ed0d9SKamal Heib if (!((ptr)->name)) \ 2312521ed0d9SKamal Heib (ptr)->name = ops->name; \ 2313521ed0d9SKamal Heib } while (0) 2314521ed0d9SKamal Heib 231530471d4bSLeon Romanovsky #define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) 231630471d4bSLeon Romanovsky 23173023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, add_gid); 23182f1927b0SMoni Shoua SET_DEVICE_OP(dev_ops, advise_mr); 23193023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_dm); 23203023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_fmr); 23213023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_hw_stats); 23223023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_mr); 23233023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_mw); 23243023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_pd); 23253023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); 23263023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_ucontext); 23273023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_xrcd); 23283023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, attach_mcast); 23293023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, check_mr_status); 23303023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_ah); 23313023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_counters); 23323023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_cq); 23333023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_flow); 23343023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_flow_action_esp); 23353023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_qp); 23363023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_rwq_ind_table); 23373023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_srq); 23383023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_wq); 
23393023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_dm); 2340d0899892SJason Gunthorpe SET_DEVICE_OP(dev_ops, dealloc_driver); 23413023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_fmr); 23423023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_mw); 23433023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_pd); 23443023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_ucontext); 23453023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_xrcd); 23463023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, del_gid); 23473023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dereg_mr); 23483023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_ah); 23493023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_counters); 23503023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_cq); 23513023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_flow); 23523023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_flow_action); 23533023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_qp); 23543023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table); 23553023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_srq); 23563023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_wq); 23573023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, detach_mcast); 23583023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, disassociate_ucontext); 23593023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, drain_rq); 23603023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, drain_sq); 2361ca22354bSJason Gunthorpe SET_DEVICE_OP(dev_ops, enable_driver); 236202da3750SLeon Romanovsky SET_DEVICE_OP(dev_ops, fill_res_entry); 23633023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_dev_fw_str); 23643023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_dma_mr); 23653023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_hw_stats); 23663023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_link_layer); 23673023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_netdev); 23683023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_port_immutable); 23693023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_vector_affinity); 23703023a1e9SKamal Heib 
SET_DEVICE_OP(dev_ops, get_vf_config); 23713023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_vf_stats); 2372ea4baf7fSParav Pandit SET_DEVICE_OP(dev_ops, init_port); 23733023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, map_mr_sg); 23743023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, map_phys_fmr); 23753023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, mmap); 23763023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_ah); 23773023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_cq); 23783023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_device); 23793023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_flow_action_esp); 23803023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_port); 23813023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_qp); 23823023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_srq); 23833023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_wq); 23843023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, peek_cq); 23853023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, poll_cq); 23863023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, post_recv); 23873023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, post_send); 23883023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, post_srq_recv); 23893023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, process_mad); 23903023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_ah); 23913023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_device); 23923023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_gid); 23933023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_pkey); 23943023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_port); 23953023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_qp); 23963023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_srq); 23973023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); 23983023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, read_counters); 23993023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, reg_dm_mr); 24003023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, reg_user_mr); 24013023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, req_ncomp_notif); 24023023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, req_notify_cq); 
24033023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, rereg_user_mr); 24043023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, resize_cq); 24053023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, set_vf_guid); 24063023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, set_vf_link_state); 24073023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, unmap_fmr); 240821a428a0SLeon Romanovsky 2409d3456914SLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_ah); 241021a428a0SLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_pd); 241168e326deSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_srq); 2412a2a074efSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_ucontext); 2413521ed0d9SKamal Heib } 2414521ed0d9SKamal Heib EXPORT_SYMBOL(ib_set_device_ops); 2415521ed0d9SKamal Heib 2416d0e312feSLeon Romanovsky static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { 2417735c631aSMark Bloch [RDMA_NL_LS_OP_RESOLVE] = { 2418647c75acSLeon Romanovsky .doit = ib_nl_handle_resolve_resp, 2419e3a2b93dSLeon Romanovsky .flags = RDMA_NL_ADMIN_PERM, 2420e3a2b93dSLeon Romanovsky }, 2421735c631aSMark Bloch [RDMA_NL_LS_OP_SET_TIMEOUT] = { 2422647c75acSLeon Romanovsky .doit = ib_nl_handle_set_timeout, 2423e3a2b93dSLeon Romanovsky .flags = RDMA_NL_ADMIN_PERM, 2424e3a2b93dSLeon Romanovsky }, 2425ae43f828SMark Bloch [RDMA_NL_LS_OP_IP_RESOLVE] = { 2426647c75acSLeon Romanovsky .doit = ib_nl_handle_ip_res_resp, 2427e3a2b93dSLeon Romanovsky .flags = RDMA_NL_ADMIN_PERM, 2428e3a2b93dSLeon Romanovsky }, 2429735c631aSMark Bloch }; 2430735c631aSMark Bloch 24311da177e4SLinus Torvalds static int __init ib_core_init(void) 24321da177e4SLinus Torvalds { 24331da177e4SLinus Torvalds int ret; 24341da177e4SLinus Torvalds 2435f0626710STejun Heo ib_wq = alloc_workqueue("infiniband", 0, 0); 2436f0626710STejun Heo if (!ib_wq) 2437f0626710STejun Heo return -ENOMEM; 2438f0626710STejun Heo 243914d3a3b2SChristoph Hellwig ib_comp_wq = alloc_workqueue("ib-comp-wq", 2440b7363e67SSagi Grimberg WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 244114d3a3b2SChristoph Hellwig if (!ib_comp_wq) { 
244214d3a3b2SChristoph Hellwig ret = -ENOMEM; 244314d3a3b2SChristoph Hellwig goto err; 244414d3a3b2SChristoph Hellwig } 244514d3a3b2SChristoph Hellwig 2446f794809aSJack Morgenstein ib_comp_unbound_wq = 2447f794809aSJack Morgenstein alloc_workqueue("ib-comp-unb-wq", 2448f794809aSJack Morgenstein WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | 2449f794809aSJack Morgenstein WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); 2450f794809aSJack Morgenstein if (!ib_comp_unbound_wq) { 2451f794809aSJack Morgenstein ret = -ENOMEM; 2452f794809aSJack Morgenstein goto err_comp; 2453f794809aSJack Morgenstein } 2454f794809aSJack Morgenstein 245555aeed06SJason Gunthorpe ret = class_register(&ib_class); 2456fd75c789SNir Muchtar if (ret) { 2457aba25a3eSParav Pandit pr_warn("Couldn't create InfiniBand device class\n"); 2458f794809aSJack Morgenstein goto err_comp_unbound; 2459fd75c789SNir Muchtar } 24601da177e4SLinus Torvalds 2461c9901724SLeon Romanovsky ret = rdma_nl_init(); 24621da177e4SLinus Torvalds if (ret) { 2463c9901724SLeon Romanovsky pr_warn("Couldn't init IB netlink interface: err %d\n", ret); 2464fd75c789SNir Muchtar goto err_sysfs; 24651da177e4SLinus Torvalds } 24661da177e4SLinus Torvalds 2467e3f20f02SLeon Romanovsky ret = addr_init(); 2468e3f20f02SLeon Romanovsky if (ret) { 2469e3f20f02SLeon Romanovsky pr_warn("Could't init IB address resolution\n"); 2470e3f20f02SLeon Romanovsky goto err_ibnl; 2471e3f20f02SLeon Romanovsky } 2472e3f20f02SLeon Romanovsky 24734c2cb422SMark Bloch ret = ib_mad_init(); 24744c2cb422SMark Bloch if (ret) { 24754c2cb422SMark Bloch pr_warn("Couldn't init IB MAD\n"); 24764c2cb422SMark Bloch goto err_addr; 24774c2cb422SMark Bloch } 24784c2cb422SMark Bloch 2479c2e49c92SMark Bloch ret = ib_sa_init(); 2480c2e49c92SMark Bloch if (ret) { 2481c2e49c92SMark Bloch pr_warn("Couldn't init SA\n"); 2482c2e49c92SMark Bloch goto err_mad; 2483c2e49c92SMark Bloch } 2484c2e49c92SMark Bloch 24858f408ab6SDaniel Jurgens ret = register_lsm_notifier(&ibdev_lsm_nb); 24868f408ab6SDaniel Jurgens 
if (ret) { 24878f408ab6SDaniel Jurgens pr_warn("Couldn't register LSM notifier. ret %d\n", ret); 2488c9901724SLeon Romanovsky goto err_sa; 24898f408ab6SDaniel Jurgens } 24908f408ab6SDaniel Jurgens 24914e0f7b90SParav Pandit ret = register_pernet_device(&rdma_dev_net_ops); 24924e0f7b90SParav Pandit if (ret) { 24934e0f7b90SParav Pandit pr_warn("Couldn't init compat dev. ret %d\n", ret); 24944e0f7b90SParav Pandit goto err_compat; 24954e0f7b90SParav Pandit } 24964e0f7b90SParav Pandit 24976c80b41aSLeon Romanovsky nldev_init(); 2498c9901724SLeon Romanovsky rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); 24995ef8c0c1SJason Gunthorpe roce_gid_mgmt_init(); 2500b2cbae2cSRoland Dreier 2501fd75c789SNir Muchtar return 0; 2502fd75c789SNir Muchtar 25034e0f7b90SParav Pandit err_compat: 25044e0f7b90SParav Pandit unregister_lsm_notifier(&ibdev_lsm_nb); 2505735c631aSMark Bloch err_sa: 2506735c631aSMark Bloch ib_sa_cleanup(); 2507c2e49c92SMark Bloch err_mad: 2508c2e49c92SMark Bloch ib_mad_cleanup(); 25094c2cb422SMark Bloch err_addr: 25104c2cb422SMark Bloch addr_cleanup(); 2511e3f20f02SLeon Romanovsky err_ibnl: 2512c9901724SLeon Romanovsky rdma_nl_exit(); 2513fd75c789SNir Muchtar err_sysfs: 251455aeed06SJason Gunthorpe class_unregister(&ib_class); 2515f794809aSJack Morgenstein err_comp_unbound: 2516f794809aSJack Morgenstein destroy_workqueue(ib_comp_unbound_wq); 251714d3a3b2SChristoph Hellwig err_comp: 251814d3a3b2SChristoph Hellwig destroy_workqueue(ib_comp_wq); 2519fd75c789SNir Muchtar err: 2520fd75c789SNir Muchtar destroy_workqueue(ib_wq); 25211da177e4SLinus Torvalds return ret; 25221da177e4SLinus Torvalds } 25231da177e4SLinus Torvalds 25241da177e4SLinus Torvalds static void __exit ib_core_cleanup(void) 25251da177e4SLinus Torvalds { 25265ef8c0c1SJason Gunthorpe roce_gid_mgmt_cleanup(); 25276c80b41aSLeon Romanovsky nldev_exit(); 2528c9901724SLeon Romanovsky rdma_nl_unregister(RDMA_NL_LS); 25294e0f7b90SParav Pandit unregister_pernet_device(&rdma_dev_net_ops); 2530c9901724SLeon 
Romanovsky unregister_lsm_notifier(&ibdev_lsm_nb); 2531c2e49c92SMark Bloch ib_sa_cleanup(); 25324c2cb422SMark Bloch ib_mad_cleanup(); 2533e3f20f02SLeon Romanovsky addr_cleanup(); 2534c9901724SLeon Romanovsky rdma_nl_exit(); 253555aeed06SJason Gunthorpe class_unregister(&ib_class); 2536f794809aSJack Morgenstein destroy_workqueue(ib_comp_unbound_wq); 253714d3a3b2SChristoph Hellwig destroy_workqueue(ib_comp_wq); 2538f7c6a7b5SRoland Dreier /* Make sure that any pending umem accounting work is done. */ 2539f0626710STejun Heo destroy_workqueue(ib_wq); 2540d0899892SJason Gunthorpe flush_workqueue(system_unbound_wq); 2541e59178d8SJason Gunthorpe WARN_ON(!xa_empty(&clients)); 25420df91bb6SJason Gunthorpe WARN_ON(!xa_empty(&devices)); 25431da177e4SLinus Torvalds } 25441da177e4SLinus Torvalds 2545e3bf14bdSJason Gunthorpe MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); 2546e3bf14bdSJason Gunthorpe 254762dfa795SParav Pandit /* ib core relies on netdev stack to first register net_ns_type_operations 254862dfa795SParav Pandit * ns kobject type before ib_core initialization. 254962dfa795SParav Pandit */ 255062dfa795SParav Pandit fs_initcall(ib_core_init); 25511da177e4SLinus Torvalds module_exit(ib_core_cleanup); 2552