11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (c) 2004 Topspin Communications. All rights reserved. 32a1d9b7fSRoland Dreier * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * This software is available to you under a choice of one of two 61da177e4SLinus Torvalds * licenses. You may choose to be licensed under the terms of the GNU 71da177e4SLinus Torvalds * General Public License (GPL) Version 2, available from the file 81da177e4SLinus Torvalds * COPYING in the main directory of this source tree, or the 91da177e4SLinus Torvalds * OpenIB.org BSD license below: 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * Redistribution and use in source and binary forms, with or 121da177e4SLinus Torvalds * without modification, are permitted provided that the following 131da177e4SLinus Torvalds * conditions are met: 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * - Redistributions of source code must retain the above 161da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 171da177e4SLinus Torvalds * disclaimer. 181da177e4SLinus Torvalds * 191da177e4SLinus Torvalds * - Redistributions in binary form must reproduce the above 201da177e4SLinus Torvalds * copyright notice, this list of conditions and the following 211da177e4SLinus Torvalds * disclaimer in the documentation and/or other materials 221da177e4SLinus Torvalds * provided with the distribution. 231da177e4SLinus Torvalds * 241da177e4SLinus Torvalds * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 251da177e4SLinus Torvalds * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 261da177e4SLinus Torvalds * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 271da177e4SLinus Torvalds * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 281da177e4SLinus Torvalds * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 291da177e4SLinus Torvalds * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 301da177e4SLinus Torvalds * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 311da177e4SLinus Torvalds * SOFTWARE. 321da177e4SLinus Torvalds */ 331da177e4SLinus Torvalds 341da177e4SLinus Torvalds #include <linux/module.h> 351da177e4SLinus Torvalds #include <linux/string.h> 361da177e4SLinus Torvalds #include <linux/errno.h> 379a6b090cSAhmed S. Darwish #include <linux/kernel.h> 381da177e4SLinus Torvalds #include <linux/slab.h> 391da177e4SLinus Torvalds #include <linux/init.h> 409268f72dSYotam Kenneth #include <linux/netdevice.h> 414e0f7b90SParav Pandit #include <net/net_namespace.h> 428f408ab6SDaniel Jurgens #include <linux/security.h> 438f408ab6SDaniel Jurgens #include <linux/notifier.h> 44324e227eSJason Gunthorpe #include <linux/hashtable.h> 45b2cbae2cSRoland Dreier #include <rdma/rdma_netlink.h> 4603db3a2dSMatan Barak #include <rdma/ib_addr.h> 4703db3a2dSMatan Barak #include <rdma/ib_cache.h> 48413d3347SMark Zhang #include <rdma/rdma_counter.h> 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds #include "core_priv.h" 5141eda65cSLeon Romanovsky #include "restrack.h" 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds MODULE_AUTHOR("Roland Dreier"); 541da177e4SLinus Torvalds MODULE_DESCRIPTION("core kernel InfiniBand API"); 551da177e4SLinus Torvalds MODULE_LICENSE("Dual BSD/GPL"); 561da177e4SLinus Torvalds 5714d3a3b2SChristoph Hellwig struct workqueue_struct *ib_comp_wq; 58f794809aSJack Morgenstein struct workqueue_struct *ib_comp_unbound_wq; 59f0626710STejun Heo struct workqueue_struct *ib_wq; 60f0626710STejun Heo EXPORT_SYMBOL_GPL(ib_wq); 61f0626710STejun Heo 620df91bb6SJason Gunthorpe /* 63921eab11SJason Gunthorpe * Each of the three rwsem locks (devices, clients, client_data) protects the 64921eab11SJason Gunthorpe * xarray of the same name. Specifically it allows the caller to assert that 65921eab11SJason Gunthorpe * the MARK will/will not be changing under the lock, and for devices and 66921eab11SJason Gunthorpe * clients, that the value in the xarray is still a valid pointer. Change of 67921eab11SJason Gunthorpe * the MARK is linked to the object state, so holding the lock and testing the 68921eab11SJason Gunthorpe * MARK also asserts that the contained object is in a certain state. 69921eab11SJason Gunthorpe * 70921eab11SJason Gunthorpe * This is used to build a two stage register/unregister flow where objects 71921eab11SJason Gunthorpe * can continue to be in the xarray even though they are still in progress to 72921eab11SJason Gunthorpe * register/unregister. 73921eab11SJason Gunthorpe * 74921eab11SJason Gunthorpe * The xarray itself provides additional locking, and restartable iteration, 75921eab11SJason Gunthorpe * which is also relied on. 76921eab11SJason Gunthorpe * 77921eab11SJason Gunthorpe * Locks should not be nested, with the exception of client_data, which is 78921eab11SJason Gunthorpe * allowed to nest under the read side of the other two locks. 79921eab11SJason Gunthorpe * 80921eab11SJason Gunthorpe * The devices_rwsem also protects the device name list, any change or 81921eab11SJason Gunthorpe * assignment of device name must also hold the write side to guarantee unique 82921eab11SJason Gunthorpe * names. 83921eab11SJason Gunthorpe */ 84921eab11SJason Gunthorpe 85921eab11SJason Gunthorpe /* 860df91bb6SJason Gunthorpe * devices contains devices that have had their names assigned. The 870df91bb6SJason Gunthorpe * devices may not be registered. Users that care about the registration 880df91bb6SJason Gunthorpe * status need to call ib_device_try_get() on the device to ensure it is 890df91bb6SJason Gunthorpe * registered, and keep it registered, for the required duration. 900df91bb6SJason Gunthorpe * 910df91bb6SJason Gunthorpe */ 920df91bb6SJason Gunthorpe static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); 93921eab11SJason Gunthorpe static DECLARE_RWSEM(devices_rwsem); 940df91bb6SJason Gunthorpe #define DEVICE_REGISTERED XA_MARK_1 950df91bb6SJason Gunthorpe 969cd58817SJason Gunthorpe static u32 highest_client_id; 97e59178d8SJason Gunthorpe #define CLIENT_REGISTERED XA_MARK_1 98e59178d8SJason Gunthorpe static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); 99921eab11SJason Gunthorpe static DECLARE_RWSEM(clients_rwsem); 1001da177e4SLinus Torvalds 101621e55ffSJason Gunthorpe static void ib_client_put(struct ib_client *client) 102621e55ffSJason Gunthorpe { 103621e55ffSJason Gunthorpe if (refcount_dec_and_test(&client->uses)) 104621e55ffSJason Gunthorpe complete(&client->uses_zero); 105621e55ffSJason Gunthorpe } 106621e55ffSJason Gunthorpe 1071da177e4SLinus Torvalds /* 1080df91bb6SJason Gunthorpe * If client_data is registered then the corresponding client must also still 1090df91bb6SJason Gunthorpe * be registered. 1100df91bb6SJason Gunthorpe */ 1110df91bb6SJason Gunthorpe #define CLIENT_DATA_REGISTERED XA_MARK_1 1124e0f7b90SParav Pandit 1131d2fedd8SParav Pandit unsigned int rdma_dev_net_id; 1144e0f7b90SParav Pandit 1154e0f7b90SParav Pandit /* 1164e0f7b90SParav Pandit * A list of net namespaces is maintained in an xarray. This is necessary 1174e0f7b90SParav Pandit * because we can't get the locking right using the existing net ns list. We 1184e0f7b90SParav Pandit * would require a init_net callback after the list is updated. 1194e0f7b90SParav Pandit */ 1204e0f7b90SParav Pandit static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); 1214e0f7b90SParav Pandit /* 1224e0f7b90SParav Pandit * rwsem to protect accessing the rdma_nets xarray entries. 1234e0f7b90SParav Pandit */ 1244e0f7b90SParav Pandit static DECLARE_RWSEM(rdma_nets_rwsem); 1254e0f7b90SParav Pandit 126cb7e0e13SParav Pandit bool ib_devices_shared_netns = true; 127a56bc45bSParav Pandit module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); 128a56bc45bSParav Pandit MODULE_PARM_DESC(netns_mode, 129a56bc45bSParav Pandit "Share device among net namespaces; default=1 (shared)"); 13041c61401SParav Pandit /** 131d6537c1aSrd.dunlab@gmail.com * rdma_dev_access_netns() - Return whether an rdma device can be accessed 13241c61401SParav Pandit * from a specified net namespace or not. 133d6537c1aSrd.dunlab@gmail.com * @dev: Pointer to rdma device which needs to be checked 13441c61401SParav Pandit * @net: Pointer to net namesapce for which access to be checked 13541c61401SParav Pandit * 136d6537c1aSrd.dunlab@gmail.com * When the rdma device is in shared mode, it ignores the net namespace. 137d6537c1aSrd.dunlab@gmail.com * When the rdma device is exclusive to a net namespace, rdma device net 138d6537c1aSrd.dunlab@gmail.com * namespace is checked against the specified one. 13941c61401SParav Pandit */ 14041c61401SParav Pandit bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) 14141c61401SParav Pandit { 14241c61401SParav Pandit return (ib_devices_shared_netns || 14341c61401SParav Pandit net_eq(read_pnet(&dev->coredev.rdma_net), net)); 14441c61401SParav Pandit } 14541c61401SParav Pandit EXPORT_SYMBOL(rdma_dev_access_netns); 14641c61401SParav Pandit 1470df91bb6SJason Gunthorpe /* 1480df91bb6SJason Gunthorpe * xarray has this behavior where it won't iterate over NULL values stored in 1490df91bb6SJason Gunthorpe * allocated arrays. So we need our own iterator to see all values stored in 1500df91bb6SJason Gunthorpe * the array. This does the same thing as xa_for_each except that it also 1510df91bb6SJason Gunthorpe * returns NULL valued entries if the array is allocating. Simplified to only 1520df91bb6SJason Gunthorpe * work on simple xarrays. 1530df91bb6SJason Gunthorpe */ 1540df91bb6SJason Gunthorpe static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, 1550df91bb6SJason Gunthorpe xa_mark_t filter) 1560df91bb6SJason Gunthorpe { 1570df91bb6SJason Gunthorpe XA_STATE(xas, xa, *indexp); 1580df91bb6SJason Gunthorpe void *entry; 1590df91bb6SJason Gunthorpe 1600df91bb6SJason Gunthorpe rcu_read_lock(); 1610df91bb6SJason Gunthorpe do { 1620df91bb6SJason Gunthorpe entry = xas_find_marked(&xas, ULONG_MAX, filter); 1630df91bb6SJason Gunthorpe if (xa_is_zero(entry)) 1640df91bb6SJason Gunthorpe break; 1650df91bb6SJason Gunthorpe } while (xas_retry(&xas, entry)); 1660df91bb6SJason Gunthorpe rcu_read_unlock(); 1670df91bb6SJason Gunthorpe 1680df91bb6SJason Gunthorpe if (entry) { 1690df91bb6SJason Gunthorpe *indexp = xas.xa_index; 1700df91bb6SJason Gunthorpe if (xa_is_zero(entry)) 1710df91bb6SJason Gunthorpe return NULL; 1720df91bb6SJason Gunthorpe return entry; 1730df91bb6SJason Gunthorpe } 1740df91bb6SJason Gunthorpe return XA_ERROR(-ENOENT); 1750df91bb6SJason Gunthorpe } 1760df91bb6SJason Gunthorpe #define xan_for_each_marked(xa, index, entry, filter) \ 1770df91bb6SJason Gunthorpe for (index = 0, entry = xan_find_marked(xa, &(index), filter); \ 1780df91bb6SJason Gunthorpe !xa_is_err(entry); \ 1790df91bb6SJason Gunthorpe (index)++, entry = xan_find_marked(xa, &(index), filter)) 1800df91bb6SJason Gunthorpe 181324e227eSJason Gunthorpe /* RCU hash table mapping netdevice pointers to struct ib_port_data */ 182324e227eSJason Gunthorpe static DEFINE_SPINLOCK(ndev_hash_lock); 183324e227eSJason Gunthorpe static DECLARE_HASHTABLE(ndev_hash, 5); 184324e227eSJason Gunthorpe 185c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev); 186d0899892SJason Gunthorpe static void ib_unregister_work(struct work_struct *work); 187d0899892SJason Gunthorpe static void __ib_unregister_device(struct ib_device *device); 1888f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event, 1898f408ab6SDaniel Jurgens void *lsm_data); 1908f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work); 1918f408ab6SDaniel Jurgens static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); 1928f408ab6SDaniel Jurgens 193923abb9dSGal Pressman static void __ibdev_printk(const char *level, const struct ib_device *ibdev, 194923abb9dSGal Pressman struct va_format *vaf) 195923abb9dSGal Pressman { 196923abb9dSGal Pressman if (ibdev && ibdev->dev.parent) 197923abb9dSGal Pressman dev_printk_emit(level[1] - '0', 198923abb9dSGal Pressman ibdev->dev.parent, 199923abb9dSGal Pressman "%s %s %s: %pV", 200923abb9dSGal Pressman dev_driver_string(ibdev->dev.parent), 201923abb9dSGal Pressman dev_name(ibdev->dev.parent), 202923abb9dSGal Pressman dev_name(&ibdev->dev), 203923abb9dSGal Pressman vaf); 204923abb9dSGal Pressman else if (ibdev) 205923abb9dSGal Pressman printk("%s%s: %pV", 206923abb9dSGal Pressman level, dev_name(&ibdev->dev), vaf); 207923abb9dSGal Pressman else 208923abb9dSGal Pressman printk("%s(NULL ib_device): %pV", level, vaf); 209923abb9dSGal Pressman } 210923abb9dSGal Pressman 211923abb9dSGal Pressman void ibdev_printk(const char *level, const struct ib_device *ibdev, 212923abb9dSGal Pressman const char *format, ...) 213923abb9dSGal Pressman { 214923abb9dSGal Pressman struct va_format vaf; 215923abb9dSGal Pressman va_list args; 216923abb9dSGal Pressman 217923abb9dSGal Pressman va_start(args, format); 218923abb9dSGal Pressman 219923abb9dSGal Pressman vaf.fmt = format; 220923abb9dSGal Pressman vaf.va = &args; 221923abb9dSGal Pressman 222923abb9dSGal Pressman __ibdev_printk(level, ibdev, &vaf); 223923abb9dSGal Pressman 224923abb9dSGal Pressman va_end(args); 225923abb9dSGal Pressman } 226923abb9dSGal Pressman EXPORT_SYMBOL(ibdev_printk); 227923abb9dSGal Pressman 228923abb9dSGal Pressman #define define_ibdev_printk_level(func, level) \ 229923abb9dSGal Pressman void func(const struct ib_device *ibdev, const char *fmt, ...) \ 230923abb9dSGal Pressman { \ 231923abb9dSGal Pressman struct va_format vaf; \ 232923abb9dSGal Pressman va_list args; \ 233923abb9dSGal Pressman \ 234923abb9dSGal Pressman va_start(args, fmt); \ 235923abb9dSGal Pressman \ 236923abb9dSGal Pressman vaf.fmt = fmt; \ 237923abb9dSGal Pressman vaf.va = &args; \ 238923abb9dSGal Pressman \ 239923abb9dSGal Pressman __ibdev_printk(level, ibdev, &vaf); \ 240923abb9dSGal Pressman \ 241923abb9dSGal Pressman va_end(args); \ 242923abb9dSGal Pressman } \ 243923abb9dSGal Pressman EXPORT_SYMBOL(func); 244923abb9dSGal Pressman 245923abb9dSGal Pressman define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); 246923abb9dSGal Pressman define_ibdev_printk_level(ibdev_alert, KERN_ALERT); 247923abb9dSGal Pressman define_ibdev_printk_level(ibdev_crit, KERN_CRIT); 248923abb9dSGal Pressman define_ibdev_printk_level(ibdev_err, KERN_ERR); 249923abb9dSGal Pressman define_ibdev_printk_level(ibdev_warn, KERN_WARNING); 250923abb9dSGal Pressman define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); 251923abb9dSGal Pressman define_ibdev_printk_level(ibdev_info, KERN_INFO); 252923abb9dSGal Pressman 2538f408ab6SDaniel Jurgens static struct notifier_block ibdev_lsm_nb = { 2548f408ab6SDaniel Jurgens .notifier_call = ib_security_change, 2558f408ab6SDaniel Jurgens }; 2561da177e4SLinus Torvalds 257decbc7a6SParav Pandit static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, 258decbc7a6SParav Pandit struct net *net); 259decbc7a6SParav Pandit 260324e227eSJason Gunthorpe /* Pointer to the RCU head at the start of the ib_port_data array */ 261324e227eSJason Gunthorpe struct ib_port_data_rcu { 262324e227eSJason Gunthorpe struct rcu_head rcu_head; 263324e227eSJason Gunthorpe struct ib_port_data pdata[]; 264324e227eSJason Gunthorpe }; 265324e227eSJason Gunthorpe 266deee3c7eSKamal Heib static void ib_device_check_mandatory(struct ib_device *device) 2671da177e4SLinus Torvalds { 2683023a1e9SKamal Heib #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } 2691da177e4SLinus Torvalds static const struct { 2701da177e4SLinus Torvalds size_t offset; 2711da177e4SLinus Torvalds char *name; 2721da177e4SLinus Torvalds } mandatory_table[] = { 2731da177e4SLinus Torvalds IB_MANDATORY_FUNC(query_device), 2741da177e4SLinus Torvalds IB_MANDATORY_FUNC(query_port), 2751da177e4SLinus Torvalds IB_MANDATORY_FUNC(alloc_pd), 2761da177e4SLinus Torvalds IB_MANDATORY_FUNC(dealloc_pd), 2771da177e4SLinus Torvalds IB_MANDATORY_FUNC(create_qp), 2781da177e4SLinus Torvalds IB_MANDATORY_FUNC(modify_qp), 2791da177e4SLinus Torvalds IB_MANDATORY_FUNC(destroy_qp), 2801da177e4SLinus Torvalds IB_MANDATORY_FUNC(post_send), 2811da177e4SLinus Torvalds IB_MANDATORY_FUNC(post_recv), 2821da177e4SLinus Torvalds IB_MANDATORY_FUNC(create_cq), 2831da177e4SLinus Torvalds IB_MANDATORY_FUNC(destroy_cq), 2841da177e4SLinus Torvalds IB_MANDATORY_FUNC(poll_cq), 2851da177e4SLinus Torvalds IB_MANDATORY_FUNC(req_notify_cq), 2861da177e4SLinus Torvalds IB_MANDATORY_FUNC(get_dma_mr), 28744ce37bcSJason Gunthorpe IB_MANDATORY_FUNC(reg_user_mr), 2887738613eSIra Weiny IB_MANDATORY_FUNC(dereg_mr), 2897738613eSIra Weiny IB_MANDATORY_FUNC(get_port_immutable) 2901da177e4SLinus Torvalds }; 2911da177e4SLinus Torvalds int i; 2921da177e4SLinus Torvalds 2936780c4faSGal Pressman device->kverbs_provider = true; 2949a6b090cSAhmed S. Darwish for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { 2953023a1e9SKamal Heib if (!*(void **) ((void *) &device->ops + 2963023a1e9SKamal Heib mandatory_table[i].offset)) { 2976780c4faSGal Pressman device->kverbs_provider = false; 2986780c4faSGal Pressman break; 2991da177e4SLinus Torvalds } 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds } 3021da177e4SLinus Torvalds 303f8978bd9SLeon Romanovsky /* 30401b67117SParav Pandit * Caller must perform ib_device_put() to return the device reference count 30501b67117SParav Pandit * when ib_device_get_by_index() returns valid device pointer. 306f8978bd9SLeon Romanovsky */ 30737eeab55SParav Pandit struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) 308f8978bd9SLeon Romanovsky { 309f8978bd9SLeon Romanovsky struct ib_device *device; 310f8978bd9SLeon Romanovsky 311921eab11SJason Gunthorpe down_read(&devices_rwsem); 3120df91bb6SJason Gunthorpe device = xa_load(&devices, index); 31301b67117SParav Pandit if (device) { 31437eeab55SParav Pandit if (!rdma_dev_access_netns(device, net)) { 31537eeab55SParav Pandit device = NULL; 31637eeab55SParav Pandit goto out; 31737eeab55SParav Pandit } 31837eeab55SParav Pandit 319d79af724SJason Gunthorpe if (!ib_device_try_get(device)) 32001b67117SParav Pandit device = NULL; 32101b67117SParav Pandit } 32237eeab55SParav Pandit out: 323921eab11SJason Gunthorpe up_read(&devices_rwsem); 324f8978bd9SLeon Romanovsky return device; 325f8978bd9SLeon Romanovsky } 326f8978bd9SLeon Romanovsky 327d79af724SJason Gunthorpe /** 328d79af724SJason Gunthorpe * ib_device_put - Release IB device reference 329d79af724SJason Gunthorpe * @device: device whose reference to be released 330d79af724SJason Gunthorpe * 331d79af724SJason Gunthorpe * ib_device_put() releases reference to the IB device to allow it to be 332d79af724SJason Gunthorpe * unregistered and eventually free. 333d79af724SJason Gunthorpe */ 33401b67117SParav Pandit void ib_device_put(struct ib_device *device) 33501b67117SParav Pandit { 33601b67117SParav Pandit if (refcount_dec_and_test(&device->refcount)) 33701b67117SParav Pandit complete(&device->unreg_completion); 33801b67117SParav Pandit } 339d79af724SJason Gunthorpe EXPORT_SYMBOL(ib_device_put); 34001b67117SParav Pandit 3411da177e4SLinus Torvalds static struct ib_device *__ib_device_get_by_name(const char *name) 3421da177e4SLinus Torvalds { 3431da177e4SLinus Torvalds struct ib_device *device; 3440df91bb6SJason Gunthorpe unsigned long index; 3451da177e4SLinus Torvalds 3460df91bb6SJason Gunthorpe xa_for_each (&devices, index, device) 347896de009SJason Gunthorpe if (!strcmp(name, dev_name(&device->dev))) 3481da177e4SLinus Torvalds return device; 3491da177e4SLinus Torvalds 3501da177e4SLinus Torvalds return NULL; 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds 3536cc2c8e5SJason Gunthorpe /** 3546cc2c8e5SJason Gunthorpe * ib_device_get_by_name - Find an IB device by name 3556cc2c8e5SJason Gunthorpe * @name: The name to look for 3566cc2c8e5SJason Gunthorpe * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) 3576cc2c8e5SJason Gunthorpe * 3586cc2c8e5SJason Gunthorpe * Find and hold an ib_device by its name. The caller must call 3596cc2c8e5SJason Gunthorpe * ib_device_put() on the returned pointer. 3606cc2c8e5SJason Gunthorpe */ 3616cc2c8e5SJason Gunthorpe struct ib_device *ib_device_get_by_name(const char *name, 3626cc2c8e5SJason Gunthorpe enum rdma_driver_id driver_id) 3636cc2c8e5SJason Gunthorpe { 3646cc2c8e5SJason Gunthorpe struct ib_device *device; 3656cc2c8e5SJason Gunthorpe 3666cc2c8e5SJason Gunthorpe down_read(&devices_rwsem); 3676cc2c8e5SJason Gunthorpe device = __ib_device_get_by_name(name); 3686cc2c8e5SJason Gunthorpe if (device && driver_id != RDMA_DRIVER_UNKNOWN && 369b9560a41SJason Gunthorpe device->ops.driver_id != driver_id) 3706cc2c8e5SJason Gunthorpe device = NULL; 3716cc2c8e5SJason Gunthorpe 3726cc2c8e5SJason Gunthorpe if (device) { 3736cc2c8e5SJason Gunthorpe if (!ib_device_try_get(device)) 3746cc2c8e5SJason Gunthorpe device = NULL; 3756cc2c8e5SJason Gunthorpe } 3766cc2c8e5SJason Gunthorpe up_read(&devices_rwsem); 3776cc2c8e5SJason Gunthorpe return device; 3786cc2c8e5SJason Gunthorpe } 3796cc2c8e5SJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_name); 3806cc2c8e5SJason Gunthorpe 3814e0f7b90SParav Pandit static int rename_compat_devs(struct ib_device *device) 3824e0f7b90SParav Pandit { 3834e0f7b90SParav Pandit struct ib_core_device *cdev; 3844e0f7b90SParav Pandit unsigned long index; 3854e0f7b90SParav Pandit int ret = 0; 3864e0f7b90SParav Pandit 3874e0f7b90SParav Pandit mutex_lock(&device->compat_devs_mutex); 3884e0f7b90SParav Pandit xa_for_each (&device->compat_devs, index, cdev) { 3894e0f7b90SParav Pandit ret = device_rename(&cdev->dev, dev_name(&device->dev)); 3904e0f7b90SParav Pandit if (ret) { 3914e0f7b90SParav Pandit dev_warn(&cdev->dev, 3924e0f7b90SParav Pandit "Fail to rename compatdev to new name %s\n", 3934e0f7b90SParav Pandit dev_name(&device->dev)); 3944e0f7b90SParav Pandit break; 3954e0f7b90SParav Pandit } 3964e0f7b90SParav Pandit } 3974e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 3984e0f7b90SParav Pandit return ret; 3994e0f7b90SParav Pandit } 4004e0f7b90SParav Pandit 401d21943ddSLeon Romanovsky int ib_device_rename(struct ib_device *ibdev, const char *name) 402d21943ddSLeon Romanovsky { 403dc1435c0SLeon Romanovsky unsigned long index; 404dc1435c0SLeon Romanovsky void *client_data; 405e3593b56SJason Gunthorpe int ret; 406d21943ddSLeon Romanovsky 407921eab11SJason Gunthorpe down_write(&devices_rwsem); 408e3593b56SJason Gunthorpe if (!strcmp(name, dev_name(&ibdev->dev))) { 409dc1435c0SLeon Romanovsky up_write(&devices_rwsem); 410dc1435c0SLeon Romanovsky return 0; 411e3593b56SJason Gunthorpe } 412e3593b56SJason Gunthorpe 413344684e6SJason Gunthorpe if (__ib_device_get_by_name(name)) { 414dc1435c0SLeon Romanovsky up_write(&devices_rwsem); 415dc1435c0SLeon Romanovsky return -EEXIST; 416d21943ddSLeon Romanovsky } 417d21943ddSLeon Romanovsky 418d21943ddSLeon Romanovsky ret = device_rename(&ibdev->dev, name); 419dc1435c0SLeon Romanovsky if (ret) { 420921eab11SJason Gunthorpe up_write(&devices_rwsem); 421d21943ddSLeon Romanovsky return ret; 422d21943ddSLeon Romanovsky } 423d21943ddSLeon Romanovsky 424dc1435c0SLeon Romanovsky strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); 425dc1435c0SLeon Romanovsky ret = rename_compat_devs(ibdev); 426dc1435c0SLeon Romanovsky 427dc1435c0SLeon Romanovsky downgrade_write(&devices_rwsem); 428dc1435c0SLeon Romanovsky down_read(&ibdev->client_data_rwsem); 429dc1435c0SLeon Romanovsky xan_for_each_marked(&ibdev->client_data, index, client_data, 430dc1435c0SLeon Romanovsky CLIENT_DATA_REGISTERED) { 431dc1435c0SLeon Romanovsky struct ib_client *client = xa_load(&clients, index); 432dc1435c0SLeon Romanovsky 433dc1435c0SLeon Romanovsky if (!client || !client->rename) 434dc1435c0SLeon Romanovsky continue; 435dc1435c0SLeon Romanovsky 436dc1435c0SLeon Romanovsky client->rename(ibdev, client_data); 437dc1435c0SLeon Romanovsky } 438dc1435c0SLeon Romanovsky up_read(&ibdev->client_data_rwsem); 439dc1435c0SLeon Romanovsky up_read(&devices_rwsem); 440dc1435c0SLeon Romanovsky return 0; 441dc1435c0SLeon Romanovsky } 442dc1435c0SLeon Romanovsky 443f8fc8cd9SYamin Friedman int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim) 444f8fc8cd9SYamin Friedman { 445f8fc8cd9SYamin Friedman if (use_dim > 1) 446f8fc8cd9SYamin Friedman return -EINVAL; 447f8fc8cd9SYamin Friedman ibdev->use_cq_dim = use_dim; 448f8fc8cd9SYamin Friedman 449f8fc8cd9SYamin Friedman return 0; 450f8fc8cd9SYamin Friedman } 451f8fc8cd9SYamin Friedman 452e349f858SJason Gunthorpe static int alloc_name(struct ib_device *ibdev, const char *name) 4531da177e4SLinus Torvalds { 4541da177e4SLinus Torvalds struct ib_device *device; 4550df91bb6SJason Gunthorpe unsigned long index; 4563b88afd3SJason Gunthorpe struct ida inuse; 4573b88afd3SJason Gunthorpe int rc; 4581da177e4SLinus Torvalds int i; 4591da177e4SLinus Torvalds 4609ffbe8acSNikolay Borisov lockdep_assert_held_write(&devices_rwsem); 4613b88afd3SJason Gunthorpe ida_init(&inuse); 4620df91bb6SJason Gunthorpe xa_for_each (&devices, index, device) { 463e349f858SJason Gunthorpe char buf[IB_DEVICE_NAME_MAX]; 464e349f858SJason Gunthorpe 465896de009SJason Gunthorpe if (sscanf(dev_name(&device->dev), name, &i) != 1) 4661da177e4SLinus Torvalds continue; 4673b88afd3SJason Gunthorpe if (i < 0 || i >= INT_MAX) 4681da177e4SLinus Torvalds continue; 4691da177e4SLinus Torvalds snprintf(buf, sizeof buf, name, i); 4703b88afd3SJason Gunthorpe if (strcmp(buf, dev_name(&device->dev)) != 0) 4713b88afd3SJason Gunthorpe continue; 4723b88afd3SJason Gunthorpe 4733b88afd3SJason Gunthorpe rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL); 4743b88afd3SJason Gunthorpe if (rc < 0) 4753b88afd3SJason Gunthorpe goto out; 4761da177e4SLinus Torvalds } 4771da177e4SLinus Torvalds 4783b88afd3SJason Gunthorpe rc = ida_alloc(&inuse, GFP_KERNEL); 4793b88afd3SJason Gunthorpe if (rc < 0) 4803b88afd3SJason Gunthorpe goto out; 4811da177e4SLinus Torvalds 4823b88afd3SJason Gunthorpe rc = dev_set_name(&ibdev->dev, name, rc); 4833b88afd3SJason Gunthorpe out: 4843b88afd3SJason Gunthorpe ida_destroy(&inuse); 4853b88afd3SJason Gunthorpe return rc; 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds 48855aeed06SJason Gunthorpe static void ib_device_release(struct device *device) 48955aeed06SJason Gunthorpe { 49055aeed06SJason Gunthorpe struct ib_device *dev = container_of(device, struct ib_device, dev); 49155aeed06SJason Gunthorpe 492c2261dd7SJason Gunthorpe free_netdevs(dev); 493652432f3SJason Gunthorpe WARN_ON(refcount_read(&dev->refcount)); 494b7066b32SJason Gunthorpe if (dev->hw_stats_data) 495b7066b32SJason Gunthorpe ib_device_release_hw_stats(dev->hw_stats_data); 49646bdf370SKamal Heib if (dev->port_data) { 49703db3a2dSMatan Barak ib_cache_release_one(dev); 498b34b269aSJason Gunthorpe ib_security_release_port_pkey_list(dev); 499413d3347SMark Zhang rdma_counter_release(dev); 500324e227eSJason Gunthorpe kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, 501324e227eSJason Gunthorpe pdata[0]), 502324e227eSJason Gunthorpe rcu_head); 50346bdf370SKamal Heib } 504413d3347SMark Zhang 50556594ae1SParav Pandit mutex_destroy(&dev->unregistration_lock); 50656594ae1SParav Pandit mutex_destroy(&dev->compat_devs_mutex); 50756594ae1SParav Pandit 50846bdf370SKamal Heib xa_destroy(&dev->compat_devs); 50946bdf370SKamal Heib xa_destroy(&dev->client_data); 510324e227eSJason Gunthorpe kfree_rcu(dev, rcu_head); 51155aeed06SJason Gunthorpe } 51255aeed06SJason Gunthorpe 51355aeed06SJason Gunthorpe static int ib_device_uevent(struct device *device, 51455aeed06SJason Gunthorpe struct kobj_uevent_env *env) 51555aeed06SJason Gunthorpe { 516896de009SJason Gunthorpe if (add_uevent_var(env, "NAME=%s", dev_name(device))) 51755aeed06SJason Gunthorpe return -ENOMEM; 51855aeed06SJason Gunthorpe 51955aeed06SJason Gunthorpe /* 52055aeed06SJason Gunthorpe * It would be nice to pass the node GUID with the event... 52155aeed06SJason Gunthorpe */ 52255aeed06SJason Gunthorpe 52355aeed06SJason Gunthorpe return 0; 52455aeed06SJason Gunthorpe } 52555aeed06SJason Gunthorpe 52662dfa795SParav Pandit static const void *net_namespace(struct device *d) 52762dfa795SParav Pandit { 5284e0f7b90SParav Pandit struct ib_core_device *coredev = 5294e0f7b90SParav Pandit container_of(d, struct ib_core_device, dev); 5304e0f7b90SParav Pandit 5314e0f7b90SParav Pandit return read_pnet(&coredev->rdma_net); 53262dfa795SParav Pandit } 53362dfa795SParav Pandit 53455aeed06SJason Gunthorpe static struct class ib_class = { 53555aeed06SJason Gunthorpe .name = "infiniband", 53655aeed06SJason Gunthorpe .dev_release = ib_device_release, 53755aeed06SJason Gunthorpe .dev_uevent = ib_device_uevent, 53862dfa795SParav Pandit .ns_type = &net_ns_type_operations, 53962dfa795SParav Pandit .namespace = net_namespace, 54055aeed06SJason Gunthorpe }; 54155aeed06SJason Gunthorpe 542cebe556bSParav Pandit static void rdma_init_coredev(struct ib_core_device *coredev, 5434e0f7b90SParav Pandit struct ib_device *dev, struct net *net) 544cebe556bSParav Pandit { 545cebe556bSParav Pandit /* This BUILD_BUG_ON is intended to catch layout change 546cebe556bSParav Pandit * of union of ib_core_device and device. 547cebe556bSParav Pandit * dev must be the first element as ib_core and providers 548cebe556bSParav Pandit * driver uses it. Adding anything in ib_core_device before 549cebe556bSParav Pandit * device will break this assumption. 550cebe556bSParav Pandit */ 551cebe556bSParav Pandit BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != 552cebe556bSParav Pandit offsetof(struct ib_device, dev)); 553cebe556bSParav Pandit 554cebe556bSParav Pandit coredev->dev.class = &ib_class; 555cebe556bSParav Pandit coredev->dev.groups = dev->groups; 556cebe556bSParav Pandit device_initialize(&coredev->dev); 557cebe556bSParav Pandit coredev->owner = dev; 558cebe556bSParav Pandit INIT_LIST_HEAD(&coredev->port_list); 5594e0f7b90SParav Pandit write_pnet(&coredev->rdma_net, net); 560cebe556bSParav Pandit } 561cebe556bSParav Pandit 5621da177e4SLinus Torvalds /** 563459cc69fSLeon Romanovsky * _ib_alloc_device - allocate an IB device struct 5641da177e4SLinus Torvalds * @size:size of structure to allocate 5651da177e4SLinus Torvalds * 5661da177e4SLinus Torvalds * Low-level drivers should use ib_alloc_device() to allocate &struct 5671da177e4SLinus Torvalds * ib_device. @size is the size of the structure to be allocated, 5681da177e4SLinus Torvalds * including any private data used by the low-level driver. 5691da177e4SLinus Torvalds * ib_dealloc_device() must be used to free structures allocated with 5701da177e4SLinus Torvalds * ib_alloc_device(). 5711da177e4SLinus Torvalds */ 572459cc69fSLeon Romanovsky struct ib_device *_ib_alloc_device(size_t size) 5731da177e4SLinus Torvalds { 57455aeed06SJason Gunthorpe struct ib_device *device; 575286e1d3fSJack Morgenstein unsigned int i; 5761da177e4SLinus Torvalds 57755aeed06SJason Gunthorpe if (WARN_ON(size < sizeof(struct ib_device))) 57855aeed06SJason Gunthorpe return NULL; 57955aeed06SJason Gunthorpe 58055aeed06SJason Gunthorpe device = kzalloc(size, GFP_KERNEL); 58155aeed06SJason Gunthorpe if (!device) 58255aeed06SJason Gunthorpe return NULL; 58355aeed06SJason Gunthorpe 58441eda65cSLeon Romanovsky if (rdma_restrack_init(device)) { 58541eda65cSLeon Romanovsky kfree(device); 58641eda65cSLeon Romanovsky return NULL; 58741eda65cSLeon Romanovsky } 58802d8883fSLeon Romanovsky 5895f8f5499SParav Pandit device->groups[0] = &ib_dev_attr_group; 5904e0f7b90SParav Pandit rdma_init_coredev(&device->coredev, device, &init_net); 59155aeed06SJason Gunthorpe 59255aeed06SJason Gunthorpe INIT_LIST_HEAD(&device->event_handler_list); 59340adf686SParav Pandit spin_lock_init(&device->qp_open_list_lock); 5946b57cea9SParav Pandit init_rwsem(&device->event_handler_rwsem); 595d0899892SJason Gunthorpe mutex_init(&device->unregistration_lock); 5960df91bb6SJason Gunthorpe /* 5970df91bb6SJason Gunthorpe * client_data needs to be alloc because we don't want our mark to be 5980df91bb6SJason Gunthorpe * destroyed if the user stores NULL in the client data. 5990df91bb6SJason Gunthorpe */ 6000df91bb6SJason Gunthorpe xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); 601921eab11SJason Gunthorpe init_rwsem(&device->client_data_rwsem); 6024e0f7b90SParav Pandit xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); 6034e0f7b90SParav Pandit mutex_init(&device->compat_devs_mutex); 60401b67117SParav Pandit init_completion(&device->unreg_completion); 605d0899892SJason Gunthorpe INIT_WORK(&device->unregistration_work, ib_unregister_work); 60655aeed06SJason Gunthorpe 607286e1d3fSJack Morgenstein spin_lock_init(&device->cq_pools_lock); 608286e1d3fSJack Morgenstein for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++) 609286e1d3fSJack Morgenstein INIT_LIST_HEAD(&device->cq_pools[i]); 610286e1d3fSJack Morgenstein 611c074bb1eSJason Gunthorpe device->uverbs_cmd_mask = 61244ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | 613c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | 61444ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | 61544ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | 616676a80adSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | 617c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 618c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | 619c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | 62044ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | 621652caba5SJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) | 62244ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | 623c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | 624c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | 625676a80adSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | 626c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | 627c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | 62844ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | 62944ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) | 630c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | 631c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | 63244ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | 63344ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) | 63444ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) | 635c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | 636c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | 637c074bb1eSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | 63844ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | 63944ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | 64044ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | 64144ce37bcSJason Gunthorpe BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); 64255aeed06SJason Gunthorpe return device; 6431da177e4SLinus Torvalds } 644459cc69fSLeon Romanovsky EXPORT_SYMBOL(_ib_alloc_device); 6451da177e4SLinus Torvalds 6461da177e4SLinus Torvalds /** 6471da177e4SLinus Torvalds * ib_dealloc_device - free an IB device struct 6481da177e4SLinus Torvalds * @device:structure to free 6491da177e4SLinus Torvalds * 6501da177e4SLinus Torvalds * Free a structure allocated with ib_alloc_device(). 6511da177e4SLinus Torvalds */ 6521da177e4SLinus Torvalds void ib_dealloc_device(struct ib_device *device) 6531da177e4SLinus Torvalds { 654d0899892SJason Gunthorpe if (device->ops.dealloc_driver) 655d0899892SJason Gunthorpe device->ops.dealloc_driver(device); 656d0899892SJason Gunthorpe 657d0899892SJason Gunthorpe /* 658d0899892SJason Gunthorpe * ib_unregister_driver() requires all devices to remain in the xarray 659d0899892SJason Gunthorpe * while their ops are callable. The last op we call is dealloc_driver 660d0899892SJason Gunthorpe * above. This is needed to create a fence on op callbacks prior to 661d0899892SJason Gunthorpe * allowing the driver module to unload. 662d0899892SJason Gunthorpe */ 663d0899892SJason Gunthorpe down_write(&devices_rwsem); 664d0899892SJason Gunthorpe if (xa_load(&devices, device->index) == device) 665d0899892SJason Gunthorpe xa_erase(&devices, device->index); 666d0899892SJason Gunthorpe up_write(&devices_rwsem); 667d0899892SJason Gunthorpe 668c2261dd7SJason Gunthorpe /* Expedite releasing netdev references */ 669c2261dd7SJason Gunthorpe free_netdevs(device); 670c2261dd7SJason Gunthorpe 6714e0f7b90SParav Pandit WARN_ON(!xa_empty(&device->compat_devs)); 6720df91bb6SJason Gunthorpe WARN_ON(!xa_empty(&device->client_data)); 673652432f3SJason Gunthorpe WARN_ON(refcount_read(&device->refcount)); 6740ad699c0SLeon Romanovsky rdma_restrack_clean(device); 675e155755eSParav Pandit /* Balances with device_initialize */ 676924b8900SLeon Romanovsky put_device(&device->dev); 6771da177e4SLinus Torvalds } 6781da177e4SLinus Torvalds EXPORT_SYMBOL(ib_dealloc_device); 6791da177e4SLinus Torvalds 680921eab11SJason Gunthorpe /* 681921eab11SJason Gunthorpe * add_client_context() and remove_client_context() must be safe against 682921eab11SJason Gunthorpe * parallel calls on the same device - registration/unregistration of both the 683921eab11SJason Gunthorpe * device and client can be occurring in parallel. 684921eab11SJason Gunthorpe * 685921eab11SJason Gunthorpe * The routines need to be a fence, any caller must not return until the add 686921eab11SJason Gunthorpe * or remove is fully completed. 687921eab11SJason Gunthorpe */ 688921eab11SJason Gunthorpe static int add_client_context(struct ib_device *device, 689921eab11SJason Gunthorpe struct ib_client *client) 6901da177e4SLinus Torvalds { 691921eab11SJason Gunthorpe int ret = 0; 6921da177e4SLinus Torvalds 6936780c4faSGal Pressman if (!device->kverbs_provider && !client->no_kverbs_req) 694921eab11SJason Gunthorpe return 0; 6956780c4faSGal Pressman 696921eab11SJason Gunthorpe down_write(&device->client_data_rwsem); 697921eab11SJason Gunthorpe /* 698621e55ffSJason Gunthorpe * So long as the client is registered hold both the client and device 699621e55ffSJason Gunthorpe * unregistration locks. 700621e55ffSJason Gunthorpe */ 701621e55ffSJason Gunthorpe if (!refcount_inc_not_zero(&client->uses)) 702621e55ffSJason Gunthorpe goto out_unlock; 703621e55ffSJason Gunthorpe refcount_inc(&device->refcount); 704621e55ffSJason Gunthorpe 705621e55ffSJason Gunthorpe /* 706921eab11SJason Gunthorpe * Another caller to add_client_context got here first and has already 707921eab11SJason Gunthorpe * completely initialized context. 708921eab11SJason Gunthorpe */ 709921eab11SJason Gunthorpe if (xa_get_mark(&device->client_data, client->client_id, 710921eab11SJason Gunthorpe CLIENT_DATA_REGISTERED)) 711921eab11SJason Gunthorpe goto out; 712921eab11SJason Gunthorpe 713921eab11SJason Gunthorpe ret = xa_err(xa_store(&device->client_data, client->client_id, NULL, 714921eab11SJason Gunthorpe GFP_KERNEL)); 715921eab11SJason Gunthorpe if (ret) 716921eab11SJason Gunthorpe goto out; 717921eab11SJason Gunthorpe downgrade_write(&device->client_data_rwsem); 71811a0ae4cSJason Gunthorpe if (client->add) { 71911a0ae4cSJason Gunthorpe if (client->add(device)) { 72011a0ae4cSJason Gunthorpe /* 72111a0ae4cSJason Gunthorpe * If a client fails to add then the error code is 72211a0ae4cSJason Gunthorpe * ignored, but we won't call any more ops on this 72311a0ae4cSJason Gunthorpe * client. 72411a0ae4cSJason Gunthorpe */ 72511a0ae4cSJason Gunthorpe xa_erase(&device->client_data, client->client_id); 72611a0ae4cSJason Gunthorpe up_read(&device->client_data_rwsem); 72711a0ae4cSJason Gunthorpe ib_device_put(device); 72811a0ae4cSJason Gunthorpe ib_client_put(client); 72911a0ae4cSJason Gunthorpe return 0; 73011a0ae4cSJason Gunthorpe } 73111a0ae4cSJason Gunthorpe } 732921eab11SJason Gunthorpe 733921eab11SJason Gunthorpe /* Readers shall not see a client until add has been completed */ 7340df91bb6SJason Gunthorpe xa_set_mark(&device->client_data, client->client_id, 7350df91bb6SJason Gunthorpe CLIENT_DATA_REGISTERED); 736921eab11SJason Gunthorpe up_read(&device->client_data_rwsem); 737921eab11SJason Gunthorpe return 0; 7381da177e4SLinus Torvalds 739921eab11SJason Gunthorpe out: 740621e55ffSJason Gunthorpe ib_device_put(device); 741621e55ffSJason Gunthorpe ib_client_put(client); 742621e55ffSJason Gunthorpe out_unlock: 743921eab11SJason Gunthorpe up_write(&device->client_data_rwsem); 744921eab11SJason Gunthorpe return ret; 745921eab11SJason Gunthorpe } 746921eab11SJason Gunthorpe 747921eab11SJason Gunthorpe static void remove_client_context(struct ib_device *device, 748921eab11SJason Gunthorpe unsigned int client_id) 749921eab11SJason Gunthorpe { 750921eab11SJason Gunthorpe struct ib_client *client; 751921eab11SJason Gunthorpe void *client_data; 752921eab11SJason Gunthorpe 753921eab11SJason Gunthorpe down_write(&device->client_data_rwsem); 754921eab11SJason Gunthorpe if (!xa_get_mark(&device->client_data, client_id, 755921eab11SJason Gunthorpe CLIENT_DATA_REGISTERED)) { 756921eab11SJason Gunthorpe up_write(&device->client_data_rwsem); 757921eab11SJason Gunthorpe return; 758921eab11SJason Gunthorpe } 759921eab11SJason Gunthorpe client_data = xa_load(&device->client_data, client_id); 760921eab11SJason Gunthorpe xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); 761921eab11SJason Gunthorpe client = xa_load(&clients, client_id); 762621e55ffSJason Gunthorpe up_write(&device->client_data_rwsem); 763921eab11SJason Gunthorpe 764921eab11SJason Gunthorpe /* 765921eab11SJason Gunthorpe * Notice we cannot be holding any exclusive locks when calling the 766921eab11SJason Gunthorpe * remove callback as the remove callback can recurse back into any 767921eab11SJason Gunthorpe * public functions in this module and thus try for any locks those 768921eab11SJason Gunthorpe * functions take. 769921eab11SJason Gunthorpe * 770921eab11SJason Gunthorpe * For this reason clients and drivers should not call the 771921eab11SJason Gunthorpe * unregistration functions will holdling any locks. 772921eab11SJason Gunthorpe */ 773921eab11SJason Gunthorpe if (client->remove) 774921eab11SJason Gunthorpe client->remove(device, client_data); 775921eab11SJason Gunthorpe 776921eab11SJason Gunthorpe xa_erase(&device->client_data, client_id); 777621e55ffSJason Gunthorpe ib_device_put(device); 778621e55ffSJason Gunthorpe ib_client_put(client); 7791da177e4SLinus Torvalds } 7801da177e4SLinus Torvalds 781c2261dd7SJason Gunthorpe static int alloc_port_data(struct ib_device *device) 7825eb620c8SYosef Etigin { 783324e227eSJason Gunthorpe struct ib_port_data_rcu *pdata_rcu; 7841fb7f897SMark Bloch u32 port; 785c2261dd7SJason Gunthorpe 786c2261dd7SJason Gunthorpe if (device->port_data) 787c2261dd7SJason Gunthorpe return 0; 788c2261dd7SJason Gunthorpe 789c2261dd7SJason Gunthorpe /* This can only be called once the physical port range is defined */ 790c2261dd7SJason Gunthorpe if (WARN_ON(!device->phys_port_cnt)) 791c2261dd7SJason Gunthorpe return -EINVAL; 7925eb620c8SYosef Etigin 7931fb7f897SMark Bloch /* Reserve U32_MAX so the logic to go over all the ports is sane */ 7941fb7f897SMark Bloch if (WARN_ON(device->phys_port_cnt == U32_MAX)) 7951fb7f897SMark Bloch return -EINVAL; 7961fb7f897SMark Bloch 7978ceb1357SJason Gunthorpe /* 7988ceb1357SJason Gunthorpe * device->port_data is indexed directly by the port number to make 7997738613eSIra Weiny * access to this data as efficient as possible. 8007738613eSIra Weiny * 8018ceb1357SJason Gunthorpe * Therefore port_data is declared as a 1 based array with potential 8028ceb1357SJason Gunthorpe * empty slots at the beginning. 8037738613eSIra Weiny */ 804324e227eSJason Gunthorpe pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata, 805324e227eSJason Gunthorpe rdma_end_port(device) + 1), 806324e227eSJason Gunthorpe GFP_KERNEL); 807324e227eSJason Gunthorpe if (!pdata_rcu) 80855aeed06SJason Gunthorpe return -ENOMEM; 809324e227eSJason Gunthorpe /* 810324e227eSJason Gunthorpe * The rcu_head is put in front of the port data array and the stored 811324e227eSJason Gunthorpe * pointer is adjusted since we never need to see that member until 812324e227eSJason Gunthorpe * kfree_rcu. 813324e227eSJason Gunthorpe */ 814324e227eSJason Gunthorpe device->port_data = pdata_rcu->pdata; 8155eb620c8SYosef Etigin 816ea1075edSJason Gunthorpe rdma_for_each_port (device, port) { 8178ceb1357SJason Gunthorpe struct ib_port_data *pdata = &device->port_data[port]; 8188ceb1357SJason Gunthorpe 819324e227eSJason Gunthorpe pdata->ib_dev = device; 8208ceb1357SJason Gunthorpe spin_lock_init(&pdata->pkey_list_lock); 8218ceb1357SJason Gunthorpe INIT_LIST_HEAD(&pdata->pkey_list); 822c2261dd7SJason Gunthorpe spin_lock_init(&pdata->netdev_lock); 823324e227eSJason Gunthorpe INIT_HLIST_NODE(&pdata->ndev_hash_link); 824c2261dd7SJason Gunthorpe } 825c2261dd7SJason Gunthorpe return 0; 826c2261dd7SJason Gunthorpe } 827c2261dd7SJason Gunthorpe 8281fb7f897SMark Bloch static int verify_immutable(const struct ib_device *dev, u32 port) 829c2261dd7SJason Gunthorpe { 830c2261dd7SJason Gunthorpe return WARN_ON(!rdma_cap_ib_mad(dev, port) && 831c2261dd7SJason Gunthorpe rdma_max_mad_size(dev, port) != 0); 832c2261dd7SJason Gunthorpe } 833c2261dd7SJason Gunthorpe 834c2261dd7SJason Gunthorpe static int setup_port_data(struct ib_device *device) 835c2261dd7SJason Gunthorpe { 8361fb7f897SMark Bloch u32 port; 837c2261dd7SJason Gunthorpe int ret; 838c2261dd7SJason Gunthorpe 839c2261dd7SJason Gunthorpe ret = alloc_port_data(device); 840c2261dd7SJason Gunthorpe if (ret) 841c2261dd7SJason Gunthorpe return ret; 842c2261dd7SJason Gunthorpe 843c2261dd7SJason Gunthorpe rdma_for_each_port (device, port) { 844c2261dd7SJason Gunthorpe struct ib_port_data *pdata = &device->port_data[port]; 8458ceb1357SJason Gunthorpe 8468ceb1357SJason Gunthorpe ret = device->ops.get_port_immutable(device, port, 8478ceb1357SJason Gunthorpe &pdata->immutable); 8485eb620c8SYosef Etigin if (ret) 8495eb620c8SYosef Etigin return ret; 85055aeed06SJason Gunthorpe 85155aeed06SJason Gunthorpe if (verify_immutable(device, port)) 85255aeed06SJason Gunthorpe return -EINVAL; 85355aeed06SJason Gunthorpe } 85455aeed06SJason Gunthorpe return 0; 8555eb620c8SYosef Etigin } 8565eb620c8SYosef Etigin 8577416790eSParav Pandit /** 8587416790eSParav Pandit * ib_port_immutable_read() - Read rdma port's immutable data 859168e4cd9SLeon Romanovsky * @dev: IB device 860168e4cd9SLeon Romanovsky * @port: port number whose immutable data to read. It starts with index 1 and 8617416790eSParav Pandit * valid upto including rdma_end_port(). 8627416790eSParav Pandit */ 8637416790eSParav Pandit const struct ib_port_immutable* 8647416790eSParav Pandit ib_port_immutable_read(struct ib_device *dev, unsigned int port) 8657416790eSParav Pandit { 8667416790eSParav Pandit WARN_ON(!rdma_is_port_valid(dev, port)); 8677416790eSParav Pandit return &dev->port_data[port].immutable; 8687416790eSParav Pandit } 8697416790eSParav Pandit EXPORT_SYMBOL(ib_port_immutable_read); 8707416790eSParav Pandit 8719abb0d1bSLeon Romanovsky void ib_get_device_fw_str(struct ib_device *dev, char *str) 8725fa76c20SIra Weiny { 8733023a1e9SKamal Heib if (dev->ops.get_dev_fw_str) 8743023a1e9SKamal Heib dev->ops.get_dev_fw_str(dev, str); 8755fa76c20SIra Weiny else 8765fa76c20SIra Weiny str[0] = '\0'; 8775fa76c20SIra Weiny } 8785fa76c20SIra Weiny EXPORT_SYMBOL(ib_get_device_fw_str); 8795fa76c20SIra Weiny 8808f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work) 8818f408ab6SDaniel Jurgens { 8828f408ab6SDaniel Jurgens struct ib_device *dev; 8830df91bb6SJason Gunthorpe unsigned long index; 8848f408ab6SDaniel Jurgens 885921eab11SJason Gunthorpe down_read(&devices_rwsem); 8860df91bb6SJason Gunthorpe xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 887ea1075edSJason Gunthorpe unsigned int i; 8888f408ab6SDaniel Jurgens 889ea1075edSJason Gunthorpe rdma_for_each_port (dev, i) { 8908f408ab6SDaniel Jurgens u64 sp; 8918f408ab6SDaniel Jurgens int ret = ib_get_cached_subnet_prefix(dev, 8928f408ab6SDaniel Jurgens i, 8938f408ab6SDaniel Jurgens &sp); 8948f408ab6SDaniel Jurgens 8958f408ab6SDaniel Jurgens WARN_ONCE(ret, 8968f408ab6SDaniel Jurgens "ib_get_cached_subnet_prefix err: %d, this should never happen here\n", 8978f408ab6SDaniel Jurgens ret); 898a750cfdeSDaniel Jurgens if (!ret) 8998f408ab6SDaniel Jurgens ib_security_cache_change(dev, i, sp); 9008f408ab6SDaniel Jurgens } 9018f408ab6SDaniel Jurgens } 902921eab11SJason Gunthorpe up_read(&devices_rwsem); 9038f408ab6SDaniel Jurgens } 9048f408ab6SDaniel Jurgens 9058f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event, 9068f408ab6SDaniel Jurgens void *lsm_data) 9078f408ab6SDaniel Jurgens { 9088f408ab6SDaniel Jurgens if (event != LSM_POLICY_CHANGE) 9098f408ab6SDaniel Jurgens return NOTIFY_DONE; 9108f408ab6SDaniel Jurgens 9118f408ab6SDaniel Jurgens schedule_work(&ib_policy_change_work); 912c66f6741SDaniel Jurgens ib_mad_agent_security_change(); 9138f408ab6SDaniel Jurgens 9148f408ab6SDaniel Jurgens return NOTIFY_OK; 9158f408ab6SDaniel Jurgens } 9168f408ab6SDaniel Jurgens 9174e0f7b90SParav Pandit static void compatdev_release(struct device *dev) 9184e0f7b90SParav Pandit { 9194e0f7b90SParav Pandit struct ib_core_device *cdev = 9204e0f7b90SParav Pandit container_of(dev, struct ib_core_device, dev); 9214e0f7b90SParav Pandit 9224e0f7b90SParav Pandit kfree(cdev); 9234e0f7b90SParav Pandit } 9244e0f7b90SParav Pandit 9254e0f7b90SParav Pandit static int add_one_compat_dev(struct ib_device *device, 9264e0f7b90SParav Pandit struct rdma_dev_net *rnet) 9274e0f7b90SParav Pandit { 9284e0f7b90SParav Pandit struct ib_core_device *cdev; 9294e0f7b90SParav Pandit int ret; 9304e0f7b90SParav Pandit 9312b34c558SParav Pandit lockdep_assert_held(&rdma_nets_rwsem); 932a56bc45bSParav Pandit if (!ib_devices_shared_netns) 933a56bc45bSParav Pandit return 0; 934a56bc45bSParav Pandit 9354e0f7b90SParav Pandit /* 9364e0f7b90SParav Pandit * Create and add compat device in all namespaces other than where it 9374e0f7b90SParav Pandit * is currently bound to. 9384e0f7b90SParav Pandit */ 9394e0f7b90SParav Pandit if (net_eq(read_pnet(&rnet->net), 9404e0f7b90SParav Pandit read_pnet(&device->coredev.rdma_net))) 9414e0f7b90SParav Pandit return 0; 9424e0f7b90SParav Pandit 9434e0f7b90SParav Pandit /* 9444e0f7b90SParav Pandit * The first of init_net() or ib_register_device() to take the 9454e0f7b90SParav Pandit * compat_devs_mutex wins and gets to add the device. Others will wait 9464e0f7b90SParav Pandit * for completion here. 9474e0f7b90SParav Pandit */ 9484e0f7b90SParav Pandit mutex_lock(&device->compat_devs_mutex); 9494e0f7b90SParav Pandit cdev = xa_load(&device->compat_devs, rnet->id); 9504e0f7b90SParav Pandit if (cdev) { 9514e0f7b90SParav Pandit ret = 0; 9524e0f7b90SParav Pandit goto done; 9534e0f7b90SParav Pandit } 9544e0f7b90SParav Pandit ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); 9554e0f7b90SParav Pandit if (ret) 9564e0f7b90SParav Pandit goto done; 9574e0f7b90SParav Pandit 9584e0f7b90SParav Pandit cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); 9594e0f7b90SParav Pandit if (!cdev) { 9604e0f7b90SParav Pandit ret = -ENOMEM; 9614e0f7b90SParav Pandit goto cdev_err; 9624e0f7b90SParav Pandit } 9634e0f7b90SParav Pandit 9644e0f7b90SParav Pandit cdev->dev.parent = device->dev.parent; 9654e0f7b90SParav Pandit rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); 9664e0f7b90SParav Pandit cdev->dev.release = compatdev_release; 967f2f2b3bbSJason Gunthorpe ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); 968f2f2b3bbSJason Gunthorpe if (ret) 969f2f2b3bbSJason Gunthorpe goto add_err; 9704e0f7b90SParav Pandit 9714e0f7b90SParav Pandit ret = device_add(&cdev->dev); 9724e0f7b90SParav Pandit if (ret) 9734e0f7b90SParav Pandit goto add_err; 974eb15c78bSParav Pandit ret = ib_setup_port_attrs(cdev); 9755417783eSParav Pandit if (ret) 9765417783eSParav Pandit goto port_err; 9774e0f7b90SParav Pandit 9784e0f7b90SParav Pandit ret = xa_err(xa_store(&device->compat_devs, rnet->id, 9794e0f7b90SParav Pandit cdev, GFP_KERNEL)); 9804e0f7b90SParav Pandit if (ret) 9814e0f7b90SParav Pandit goto insert_err; 9824e0f7b90SParav Pandit 9834e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 9844e0f7b90SParav Pandit return 0; 9854e0f7b90SParav Pandit 9864e0f7b90SParav Pandit insert_err: 9875417783eSParav Pandit ib_free_port_attrs(cdev); 9885417783eSParav Pandit port_err: 9894e0f7b90SParav Pandit device_del(&cdev->dev); 9904e0f7b90SParav Pandit add_err: 9914e0f7b90SParav Pandit put_device(&cdev->dev); 9924e0f7b90SParav Pandit cdev_err: 9934e0f7b90SParav Pandit xa_release(&device->compat_devs, rnet->id); 9944e0f7b90SParav Pandit done: 9954e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 9964e0f7b90SParav Pandit return ret; 9974e0f7b90SParav Pandit } 9984e0f7b90SParav Pandit 9994e0f7b90SParav Pandit static void remove_one_compat_dev(struct ib_device *device, u32 id) 10004e0f7b90SParav Pandit { 10014e0f7b90SParav Pandit struct ib_core_device *cdev; 10024e0f7b90SParav Pandit 10034e0f7b90SParav Pandit mutex_lock(&device->compat_devs_mutex); 10044e0f7b90SParav Pandit cdev = xa_erase(&device->compat_devs, id); 10054e0f7b90SParav Pandit mutex_unlock(&device->compat_devs_mutex); 10064e0f7b90SParav Pandit if (cdev) { 10075417783eSParav Pandit ib_free_port_attrs(cdev); 10084e0f7b90SParav Pandit device_del(&cdev->dev); 10094e0f7b90SParav Pandit put_device(&cdev->dev); 10104e0f7b90SParav Pandit } 10114e0f7b90SParav Pandit } 10124e0f7b90SParav Pandit 10134e0f7b90SParav Pandit static void remove_compat_devs(struct ib_device *device) 10144e0f7b90SParav Pandit { 10154e0f7b90SParav Pandit struct ib_core_device *cdev; 10164e0f7b90SParav Pandit unsigned long index; 10174e0f7b90SParav Pandit 10184e0f7b90SParav Pandit xa_for_each (&device->compat_devs, index, cdev) 10194e0f7b90SParav Pandit remove_one_compat_dev(device, index); 10204e0f7b90SParav Pandit } 10214e0f7b90SParav Pandit 10224e0f7b90SParav Pandit static int add_compat_devs(struct ib_device *device) 10234e0f7b90SParav Pandit { 10244e0f7b90SParav Pandit struct rdma_dev_net *rnet; 10254e0f7b90SParav Pandit unsigned long index; 10264e0f7b90SParav Pandit int ret = 0; 10274e0f7b90SParav Pandit 1028decbc7a6SParav Pandit lockdep_assert_held(&devices_rwsem); 1029decbc7a6SParav Pandit 10304e0f7b90SParav Pandit down_read(&rdma_nets_rwsem); 10314e0f7b90SParav Pandit xa_for_each (&rdma_nets, index, rnet) { 10324e0f7b90SParav Pandit ret = add_one_compat_dev(device, rnet); 10334e0f7b90SParav Pandit if (ret) 10344e0f7b90SParav Pandit break; 10354e0f7b90SParav Pandit } 10364e0f7b90SParav Pandit up_read(&rdma_nets_rwsem); 10374e0f7b90SParav Pandit return ret; 10384e0f7b90SParav Pandit } 10394e0f7b90SParav Pandit 10402b34c558SParav Pandit static void remove_all_compat_devs(void) 10412b34c558SParav Pandit { 10422b34c558SParav Pandit struct ib_compat_device *cdev; 10432b34c558SParav Pandit struct ib_device *dev; 10442b34c558SParav Pandit unsigned long index; 10452b34c558SParav Pandit 10462b34c558SParav Pandit down_read(&devices_rwsem); 10472b34c558SParav Pandit xa_for_each (&devices, index, dev) { 10482b34c558SParav Pandit unsigned long c_index = 0; 10492b34c558SParav Pandit 10502b34c558SParav Pandit /* Hold nets_rwsem so that any other thread modifying this 10512b34c558SParav Pandit * system param can sync with this thread. 10522b34c558SParav Pandit */ 10532b34c558SParav Pandit down_read(&rdma_nets_rwsem); 10542b34c558SParav Pandit xa_for_each (&dev->compat_devs, c_index, cdev) 10552b34c558SParav Pandit remove_one_compat_dev(dev, c_index); 10562b34c558SParav Pandit up_read(&rdma_nets_rwsem); 10572b34c558SParav Pandit } 10582b34c558SParav Pandit up_read(&devices_rwsem); 10592b34c558SParav Pandit } 10602b34c558SParav Pandit 10612b34c558SParav Pandit static int add_all_compat_devs(void) 10622b34c558SParav Pandit { 10632b34c558SParav Pandit struct rdma_dev_net *rnet; 10642b34c558SParav Pandit struct ib_device *dev; 10652b34c558SParav Pandit unsigned long index; 10662b34c558SParav Pandit int ret = 0; 10672b34c558SParav Pandit 10682b34c558SParav Pandit down_read(&devices_rwsem); 10692b34c558SParav Pandit xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 10702b34c558SParav Pandit unsigned long net_index = 0; 10712b34c558SParav Pandit 10722b34c558SParav Pandit /* Hold nets_rwsem so that any other thread modifying this 10732b34c558SParav Pandit * system param can sync with this thread. 10742b34c558SParav Pandit */ 10752b34c558SParav Pandit down_read(&rdma_nets_rwsem); 10762b34c558SParav Pandit xa_for_each (&rdma_nets, net_index, rnet) { 10772b34c558SParav Pandit ret = add_one_compat_dev(dev, rnet); 10782b34c558SParav Pandit if (ret) 10792b34c558SParav Pandit break; 10802b34c558SParav Pandit } 10812b34c558SParav Pandit up_read(&rdma_nets_rwsem); 10822b34c558SParav Pandit } 10832b34c558SParav Pandit up_read(&devices_rwsem); 10842b34c558SParav Pandit if (ret) 10852b34c558SParav Pandit remove_all_compat_devs(); 10862b34c558SParav Pandit return ret; 10872b34c558SParav Pandit } 10882b34c558SParav Pandit 10892b34c558SParav Pandit int rdma_compatdev_set(u8 enable) 10902b34c558SParav Pandit { 10912b34c558SParav Pandit struct rdma_dev_net *rnet; 10922b34c558SParav Pandit unsigned long index; 10932b34c558SParav Pandit int ret = 0; 10942b34c558SParav Pandit 10952b34c558SParav Pandit down_write(&rdma_nets_rwsem); 10962b34c558SParav Pandit if (ib_devices_shared_netns == enable) { 10972b34c558SParav Pandit up_write(&rdma_nets_rwsem); 10982b34c558SParav Pandit return 0; 10992b34c558SParav Pandit } 11002b34c558SParav Pandit 11012b34c558SParav Pandit /* enable/disable of compat devices is not supported 11022b34c558SParav Pandit * when more than default init_net exists. 11032b34c558SParav Pandit */ 11042b34c558SParav Pandit xa_for_each (&rdma_nets, index, rnet) { 11052b34c558SParav Pandit ret++; 11062b34c558SParav Pandit break; 11072b34c558SParav Pandit } 11082b34c558SParav Pandit if (!ret) 11092b34c558SParav Pandit ib_devices_shared_netns = enable; 11102b34c558SParav Pandit up_write(&rdma_nets_rwsem); 11112b34c558SParav Pandit if (ret) 11122b34c558SParav Pandit return -EBUSY; 11132b34c558SParav Pandit 11142b34c558SParav Pandit if (enable) 11152b34c558SParav Pandit ret = add_all_compat_devs(); 11162b34c558SParav Pandit else 11172b34c558SParav Pandit remove_all_compat_devs(); 11182b34c558SParav Pandit return ret; 11192b34c558SParav Pandit } 11202b34c558SParav Pandit 11214e0f7b90SParav Pandit static void rdma_dev_exit_net(struct net *net) 11224e0f7b90SParav Pandit { 11231d2fedd8SParav Pandit struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); 11244e0f7b90SParav Pandit struct ib_device *dev; 11254e0f7b90SParav Pandit unsigned long index; 11264e0f7b90SParav Pandit int ret; 11274e0f7b90SParav Pandit 11284e0f7b90SParav Pandit down_write(&rdma_nets_rwsem); 11294e0f7b90SParav Pandit /* 11304e0f7b90SParav Pandit * Prevent the ID from being re-used and hide the id from xa_for_each. 11314e0f7b90SParav Pandit */ 11324e0f7b90SParav Pandit ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); 11334e0f7b90SParav Pandit WARN_ON(ret); 11344e0f7b90SParav Pandit up_write(&rdma_nets_rwsem); 11354e0f7b90SParav Pandit 11364e0f7b90SParav Pandit down_read(&devices_rwsem); 11374e0f7b90SParav Pandit xa_for_each (&devices, index, dev) { 11384e0f7b90SParav Pandit get_device(&dev->dev); 11394e0f7b90SParav Pandit /* 11404e0f7b90SParav Pandit * Release the devices_rwsem so that pontentially blocking 11414e0f7b90SParav Pandit * device_del, doesn't hold the devices_rwsem for too long. 11424e0f7b90SParav Pandit */ 11434e0f7b90SParav Pandit up_read(&devices_rwsem); 11444e0f7b90SParav Pandit 11454e0f7b90SParav Pandit remove_one_compat_dev(dev, rnet->id); 11464e0f7b90SParav Pandit 1147decbc7a6SParav Pandit /* 1148decbc7a6SParav Pandit * If the real device is in the NS then move it back to init. 1149decbc7a6SParav Pandit */ 1150decbc7a6SParav Pandit rdma_dev_change_netns(dev, net, &init_net); 1151decbc7a6SParav Pandit 11524e0f7b90SParav Pandit put_device(&dev->dev); 11534e0f7b90SParav Pandit down_read(&devices_rwsem); 11544e0f7b90SParav Pandit } 11554e0f7b90SParav Pandit up_read(&devices_rwsem); 11564e0f7b90SParav Pandit 11571d2fedd8SParav Pandit rdma_nl_net_exit(rnet); 11584e0f7b90SParav Pandit xa_erase(&rdma_nets, rnet->id); 11594e0f7b90SParav Pandit } 11604e0f7b90SParav Pandit 11614e0f7b90SParav Pandit static __net_init int rdma_dev_init_net(struct net *net) 11624e0f7b90SParav Pandit { 11631d2fedd8SParav Pandit struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); 11644e0f7b90SParav Pandit unsigned long index; 11654e0f7b90SParav Pandit struct ib_device *dev; 11664e0f7b90SParav Pandit int ret; 11674e0f7b90SParav Pandit 11681d2fedd8SParav Pandit write_pnet(&rnet->net, net); 11691d2fedd8SParav Pandit 11701d2fedd8SParav Pandit ret = rdma_nl_net_init(rnet); 11711d2fedd8SParav Pandit if (ret) 11721d2fedd8SParav Pandit return ret; 11731d2fedd8SParav Pandit 11744e0f7b90SParav Pandit /* No need to create any compat devices in default init_net. */ 11754e0f7b90SParav Pandit if (net_eq(net, &init_net)) 11764e0f7b90SParav Pandit return 0; 11774e0f7b90SParav Pandit 11784e0f7b90SParav Pandit ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); 11791d2fedd8SParav Pandit if (ret) { 11801d2fedd8SParav Pandit rdma_nl_net_exit(rnet); 11814e0f7b90SParav Pandit return ret; 11821d2fedd8SParav Pandit } 11834e0f7b90SParav Pandit 11844e0f7b90SParav Pandit down_read(&devices_rwsem); 11854e0f7b90SParav Pandit xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 11862b34c558SParav Pandit /* Hold nets_rwsem so that netlink command cannot change 11872b34c558SParav Pandit * system configuration for device sharing mode. 11882b34c558SParav Pandit */ 11892b34c558SParav Pandit down_read(&rdma_nets_rwsem); 11904e0f7b90SParav Pandit ret = add_one_compat_dev(dev, rnet); 11912b34c558SParav Pandit up_read(&rdma_nets_rwsem); 11924e0f7b90SParav Pandit if (ret) 11934e0f7b90SParav Pandit break; 11944e0f7b90SParav Pandit } 11954e0f7b90SParav Pandit up_read(&devices_rwsem); 11964e0f7b90SParav Pandit 11974e0f7b90SParav Pandit if (ret) 11984e0f7b90SParav Pandit rdma_dev_exit_net(net); 11994e0f7b90SParav Pandit 12004e0f7b90SParav Pandit return ret; 12014e0f7b90SParav Pandit } 12024e0f7b90SParav Pandit 1203ecc82c53SLeon Romanovsky /* 1204d0899892SJason Gunthorpe * Assign the unique string device name and the unique device index. This is 1205d0899892SJason Gunthorpe * undone by ib_dealloc_device. 1206ecc82c53SLeon Romanovsky */ 12070df91bb6SJason Gunthorpe static int assign_name(struct ib_device *device, const char *name) 12080df91bb6SJason Gunthorpe { 12090df91bb6SJason Gunthorpe static u32 last_id; 12100df91bb6SJason Gunthorpe int ret; 1211ecc82c53SLeon Romanovsky 1212921eab11SJason Gunthorpe down_write(&devices_rwsem); 12130df91bb6SJason Gunthorpe /* Assign a unique name to the device */ 12140df91bb6SJason Gunthorpe if (strchr(name, '%')) 12150df91bb6SJason Gunthorpe ret = alloc_name(device, name); 12160df91bb6SJason Gunthorpe else 12170df91bb6SJason Gunthorpe ret = dev_set_name(&device->dev, name); 12180df91bb6SJason Gunthorpe if (ret) 12190df91bb6SJason Gunthorpe goto out; 1220ecc82c53SLeon Romanovsky 12210df91bb6SJason Gunthorpe if (__ib_device_get_by_name(dev_name(&device->dev))) { 12220df91bb6SJason Gunthorpe ret = -ENFILE; 12230df91bb6SJason Gunthorpe goto out; 1224ecc82c53SLeon Romanovsky } 12250df91bb6SJason Gunthorpe strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); 12260df91bb6SJason Gunthorpe 1227ea295481SLinus Torvalds ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, 1228ea295481SLinus Torvalds &last_id, GFP_KERNEL); 1229ea295481SLinus Torvalds if (ret > 0) 12300df91bb6SJason Gunthorpe ret = 0; 1231921eab11SJason Gunthorpe 12320df91bb6SJason Gunthorpe out: 1233921eab11SJason Gunthorpe up_write(&devices_rwsem); 12340df91bb6SJason Gunthorpe return ret; 12350df91bb6SJason Gunthorpe } 12360df91bb6SJason Gunthorpe 1237921eab11SJason Gunthorpe /* 1238921eab11SJason Gunthorpe * setup_device() allocates memory and sets up data that requires calling the 1239921eab11SJason Gunthorpe * device ops, this is the only reason these actions are not done during 1240921eab11SJason Gunthorpe * ib_alloc_device. It is undone by ib_dealloc_device(). 1241921eab11SJason Gunthorpe */ 1242548cb4fbSParav Pandit static int setup_device(struct ib_device *device) 1243548cb4fbSParav Pandit { 1244548cb4fbSParav Pandit struct ib_udata uhw = {.outlen = 0, .inlen = 0}; 1245548cb4fbSParav Pandit int ret; 1246548cb4fbSParav Pandit 1247deee3c7eSKamal Heib ib_device_check_mandatory(device); 1248548cb4fbSParav Pandit 12498ceb1357SJason Gunthorpe ret = setup_port_data(device); 1250548cb4fbSParav Pandit if (ret) { 12518ceb1357SJason Gunthorpe dev_warn(&device->dev, "Couldn't create per-port data\n"); 1252548cb4fbSParav Pandit return ret; 1253548cb4fbSParav Pandit } 1254548cb4fbSParav Pandit 1255548cb4fbSParav Pandit memset(&device->attrs, 0, sizeof(device->attrs)); 12563023a1e9SKamal Heib ret = device->ops.query_device(device, &device->attrs, &uhw); 1257548cb4fbSParav Pandit if (ret) { 1258548cb4fbSParav Pandit dev_warn(&device->dev, 1259548cb4fbSParav Pandit "Couldn't query the device attributes\n"); 1260d45f89d5SJason Gunthorpe return ret; 1261548cb4fbSParav Pandit } 1262548cb4fbSParav Pandit 1263548cb4fbSParav Pandit return 0; 1264548cb4fbSParav Pandit } 1265548cb4fbSParav Pandit 1266921eab11SJason Gunthorpe static void disable_device(struct ib_device *device) 1267921eab11SJason Gunthorpe { 12689cd58817SJason Gunthorpe u32 cid; 1269921eab11SJason Gunthorpe 1270921eab11SJason Gunthorpe WARN_ON(!refcount_read(&device->refcount)); 1271921eab11SJason Gunthorpe 1272921eab11SJason Gunthorpe down_write(&devices_rwsem); 1273921eab11SJason Gunthorpe xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); 1274921eab11SJason Gunthorpe up_write(&devices_rwsem); 1275921eab11SJason Gunthorpe 12769cd58817SJason Gunthorpe /* 12779cd58817SJason Gunthorpe * Remove clients in LIFO order, see assign_client_id. This could be 12789cd58817SJason Gunthorpe * more efficient if xarray learns to reverse iterate. Since no new 12799cd58817SJason Gunthorpe * clients can be added to this ib_device past this point we only need 12809cd58817SJason Gunthorpe * the maximum possible client_id value here. 12819cd58817SJason Gunthorpe */ 1282921eab11SJason Gunthorpe down_read(&clients_rwsem); 12839cd58817SJason Gunthorpe cid = highest_client_id; 1284921eab11SJason Gunthorpe up_read(&clients_rwsem); 12859cd58817SJason Gunthorpe while (cid) { 12869cd58817SJason Gunthorpe cid--; 12879cd58817SJason Gunthorpe remove_client_context(device, cid); 12889cd58817SJason Gunthorpe } 1289921eab11SJason Gunthorpe 1290286e1d3fSJack Morgenstein ib_cq_pool_cleanup(device); 12914aa16152SJason Gunthorpe 1292921eab11SJason Gunthorpe /* Pairs with refcount_set in enable_device */ 1293921eab11SJason Gunthorpe ib_device_put(device); 1294921eab11SJason Gunthorpe wait_for_completion(&device->unreg_completion); 1295c2261dd7SJason Gunthorpe 12964e0f7b90SParav Pandit /* 12974e0f7b90SParav Pandit * compat devices must be removed after device refcount drops to zero. 12984e0f7b90SParav Pandit * Otherwise init_net() may add more compatdevs after removing compat 12994e0f7b90SParav Pandit * devices and before device is disabled. 13004e0f7b90SParav Pandit */ 13014e0f7b90SParav Pandit remove_compat_devs(device); 1302921eab11SJason Gunthorpe } 1303921eab11SJason Gunthorpe 1304921eab11SJason Gunthorpe /* 1305921eab11SJason Gunthorpe * An enabled device is visible to all clients and to all the public facing 1306d0899892SJason Gunthorpe * APIs that return a device pointer. This always returns with a new get, even 1307d0899892SJason Gunthorpe * if it fails. 1308921eab11SJason Gunthorpe */ 1309d0899892SJason Gunthorpe static int enable_device_and_get(struct ib_device *device) 1310921eab11SJason Gunthorpe { 1311921eab11SJason Gunthorpe struct ib_client *client; 1312921eab11SJason Gunthorpe unsigned long index; 1313d0899892SJason Gunthorpe int ret = 0; 1314921eab11SJason Gunthorpe 1315d0899892SJason Gunthorpe /* 1316d0899892SJason Gunthorpe * One ref belongs to the xa and the other belongs to this 1317d0899892SJason Gunthorpe * thread. This is needed to guard against parallel unregistration. 1318d0899892SJason Gunthorpe */ 1319d0899892SJason Gunthorpe refcount_set(&device->refcount, 2); 1320921eab11SJason Gunthorpe down_write(&devices_rwsem); 1321921eab11SJason Gunthorpe xa_set_mark(&devices, device->index, DEVICE_REGISTERED); 1322d0899892SJason Gunthorpe 1323d0899892SJason Gunthorpe /* 1324d0899892SJason Gunthorpe * By using downgrade_write() we ensure that no other thread can clear 1325d0899892SJason Gunthorpe * DEVICE_REGISTERED while we are completing the client setup. 1326d0899892SJason Gunthorpe */ 1327d0899892SJason Gunthorpe downgrade_write(&devices_rwsem); 1328921eab11SJason Gunthorpe 1329ca22354bSJason Gunthorpe if (device->ops.enable_driver) { 1330ca22354bSJason Gunthorpe ret = device->ops.enable_driver(device); 1331ca22354bSJason Gunthorpe if (ret) 1332ca22354bSJason Gunthorpe goto out; 1333ca22354bSJason Gunthorpe } 1334ca22354bSJason Gunthorpe 1335921eab11SJason Gunthorpe down_read(&clients_rwsem); 1336921eab11SJason Gunthorpe xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { 1337921eab11SJason Gunthorpe ret = add_client_context(device, client); 1338d0899892SJason Gunthorpe if (ret) 1339d0899892SJason Gunthorpe break; 1340d0899892SJason Gunthorpe } 1341921eab11SJason Gunthorpe up_read(&clients_rwsem); 13424e0f7b90SParav Pandit if (!ret) 13434e0f7b90SParav Pandit ret = add_compat_devs(device); 1344ca22354bSJason Gunthorpe out: 1345d0899892SJason Gunthorpe up_read(&devices_rwsem); 1346921eab11SJason Gunthorpe return ret; 1347921eab11SJason Gunthorpe } 1348921eab11SJason Gunthorpe 13490cb42c02SJason Gunthorpe static void prevent_dealloc_device(struct ib_device *ib_dev) 13500cb42c02SJason Gunthorpe { 13510cb42c02SJason Gunthorpe } 13520cb42c02SJason Gunthorpe 1353548cb4fbSParav Pandit /** 1354548cb4fbSParav Pandit * ib_register_device - Register an IB device with IB core 1355548cb4fbSParav Pandit * @device: Device to register 1356d6537c1aSrd.dunlab@gmail.com * @name: unique string device name. This may include a '%' which will 1357d6537c1aSrd.dunlab@gmail.com * cause a unique index to be added to the passed device name. 1358e0477b34SJason Gunthorpe * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB 1359e0477b34SJason Gunthorpe * device will be used. In this case the caller should fully 1360e0477b34SJason Gunthorpe * setup the ibdev for DMA. This usually means using dma_virt_ops. 1361548cb4fbSParav Pandit * 1362548cb4fbSParav Pandit * Low-level drivers use ib_register_device() to register their 1363548cb4fbSParav Pandit * devices with the IB core. All registered clients will receive a 1364548cb4fbSParav Pandit * callback for each device that is added. @device must be allocated 1365548cb4fbSParav Pandit * with ib_alloc_device(). 1366d0899892SJason Gunthorpe * 1367d0899892SJason Gunthorpe * If the driver uses ops.dealloc_driver and calls any ib_unregister_device() 1368d0899892SJason Gunthorpe * asynchronously then the device pointer may become freed as soon as this 1369d0899892SJason Gunthorpe * function returns. 1370548cb4fbSParav Pandit */ 1371e0477b34SJason Gunthorpe int ib_register_device(struct ib_device *device, const char *name, 1372e0477b34SJason Gunthorpe struct device *dma_device) 1373548cb4fbSParav Pandit { 1374548cb4fbSParav Pandit int ret; 13751da177e4SLinus Torvalds 13760df91bb6SJason Gunthorpe ret = assign_name(device, name); 1377e349f858SJason Gunthorpe if (ret) 1378921eab11SJason Gunthorpe return ret; 13791da177e4SLinus Torvalds 13805a7a9e03SChristoph Hellwig /* 13815a7a9e03SChristoph Hellwig * If the caller does not provide a DMA capable device then the IB core 13825a7a9e03SChristoph Hellwig * will set up ib_sge and scatterlist structures that stash the kernel 13835a7a9e03SChristoph Hellwig * virtual address into the address field. 13845a7a9e03SChristoph Hellwig */ 13855a7a9e03SChristoph Hellwig WARN_ON(dma_device && !dma_device->dma_parms); 13865a7a9e03SChristoph Hellwig device->dma_device = dma_device; 13875a7a9e03SChristoph Hellwig 1388548cb4fbSParav Pandit ret = setup_device(device); 1389548cb4fbSParav Pandit if (ret) 1390d0899892SJason Gunthorpe return ret; 139103db3a2dSMatan Barak 1392d45f89d5SJason Gunthorpe ret = ib_cache_setup_one(device); 1393d45f89d5SJason Gunthorpe if (ret) { 1394d45f89d5SJason Gunthorpe dev_warn(&device->dev, 1395d45f89d5SJason Gunthorpe "Couldn't set up InfiniBand P_Key/GID cache\n"); 1396d0899892SJason Gunthorpe return ret; 1397d45f89d5SJason Gunthorpe } 1398d45f89d5SJason Gunthorpe 1399b7066b32SJason Gunthorpe ret = ib_setup_device_attrs(device); 1400b7066b32SJason Gunthorpe if (ret) 1401b7066b32SJason Gunthorpe goto cache_cleanup; 1402b7066b32SJason Gunthorpe 14037527a7b1SParav Pandit ib_device_register_rdmacg(device); 14043e153a93SIra Weiny 1405413d3347SMark Zhang rdma_counter_init(device); 1406413d3347SMark Zhang 1407e7a5b4aaSLeon Romanovsky /* 1408e7a5b4aaSLeon Romanovsky * Ensure that ADD uevent is not fired because it 1409e7a5b4aaSLeon Romanovsky * is too early amd device is not initialized yet. 1410e7a5b4aaSLeon Romanovsky */ 1411e7a5b4aaSLeon Romanovsky dev_set_uevent_suppress(&device->dev, true); 14125f8f5499SParav Pandit ret = device_add(&device->dev); 14135f8f5499SParav Pandit if (ret) 14145f8f5499SParav Pandit goto cg_cleanup; 14155f8f5499SParav Pandit 1416b7066b32SJason Gunthorpe ret = ib_setup_port_attrs(&device->coredev); 14171da177e4SLinus Torvalds if (ret) { 141843c7c851SJason Gunthorpe dev_warn(&device->dev, 141943c7c851SJason Gunthorpe "Couldn't register device with driver model\n"); 14205f8f5499SParav Pandit goto dev_cleanup; 14211da177e4SLinus Torvalds } 14221da177e4SLinus Torvalds 1423d0899892SJason Gunthorpe ret = enable_device_and_get(device); 1424d0899892SJason Gunthorpe if (ret) { 1425d0899892SJason Gunthorpe void (*dealloc_fn)(struct ib_device *); 1426d0899892SJason Gunthorpe 1427d0899892SJason Gunthorpe /* 1428d0899892SJason Gunthorpe * If we hit this error flow then we don't want to 1429d0899892SJason Gunthorpe * automatically dealloc the device since the caller is 1430d0899892SJason Gunthorpe * expected to call ib_dealloc_device() after 1431d0899892SJason Gunthorpe * ib_register_device() fails. This is tricky due to the 1432d0899892SJason Gunthorpe * possibility for a parallel unregistration along with this 1433d0899892SJason Gunthorpe * error flow. Since we have a refcount here we know any 1434d0899892SJason Gunthorpe * parallel flow is stopped in disable_device and will see the 14350cb42c02SJason Gunthorpe * special dealloc_driver pointer, causing the responsibility to 1436d0899892SJason Gunthorpe * ib_dealloc_device() to revert back to this thread. 1437d0899892SJason Gunthorpe */ 1438d0899892SJason Gunthorpe dealloc_fn = device->ops.dealloc_driver; 14390cb42c02SJason Gunthorpe device->ops.dealloc_driver = prevent_dealloc_device; 1440d0899892SJason Gunthorpe ib_device_put(device); 1441d0899892SJason Gunthorpe __ib_unregister_device(device); 1442d0899892SJason Gunthorpe device->ops.dealloc_driver = dealloc_fn; 1443779e0bf4SJack Morgenstein dev_set_uevent_suppress(&device->dev, false); 1444d0899892SJason Gunthorpe return ret; 1445d0899892SJason Gunthorpe } 1446779e0bf4SJack Morgenstein dev_set_uevent_suppress(&device->dev, false); 1447779e0bf4SJack Morgenstein /* Mark for userspace that device is ready */ 1448779e0bf4SJack Morgenstein kobject_uevent(&device->dev.kobj, KOBJ_ADD); 1449d0899892SJason Gunthorpe ib_device_put(device); 14501da177e4SLinus Torvalds 14514be3a4faSParav Pandit return 0; 14524be3a4faSParav Pandit 14535f8f5499SParav Pandit dev_cleanup: 14545f8f5499SParav Pandit device_del(&device->dev); 14552fb4f4eaSParav Pandit cg_cleanup: 1456e7a5b4aaSLeon Romanovsky dev_set_uevent_suppress(&device->dev, false); 14572fb4f4eaSParav Pandit ib_device_unregister_rdmacg(device); 1458b7066b32SJason Gunthorpe cache_cleanup: 1459d45f89d5SJason Gunthorpe ib_cache_cleanup_one(device); 14601da177e4SLinus Torvalds return ret; 14611da177e4SLinus Torvalds } 14621da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_device); 14631da177e4SLinus Torvalds 1464d0899892SJason Gunthorpe /* Callers must hold a get on the device. */ 1465d0899892SJason Gunthorpe static void __ib_unregister_device(struct ib_device *ib_dev) 1466d0899892SJason Gunthorpe { 1467d0899892SJason Gunthorpe /* 1468d0899892SJason Gunthorpe * We have a registration lock so that all the calls to unregister are 1469d0899892SJason Gunthorpe * fully fenced, once any unregister returns the device is truely 1470d0899892SJason Gunthorpe * unregistered even if multiple callers are unregistering it at the 1471d0899892SJason Gunthorpe * same time. This also interacts with the registration flow and 1472d0899892SJason Gunthorpe * provides sane semantics if register and unregister are racing. 1473d0899892SJason Gunthorpe */ 1474d0899892SJason Gunthorpe mutex_lock(&ib_dev->unregistration_lock); 1475d0899892SJason Gunthorpe if (!refcount_read(&ib_dev->refcount)) 1476d0899892SJason Gunthorpe goto out; 1477d0899892SJason Gunthorpe 1478d0899892SJason Gunthorpe disable_device(ib_dev); 14793042492bSParav Pandit 14803042492bSParav Pandit /* Expedite removing unregistered pointers from the hash table */ 14813042492bSParav Pandit free_netdevs(ib_dev); 14823042492bSParav Pandit 1483b7066b32SJason Gunthorpe ib_free_port_attrs(&ib_dev->coredev); 1484d0899892SJason Gunthorpe device_del(&ib_dev->dev); 1485d0899892SJason Gunthorpe ib_device_unregister_rdmacg(ib_dev); 1486d0899892SJason Gunthorpe ib_cache_cleanup_one(ib_dev); 1487d0899892SJason Gunthorpe 1488d0899892SJason Gunthorpe /* 1489d0899892SJason Gunthorpe * Drivers using the new flow may not call ib_dealloc_device except 1490d0899892SJason Gunthorpe * in error unwind prior to registration success. 1491d0899892SJason Gunthorpe */ 14920cb42c02SJason Gunthorpe if (ib_dev->ops.dealloc_driver && 14930cb42c02SJason Gunthorpe ib_dev->ops.dealloc_driver != prevent_dealloc_device) { 1494d0899892SJason Gunthorpe WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); 1495d0899892SJason Gunthorpe ib_dealloc_device(ib_dev); 1496d0899892SJason Gunthorpe } 1497d0899892SJason Gunthorpe out: 1498d0899892SJason Gunthorpe mutex_unlock(&ib_dev->unregistration_lock); 1499d0899892SJason Gunthorpe } 1500d0899892SJason Gunthorpe 15011da177e4SLinus Torvalds /** 15021da177e4SLinus Torvalds * ib_unregister_device - Unregister an IB device 1503d6537c1aSrd.dunlab@gmail.com * @ib_dev: The device to unregister 15041da177e4SLinus Torvalds * 15051da177e4SLinus Torvalds * Unregister an IB device. All clients will receive a remove callback. 1506d0899892SJason Gunthorpe * 1507d0899892SJason Gunthorpe * Callers should call this routine only once, and protect against races with 1508d0899892SJason Gunthorpe * registration. Typically it should only be called as part of a remove 1509d0899892SJason Gunthorpe * callback in an implementation of driver core's struct device_driver and 1510d0899892SJason Gunthorpe * related. 1511d0899892SJason Gunthorpe * 1512d0899892SJason Gunthorpe * If ops.dealloc_driver is used then ib_dev will be freed upon return from 1513d0899892SJason Gunthorpe * this function. 15141da177e4SLinus Torvalds */ 1515d0899892SJason Gunthorpe void ib_unregister_device(struct ib_device *ib_dev) 15161da177e4SLinus Torvalds { 1517d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1518d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1519d0899892SJason Gunthorpe put_device(&ib_dev->dev); 15201da177e4SLinus Torvalds } 15211da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_device); 15221da177e4SLinus Torvalds 1523d0899892SJason Gunthorpe /** 1524d0899892SJason Gunthorpe * ib_unregister_device_and_put - Unregister a device while holding a 'get' 1525d6537c1aSrd.dunlab@gmail.com * @ib_dev: The device to unregister 1526d0899892SJason Gunthorpe * 1527d0899892SJason Gunthorpe * This is the same as ib_unregister_device(), except it includes an internal 1528d0899892SJason Gunthorpe * ib_device_put() that should match a 'get' obtained by the caller. 1529d0899892SJason Gunthorpe * 1530d0899892SJason Gunthorpe * It is safe to call this routine concurrently from multiple threads while 1531d0899892SJason Gunthorpe * holding the 'get'. When the function returns the device is fully 1532d0899892SJason Gunthorpe * unregistered. 1533d0899892SJason Gunthorpe * 1534d0899892SJason Gunthorpe * Drivers using this flow MUST use the driver_unregister callback to clean up 1535d0899892SJason Gunthorpe * their resources associated with the device and dealloc it. 1536d0899892SJason Gunthorpe */ 1537d0899892SJason Gunthorpe void ib_unregister_device_and_put(struct ib_device *ib_dev) 1538d0899892SJason Gunthorpe { 1539d0899892SJason Gunthorpe WARN_ON(!ib_dev->ops.dealloc_driver); 1540d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1541d0899892SJason Gunthorpe ib_device_put(ib_dev); 1542d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1543d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1544d0899892SJason Gunthorpe } 1545d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_device_and_put); 1546d0899892SJason Gunthorpe 1547d0899892SJason Gunthorpe /** 1548d0899892SJason Gunthorpe * ib_unregister_driver - Unregister all IB devices for a driver 1549d0899892SJason Gunthorpe * @driver_id: The driver to unregister 1550d0899892SJason Gunthorpe * 1551d0899892SJason Gunthorpe * This implements a fence for device unregistration. It only returns once all 1552d0899892SJason Gunthorpe * devices associated with the driver_id have fully completed their 1553d0899892SJason Gunthorpe * unregistration and returned from ib_unregister_device*(). 1554d0899892SJason Gunthorpe * 1555d0899892SJason Gunthorpe * If device's are not yet unregistered it goes ahead and starts unregistering 1556d0899892SJason Gunthorpe * them. 1557d0899892SJason Gunthorpe * 1558d0899892SJason Gunthorpe * This does not block creation of new devices with the given driver_id, that 1559d0899892SJason Gunthorpe * is the responsibility of the caller. 1560d0899892SJason Gunthorpe */ 1561d0899892SJason Gunthorpe void ib_unregister_driver(enum rdma_driver_id driver_id) 1562d0899892SJason Gunthorpe { 1563d0899892SJason Gunthorpe struct ib_device *ib_dev; 1564d0899892SJason Gunthorpe unsigned long index; 1565d0899892SJason Gunthorpe 1566d0899892SJason Gunthorpe down_read(&devices_rwsem); 1567d0899892SJason Gunthorpe xa_for_each (&devices, index, ib_dev) { 1568b9560a41SJason Gunthorpe if (ib_dev->ops.driver_id != driver_id) 1569d0899892SJason Gunthorpe continue; 1570d0899892SJason Gunthorpe 1571d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1572d0899892SJason Gunthorpe up_read(&devices_rwsem); 1573d0899892SJason Gunthorpe 1574d0899892SJason Gunthorpe WARN_ON(!ib_dev->ops.dealloc_driver); 1575d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1576d0899892SJason Gunthorpe 1577d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1578d0899892SJason Gunthorpe down_read(&devices_rwsem); 1579d0899892SJason Gunthorpe } 1580d0899892SJason Gunthorpe up_read(&devices_rwsem); 1581d0899892SJason Gunthorpe } 1582d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_driver); 1583d0899892SJason Gunthorpe 1584d0899892SJason Gunthorpe static void ib_unregister_work(struct work_struct *work) 1585d0899892SJason Gunthorpe { 1586d0899892SJason Gunthorpe struct ib_device *ib_dev = 1587d0899892SJason Gunthorpe container_of(work, struct ib_device, unregistration_work); 1588d0899892SJason Gunthorpe 1589d0899892SJason Gunthorpe __ib_unregister_device(ib_dev); 1590d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1591d0899892SJason Gunthorpe } 1592d0899892SJason Gunthorpe 1593d0899892SJason Gunthorpe /** 1594d0899892SJason Gunthorpe * ib_unregister_device_queued - Unregister a device using a work queue 1595d6537c1aSrd.dunlab@gmail.com * @ib_dev: The device to unregister 1596d0899892SJason Gunthorpe * 1597d0899892SJason Gunthorpe * This schedules an asynchronous unregistration using a WQ for the device. A 1598d0899892SJason Gunthorpe * driver should use this to avoid holding locks while doing unregistration, 1599d0899892SJason Gunthorpe * such as holding the RTNL lock. 1600d0899892SJason Gunthorpe * 1601d0899892SJason Gunthorpe * Drivers using this API must use ib_unregister_driver before module unload 1602d0899892SJason Gunthorpe * to ensure that all scheduled unregistrations have completed. 1603d0899892SJason Gunthorpe */ 1604d0899892SJason Gunthorpe void ib_unregister_device_queued(struct ib_device *ib_dev) 1605d0899892SJason Gunthorpe { 1606d0899892SJason Gunthorpe WARN_ON(!refcount_read(&ib_dev->refcount)); 1607d0899892SJason Gunthorpe WARN_ON(!ib_dev->ops.dealloc_driver); 1608d0899892SJason Gunthorpe get_device(&ib_dev->dev); 1609d0899892SJason Gunthorpe if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) 1610d0899892SJason Gunthorpe put_device(&ib_dev->dev); 1611d0899892SJason Gunthorpe } 1612d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_device_queued); 1613d0899892SJason Gunthorpe 1614decbc7a6SParav Pandit /* 1615decbc7a6SParav Pandit * The caller must pass in a device that has the kref held and the refcount 1616decbc7a6SParav Pandit * released. If the device is in cur_net and still registered then it is moved 1617decbc7a6SParav Pandit * into net. 1618decbc7a6SParav Pandit */ 1619decbc7a6SParav Pandit static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, 1620decbc7a6SParav Pandit struct net *net) 1621decbc7a6SParav Pandit { 1622decbc7a6SParav Pandit int ret2 = -EINVAL; 1623decbc7a6SParav Pandit int ret; 1624decbc7a6SParav Pandit 1625decbc7a6SParav Pandit mutex_lock(&device->unregistration_lock); 1626decbc7a6SParav Pandit 1627decbc7a6SParav Pandit /* 16282e5b8a01SParav Pandit * If a device not under ib_device_get() or if the unregistration_lock 16292e5b8a01SParav Pandit * is not held, the namespace can be changed, or it can be unregistered. 16302e5b8a01SParav Pandit * Check again under the lock. 1631decbc7a6SParav Pandit */ 1632decbc7a6SParav Pandit if (refcount_read(&device->refcount) == 0 || 1633decbc7a6SParav Pandit !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { 1634decbc7a6SParav Pandit ret = -ENODEV; 1635decbc7a6SParav Pandit goto out; 1636decbc7a6SParav Pandit } 1637decbc7a6SParav Pandit 1638decbc7a6SParav Pandit kobject_uevent(&device->dev.kobj, KOBJ_REMOVE); 1639decbc7a6SParav Pandit disable_device(device); 1640decbc7a6SParav Pandit 1641decbc7a6SParav Pandit /* 1642decbc7a6SParav Pandit * At this point no one can be using the device, so it is safe to 1643decbc7a6SParav Pandit * change the namespace. 1644decbc7a6SParav Pandit */ 1645decbc7a6SParav Pandit write_pnet(&device->coredev.rdma_net, net); 1646decbc7a6SParav Pandit 16472e5b8a01SParav Pandit down_read(&devices_rwsem); 1648decbc7a6SParav Pandit /* 1649decbc7a6SParav Pandit * Currently rdma devices are system wide unique. So the device name 1650decbc7a6SParav Pandit * is guaranteed free in the new namespace. Publish the new namespace 1651decbc7a6SParav Pandit * at the sysfs level. 1652decbc7a6SParav Pandit */ 1653decbc7a6SParav Pandit ret = device_rename(&device->dev, dev_name(&device->dev)); 1654decbc7a6SParav Pandit up_read(&devices_rwsem); 1655decbc7a6SParav Pandit if (ret) { 1656decbc7a6SParav Pandit dev_warn(&device->dev, 1657decbc7a6SParav Pandit "%s: Couldn't rename device after namespace change\n", 1658decbc7a6SParav Pandit __func__); 1659decbc7a6SParav Pandit /* Try and put things back and re-enable the device */ 1660decbc7a6SParav Pandit write_pnet(&device->coredev.rdma_net, cur_net); 1661decbc7a6SParav Pandit } 1662decbc7a6SParav Pandit 1663decbc7a6SParav Pandit ret2 = enable_device_and_get(device); 16642e5b8a01SParav Pandit if (ret2) { 1665decbc7a6SParav Pandit /* 1666decbc7a6SParav Pandit * This shouldn't really happen, but if it does, let the user 1667decbc7a6SParav Pandit * retry at later point. So don't disable the device. 1668decbc7a6SParav Pandit */ 1669decbc7a6SParav Pandit dev_warn(&device->dev, 1670decbc7a6SParav Pandit "%s: Couldn't re-enable device after namespace change\n", 1671decbc7a6SParav Pandit __func__); 16722e5b8a01SParav Pandit } 1673decbc7a6SParav Pandit kobject_uevent(&device->dev.kobj, KOBJ_ADD); 16742e5b8a01SParav Pandit 1675decbc7a6SParav Pandit ib_device_put(device); 1676decbc7a6SParav Pandit out: 1677decbc7a6SParav Pandit mutex_unlock(&device->unregistration_lock); 1678decbc7a6SParav Pandit if (ret) 1679decbc7a6SParav Pandit return ret; 1680decbc7a6SParav Pandit return ret2; 1681decbc7a6SParav Pandit } 1682decbc7a6SParav Pandit 16832e5b8a01SParav Pandit int ib_device_set_netns_put(struct sk_buff *skb, 16842e5b8a01SParav Pandit struct ib_device *dev, u32 ns_fd) 16852e5b8a01SParav Pandit { 16862e5b8a01SParav Pandit struct net *net; 16872e5b8a01SParav Pandit int ret; 16882e5b8a01SParav Pandit 16892e5b8a01SParav Pandit net = get_net_ns_by_fd(ns_fd); 16902e5b8a01SParav Pandit if (IS_ERR(net)) { 16912e5b8a01SParav Pandit ret = PTR_ERR(net); 16922e5b8a01SParav Pandit goto net_err; 16932e5b8a01SParav Pandit } 16942e5b8a01SParav Pandit 16952e5b8a01SParav Pandit if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { 16962e5b8a01SParav Pandit ret = -EPERM; 16972e5b8a01SParav Pandit goto ns_err; 16982e5b8a01SParav Pandit } 16992e5b8a01SParav Pandit 17002e5b8a01SParav Pandit /* 17012e5b8a01SParav Pandit * Currently supported only for those providers which support 17022e5b8a01SParav Pandit * disassociation and don't do port specific sysfs init. Once a 17032e5b8a01SParav Pandit * port_cleanup infrastructure is implemented, this limitation will be 17042e5b8a01SParav Pandit * removed. 17052e5b8a01SParav Pandit */ 1706*d7407d16SJason Gunthorpe if (!dev->ops.disassociate_ucontext || dev->ops.port_groups || 17072e5b8a01SParav Pandit ib_devices_shared_netns) { 17082e5b8a01SParav Pandit ret = -EOPNOTSUPP; 17092e5b8a01SParav Pandit goto ns_err; 17102e5b8a01SParav Pandit } 17112e5b8a01SParav Pandit 17122e5b8a01SParav Pandit get_device(&dev->dev); 17132e5b8a01SParav Pandit ib_device_put(dev); 17142e5b8a01SParav Pandit ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); 17152e5b8a01SParav Pandit put_device(&dev->dev); 17162e5b8a01SParav Pandit 17172e5b8a01SParav Pandit put_net(net); 17182e5b8a01SParav Pandit return ret; 17192e5b8a01SParav Pandit 17202e5b8a01SParav Pandit ns_err: 17212e5b8a01SParav Pandit put_net(net); 17222e5b8a01SParav Pandit net_err: 17232e5b8a01SParav Pandit ib_device_put(dev); 17242e5b8a01SParav Pandit return ret; 17252e5b8a01SParav Pandit } 17262e5b8a01SParav Pandit 17274e0f7b90SParav Pandit static struct pernet_operations rdma_dev_net_ops = { 17284e0f7b90SParav Pandit .init = rdma_dev_init_net, 17294e0f7b90SParav Pandit .exit = rdma_dev_exit_net, 17304e0f7b90SParav Pandit .id = &rdma_dev_net_id, 17314e0f7b90SParav Pandit .size = sizeof(struct rdma_dev_net), 17324e0f7b90SParav Pandit }; 17334e0f7b90SParav Pandit 1734e59178d8SJason Gunthorpe static int assign_client_id(struct ib_client *client) 1735e59178d8SJason Gunthorpe { 1736e59178d8SJason Gunthorpe int ret; 1737e59178d8SJason Gunthorpe 1738921eab11SJason Gunthorpe down_write(&clients_rwsem); 1739e59178d8SJason Gunthorpe /* 1740e59178d8SJason Gunthorpe * The add/remove callbacks must be called in FIFO/LIFO order. To 1741e59178d8SJason Gunthorpe * achieve this we assign client_ids so they are sorted in 17429cd58817SJason Gunthorpe * registration order. 1743e59178d8SJason Gunthorpe */ 17449cd58817SJason Gunthorpe client->client_id = highest_client_id; 1745ea295481SLinus Torvalds ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); 1746e59178d8SJason Gunthorpe if (ret) 1747e59178d8SJason Gunthorpe goto out; 1748e59178d8SJason Gunthorpe 17499cd58817SJason Gunthorpe highest_client_id++; 1750921eab11SJason Gunthorpe xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); 1751921eab11SJason Gunthorpe 1752e59178d8SJason Gunthorpe out: 1753921eab11SJason Gunthorpe up_write(&clients_rwsem); 1754e59178d8SJason Gunthorpe return ret; 1755e59178d8SJason Gunthorpe } 1756e59178d8SJason Gunthorpe 17579cd58817SJason Gunthorpe static void remove_client_id(struct ib_client *client) 17589cd58817SJason Gunthorpe { 17599cd58817SJason Gunthorpe down_write(&clients_rwsem); 17609cd58817SJason Gunthorpe xa_erase(&clients, client->client_id); 17619cd58817SJason Gunthorpe for (; highest_client_id; highest_client_id--) 17629cd58817SJason Gunthorpe if (xa_load(&clients, highest_client_id - 1)) 17639cd58817SJason Gunthorpe break; 17649cd58817SJason Gunthorpe up_write(&clients_rwsem); 17659cd58817SJason Gunthorpe } 17669cd58817SJason Gunthorpe 17671da177e4SLinus Torvalds /** 17681da177e4SLinus Torvalds * ib_register_client - Register an IB client 17691da177e4SLinus Torvalds * @client:Client to register 17701da177e4SLinus Torvalds * 17711da177e4SLinus Torvalds * Upper level users of the IB drivers can use ib_register_client() to 17721da177e4SLinus Torvalds * register callbacks for IB device addition and removal. When an IB 17731da177e4SLinus Torvalds * device is added, each registered client's add method will be called 17741da177e4SLinus Torvalds * (in the order the clients were registered), and when a device is 17751da177e4SLinus Torvalds * removed, each client's remove method will be called (in the reverse 17761da177e4SLinus Torvalds * order that clients were registered). In addition, when 17771da177e4SLinus Torvalds * ib_register_client() is called, the client will receive an add 17781da177e4SLinus Torvalds * callback for all devices already registered. 17791da177e4SLinus Torvalds */ 17801da177e4SLinus Torvalds int ib_register_client(struct ib_client *client) 17811da177e4SLinus Torvalds { 17821da177e4SLinus Torvalds struct ib_device *device; 17830df91bb6SJason Gunthorpe unsigned long index; 1784e59178d8SJason Gunthorpe int ret; 17851da177e4SLinus Torvalds 1786621e55ffSJason Gunthorpe refcount_set(&client->uses, 1); 1787621e55ffSJason Gunthorpe init_completion(&client->uses_zero); 1788e59178d8SJason Gunthorpe ret = assign_client_id(client); 1789921eab11SJason Gunthorpe if (ret) 1790921eab11SJason Gunthorpe return ret; 1791921eab11SJason Gunthorpe 1792921eab11SJason Gunthorpe down_read(&devices_rwsem); 1793921eab11SJason Gunthorpe xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { 1794921eab11SJason Gunthorpe ret = add_client_context(device, client); 1795e59178d8SJason Gunthorpe if (ret) { 1796921eab11SJason Gunthorpe up_read(&devices_rwsem); 1797921eab11SJason Gunthorpe ib_unregister_client(client); 1798e59178d8SJason Gunthorpe return ret; 1799e59178d8SJason Gunthorpe } 1800921eab11SJason Gunthorpe } 1801921eab11SJason Gunthorpe up_read(&devices_rwsem); 18021da177e4SLinus Torvalds return 0; 18031da177e4SLinus Torvalds } 18041da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_client); 18051da177e4SLinus Torvalds 18061da177e4SLinus Torvalds /** 18071da177e4SLinus Torvalds * ib_unregister_client - Unregister an IB client 18081da177e4SLinus Torvalds * @client:Client to unregister 18091da177e4SLinus Torvalds * 18101da177e4SLinus Torvalds * Upper level users use ib_unregister_client() to remove their client 18111da177e4SLinus Torvalds * registration. When ib_unregister_client() is called, the client 18121da177e4SLinus Torvalds * will receive a remove callback for each IB device still registered. 1813921eab11SJason Gunthorpe * 1814921eab11SJason Gunthorpe * This is a full fence, once it returns no client callbacks will be called, 1815921eab11SJason Gunthorpe * or are running in another thread. 18161da177e4SLinus Torvalds */ 18171da177e4SLinus Torvalds void ib_unregister_client(struct ib_client *client) 18181da177e4SLinus Torvalds { 18191da177e4SLinus Torvalds struct ib_device *device; 18200df91bb6SJason Gunthorpe unsigned long index; 18211da177e4SLinus Torvalds 1822921eab11SJason Gunthorpe down_write(&clients_rwsem); 1823621e55ffSJason Gunthorpe ib_client_put(client); 1824e59178d8SJason Gunthorpe xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); 1825921eab11SJason Gunthorpe up_write(&clients_rwsem); 18265aa44bb9SHaggai Eran 1827621e55ffSJason Gunthorpe /* We do not want to have locks while calling client->remove() */ 1828621e55ffSJason Gunthorpe rcu_read_lock(); 1829621e55ffSJason Gunthorpe xa_for_each (&devices, index, device) { 1830621e55ffSJason Gunthorpe if (!ib_device_try_get(device)) 1831621e55ffSJason Gunthorpe continue; 1832621e55ffSJason Gunthorpe rcu_read_unlock(); 1833621e55ffSJason Gunthorpe 18341da177e4SLinus Torvalds remove_client_context(device, client->client_id); 1835621e55ffSJason Gunthorpe 1836621e55ffSJason Gunthorpe ib_device_put(device); 1837621e55ffSJason Gunthorpe rcu_read_lock(); 1838621e55ffSJason Gunthorpe } 1839621e55ffSJason Gunthorpe rcu_read_unlock(); 1840621e55ffSJason Gunthorpe 1841621e55ffSJason Gunthorpe /* 1842621e55ffSJason Gunthorpe * remove_client_context() is not a fence, it can return even though a 1843621e55ffSJason Gunthorpe * removal is ongoing. Wait until all removals are completed. 1844621e55ffSJason Gunthorpe */ 1845621e55ffSJason Gunthorpe wait_for_completion(&client->uses_zero); 18469cd58817SJason Gunthorpe remove_client_id(client); 18471da177e4SLinus Torvalds } 18481da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_client); 18491da177e4SLinus Torvalds 18500e2d00ebSJason Gunthorpe static int __ib_get_global_client_nl_info(const char *client_name, 18510e2d00ebSJason Gunthorpe struct ib_client_nl_info *res) 18520e2d00ebSJason Gunthorpe { 18530e2d00ebSJason Gunthorpe struct ib_client *client; 18540e2d00ebSJason Gunthorpe unsigned long index; 18550e2d00ebSJason Gunthorpe int ret = -ENOENT; 18560e2d00ebSJason Gunthorpe 18570e2d00ebSJason Gunthorpe down_read(&clients_rwsem); 18580e2d00ebSJason Gunthorpe xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { 18590e2d00ebSJason Gunthorpe if (strcmp(client->name, client_name) != 0) 18600e2d00ebSJason Gunthorpe continue; 18610e2d00ebSJason Gunthorpe if (!client->get_global_nl_info) { 18620e2d00ebSJason Gunthorpe ret = -EOPNOTSUPP; 18630e2d00ebSJason Gunthorpe break; 18640e2d00ebSJason Gunthorpe } 18650e2d00ebSJason Gunthorpe ret = client->get_global_nl_info(res); 18660e2d00ebSJason Gunthorpe if (WARN_ON(ret == -ENOENT)) 18670e2d00ebSJason Gunthorpe ret = -EINVAL; 18680e2d00ebSJason Gunthorpe if (!ret && res->cdev) 18690e2d00ebSJason Gunthorpe get_device(res->cdev); 18700e2d00ebSJason Gunthorpe break; 18710e2d00ebSJason Gunthorpe } 18720e2d00ebSJason Gunthorpe up_read(&clients_rwsem); 18730e2d00ebSJason Gunthorpe return ret; 18740e2d00ebSJason Gunthorpe } 18750e2d00ebSJason Gunthorpe 18760e2d00ebSJason Gunthorpe static int __ib_get_client_nl_info(struct ib_device *ibdev, 18770e2d00ebSJason Gunthorpe const char *client_name, 18780e2d00ebSJason Gunthorpe struct ib_client_nl_info *res) 18790e2d00ebSJason Gunthorpe { 18800e2d00ebSJason Gunthorpe unsigned long index; 18810e2d00ebSJason Gunthorpe void *client_data; 18820e2d00ebSJason Gunthorpe int ret = -ENOENT; 18830e2d00ebSJason Gunthorpe 18840e2d00ebSJason Gunthorpe down_read(&ibdev->client_data_rwsem); 18850e2d00ebSJason Gunthorpe xan_for_each_marked (&ibdev->client_data, index, client_data, 18860e2d00ebSJason Gunthorpe CLIENT_DATA_REGISTERED) { 18870e2d00ebSJason Gunthorpe struct ib_client *client = xa_load(&clients, index); 18880e2d00ebSJason Gunthorpe 18890e2d00ebSJason Gunthorpe if (!client || strcmp(client->name, client_name) != 0) 18900e2d00ebSJason Gunthorpe continue; 18910e2d00ebSJason Gunthorpe if (!client->get_nl_info) { 18920e2d00ebSJason Gunthorpe ret = -EOPNOTSUPP; 18930e2d00ebSJason Gunthorpe break; 18940e2d00ebSJason Gunthorpe } 18950e2d00ebSJason Gunthorpe ret = client->get_nl_info(ibdev, client_data, res); 18960e2d00ebSJason Gunthorpe if (WARN_ON(ret == -ENOENT)) 18970e2d00ebSJason Gunthorpe ret = -EINVAL; 18980e2d00ebSJason Gunthorpe 18990e2d00ebSJason Gunthorpe /* 19000e2d00ebSJason Gunthorpe * The cdev is guaranteed valid as long as we are inside the 19010e2d00ebSJason Gunthorpe * client_data_rwsem as remove_one can't be called. Keep it 19020e2d00ebSJason Gunthorpe * valid for the caller. 19030e2d00ebSJason Gunthorpe */ 19040e2d00ebSJason Gunthorpe if (!ret && res->cdev) 19050e2d00ebSJason Gunthorpe get_device(res->cdev); 19060e2d00ebSJason Gunthorpe break; 19070e2d00ebSJason Gunthorpe } 19080e2d00ebSJason Gunthorpe up_read(&ibdev->client_data_rwsem); 19090e2d00ebSJason Gunthorpe 19100e2d00ebSJason Gunthorpe return ret; 19110e2d00ebSJason Gunthorpe } 19120e2d00ebSJason Gunthorpe 19130e2d00ebSJason Gunthorpe /** 19140e2d00ebSJason Gunthorpe * ib_get_client_nl_info - Fetch the nl_info from a client 19154c3b53e1SLee Jones * @ibdev: IB device 19164c3b53e1SLee Jones * @client_name: Name of the client 19174c3b53e1SLee Jones * @res: Result of the query 19180e2d00ebSJason Gunthorpe */ 19190e2d00ebSJason Gunthorpe int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, 19200e2d00ebSJason Gunthorpe struct ib_client_nl_info *res) 19210e2d00ebSJason Gunthorpe { 19220e2d00ebSJason Gunthorpe int ret; 19230e2d00ebSJason Gunthorpe 19240e2d00ebSJason Gunthorpe if (ibdev) 19250e2d00ebSJason Gunthorpe ret = __ib_get_client_nl_info(ibdev, client_name, res); 19260e2d00ebSJason Gunthorpe else 19270e2d00ebSJason Gunthorpe ret = __ib_get_global_client_nl_info(client_name, res); 19280e2d00ebSJason Gunthorpe #ifdef CONFIG_MODULES 19290e2d00ebSJason Gunthorpe if (ret == -ENOENT) { 19300e2d00ebSJason Gunthorpe request_module("rdma-client-%s", client_name); 19310e2d00ebSJason Gunthorpe if (ibdev) 19320e2d00ebSJason Gunthorpe ret = __ib_get_client_nl_info(ibdev, client_name, res); 19330e2d00ebSJason Gunthorpe else 19340e2d00ebSJason Gunthorpe ret = __ib_get_global_client_nl_info(client_name, res); 19350e2d00ebSJason Gunthorpe } 19360e2d00ebSJason Gunthorpe #endif 19370e2d00ebSJason Gunthorpe if (ret) { 19380e2d00ebSJason Gunthorpe if (ret == -ENOENT) 19390e2d00ebSJason Gunthorpe return -EOPNOTSUPP; 19400e2d00ebSJason Gunthorpe return ret; 19410e2d00ebSJason Gunthorpe } 19420e2d00ebSJason Gunthorpe 19430e2d00ebSJason Gunthorpe if (WARN_ON(!res->cdev)) 19440e2d00ebSJason Gunthorpe return -EINVAL; 19450e2d00ebSJason Gunthorpe return 0; 19460e2d00ebSJason Gunthorpe } 19470e2d00ebSJason Gunthorpe 19481da177e4SLinus Torvalds /** 19499cd330d3SKrishna Kumar * ib_set_client_data - Set IB client context 19501da177e4SLinus Torvalds * @device:Device to set context for 19511da177e4SLinus Torvalds * @client:Client to set context for 19521da177e4SLinus Torvalds * @data:Context to set 19531da177e4SLinus Torvalds * 19540df91bb6SJason Gunthorpe * ib_set_client_data() sets client context data that can be retrieved with 19550df91bb6SJason Gunthorpe * ib_get_client_data(). This can only be called while the client is 19560df91bb6SJason Gunthorpe * registered to the device, once the ib_client remove() callback returns this 19570df91bb6SJason Gunthorpe * cannot be called. 19581da177e4SLinus Torvalds */ 19591da177e4SLinus Torvalds void ib_set_client_data(struct ib_device *device, struct ib_client *client, 19601da177e4SLinus Torvalds void *data) 19611da177e4SLinus Torvalds { 19620df91bb6SJason Gunthorpe void *rc; 19631da177e4SLinus Torvalds 19640df91bb6SJason Gunthorpe if (WARN_ON(IS_ERR(data))) 19650df91bb6SJason Gunthorpe data = NULL; 19661da177e4SLinus Torvalds 19670df91bb6SJason Gunthorpe rc = xa_store(&device->client_data, client->client_id, data, 19680df91bb6SJason Gunthorpe GFP_KERNEL); 19690df91bb6SJason Gunthorpe WARN_ON(xa_is_err(rc)); 19701da177e4SLinus Torvalds } 19711da177e4SLinus Torvalds EXPORT_SYMBOL(ib_set_client_data); 19721da177e4SLinus Torvalds 19731da177e4SLinus Torvalds /** 19741da177e4SLinus Torvalds * ib_register_event_handler - Register an IB event handler 19751da177e4SLinus Torvalds * @event_handler:Handler to register 19761da177e4SLinus Torvalds * 19771da177e4SLinus Torvalds * ib_register_event_handler() registers an event handler that will be 19781da177e4SLinus Torvalds * called back when asynchronous IB events occur (as defined in 19791da177e4SLinus Torvalds * chapter 11 of the InfiniBand Architecture Specification). This 19806b57cea9SParav Pandit * callback occurs in workqueue context. 19811da177e4SLinus Torvalds */ 1982dcc9881eSLeon Romanovsky void ib_register_event_handler(struct ib_event_handler *event_handler) 19831da177e4SLinus Torvalds { 19846b57cea9SParav Pandit down_write(&event_handler->device->event_handler_rwsem); 19851da177e4SLinus Torvalds list_add_tail(&event_handler->list, 19861da177e4SLinus Torvalds &event_handler->device->event_handler_list); 19876b57cea9SParav Pandit up_write(&event_handler->device->event_handler_rwsem); 19881da177e4SLinus Torvalds } 19891da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_event_handler); 19901da177e4SLinus Torvalds 19911da177e4SLinus Torvalds /** 19921da177e4SLinus Torvalds * ib_unregister_event_handler - Unregister an event handler 19931da177e4SLinus Torvalds * @event_handler:Handler to unregister 19941da177e4SLinus Torvalds * 19951da177e4SLinus Torvalds * Unregister an event handler registered with 19961da177e4SLinus Torvalds * ib_register_event_handler(). 19971da177e4SLinus Torvalds */ 1998dcc9881eSLeon Romanovsky void ib_unregister_event_handler(struct ib_event_handler *event_handler) 19991da177e4SLinus Torvalds { 20006b57cea9SParav Pandit down_write(&event_handler->device->event_handler_rwsem); 20011da177e4SLinus Torvalds list_del(&event_handler->list); 20026b57cea9SParav Pandit up_write(&event_handler->device->event_handler_rwsem); 20031da177e4SLinus Torvalds } 20041da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_event_handler); 20051da177e4SLinus Torvalds 20066b57cea9SParav Pandit void ib_dispatch_event_clients(struct ib_event *event) 20071da177e4SLinus Torvalds { 20081da177e4SLinus Torvalds struct ib_event_handler *handler; 20091da177e4SLinus Torvalds 20106b57cea9SParav Pandit down_read(&event->device->event_handler_rwsem); 20111da177e4SLinus Torvalds 20121da177e4SLinus Torvalds list_for_each_entry(handler, &event->device->event_handler_list, list) 20131da177e4SLinus Torvalds handler->handler(handler, event); 20141da177e4SLinus Torvalds 20156b57cea9SParav Pandit up_read(&event->device->event_handler_rwsem); 20161da177e4SLinus Torvalds } 20171da177e4SLinus Torvalds 20184929116bSKamal Heib static int iw_query_port(struct ib_device *device, 20191fb7f897SMark Bloch u32 port_num, 20204929116bSKamal Heib struct ib_port_attr *port_attr) 20214929116bSKamal Heib { 20224929116bSKamal Heib struct in_device *inetdev; 20234929116bSKamal Heib struct net_device *netdev; 20244929116bSKamal Heib 20254929116bSKamal Heib memset(port_attr, 0, sizeof(*port_attr)); 20264929116bSKamal Heib 20274929116bSKamal Heib netdev = ib_device_get_netdev(device, port_num); 20284929116bSKamal Heib if (!netdev) 20294929116bSKamal Heib return -ENODEV; 20304929116bSKamal Heib 20314929116bSKamal Heib port_attr->max_mtu = IB_MTU_4096; 20324929116bSKamal Heib port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); 20334929116bSKamal Heib 20344929116bSKamal Heib if (!netif_carrier_ok(netdev)) { 20354929116bSKamal Heib port_attr->state = IB_PORT_DOWN; 20364929116bSKamal Heib port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; 20374929116bSKamal Heib } else { 2038390d3fdcSMichal Kalderon rcu_read_lock(); 2039390d3fdcSMichal Kalderon inetdev = __in_dev_get_rcu(netdev); 20404929116bSKamal Heib 20414929116bSKamal Heib if (inetdev && inetdev->ifa_list) { 20424929116bSKamal Heib port_attr->state = IB_PORT_ACTIVE; 20434929116bSKamal Heib port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; 20444929116bSKamal Heib } else { 20454929116bSKamal Heib port_attr->state = IB_PORT_INIT; 20464929116bSKamal Heib port_attr->phys_state = 20474929116bSKamal Heib IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; 20484929116bSKamal Heib } 2049390d3fdcSMichal Kalderon 2050390d3fdcSMichal Kalderon rcu_read_unlock(); 20514929116bSKamal Heib } 20524929116bSKamal Heib 2053390d3fdcSMichal Kalderon dev_put(netdev); 20541e123d96SGuoqing Jiang return device->ops.query_port(device, port_num, port_attr); 20554929116bSKamal Heib } 20564929116bSKamal Heib 20574929116bSKamal Heib static int __ib_query_port(struct ib_device *device, 20581fb7f897SMark Bloch u32 port_num, 20594929116bSKamal Heib struct ib_port_attr *port_attr) 20604929116bSKamal Heib { 20614929116bSKamal Heib union ib_gid gid = {}; 20624929116bSKamal Heib int err; 20634929116bSKamal Heib 20644929116bSKamal Heib memset(port_attr, 0, sizeof(*port_attr)); 20654929116bSKamal Heib 20664929116bSKamal Heib err = device->ops.query_port(device, port_num, port_attr); 20674929116bSKamal Heib if (err || port_attr->subnet_prefix) 20684929116bSKamal Heib return err; 20694929116bSKamal Heib 20704929116bSKamal Heib if (rdma_port_get_link_layer(device, port_num) != 20714929116bSKamal Heib IB_LINK_LAYER_INFINIBAND) 20724929116bSKamal Heib return 0; 20734929116bSKamal Heib 20744929116bSKamal Heib err = device->ops.query_gid(device, port_num, 0, &gid); 20754929116bSKamal Heib if (err) 20764929116bSKamal Heib return err; 20774929116bSKamal Heib 20784929116bSKamal Heib port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); 20794929116bSKamal Heib return 0; 20804929116bSKamal Heib } 20814929116bSKamal Heib 20821da177e4SLinus Torvalds /** 20831da177e4SLinus Torvalds * ib_query_port - Query IB port attributes 20841da177e4SLinus Torvalds * @device:Device to query 20851da177e4SLinus Torvalds * @port_num:Port number to query 20861da177e4SLinus Torvalds * @port_attr:Port attributes 20871da177e4SLinus Torvalds * 20881da177e4SLinus Torvalds * ib_query_port() returns the attributes of a port through the 20891da177e4SLinus Torvalds * @port_attr pointer. 20901da177e4SLinus Torvalds */ 20911da177e4SLinus Torvalds int ib_query_port(struct ib_device *device, 20921fb7f897SMark Bloch u32 port_num, 20931da177e4SLinus Torvalds struct ib_port_attr *port_attr) 20941da177e4SLinus Torvalds { 209524dc831bSYuval Shaia if (!rdma_is_port_valid(device, port_num)) 2096116c0074SRoland Dreier return -EINVAL; 2097116c0074SRoland Dreier 20984929116bSKamal Heib if (rdma_protocol_iwarp(device, port_num)) 20994929116bSKamal Heib return iw_query_port(device, port_num, port_attr); 21004929116bSKamal Heib else 21014929116bSKamal Heib return __ib_query_port(device, port_num, port_attr); 21021da177e4SLinus Torvalds } 21031da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_port); 21041da177e4SLinus Torvalds 2105324e227eSJason Gunthorpe static void add_ndev_hash(struct ib_port_data *pdata) 2106324e227eSJason Gunthorpe { 2107324e227eSJason Gunthorpe unsigned long flags; 2108324e227eSJason Gunthorpe 2109324e227eSJason Gunthorpe might_sleep(); 2110324e227eSJason Gunthorpe 2111324e227eSJason Gunthorpe spin_lock_irqsave(&ndev_hash_lock, flags); 2112324e227eSJason Gunthorpe if (hash_hashed(&pdata->ndev_hash_link)) { 2113324e227eSJason Gunthorpe hash_del_rcu(&pdata->ndev_hash_link); 2114324e227eSJason Gunthorpe spin_unlock_irqrestore(&ndev_hash_lock, flags); 2115324e227eSJason Gunthorpe /* 2116324e227eSJason Gunthorpe * We cannot do hash_add_rcu after a hash_del_rcu until the 2117324e227eSJason Gunthorpe * grace period 2118324e227eSJason Gunthorpe */ 2119324e227eSJason Gunthorpe synchronize_rcu(); 2120324e227eSJason Gunthorpe spin_lock_irqsave(&ndev_hash_lock, flags); 2121324e227eSJason Gunthorpe } 2122324e227eSJason Gunthorpe if (pdata->netdev) 2123324e227eSJason Gunthorpe hash_add_rcu(ndev_hash, &pdata->ndev_hash_link, 2124324e227eSJason Gunthorpe (uintptr_t)pdata->netdev); 2125324e227eSJason Gunthorpe spin_unlock_irqrestore(&ndev_hash_lock, flags); 2126324e227eSJason Gunthorpe } 2127324e227eSJason Gunthorpe 21281da177e4SLinus Torvalds /** 2129c2261dd7SJason Gunthorpe * ib_device_set_netdev - Associate the ib_dev with an underlying net_device 2130c2261dd7SJason Gunthorpe * @ib_dev: Device to modify 2131c2261dd7SJason Gunthorpe * @ndev: net_device to affiliate, may be NULL 2132c2261dd7SJason Gunthorpe * @port: IB port the net_device is connected to 2133c2261dd7SJason Gunthorpe * 2134c2261dd7SJason Gunthorpe * Drivers should use this to link the ib_device to a netdev so the netdev 2135c2261dd7SJason Gunthorpe * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be 2136c2261dd7SJason Gunthorpe * affiliated with any port. 2137c2261dd7SJason Gunthorpe * 2138c2261dd7SJason Gunthorpe * The caller must ensure that the given ndev is not unregistered or 2139c2261dd7SJason Gunthorpe * unregistering, and that either the ib_device is unregistered or 2140c2261dd7SJason Gunthorpe * ib_device_set_netdev() is called with NULL when the ndev sends a 2141c2261dd7SJason Gunthorpe * NETDEV_UNREGISTER event. 2142c2261dd7SJason Gunthorpe */ 2143c2261dd7SJason Gunthorpe int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, 21441fb7f897SMark Bloch u32 port) 2145c2261dd7SJason Gunthorpe { 2146c2261dd7SJason Gunthorpe struct net_device *old_ndev; 2147c2261dd7SJason Gunthorpe struct ib_port_data *pdata; 2148c2261dd7SJason Gunthorpe unsigned long flags; 2149c2261dd7SJason Gunthorpe int ret; 2150c2261dd7SJason Gunthorpe 2151c2261dd7SJason Gunthorpe /* 2152c2261dd7SJason Gunthorpe * Drivers wish to call this before ib_register_driver, so we have to 2153c2261dd7SJason Gunthorpe * setup the port data early. 2154c2261dd7SJason Gunthorpe */ 2155c2261dd7SJason Gunthorpe ret = alloc_port_data(ib_dev); 2156c2261dd7SJason Gunthorpe if (ret) 2157c2261dd7SJason Gunthorpe return ret; 2158c2261dd7SJason Gunthorpe 2159c2261dd7SJason Gunthorpe if (!rdma_is_port_valid(ib_dev, port)) 2160c2261dd7SJason Gunthorpe return -EINVAL; 2161c2261dd7SJason Gunthorpe 2162c2261dd7SJason Gunthorpe pdata = &ib_dev->port_data[port]; 2163c2261dd7SJason Gunthorpe spin_lock_irqsave(&pdata->netdev_lock, flags); 2164324e227eSJason Gunthorpe old_ndev = rcu_dereference_protected( 2165324e227eSJason Gunthorpe pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); 2166324e227eSJason Gunthorpe if (old_ndev == ndev) { 2167c2261dd7SJason Gunthorpe spin_unlock_irqrestore(&pdata->netdev_lock, flags); 2168c2261dd7SJason Gunthorpe return 0; 2169c2261dd7SJason Gunthorpe } 2170c2261dd7SJason Gunthorpe 2171c2261dd7SJason Gunthorpe if (ndev) 2172c2261dd7SJason Gunthorpe dev_hold(ndev); 2173324e227eSJason Gunthorpe rcu_assign_pointer(pdata->netdev, ndev); 2174c2261dd7SJason Gunthorpe spin_unlock_irqrestore(&pdata->netdev_lock, flags); 2175c2261dd7SJason Gunthorpe 2176324e227eSJason Gunthorpe add_ndev_hash(pdata); 2177c2261dd7SJason Gunthorpe if (old_ndev) 2178c2261dd7SJason Gunthorpe dev_put(old_ndev); 2179c2261dd7SJason Gunthorpe 2180c2261dd7SJason Gunthorpe return 0; 2181c2261dd7SJason Gunthorpe } 2182c2261dd7SJason Gunthorpe EXPORT_SYMBOL(ib_device_set_netdev); 2183c2261dd7SJason Gunthorpe 2184c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev) 2185c2261dd7SJason Gunthorpe { 2186c2261dd7SJason Gunthorpe unsigned long flags; 21871fb7f897SMark Bloch u32 port; 2188c2261dd7SJason Gunthorpe 218946bdf370SKamal Heib if (!ib_dev->port_data) 219046bdf370SKamal Heib return; 219146bdf370SKamal Heib 2192c2261dd7SJason Gunthorpe rdma_for_each_port (ib_dev, port) { 2193c2261dd7SJason Gunthorpe struct ib_port_data *pdata = &ib_dev->port_data[port]; 2194324e227eSJason Gunthorpe struct net_device *ndev; 2195c2261dd7SJason Gunthorpe 2196c2261dd7SJason Gunthorpe spin_lock_irqsave(&pdata->netdev_lock, flags); 2197324e227eSJason Gunthorpe ndev = rcu_dereference_protected( 2198324e227eSJason Gunthorpe pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); 2199324e227eSJason Gunthorpe if (ndev) { 2200324e227eSJason Gunthorpe spin_lock(&ndev_hash_lock); 2201324e227eSJason Gunthorpe hash_del_rcu(&pdata->ndev_hash_link); 2202324e227eSJason Gunthorpe spin_unlock(&ndev_hash_lock); 2203324e227eSJason Gunthorpe 2204324e227eSJason Gunthorpe /* 2205324e227eSJason Gunthorpe * If this is the last dev_put there is still a 2206324e227eSJason Gunthorpe * synchronize_rcu before the netdev is kfreed, so we 2207324e227eSJason Gunthorpe * can continue to rely on unlocked pointer 2208324e227eSJason Gunthorpe * comparisons after the put 2209324e227eSJason Gunthorpe */ 2210324e227eSJason Gunthorpe rcu_assign_pointer(pdata->netdev, NULL); 2211324e227eSJason Gunthorpe dev_put(ndev); 2212c2261dd7SJason Gunthorpe } 2213c2261dd7SJason Gunthorpe spin_unlock_irqrestore(&pdata->netdev_lock, flags); 2214c2261dd7SJason Gunthorpe } 2215c2261dd7SJason Gunthorpe } 2216c2261dd7SJason Gunthorpe 2217c2261dd7SJason Gunthorpe struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, 22181fb7f897SMark Bloch u32 port) 2219c2261dd7SJason Gunthorpe { 2220c2261dd7SJason Gunthorpe struct ib_port_data *pdata; 2221c2261dd7SJason Gunthorpe struct net_device *res; 2222c2261dd7SJason Gunthorpe 2223c2261dd7SJason Gunthorpe if (!rdma_is_port_valid(ib_dev, port)) 2224c2261dd7SJason Gunthorpe return NULL; 2225c2261dd7SJason Gunthorpe 2226c2261dd7SJason Gunthorpe pdata = &ib_dev->port_data[port]; 2227c2261dd7SJason Gunthorpe 2228c2261dd7SJason Gunthorpe /* 2229c2261dd7SJason Gunthorpe * New drivers should use ib_device_set_netdev() not the legacy 2230c2261dd7SJason Gunthorpe * get_netdev(). 2231c2261dd7SJason Gunthorpe */ 2232c2261dd7SJason Gunthorpe if (ib_dev->ops.get_netdev) 2233c2261dd7SJason Gunthorpe res = ib_dev->ops.get_netdev(ib_dev, port); 2234c2261dd7SJason Gunthorpe else { 2235c2261dd7SJason Gunthorpe spin_lock(&pdata->netdev_lock); 2236324e227eSJason Gunthorpe res = rcu_dereference_protected( 2237324e227eSJason Gunthorpe pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); 2238c2261dd7SJason Gunthorpe if (res) 2239c2261dd7SJason Gunthorpe dev_hold(res); 2240c2261dd7SJason Gunthorpe spin_unlock(&pdata->netdev_lock); 2241c2261dd7SJason Gunthorpe } 2242c2261dd7SJason Gunthorpe 2243c2261dd7SJason Gunthorpe /* 2244c2261dd7SJason Gunthorpe * If we are starting to unregister expedite things by preventing 2245c2261dd7SJason Gunthorpe * propagation of an unregistering netdev. 2246c2261dd7SJason Gunthorpe */ 2247c2261dd7SJason Gunthorpe if (res && res->reg_state != NETREG_REGISTERED) { 2248c2261dd7SJason Gunthorpe dev_put(res); 2249c2261dd7SJason Gunthorpe return NULL; 2250c2261dd7SJason Gunthorpe } 2251c2261dd7SJason Gunthorpe 2252c2261dd7SJason Gunthorpe return res; 2253c2261dd7SJason Gunthorpe } 2254c2261dd7SJason Gunthorpe 2255c2261dd7SJason Gunthorpe /** 2256324e227eSJason Gunthorpe * ib_device_get_by_netdev - Find an IB device associated with a netdev 2257324e227eSJason Gunthorpe * @ndev: netdev to locate 2258324e227eSJason Gunthorpe * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) 2259324e227eSJason Gunthorpe * 2260324e227eSJason Gunthorpe * Find and hold an ib_device that is associated with a netdev via 2261324e227eSJason Gunthorpe * ib_device_set_netdev(). The caller must call ib_device_put() on the 2262324e227eSJason Gunthorpe * returned pointer. 2263324e227eSJason Gunthorpe */ 2264324e227eSJason Gunthorpe struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, 2265324e227eSJason Gunthorpe enum rdma_driver_id driver_id) 2266324e227eSJason Gunthorpe { 2267324e227eSJason Gunthorpe struct ib_device *res = NULL; 2268324e227eSJason Gunthorpe struct ib_port_data *cur; 2269324e227eSJason Gunthorpe 2270324e227eSJason Gunthorpe rcu_read_lock(); 2271324e227eSJason Gunthorpe hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link, 2272324e227eSJason Gunthorpe (uintptr_t)ndev) { 2273324e227eSJason Gunthorpe if (rcu_access_pointer(cur->netdev) == ndev && 2274324e227eSJason Gunthorpe (driver_id == RDMA_DRIVER_UNKNOWN || 2275b9560a41SJason Gunthorpe cur->ib_dev->ops.driver_id == driver_id) && 2276324e227eSJason Gunthorpe ib_device_try_get(cur->ib_dev)) { 2277324e227eSJason Gunthorpe res = cur->ib_dev; 2278324e227eSJason Gunthorpe break; 2279324e227eSJason Gunthorpe } 2280324e227eSJason Gunthorpe } 2281324e227eSJason Gunthorpe rcu_read_unlock(); 2282324e227eSJason Gunthorpe 2283324e227eSJason Gunthorpe return res; 2284324e227eSJason Gunthorpe } 2285324e227eSJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_netdev); 2286324e227eSJason Gunthorpe 2287324e227eSJason Gunthorpe /** 228803db3a2dSMatan Barak * ib_enum_roce_netdev - enumerate all RoCE ports 228903db3a2dSMatan Barak * @ib_dev : IB device we want to query 229003db3a2dSMatan Barak * @filter: Should we call the callback? 229103db3a2dSMatan Barak * @filter_cookie: Cookie passed to filter 229203db3a2dSMatan Barak * @cb: Callback to call for each found RoCE ports 229303db3a2dSMatan Barak * @cookie: Cookie passed back to the callback 229403db3a2dSMatan Barak * 229503db3a2dSMatan Barak * Enumerates all of the physical RoCE ports of ib_dev 229603db3a2dSMatan Barak * which are related to netdevice and calls callback() on each 229703db3a2dSMatan Barak * device for which filter() function returns non zero. 229803db3a2dSMatan Barak */ 229903db3a2dSMatan Barak void ib_enum_roce_netdev(struct ib_device *ib_dev, 230003db3a2dSMatan Barak roce_netdev_filter filter, 230103db3a2dSMatan Barak void *filter_cookie, 230203db3a2dSMatan Barak roce_netdev_callback cb, 230303db3a2dSMatan Barak void *cookie) 230403db3a2dSMatan Barak { 23051fb7f897SMark Bloch u32 port; 230603db3a2dSMatan Barak 2307ea1075edSJason Gunthorpe rdma_for_each_port (ib_dev, port) 230803db3a2dSMatan Barak if (rdma_protocol_roce(ib_dev, port)) { 2309c2261dd7SJason Gunthorpe struct net_device *idev = 2310c2261dd7SJason Gunthorpe ib_device_get_netdev(ib_dev, port); 231103db3a2dSMatan Barak 231203db3a2dSMatan Barak if (filter(ib_dev, port, idev, filter_cookie)) 231303db3a2dSMatan Barak cb(ib_dev, port, idev, cookie); 231403db3a2dSMatan Barak 231503db3a2dSMatan Barak if (idev) 231603db3a2dSMatan Barak dev_put(idev); 231703db3a2dSMatan Barak } 231803db3a2dSMatan Barak } 231903db3a2dSMatan Barak 232003db3a2dSMatan Barak /** 232103db3a2dSMatan Barak * ib_enum_all_roce_netdevs - enumerate all RoCE devices 232203db3a2dSMatan Barak * @filter: Should we call the callback? 232303db3a2dSMatan Barak * @filter_cookie: Cookie passed to filter 232403db3a2dSMatan Barak * @cb: Callback to call for each found RoCE ports 232503db3a2dSMatan Barak * @cookie: Cookie passed back to the callback 232603db3a2dSMatan Barak * 232703db3a2dSMatan Barak * Enumerates all RoCE devices' physical ports which are related 232803db3a2dSMatan Barak * to netdevices and calls callback() on each device for which 232903db3a2dSMatan Barak * filter() function returns non zero. 233003db3a2dSMatan Barak */ 233103db3a2dSMatan Barak void ib_enum_all_roce_netdevs(roce_netdev_filter filter, 233203db3a2dSMatan Barak void *filter_cookie, 233303db3a2dSMatan Barak roce_netdev_callback cb, 233403db3a2dSMatan Barak void *cookie) 233503db3a2dSMatan Barak { 233603db3a2dSMatan Barak struct ib_device *dev; 23370df91bb6SJason Gunthorpe unsigned long index; 233803db3a2dSMatan Barak 2339921eab11SJason Gunthorpe down_read(&devices_rwsem); 23400df91bb6SJason Gunthorpe xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) 234103db3a2dSMatan Barak ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); 2342921eab11SJason Gunthorpe up_read(&devices_rwsem); 234303db3a2dSMatan Barak } 234403db3a2dSMatan Barak 23454c3b53e1SLee Jones /* 23468030c835SLeon Romanovsky * ib_enum_all_devs - enumerate all ib_devices 23478030c835SLeon Romanovsky * @cb: Callback to call for each found ib_device 23488030c835SLeon Romanovsky * 23498030c835SLeon Romanovsky * Enumerates all ib_devices and calls callback() on each device. 23508030c835SLeon Romanovsky */ 23518030c835SLeon Romanovsky int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, 23528030c835SLeon Romanovsky struct netlink_callback *cb) 23538030c835SLeon Romanovsky { 23540df91bb6SJason Gunthorpe unsigned long index; 23558030c835SLeon Romanovsky struct ib_device *dev; 23568030c835SLeon Romanovsky unsigned int idx = 0; 23578030c835SLeon Romanovsky int ret = 0; 23588030c835SLeon Romanovsky 2359921eab11SJason Gunthorpe down_read(&devices_rwsem); 23600df91bb6SJason Gunthorpe xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 236137eeab55SParav Pandit if (!rdma_dev_access_netns(dev, sock_net(skb->sk))) 236237eeab55SParav Pandit continue; 236337eeab55SParav Pandit 23648030c835SLeon Romanovsky ret = nldev_cb(dev, skb, cb, idx); 23658030c835SLeon Romanovsky if (ret) 23668030c835SLeon Romanovsky break; 23678030c835SLeon Romanovsky idx++; 23688030c835SLeon Romanovsky } 2369921eab11SJason Gunthorpe up_read(&devices_rwsem); 23708030c835SLeon Romanovsky return ret; 23718030c835SLeon Romanovsky } 23728030c835SLeon Romanovsky 23738030c835SLeon Romanovsky /** 23741da177e4SLinus Torvalds * ib_query_pkey - Get P_Key table entry 23751da177e4SLinus Torvalds * @device:Device to query 23761da177e4SLinus Torvalds * @port_num:Port number to query 23771da177e4SLinus Torvalds * @index:P_Key table index to query 23781da177e4SLinus Torvalds * @pkey:Returned P_Key 23791da177e4SLinus Torvalds * 23801da177e4SLinus Torvalds * ib_query_pkey() fetches the specified P_Key table entry. 23811da177e4SLinus Torvalds */ 23821da177e4SLinus Torvalds int ib_query_pkey(struct ib_device *device, 23831fb7f897SMark Bloch u32 port_num, u16 index, u16 *pkey) 23841da177e4SLinus Torvalds { 23859af3f5cfSYuval Shaia if (!rdma_is_port_valid(device, port_num)) 23869af3f5cfSYuval Shaia return -EINVAL; 23879af3f5cfSYuval Shaia 2388ab75a6cbSKamal Heib if (!device->ops.query_pkey) 2389ab75a6cbSKamal Heib return -EOPNOTSUPP; 2390ab75a6cbSKamal Heib 23913023a1e9SKamal Heib return device->ops.query_pkey(device, port_num, index, pkey); 23921da177e4SLinus Torvalds } 23931da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_pkey); 23941da177e4SLinus Torvalds 23951da177e4SLinus Torvalds /** 23961da177e4SLinus Torvalds * ib_modify_device - Change IB device attributes 23971da177e4SLinus Torvalds * @device:Device to modify 23981da177e4SLinus Torvalds * @device_modify_mask:Mask of attributes to change 23991da177e4SLinus Torvalds * @device_modify:New attribute values 24001da177e4SLinus Torvalds * 24011da177e4SLinus Torvalds * ib_modify_device() changes a device's attributes as specified by 24021da177e4SLinus Torvalds * the @device_modify_mask and @device_modify structure. 24031da177e4SLinus Torvalds */ 24041da177e4SLinus Torvalds int ib_modify_device(struct ib_device *device, 24051da177e4SLinus Torvalds int device_modify_mask, 24061da177e4SLinus Torvalds struct ib_device_modify *device_modify) 24071da177e4SLinus Torvalds { 24083023a1e9SKamal Heib if (!device->ops.modify_device) 2409d0f3ef36SKamal Heib return -EOPNOTSUPP; 241010e1b54bSBart Van Assche 24113023a1e9SKamal Heib return device->ops.modify_device(device, device_modify_mask, 24121da177e4SLinus Torvalds device_modify); 24131da177e4SLinus Torvalds } 24141da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_device); 24151da177e4SLinus Torvalds 24161da177e4SLinus Torvalds /** 24171da177e4SLinus Torvalds * ib_modify_port - Modifies the attributes for the specified port. 24181da177e4SLinus Torvalds * @device: The device to modify. 24191da177e4SLinus Torvalds * @port_num: The number of the port to modify. 24201da177e4SLinus Torvalds * @port_modify_mask: Mask used to specify which attributes of the port 24211da177e4SLinus Torvalds * to change. 24221da177e4SLinus Torvalds * @port_modify: New attribute values for the port. 24231da177e4SLinus Torvalds * 24241da177e4SLinus Torvalds * ib_modify_port() changes a port's attributes as specified by the 24251da177e4SLinus Torvalds * @port_modify_mask and @port_modify structure. 24261da177e4SLinus Torvalds */ 24271da177e4SLinus Torvalds int ib_modify_port(struct ib_device *device, 24281fb7f897SMark Bloch u32 port_num, int port_modify_mask, 24291da177e4SLinus Torvalds struct ib_port_modify *port_modify) 24301da177e4SLinus Torvalds { 243161e0962dSSelvin Xavier int rc; 243210e1b54bSBart Van Assche 243324dc831bSYuval Shaia if (!rdma_is_port_valid(device, port_num)) 2434116c0074SRoland Dreier return -EINVAL; 2435116c0074SRoland Dreier 24363023a1e9SKamal Heib if (device->ops.modify_port) 24373023a1e9SKamal Heib rc = device->ops.modify_port(device, port_num, 24383023a1e9SKamal Heib port_modify_mask, 24391da177e4SLinus Torvalds port_modify); 244055bfe905SKamal Heib else if (rdma_protocol_roce(device, port_num) && 244155bfe905SKamal Heib ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 || 244255bfe905SKamal Heib (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0)) 244355bfe905SKamal Heib rc = 0; 244461e0962dSSelvin Xavier else 244555bfe905SKamal Heib rc = -EOPNOTSUPP; 244661e0962dSSelvin Xavier return rc; 24471da177e4SLinus Torvalds } 24481da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_port); 24491da177e4SLinus Torvalds 24505eb620c8SYosef Etigin /** 24515eb620c8SYosef Etigin * ib_find_gid - Returns the port number and GID table index where 2452dbb12562SParav Pandit * a specified GID value occurs. Its searches only for IB link layer. 24535eb620c8SYosef Etigin * @device: The device to query. 24545eb620c8SYosef Etigin * @gid: The GID value to search for. 24555eb620c8SYosef Etigin * @port_num: The port number of the device where the GID value was found. 24565eb620c8SYosef Etigin * @index: The index into the GID table where the GID was found. This 24575eb620c8SYosef Etigin * parameter may be NULL. 24585eb620c8SYosef Etigin */ 24595eb620c8SYosef Etigin int ib_find_gid(struct ib_device *device, union ib_gid *gid, 24601fb7f897SMark Bloch u32 *port_num, u16 *index) 24615eb620c8SYosef Etigin { 24625eb620c8SYosef Etigin union ib_gid tmp_gid; 24631fb7f897SMark Bloch u32 port; 2464ea1075edSJason Gunthorpe int ret, i; 24655eb620c8SYosef Etigin 2466ea1075edSJason Gunthorpe rdma_for_each_port (device, port) { 246722d24f75SParav Pandit if (!rdma_protocol_ib(device, port)) 2468b39ffa1dSMatan Barak continue; 2469b39ffa1dSMatan Barak 24708ceb1357SJason Gunthorpe for (i = 0; i < device->port_data[port].immutable.gid_tbl_len; 24718ceb1357SJason Gunthorpe ++i) { 24721dfce294SParav Pandit ret = rdma_query_gid(device, port, i, &tmp_gid); 24735eb620c8SYosef Etigin if (ret) 24745eb620c8SYosef Etigin return ret; 24755eb620c8SYosef Etigin if (!memcmp(&tmp_gid, gid, sizeof *gid)) { 24765eb620c8SYosef Etigin *port_num = port; 24775eb620c8SYosef Etigin if (index) 24785eb620c8SYosef Etigin *index = i; 24795eb620c8SYosef Etigin return 0; 24805eb620c8SYosef Etigin } 24815eb620c8SYosef Etigin } 24825eb620c8SYosef Etigin } 24835eb620c8SYosef Etigin 24845eb620c8SYosef Etigin return -ENOENT; 24855eb620c8SYosef Etigin } 24865eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_gid); 24875eb620c8SYosef Etigin 24885eb620c8SYosef Etigin /** 24895eb620c8SYosef Etigin * ib_find_pkey - Returns the PKey table index where a specified 24905eb620c8SYosef Etigin * PKey value occurs. 24915eb620c8SYosef Etigin * @device: The device to query. 24925eb620c8SYosef Etigin * @port_num: The port number of the device to search for the PKey. 24935eb620c8SYosef Etigin * @pkey: The PKey value to search for. 24945eb620c8SYosef Etigin * @index: The index into the PKey table where the PKey was found. 24955eb620c8SYosef Etigin */ 24965eb620c8SYosef Etigin int ib_find_pkey(struct ib_device *device, 24971fb7f897SMark Bloch u32 port_num, u16 pkey, u16 *index) 24985eb620c8SYosef Etigin { 24995eb620c8SYosef Etigin int ret, i; 25005eb620c8SYosef Etigin u16 tmp_pkey; 2501ff7166c4SJack Morgenstein int partial_ix = -1; 25025eb620c8SYosef Etigin 25038ceb1357SJason Gunthorpe for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len; 25048ceb1357SJason Gunthorpe ++i) { 25055eb620c8SYosef Etigin ret = ib_query_pkey(device, port_num, i, &tmp_pkey); 25065eb620c8SYosef Etigin if (ret) 25075eb620c8SYosef Etigin return ret; 250836026eccSMoni Shoua if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { 2509ff7166c4SJack Morgenstein /* if there is full-member pkey take it.*/ 2510ff7166c4SJack Morgenstein if (tmp_pkey & 0x8000) { 25115eb620c8SYosef Etigin *index = i; 25125eb620c8SYosef Etigin return 0; 25135eb620c8SYosef Etigin } 2514ff7166c4SJack Morgenstein if (partial_ix < 0) 2515ff7166c4SJack Morgenstein partial_ix = i; 2516ff7166c4SJack Morgenstein } 25175eb620c8SYosef Etigin } 25185eb620c8SYosef Etigin 2519ff7166c4SJack Morgenstein /*no full-member, if exists take the limited*/ 2520ff7166c4SJack Morgenstein if (partial_ix >= 0) { 2521ff7166c4SJack Morgenstein *index = partial_ix; 2522ff7166c4SJack Morgenstein return 0; 2523ff7166c4SJack Morgenstein } 25245eb620c8SYosef Etigin return -ENOENT; 25255eb620c8SYosef Etigin } 25265eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_pkey); 25275eb620c8SYosef Etigin 25289268f72dSYotam Kenneth /** 25299268f72dSYotam Kenneth * ib_get_net_dev_by_params() - Return the appropriate net_dev 25309268f72dSYotam Kenneth * for a received CM request 25319268f72dSYotam Kenneth * @dev: An RDMA device on which the request has been received. 25329268f72dSYotam Kenneth * @port: Port number on the RDMA device. 25339268f72dSYotam Kenneth * @pkey: The Pkey the request came on. 25349268f72dSYotam Kenneth * @gid: A GID that the net_dev uses to communicate. 25359268f72dSYotam Kenneth * @addr: Contains the IP address that the request specified as its 25369268f72dSYotam Kenneth * destination. 2537921eab11SJason Gunthorpe * 25389268f72dSYotam Kenneth */ 25399268f72dSYotam Kenneth struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, 25401fb7f897SMark Bloch u32 port, 25419268f72dSYotam Kenneth u16 pkey, 25429268f72dSYotam Kenneth const union ib_gid *gid, 25439268f72dSYotam Kenneth const struct sockaddr *addr) 25449268f72dSYotam Kenneth { 25459268f72dSYotam Kenneth struct net_device *net_dev = NULL; 25460df91bb6SJason Gunthorpe unsigned long index; 25470df91bb6SJason Gunthorpe void *client_data; 25489268f72dSYotam Kenneth 25499268f72dSYotam Kenneth if (!rdma_protocol_ib(dev, port)) 25509268f72dSYotam Kenneth return NULL; 25519268f72dSYotam Kenneth 2552921eab11SJason Gunthorpe /* 2553921eab11SJason Gunthorpe * Holding the read side guarantees that the client will not become 2554921eab11SJason Gunthorpe * unregistered while we are calling get_net_dev_by_params() 2555921eab11SJason Gunthorpe */ 2556921eab11SJason Gunthorpe down_read(&dev->client_data_rwsem); 25570df91bb6SJason Gunthorpe xan_for_each_marked (&dev->client_data, index, client_data, 25580df91bb6SJason Gunthorpe CLIENT_DATA_REGISTERED) { 25590df91bb6SJason Gunthorpe struct ib_client *client = xa_load(&clients, index); 25609268f72dSYotam Kenneth 25610df91bb6SJason Gunthorpe if (!client || !client->get_net_dev_by_params) 25629268f72dSYotam Kenneth continue; 25639268f72dSYotam Kenneth 25640df91bb6SJason Gunthorpe net_dev = client->get_net_dev_by_params(dev, port, pkey, gid, 25650df91bb6SJason Gunthorpe addr, client_data); 25669268f72dSYotam Kenneth if (net_dev) 25679268f72dSYotam Kenneth break; 25689268f72dSYotam Kenneth } 2569921eab11SJason Gunthorpe up_read(&dev->client_data_rwsem); 25709268f72dSYotam Kenneth 25719268f72dSYotam Kenneth return net_dev; 25729268f72dSYotam Kenneth } 25739268f72dSYotam Kenneth EXPORT_SYMBOL(ib_get_net_dev_by_params); 25749268f72dSYotam Kenneth 2575521ed0d9SKamal Heib void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) 2576521ed0d9SKamal Heib { 25773023a1e9SKamal Heib struct ib_device_ops *dev_ops = &dev->ops; 2578521ed0d9SKamal Heib #define SET_DEVICE_OP(ptr, name) \ 2579521ed0d9SKamal Heib do { \ 2580521ed0d9SKamal Heib if (ops->name) \ 2581521ed0d9SKamal Heib if (!((ptr)->name)) \ 2582521ed0d9SKamal Heib (ptr)->name = ops->name; \ 2583521ed0d9SKamal Heib } while (0) 2584521ed0d9SKamal Heib 258530471d4bSLeon Romanovsky #define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) 258630471d4bSLeon Romanovsky 2587b9560a41SJason Gunthorpe if (ops->driver_id != RDMA_DRIVER_UNKNOWN) { 2588b9560a41SJason Gunthorpe WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN && 2589b9560a41SJason Gunthorpe dev_ops->driver_id != ops->driver_id); 2590b9560a41SJason Gunthorpe dev_ops->driver_id = ops->driver_id; 2591b9560a41SJason Gunthorpe } 25927a154142SJason Gunthorpe if (ops->owner) { 25937a154142SJason Gunthorpe WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner); 25947a154142SJason Gunthorpe dev_ops->owner = ops->owner; 25957a154142SJason Gunthorpe } 259672c6ec18SJason Gunthorpe if (ops->uverbs_abi_ver) 259772c6ec18SJason Gunthorpe dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver; 2598b9560a41SJason Gunthorpe 25998f71bb00SJason Gunthorpe dev_ops->uverbs_no_driver_id_binding |= 26008f71bb00SJason Gunthorpe ops->uverbs_no_driver_id_binding; 26018f71bb00SJason Gunthorpe 26023023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, add_gid); 26032f1927b0SMoni Shoua SET_DEVICE_OP(dev_ops, advise_mr); 26043023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_dm); 26054b5f4d3fSJason Gunthorpe SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); 26064b5f4d3fSJason Gunthorpe SET_DEVICE_OP(dev_ops, alloc_hw_port_stats); 26073023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_mr); 260826bc7eaeSIsrael Rukshin SET_DEVICE_OP(dev_ops, alloc_mr_integrity); 26093023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_mw); 26103023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_pd); 26113023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); 26123023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_ucontext); 26133023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, alloc_xrcd); 26143023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, attach_mcast); 26153023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, check_mr_status); 2616c4ffee7cSMark Zhang SET_DEVICE_OP(dev_ops, counter_alloc_stats); 261799fa331dSMark Zhang SET_DEVICE_OP(dev_ops, counter_bind_qp); 261899fa331dSMark Zhang SET_DEVICE_OP(dev_ops, counter_dealloc); 261999fa331dSMark Zhang SET_DEVICE_OP(dev_ops, counter_unbind_qp); 2620c4ffee7cSMark Zhang SET_DEVICE_OP(dev_ops, counter_update_stats); 26213023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_ah); 26223023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_counters); 26233023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_cq); 26243023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_flow); 26253023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_flow_action_esp); 26263023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_qp); 26273023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_rwq_ind_table); 26283023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_srq); 2629676a80adSJason Gunthorpe SET_DEVICE_OP(dev_ops, create_user_ah); 26303023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, create_wq); 26313023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_dm); 2632d0899892SJason Gunthorpe SET_DEVICE_OP(dev_ops, dealloc_driver); 26333023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_mw); 26343023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_pd); 26353023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_ucontext); 26363023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dealloc_xrcd); 26373023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, del_gid); 26383023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, dereg_mr); 26393023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_ah); 26403023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_counters); 26413023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_cq); 26423023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_flow); 26433023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_flow_action); 26443023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_qp); 26453023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table); 26463023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_srq); 26473023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, destroy_wq); 26483023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, detach_mcast); 26493023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, disassociate_ucontext); 26503023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, drain_rq); 26513023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, drain_sq); 2652ca22354bSJason Gunthorpe SET_DEVICE_OP(dev_ops, enable_driver); 2653211cd945SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry); 26549e2a187aSMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_cq_entry); 265565959522SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw); 2656f4434529SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_mr_entry); 265765959522SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw); 26585cc34116SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_qp_entry); 265965959522SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw); 2660f4434529SMaor Gottlieb SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); 26613023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_dev_fw_str); 26623023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_dma_mr); 26633023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_hw_stats); 26643023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_link_layer); 26653023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_netdev); 26663023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_port_immutable); 26673023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_vector_affinity); 26683023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_vf_config); 2669bfcb3c5dSDanit Goldberg SET_DEVICE_OP(dev_ops, get_vf_guid); 26703023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, get_vf_stats); 2671dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_accept); 2672dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_add_ref); 2673dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_connect); 2674dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_create_listen); 2675dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_destroy_listen); 2676dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_get_qp); 2677dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_reject); 2678dd05cb82SKamal Heib SET_DEVICE_OP(dev_ops, iw_rem_ref); 26793023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, map_mr_sg); 26802cdfcdd8SMax Gurtovoy SET_DEVICE_OP(dev_ops, map_mr_sg_pi); 26813023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, mmap); 26823411f9f0SMichal Kalderon SET_DEVICE_OP(dev_ops, mmap_free); 26833023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_ah); 26843023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_cq); 26853023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_device); 26863023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_flow_action_esp); 26873023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_port); 26883023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_qp); 26893023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_srq); 26903023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, modify_wq); 26913023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, peek_cq); 26923023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, poll_cq); 2693*d7407d16SJason Gunthorpe SET_DEVICE_OP(dev_ops, port_groups); 26943023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, post_recv); 26953023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, post_send); 26963023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, post_srq_recv); 26973023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, process_mad); 26983023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_ah); 26993023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_device); 27003023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_gid); 27013023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_pkey); 27023023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_port); 27033023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_qp); 27043023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, query_srq); 27051c8fb1eaSYishai Hadas SET_DEVICE_OP(dev_ops, query_ucontext); 27063023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); 27073023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, read_counters); 27083023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, reg_dm_mr); 27093023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, reg_user_mr); 27103bc489e8SJianxin Xiong SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf); 27113023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, req_notify_cq); 27123023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, rereg_user_mr); 27133023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, resize_cq); 27143023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, set_vf_guid); 27153023a1e9SKamal Heib SET_DEVICE_OP(dev_ops, set_vf_link_state); 271621a428a0SLeon Romanovsky 2717d3456914SLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_ah); 27183b023e1bSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_counters); 2719e39afe3dSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_cq); 2720d18bb3e1SLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_mw); 272121a428a0SLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_pd); 2722c0a6b5ecSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table); 272368e326deSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_srq); 2724a2a074efSLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_ucontext); 272528ad5f65SLeon Romanovsky SET_OBJ_SIZE(dev_ops, ib_xrcd); 2726521ed0d9SKamal Heib } 2727521ed0d9SKamal Heib EXPORT_SYMBOL(ib_set_device_ops); 2728521ed0d9SKamal Heib 27295a7a9e03SChristoph Hellwig #ifdef CONFIG_INFINIBAND_VIRT_DMA 27305a7a9e03SChristoph Hellwig int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) 27315a7a9e03SChristoph Hellwig { 27325a7a9e03SChristoph Hellwig struct scatterlist *s; 27335a7a9e03SChristoph Hellwig int i; 27345a7a9e03SChristoph Hellwig 27355a7a9e03SChristoph Hellwig for_each_sg(sg, s, nents, i) { 27365a7a9e03SChristoph Hellwig sg_dma_address(s) = (uintptr_t)sg_virt(s); 27375a7a9e03SChristoph Hellwig sg_dma_len(s) = s->length; 27385a7a9e03SChristoph Hellwig } 27395a7a9e03SChristoph Hellwig return nents; 27405a7a9e03SChristoph Hellwig } 27415a7a9e03SChristoph Hellwig EXPORT_SYMBOL(ib_dma_virt_map_sg); 27425a7a9e03SChristoph Hellwig #endif /* CONFIG_INFINIBAND_VIRT_DMA */ 27435a7a9e03SChristoph Hellwig 2744d0e312feSLeon Romanovsky static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { 2745735c631aSMark Bloch [RDMA_NL_LS_OP_RESOLVE] = { 2746647c75acSLeon Romanovsky .doit = ib_nl_handle_resolve_resp, 2747e3a2b93dSLeon Romanovsky .flags = RDMA_NL_ADMIN_PERM, 2748e3a2b93dSLeon Romanovsky }, 2749735c631aSMark Bloch [RDMA_NL_LS_OP_SET_TIMEOUT] = { 2750647c75acSLeon Romanovsky .doit = ib_nl_handle_set_timeout, 2751e3a2b93dSLeon Romanovsky .flags = RDMA_NL_ADMIN_PERM, 2752e3a2b93dSLeon Romanovsky }, 2753ae43f828SMark Bloch [RDMA_NL_LS_OP_IP_RESOLVE] = { 2754647c75acSLeon Romanovsky .doit = ib_nl_handle_ip_res_resp, 2755e3a2b93dSLeon Romanovsky .flags = RDMA_NL_ADMIN_PERM, 2756e3a2b93dSLeon Romanovsky }, 2757735c631aSMark Bloch }; 2758735c631aSMark Bloch 27591da177e4SLinus Torvalds static int __init ib_core_init(void) 27601da177e4SLinus Torvalds { 27611da177e4SLinus Torvalds int ret; 27621da177e4SLinus Torvalds 2763f0626710STejun Heo ib_wq = alloc_workqueue("infiniband", 0, 0); 2764f0626710STejun Heo if (!ib_wq) 2765f0626710STejun Heo return -ENOMEM; 2766f0626710STejun Heo 276714d3a3b2SChristoph Hellwig ib_comp_wq = alloc_workqueue("ib-comp-wq", 2768b7363e67SSagi Grimberg WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 276914d3a3b2SChristoph Hellwig if (!ib_comp_wq) { 277014d3a3b2SChristoph Hellwig ret = -ENOMEM; 277114d3a3b2SChristoph Hellwig goto err; 277214d3a3b2SChristoph Hellwig } 277314d3a3b2SChristoph Hellwig 2774f794809aSJack Morgenstein ib_comp_unbound_wq = 2775f794809aSJack Morgenstein alloc_workqueue("ib-comp-unb-wq", 2776f794809aSJack Morgenstein WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | 2777f794809aSJack Morgenstein WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); 2778f794809aSJack Morgenstein if (!ib_comp_unbound_wq) { 2779f794809aSJack Morgenstein ret = -ENOMEM; 2780f794809aSJack Morgenstein goto err_comp; 2781f794809aSJack Morgenstein } 2782f794809aSJack Morgenstein 278355aeed06SJason Gunthorpe ret = class_register(&ib_class); 2784fd75c789SNir Muchtar if (ret) { 2785aba25a3eSParav Pandit pr_warn("Couldn't create InfiniBand device class\n"); 2786f794809aSJack Morgenstein goto err_comp_unbound; 2787fd75c789SNir Muchtar } 27881da177e4SLinus Torvalds 2789549af008SParav Pandit rdma_nl_init(); 2790549af008SParav Pandit 2791e3f20f02SLeon Romanovsky ret = addr_init(); 2792e3f20f02SLeon Romanovsky if (ret) { 27934469add9SColin Ian King pr_warn("Couldn't init IB address resolution\n"); 2794e3f20f02SLeon Romanovsky goto err_ibnl; 2795e3f20f02SLeon Romanovsky } 2796e3f20f02SLeon Romanovsky 27974c2cb422SMark Bloch ret = ib_mad_init(); 27984c2cb422SMark Bloch if (ret) { 27994c2cb422SMark Bloch pr_warn("Couldn't init IB MAD\n"); 28004c2cb422SMark Bloch goto err_addr; 28014c2cb422SMark Bloch } 28024c2cb422SMark Bloch 2803c2e49c92SMark Bloch ret = ib_sa_init(); 2804c2e49c92SMark Bloch if (ret) { 2805c2e49c92SMark Bloch pr_warn("Couldn't init SA\n"); 2806c2e49c92SMark Bloch goto err_mad; 2807c2e49c92SMark Bloch } 2808c2e49c92SMark Bloch 280942df744cSJanne Karhunen ret = register_blocking_lsm_notifier(&ibdev_lsm_nb); 28108f408ab6SDaniel Jurgens if (ret) { 28118f408ab6SDaniel Jurgens pr_warn("Couldn't register LSM notifier. ret %d\n", ret); 2812c9901724SLeon Romanovsky goto err_sa; 28138f408ab6SDaniel Jurgens } 28148f408ab6SDaniel Jurgens 28154e0f7b90SParav Pandit ret = register_pernet_device(&rdma_dev_net_ops); 28164e0f7b90SParav Pandit if (ret) { 28174e0f7b90SParav Pandit pr_warn("Couldn't init compat dev. ret %d\n", ret); 28184e0f7b90SParav Pandit goto err_compat; 28194e0f7b90SParav Pandit } 28204e0f7b90SParav Pandit 28216c80b41aSLeon Romanovsky nldev_init(); 2822c9901724SLeon Romanovsky rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); 28235ef8c0c1SJason Gunthorpe roce_gid_mgmt_init(); 2824b2cbae2cSRoland Dreier 2825fd75c789SNir Muchtar return 0; 2826fd75c789SNir Muchtar 28274e0f7b90SParav Pandit err_compat: 282842df744cSJanne Karhunen unregister_blocking_lsm_notifier(&ibdev_lsm_nb); 2829735c631aSMark Bloch err_sa: 2830735c631aSMark Bloch ib_sa_cleanup(); 2831c2e49c92SMark Bloch err_mad: 2832c2e49c92SMark Bloch ib_mad_cleanup(); 28334c2cb422SMark Bloch err_addr: 28344c2cb422SMark Bloch addr_cleanup(); 2835e3f20f02SLeon Romanovsky err_ibnl: 283655aeed06SJason Gunthorpe class_unregister(&ib_class); 2837f794809aSJack Morgenstein err_comp_unbound: 2838f794809aSJack Morgenstein destroy_workqueue(ib_comp_unbound_wq); 283914d3a3b2SChristoph Hellwig err_comp: 284014d3a3b2SChristoph Hellwig destroy_workqueue(ib_comp_wq); 2841fd75c789SNir Muchtar err: 2842fd75c789SNir Muchtar destroy_workqueue(ib_wq); 28431da177e4SLinus Torvalds return ret; 28441da177e4SLinus Torvalds } 28451da177e4SLinus Torvalds 28461da177e4SLinus Torvalds static void __exit ib_core_cleanup(void) 28471da177e4SLinus Torvalds { 28485ef8c0c1SJason Gunthorpe roce_gid_mgmt_cleanup(); 28496c80b41aSLeon Romanovsky nldev_exit(); 2850c9901724SLeon Romanovsky rdma_nl_unregister(RDMA_NL_LS); 28514e0f7b90SParav Pandit unregister_pernet_device(&rdma_dev_net_ops); 285242df744cSJanne Karhunen unregister_blocking_lsm_notifier(&ibdev_lsm_nb); 2853c2e49c92SMark Bloch ib_sa_cleanup(); 28544c2cb422SMark Bloch ib_mad_cleanup(); 2855e3f20f02SLeon Romanovsky addr_cleanup(); 2856c9901724SLeon Romanovsky rdma_nl_exit(); 285755aeed06SJason Gunthorpe class_unregister(&ib_class); 2858f794809aSJack Morgenstein destroy_workqueue(ib_comp_unbound_wq); 285914d3a3b2SChristoph Hellwig destroy_workqueue(ib_comp_wq); 2860f7c6a7b5SRoland Dreier /* Make sure that any pending umem accounting work is done. */ 2861f0626710STejun Heo destroy_workqueue(ib_wq); 2862d0899892SJason Gunthorpe flush_workqueue(system_unbound_wq); 2863e59178d8SJason Gunthorpe WARN_ON(!xa_empty(&clients)); 28640df91bb6SJason Gunthorpe WARN_ON(!xa_empty(&devices)); 28651da177e4SLinus Torvalds } 28661da177e4SLinus Torvalds 2867e3bf14bdSJason Gunthorpe MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); 2868e3bf14bdSJason Gunthorpe 286962dfa795SParav Pandit /* ib core relies on netdev stack to first register net_ns_type_operations 287062dfa795SParav Pandit * ns kobject type before ib_core initialization. 287162dfa795SParav Pandit */ 287262dfa795SParav Pandit fs_initcall(ib_core_init); 28731da177e4SLinus Torvalds module_exit(ib_core_cleanup); 2874