xref: /openbmc/linux/drivers/infiniband/core/device.c (revision 5417783e)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
32a1d9b7fSRoland Dreier  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * This software is available to you under a choice of one of two
61da177e4SLinus Torvalds  * licenses.  You may choose to be licensed under the terms of the GNU
71da177e4SLinus Torvalds  * General Public License (GPL) Version 2, available from the file
81da177e4SLinus Torvalds  * COPYING in the main directory of this source tree, or the
91da177e4SLinus Torvalds  * OpenIB.org BSD license below:
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *     Redistribution and use in source and binary forms, with or
121da177e4SLinus Torvalds  *     without modification, are permitted provided that the following
131da177e4SLinus Torvalds  *     conditions are met:
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  *      - Redistributions of source code must retain the above
161da177e4SLinus Torvalds  *        copyright notice, this list of conditions and the following
171da177e4SLinus Torvalds  *        disclaimer.
181da177e4SLinus Torvalds  *
191da177e4SLinus Torvalds  *      - Redistributions in binary form must reproduce the above
201da177e4SLinus Torvalds  *        copyright notice, this list of conditions and the following
211da177e4SLinus Torvalds  *        disclaimer in the documentation and/or other materials
221da177e4SLinus Torvalds  *        provided with the distribution.
231da177e4SLinus Torvalds  *
241da177e4SLinus Torvalds  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
251da177e4SLinus Torvalds  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
261da177e4SLinus Torvalds  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
271da177e4SLinus Torvalds  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
281da177e4SLinus Torvalds  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
291da177e4SLinus Torvalds  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
301da177e4SLinus Torvalds  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
311da177e4SLinus Torvalds  * SOFTWARE.
321da177e4SLinus Torvalds  */
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds #include <linux/module.h>
351da177e4SLinus Torvalds #include <linux/string.h>
361da177e4SLinus Torvalds #include <linux/errno.h>
379a6b090cSAhmed S. Darwish #include <linux/kernel.h>
381da177e4SLinus Torvalds #include <linux/slab.h>
391da177e4SLinus Torvalds #include <linux/init.h>
409268f72dSYotam Kenneth #include <linux/netdevice.h>
414e0f7b90SParav Pandit #include <net/net_namespace.h>
424e0f7b90SParav Pandit #include <net/netns/generic.h>
438f408ab6SDaniel Jurgens #include <linux/security.h>
448f408ab6SDaniel Jurgens #include <linux/notifier.h>
45324e227eSJason Gunthorpe #include <linux/hashtable.h>
46b2cbae2cSRoland Dreier #include <rdma/rdma_netlink.h>
4703db3a2dSMatan Barak #include <rdma/ib_addr.h>
4803db3a2dSMatan Barak #include <rdma/ib_cache.h>
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds #include "core_priv.h"
5141eda65cSLeon Romanovsky #include "restrack.h"
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds MODULE_AUTHOR("Roland Dreier");
541da177e4SLinus Torvalds MODULE_DESCRIPTION("core kernel InfiniBand API");
551da177e4SLinus Torvalds MODULE_LICENSE("Dual BSD/GPL");
561da177e4SLinus Torvalds 
5714d3a3b2SChristoph Hellwig struct workqueue_struct *ib_comp_wq;
58f794809aSJack Morgenstein struct workqueue_struct *ib_comp_unbound_wq;
59f0626710STejun Heo struct workqueue_struct *ib_wq;
60f0626710STejun Heo EXPORT_SYMBOL_GPL(ib_wq);
61f0626710STejun Heo 
620df91bb6SJason Gunthorpe /*
63921eab11SJason Gunthorpe  * Each of the three rwsem locks (devices, clients, client_data) protects the
64921eab11SJason Gunthorpe  * xarray of the same name. Specifically it allows the caller to assert that
65921eab11SJason Gunthorpe  * the MARK will/will not be changing under the lock, and for devices and
66921eab11SJason Gunthorpe  * clients, that the value in the xarray is still a valid pointer. Change of
67921eab11SJason Gunthorpe  * the MARK is linked to the object state, so holding the lock and testing the
68921eab11SJason Gunthorpe  * MARK also asserts that the contained object is in a certain state.
69921eab11SJason Gunthorpe  *
70921eab11SJason Gunthorpe  * This is used to build a two-stage register/unregister flow where objects
71921eab11SJason Gunthorpe  * can remain in the xarray while their registration or unregistration is
72921eab11SJason Gunthorpe  * still in progress.
73921eab11SJason Gunthorpe  *
74921eab11SJason Gunthorpe  * The xarray itself provides additional locking, and restartable iteration,
75921eab11SJason Gunthorpe  * which is also relied on.
76921eab11SJason Gunthorpe  *
77921eab11SJason Gunthorpe  * Locks should not be nested, with the exception of client_data, which is
78921eab11SJason Gunthorpe  * allowed to nest under the read side of the other two locks.
79921eab11SJason Gunthorpe  *
80921eab11SJason Gunthorpe  * The devices_rwsem also protects the device name list; any change or
81921eab11SJason Gunthorpe  * assignment of a device name must also hold the write side to guarantee
82921eab11SJason Gunthorpe  * unique names.
83921eab11SJason Gunthorpe  */
84921eab11SJason Gunthorpe 
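/*
 * Example (an illustrative sketch, not driver code): a reader that must only
 * see registered devices takes the read side of devices_rwsem and tests the
 * mark; the write side is held whenever the mark is changed (see
 * enable_device_and_get() and disable_device() below):
 *
 *	down_read(&devices_rwsem);
 *	device = xa_load(&devices, index);
 *	if (device && xa_get_mark(&devices, index, DEVICE_REGISTERED))
 *		...use device; the mark cannot change while the rwsem is held...
 *	up_read(&devices_rwsem);
 */
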
85921eab11SJason Gunthorpe /*
860df91bb6SJason Gunthorpe  * devices contains devices that have had their names assigned. The
870df91bb6SJason Gunthorpe  * devices may not yet be registered. Users that care about the registration
880df91bb6SJason Gunthorpe  * status need to call ib_device_try_get() on the device to ensure it is
890df91bb6SJason Gunthorpe  * registered, and keep it registered, for the required duration.
900df91bb6SJason Gunthorpe  *
910df91bb6SJason Gunthorpe  */
920df91bb6SJason Gunthorpe static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
93921eab11SJason Gunthorpe static DECLARE_RWSEM(devices_rwsem);
940df91bb6SJason Gunthorpe #define DEVICE_REGISTERED XA_MARK_1
950df91bb6SJason Gunthorpe 
961da177e4SLinus Torvalds static LIST_HEAD(client_list);
97e59178d8SJason Gunthorpe #define CLIENT_REGISTERED XA_MARK_1
98e59178d8SJason Gunthorpe static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
99921eab11SJason Gunthorpe static DECLARE_RWSEM(clients_rwsem);
1001da177e4SLinus Torvalds 
1011da177e4SLinus Torvalds /*
1020df91bb6SJason Gunthorpe  * If client_data is registered then the corresponding client must also still
1030df91bb6SJason Gunthorpe  * be registered.
1040df91bb6SJason Gunthorpe  */
1050df91bb6SJason Gunthorpe #define CLIENT_DATA_REGISTERED XA_MARK_1
1064e0f7b90SParav Pandit 
1074e0f7b90SParav Pandit /**
1084e0f7b90SParav Pandit  * struct rdma_dev_net - rdma net namespace metadata for a net
1094e0f7b90SParav Pandit  * @net:	Pointer to owner net namespace
1104e0f7b90SParav Pandit  * @id:		xarray id to identify the net namespace.
1114e0f7b90SParav Pandit  */
1124e0f7b90SParav Pandit struct rdma_dev_net {
1134e0f7b90SParav Pandit 	possible_net_t net;
1144e0f7b90SParav Pandit 	u32 id;
1154e0f7b90SParav Pandit };
1164e0f7b90SParav Pandit 
1174e0f7b90SParav Pandit static unsigned int rdma_dev_net_id;
1184e0f7b90SParav Pandit 
1194e0f7b90SParav Pandit /*
1204e0f7b90SParav Pandit  * A list of net namespaces is maintained in an xarray. This is necessary
1214e0f7b90SParav Pandit  * because we can't get the locking right using the existing net ns list. We
1224e0f7b90SParav Pandit  * would require an init_net callback after the list is updated.
1234e0f7b90SParav Pandit  */
1244e0f7b90SParav Pandit static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC);
1254e0f7b90SParav Pandit /*
1264e0f7b90SParav Pandit  * rwsem to protect accessing the rdma_nets xarray entries.
1274e0f7b90SParav Pandit  */
1284e0f7b90SParav Pandit static DECLARE_RWSEM(rdma_nets_rwsem);
1294e0f7b90SParav Pandit 
1300df91bb6SJason Gunthorpe /*
1310df91bb6SJason Gunthorpe  * The xarray will not iterate over NULL values stored in allocating arrays,
1320df91bb6SJason Gunthorpe  * so we need our own iterator to see all values stored in the array. This
1330df91bb6SJason Gunthorpe  * does the same thing as xa_for_each except that it also returns NULL-valued
1340df91bb6SJason Gunthorpe  * entries if the array is allocating. Simplified to only work on simple
1350df91bb6SJason Gunthorpe  * xarrays.
1360df91bb6SJason Gunthorpe  */
1370df91bb6SJason Gunthorpe static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
1380df91bb6SJason Gunthorpe 			     xa_mark_t filter)
1390df91bb6SJason Gunthorpe {
1400df91bb6SJason Gunthorpe 	XA_STATE(xas, xa, *indexp);
1410df91bb6SJason Gunthorpe 	void *entry;
1420df91bb6SJason Gunthorpe 
1430df91bb6SJason Gunthorpe 	rcu_read_lock();
1440df91bb6SJason Gunthorpe 	do {
1450df91bb6SJason Gunthorpe 		entry = xas_find_marked(&xas, ULONG_MAX, filter);
1460df91bb6SJason Gunthorpe 		if (xa_is_zero(entry))
1470df91bb6SJason Gunthorpe 			break;
1480df91bb6SJason Gunthorpe 	} while (xas_retry(&xas, entry));
1490df91bb6SJason Gunthorpe 	rcu_read_unlock();
1500df91bb6SJason Gunthorpe 
1510df91bb6SJason Gunthorpe 	if (entry) {
1520df91bb6SJason Gunthorpe 		*indexp = xas.xa_index;
1530df91bb6SJason Gunthorpe 		if (xa_is_zero(entry))
1540df91bb6SJason Gunthorpe 			return NULL;
1550df91bb6SJason Gunthorpe 		return entry;
1560df91bb6SJason Gunthorpe 	}
1570df91bb6SJason Gunthorpe 	return XA_ERROR(-ENOENT);
1580df91bb6SJason Gunthorpe }
1590df91bb6SJason Gunthorpe #define xan_for_each_marked(xa, index, entry, filter)                          \
1600df91bb6SJason Gunthorpe 	for (index = 0, entry = xan_find_marked(xa, &(index), filter);         \
1610df91bb6SJason Gunthorpe 	     !xa_is_err(entry);                                                \
1620df91bb6SJason Gunthorpe 	     (index)++, entry = xan_find_marked(xa, &(index), filter))
1630df91bb6SJason Gunthorpe 
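/*
 * Example (an illustrative sketch): walking all registered client data slots
 * of a device, where stored NULL values must still be visited; handle() is a
 * hypothetical consumer:
 *
 *	void *entry;
 *	unsigned long index;
 *
 *	xan_for_each_marked (&device->client_data, index, entry,
 *			     CLIENT_DATA_REGISTERED)
 *		handle(device, entry);		(entry may be NULL here)
 */
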
164324e227eSJason Gunthorpe /* RCU hash table mapping netdevice pointers to struct ib_port_data */
165324e227eSJason Gunthorpe static DEFINE_SPINLOCK(ndev_hash_lock);
166324e227eSJason Gunthorpe static DECLARE_HASHTABLE(ndev_hash, 5);
167324e227eSJason Gunthorpe 
168c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev);
169d0899892SJason Gunthorpe static void ib_unregister_work(struct work_struct *work);
170d0899892SJason Gunthorpe static void __ib_unregister_device(struct ib_device *device);
1718f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event,
1728f408ab6SDaniel Jurgens 			      void *lsm_data);
1738f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work);
1748f408ab6SDaniel Jurgens static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);
1758f408ab6SDaniel Jurgens 
1768f408ab6SDaniel Jurgens static struct notifier_block ibdev_lsm_nb = {
1778f408ab6SDaniel Jurgens 	.notifier_call = ib_security_change,
1788f408ab6SDaniel Jurgens };
1791da177e4SLinus Torvalds 
180324e227eSJason Gunthorpe /* Pointer to the RCU head at the start of the ib_port_data array */
181324e227eSJason Gunthorpe struct ib_port_data_rcu {
182324e227eSJason Gunthorpe 	struct rcu_head rcu_head;
183324e227eSJason Gunthorpe 	struct ib_port_data pdata[];
184324e227eSJason Gunthorpe };
185324e227eSJason Gunthorpe 
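/*
 * Check for the verbs that in-kernel consumers rely on. A device missing any
 * of them may still register, but it is flagged with kverbs_provider = false
 * so that kverbs clients are not attached to it (see add_client_context()).
 */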
1861da177e4SLinus Torvalds static int ib_device_check_mandatory(struct ib_device *device)
1871da177e4SLinus Torvalds {
1883023a1e9SKamal Heib #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
1891da177e4SLinus Torvalds 	static const struct {
1901da177e4SLinus Torvalds 		size_t offset;
1911da177e4SLinus Torvalds 		char  *name;
1921da177e4SLinus Torvalds 	} mandatory_table[] = {
1931da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(query_device),
1941da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(query_port),
1951da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(query_pkey),
1961da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(alloc_pd),
1971da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(dealloc_pd),
1981da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(create_qp),
1991da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(modify_qp),
2001da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(destroy_qp),
2011da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(post_send),
2021da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(post_recv),
2031da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(create_cq),
2041da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(destroy_cq),
2051da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(poll_cq),
2061da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(req_notify_cq),
2071da177e4SLinus Torvalds 		IB_MANDATORY_FUNC(get_dma_mr),
2087738613eSIra Weiny 		IB_MANDATORY_FUNC(dereg_mr),
2097738613eSIra Weiny 		IB_MANDATORY_FUNC(get_port_immutable)
2101da177e4SLinus Torvalds 	};
2111da177e4SLinus Torvalds 	int i;
2121da177e4SLinus Torvalds 
2136780c4faSGal Pressman 	device->kverbs_provider = true;
2149a6b090cSAhmed S. Darwish 	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
2153023a1e9SKamal Heib 		if (!*(void **) ((void *) &device->ops +
2163023a1e9SKamal Heib 				 mandatory_table[i].offset)) {
2176780c4faSGal Pressman 			device->kverbs_provider = false;
2186780c4faSGal Pressman 			break;
2191da177e4SLinus Torvalds 		}
2201da177e4SLinus Torvalds 	}
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds 	return 0;
2231da177e4SLinus Torvalds }
2241da177e4SLinus Torvalds 
225f8978bd9SLeon Romanovsky /*
22601b67117SParav Pandit  * The caller must call ib_device_put() to release the device reference count
22701b67117SParav Pandit  * when ib_device_get_by_index() returns a valid device pointer.
228f8978bd9SLeon Romanovsky  */
229f8978bd9SLeon Romanovsky struct ib_device *ib_device_get_by_index(u32 index)
230f8978bd9SLeon Romanovsky {
231f8978bd9SLeon Romanovsky 	struct ib_device *device;
232f8978bd9SLeon Romanovsky 
233921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
2340df91bb6SJason Gunthorpe 	device = xa_load(&devices, index);
23501b67117SParav Pandit 	if (device) {
236d79af724SJason Gunthorpe 		if (!ib_device_try_get(device))
23701b67117SParav Pandit 			device = NULL;
23801b67117SParav Pandit 	}
239921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
240f8978bd9SLeon Romanovsky 	return device;
241f8978bd9SLeon Romanovsky }
242f8978bd9SLeon Romanovsky 
243d79af724SJason Gunthorpe /**
244d79af724SJason Gunthorpe  * ib_device_put - Release IB device reference
245d79af724SJason Gunthorpe  * @device: device whose reference to be released
246d79af724SJason Gunthorpe  *
247d79af724SJason Gunthorpe  * ib_device_put() releases the reference to the IB device to allow it to be
248d79af724SJason Gunthorpe  * unregistered and eventually freed.
249d79af724SJason Gunthorpe  */
25001b67117SParav Pandit void ib_device_put(struct ib_device *device)
25101b67117SParav Pandit {
25201b67117SParav Pandit 	if (refcount_dec_and_test(&device->refcount))
25301b67117SParav Pandit 		complete(&device->unreg_completion);
25401b67117SParav Pandit }
255d79af724SJason Gunthorpe EXPORT_SYMBOL(ib_device_put);
25601b67117SParav Pandit 
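/*
 * Example (an illustrative sketch of the get/put contract above;
 * use_device() is hypothetical):
 *
 *	struct ib_device *dev = ib_device_get_by_index(index);
 *
 *	if (dev) {
 *		use_device(dev);
 *		ib_device_put(dev);
 *	}
 */
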
2571da177e4SLinus Torvalds static struct ib_device *__ib_device_get_by_name(const char *name)
2581da177e4SLinus Torvalds {
2591da177e4SLinus Torvalds 	struct ib_device *device;
2600df91bb6SJason Gunthorpe 	unsigned long index;
2611da177e4SLinus Torvalds 
2620df91bb6SJason Gunthorpe 	xa_for_each (&devices, index, device)
263896de009SJason Gunthorpe 		if (!strcmp(name, dev_name(&device->dev)))
2641da177e4SLinus Torvalds 			return device;
2651da177e4SLinus Torvalds 
2661da177e4SLinus Torvalds 	return NULL;
2671da177e4SLinus Torvalds }
2681da177e4SLinus Torvalds 
2696cc2c8e5SJason Gunthorpe /**
2706cc2c8e5SJason Gunthorpe  * ib_device_get_by_name - Find an IB device by name
2716cc2c8e5SJason Gunthorpe  * @name: The name to look for
2726cc2c8e5SJason Gunthorpe  * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
2736cc2c8e5SJason Gunthorpe  *
2746cc2c8e5SJason Gunthorpe  * Find and hold an ib_device by its name. The caller must call
2756cc2c8e5SJason Gunthorpe  * ib_device_put() on the returned pointer.
2766cc2c8e5SJason Gunthorpe  */
2776cc2c8e5SJason Gunthorpe struct ib_device *ib_device_get_by_name(const char *name,
2786cc2c8e5SJason Gunthorpe 					enum rdma_driver_id driver_id)
2796cc2c8e5SJason Gunthorpe {
2806cc2c8e5SJason Gunthorpe 	struct ib_device *device;
2816cc2c8e5SJason Gunthorpe 
2826cc2c8e5SJason Gunthorpe 	down_read(&devices_rwsem);
2836cc2c8e5SJason Gunthorpe 	device = __ib_device_get_by_name(name);
2846cc2c8e5SJason Gunthorpe 	if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
2856cc2c8e5SJason Gunthorpe 	    device->driver_id != driver_id)
2866cc2c8e5SJason Gunthorpe 		device = NULL;
2876cc2c8e5SJason Gunthorpe 
2886cc2c8e5SJason Gunthorpe 	if (device) {
2896cc2c8e5SJason Gunthorpe 		if (!ib_device_try_get(device))
2906cc2c8e5SJason Gunthorpe 			device = NULL;
2916cc2c8e5SJason Gunthorpe 	}
2926cc2c8e5SJason Gunthorpe 	up_read(&devices_rwsem);
2936cc2c8e5SJason Gunthorpe 	return device;
2946cc2c8e5SJason Gunthorpe }
2956cc2c8e5SJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_name);
2966cc2c8e5SJason Gunthorpe 
2974e0f7b90SParav Pandit static int rename_compat_devs(struct ib_device *device)
2984e0f7b90SParav Pandit {
2994e0f7b90SParav Pandit 	struct ib_core_device *cdev;
3004e0f7b90SParav Pandit 	unsigned long index;
3014e0f7b90SParav Pandit 	int ret = 0;
3024e0f7b90SParav Pandit 
3034e0f7b90SParav Pandit 	mutex_lock(&device->compat_devs_mutex);
3044e0f7b90SParav Pandit 	xa_for_each (&device->compat_devs, index, cdev) {
3054e0f7b90SParav Pandit 		ret = device_rename(&cdev->dev, dev_name(&device->dev));
3064e0f7b90SParav Pandit 		if (ret) {
3074e0f7b90SParav Pandit 			dev_warn(&cdev->dev,
3084e0f7b90SParav Pandit 				 "Failed to rename compat device to new name %s\n",
3094e0f7b90SParav Pandit 				 dev_name(&device->dev));
3104e0f7b90SParav Pandit 			break;
3114e0f7b90SParav Pandit 		}
3124e0f7b90SParav Pandit 	}
3134e0f7b90SParav Pandit 	mutex_unlock(&device->compat_devs_mutex);
3144e0f7b90SParav Pandit 	return ret;
3154e0f7b90SParav Pandit }
3164e0f7b90SParav Pandit 
317d21943ddSLeon Romanovsky int ib_device_rename(struct ib_device *ibdev, const char *name)
318d21943ddSLeon Romanovsky {
319e3593b56SJason Gunthorpe 	int ret;
320d21943ddSLeon Romanovsky 
321921eab11SJason Gunthorpe 	down_write(&devices_rwsem);
322e3593b56SJason Gunthorpe 	if (!strcmp(name, dev_name(&ibdev->dev))) {
323e3593b56SJason Gunthorpe 		ret = 0;
324e3593b56SJason Gunthorpe 		goto out;
325e3593b56SJason Gunthorpe 	}
326e3593b56SJason Gunthorpe 
327344684e6SJason Gunthorpe 	if (__ib_device_get_by_name(name)) {
328d21943ddSLeon Romanovsky 		ret = -EEXIST;
329d21943ddSLeon Romanovsky 		goto out;
330d21943ddSLeon Romanovsky 	}
331d21943ddSLeon Romanovsky 
332d21943ddSLeon Romanovsky 	ret = device_rename(&ibdev->dev, name);
333d21943ddSLeon Romanovsky 	if (ret)
334d21943ddSLeon Romanovsky 		goto out;
335d21943ddSLeon Romanovsky 	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
3364e0f7b90SParav Pandit 	ret = rename_compat_devs(ibdev);
337d21943ddSLeon Romanovsky out:
338921eab11SJason Gunthorpe 	up_write(&devices_rwsem);
339d21943ddSLeon Romanovsky 	return ret;
340d21943ddSLeon Romanovsky }
341d21943ddSLeon Romanovsky 
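/*
 * Assign a unique name based on a printf-style pattern such as "ib%d": every
 * existing device name matching the pattern marks its index in a local ida,
 * and the lowest free index is then allocated. The caller must hold the write
 * side of devices_rwsem.
 */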
342e349f858SJason Gunthorpe static int alloc_name(struct ib_device *ibdev, const char *name)
3431da177e4SLinus Torvalds {
3441da177e4SLinus Torvalds 	struct ib_device *device;
3450df91bb6SJason Gunthorpe 	unsigned long index;
3463b88afd3SJason Gunthorpe 	struct ida inuse;
3473b88afd3SJason Gunthorpe 	int rc;
3481da177e4SLinus Torvalds 	int i;
3491da177e4SLinus Torvalds 
350921eab11SJason Gunthorpe 	lockdep_assert_held_exclusive(&devices_rwsem);
3513b88afd3SJason Gunthorpe 	ida_init(&inuse);
3520df91bb6SJason Gunthorpe 	xa_for_each (&devices, index, device) {
353e349f858SJason Gunthorpe 		char buf[IB_DEVICE_NAME_MAX];
354e349f858SJason Gunthorpe 
355896de009SJason Gunthorpe 		if (sscanf(dev_name(&device->dev), name, &i) != 1)
3561da177e4SLinus Torvalds 			continue;
3573b88afd3SJason Gunthorpe 		if (i < 0 || i >= INT_MAX)
3581da177e4SLinus Torvalds 			continue;
3591da177e4SLinus Torvalds 		snprintf(buf, sizeof buf, name, i);
3603b88afd3SJason Gunthorpe 		if (strcmp(buf, dev_name(&device->dev)) != 0)
3613b88afd3SJason Gunthorpe 			continue;
3623b88afd3SJason Gunthorpe 
3633b88afd3SJason Gunthorpe 		rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
3643b88afd3SJason Gunthorpe 		if (rc < 0)
3653b88afd3SJason Gunthorpe 			goto out;
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 
3683b88afd3SJason Gunthorpe 	rc = ida_alloc(&inuse, GFP_KERNEL);
3693b88afd3SJason Gunthorpe 	if (rc < 0)
3703b88afd3SJason Gunthorpe 		goto out;
3711da177e4SLinus Torvalds 
3723b88afd3SJason Gunthorpe 	rc = dev_set_name(&ibdev->dev, name, rc);
3733b88afd3SJason Gunthorpe out:
3743b88afd3SJason Gunthorpe 	ida_destroy(&inuse);
3753b88afd3SJason Gunthorpe 	return rc;
3761da177e4SLinus Torvalds }
3771da177e4SLinus Torvalds 
37855aeed06SJason Gunthorpe static void ib_device_release(struct device *device)
37955aeed06SJason Gunthorpe {
38055aeed06SJason Gunthorpe 	struct ib_device *dev = container_of(device, struct ib_device, dev);
38155aeed06SJason Gunthorpe 
382c2261dd7SJason Gunthorpe 	free_netdevs(dev);
383652432f3SJason Gunthorpe 	WARN_ON(refcount_read(&dev->refcount));
38403db3a2dSMatan Barak 	ib_cache_release_one(dev);
385b34b269aSJason Gunthorpe 	ib_security_release_port_pkey_list(dev);
3864e0f7b90SParav Pandit 	xa_destroy(&dev->compat_devs);
3870df91bb6SJason Gunthorpe 	xa_destroy(&dev->client_data);
388324e227eSJason Gunthorpe 	if (dev->port_data)
389324e227eSJason Gunthorpe 		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
390324e227eSJason Gunthorpe 				       pdata[0]),
391324e227eSJason Gunthorpe 			  rcu_head);
392324e227eSJason Gunthorpe 	kfree_rcu(dev, rcu_head);
39355aeed06SJason Gunthorpe }
39455aeed06SJason Gunthorpe 
39555aeed06SJason Gunthorpe static int ib_device_uevent(struct device *device,
39655aeed06SJason Gunthorpe 			    struct kobj_uevent_env *env)
39755aeed06SJason Gunthorpe {
398896de009SJason Gunthorpe 	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
39955aeed06SJason Gunthorpe 		return -ENOMEM;
40055aeed06SJason Gunthorpe 
40155aeed06SJason Gunthorpe 	/*
40255aeed06SJason Gunthorpe 	 * It would be nice to pass the node GUID with the event...
40355aeed06SJason Gunthorpe 	 */
40455aeed06SJason Gunthorpe 
40555aeed06SJason Gunthorpe 	return 0;
40655aeed06SJason Gunthorpe }
40755aeed06SJason Gunthorpe 
40862dfa795SParav Pandit static const void *net_namespace(struct device *d)
40962dfa795SParav Pandit {
4104e0f7b90SParav Pandit 	struct ib_core_device *coredev =
4114e0f7b90SParav Pandit 			container_of(d, struct ib_core_device, dev);
4124e0f7b90SParav Pandit 
4134e0f7b90SParav Pandit 	return read_pnet(&coredev->rdma_net);
41462dfa795SParav Pandit }
41562dfa795SParav Pandit 
41655aeed06SJason Gunthorpe static struct class ib_class = {
41755aeed06SJason Gunthorpe 	.name    = "infiniband",
41855aeed06SJason Gunthorpe 	.dev_release = ib_device_release,
41955aeed06SJason Gunthorpe 	.dev_uevent = ib_device_uevent,
42062dfa795SParav Pandit 	.ns_type = &net_ns_type_operations,
42162dfa795SParav Pandit 	.namespace = net_namespace,
42255aeed06SJason Gunthorpe };
42355aeed06SJason Gunthorpe 
424cebe556bSParav Pandit static void rdma_init_coredev(struct ib_core_device *coredev,
4254e0f7b90SParav Pandit 			      struct ib_device *dev, struct net *net)
426cebe556bSParav Pandit {
427cebe556bSParav Pandit 	/* This BUILD_BUG_ON is intended to catch any layout change
428cebe556bSParav Pandit 	 * in the union of ib_core_device and device.
429cebe556bSParav Pandit 	 * dev must be the first element, as both ib_core and provider
430cebe556bSParav Pandit 	 * drivers use it. Adding anything in ib_core_device before
431cebe556bSParav Pandit 	 * device will break this assumption.
432cebe556bSParav Pandit 	 */
433cebe556bSParav Pandit 	BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) !=
434cebe556bSParav Pandit 		     offsetof(struct ib_device, dev));
435cebe556bSParav Pandit 
436cebe556bSParav Pandit 	coredev->dev.class = &ib_class;
437cebe556bSParav Pandit 	coredev->dev.groups = dev->groups;
438cebe556bSParav Pandit 	device_initialize(&coredev->dev);
439cebe556bSParav Pandit 	coredev->owner = dev;
440cebe556bSParav Pandit 	INIT_LIST_HEAD(&coredev->port_list);
4414e0f7b90SParav Pandit 	write_pnet(&coredev->rdma_net, net);
442cebe556bSParav Pandit }
443cebe556bSParav Pandit 
4441da177e4SLinus Torvalds /**
445459cc69fSLeon Romanovsky  * _ib_alloc_device - allocate an IB device struct
4461da177e4SLinus Torvalds  * @size: size of structure to allocate
4471da177e4SLinus Torvalds  *
4481da177e4SLinus Torvalds  * Low-level drivers should use ib_alloc_device() to allocate &struct
4491da177e4SLinus Torvalds  * ib_device.  @size is the size of the structure to be allocated,
4501da177e4SLinus Torvalds  * including any private data used by the low-level driver.
4511da177e4SLinus Torvalds  * ib_dealloc_device() must be used to free structures allocated with
4521da177e4SLinus Torvalds  * ib_alloc_device().
4531da177e4SLinus Torvalds  */
454459cc69fSLeon Romanovsky struct ib_device *_ib_alloc_device(size_t size)
4551da177e4SLinus Torvalds {
45655aeed06SJason Gunthorpe 	struct ib_device *device;
4571da177e4SLinus Torvalds 
45855aeed06SJason Gunthorpe 	if (WARN_ON(size < sizeof(struct ib_device)))
45955aeed06SJason Gunthorpe 		return NULL;
46055aeed06SJason Gunthorpe 
46155aeed06SJason Gunthorpe 	device = kzalloc(size, GFP_KERNEL);
46255aeed06SJason Gunthorpe 	if (!device)
46355aeed06SJason Gunthorpe 		return NULL;
46455aeed06SJason Gunthorpe 
46541eda65cSLeon Romanovsky 	if (rdma_restrack_init(device)) {
46641eda65cSLeon Romanovsky 		kfree(device);
46741eda65cSLeon Romanovsky 		return NULL;
46841eda65cSLeon Romanovsky 	}
46902d8883fSLeon Romanovsky 
4705f8f5499SParav Pandit 	device->groups[0] = &ib_dev_attr_group;
4714e0f7b90SParav Pandit 	rdma_init_coredev(&device->coredev, device, &init_net);
47255aeed06SJason Gunthorpe 
47355aeed06SJason Gunthorpe 	INIT_LIST_HEAD(&device->event_handler_list);
47455aeed06SJason Gunthorpe 	spin_lock_init(&device->event_handler_lock);
475d0899892SJason Gunthorpe 	mutex_init(&device->unregistration_lock);
4760df91bb6SJason Gunthorpe 	/*
4770df91bb6SJason Gunthorpe 	 * client_data needs to be an allocating xarray because we don't want
4780df91bb6SJason Gunthorpe 	 * our mark to be destroyed if the user stores NULL in the client data.
4790df91bb6SJason Gunthorpe 	 */
4800df91bb6SJason Gunthorpe 	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
481921eab11SJason Gunthorpe 	init_rwsem(&device->client_data_rwsem);
4824e0f7b90SParav Pandit 	xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC);
4834e0f7b90SParav Pandit 	mutex_init(&device->compat_devs_mutex);
48401b67117SParav Pandit 	init_completion(&device->unreg_completion);
485d0899892SJason Gunthorpe 	INIT_WORK(&device->unregistration_work, ib_unregister_work);
48655aeed06SJason Gunthorpe 
48755aeed06SJason Gunthorpe 	return device;
4881da177e4SLinus Torvalds }
489459cc69fSLeon Romanovsky EXPORT_SYMBOL(_ib_alloc_device);
4901da177e4SLinus Torvalds 
4911da177e4SLinus Torvalds /**
4921da177e4SLinus Torvalds  * ib_dealloc_device - free an IB device struct
4931da177e4SLinus Torvalds  * @device:structure to free
4941da177e4SLinus Torvalds  *
4951da177e4SLinus Torvalds  * Free a structure allocated with ib_alloc_device().
4961da177e4SLinus Torvalds  */
4971da177e4SLinus Torvalds void ib_dealloc_device(struct ib_device *device)
4981da177e4SLinus Torvalds {
499d0899892SJason Gunthorpe 	if (device->ops.dealloc_driver)
500d0899892SJason Gunthorpe 		device->ops.dealloc_driver(device);
501d0899892SJason Gunthorpe 
502d0899892SJason Gunthorpe 	/*
503d0899892SJason Gunthorpe 	 * ib_unregister_driver() requires all devices to remain in the xarray
504d0899892SJason Gunthorpe 	 * while their ops are callable. The last op we call is dealloc_driver
505d0899892SJason Gunthorpe 	 * above.  This is needed to create a fence on op callbacks prior to
506d0899892SJason Gunthorpe 	 * allowing the driver module to unload.
507d0899892SJason Gunthorpe 	 */
508d0899892SJason Gunthorpe 	down_write(&devices_rwsem);
509d0899892SJason Gunthorpe 	if (xa_load(&devices, device->index) == device)
510d0899892SJason Gunthorpe 		xa_erase(&devices, device->index);
511d0899892SJason Gunthorpe 	up_write(&devices_rwsem);
512d0899892SJason Gunthorpe 
513c2261dd7SJason Gunthorpe 	/* Expedite releasing netdev references */
514c2261dd7SJason Gunthorpe 	free_netdevs(device);
515c2261dd7SJason Gunthorpe 
5164e0f7b90SParav Pandit 	WARN_ON(!xa_empty(&device->compat_devs));
5170df91bb6SJason Gunthorpe 	WARN_ON(!xa_empty(&device->client_data));
518652432f3SJason Gunthorpe 	WARN_ON(refcount_read(&device->refcount));
5190ad699c0SLeon Romanovsky 	rdma_restrack_clean(device);
520e155755eSParav Pandit 	/* Balances with device_initialize */
521924b8900SLeon Romanovsky 	put_device(&device->dev);
5221da177e4SLinus Torvalds }
5231da177e4SLinus Torvalds EXPORT_SYMBOL(ib_dealloc_device);
5241da177e4SLinus Torvalds 
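/*
 * Example (an illustrative sketch of the alloc/dealloc pairing; struct my_dev
 * and the "my%d" pattern are hypothetical):
 *
 *	int ret;
 *	struct my_dev *mdev = ib_alloc_device(my_dev, ibdev);
 *
 *	if (!mdev)
 *		return -ENOMEM;
 *	ret = ib_register_device(&mdev->ibdev, "my%d");
 *	if (ret)
 *		ib_dealloc_device(&mdev->ibdev);
 */
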
525921eab11SJason Gunthorpe /*
526921eab11SJason Gunthorpe  * add_client_context() and remove_client_context() must be safe against
527921eab11SJason Gunthorpe  * parallel calls on the same device - registration/unregistration of both the
528921eab11SJason Gunthorpe  * device and client can be occurring in parallel.
529921eab11SJason Gunthorpe  *
530921eab11SJason Gunthorpe  * The routines need to be a fence; any caller must not return until the add
531921eab11SJason Gunthorpe  * or remove is fully completed.
532921eab11SJason Gunthorpe  */
533921eab11SJason Gunthorpe static int add_client_context(struct ib_device *device,
534921eab11SJason Gunthorpe 			      struct ib_client *client)
5351da177e4SLinus Torvalds {
536921eab11SJason Gunthorpe 	int ret = 0;
5371da177e4SLinus Torvalds 
5386780c4faSGal Pressman 	if (!device->kverbs_provider && !client->no_kverbs_req)
539921eab11SJason Gunthorpe 		return 0;
5406780c4faSGal Pressman 
541921eab11SJason Gunthorpe 	down_write(&device->client_data_rwsem);
542921eab11SJason Gunthorpe 	/*
543921eab11SJason Gunthorpe 	 * Another caller to add_client_context got here first and has already
544921eab11SJason Gunthorpe 	 * completely initialized context.
545921eab11SJason Gunthorpe 	 * completely initialized the context.
546921eab11SJason Gunthorpe 	if (xa_get_mark(&device->client_data, client->client_id,
547921eab11SJason Gunthorpe 		    CLIENT_DATA_REGISTERED))
548921eab11SJason Gunthorpe 		goto out;
549921eab11SJason Gunthorpe 
550921eab11SJason Gunthorpe 	ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
551921eab11SJason Gunthorpe 			      GFP_KERNEL));
552921eab11SJason Gunthorpe 	if (ret)
553921eab11SJason Gunthorpe 		goto out;
554921eab11SJason Gunthorpe 	downgrade_write(&device->client_data_rwsem);
555921eab11SJason Gunthorpe 	if (client->add)
556921eab11SJason Gunthorpe 		client->add(device);
557921eab11SJason Gunthorpe 
558921eab11SJason Gunthorpe 	/* Readers shall not see a client until add has been completed */
5590df91bb6SJason Gunthorpe 	xa_set_mark(&device->client_data, client->client_id,
5600df91bb6SJason Gunthorpe 		    CLIENT_DATA_REGISTERED);
561921eab11SJason Gunthorpe 	up_read(&device->client_data_rwsem);
562921eab11SJason Gunthorpe 	return 0;
5631da177e4SLinus Torvalds 
564921eab11SJason Gunthorpe out:
565921eab11SJason Gunthorpe 	up_write(&device->client_data_rwsem);
566921eab11SJason Gunthorpe 	return ret;
567921eab11SJason Gunthorpe }
568921eab11SJason Gunthorpe 
569921eab11SJason Gunthorpe static void remove_client_context(struct ib_device *device,
570921eab11SJason Gunthorpe 				  unsigned int client_id)
571921eab11SJason Gunthorpe {
572921eab11SJason Gunthorpe 	struct ib_client *client;
573921eab11SJason Gunthorpe 	void *client_data;
574921eab11SJason Gunthorpe 
575921eab11SJason Gunthorpe 	down_write(&device->client_data_rwsem);
576921eab11SJason Gunthorpe 	if (!xa_get_mark(&device->client_data, client_id,
577921eab11SJason Gunthorpe 			 CLIENT_DATA_REGISTERED)) {
578921eab11SJason Gunthorpe 		up_write(&device->client_data_rwsem);
579921eab11SJason Gunthorpe 		return;
580921eab11SJason Gunthorpe 	}
581921eab11SJason Gunthorpe 	client_data = xa_load(&device->client_data, client_id);
582921eab11SJason Gunthorpe 	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
583921eab11SJason Gunthorpe 	client = xa_load(&clients, client_id);
584921eab11SJason Gunthorpe 	downgrade_write(&device->client_data_rwsem);
585921eab11SJason Gunthorpe 
586921eab11SJason Gunthorpe 	/*
587921eab11SJason Gunthorpe 	 * Notice we cannot be holding any exclusive locks when calling the
588921eab11SJason Gunthorpe 	 * remove callback as the remove callback can recurse back into any
589921eab11SJason Gunthorpe 	 * public functions in this module and thus try for any locks those
590921eab11SJason Gunthorpe 	 * functions take.
591921eab11SJason Gunthorpe 	 *
592921eab11SJason Gunthorpe 	 * For this reason clients and drivers should not call the
593921eab11SJason Gunthorpe 	 * unregistration functions while holding any locks.
594921eab11SJason Gunthorpe 	 *
595921eab11SJason Gunthorpe 	 * It is tempting to drop the client_data_rwsem too, but this is required
596921eab11SJason Gunthorpe 	 * to ensure that unregister_client does not return until all clients
597921eab11SJason Gunthorpe 	 * are completely unregistered, which is required to avoid module
598921eab11SJason Gunthorpe 	 * unloading races.
599921eab11SJason Gunthorpe 	 */
600921eab11SJason Gunthorpe 	if (client->remove)
601921eab11SJason Gunthorpe 		client->remove(device, client_data);
602921eab11SJason Gunthorpe 
603921eab11SJason Gunthorpe 	xa_erase(&device->client_data, client_id);
604921eab11SJason Gunthorpe 	up_read(&device->client_data_rwsem);
6051da177e4SLinus Torvalds }
6061da177e4SLinus Torvalds 
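/*
 * Example (an illustrative sketch): the client shape that the two functions
 * above operate on; my_add() and my_remove() are hypothetical callbacks,
 * registered via ib_register_client():
 *
 *	static struct ib_client my_client = {
 *		.name   = "my_client",
 *		.add    = my_add,
 *		.remove = my_remove,
 *	};
 */
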
607c2261dd7SJason Gunthorpe static int alloc_port_data(struct ib_device *device)
6085eb620c8SYosef Etigin {
609324e227eSJason Gunthorpe 	struct ib_port_data_rcu *pdata_rcu;
610ea1075edSJason Gunthorpe 	unsigned int port;
611c2261dd7SJason Gunthorpe 
612c2261dd7SJason Gunthorpe 	if (device->port_data)
613c2261dd7SJason Gunthorpe 		return 0;
614c2261dd7SJason Gunthorpe 
615c2261dd7SJason Gunthorpe 	/* This can only be called once the physical port range is defined */
616c2261dd7SJason Gunthorpe 	if (WARN_ON(!device->phys_port_cnt))
617c2261dd7SJason Gunthorpe 		return -EINVAL;
6185eb620c8SYosef Etigin 
6198ceb1357SJason Gunthorpe 	/*
6208ceb1357SJason Gunthorpe 	 * device->port_data is indexed directly by the port number to make
6217738613eSIra Weiny 	 * access to this data as efficient as possible.
6227738613eSIra Weiny 	 *
6238ceb1357SJason Gunthorpe 	 * Therefore port_data is declared as a 1-based array with potential
6248ceb1357SJason Gunthorpe 	 * empty slots at the beginning.
6257738613eSIra Weiny 	 */
626324e227eSJason Gunthorpe 	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
627324e227eSJason Gunthorpe 					rdma_end_port(device) + 1),
628324e227eSJason Gunthorpe 			    GFP_KERNEL);
629324e227eSJason Gunthorpe 	if (!pdata_rcu)
63055aeed06SJason Gunthorpe 		return -ENOMEM;
631324e227eSJason Gunthorpe 	/*
632324e227eSJason Gunthorpe 	 * The rcu_head is put in front of the port data array and the stored
633324e227eSJason Gunthorpe 	 * pointer is adjusted since we never need to see that member until
634324e227eSJason Gunthorpe 	 * kfree_rcu.
635324e227eSJason Gunthorpe 	 */
636324e227eSJason Gunthorpe 	device->port_data = pdata_rcu->pdata;
6375eb620c8SYosef Etigin 
638ea1075edSJason Gunthorpe 	rdma_for_each_port (device, port) {
6398ceb1357SJason Gunthorpe 		struct ib_port_data *pdata = &device->port_data[port];
6408ceb1357SJason Gunthorpe 
641324e227eSJason Gunthorpe 		pdata->ib_dev = device;
6428ceb1357SJason Gunthorpe 		spin_lock_init(&pdata->pkey_list_lock);
6438ceb1357SJason Gunthorpe 		INIT_LIST_HEAD(&pdata->pkey_list);
644c2261dd7SJason Gunthorpe 		spin_lock_init(&pdata->netdev_lock);
645324e227eSJason Gunthorpe 		INIT_HLIST_NODE(&pdata->ndev_hash_link);
646c2261dd7SJason Gunthorpe 	}
647c2261dd7SJason Gunthorpe 	return 0;
648c2261dd7SJason Gunthorpe }
649c2261dd7SJason Gunthorpe 
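/*
 * Example (illustrative): with the layout above, per-port data is reached by
 * direct 1-based indexing:
 *
 *	rdma_for_each_port (device, port) {
 *		struct ib_port_data *pdata = &device->port_data[port];
 *		...
 *	}
 */
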
650c2261dd7SJason Gunthorpe static int verify_immutable(const struct ib_device *dev, u8 port)
651c2261dd7SJason Gunthorpe {
652c2261dd7SJason Gunthorpe 	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
653c2261dd7SJason Gunthorpe 			    rdma_max_mad_size(dev, port) != 0);
654c2261dd7SJason Gunthorpe }
655c2261dd7SJason Gunthorpe 
656c2261dd7SJason Gunthorpe static int setup_port_data(struct ib_device *device)
657c2261dd7SJason Gunthorpe {
658c2261dd7SJason Gunthorpe 	unsigned int port;
659c2261dd7SJason Gunthorpe 	int ret;
660c2261dd7SJason Gunthorpe 
661c2261dd7SJason Gunthorpe 	ret = alloc_port_data(device);
662c2261dd7SJason Gunthorpe 	if (ret)
663c2261dd7SJason Gunthorpe 		return ret;
664c2261dd7SJason Gunthorpe 
665c2261dd7SJason Gunthorpe 	rdma_for_each_port (device, port) {
666c2261dd7SJason Gunthorpe 		struct ib_port_data *pdata = &device->port_data[port];
6678ceb1357SJason Gunthorpe 
6688ceb1357SJason Gunthorpe 		ret = device->ops.get_port_immutable(device, port,
6698ceb1357SJason Gunthorpe 						     &pdata->immutable);
6705eb620c8SYosef Etigin 		if (ret)
6715eb620c8SYosef Etigin 			return ret;
67255aeed06SJason Gunthorpe 
67355aeed06SJason Gunthorpe 		if (verify_immutable(device, port))
67455aeed06SJason Gunthorpe 			return -EINVAL;
67555aeed06SJason Gunthorpe 	}
67655aeed06SJason Gunthorpe 	return 0;
6775eb620c8SYosef Etigin }
6785eb620c8SYosef Etigin 
6799abb0d1bSLeon Romanovsky void ib_get_device_fw_str(struct ib_device *dev, char *str)
6805fa76c20SIra Weiny {
6813023a1e9SKamal Heib 	if (dev->ops.get_dev_fw_str)
6823023a1e9SKamal Heib 		dev->ops.get_dev_fw_str(dev, str);
6835fa76c20SIra Weiny 	else
6845fa76c20SIra Weiny 		str[0] = '\0';
6855fa76c20SIra Weiny }
6865fa76c20SIra Weiny EXPORT_SYMBOL(ib_get_device_fw_str);
6875fa76c20SIra Weiny 
6888f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work)
6898f408ab6SDaniel Jurgens {
6908f408ab6SDaniel Jurgens 	struct ib_device *dev;
6910df91bb6SJason Gunthorpe 	unsigned long index;
6928f408ab6SDaniel Jurgens 
693921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
6940df91bb6SJason Gunthorpe 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
695ea1075edSJason Gunthorpe 		unsigned int i;
6968f408ab6SDaniel Jurgens 
697ea1075edSJason Gunthorpe 		rdma_for_each_port (dev, i) {
6988f408ab6SDaniel Jurgens 			u64 sp;
6998f408ab6SDaniel Jurgens 			int ret = ib_get_cached_subnet_prefix(dev,
7008f408ab6SDaniel Jurgens 							      i,
7018f408ab6SDaniel Jurgens 							      &sp);
7028f408ab6SDaniel Jurgens 
7038f408ab6SDaniel Jurgens 			WARN_ONCE(ret,
7048f408ab6SDaniel Jurgens 				  "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
7058f408ab6SDaniel Jurgens 				  ret);
706a750cfdeSDaniel Jurgens 			if (!ret)
7078f408ab6SDaniel Jurgens 				ib_security_cache_change(dev, i, sp);
7088f408ab6SDaniel Jurgens 		}
7098f408ab6SDaniel Jurgens 	}
710921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
7118f408ab6SDaniel Jurgens }
7128f408ab6SDaniel Jurgens 
7138f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event,
7148f408ab6SDaniel Jurgens 			      void *lsm_data)
7158f408ab6SDaniel Jurgens {
7168f408ab6SDaniel Jurgens 	if (event != LSM_POLICY_CHANGE)
7178f408ab6SDaniel Jurgens 		return NOTIFY_DONE;
7188f408ab6SDaniel Jurgens 
7198f408ab6SDaniel Jurgens 	schedule_work(&ib_policy_change_work);
720c66f6741SDaniel Jurgens 	ib_mad_agent_security_change();
7218f408ab6SDaniel Jurgens 
7228f408ab6SDaniel Jurgens 	return NOTIFY_OK;
7238f408ab6SDaniel Jurgens }
7248f408ab6SDaniel Jurgens 
7254e0f7b90SParav Pandit static void compatdev_release(struct device *dev)
7264e0f7b90SParav Pandit {
7274e0f7b90SParav Pandit 	struct ib_core_device *cdev =
7284e0f7b90SParav Pandit 		container_of(dev, struct ib_core_device, dev);
7294e0f7b90SParav Pandit 
7304e0f7b90SParav Pandit 	kfree(cdev);
7314e0f7b90SParav Pandit }
7324e0f7b90SParav Pandit 
7334e0f7b90SParav Pandit static int add_one_compat_dev(struct ib_device *device,
7344e0f7b90SParav Pandit 			      struct rdma_dev_net *rnet)
7354e0f7b90SParav Pandit {
7364e0f7b90SParav Pandit 	struct ib_core_device *cdev;
7374e0f7b90SParav Pandit 	int ret;
7384e0f7b90SParav Pandit 
7394e0f7b90SParav Pandit 	/*
7404e0f7b90SParav Pandit 	 * Create and add a compat device in all namespaces other than the one
7414e0f7b90SParav Pandit 	 * it is currently bound to.
7424e0f7b90SParav Pandit 	 */
7434e0f7b90SParav Pandit 	if (net_eq(read_pnet(&rnet->net),
7444e0f7b90SParav Pandit 		   read_pnet(&device->coredev.rdma_net)))
7454e0f7b90SParav Pandit 		return 0;
7464e0f7b90SParav Pandit 
7474e0f7b90SParav Pandit 	/*
7484e0f7b90SParav Pandit 	 * The first of rdma_dev_init_net() or ib_register_device() to take the
7494e0f7b90SParav Pandit 	 * compat_devs_mutex wins and gets to add the device. Others will wait
7504e0f7b90SParav Pandit 	 * for completion here.
7514e0f7b90SParav Pandit 	 */
7524e0f7b90SParav Pandit 	mutex_lock(&device->compat_devs_mutex);
7534e0f7b90SParav Pandit 	cdev = xa_load(&device->compat_devs, rnet->id);
7544e0f7b90SParav Pandit 	if (cdev) {
7554e0f7b90SParav Pandit 		ret = 0;
7564e0f7b90SParav Pandit 		goto done;
7574e0f7b90SParav Pandit 	}
7584e0f7b90SParav Pandit 	ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL);
7594e0f7b90SParav Pandit 	if (ret)
7604e0f7b90SParav Pandit 		goto done;
7614e0f7b90SParav Pandit 
7624e0f7b90SParav Pandit 	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
7634e0f7b90SParav Pandit 	if (!cdev) {
7644e0f7b90SParav Pandit 		ret = -ENOMEM;
7654e0f7b90SParav Pandit 		goto cdev_err;
7664e0f7b90SParav Pandit 	}
7674e0f7b90SParav Pandit 
7684e0f7b90SParav Pandit 	cdev->dev.parent = device->dev.parent;
7694e0f7b90SParav Pandit 	rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
7704e0f7b90SParav Pandit 	cdev->dev.release = compatdev_release;
7714e0f7b90SParav Pandit 	dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
7724e0f7b90SParav Pandit 
7734e0f7b90SParav Pandit 	ret = device_add(&cdev->dev);
7744e0f7b90SParav Pandit 	if (ret)
7754e0f7b90SParav Pandit 		goto add_err;
7765417783eSParav Pandit 	ret = ib_setup_port_attrs(cdev, false);
7775417783eSParav Pandit 	if (ret)
7785417783eSParav Pandit 		goto port_err;
7794e0f7b90SParav Pandit 
7804e0f7b90SParav Pandit 	ret = xa_err(xa_store(&device->compat_devs, rnet->id,
7814e0f7b90SParav Pandit 			      cdev, GFP_KERNEL));
7824e0f7b90SParav Pandit 	if (ret)
7834e0f7b90SParav Pandit 		goto insert_err;
7844e0f7b90SParav Pandit 
7854e0f7b90SParav Pandit 	mutex_unlock(&device->compat_devs_mutex);
7864e0f7b90SParav Pandit 	return 0;
7874e0f7b90SParav Pandit 
7884e0f7b90SParav Pandit insert_err:
7895417783eSParav Pandit 	ib_free_port_attrs(cdev);
7905417783eSParav Pandit port_err:
7914e0f7b90SParav Pandit 	device_del(&cdev->dev);
7924e0f7b90SParav Pandit add_err:
7934e0f7b90SParav Pandit 	put_device(&cdev->dev);
7944e0f7b90SParav Pandit cdev_err:
7954e0f7b90SParav Pandit 	xa_release(&device->compat_devs, rnet->id);
7964e0f7b90SParav Pandit done:
7974e0f7b90SParav Pandit 	mutex_unlock(&device->compat_devs_mutex);
7984e0f7b90SParav Pandit 	return ret;
7994e0f7b90SParav Pandit }
8004e0f7b90SParav Pandit 
8014e0f7b90SParav Pandit static void remove_one_compat_dev(struct ib_device *device, u32 id)
8024e0f7b90SParav Pandit {
8034e0f7b90SParav Pandit 	struct ib_core_device *cdev;
8044e0f7b90SParav Pandit 
8054e0f7b90SParav Pandit 	mutex_lock(&device->compat_devs_mutex);
8064e0f7b90SParav Pandit 	cdev = xa_erase(&device->compat_devs, id);
8074e0f7b90SParav Pandit 	mutex_unlock(&device->compat_devs_mutex);
8084e0f7b90SParav Pandit 	if (cdev) {
8095417783eSParav Pandit 		ib_free_port_attrs(cdev);
8104e0f7b90SParav Pandit 		device_del(&cdev->dev);
8114e0f7b90SParav Pandit 		put_device(&cdev->dev);
8124e0f7b90SParav Pandit 	}
8134e0f7b90SParav Pandit }
8144e0f7b90SParav Pandit 
8154e0f7b90SParav Pandit static void remove_compat_devs(struct ib_device *device)
8164e0f7b90SParav Pandit {
8174e0f7b90SParav Pandit 	struct ib_core_device *cdev;
8184e0f7b90SParav Pandit 	unsigned long index;
8194e0f7b90SParav Pandit 
8204e0f7b90SParav Pandit 	xa_for_each (&device->compat_devs, index, cdev)
8214e0f7b90SParav Pandit 		remove_one_compat_dev(device, index);
8224e0f7b90SParav Pandit }
8234e0f7b90SParav Pandit 
8244e0f7b90SParav Pandit static int add_compat_devs(struct ib_device *device)
8254e0f7b90SParav Pandit {
8264e0f7b90SParav Pandit 	struct rdma_dev_net *rnet;
8274e0f7b90SParav Pandit 	unsigned long index;
8284e0f7b90SParav Pandit 	int ret = 0;
8294e0f7b90SParav Pandit 
8304e0f7b90SParav Pandit 	down_read(&rdma_nets_rwsem);
8314e0f7b90SParav Pandit 	xa_for_each (&rdma_nets, index, rnet) {
8324e0f7b90SParav Pandit 		ret = add_one_compat_dev(device, rnet);
8334e0f7b90SParav Pandit 		if (ret)
8344e0f7b90SParav Pandit 			break;
8354e0f7b90SParav Pandit 	}
8364e0f7b90SParav Pandit 	up_read(&rdma_nets_rwsem);
8374e0f7b90SParav Pandit 	return ret;
8384e0f7b90SParav Pandit }
8394e0f7b90SParav Pandit 
8404e0f7b90SParav Pandit static void rdma_dev_exit_net(struct net *net)
8414e0f7b90SParav Pandit {
8424e0f7b90SParav Pandit 	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
8434e0f7b90SParav Pandit 	struct ib_device *dev;
8444e0f7b90SParav Pandit 	unsigned long index;
8454e0f7b90SParav Pandit 	int ret;
8464e0f7b90SParav Pandit 
8474e0f7b90SParav Pandit 	down_write(&rdma_nets_rwsem);
8484e0f7b90SParav Pandit 	/*
8494e0f7b90SParav Pandit 	 * Prevent the ID from being re-used and hide it from xa_for_each().
8504e0f7b90SParav Pandit 	 */
8514e0f7b90SParav Pandit 	ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL));
8524e0f7b90SParav Pandit 	WARN_ON(ret);
8534e0f7b90SParav Pandit 	up_write(&rdma_nets_rwsem);
8544e0f7b90SParav Pandit 
8554e0f7b90SParav Pandit 	down_read(&devices_rwsem);
8564e0f7b90SParav Pandit 	xa_for_each (&devices, index, dev) {
8574e0f7b90SParav Pandit 		get_device(&dev->dev);
8584e0f7b90SParav Pandit 		/*
8594e0f7b90SParav Pandit 		 * Release the devices_rwsem so that the potentially blocking
8604e0f7b90SParav Pandit 		 * device_del() doesn't hold the devices_rwsem for too long.
8614e0f7b90SParav Pandit 		 */
8624e0f7b90SParav Pandit 		up_read(&devices_rwsem);
8634e0f7b90SParav Pandit 
8644e0f7b90SParav Pandit 		remove_one_compat_dev(dev, rnet->id);
8654e0f7b90SParav Pandit 
8664e0f7b90SParav Pandit 		put_device(&dev->dev);
8674e0f7b90SParav Pandit 		down_read(&devices_rwsem);
8684e0f7b90SParav Pandit 	}
8694e0f7b90SParav Pandit 	up_read(&devices_rwsem);
8704e0f7b90SParav Pandit 
8714e0f7b90SParav Pandit 	xa_erase(&rdma_nets, rnet->id);
8724e0f7b90SParav Pandit }
8734e0f7b90SParav Pandit 
8744e0f7b90SParav Pandit static __net_init int rdma_dev_init_net(struct net *net)
8754e0f7b90SParav Pandit {
8764e0f7b90SParav Pandit 	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
8774e0f7b90SParav Pandit 	unsigned long index;
8784e0f7b90SParav Pandit 	struct ib_device *dev;
8794e0f7b90SParav Pandit 	int ret;
8804e0f7b90SParav Pandit 
8814e0f7b90SParav Pandit 	/* No need to create any compat devices in default init_net. */
8824e0f7b90SParav Pandit 	if (net_eq(net, &init_net))
8834e0f7b90SParav Pandit 		return 0;
8844e0f7b90SParav Pandit 
8854e0f7b90SParav Pandit 	write_pnet(&rnet->net, net);
8864e0f7b90SParav Pandit 
8874e0f7b90SParav Pandit 	ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
8884e0f7b90SParav Pandit 	if (ret)
8894e0f7b90SParav Pandit 		return ret;
8904e0f7b90SParav Pandit 
8914e0f7b90SParav Pandit 	down_read(&devices_rwsem);
8924e0f7b90SParav Pandit 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
8934e0f7b90SParav Pandit 		ret = add_one_compat_dev(dev, rnet);
8944e0f7b90SParav Pandit 		if (ret)
8954e0f7b90SParav Pandit 			break;
8964e0f7b90SParav Pandit 	}
8974e0f7b90SParav Pandit 	up_read(&devices_rwsem);
8984e0f7b90SParav Pandit 
8994e0f7b90SParav Pandit 	if (ret)
9004e0f7b90SParav Pandit 		rdma_dev_exit_net(net);
9014e0f7b90SParav Pandit 
9024e0f7b90SParav Pandit 	return ret;
9034e0f7b90SParav Pandit }
9044e0f7b90SParav Pandit 
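/*
 * The two callbacks above are wired into the pernet machinery; a sketch of
 * the expected shape, assuming the upstream layout (the actual registration
 * lives later in this file):
 *
 *	static struct pernet_operations rdma_dev_net_ops = {
 *		.init = rdma_dev_init_net,
 *		.exit = rdma_dev_exit_net,
 *		.id = &rdma_dev_net_id,
 *		.size = sizeof(struct rdma_dev_net),
 *	};
 */
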
905ecc82c53SLeon Romanovsky /*
906d0899892SJason Gunthorpe  * Assign the unique string device name and the unique device index. This is
907d0899892SJason Gunthorpe  * undone by ib_dealloc_device.
908ecc82c53SLeon Romanovsky  */
9090df91bb6SJason Gunthorpe static int assign_name(struct ib_device *device, const char *name)
9100df91bb6SJason Gunthorpe {
9110df91bb6SJason Gunthorpe 	static u32 last_id;
9120df91bb6SJason Gunthorpe 	int ret;
913ecc82c53SLeon Romanovsky 
914921eab11SJason Gunthorpe 	down_write(&devices_rwsem);
9150df91bb6SJason Gunthorpe 	/* Assign a unique name to the device */
9160df91bb6SJason Gunthorpe 	if (strchr(name, '%'))
9170df91bb6SJason Gunthorpe 		ret = alloc_name(device, name);
9180df91bb6SJason Gunthorpe 	else
9190df91bb6SJason Gunthorpe 		ret = dev_set_name(&device->dev, name);
9200df91bb6SJason Gunthorpe 	if (ret)
9210df91bb6SJason Gunthorpe 		goto out;
922ecc82c53SLeon Romanovsky 
9230df91bb6SJason Gunthorpe 	if (__ib_device_get_by_name(dev_name(&device->dev))) {
9240df91bb6SJason Gunthorpe 		ret = -ENFILE;
9250df91bb6SJason Gunthorpe 		goto out;
926ecc82c53SLeon Romanovsky 	}
9270df91bb6SJason Gunthorpe 	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
9280df91bb6SJason Gunthorpe 
929ea295481SLinus Torvalds 	ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
930ea295481SLinus Torvalds 			&last_id, GFP_KERNEL);
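	/* xa_alloc_cyclic() returns 1 (not an error) when the counter wraps */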
931ea295481SLinus Torvalds 	if (ret > 0)
9320df91bb6SJason Gunthorpe 		ret = 0;
933921eab11SJason Gunthorpe 
9340df91bb6SJason Gunthorpe out:
935921eab11SJason Gunthorpe 	up_write(&devices_rwsem);
9360df91bb6SJason Gunthorpe 	return ret;
9370df91bb6SJason Gunthorpe }
9380df91bb6SJason Gunthorpe 
939548cb4fbSParav Pandit static void setup_dma_device(struct ib_device *device)
9401da177e4SLinus Torvalds {
94199db9494SBart Van Assche 	struct device *parent = device->dev.parent;
9421da177e4SLinus Torvalds 
9430957c29fSBart Van Assche 	WARN_ON_ONCE(device->dma_device);
9440957c29fSBart Van Assche 	if (device->dev.dma_ops) {
9450957c29fSBart Van Assche 		/*
9460957c29fSBart Van Assche 		 * The caller provided custom DMA operations. Copy the
9470957c29fSBart Van Assche 		 * DMA-related fields that are used by e.g. dma_alloc_coherent()
9480957c29fSBart Van Assche 		 * into device->dev.
9490957c29fSBart Van Assche 		 */
9500957c29fSBart Van Assche 		device->dma_device = &device->dev;
95102ee9da3SBart Van Assche 		if (!device->dev.dma_mask) {
95202ee9da3SBart Van Assche 			if (parent)
95399db9494SBart Van Assche 				device->dev.dma_mask = parent->dma_mask;
95402ee9da3SBart Van Assche 			else
95502ee9da3SBart Van Assche 				WARN_ON_ONCE(true);
95602ee9da3SBart Van Assche 		}
95702ee9da3SBart Van Assche 		if (!device->dev.coherent_dma_mask) {
95802ee9da3SBart Van Assche 			if (parent)
9590957c29fSBart Van Assche 				device->dev.coherent_dma_mask =
9600957c29fSBart Van Assche 					parent->coherent_dma_mask;
96102ee9da3SBart Van Assche 			else
96202ee9da3SBart Van Assche 				WARN_ON_ONCE(true);
96302ee9da3SBart Van Assche 		}
9640957c29fSBart Van Assche 	} else {
9650957c29fSBart Van Assche 		/*
9660957c29fSBart Van Assche 		 * The caller did not provide custom DMA operations. Use the
9670957c29fSBart Van Assche 		 * DMA mapping operations of the parent device.
9680957c29fSBart Van Assche 		 */
96902ee9da3SBart Van Assche 		WARN_ON_ONCE(!parent);
9700957c29fSBart Van Assche 		device->dma_device = parent;
9710957c29fSBart Van Assche 	}
972548cb4fbSParav Pandit }
973548cb4fbSParav Pandit 
974921eab11SJason Gunthorpe /*
975921eab11SJason Gunthorpe  * setup_device() allocates memory and sets up data that requires calling the
976921eab11SJason Gunthorpe  * device ops, this is the only reason these actions are not done during
977921eab11SJason Gunthorpe  * ib_alloc_device. It is undone by ib_dealloc_device().
978921eab11SJason Gunthorpe  */
979548cb4fbSParav Pandit static int setup_device(struct ib_device *device)
980548cb4fbSParav Pandit {
981548cb4fbSParav Pandit 	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
982548cb4fbSParav Pandit 	int ret;
983548cb4fbSParav Pandit 
984921eab11SJason Gunthorpe 	setup_dma_device(device);
985921eab11SJason Gunthorpe 
986548cb4fbSParav Pandit 	ret = ib_device_check_mandatory(device);
987548cb4fbSParav Pandit 	if (ret)
988548cb4fbSParav Pandit 		return ret;
989548cb4fbSParav Pandit 
9908ceb1357SJason Gunthorpe 	ret = setup_port_data(device);
991548cb4fbSParav Pandit 	if (ret) {
9928ceb1357SJason Gunthorpe 		dev_warn(&device->dev, "Couldn't create per-port data\n");
993548cb4fbSParav Pandit 		return ret;
994548cb4fbSParav Pandit 	}
995548cb4fbSParav Pandit 
996548cb4fbSParav Pandit 	memset(&device->attrs, 0, sizeof(device->attrs));
9973023a1e9SKamal Heib 	ret = device->ops.query_device(device, &device->attrs, &uhw);
998548cb4fbSParav Pandit 	if (ret) {
999548cb4fbSParav Pandit 		dev_warn(&device->dev,
1000548cb4fbSParav Pandit 			 "Couldn't query the device attributes\n");
1001d45f89d5SJason Gunthorpe 		return ret;
1002548cb4fbSParav Pandit 	}
1003548cb4fbSParav Pandit 
1004548cb4fbSParav Pandit 	return 0;
1005548cb4fbSParav Pandit }
1006548cb4fbSParav Pandit 
1007921eab11SJason Gunthorpe static void disable_device(struct ib_device *device)
1008921eab11SJason Gunthorpe {
1009921eab11SJason Gunthorpe 	struct ib_client *client;
1010921eab11SJason Gunthorpe 
1011921eab11SJason Gunthorpe 	WARN_ON(!refcount_read(&device->refcount));
1012921eab11SJason Gunthorpe 
1013921eab11SJason Gunthorpe 	down_write(&devices_rwsem);
1014921eab11SJason Gunthorpe 	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
1015921eab11SJason Gunthorpe 	up_write(&devices_rwsem);
1016921eab11SJason Gunthorpe 
1017921eab11SJason Gunthorpe 	down_read(&clients_rwsem);
1018921eab11SJason Gunthorpe 	list_for_each_entry_reverse(client, &client_list, list)
1019921eab11SJason Gunthorpe 		remove_client_context(device, client->client_id);
1020921eab11SJason Gunthorpe 	up_read(&clients_rwsem);
1021921eab11SJason Gunthorpe 
1022921eab11SJason Gunthorpe 	/* Pairs with refcount_set in enable_device */
1023921eab11SJason Gunthorpe 	ib_device_put(device);
1024921eab11SJason Gunthorpe 	wait_for_completion(&device->unreg_completion);
1025c2261dd7SJason Gunthorpe 
10264e0f7b90SParav Pandit 	/*
10274e0f7b90SParav Pandit 	 * Compat devices must be removed after the device refcount drops to
10284e0f7b90SParav Pandit 	 * zero. Otherwise rdma_dev_init_net() may add more compat devices
10294e0f7b90SParav Pandit 	 * after removing them and before the device is disabled.
10304e0f7b90SParav Pandit 	 */
10314e0f7b90SParav Pandit 	remove_compat_devs(device);
10324e0f7b90SParav Pandit 
1033c2261dd7SJason Gunthorpe 	/* Expedite removing unregistered pointers from the hash table */
1034c2261dd7SJason Gunthorpe 	free_netdevs(device);
1035921eab11SJason Gunthorpe }
1036921eab11SJason Gunthorpe 
1037921eab11SJason Gunthorpe /*
1038921eab11SJason Gunthorpe  * An enabled device is visible to all clients and to all the public-facing
1039d0899892SJason Gunthorpe  * APIs that return a device pointer. This always returns with a new get, even
1040d0899892SJason Gunthorpe  * if it fails.
1041921eab11SJason Gunthorpe  */
1042d0899892SJason Gunthorpe static int enable_device_and_get(struct ib_device *device)
1043921eab11SJason Gunthorpe {
1044921eab11SJason Gunthorpe 	struct ib_client *client;
1045921eab11SJason Gunthorpe 	unsigned long index;
1046d0899892SJason Gunthorpe 	int ret = 0;
1047921eab11SJason Gunthorpe 
1048d0899892SJason Gunthorpe 	/*
1049d0899892SJason Gunthorpe 	 * One ref belongs to the xa and the other belongs to this
1050d0899892SJason Gunthorpe 	 * thread. This is needed to guard against parallel unregistration.
1051d0899892SJason Gunthorpe 	 */
1052d0899892SJason Gunthorpe 	refcount_set(&device->refcount, 2);
1053921eab11SJason Gunthorpe 	down_write(&devices_rwsem);
1054921eab11SJason Gunthorpe 	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
1055d0899892SJason Gunthorpe 
1056d0899892SJason Gunthorpe 	/*
1057d0899892SJason Gunthorpe 	 * By using downgrade_write() we ensure that no other thread can clear
1058d0899892SJason Gunthorpe 	 * DEVICE_REGISTERED while we are completing the client setup.
1059d0899892SJason Gunthorpe 	 */
1060d0899892SJason Gunthorpe 	downgrade_write(&devices_rwsem);
1061921eab11SJason Gunthorpe 
1062ca22354bSJason Gunthorpe 	if (device->ops.enable_driver) {
1063ca22354bSJason Gunthorpe 		ret = device->ops.enable_driver(device);
1064ca22354bSJason Gunthorpe 		if (ret)
1065ca22354bSJason Gunthorpe 			goto out;
1066ca22354bSJason Gunthorpe 	}
1067ca22354bSJason Gunthorpe 
1068921eab11SJason Gunthorpe 	down_read(&clients_rwsem);
1069921eab11SJason Gunthorpe 	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
1070921eab11SJason Gunthorpe 		ret = add_client_context(device, client);
1071d0899892SJason Gunthorpe 		if (ret)
1072d0899892SJason Gunthorpe 			break;
1073d0899892SJason Gunthorpe 	}
1074921eab11SJason Gunthorpe 	up_read(&clients_rwsem);
10754e0f7b90SParav Pandit 	if (!ret)
10764e0f7b90SParav Pandit 		ret = add_compat_devs(device);
1077ca22354bSJason Gunthorpe out:
1078d0899892SJason Gunthorpe 	up_read(&devices_rwsem);
1079921eab11SJason Gunthorpe 	return ret;
1080921eab11SJason Gunthorpe }
1081921eab11SJason Gunthorpe 
1082548cb4fbSParav Pandit /**
1083548cb4fbSParav Pandit  * ib_register_device - Register an IB device with IB core
1084548cb4fbSParav Pandit  * @device:Device to register
1085548cb4fbSParav Pandit  *
1086548cb4fbSParav Pandit  * Low-level drivers use ib_register_device() to register their
1087548cb4fbSParav Pandit  * devices with the IB core.  All registered clients will receive a
1088548cb4fbSParav Pandit  * callback for each device that is added. @device must be allocated
1089548cb4fbSParav Pandit  * with ib_alloc_device().
1090d0899892SJason Gunthorpe  *
1091d0899892SJason Gunthorpe  * If the driver uses ops.dealloc_driver and calls any ib_unregister_device()
1092d0899892SJason Gunthorpe  * asynchronously, then the device pointer may be freed as soon as this
1093d0899892SJason Gunthorpe  * function returns.
1094548cb4fbSParav Pandit  */
1095ea4baf7fSParav Pandit int ib_register_device(struct ib_device *device, const char *name)
1096548cb4fbSParav Pandit {
1097548cb4fbSParav Pandit 	int ret;
10981da177e4SLinus Torvalds 
10990df91bb6SJason Gunthorpe 	ret = assign_name(device, name);
1100e349f858SJason Gunthorpe 	if (ret)
1101921eab11SJason Gunthorpe 		return ret;
11021da177e4SLinus Torvalds 
1103548cb4fbSParav Pandit 	ret = setup_device(device);
1104548cb4fbSParav Pandit 	if (ret)
1105d0899892SJason Gunthorpe 		return ret;
110603db3a2dSMatan Barak 
1107d45f89d5SJason Gunthorpe 	ret = ib_cache_setup_one(device);
1108d45f89d5SJason Gunthorpe 	if (ret) {
1109d45f89d5SJason Gunthorpe 		dev_warn(&device->dev,
1110d45f89d5SJason Gunthorpe 			 "Couldn't set up InfiniBand P_Key/GID cache\n");
1111d0899892SJason Gunthorpe 		return ret;
1112d45f89d5SJason Gunthorpe 	}
1113d45f89d5SJason Gunthorpe 
11147527a7b1SParav Pandit 	ib_device_register_rdmacg(device);
11153e153a93SIra Weiny 
11165f8f5499SParav Pandit 	ret = device_add(&device->dev);
11175f8f5499SParav Pandit 	if (ret)
11185f8f5499SParav Pandit 		goto cg_cleanup;
11195f8f5499SParav Pandit 
1120ea4baf7fSParav Pandit 	ret = ib_device_register_sysfs(device);
11211da177e4SLinus Torvalds 	if (ret) {
112243c7c851SJason Gunthorpe 		dev_warn(&device->dev,
112343c7c851SJason Gunthorpe 			 "Couldn't register device with driver model\n");
11245f8f5499SParav Pandit 		goto dev_cleanup;
11251da177e4SLinus Torvalds 	}
11261da177e4SLinus Torvalds 
1127d0899892SJason Gunthorpe 	ret = enable_device_and_get(device);
1128d0899892SJason Gunthorpe 	if (ret) {
1129d0899892SJason Gunthorpe 		void (*dealloc_fn)(struct ib_device *);
1130d0899892SJason Gunthorpe 
1131d0899892SJason Gunthorpe 		/*
1132d0899892SJason Gunthorpe 		 * If we hit this error flow then we don't want to
1133d0899892SJason Gunthorpe 		 * automatically dealloc the device since the caller is
1134d0899892SJason Gunthorpe 		 * expected to call ib_dealloc_device() after
1135d0899892SJason Gunthorpe 		 * ib_register_device() fails. This is tricky due to the
1136d0899892SJason Gunthorpe 		 * possibility for a parallel unregistration along with this
1137d0899892SJason Gunthorpe 		 * error flow. Since we have a refcount here we know any
1138d0899892SJason Gunthorpe 		 * parallel flow is stopped in disable_device and will see the
1139d0899892SJason Gunthorpe 		 * NULL pointers, causing responsibility for
1140d0899892SJason Gunthorpe 		 * ib_dealloc_device() to revert to this thread.
1141d0899892SJason Gunthorpe 		 */
1142d0899892SJason Gunthorpe 		dealloc_fn = device->ops.dealloc_driver;
1143d0899892SJason Gunthorpe 		device->ops.dealloc_driver = NULL;
1144d0899892SJason Gunthorpe 		ib_device_put(device);
1145d0899892SJason Gunthorpe 		__ib_unregister_device(device);
1146d0899892SJason Gunthorpe 		device->ops.dealloc_driver = dealloc_fn;
1147d0899892SJason Gunthorpe 		return ret;
1148d0899892SJason Gunthorpe 	}
1149d0899892SJason Gunthorpe 	ib_device_put(device);
11501da177e4SLinus Torvalds 
11514be3a4faSParav Pandit 	return 0;
11524be3a4faSParav Pandit 
11535f8f5499SParav Pandit dev_cleanup:
11545f8f5499SParav Pandit 	device_del(&device->dev);
11552fb4f4eaSParav Pandit cg_cleanup:
11562fb4f4eaSParav Pandit 	ib_device_unregister_rdmacg(device);
1157d45f89d5SJason Gunthorpe 	ib_cache_cleanup_one(device);
11581da177e4SLinus Torvalds 	return ret;
11591da177e4SLinus Torvalds }
11601da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_device);
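
/*
 * Illustrative sketch, not part of the original source: roughly how a
 * low-level driver's probe path might use ib_register_device(). All
 * "example_*" names are hypothetical, and the ib_alloc_device() macro
 * form (driver struct with an embedded ib_device as first member) is
 * assumed here.
 */
struct example_dev {
	struct ib_device ibdev; /* must be the first member */
};

static int example_probe(void)
{
	struct example_dev *edev;
	int ret;

	edev = ib_alloc_device(example_dev, ibdev);
	if (!edev)
		return -ENOMEM;

	/* ... fill in ops, port counts, etc. before registering ... */

	ret = ib_register_device(&edev->ibdev, "example%d");
	if (ret)
		/* on failure the caller still owns the dealloc */
		ib_dealloc_device(&edev->ibdev);
	return ret;
}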
11611da177e4SLinus Torvalds 
1162d0899892SJason Gunthorpe /* Callers must hold a get on the device. */
1163d0899892SJason Gunthorpe static void __ib_unregister_device(struct ib_device *ib_dev)
1164d0899892SJason Gunthorpe {
1165d0899892SJason Gunthorpe 	/*
1166d0899892SJason Gunthorpe 	 * We have a registration lock so that all the calls to unregister are
1167d0899892SJason Gunthorpe 	 * fully fenced: once any unregister returns, the device is truly
1168d0899892SJason Gunthorpe 	 * unregistered even if multiple callers are unregistering it at the
1169d0899892SJason Gunthorpe 	 * same time. This also interacts with the registration flow and
1170d0899892SJason Gunthorpe 	 * provides sane semantics if register and unregister are racing.
1171d0899892SJason Gunthorpe 	 */
1172d0899892SJason Gunthorpe 	mutex_lock(&ib_dev->unregistration_lock);
1173d0899892SJason Gunthorpe 	if (!refcount_read(&ib_dev->refcount))
1174d0899892SJason Gunthorpe 		goto out;
1175d0899892SJason Gunthorpe 
1176d0899892SJason Gunthorpe 	disable_device(ib_dev);
1177d0899892SJason Gunthorpe 	ib_device_unregister_sysfs(ib_dev);
1178d0899892SJason Gunthorpe 	device_del(&ib_dev->dev);
1179d0899892SJason Gunthorpe 	ib_device_unregister_rdmacg(ib_dev);
1180d0899892SJason Gunthorpe 	ib_cache_cleanup_one(ib_dev);
1181d0899892SJason Gunthorpe 
1182d0899892SJason Gunthorpe 	/*
1183d0899892SJason Gunthorpe 	 * Drivers using the new flow may not call ib_dealloc_device except
1184d0899892SJason Gunthorpe 	 * in error unwind prior to registration success.
1185d0899892SJason Gunthorpe 	 */
1186d0899892SJason Gunthorpe 	if (ib_dev->ops.dealloc_driver) {
1187d0899892SJason Gunthorpe 		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
1188d0899892SJason Gunthorpe 		ib_dealloc_device(ib_dev);
1189d0899892SJason Gunthorpe 	}
1190d0899892SJason Gunthorpe out:
1191d0899892SJason Gunthorpe 	mutex_unlock(&ib_dev->unregistration_lock);
1192d0899892SJason Gunthorpe }
1193d0899892SJason Gunthorpe 
11941da177e4SLinus Torvalds /**
11951da177e4SLinus Torvalds  * ib_unregister_device - Unregister an IB device
1196d0899892SJason Gunthorpe  * @ib_dev: The device to unregister
11971da177e4SLinus Torvalds  *
11981da177e4SLinus Torvalds  * Unregister an IB device.  All clients will receive a remove callback.
1199d0899892SJason Gunthorpe  *
1200d0899892SJason Gunthorpe  * Callers should call this routine only once, and protect against races with
1201d0899892SJason Gunthorpe  * registration. Typically it should only be called as part of a remove
1202d0899892SJason Gunthorpe  * callback in an implementation of driver core's struct device_driver and
1203d0899892SJason Gunthorpe  * related.
1204d0899892SJason Gunthorpe  *
1205d0899892SJason Gunthorpe  * If ops.dealloc_driver is used then ib_dev will be freed upon return from
1206d0899892SJason Gunthorpe  * this function.
12071da177e4SLinus Torvalds  */
1208d0899892SJason Gunthorpe void ib_unregister_device(struct ib_device *ib_dev)
12091da177e4SLinus Torvalds {
1210d0899892SJason Gunthorpe 	get_device(&ib_dev->dev);
1211d0899892SJason Gunthorpe 	__ib_unregister_device(ib_dev);
1212d0899892SJason Gunthorpe 	put_device(&ib_dev->dev);
12131da177e4SLinus Torvalds }
12141da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_device);
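
/*
 * Illustrative sketch, not part of the original source: the matching
 * remove path for a driver that does not provide ops.dealloc_driver and
 * so must free the device itself after unregistering.
 */
static void example_remove(struct example_dev *edev)
{
	ib_unregister_device(&edev->ibdev);
	ib_dealloc_device(&edev->ibdev);
}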
12151da177e4SLinus Torvalds 
1216d0899892SJason Gunthorpe /**
1217d0899892SJason Gunthorpe  * ib_unregister_device_and_put - Unregister a device while holding a 'get'
1218d0899892SJason Gunthorpe  * @ib_dev: The device to unregister
1219d0899892SJason Gunthorpe  *
1220d0899892SJason Gunthorpe  * This is the same as ib_unregister_device(), except it includes an internal
1221d0899892SJason Gunthorpe  * ib_device_put() that should match a 'get' obtained by the caller.
1222d0899892SJason Gunthorpe  *
1223d0899892SJason Gunthorpe  * It is safe to call this routine concurrently from multiple threads while
1224d0899892SJason Gunthorpe  * holding the 'get'. When the function returns the device is fully
1225d0899892SJason Gunthorpe  * unregistered.
1226d0899892SJason Gunthorpe  *
1227d0899892SJason Gunthorpe  * Drivers using this flow MUST use the ops.dealloc_driver callback to clean up
1228d0899892SJason Gunthorpe  * their resources associated with the device and dealloc it.
1229d0899892SJason Gunthorpe  */
1230d0899892SJason Gunthorpe void ib_unregister_device_and_put(struct ib_device *ib_dev)
1231d0899892SJason Gunthorpe {
1232d0899892SJason Gunthorpe 	WARN_ON(!ib_dev->ops.dealloc_driver);
1233d0899892SJason Gunthorpe 	get_device(&ib_dev->dev);
1234d0899892SJason Gunthorpe 	ib_device_put(ib_dev);
1235d0899892SJason Gunthorpe 	__ib_unregister_device(ib_dev);
1236d0899892SJason Gunthorpe 	put_device(&ib_dev->dev);
1237d0899892SJason Gunthorpe }
1238d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_device_and_put);
1239d0899892SJason Gunthorpe 
1240d0899892SJason Gunthorpe /**
1241d0899892SJason Gunthorpe  * ib_unregister_driver - Unregister all IB devices for a driver
1242d0899892SJason Gunthorpe  * @driver_id: The driver to unregister
1243d0899892SJason Gunthorpe  *
1244d0899892SJason Gunthorpe  * This implements a fence for device unregistration. It only returns once all
1245d0899892SJason Gunthorpe  * devices associated with the driver_id have fully completed their
1246d0899892SJason Gunthorpe  * unregistration and returned from ib_unregister_device*().
1247d0899892SJason Gunthorpe  *
1248d0899892SJason Gunthorpe  * If devices are not yet unregistered, it goes ahead and starts unregistering
1249d0899892SJason Gunthorpe  * them.
1250d0899892SJason Gunthorpe  *
1251d0899892SJason Gunthorpe  * This does not block creation of new devices with the given driver_id, that
1252d0899892SJason Gunthorpe  * is the responsibility of the caller.
1253d0899892SJason Gunthorpe  */
1254d0899892SJason Gunthorpe void ib_unregister_driver(enum rdma_driver_id driver_id)
1255d0899892SJason Gunthorpe {
1256d0899892SJason Gunthorpe 	struct ib_device *ib_dev;
1257d0899892SJason Gunthorpe 	unsigned long index;
1258d0899892SJason Gunthorpe 
1259d0899892SJason Gunthorpe 	down_read(&devices_rwsem);
1260d0899892SJason Gunthorpe 	xa_for_each (&devices, index, ib_dev) {
1261d0899892SJason Gunthorpe 		if (ib_dev->driver_id != driver_id)
1262d0899892SJason Gunthorpe 			continue;
1263d0899892SJason Gunthorpe 
1264d0899892SJason Gunthorpe 		get_device(&ib_dev->dev);
1265d0899892SJason Gunthorpe 		up_read(&devices_rwsem);
1266d0899892SJason Gunthorpe 
1267d0899892SJason Gunthorpe 		WARN_ON(!ib_dev->ops.dealloc_driver);
1268d0899892SJason Gunthorpe 		__ib_unregister_device(ib_dev);
1269d0899892SJason Gunthorpe 
1270d0899892SJason Gunthorpe 		put_device(&ib_dev->dev);
1271d0899892SJason Gunthorpe 		down_read(&devices_rwsem);
1272d0899892SJason Gunthorpe 	}
1273d0899892SJason Gunthorpe 	up_read(&devices_rwsem);
1274d0899892SJason Gunthorpe }
1275d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_driver);
1276d0899892SJason Gunthorpe 
1277d0899892SJason Gunthorpe static void ib_unregister_work(struct work_struct *work)
1278d0899892SJason Gunthorpe {
1279d0899892SJason Gunthorpe 	struct ib_device *ib_dev =
1280d0899892SJason Gunthorpe 		container_of(work, struct ib_device, unregistration_work);
1281d0899892SJason Gunthorpe 
1282d0899892SJason Gunthorpe 	__ib_unregister_device(ib_dev);
1283d0899892SJason Gunthorpe 	put_device(&ib_dev->dev);
1284d0899892SJason Gunthorpe }
1285d0899892SJason Gunthorpe 
1286d0899892SJason Gunthorpe /**
1287d0899892SJason Gunthorpe  * ib_unregister_device_queued - Unregister a device using a work queue
1288d0899892SJason Gunthorpe  * @ib_dev: The device to unregister
1289d0899892SJason Gunthorpe  *
1290d0899892SJason Gunthorpe  * This schedules an asynchronous unregistration using a WQ for the device. A
1291d0899892SJason Gunthorpe  * driver should use this to avoid holding locks while doing unregistration,
1292d0899892SJason Gunthorpe  * such as the RTNL lock.
1293d0899892SJason Gunthorpe  *
1294d0899892SJason Gunthorpe  * Drivers using this API must use ib_unregister_driver before module unload
1295d0899892SJason Gunthorpe  * to ensure that all scheduled unregistrations have completed.
1296d0899892SJason Gunthorpe  */
1297d0899892SJason Gunthorpe void ib_unregister_device_queued(struct ib_device *ib_dev)
1298d0899892SJason Gunthorpe {
1299d0899892SJason Gunthorpe 	WARN_ON(!refcount_read(&ib_dev->refcount));
1300d0899892SJason Gunthorpe 	WARN_ON(!ib_dev->ops.dealloc_driver);
1301d0899892SJason Gunthorpe 	get_device(&ib_dev->dev);
1302d0899892SJason Gunthorpe 	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
1303d0899892SJason Gunthorpe 		put_device(&ib_dev->dev);
1304d0899892SJason Gunthorpe }
1305d0899892SJason Gunthorpe EXPORT_SYMBOL(ib_unregister_device_queued);
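
/*
 * Illustrative sketch, not part of the original source: deferring an
 * unregister from a context that holds locks (e.g. a netdev notifier
 * under RTNL) and fencing it at module unload. RDMA_DRIVER_UNKNOWN is
 * only a stand-in for the driver's real enum rdma_driver_id value.
 */
static void example_netdev_going_down(struct ib_device *ibdev)
{
	/* cannot unregister synchronously here, defer to the WQ */
	ib_unregister_device_queued(ibdev);
}

static void example_module_exit(void)
{
	/* returns only once all queued unregistrations have finished */
	ib_unregister_driver(RDMA_DRIVER_UNKNOWN);
}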
1306d0899892SJason Gunthorpe 
13074e0f7b90SParav Pandit static struct pernet_operations rdma_dev_net_ops = {
13084e0f7b90SParav Pandit 	.init = rdma_dev_init_net,
13094e0f7b90SParav Pandit 	.exit = rdma_dev_exit_net,
13104e0f7b90SParav Pandit 	.id = &rdma_dev_net_id,
13114e0f7b90SParav Pandit 	.size = sizeof(struct rdma_dev_net),
13124e0f7b90SParav Pandit };
13134e0f7b90SParav Pandit 
1314e59178d8SJason Gunthorpe static int assign_client_id(struct ib_client *client)
1315e59178d8SJason Gunthorpe {
1316e59178d8SJason Gunthorpe 	int ret;
1317e59178d8SJason Gunthorpe 
1318921eab11SJason Gunthorpe 	down_write(&clients_rwsem);
1319e59178d8SJason Gunthorpe 	/*
1320e59178d8SJason Gunthorpe 	 * The add/remove callbacks must be called in FIFO/LIFO order. To
1321e59178d8SJason Gunthorpe 	 * achieve this we assign client_ids so they are sorted in
1322e59178d8SJason Gunthorpe 	 * registration order, and retain a linked list we can reverse iterate
1323e59178d8SJason Gunthorpe 	 * to get the LIFO order. The extra linked list can go away if xarray
1324e59178d8SJason Gunthorpe 	 * learns to reverse iterate.
1325e59178d8SJason Gunthorpe 	 */
1326ea295481SLinus Torvalds 	if (list_empty(&client_list)) {
1327e59178d8SJason Gunthorpe 		client->client_id = 0;
1328ea295481SLinus Torvalds 	} else {
1329ea295481SLinus Torvalds 		struct ib_client *last;
1330ea295481SLinus Torvalds 
1331ea295481SLinus Torvalds 		last = list_last_entry(&client_list, struct ib_client, list);
1332ea295481SLinus Torvalds 		client->client_id = last->client_id + 1;
1333ea295481SLinus Torvalds 	}
1334ea295481SLinus Torvalds 	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
1335e59178d8SJason Gunthorpe 	if (ret)
1336e59178d8SJason Gunthorpe 		goto out;
1337e59178d8SJason Gunthorpe 
1338921eab11SJason Gunthorpe 	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
1339921eab11SJason Gunthorpe 	list_add_tail(&client->list, &client_list);
1340921eab11SJason Gunthorpe 
1341e59178d8SJason Gunthorpe out:
1342921eab11SJason Gunthorpe 	up_write(&clients_rwsem);
1343e59178d8SJason Gunthorpe 	return ret;
1344e59178d8SJason Gunthorpe }
1345e59178d8SJason Gunthorpe 
13461da177e4SLinus Torvalds /**
13471da177e4SLinus Torvalds  * ib_register_client - Register an IB client
13481da177e4SLinus Torvalds  * @client:Client to register
13491da177e4SLinus Torvalds  *
13501da177e4SLinus Torvalds  * Upper level users of the IB drivers can use ib_register_client() to
13511da177e4SLinus Torvalds  * register callbacks for IB device addition and removal.  When an IB
13521da177e4SLinus Torvalds  * device is added, each registered client's add method will be called
13531da177e4SLinus Torvalds  * (in the order the clients were registered), and when a device is
13541da177e4SLinus Torvalds  * removed, each client's remove method will be called (in the reverse
13551da177e4SLinus Torvalds  * order that clients were registered).  In addition, when
13561da177e4SLinus Torvalds  * ib_register_client() is called, the client will receive an add
13571da177e4SLinus Torvalds  * callback for all devices already registered.
13581da177e4SLinus Torvalds  */
13591da177e4SLinus Torvalds int ib_register_client(struct ib_client *client)
13601da177e4SLinus Torvalds {
13611da177e4SLinus Torvalds 	struct ib_device *device;
13620df91bb6SJason Gunthorpe 	unsigned long index;
1363e59178d8SJason Gunthorpe 	int ret;
13641da177e4SLinus Torvalds 
1365e59178d8SJason Gunthorpe 	ret = assign_client_id(client);
1366921eab11SJason Gunthorpe 	if (ret)
1367921eab11SJason Gunthorpe 		return ret;
1368921eab11SJason Gunthorpe 
1369921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
1370921eab11SJason Gunthorpe 	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
1371921eab11SJason Gunthorpe 		ret = add_client_context(device, client);
1372e59178d8SJason Gunthorpe 		if (ret) {
1373921eab11SJason Gunthorpe 			up_read(&devices_rwsem);
1374921eab11SJason Gunthorpe 			ib_unregister_client(client);
1375e59178d8SJason Gunthorpe 			return ret;
1376e59178d8SJason Gunthorpe 		}
1377921eab11SJason Gunthorpe 	}
1378921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
13791da177e4SLinus Torvalds 	return 0;
13801da177e4SLinus Torvalds }
13811da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_client);
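
/*
 * Illustrative sketch, not part of the original source: a minimal
 * ib_client. The callback signatures match struct ib_client as used by
 * this file; the "example_*" names are hypothetical.
 */
static void example_add_one(struct ib_device *device)
{
	/* per-device setup, typically ending in ib_set_client_data() */
}

static void example_remove_one(struct ib_device *device, void *client_data)
{
	/* undo whatever example_add_one() did for this device */
}

static struct ib_client example_client = {
	.name   = "example",
	.add    = example_add_one,
	.remove = example_remove_one,
};

static int __init example_client_init(void)
{
	/* also triggers add callbacks for already registered devices */
	return ib_register_client(&example_client);
}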
13821da177e4SLinus Torvalds 
13831da177e4SLinus Torvalds /**
13841da177e4SLinus Torvalds  * ib_unregister_client - Unregister an IB client
13851da177e4SLinus Torvalds  * @client:Client to unregister
13861da177e4SLinus Torvalds  *
13871da177e4SLinus Torvalds  * Upper level users use ib_unregister_client() to remove their client
13881da177e4SLinus Torvalds  * registration.  When ib_unregister_client() is called, the client
13891da177e4SLinus Torvalds  * will receive a remove callback for each IB device still registered.
1390921eab11SJason Gunthorpe  *
1391921eab11SJason Gunthorpe  * This is a full fence: once it returns, no client callbacks will be called
1392921eab11SJason Gunthorpe  * or are running in another thread.
13931da177e4SLinus Torvalds  */
13941da177e4SLinus Torvalds void ib_unregister_client(struct ib_client *client)
13951da177e4SLinus Torvalds {
13961da177e4SLinus Torvalds 	struct ib_device *device;
13970df91bb6SJason Gunthorpe 	unsigned long index;
13981da177e4SLinus Torvalds 
1399921eab11SJason Gunthorpe 	down_write(&clients_rwsem);
1400e59178d8SJason Gunthorpe 	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
1401921eab11SJason Gunthorpe 	up_write(&clients_rwsem);
1402921eab11SJason Gunthorpe 	/*
1403921eab11SJason Gunthorpe 	 * Every device still known must be serialized to make sure we are
1404921eab11SJason Gunthorpe 	 * done with the client callbacks before we return.
1405921eab11SJason Gunthorpe 	 */
1406921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
1407921eab11SJason Gunthorpe 	xa_for_each (&devices, index, device)
1408921eab11SJason Gunthorpe 		remove_client_context(device, client->client_id);
1409921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
14105aa44bb9SHaggai Eran 
1411921eab11SJason Gunthorpe 	down_write(&clients_rwsem);
1412e59178d8SJason Gunthorpe 	list_del(&client->list);
1413e59178d8SJason Gunthorpe 	xa_erase(&clients, client->client_id);
1414921eab11SJason Gunthorpe 	up_write(&clients_rwsem);
14151da177e4SLinus Torvalds }
14161da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_client);
14171da177e4SLinus Torvalds 
14181da177e4SLinus Torvalds /**
14199cd330d3SKrishna Kumar  * ib_set_client_data - Set IB client context
14201da177e4SLinus Torvalds  * @device:Device to set context for
14211da177e4SLinus Torvalds  * @client:Client to set context for
14221da177e4SLinus Torvalds  * @data:Context to set
14231da177e4SLinus Torvalds  *
14240df91bb6SJason Gunthorpe  * ib_set_client_data() sets client context data that can be retrieved with
14250df91bb6SJason Gunthorpe  * ib_get_client_data(). This can only be called while the client is
14260df91bb6SJason Gunthorpe  * registered to the device; once the ib_client remove() callback returns, this
14270df91bb6SJason Gunthorpe  * cannot be called.
14281da177e4SLinus Torvalds  */
14291da177e4SLinus Torvalds void ib_set_client_data(struct ib_device *device, struct ib_client *client,
14301da177e4SLinus Torvalds 			void *data)
14311da177e4SLinus Torvalds {
14320df91bb6SJason Gunthorpe 	void *rc;
14331da177e4SLinus Torvalds 
14340df91bb6SJason Gunthorpe 	if (WARN_ON(IS_ERR(data)))
14350df91bb6SJason Gunthorpe 		data = NULL;
14361da177e4SLinus Torvalds 
14370df91bb6SJason Gunthorpe 	rc = xa_store(&device->client_data, client->client_id, data,
14380df91bb6SJason Gunthorpe 		      GFP_KERNEL);
14390df91bb6SJason Gunthorpe 	WARN_ON(xa_is_err(rc));
14401da177e4SLinus Torvalds }
14411da177e4SLinus Torvalds EXPORT_SYMBOL(ib_set_client_data);
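
/*
 * Illustrative sketch, not part of the original source: storing
 * per-device state from a client's add callback and releasing it in
 * remove. "struct example_state" and "example_client" are hypothetical.
 */
struct example_state {
	int opened;
};

static void example_state_add(struct ib_device *device)
{
	struct example_state *st = kzalloc(sizeof(*st), GFP_KERNEL);

	if (st)
		ib_set_client_data(device, &example_client, st);
}

static void example_state_remove(struct ib_device *device, void *client_data)
{
	kfree(client_data); /* the pointer stored by example_state_add() */
}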
14421da177e4SLinus Torvalds 
14431da177e4SLinus Torvalds /**
14441da177e4SLinus Torvalds  * ib_register_event_handler - Register an IB event handler
14451da177e4SLinus Torvalds  * @event_handler:Handler to register
14461da177e4SLinus Torvalds  *
14471da177e4SLinus Torvalds  * ib_register_event_handler() registers an event handler that will be
14481da177e4SLinus Torvalds  * called back when asynchronous IB events occur (as defined in
14491da177e4SLinus Torvalds  * chapter 11 of the InfiniBand Architecture Specification).  This
14501da177e4SLinus Torvalds  * callback may occur in interrupt context.
14511da177e4SLinus Torvalds  */
1452dcc9881eSLeon Romanovsky void ib_register_event_handler(struct ib_event_handler *event_handler)
14531da177e4SLinus Torvalds {
14541da177e4SLinus Torvalds 	unsigned long flags;
14551da177e4SLinus Torvalds 
14561da177e4SLinus Torvalds 	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
14571da177e4SLinus Torvalds 	list_add_tail(&event_handler->list,
14581da177e4SLinus Torvalds 		      &event_handler->device->event_handler_list);
14591da177e4SLinus Torvalds 	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
14601da177e4SLinus Torvalds }
14611da177e4SLinus Torvalds EXPORT_SYMBOL(ib_register_event_handler);
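
/*
 * Illustrative sketch, not part of the original source: wiring up an
 * event handler with the INIT_IB_EVENT_HANDLER() macro from
 * <rdma/ib_verbs.h>. Handlers may run in interrupt context, so they
 * must not sleep.
 */
static void example_event(struct ib_event_handler *handler,
			  struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ERR)
		pr_debug("port %u went down\n", event->element.port_num);
}

static struct ib_event_handler example_event_handler;

static void example_watch_events(struct ib_device *device)
{
	INIT_IB_EVENT_HANDLER(&example_event_handler, device, example_event);
	ib_register_event_handler(&example_event_handler);
}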
14621da177e4SLinus Torvalds 
14631da177e4SLinus Torvalds /**
14641da177e4SLinus Torvalds  * ib_unregister_event_handler - Unregister an event handler
14651da177e4SLinus Torvalds  * @event_handler:Handler to unregister
14661da177e4SLinus Torvalds  *
14671da177e4SLinus Torvalds  * Unregister an event handler registered with
14681da177e4SLinus Torvalds  * ib_register_event_handler().
14691da177e4SLinus Torvalds  */
1470dcc9881eSLeon Romanovsky void ib_unregister_event_handler(struct ib_event_handler *event_handler)
14711da177e4SLinus Torvalds {
14721da177e4SLinus Torvalds 	unsigned long flags;
14731da177e4SLinus Torvalds 
14741da177e4SLinus Torvalds 	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
14751da177e4SLinus Torvalds 	list_del(&event_handler->list);
14761da177e4SLinus Torvalds 	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
14771da177e4SLinus Torvalds }
14781da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_event_handler);
14791da177e4SLinus Torvalds 
14801da177e4SLinus Torvalds /**
14811da177e4SLinus Torvalds  * ib_dispatch_event - Dispatch an asynchronous event
14821da177e4SLinus Torvalds  * @event:Event to dispatch
14831da177e4SLinus Torvalds  *
14841da177e4SLinus Torvalds  * Low-level drivers must call ib_dispatch_event() to dispatch the
14851da177e4SLinus Torvalds  * event to all registered event handlers when an asynchronous event
14861da177e4SLinus Torvalds  * occurs.
14871da177e4SLinus Torvalds  */
14881da177e4SLinus Torvalds void ib_dispatch_event(struct ib_event *event)
14891da177e4SLinus Torvalds {
14901da177e4SLinus Torvalds 	unsigned long flags;
14911da177e4SLinus Torvalds 	struct ib_event_handler *handler;
14921da177e4SLinus Torvalds 
14931da177e4SLinus Torvalds 	spin_lock_irqsave(&event->device->event_handler_lock, flags);
14941da177e4SLinus Torvalds 
14951da177e4SLinus Torvalds 	list_for_each_entry(handler, &event->device->event_handler_list, list)
14961da177e4SLinus Torvalds 		handler->handler(handler, event);
14971da177e4SLinus Torvalds 
14981da177e4SLinus Torvalds 	spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
14991da177e4SLinus Torvalds }
15001da177e4SLinus Torvalds EXPORT_SYMBOL(ib_dispatch_event);
15011da177e4SLinus Torvalds 
15021da177e4SLinus Torvalds /**
15031da177e4SLinus Torvalds  * ib_query_port - Query IB port attributes
15041da177e4SLinus Torvalds  * @device:Device to query
15051da177e4SLinus Torvalds  * @port_num:Port number to query
15061da177e4SLinus Torvalds  * @port_attr:Port attributes
15071da177e4SLinus Torvalds  *
15081da177e4SLinus Torvalds  * ib_query_port() returns the attributes of a port through the
15091da177e4SLinus Torvalds  * @port_attr pointer.
15101da177e4SLinus Torvalds  */
15111da177e4SLinus Torvalds int ib_query_port(struct ib_device *device,
15121da177e4SLinus Torvalds 		  u8 port_num,
15131da177e4SLinus Torvalds 		  struct ib_port_attr *port_attr)
15141da177e4SLinus Torvalds {
1515fad61ad4SEli Cohen 	union ib_gid gid;
1516fad61ad4SEli Cohen 	int err;
1517fad61ad4SEli Cohen 
151824dc831bSYuval Shaia 	if (!rdma_is_port_valid(device, port_num))
1519116c0074SRoland Dreier 		return -EINVAL;
1520116c0074SRoland Dreier 
1521fad61ad4SEli Cohen 	memset(port_attr, 0, sizeof(*port_attr));
15223023a1e9SKamal Heib 	err = device->ops.query_port(device, port_num, port_attr);
1523fad61ad4SEli Cohen 	if (err || port_attr->subnet_prefix)
1524fad61ad4SEli Cohen 		return err;
1525fad61ad4SEli Cohen 
1526d7012467SEli Cohen 	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
1527d7012467SEli Cohen 		return 0;
1528d7012467SEli Cohen 
15293023a1e9SKamal Heib 	err = device->ops.query_gid(device, port_num, 0, &gid);
1530fad61ad4SEli Cohen 	if (err)
1531fad61ad4SEli Cohen 		return err;
1532fad61ad4SEli Cohen 
1533fad61ad4SEli Cohen 	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
1534fad61ad4SEli Cohen 	return 0;
15351da177e4SLinus Torvalds }
15361da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_port);
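
/*
 * Illustrative sketch, not part of the original source: a common use of
 * ib_query_port() is a quick "is this port usable" check.
 */
static bool example_port_is_active(struct ib_device *device, u8 port_num)
{
	struct ib_port_attr attr;

	if (ib_query_port(device, port_num, &attr))
		return false;
	return attr.state == IB_PORT_ACTIVE;
}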
15371da177e4SLinus Torvalds 
1538324e227eSJason Gunthorpe static void add_ndev_hash(struct ib_port_data *pdata)
1539324e227eSJason Gunthorpe {
1540324e227eSJason Gunthorpe 	unsigned long flags;
1541324e227eSJason Gunthorpe 
1542324e227eSJason Gunthorpe 	might_sleep();
1543324e227eSJason Gunthorpe 
1544324e227eSJason Gunthorpe 	spin_lock_irqsave(&ndev_hash_lock, flags);
1545324e227eSJason Gunthorpe 	if (hash_hashed(&pdata->ndev_hash_link)) {
1546324e227eSJason Gunthorpe 		hash_del_rcu(&pdata->ndev_hash_link);
1547324e227eSJason Gunthorpe 		spin_unlock_irqrestore(&ndev_hash_lock, flags);
1548324e227eSJason Gunthorpe 		/*
1549324e227eSJason Gunthorpe 		 * We cannot do hash_add_rcu after a hash_del_rcu until the
1550324e227eSJason Gunthorpe 		 * grace period has elapsed.
1551324e227eSJason Gunthorpe 		 */
1552324e227eSJason Gunthorpe 		synchronize_rcu();
1553324e227eSJason Gunthorpe 		spin_lock_irqsave(&ndev_hash_lock, flags);
1554324e227eSJason Gunthorpe 	}
1555324e227eSJason Gunthorpe 	if (pdata->netdev)
1556324e227eSJason Gunthorpe 		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
1557324e227eSJason Gunthorpe 			     (uintptr_t)pdata->netdev);
1558324e227eSJason Gunthorpe 	spin_unlock_irqrestore(&ndev_hash_lock, flags);
1559324e227eSJason Gunthorpe }
1560324e227eSJason Gunthorpe 
15611da177e4SLinus Torvalds /**
1562c2261dd7SJason Gunthorpe  * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
1563c2261dd7SJason Gunthorpe  * @ib_dev: Device to modify
1564c2261dd7SJason Gunthorpe  * @ndev: net_device to affiliate, may be NULL
1565c2261dd7SJason Gunthorpe  * @port: IB port the net_device is connected to
1566c2261dd7SJason Gunthorpe  *
1567c2261dd7SJason Gunthorpe  * Drivers should use this to link the ib_device to a netdev so the netdev
1568c2261dd7SJason Gunthorpe  * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
1569c2261dd7SJason Gunthorpe  * affiliated with any port.
1570c2261dd7SJason Gunthorpe  *
1571c2261dd7SJason Gunthorpe  * The caller must ensure that the given ndev is not unregistered or
1572c2261dd7SJason Gunthorpe  * unregistering, and that either the ib_device is unregistered or
1573c2261dd7SJason Gunthorpe  * ib_device_set_netdev() is called with NULL when the ndev sends a
1574c2261dd7SJason Gunthorpe  * NETDEV_UNREGISTER event.
1575c2261dd7SJason Gunthorpe  */
1576c2261dd7SJason Gunthorpe int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
1577c2261dd7SJason Gunthorpe 			 unsigned int port)
1578c2261dd7SJason Gunthorpe {
1579c2261dd7SJason Gunthorpe 	struct net_device *old_ndev;
1580c2261dd7SJason Gunthorpe 	struct ib_port_data *pdata;
1581c2261dd7SJason Gunthorpe 	unsigned long flags;
1582c2261dd7SJason Gunthorpe 	int ret;
1583c2261dd7SJason Gunthorpe 
1584c2261dd7SJason Gunthorpe 	/*
1585c2261dd7SJason Gunthorpe 	 * Drivers wish to call this before ib_register_device(), so we have to
1586c2261dd7SJason Gunthorpe 	 * set up the port data early.
1587c2261dd7SJason Gunthorpe 	 */
1588c2261dd7SJason Gunthorpe 	ret = alloc_port_data(ib_dev);
1589c2261dd7SJason Gunthorpe 	if (ret)
1590c2261dd7SJason Gunthorpe 		return ret;
1591c2261dd7SJason Gunthorpe 
1592c2261dd7SJason Gunthorpe 	if (!rdma_is_port_valid(ib_dev, port))
1593c2261dd7SJason Gunthorpe 		return -EINVAL;
1594c2261dd7SJason Gunthorpe 
1595c2261dd7SJason Gunthorpe 	pdata = &ib_dev->port_data[port];
1596c2261dd7SJason Gunthorpe 	spin_lock_irqsave(&pdata->netdev_lock, flags);
1597324e227eSJason Gunthorpe 	old_ndev = rcu_dereference_protected(
1598324e227eSJason Gunthorpe 		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
1599324e227eSJason Gunthorpe 	if (old_ndev == ndev) {
1600c2261dd7SJason Gunthorpe 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1601c2261dd7SJason Gunthorpe 		return 0;
1602c2261dd7SJason Gunthorpe 	}
1603c2261dd7SJason Gunthorpe 
1604c2261dd7SJason Gunthorpe 	if (ndev)
1605c2261dd7SJason Gunthorpe 		dev_hold(ndev);
1606324e227eSJason Gunthorpe 	rcu_assign_pointer(pdata->netdev, ndev);
1607c2261dd7SJason Gunthorpe 	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1608c2261dd7SJason Gunthorpe 
1609324e227eSJason Gunthorpe 	add_ndev_hash(pdata);
1610c2261dd7SJason Gunthorpe 	if (old_ndev)
1611c2261dd7SJason Gunthorpe 		dev_put(old_ndev);
1612c2261dd7SJason Gunthorpe 
1613c2261dd7SJason Gunthorpe 	return 0;
1614c2261dd7SJason Gunthorpe }
1615c2261dd7SJason Gunthorpe EXPORT_SYMBOL(ib_device_set_netdev);
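
/*
 * Illustrative sketch, not part of the original source: a RoCE driver
 * tying its netdev to port 1 at setup time and breaking the link when
 * the netdev goes away, per the rules in the comment above.
 */
static int example_link_netdev(struct ib_device *ibdev,
			       struct net_device *ndev)
{
	return ib_device_set_netdev(ibdev, ndev, 1);
}

static void example_on_netdev_unregister(struct ib_device *ibdev)
{
	ib_device_set_netdev(ibdev, NULL, 1);
}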
1616c2261dd7SJason Gunthorpe 
1617c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev)
1618c2261dd7SJason Gunthorpe {
1619c2261dd7SJason Gunthorpe 	unsigned long flags;
1620c2261dd7SJason Gunthorpe 	unsigned int port;
1621c2261dd7SJason Gunthorpe 
1622c2261dd7SJason Gunthorpe 	rdma_for_each_port (ib_dev, port) {
1623c2261dd7SJason Gunthorpe 		struct ib_port_data *pdata = &ib_dev->port_data[port];
1624324e227eSJason Gunthorpe 		struct net_device *ndev;
1625c2261dd7SJason Gunthorpe 
1626c2261dd7SJason Gunthorpe 		spin_lock_irqsave(&pdata->netdev_lock, flags);
1627324e227eSJason Gunthorpe 		ndev = rcu_dereference_protected(
1628324e227eSJason Gunthorpe 			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
1629324e227eSJason Gunthorpe 		if (ndev) {
1630324e227eSJason Gunthorpe 			spin_lock(&ndev_hash_lock);
1631324e227eSJason Gunthorpe 			hash_del_rcu(&pdata->ndev_hash_link);
1632324e227eSJason Gunthorpe 			spin_unlock(&ndev_hash_lock);
1633324e227eSJason Gunthorpe 
1634324e227eSJason Gunthorpe 			/*
1635324e227eSJason Gunthorpe 			 * If this is the last dev_put, there is still a
1636324e227eSJason Gunthorpe 			 * synchronize_rcu before the netdev is kfreed, so we
1637324e227eSJason Gunthorpe 			 * can continue to rely on unlocked pointer
1638324e227eSJason Gunthorpe 			 * comparisons after the put
1639324e227eSJason Gunthorpe 			 */
1640324e227eSJason Gunthorpe 			rcu_assign_pointer(pdata->netdev, NULL);
1641324e227eSJason Gunthorpe 			dev_put(ndev);
1642c2261dd7SJason Gunthorpe 		}
1643c2261dd7SJason Gunthorpe 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1644c2261dd7SJason Gunthorpe 	}
1645c2261dd7SJason Gunthorpe }
1646c2261dd7SJason Gunthorpe 
1647c2261dd7SJason Gunthorpe struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
1648c2261dd7SJason Gunthorpe 					unsigned int port)
1649c2261dd7SJason Gunthorpe {
1650c2261dd7SJason Gunthorpe 	struct ib_port_data *pdata;
1651c2261dd7SJason Gunthorpe 	struct net_device *res;
1652c2261dd7SJason Gunthorpe 
1653c2261dd7SJason Gunthorpe 	if (!rdma_is_port_valid(ib_dev, port))
1654c2261dd7SJason Gunthorpe 		return NULL;
1655c2261dd7SJason Gunthorpe 
1656c2261dd7SJason Gunthorpe 	pdata = &ib_dev->port_data[port];
1657c2261dd7SJason Gunthorpe 
1658c2261dd7SJason Gunthorpe 	/*
1659c2261dd7SJason Gunthorpe 	 * New drivers should use ib_device_set_netdev(), not the legacy
1660c2261dd7SJason Gunthorpe 	 * get_netdev().
1661c2261dd7SJason Gunthorpe 	 */
1662c2261dd7SJason Gunthorpe 	if (ib_dev->ops.get_netdev)
1663c2261dd7SJason Gunthorpe 		res = ib_dev->ops.get_netdev(ib_dev, port);
1664c2261dd7SJason Gunthorpe 	else {
1665c2261dd7SJason Gunthorpe 		spin_lock(&pdata->netdev_lock);
1666324e227eSJason Gunthorpe 		res = rcu_dereference_protected(
1667324e227eSJason Gunthorpe 			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
1668c2261dd7SJason Gunthorpe 		if (res)
1669c2261dd7SJason Gunthorpe 			dev_hold(res);
1670c2261dd7SJason Gunthorpe 		spin_unlock(&pdata->netdev_lock);
1671c2261dd7SJason Gunthorpe 	}
1672c2261dd7SJason Gunthorpe 
1673c2261dd7SJason Gunthorpe 	/*
1674c2261dd7SJason Gunthorpe 	 * If we are starting to unregister, expedite things by preventing
1675c2261dd7SJason Gunthorpe 	 * propagation of an unregistering netdev.
1676c2261dd7SJason Gunthorpe 	 */
1677c2261dd7SJason Gunthorpe 	if (res && res->reg_state != NETREG_REGISTERED) {
1678c2261dd7SJason Gunthorpe 		dev_put(res);
1679c2261dd7SJason Gunthorpe 		return NULL;
1680c2261dd7SJason Gunthorpe 	}
1681c2261dd7SJason Gunthorpe 
1682c2261dd7SJason Gunthorpe 	return res;
1683c2261dd7SJason Gunthorpe }
1684c2261dd7SJason Gunthorpe 
1685c2261dd7SJason Gunthorpe /**
1686324e227eSJason Gunthorpe  * ib_device_get_by_netdev - Find an IB device associated with a netdev
1687324e227eSJason Gunthorpe  * @ndev: netdev to locate
1688324e227eSJason Gunthorpe  * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
1689324e227eSJason Gunthorpe  *
1690324e227eSJason Gunthorpe  * Find and hold an ib_device that is associated with a netdev via
1691324e227eSJason Gunthorpe  * ib_device_set_netdev(). The caller must call ib_device_put() on the
1692324e227eSJason Gunthorpe  * returned pointer.
1693324e227eSJason Gunthorpe  */
1694324e227eSJason Gunthorpe struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
1695324e227eSJason Gunthorpe 					  enum rdma_driver_id driver_id)
1696324e227eSJason Gunthorpe {
1697324e227eSJason Gunthorpe 	struct ib_device *res = NULL;
1698324e227eSJason Gunthorpe 	struct ib_port_data *cur;
1699324e227eSJason Gunthorpe 
1700324e227eSJason Gunthorpe 	rcu_read_lock();
1701324e227eSJason Gunthorpe 	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
1702324e227eSJason Gunthorpe 				    (uintptr_t)ndev) {
1703324e227eSJason Gunthorpe 		if (rcu_access_pointer(cur->netdev) == ndev &&
1704324e227eSJason Gunthorpe 		    (driver_id == RDMA_DRIVER_UNKNOWN ||
1705324e227eSJason Gunthorpe 		     cur->ib_dev->driver_id == driver_id) &&
1706324e227eSJason Gunthorpe 		    ib_device_try_get(cur->ib_dev)) {
1707324e227eSJason Gunthorpe 			res = cur->ib_dev;
1708324e227eSJason Gunthorpe 			break;
1709324e227eSJason Gunthorpe 		}
1710324e227eSJason Gunthorpe 	}
1711324e227eSJason Gunthorpe 	rcu_read_unlock();
1712324e227eSJason Gunthorpe 
1713324e227eSJason Gunthorpe 	return res;
1714324e227eSJason Gunthorpe }
1715324e227eSJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_netdev);
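
/*
 * Illustrative sketch, not part of the original source: resolving a
 * netdev to its ib_device. The caller owns the reference and must
 * return it with ib_device_put().
 */
static void example_inspect_ndev(struct net_device *ndev)
{
	struct ib_device *ibdev;

	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
	if (!ibdev)
		return;
	pr_debug("%s is backed by %s\n", ndev->name, dev_name(&ibdev->dev));
	ib_device_put(ibdev);
}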
1716324e227eSJason Gunthorpe 
1717324e227eSJason Gunthorpe /**
171803db3a2dSMatan Barak  * ib_enum_roce_netdev - enumerate all RoCE ports
171903db3a2dSMatan Barak  * @ib_dev: IB device we want to query
172003db3a2dSMatan Barak  * @filter: Should we call the callback?
172103db3a2dSMatan Barak  * @filter_cookie: Cookie passed to filter
172203db3a2dSMatan Barak  * @cb: Callback to call for each found RoCE port
172303db3a2dSMatan Barak  * @cookie: Cookie passed back to the callback
172403db3a2dSMatan Barak  *
172503db3a2dSMatan Barak  * Enumerates all of the physical RoCE ports of ib_dev
172603db3a2dSMatan Barak  * which are related to a netdevice and calls the callback on each
172703db3a2dSMatan Barak  * port for which the filter() function returns non-zero.
172803db3a2dSMatan Barak  */
172903db3a2dSMatan Barak void ib_enum_roce_netdev(struct ib_device *ib_dev,
173003db3a2dSMatan Barak 			 roce_netdev_filter filter,
173103db3a2dSMatan Barak 			 void *filter_cookie,
173203db3a2dSMatan Barak 			 roce_netdev_callback cb,
173303db3a2dSMatan Barak 			 void *cookie)
173403db3a2dSMatan Barak {
1735ea1075edSJason Gunthorpe 	unsigned int port;
173603db3a2dSMatan Barak 
1737ea1075edSJason Gunthorpe 	rdma_for_each_port (ib_dev, port)
173803db3a2dSMatan Barak 		if (rdma_protocol_roce(ib_dev, port)) {
1739c2261dd7SJason Gunthorpe 			struct net_device *idev =
1740c2261dd7SJason Gunthorpe 				ib_device_get_netdev(ib_dev, port);
174103db3a2dSMatan Barak 
174203db3a2dSMatan Barak 			if (filter(ib_dev, port, idev, filter_cookie))
174303db3a2dSMatan Barak 				cb(ib_dev, port, idev, cookie);
174403db3a2dSMatan Barak 
174503db3a2dSMatan Barak 			if (idev)
174603db3a2dSMatan Barak 				dev_put(idev);
174703db3a2dSMatan Barak 		}
174803db3a2dSMatan Barak }
174903db3a2dSMatan Barak 
175003db3a2dSMatan Barak /**
175103db3a2dSMatan Barak  * ib_enum_all_roce_netdevs - enumerate all RoCE devices
175203db3a2dSMatan Barak  * @filter: Should we call the callback?
175303db3a2dSMatan Barak  * @filter_cookie: Cookie passed to filter
175403db3a2dSMatan Barak  * @cb: Callback to call for each found RoCE port
175503db3a2dSMatan Barak  * @cookie: Cookie passed back to the callback
175603db3a2dSMatan Barak  *
175703db3a2dSMatan Barak  * Enumerates all RoCE devices' physical ports which are related
175803db3a2dSMatan Barak  * to netdevices and calls the callback on each port for which
175903db3a2dSMatan Barak  * the filter() function returns non-zero.
176003db3a2dSMatan Barak  */
176103db3a2dSMatan Barak void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
176203db3a2dSMatan Barak 			      void *filter_cookie,
176303db3a2dSMatan Barak 			      roce_netdev_callback cb,
176403db3a2dSMatan Barak 			      void *cookie)
176503db3a2dSMatan Barak {
176603db3a2dSMatan Barak 	struct ib_device *dev;
17670df91bb6SJason Gunthorpe 	unsigned long index;
176803db3a2dSMatan Barak 
1769921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
17700df91bb6SJason Gunthorpe 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
177103db3a2dSMatan Barak 		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
1772921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
177303db3a2dSMatan Barak }
177403db3a2dSMatan Barak 
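/*
 * Illustrative sketch, not part of the original source: a filter and
 * callback pair for the RoCE enumerators above. The filter matches one
 * specific netdev passed through the filter cookie; the parameter types
 * are assumed from the roce_netdev_filter/roce_netdev_callback typedefs.
 */
static int example_ndev_filter(struct ib_device *device, u8 port,
			       struct net_device *idev, void *cookie)
{
	return idev == cookie;
}

static void example_ndev_cb(struct ib_device *device, u8 port,
			    struct net_device *idev, void *cookie)
{
	pr_debug("%s port %u matched\n", dev_name(&device->dev), port);
}
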
177503db3a2dSMatan Barak /**
17768030c835SLeon Romanovsky  * ib_enum_all_devs - enumerate all ib_devices
17778030c835SLeon Romanovsky  * @cb: Callback to call for each found ib_device
17788030c835SLeon Romanovsky  *
17798030c835SLeon Romanovsky  * Enumerates all ib_devices and calls callback() on each device.
17808030c835SLeon Romanovsky  */
17818030c835SLeon Romanovsky int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
17828030c835SLeon Romanovsky 		     struct netlink_callback *cb)
17838030c835SLeon Romanovsky {
17840df91bb6SJason Gunthorpe 	unsigned long index;
17858030c835SLeon Romanovsky 	struct ib_device *dev;
17868030c835SLeon Romanovsky 	unsigned int idx = 0;
17878030c835SLeon Romanovsky 	int ret = 0;
17888030c835SLeon Romanovsky 
1789921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
17900df91bb6SJason Gunthorpe 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
17918030c835SLeon Romanovsky 		ret = nldev_cb(dev, skb, cb, idx);
17928030c835SLeon Romanovsky 		if (ret)
17938030c835SLeon Romanovsky 			break;
17948030c835SLeon Romanovsky 		idx++;
17958030c835SLeon Romanovsky 	}
1796921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
17978030c835SLeon Romanovsky 	return ret;
17988030c835SLeon Romanovsky }
17998030c835SLeon Romanovsky 
18008030c835SLeon Romanovsky /**
18011da177e4SLinus Torvalds  * ib_query_pkey - Get P_Key table entry
18021da177e4SLinus Torvalds  * @device:Device to query
18031da177e4SLinus Torvalds  * @port_num:Port number to query
18041da177e4SLinus Torvalds  * @index:P_Key table index to query
18051da177e4SLinus Torvalds  * @pkey:Returned P_Key
18061da177e4SLinus Torvalds  *
18071da177e4SLinus Torvalds  * ib_query_pkey() fetches the specified P_Key table entry.
18081da177e4SLinus Torvalds  */
18091da177e4SLinus Torvalds int ib_query_pkey(struct ib_device *device,
18101da177e4SLinus Torvalds 		  u8 port_num, u16 index, u16 *pkey)
18111da177e4SLinus Torvalds {
18129af3f5cfSYuval Shaia 	if (!rdma_is_port_valid(device, port_num))
18139af3f5cfSYuval Shaia 		return -EINVAL;
18149af3f5cfSYuval Shaia 
18153023a1e9SKamal Heib 	return device->ops.query_pkey(device, port_num, index, pkey);
18161da177e4SLinus Torvalds }
18171da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_pkey);
18181da177e4SLinus Torvalds 
18191da177e4SLinus Torvalds /**
18201da177e4SLinus Torvalds  * ib_modify_device - Change IB device attributes
18211da177e4SLinus Torvalds  * @device:Device to modify
18221da177e4SLinus Torvalds  * @device_modify_mask:Mask of attributes to change
18231da177e4SLinus Torvalds  * @device_modify:New attribute values
18241da177e4SLinus Torvalds  *
18251da177e4SLinus Torvalds  * ib_modify_device() changes a device's attributes as specified by
18261da177e4SLinus Torvalds  * the @device_modify_mask and @device_modify structure.
18271da177e4SLinus Torvalds  */
18281da177e4SLinus Torvalds int ib_modify_device(struct ib_device *device,
18291da177e4SLinus Torvalds 		     int device_modify_mask,
18301da177e4SLinus Torvalds 		     struct ib_device_modify *device_modify)
18311da177e4SLinus Torvalds {
18323023a1e9SKamal Heib 	if (!device->ops.modify_device)
183310e1b54bSBart Van Assche 		return -ENOSYS;
183410e1b54bSBart Van Assche 
18353023a1e9SKamal Heib 	return device->ops.modify_device(device, device_modify_mask,
18361da177e4SLinus Torvalds 					 device_modify);
18371da177e4SLinus Torvalds }
18381da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_device);
18391da177e4SLinus Torvalds 
18401da177e4SLinus Torvalds /**
18411da177e4SLinus Torvalds  * ib_modify_port - Modifies the attributes for the specified port.
18421da177e4SLinus Torvalds  * @device: The device to modify.
18431da177e4SLinus Torvalds  * @port_num: The number of the port to modify.
18441da177e4SLinus Torvalds  * @port_modify_mask: Mask used to specify which attributes of the port
18451da177e4SLinus Torvalds  *   to change.
18461da177e4SLinus Torvalds  * @port_modify: New attribute values for the port.
18471da177e4SLinus Torvalds  *
18481da177e4SLinus Torvalds  * ib_modify_port() changes a port's attributes as specified by the
18491da177e4SLinus Torvalds  * @port_modify_mask and @port_modify structure.
18501da177e4SLinus Torvalds  */
18511da177e4SLinus Torvalds int ib_modify_port(struct ib_device *device,
18521da177e4SLinus Torvalds 		   u8 port_num, int port_modify_mask,
18531da177e4SLinus Torvalds 		   struct ib_port_modify *port_modify)
18541da177e4SLinus Torvalds {
185561e0962dSSelvin Xavier 	int rc;
185610e1b54bSBart Van Assche 
185724dc831bSYuval Shaia 	if (!rdma_is_port_valid(device, port_num))
1858116c0074SRoland Dreier 		return -EINVAL;
1859116c0074SRoland Dreier 
18603023a1e9SKamal Heib 	if (device->ops.modify_port)
18613023a1e9SKamal Heib 		rc = device->ops.modify_port(device, port_num,
18623023a1e9SKamal Heib 					     port_modify_mask,
18631da177e4SLinus Torvalds 					     port_modify);
186461e0962dSSelvin Xavier 	else
186561e0962dSSelvin Xavier 		rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
186661e0962dSSelvin Xavier 	return rc;
18671da177e4SLinus Torvalds }
18681da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_port);
18691da177e4SLinus Torvalds 
18705eb620c8SYosef Etigin /**
18715eb620c8SYosef Etigin  * ib_find_gid - Returns the port number and GID table index where
1872dbb12562SParav Pandit  *   a specified GID value occurs. It searches only ports using the IB link layer.
18735eb620c8SYosef Etigin  * @device: The device to query.
18745eb620c8SYosef Etigin  * @gid: The GID value to search for.
18755eb620c8SYosef Etigin  * @port_num: The port number of the device where the GID value was found.
18765eb620c8SYosef Etigin  * @index: The index into the GID table where the GID was found.  This
18775eb620c8SYosef Etigin  *   parameter may be NULL.
18785eb620c8SYosef Etigin  */
18795eb620c8SYosef Etigin int ib_find_gid(struct ib_device *device, union ib_gid *gid,
1880b26c4a11SParav Pandit 		u8 *port_num, u16 *index)
18815eb620c8SYosef Etigin {
18825eb620c8SYosef Etigin 	union ib_gid tmp_gid;
1883ea1075edSJason Gunthorpe 	unsigned int port;
1884ea1075edSJason Gunthorpe 	int ret, i;
18855eb620c8SYosef Etigin 
1886ea1075edSJason Gunthorpe 	rdma_for_each_port (device, port) {
188722d24f75SParav Pandit 		if (!rdma_protocol_ib(device, port))
1888b39ffa1dSMatan Barak 			continue;
1889b39ffa1dSMatan Barak 
18908ceb1357SJason Gunthorpe 		for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
18918ceb1357SJason Gunthorpe 		     ++i) {
18921dfce294SParav Pandit 			ret = rdma_query_gid(device, port, i, &tmp_gid);
18935eb620c8SYosef Etigin 			if (ret)
18945eb620c8SYosef Etigin 				return ret;
18955eb620c8SYosef Etigin 			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
18965eb620c8SYosef Etigin 				*port_num = port;
18975eb620c8SYosef Etigin 				if (index)
18985eb620c8SYosef Etigin 					*index = i;
18995eb620c8SYosef Etigin 				return 0;
19005eb620c8SYosef Etigin 			}
19015eb620c8SYosef Etigin 		}
19025eb620c8SYosef Etigin 	}
19035eb620c8SYosef Etigin 
19045eb620c8SYosef Etigin 	return -ENOENT;
19055eb620c8SYosef Etigin }
19065eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_gid);
19075eb620c8SYosef Etigin 
19085eb620c8SYosef Etigin /**
19095eb620c8SYosef Etigin  * ib_find_pkey - Returns the PKey table index where a specified
19105eb620c8SYosef Etigin  *   PKey value occurs.
19115eb620c8SYosef Etigin  * @device: The device to query.
19125eb620c8SYosef Etigin  * @port_num: The port number of the device to search for the PKey.
19135eb620c8SYosef Etigin  * @pkey: The PKey value to search for.
19145eb620c8SYosef Etigin  * @index: The index into the PKey table where the PKey was found.
19155eb620c8SYosef Etigin  */
19165eb620c8SYosef Etigin int ib_find_pkey(struct ib_device *device,
19175eb620c8SYosef Etigin 		 u8 port_num, u16 pkey, u16 *index)
19185eb620c8SYosef Etigin {
19195eb620c8SYosef Etigin 	int ret, i;
19205eb620c8SYosef Etigin 	u16 tmp_pkey;
1921ff7166c4SJack Morgenstein 	int partial_ix = -1;
19225eb620c8SYosef Etigin 
19238ceb1357SJason Gunthorpe 	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
19248ceb1357SJason Gunthorpe 	     ++i) {
19255eb620c8SYosef Etigin 		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
19265eb620c8SYosef Etigin 		if (ret)
19275eb620c8SYosef Etigin 			return ret;
192836026eccSMoni Shoua 		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
1929ff7166c4SJack Morgenstein 			/* if there is a full-member pkey, take it */
1930ff7166c4SJack Morgenstein 			if (tmp_pkey & 0x8000) {
19315eb620c8SYosef Etigin 				*index = i;
19325eb620c8SYosef Etigin 				return 0;
19335eb620c8SYosef Etigin 			}
1934ff7166c4SJack Morgenstein 			if (partial_ix < 0)
1935ff7166c4SJack Morgenstein 				partial_ix = i;
1936ff7166c4SJack Morgenstein 		}
19375eb620c8SYosef Etigin 	}
19385eb620c8SYosef Etigin 
1939ff7166c4SJack Morgenstein 	/* no full member; if a limited member exists, take it */
1940ff7166c4SJack Morgenstein 	if (partial_ix >= 0) {
1941ff7166c4SJack Morgenstein 		*index = partial_ix;
1942ff7166c4SJack Morgenstein 		return 0;
1943ff7166c4SJack Morgenstein 	}
19445eb620c8SYosef Etigin 	return -ENOENT;
19455eb620c8SYosef Etigin }
19465eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_pkey);
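
/*
 * Illustrative sketch, not part of the original source: looking up the
 * index of the default full-member P_Key (0xffff) on a port.
 */
static int example_default_pkey_index(struct ib_device *device, u8 port_num,
				      u16 *index)
{
	return ib_find_pkey(device, port_num, 0xffff, index);
}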
19475eb620c8SYosef Etigin 
19489268f72dSYotam Kenneth /**
19499268f72dSYotam Kenneth  * ib_get_net_dev_by_params() - Return the appropriate net_dev
19509268f72dSYotam Kenneth  * for a received CM request
19519268f72dSYotam Kenneth  * @dev:	An RDMA device on which the request has been received.
19529268f72dSYotam Kenneth  * @port:	Port number on the RDMA device.
19539268f72dSYotam Kenneth  * @pkey:	The Pkey the request came on.
19549268f72dSYotam Kenneth  * @gid:	A GID that the net_dev uses to communicate.
19559268f72dSYotam Kenneth  * @addr:	Contains the IP address that the request specified as its
19569268f72dSYotam Kenneth  *		destination.
1957921eab11SJason Gunthorpe  *
19589268f72dSYotam Kenneth  */
19599268f72dSYotam Kenneth struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
19609268f72dSYotam Kenneth 					    u8 port,
19619268f72dSYotam Kenneth 					    u16 pkey,
19629268f72dSYotam Kenneth 					    const union ib_gid *gid,
19639268f72dSYotam Kenneth 					    const struct sockaddr *addr)
19649268f72dSYotam Kenneth {
19659268f72dSYotam Kenneth 	struct net_device *net_dev = NULL;
19660df91bb6SJason Gunthorpe 	unsigned long index;
19670df91bb6SJason Gunthorpe 	void *client_data;
19689268f72dSYotam Kenneth 
19699268f72dSYotam Kenneth 	if (!rdma_protocol_ib(dev, port))
19709268f72dSYotam Kenneth 		return NULL;
19719268f72dSYotam Kenneth 
1972921eab11SJason Gunthorpe 	/*
1973921eab11SJason Gunthorpe 	 * Holding the read side guarantees that the client will not become
1974921eab11SJason Gunthorpe 	 * unregistered while we are calling get_net_dev_by_params()
1975921eab11SJason Gunthorpe 	 */
1976921eab11SJason Gunthorpe 	down_read(&dev->client_data_rwsem);
19770df91bb6SJason Gunthorpe 	xan_for_each_marked (&dev->client_data, index, client_data,
19780df91bb6SJason Gunthorpe 			     CLIENT_DATA_REGISTERED) {
19790df91bb6SJason Gunthorpe 		struct ib_client *client = xa_load(&clients, index);
19809268f72dSYotam Kenneth 
19810df91bb6SJason Gunthorpe 		if (!client || !client->get_net_dev_by_params)
19829268f72dSYotam Kenneth 			continue;
19839268f72dSYotam Kenneth 
19840df91bb6SJason Gunthorpe 		net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
19850df91bb6SJason Gunthorpe 							addr, client_data);
19869268f72dSYotam Kenneth 		if (net_dev)
19879268f72dSYotam Kenneth 			break;
19889268f72dSYotam Kenneth 	}
1989921eab11SJason Gunthorpe 	up_read(&dev->client_data_rwsem);
19909268f72dSYotam Kenneth 
19919268f72dSYotam Kenneth 	return net_dev;
19929268f72dSYotam Kenneth }
19939268f72dSYotam Kenneth EXPORT_SYMBOL(ib_get_net_dev_by_params);
19949268f72dSYotam Kenneth 
1995521ed0d9SKamal Heib void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
1996521ed0d9SKamal Heib {
19973023a1e9SKamal Heib 	struct ib_device_ops *dev_ops = &dev->ops;
1998521ed0d9SKamal Heib #define SET_DEVICE_OP(ptr, name)                                               \
1999521ed0d9SKamal Heib 	do {                                                                   \
2000521ed0d9SKamal Heib 		if (ops->name)                                                 \
2001521ed0d9SKamal Heib 			if (!((ptr)->name))				       \
2002521ed0d9SKamal Heib 				(ptr)->name = ops->name;                       \
2003521ed0d9SKamal Heib 	} while (0)
2004521ed0d9SKamal Heib 
200530471d4bSLeon Romanovsky #define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
200630471d4bSLeon Romanovsky 
20073023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, add_gid);
20082f1927b0SMoni Shoua 	SET_DEVICE_OP(dev_ops, advise_mr);
20093023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_dm);
20103023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_fmr);
20113023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_hw_stats);
20123023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_mr);
20133023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_mw);
20143023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_pd);
20153023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
20163023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_ucontext);
20173023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_xrcd);
20183023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, attach_mcast);
20193023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, check_mr_status);
20203023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_ah);
20213023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_counters);
20223023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_cq);
20233023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_flow);
20243023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_flow_action_esp);
20253023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_qp);
20263023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
20273023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_srq);
20283023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_wq);
20293023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_dm);
2030d0899892SJason Gunthorpe 	SET_DEVICE_OP(dev_ops, dealloc_driver);
20313023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_fmr);
20323023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_mw);
20333023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_pd);
20343023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_ucontext);
20353023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_xrcd);
20363023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, del_gid);
20373023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dereg_mr);
20383023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_ah);
20393023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_counters);
20403023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_cq);
20413023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_flow);
20423023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_flow_action);
20433023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_qp);
20443023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
20453023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_srq);
20463023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_wq);
20473023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, detach_mcast);
20483023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
20493023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, drain_rq);
20503023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, drain_sq);
2051ca22354bSJason Gunthorpe 	SET_DEVICE_OP(dev_ops, enable_driver);
205202da3750SLeon Romanovsky 	SET_DEVICE_OP(dev_ops, fill_res_entry);
20533023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
20543023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_dma_mr);
20553023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_hw_stats);
20563023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_link_layer);
20573023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_netdev);
20583023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_port_immutable);
20593023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_vector_affinity);
20603023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_vf_config);
20613023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_vf_stats);
2062ea4baf7fSParav Pandit 	SET_DEVICE_OP(dev_ops, init_port);
20633023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, map_mr_sg);
20643023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, map_phys_fmr);
20653023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, mmap);
20663023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_ah);
20673023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_cq);
20683023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_device);
20693023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
20703023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_port);
20713023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_qp);
20723023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_srq);
20733023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_wq);
20743023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, peek_cq);
20753023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, poll_cq);
20763023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, post_recv);
20773023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, post_send);
20783023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, post_srq_recv);
20793023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, process_mad);
20803023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_ah);
20813023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_device);
20823023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_gid);
20833023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_pkey);
20843023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_port);
20853023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_qp);
20863023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_srq);
20873023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
20883023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, read_counters);
20893023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, reg_dm_mr);
20903023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, reg_user_mr);
20913023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
20923023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, req_notify_cq);
20933023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
20943023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, resize_cq);
20953023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, set_vf_guid);
20963023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, set_vf_link_state);
20973023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, unmap_fmr);
209821a428a0SLeon Romanovsky 
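/*
 * The SET_OBJ_SIZE() entries below record the driver's structure sizes so
 * the core can allocate the driver-private ib_pd / ib_ucontext objects on
 * the driver's behalf, rather than each driver kalloc'ing its own.
 */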
209921a428a0SLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_pd);
2100a2a074efSLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_ucontext);
2101521ed0d9SKamal Heib }
2102521ed0d9SKamal Heib EXPORT_SYMBOL(ib_set_device_ops);
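
/*
 * Illustrative sketch of the caller side: a provider driver typically
 * fills a static const ib_device_ops table and hands it to
 * ib_set_device_ops() before registering the device.  The "exdrv_*"
 * names here are hypothetical placeholders, not a real driver:
 *
 *	static const struct ib_device_ops exdrv_dev_ops = {
 *		.query_device	= exdrv_query_device,
 *		.query_port	= exdrv_query_port,
 *		.create_qp	= exdrv_create_qp,
 *		.destroy_qp	= exdrv_destroy_qp,
 *		.post_send	= exdrv_post_send,
 *		.poll_cq	= exdrv_poll_cq,
 *	};
 *
 *	ib_set_device_ops(ibdev, &exdrv_dev_ops);
 */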
2103521ed0d9SKamal Heib 
2104d0e312feSLeon Romanovsky static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
2105735c631aSMark Bloch 	[RDMA_NL_LS_OP_RESOLVE] = {
2106647c75acSLeon Romanovsky 		.doit = ib_nl_handle_resolve_resp,
2107e3a2b93dSLeon Romanovsky 		.flags = RDMA_NL_ADMIN_PERM,
2108e3a2b93dSLeon Romanovsky 	},
2109735c631aSMark Bloch 	[RDMA_NL_LS_OP_SET_TIMEOUT] = {
2110647c75acSLeon Romanovsky 		.doit = ib_nl_handle_set_timeout,
2111e3a2b93dSLeon Romanovsky 		.flags = RDMA_NL_ADMIN_PERM,
2112e3a2b93dSLeon Romanovsky 	},
2113ae43f828SMark Bloch 	[RDMA_NL_LS_OP_IP_RESOLVE] = {
2114647c75acSLeon Romanovsky 		.doit = ib_nl_handle_ip_res_resp,
2115e3a2b93dSLeon Romanovsky 		.flags = RDMA_NL_ADMIN_PERM,
2116e3a2b93dSLeon Romanovsky 	},
2117735c631aSMark Bloch };
2118735c631aSMark Bloch 
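/*
 * Sketch of how a table like ibnl_ls_cb_table above gets wired up; the LS
 * table itself is registered from ib_core_init() below.  A hypothetical
 * client "RDMA_NL_FOO" would follow the same pattern:
 *
 *	static const struct rdma_nl_cbs foo_cb_table[RDMA_NL_FOO_NUM_OPS] = {
 *		[RDMA_NL_FOO_OP_GET] = {
 *			.doit	= foo_nl_get_doit,
 *			.flags	= RDMA_NL_ADMIN_PERM,
 *		},
 *	};
 *
 *	rdma_nl_register(RDMA_NL_FOO, foo_cb_table);
 */
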
21191da177e4SLinus Torvalds static int __init ib_core_init(void)
21201da177e4SLinus Torvalds {
21211da177e4SLinus Torvalds 	int ret;
21221da177e4SLinus Torvalds 
2123f0626710STejun Heo 	ib_wq = alloc_workqueue("infiniband", 0, 0);
2124f0626710STejun Heo 	if (!ib_wq)
2125f0626710STejun Heo 		return -ENOMEM;
2126f0626710STejun Heo 
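	/*
	 * CQ completion work runs on a dedicated high-priority workqueue;
	 * WQ_MEM_RECLAIM guarantees forward progress under memory pressure,
	 * which block-storage initiators depend on for reclaim.
	 */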
212714d3a3b2SChristoph Hellwig 	ib_comp_wq = alloc_workqueue("ib-comp-wq",
2128b7363e67SSagi Grimberg 			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
212914d3a3b2SChristoph Hellwig 	if (!ib_comp_wq) {
213014d3a3b2SChristoph Hellwig 		ret = -ENOMEM;
213114d3a3b2SChristoph Hellwig 		goto err;
213214d3a3b2SChristoph Hellwig 	}
213314d3a3b2SChristoph Hellwig 
2134f794809aSJack Morgenstein 	ib_comp_unbound_wq =
2135f794809aSJack Morgenstein 		alloc_workqueue("ib-comp-unb-wq",
2136f794809aSJack Morgenstein 				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
2137f794809aSJack Morgenstein 				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
2138f794809aSJack Morgenstein 	if (!ib_comp_unbound_wq) {
2139f794809aSJack Morgenstein 		ret = -ENOMEM;
2140f794809aSJack Morgenstein 		goto err_comp;
2141f794809aSJack Morgenstein 	}
2142f794809aSJack Morgenstein 
214355aeed06SJason Gunthorpe 	ret = class_register(&ib_class);
2144fd75c789SNir Muchtar 	if (ret) {
2145aba25a3eSParav Pandit 		pr_warn("Couldn't create InfiniBand device class\n");
2146f794809aSJack Morgenstein 		goto err_comp_unbound;
2147fd75c789SNir Muchtar 	}
21481da177e4SLinus Torvalds 
2149c9901724SLeon Romanovsky 	ret = rdma_nl_init();
21501da177e4SLinus Torvalds 	if (ret) {
2151c9901724SLeon Romanovsky 		pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
2152fd75c789SNir Muchtar 		goto err_sysfs;
21531da177e4SLinus Torvalds 	}
21541da177e4SLinus Torvalds 
2155e3f20f02SLeon Romanovsky 	ret = addr_init();
2156e3f20f02SLeon Romanovsky 	if (ret) {
2157e3f20f02SLeon Romanovsky 		pr_warn("Couldn't init IB address resolution\n");
2158e3f20f02SLeon Romanovsky 		goto err_ibnl;
2159e3f20f02SLeon Romanovsky 	}
2160e3f20f02SLeon Romanovsky 
21614c2cb422SMark Bloch 	ret = ib_mad_init();
21624c2cb422SMark Bloch 	if (ret) {
21634c2cb422SMark Bloch 		pr_warn("Couldn't init IB MAD\n");
21644c2cb422SMark Bloch 		goto err_addr;
21654c2cb422SMark Bloch 	}
21664c2cb422SMark Bloch 
2167c2e49c92SMark Bloch 	ret = ib_sa_init();
2168c2e49c92SMark Bloch 	if (ret) {
2169c2e49c92SMark Bloch 		pr_warn("Couldn't init SA\n");
2170c2e49c92SMark Bloch 		goto err_mad;
2171c2e49c92SMark Bloch 	}
2172c2e49c92SMark Bloch 
21738f408ab6SDaniel Jurgens 	ret = register_lsm_notifier(&ibdev_lsm_nb);
21748f408ab6SDaniel Jurgens 	if (ret) {
21758f408ab6SDaniel Jurgens 		pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
2176c9901724SLeon Romanovsky 		goto err_sa;
21778f408ab6SDaniel Jurgens 	}
21788f408ab6SDaniel Jurgens 
21794e0f7b90SParav Pandit 	ret = register_pernet_device(&rdma_dev_net_ops);
21804e0f7b90SParav Pandit 	if (ret) {
21814e0f7b90SParav Pandit 		pr_warn("Couldn't init compat dev. ret %d\n", ret);
21824e0f7b90SParav Pandit 		goto err_compat;
21834e0f7b90SParav Pandit 	}
21844e0f7b90SParav Pandit 
21856c80b41aSLeon Romanovsky 	nldev_init();
2186c9901724SLeon Romanovsky 	rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
21875ef8c0c1SJason Gunthorpe 	roce_gid_mgmt_init();
2188b2cbae2cSRoland Dreier 
2189fd75c789SNir Muchtar 	return 0;
2190fd75c789SNir Muchtar 
21914e0f7b90SParav Pandit err_compat:
21924e0f7b90SParav Pandit 	unregister_lsm_notifier(&ibdev_lsm_nb);
2193735c631aSMark Bloch err_sa:
2194735c631aSMark Bloch 	ib_sa_cleanup();
2195c2e49c92SMark Bloch err_mad:
2196c2e49c92SMark Bloch 	ib_mad_cleanup();
21974c2cb422SMark Bloch err_addr:
21984c2cb422SMark Bloch 	addr_cleanup();
2199e3f20f02SLeon Romanovsky err_ibnl:
2200c9901724SLeon Romanovsky 	rdma_nl_exit();
2201fd75c789SNir Muchtar err_sysfs:
220255aeed06SJason Gunthorpe 	class_unregister(&ib_class);
2203f794809aSJack Morgenstein err_comp_unbound:
2204f794809aSJack Morgenstein 	destroy_workqueue(ib_comp_unbound_wq);
220514d3a3b2SChristoph Hellwig err_comp:
220614d3a3b2SChristoph Hellwig 	destroy_workqueue(ib_comp_wq);
2207fd75c789SNir Muchtar err:
2208fd75c789SNir Muchtar 	destroy_workqueue(ib_wq);
22091da177e4SLinus Torvalds 	return ret;
22101da177e4SLinus Torvalds }
22111da177e4SLinus Torvalds 
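/*
 * Tear down in the exact reverse order of ib_core_init() so that each
 * subsystem outlives everything registered on top of it.
 */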
22121da177e4SLinus Torvalds static void __exit ib_core_cleanup(void)
22131da177e4SLinus Torvalds {
22145ef8c0c1SJason Gunthorpe 	roce_gid_mgmt_cleanup();
22156c80b41aSLeon Romanovsky 	nldev_exit();
2216c9901724SLeon Romanovsky 	rdma_nl_unregister(RDMA_NL_LS);
22174e0f7b90SParav Pandit 	unregister_pernet_device(&rdma_dev_net_ops);
2218c9901724SLeon Romanovsky 	unregister_lsm_notifier(&ibdev_lsm_nb);
2219c2e49c92SMark Bloch 	ib_sa_cleanup();
22204c2cb422SMark Bloch 	ib_mad_cleanup();
2221e3f20f02SLeon Romanovsky 	addr_cleanup();
2222c9901724SLeon Romanovsky 	rdma_nl_exit();
222355aeed06SJason Gunthorpe 	class_unregister(&ib_class);
2224f794809aSJack Morgenstein 	destroy_workqueue(ib_comp_unbound_wq);
222514d3a3b2SChristoph Hellwig 	destroy_workqueue(ib_comp_wq);
2226f7c6a7b5SRoland Dreier 	/* Make sure that any pending umem accounting work is done. */
2227f0626710STejun Heo 	destroy_workqueue(ib_wq);
2228d0899892SJason Gunthorpe 	flush_workqueue(system_unbound_wq);
2229e59178d8SJason Gunthorpe 	WARN_ON(!xa_empty(&clients));
22300df91bb6SJason Gunthorpe 	WARN_ON(!xa_empty(&devices));
22311da177e4SLinus Torvalds }
22321da177e4SLinus Torvalds 
2233e3bf14bdSJason Gunthorpe MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
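/*
 * The alias above is keyed on the netlink client index so the dispatcher
 * can demand-load the module implementing client 4 (RDMA_NL_LS) when a
 * request arrives and no handlers are registered.
 */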
2234e3bf14bdSJason Gunthorpe 
223562dfa795SParav Pandit /* ib core relies on the netdev stack registering the net_ns_type_operations
223662dfa795SParav Pandit  * ns kobject type before ib_core initializes; hence fs_initcall(), not module_init().
223762dfa795SParav Pandit  */
223862dfa795SParav Pandit fs_initcall(ib_core_init);
22391da177e4SLinus Torvalds module_exit(ib_core_cleanup);