1 /* 2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include <linux/module.h> 35 #include <linux/string.h> 36 #include <linux/errno.h> 37 #include <linux/kernel.h> 38 #include <linux/slab.h> 39 #include <linux/init.h> 40 #include <linux/netdevice.h> 41 #include <net/net_namespace.h> 42 #include <net/netns/generic.h> 43 #include <linux/security.h> 44 #include <linux/notifier.h> 45 #include <linux/hashtable.h> 46 #include <rdma/rdma_netlink.h> 47 #include <rdma/ib_addr.h> 48 #include <rdma/ib_cache.h> 49 #include <rdma/rdma_counter.h> 50 51 #include "core_priv.h" 52 #include "restrack.h" 53 54 MODULE_AUTHOR("Roland Dreier"); 55 MODULE_DESCRIPTION("core kernel InfiniBand API"); 56 MODULE_LICENSE("Dual BSD/GPL"); 57 58 struct workqueue_struct *ib_comp_wq; 59 struct workqueue_struct *ib_comp_unbound_wq; 60 struct workqueue_struct *ib_wq; 61 EXPORT_SYMBOL_GPL(ib_wq); 62 63 /* 64 * Each of the three rwsem locks (devices, clients, client_data) protects the 65 * xarray of the same name. Specifically it allows the caller to assert that 66 * the MARK will/will not be changing under the lock, and for devices and 67 * clients, that the value in the xarray is still a valid pointer. Change of 68 * the MARK is linked to the object state, so holding the lock and testing the 69 * MARK also asserts that the contained object is in a certain state. 70 * 71 * This is used to build a two stage register/unregister flow where objects 72 * can continue to be in the xarray even though they are still in progress to 73 * register/unregister. 74 * 75 * The xarray itself provides additional locking, and restartable iteration, 76 * which is also relied on. 77 * 78 * Locks should not be nested, with the exception of client_data, which is 79 * allowed to nest under the read side of the other two locks. 80 * 81 * The devices_rwsem also protects the device name list, any change or 82 * assignment of device name must also hold the write side to guarantee unique 83 * names. 84 */ 85 86 /* 87 * devices contains devices that have had their names assigned. The 88 * devices may not be registered. 
Users that care about the registration 89 * status need to call ib_device_try_get() on the device to ensure it is 90 * registered, and keep it registered, for the required duration. 91 * 92 */ 93 static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); 94 static DECLARE_RWSEM(devices_rwsem); 95 #define DEVICE_REGISTERED XA_MARK_1 96 97 static u32 highest_client_id; 98 #define CLIENT_REGISTERED XA_MARK_1 99 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); 100 static DECLARE_RWSEM(clients_rwsem); 101 102 static void ib_client_put(struct ib_client *client) 103 { 104 if (refcount_dec_and_test(&client->uses)) 105 complete(&client->uses_zero); 106 } 107 108 /* 109 * If client_data is registered then the corresponding client must also still 110 * be registered. 111 */ 112 #define CLIENT_DATA_REGISTERED XA_MARK_1 113 114 /** 115 * struct rdma_dev_net - rdma net namespace metadata for a net 116 * @net: Pointer to owner net namespace 117 * @id: xarray id to identify the net namespace. 118 */ 119 struct rdma_dev_net { 120 possible_net_t net; 121 u32 id; 122 }; 123 124 static unsigned int rdma_dev_net_id; 125 126 /* 127 * A list of net namespaces is maintained in an xarray. This is necessary 128 * because we can't get the locking right using the existing net ns list. We 129 * would require an init_net callback after the list is updated. 130 */ 131 static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); 132 /* 133 * rwsem to protect accessing the rdma_nets xarray entries. 134 */ 135 static DECLARE_RWSEM(rdma_nets_rwsem); 136 137 bool ib_devices_shared_netns = true; 138 module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); 139 MODULE_PARM_DESC(netns_mode, 140 "Share device among net namespaces; default=1 (shared)"); 141 /** 142 * rdma_dev_access_netns() - Return whether an rdma device can be accessed 143 * from a specified net namespace or not. 144 * @dev: Pointer to the rdma device which needs to be checked 145 * @net: Pointer to the net namespace for which access is to be checked 146 * 147 * When the rdma device is in shared mode, it ignores the net namespace. 148 * When the rdma device is exclusive to a net namespace, the rdma device's 149 * net namespace is checked against the specified one. 153 */ 154 bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) 155 { 156 return (ib_devices_shared_netns || 157 net_eq(read_pnet(&dev->coredev.rdma_net), net)); 158 } 159 EXPORT_SYMBOL(rdma_dev_access_netns); 160 161 /* 162 * xarray has this behavior where it won't iterate over NULL values stored in 163 * allocated arrays. So we need our own iterator to see all values stored in 164 * the array. This does the same thing as xa_for_each except that it also 165 * returns NULL valued entries if the array is allocating. Simplified to only 166 * work on simple xarrays.
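 *
 * A minimal usage sketch, mirroring how this file later walks client_data
 * (where NULL values are legitimate entries), e.g. in ib_device_rename():
 *
 *	unsigned long index;
 *	void *client_data;
 *
 *	xan_for_each_marked(&ibdev->client_data, index, client_data,
 *			    CLIENT_DATA_REGISTERED) {
 *		struct ib_client *client = xa_load(&clients, index);
 *
 *		if (client && client->rename)
 *			client->rename(ibdev, client_data);
 *	}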
167 */ 168 static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, 169 xa_mark_t filter) 170 { 171 XA_STATE(xas, xa, *indexp); 172 void *entry; 173 174 rcu_read_lock(); 175 do { 176 entry = xas_find_marked(&xas, ULONG_MAX, filter); 177 if (xa_is_zero(entry)) 178 break; 179 } while (xas_retry(&xas, entry)); 180 rcu_read_unlock(); 181 182 if (entry) { 183 *indexp = xas.xa_index; 184 if (xa_is_zero(entry)) 185 return NULL; 186 return entry; 187 } 188 return XA_ERROR(-ENOENT); 189 } 190 #define xan_for_each_marked(xa, index, entry, filter) \ 191 for (index = 0, entry = xan_find_marked(xa, &(index), filter); \ 192 !xa_is_err(entry); \ 193 (index)++, entry = xan_find_marked(xa, &(index), filter)) 194 195 /* RCU hash table mapping netdevice pointers to struct ib_port_data */ 196 static DEFINE_SPINLOCK(ndev_hash_lock); 197 static DECLARE_HASHTABLE(ndev_hash, 5); 198 199 static void free_netdevs(struct ib_device *ib_dev); 200 static void ib_unregister_work(struct work_struct *work); 201 static void __ib_unregister_device(struct ib_device *device); 202 static int ib_security_change(struct notifier_block *nb, unsigned long event, 203 void *lsm_data); 204 static void ib_policy_change_task(struct work_struct *work); 205 static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); 206 207 static void __ibdev_printk(const char *level, const struct ib_device *ibdev, 208 struct va_format *vaf) 209 { 210 if (ibdev && ibdev->dev.parent) 211 dev_printk_emit(level[1] - '0', 212 ibdev->dev.parent, 213 "%s %s %s: %pV", 214 dev_driver_string(ibdev->dev.parent), 215 dev_name(ibdev->dev.parent), 216 dev_name(&ibdev->dev), 217 vaf); 218 else if (ibdev) 219 printk("%s%s: %pV", 220 level, dev_name(&ibdev->dev), vaf); 221 else 222 printk("%s(NULL ib_device): %pV", level, vaf); 223 } 224 225 void ibdev_printk(const char *level, const struct ib_device *ibdev, 226 const char *format, ...) 227 { 228 struct va_format vaf; 229 va_list args; 230 231 va_start(args, format); 232 233 vaf.fmt = format; 234 vaf.va = &args; 235 236 __ibdev_printk(level, ibdev, &vaf); 237 238 va_end(args); 239 } 240 EXPORT_SYMBOL(ibdev_printk); 241 242 #define define_ibdev_printk_level(func, level) \ 243 void func(const struct ib_device *ibdev, const char *fmt, ...) 
\ 244 { \ 245 struct va_format vaf; \ 246 va_list args; \ 247 \ 248 va_start(args, fmt); \ 249 \ 250 vaf.fmt = fmt; \ 251 vaf.va = &args; \ 252 \ 253 __ibdev_printk(level, ibdev, &vaf); \ 254 \ 255 va_end(args); \ 256 } \ 257 EXPORT_SYMBOL(func); 258 259 define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); 260 define_ibdev_printk_level(ibdev_alert, KERN_ALERT); 261 define_ibdev_printk_level(ibdev_crit, KERN_CRIT); 262 define_ibdev_printk_level(ibdev_err, KERN_ERR); 263 define_ibdev_printk_level(ibdev_warn, KERN_WARNING); 264 define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); 265 define_ibdev_printk_level(ibdev_info, KERN_INFO); 266 267 static struct notifier_block ibdev_lsm_nb = { 268 .notifier_call = ib_security_change, 269 }; 270 271 static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, 272 struct net *net); 273 274 /* Pointer to the RCU head at the start of the ib_port_data array */ 275 struct ib_port_data_rcu { 276 struct rcu_head rcu_head; 277 struct ib_port_data pdata[]; 278 }; 279 280 static void ib_device_check_mandatory(struct ib_device *device) 281 { 282 #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } 283 static const struct { 284 size_t offset; 285 char *name; 286 } mandatory_table[] = { 287 IB_MANDATORY_FUNC(query_device), 288 IB_MANDATORY_FUNC(query_port), 289 IB_MANDATORY_FUNC(query_pkey), 290 IB_MANDATORY_FUNC(alloc_pd), 291 IB_MANDATORY_FUNC(dealloc_pd), 292 IB_MANDATORY_FUNC(create_qp), 293 IB_MANDATORY_FUNC(modify_qp), 294 IB_MANDATORY_FUNC(destroy_qp), 295 IB_MANDATORY_FUNC(post_send), 296 IB_MANDATORY_FUNC(post_recv), 297 IB_MANDATORY_FUNC(create_cq), 298 IB_MANDATORY_FUNC(destroy_cq), 299 IB_MANDATORY_FUNC(poll_cq), 300 IB_MANDATORY_FUNC(req_notify_cq), 301 IB_MANDATORY_FUNC(get_dma_mr), 302 IB_MANDATORY_FUNC(dereg_mr), 303 IB_MANDATORY_FUNC(get_port_immutable) 304 }; 305 int i; 306 307 device->kverbs_provider = true; 308 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { 309 if (!*(void **) ((void *) &device->ops + 310 mandatory_table[i].offset)) { 311 device->kverbs_provider = false; 312 break; 313 } 314 } 315 } 316 317 /* 318 * Caller must perform ib_device_put() to return the device reference count 319 * when ib_device_get_by_index() returns valid device pointer. 320 */ 321 struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) 322 { 323 struct ib_device *device; 324 325 down_read(&devices_rwsem); 326 device = xa_load(&devices, index); 327 if (device) { 328 if (!rdma_dev_access_netns(device, net)) { 329 device = NULL; 330 goto out; 331 } 332 333 if (!ib_device_try_get(device)) 334 device = NULL; 335 } 336 out: 337 up_read(&devices_rwsem); 338 return device; 339 } 340 341 /** 342 * ib_device_put - Release IB device reference 343 * @device: device whose reference to be released 344 * 345 * ib_device_put() releases reference to the IB device to allow it to be 346 * unregistered and eventually free. 
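 *
 * A typical get/put pairing looks like this (illustrative sketch only;
 * do_something() is a hypothetical caller-side helper):
 *
 *	if (ib_device_try_get(device)) {
 *		do_something(device);
 *		ib_device_put(device);
 *	}
 *
 * The same put must be applied to pointers returned by helpers such as
 * ib_device_get_by_index() or ib_device_get_by_name().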
347 */ 348 void ib_device_put(struct ib_device *device) 349 { 350 if (refcount_dec_and_test(&device->refcount)) 351 complete(&device->unreg_completion); 352 } 353 EXPORT_SYMBOL(ib_device_put); 354 355 static struct ib_device *__ib_device_get_by_name(const char *name) 356 { 357 struct ib_device *device; 358 unsigned long index; 359 360 xa_for_each (&devices, index, device) 361 if (!strcmp(name, dev_name(&device->dev))) 362 return device; 363 364 return NULL; 365 } 366 367 /** 368 * ib_device_get_by_name - Find an IB device by name 369 * @name: The name to look for 370 * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) 371 * 372 * Find and hold an ib_device by its name. The caller must call 373 * ib_device_put() on the returned pointer. 374 */ 375 struct ib_device *ib_device_get_by_name(const char *name, 376 enum rdma_driver_id driver_id) 377 { 378 struct ib_device *device; 379 380 down_read(&devices_rwsem); 381 device = __ib_device_get_by_name(name); 382 if (device && driver_id != RDMA_DRIVER_UNKNOWN && 383 device->ops.driver_id != driver_id) 384 device = NULL; 385 386 if (device) { 387 if (!ib_device_try_get(device)) 388 device = NULL; 389 } 390 up_read(&devices_rwsem); 391 return device; 392 } 393 EXPORT_SYMBOL(ib_device_get_by_name); 394 395 static int rename_compat_devs(struct ib_device *device) 396 { 397 struct ib_core_device *cdev; 398 unsigned long index; 399 int ret = 0; 400 401 mutex_lock(&device->compat_devs_mutex); 402 xa_for_each (&device->compat_devs, index, cdev) { 403 ret = device_rename(&cdev->dev, dev_name(&device->dev)); 404 if (ret) { 405 dev_warn(&cdev->dev, 406 "Fail to rename compatdev to new name %s\n", 407 dev_name(&device->dev)); 408 break; 409 } 410 } 411 mutex_unlock(&device->compat_devs_mutex); 412 return ret; 413 } 414 415 int ib_device_rename(struct ib_device *ibdev, const char *name) 416 { 417 unsigned long index; 418 void *client_data; 419 int ret; 420 421 down_write(&devices_rwsem); 422 if (!strcmp(name, dev_name(&ibdev->dev))) { 423 up_write(&devices_rwsem); 424 return 0; 425 } 426 427 if (__ib_device_get_by_name(name)) { 428 up_write(&devices_rwsem); 429 return -EEXIST; 430 } 431 432 ret = device_rename(&ibdev->dev, name); 433 if (ret) { 434 up_write(&devices_rwsem); 435 return ret; 436 } 437 438 strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); 439 ret = rename_compat_devs(ibdev); 440 441 downgrade_write(&devices_rwsem); 442 down_read(&ibdev->client_data_rwsem); 443 xan_for_each_marked(&ibdev->client_data, index, client_data, 444 CLIENT_DATA_REGISTERED) { 445 struct ib_client *client = xa_load(&clients, index); 446 447 if (!client || !client->rename) 448 continue; 449 450 client->rename(ibdev, client_data); 451 } 452 up_read(&ibdev->client_data_rwsem); 453 up_read(&devices_rwsem); 454 return 0; 455 } 456 457 int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim) 458 { 459 if (use_dim > 1) 460 return -EINVAL; 461 ibdev->use_cq_dim = use_dim; 462 463 return 0; 464 } 465 466 static int alloc_name(struct ib_device *ibdev, const char *name) 467 { 468 struct ib_device *device; 469 unsigned long index; 470 struct ida inuse; 471 int rc; 472 int i; 473 474 lockdep_assert_held_write(&devices_rwsem); 475 ida_init(&inuse); 476 xa_for_each (&devices, index, device) { 477 char buf[IB_DEVICE_NAME_MAX]; 478 479 if (sscanf(dev_name(&device->dev), name, &i) != 1) 480 continue; 481 if (i < 0 || i >= INT_MAX) 482 continue; 483 snprintf(buf, sizeof buf, name, i); 484 if (strcmp(buf, dev_name(&device->dev)) != 0) 485 continue; 486 487 rc = 
ida_alloc_range(&inuse, i, i, GFP_KERNEL); 488 if (rc < 0) 489 goto out; 490 } 491 492 rc = ida_alloc(&inuse, GFP_KERNEL); 493 if (rc < 0) 494 goto out; 495 496 rc = dev_set_name(&ibdev->dev, name, rc); 497 out: 498 ida_destroy(&inuse); 499 return rc; 500 } 501 502 static void ib_device_release(struct device *device) 503 { 504 struct ib_device *dev = container_of(device, struct ib_device, dev); 505 506 free_netdevs(dev); 507 WARN_ON(refcount_read(&dev->refcount)); 508 if (dev->port_data) { 509 ib_cache_release_one(dev); 510 ib_security_release_port_pkey_list(dev); 511 rdma_counter_release(dev); 512 kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, 513 pdata[0]), 514 rcu_head); 515 } 516 517 xa_destroy(&dev->compat_devs); 518 xa_destroy(&dev->client_data); 519 kfree_rcu(dev, rcu_head); 520 } 521 522 static int ib_device_uevent(struct device *device, 523 struct kobj_uevent_env *env) 524 { 525 if (add_uevent_var(env, "NAME=%s", dev_name(device))) 526 return -ENOMEM; 527 528 /* 529 * It would be nice to pass the node GUID with the event... 530 */ 531 532 return 0; 533 } 534 535 static const void *net_namespace(struct device *d) 536 { 537 struct ib_core_device *coredev = 538 container_of(d, struct ib_core_device, dev); 539 540 return read_pnet(&coredev->rdma_net); 541 } 542 543 static struct class ib_class = { 544 .name = "infiniband", 545 .dev_release = ib_device_release, 546 .dev_uevent = ib_device_uevent, 547 .ns_type = &net_ns_type_operations, 548 .namespace = net_namespace, 549 }; 550 551 static void rdma_init_coredev(struct ib_core_device *coredev, 552 struct ib_device *dev, struct net *net) 553 { 554 /* This BUILD_BUG_ON is intended to catch layout change 555 * of union of ib_core_device and device. 556 * dev must be the first element as ib_core and providers 557 * driver uses it. Adding anything in ib_core_device before 558 * device will break this assumption. 559 */ 560 BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != 561 offsetof(struct ib_device, dev)); 562 563 coredev->dev.class = &ib_class; 564 coredev->dev.groups = dev->groups; 565 device_initialize(&coredev->dev); 566 coredev->owner = dev; 567 INIT_LIST_HEAD(&coredev->port_list); 568 write_pnet(&coredev->rdma_net, net); 569 } 570 571 /** 572 * _ib_alloc_device - allocate an IB device struct 573 * @size:size of structure to allocate 574 * 575 * Low-level drivers should use ib_alloc_device() to allocate &struct 576 * ib_device. @size is the size of the structure to be allocated, 577 * including any private data used by the low-level driver. 578 * ib_dealloc_device() must be used to free structures allocated with 579 * ib_alloc_device(). 580 */ 581 struct ib_device *_ib_alloc_device(size_t size) 582 { 583 struct ib_device *device; 584 585 if (WARN_ON(size < sizeof(struct ib_device))) 586 return NULL; 587 588 device = kzalloc(size, GFP_KERNEL); 589 if (!device) 590 return NULL; 591 592 if (rdma_restrack_init(device)) { 593 kfree(device); 594 return NULL; 595 } 596 597 device->groups[0] = &ib_dev_attr_group; 598 rdma_init_coredev(&device->coredev, device, &init_net); 599 600 INIT_LIST_HEAD(&device->event_handler_list); 601 spin_lock_init(&device->event_handler_lock); 602 mutex_init(&device->unregistration_lock); 603 /* 604 * client_data needs to be alloc because we don't want our mark to be 605 * destroyed if the user stores NULL in the client data. 
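 *
 * i.e. a client may legitimately do (sketch; some_client is a placeholder):
 *
 *	ib_set_client_data(device, &some_client, NULL);
 *
 * and the CLIENT_DATA_REGISTERED mark on that index must survive, which the
 * allocating xarray guarantees by keeping the slot allocated.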
606 */ 607 xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); 608 init_rwsem(&device->client_data_rwsem); 609 xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); 610 mutex_init(&device->compat_devs_mutex); 611 init_completion(&device->unreg_completion); 612 INIT_WORK(&device->unregistration_work, ib_unregister_work); 613 614 return device; 615 } 616 EXPORT_SYMBOL(_ib_alloc_device); 617 618 /** 619 * ib_dealloc_device - free an IB device struct 620 * @device:structure to free 621 * 622 * Free a structure allocated with ib_alloc_device(). 623 */ 624 void ib_dealloc_device(struct ib_device *device) 625 { 626 if (device->ops.dealloc_driver) 627 device->ops.dealloc_driver(device); 628 629 /* 630 * ib_unregister_driver() requires all devices to remain in the xarray 631 * while their ops are callable. The last op we call is dealloc_driver 632 * above. This is needed to create a fence on op callbacks prior to 633 * allowing the driver module to unload. 634 */ 635 down_write(&devices_rwsem); 636 if (xa_load(&devices, device->index) == device) 637 xa_erase(&devices, device->index); 638 up_write(&devices_rwsem); 639 640 /* Expedite releasing netdev references */ 641 free_netdevs(device); 642 643 WARN_ON(!xa_empty(&device->compat_devs)); 644 WARN_ON(!xa_empty(&device->client_data)); 645 WARN_ON(refcount_read(&device->refcount)); 646 rdma_restrack_clean(device); 647 /* Balances with device_initialize */ 648 put_device(&device->dev); 649 } 650 EXPORT_SYMBOL(ib_dealloc_device); 651 652 /* 653 * add_client_context() and remove_client_context() must be safe against 654 * parallel calls on the same device - registration/unregistration of both the 655 * device and client can be occurring in parallel. 656 * 657 * The routines need to be a fence, any caller must not return until the add 658 * or remove is fully completed. 659 */ 660 static int add_client_context(struct ib_device *device, 661 struct ib_client *client) 662 { 663 int ret = 0; 664 665 if (!device->kverbs_provider && !client->no_kverbs_req) 666 return 0; 667 668 down_write(&device->client_data_rwsem); 669 /* 670 * So long as the client is registered hold both the client and device 671 * unregistration locks. 672 */ 673 if (!refcount_inc_not_zero(&client->uses)) 674 goto out_unlock; 675 refcount_inc(&device->refcount); 676 677 /* 678 * Another caller to add_client_context got here first and has already 679 * completely initialized context. 
680 */ 681 if (xa_get_mark(&device->client_data, client->client_id, 682 CLIENT_DATA_REGISTERED)) 683 goto out; 684 685 ret = xa_err(xa_store(&device->client_data, client->client_id, NULL, 686 GFP_KERNEL)); 687 if (ret) 688 goto out; 689 downgrade_write(&device->client_data_rwsem); 690 if (client->add) 691 client->add(device); 692 693 /* Readers shall not see a client until add has been completed */ 694 xa_set_mark(&device->client_data, client->client_id, 695 CLIENT_DATA_REGISTERED); 696 up_read(&device->client_data_rwsem); 697 return 0; 698 699 out: 700 ib_device_put(device); 701 ib_client_put(client); 702 out_unlock: 703 up_write(&device->client_data_rwsem); 704 return ret; 705 } 706 707 static void remove_client_context(struct ib_device *device, 708 unsigned int client_id) 709 { 710 struct ib_client *client; 711 void *client_data; 712 713 down_write(&device->client_data_rwsem); 714 if (!xa_get_mark(&device->client_data, client_id, 715 CLIENT_DATA_REGISTERED)) { 716 up_write(&device->client_data_rwsem); 717 return; 718 } 719 client_data = xa_load(&device->client_data, client_id); 720 xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); 721 client = xa_load(&clients, client_id); 722 up_write(&device->client_data_rwsem); 723 724 /* 725 * Notice we cannot be holding any exclusive locks when calling the 726 * remove callback as the remove callback can recurse back into any 727 * public functions in this module and thus try for any locks those 728 * functions take. 729 * 730 * For this reason clients and drivers should not call the 731 * unregistration functions while holding any locks. 732 */ 733 if (client->remove) 734 client->remove(device, client_data); 735 736 xa_erase(&device->client_data, client_id); 737 ib_device_put(device); 738 ib_client_put(client); 739 } 740 741 static int alloc_port_data(struct ib_device *device) 742 { 743 struct ib_port_data_rcu *pdata_rcu; 744 unsigned int port; 745 746 if (device->port_data) 747 return 0; 748 749 /* This can only be called once the physical port range is defined */ 750 if (WARN_ON(!device->phys_port_cnt)) 751 return -EINVAL; 752 753 /* 754 * device->port_data is indexed directly by the port number to make 755 * access to this data as efficient as possible. 756 * 757 * Therefore port_data is declared as a 1 based array with potential 758 * empty slots at the beginning. 759 */ 760 pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata, 761 rdma_end_port(device) + 1), 762 GFP_KERNEL); 763 if (!pdata_rcu) 764 return -ENOMEM; 765 /* 766 * The rcu_head is put in front of the port data array and the stored 767 * pointer is adjusted since we never need to see that member until 768 * kfree_rcu.
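 *
 * The containing allocation is later recovered and freed in
 * ib_device_release() via:
 *
 *	kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
 *			       pdata[0]),
 *		  rcu_head);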
769 */ 770 device->port_data = pdata_rcu->pdata; 771 772 rdma_for_each_port (device, port) { 773 struct ib_port_data *pdata = &device->port_data[port]; 774 775 pdata->ib_dev = device; 776 spin_lock_init(&pdata->pkey_list_lock); 777 INIT_LIST_HEAD(&pdata->pkey_list); 778 spin_lock_init(&pdata->netdev_lock); 779 INIT_HLIST_NODE(&pdata->ndev_hash_link); 780 } 781 return 0; 782 } 783 784 static int verify_immutable(const struct ib_device *dev, u8 port) 785 { 786 return WARN_ON(!rdma_cap_ib_mad(dev, port) && 787 rdma_max_mad_size(dev, port) != 0); 788 } 789 790 static int setup_port_data(struct ib_device *device) 791 { 792 unsigned int port; 793 int ret; 794 795 ret = alloc_port_data(device); 796 if (ret) 797 return ret; 798 799 rdma_for_each_port (device, port) { 800 struct ib_port_data *pdata = &device->port_data[port]; 801 802 ret = device->ops.get_port_immutable(device, port, 803 &pdata->immutable); 804 if (ret) 805 return ret; 806 807 if (verify_immutable(device, port)) 808 return -EINVAL; 809 } 810 return 0; 811 } 812 813 void ib_get_device_fw_str(struct ib_device *dev, char *str) 814 { 815 if (dev->ops.get_dev_fw_str) 816 dev->ops.get_dev_fw_str(dev, str); 817 else 818 str[0] = '\0'; 819 } 820 EXPORT_SYMBOL(ib_get_device_fw_str); 821 822 static void ib_policy_change_task(struct work_struct *work) 823 { 824 struct ib_device *dev; 825 unsigned long index; 826 827 down_read(&devices_rwsem); 828 xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 829 unsigned int i; 830 831 rdma_for_each_port (dev, i) { 832 u64 sp; 833 int ret = ib_get_cached_subnet_prefix(dev, 834 i, 835 &sp); 836 837 WARN_ONCE(ret, 838 "ib_get_cached_subnet_prefix err: %d, this should never happen here\n", 839 ret); 840 if (!ret) 841 ib_security_cache_change(dev, i, sp); 842 } 843 } 844 up_read(&devices_rwsem); 845 } 846 847 static int ib_security_change(struct notifier_block *nb, unsigned long event, 848 void *lsm_data) 849 { 850 if (event != LSM_POLICY_CHANGE) 851 return NOTIFY_DONE; 852 853 schedule_work(&ib_policy_change_work); 854 ib_mad_agent_security_change(); 855 856 return NOTIFY_OK; 857 } 858 859 static void compatdev_release(struct device *dev) 860 { 861 struct ib_core_device *cdev = 862 container_of(dev, struct ib_core_device, dev); 863 864 kfree(cdev); 865 } 866 867 static int add_one_compat_dev(struct ib_device *device, 868 struct rdma_dev_net *rnet) 869 { 870 struct ib_core_device *cdev; 871 int ret; 872 873 lockdep_assert_held(&rdma_nets_rwsem); 874 if (!ib_devices_shared_netns) 875 return 0; 876 877 /* 878 * Create and add compat device in all namespaces other than where it 879 * is currently bound to. 880 */ 881 if (net_eq(read_pnet(&rnet->net), 882 read_pnet(&device->coredev.rdma_net))) 883 return 0; 884 885 /* 886 * The first of init_net() or ib_register_device() to take the 887 * compat_devs_mutex wins and gets to add the device. Others will wait 888 * for completion here. 
889 */ 890 mutex_lock(&device->compat_devs_mutex); 891 cdev = xa_load(&device->compat_devs, rnet->id); 892 if (cdev) { 893 ret = 0; 894 goto done; 895 } 896 ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); 897 if (ret) 898 goto done; 899 900 cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); 901 if (!cdev) { 902 ret = -ENOMEM; 903 goto cdev_err; 904 } 905 906 cdev->dev.parent = device->dev.parent; 907 rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); 908 cdev->dev.release = compatdev_release; 909 dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); 910 911 ret = device_add(&cdev->dev); 912 if (ret) 913 goto add_err; 914 ret = ib_setup_port_attrs(cdev); 915 if (ret) 916 goto port_err; 917 918 ret = xa_err(xa_store(&device->compat_devs, rnet->id, 919 cdev, GFP_KERNEL)); 920 if (ret) 921 goto insert_err; 922 923 mutex_unlock(&device->compat_devs_mutex); 924 return 0; 925 926 insert_err: 927 ib_free_port_attrs(cdev); 928 port_err: 929 device_del(&cdev->dev); 930 add_err: 931 put_device(&cdev->dev); 932 cdev_err: 933 xa_release(&device->compat_devs, rnet->id); 934 done: 935 mutex_unlock(&device->compat_devs_mutex); 936 return ret; 937 } 938 939 static void remove_one_compat_dev(struct ib_device *device, u32 id) 940 { 941 struct ib_core_device *cdev; 942 943 mutex_lock(&device->compat_devs_mutex); 944 cdev = xa_erase(&device->compat_devs, id); 945 mutex_unlock(&device->compat_devs_mutex); 946 if (cdev) { 947 ib_free_port_attrs(cdev); 948 device_del(&cdev->dev); 949 put_device(&cdev->dev); 950 } 951 } 952 953 static void remove_compat_devs(struct ib_device *device) 954 { 955 struct ib_core_device *cdev; 956 unsigned long index; 957 958 xa_for_each (&device->compat_devs, index, cdev) 959 remove_one_compat_dev(device, index); 960 } 961 962 static int add_compat_devs(struct ib_device *device) 963 { 964 struct rdma_dev_net *rnet; 965 unsigned long index; 966 int ret = 0; 967 968 lockdep_assert_held(&devices_rwsem); 969 970 down_read(&rdma_nets_rwsem); 971 xa_for_each (&rdma_nets, index, rnet) { 972 ret = add_one_compat_dev(device, rnet); 973 if (ret) 974 break; 975 } 976 up_read(&rdma_nets_rwsem); 977 return ret; 978 } 979 980 static void remove_all_compat_devs(void) 981 { 982 struct ib_compat_device *cdev; 983 struct ib_device *dev; 984 unsigned long index; 985 986 down_read(&devices_rwsem); 987 xa_for_each (&devices, index, dev) { 988 unsigned long c_index = 0; 989 990 /* Hold nets_rwsem so that any other thread modifying this 991 * system param can sync with this thread. 992 */ 993 down_read(&rdma_nets_rwsem); 994 xa_for_each (&dev->compat_devs, c_index, cdev) 995 remove_one_compat_dev(dev, c_index); 996 up_read(&rdma_nets_rwsem); 997 } 998 up_read(&devices_rwsem); 999 } 1000 1001 static int add_all_compat_devs(void) 1002 { 1003 struct rdma_dev_net *rnet; 1004 struct ib_device *dev; 1005 unsigned long index; 1006 int ret = 0; 1007 1008 down_read(&devices_rwsem); 1009 xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 1010 unsigned long net_index = 0; 1011 1012 /* Hold nets_rwsem so that any other thread modifying this 1013 * system param can sync with this thread. 
1014 */ 1015 down_read(&rdma_nets_rwsem); 1016 xa_for_each (&rdma_nets, net_index, rnet) { 1017 ret = add_one_compat_dev(dev, rnet); 1018 if (ret) 1019 break; 1020 } 1021 up_read(&rdma_nets_rwsem); 1022 } 1023 up_read(&devices_rwsem); 1024 if (ret) 1025 remove_all_compat_devs(); 1026 return ret; 1027 } 1028 1029 int rdma_compatdev_set(u8 enable) 1030 { 1031 struct rdma_dev_net *rnet; 1032 unsigned long index; 1033 int ret = 0; 1034 1035 down_write(&rdma_nets_rwsem); 1036 if (ib_devices_shared_netns == enable) { 1037 up_write(&rdma_nets_rwsem); 1038 return 0; 1039 } 1040 1041 /* enable/disable of compat devices is not supported 1042 * when more than default init_net exists. 1043 */ 1044 xa_for_each (&rdma_nets, index, rnet) { 1045 ret++; 1046 break; 1047 } 1048 if (!ret) 1049 ib_devices_shared_netns = enable; 1050 up_write(&rdma_nets_rwsem); 1051 if (ret) 1052 return -EBUSY; 1053 1054 if (enable) 1055 ret = add_all_compat_devs(); 1056 else 1057 remove_all_compat_devs(); 1058 return ret; 1059 } 1060 1061 static void rdma_dev_exit_net(struct net *net) 1062 { 1063 struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); 1064 struct ib_device *dev; 1065 unsigned long index; 1066 int ret; 1067 1068 down_write(&rdma_nets_rwsem); 1069 /* 1070 * Prevent the ID from being re-used and hide the id from xa_for_each. 1071 */ 1072 ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); 1073 WARN_ON(ret); 1074 up_write(&rdma_nets_rwsem); 1075 1076 down_read(&devices_rwsem); 1077 xa_for_each (&devices, index, dev) { 1078 get_device(&dev->dev); 1079 /* 1080 * Release the devices_rwsem so that pontentially blocking 1081 * device_del, doesn't hold the devices_rwsem for too long. 1082 */ 1083 up_read(&devices_rwsem); 1084 1085 remove_one_compat_dev(dev, rnet->id); 1086 1087 /* 1088 * If the real device is in the NS then move it back to init. 1089 */ 1090 rdma_dev_change_netns(dev, net, &init_net); 1091 1092 put_device(&dev->dev); 1093 down_read(&devices_rwsem); 1094 } 1095 up_read(&devices_rwsem); 1096 1097 xa_erase(&rdma_nets, rnet->id); 1098 } 1099 1100 static __net_init int rdma_dev_init_net(struct net *net) 1101 { 1102 struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); 1103 unsigned long index; 1104 struct ib_device *dev; 1105 int ret; 1106 1107 /* No need to create any compat devices in default init_net. */ 1108 if (net_eq(net, &init_net)) 1109 return 0; 1110 1111 write_pnet(&rnet->net, net); 1112 1113 ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); 1114 if (ret) 1115 return ret; 1116 1117 down_read(&devices_rwsem); 1118 xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { 1119 /* Hold nets_rwsem so that netlink command cannot change 1120 * system configuration for device sharing mode. 1121 */ 1122 down_read(&rdma_nets_rwsem); 1123 ret = add_one_compat_dev(dev, rnet); 1124 up_read(&rdma_nets_rwsem); 1125 if (ret) 1126 break; 1127 } 1128 up_read(&devices_rwsem); 1129 1130 if (ret) 1131 rdma_dev_exit_net(net); 1132 1133 return ret; 1134 } 1135 1136 /* 1137 * Assign the unique string device name and the unique device index. This is 1138 * undone by ib_dealloc_device. 
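 *
 * Callers of ib_register_device() may pass either a fixed name or a
 * printf-style template; a template has the lowest free index substituted
 * by alloc_name(). Sketch (the "foo%d" template is hypothetical):
 *
 *	ib_register_device(device, "foo%d");
 *
 * which yields "foo0" for the first such device, "foo1" for the next, etc.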
1139 */ 1140 static int assign_name(struct ib_device *device, const char *name) 1141 { 1142 static u32 last_id; 1143 int ret; 1144 1145 down_write(&devices_rwsem); 1146 /* Assign a unique name to the device */ 1147 if (strchr(name, '%')) 1148 ret = alloc_name(device, name); 1149 else 1150 ret = dev_set_name(&device->dev, name); 1151 if (ret) 1152 goto out; 1153 1154 if (__ib_device_get_by_name(dev_name(&device->dev))) { 1155 ret = -ENFILE; 1156 goto out; 1157 } 1158 strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); 1159 1160 ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, 1161 &last_id, GFP_KERNEL); 1162 if (ret > 0) 1163 ret = 0; 1164 1165 out: 1166 up_write(&devices_rwsem); 1167 return ret; 1168 } 1169 1170 static void setup_dma_device(struct ib_device *device) 1171 { 1172 struct device *parent = device->dev.parent; 1173 1174 WARN_ON_ONCE(device->dma_device); 1175 if (device->dev.dma_ops) { 1176 /* 1177 * The caller provided custom DMA operations. Copy the 1178 * DMA-related fields that are used by e.g. dma_alloc_coherent() 1179 * into device->dev. 1180 */ 1181 device->dma_device = &device->dev; 1182 if (!device->dev.dma_mask) { 1183 if (parent) 1184 device->dev.dma_mask = parent->dma_mask; 1185 else 1186 WARN_ON_ONCE(true); 1187 } 1188 if (!device->dev.coherent_dma_mask) { 1189 if (parent) 1190 device->dev.coherent_dma_mask = 1191 parent->coherent_dma_mask; 1192 else 1193 WARN_ON_ONCE(true); 1194 } 1195 } else { 1196 /* 1197 * The caller did not provide custom DMA operations. Use the 1198 * DMA mapping operations of the parent device. 1199 */ 1200 WARN_ON_ONCE(!parent); 1201 device->dma_device = parent; 1202 } 1203 /* Setup default max segment size for all IB devices */ 1204 dma_set_max_seg_size(device->dma_device, SZ_2G); 1205 1206 } 1207 1208 /* 1209 * setup_device() allocates memory and sets up data that requires calling the 1210 * device ops, this is the only reason these actions are not done during 1211 * ib_alloc_device. It is undone by ib_dealloc_device(). 1212 */ 1213 static int setup_device(struct ib_device *device) 1214 { 1215 struct ib_udata uhw = {.outlen = 0, .inlen = 0}; 1216 int ret; 1217 1218 setup_dma_device(device); 1219 ib_device_check_mandatory(device); 1220 1221 ret = setup_port_data(device); 1222 if (ret) { 1223 dev_warn(&device->dev, "Couldn't create per-port data\n"); 1224 return ret; 1225 } 1226 1227 memset(&device->attrs, 0, sizeof(device->attrs)); 1228 ret = device->ops.query_device(device, &device->attrs, &uhw); 1229 if (ret) { 1230 dev_warn(&device->dev, 1231 "Couldn't query the device attributes\n"); 1232 return ret; 1233 } 1234 1235 return 0; 1236 } 1237 1238 static void disable_device(struct ib_device *device) 1239 { 1240 u32 cid; 1241 1242 WARN_ON(!refcount_read(&device->refcount)); 1243 1244 down_write(&devices_rwsem); 1245 xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); 1246 up_write(&devices_rwsem); 1247 1248 /* 1249 * Remove clients in LIFO order, see assign_client_id. This could be 1250 * more efficient if xarray learns to reverse iterate. Since no new 1251 * clients can be added to this ib_device past this point we only need 1252 * the maximum possible client_id value here. 
1253 */ 1254 down_read(&clients_rwsem); 1255 cid = highest_client_id; 1256 up_read(&clients_rwsem); 1257 while (cid) { 1258 cid--; 1259 remove_client_context(device, cid); 1260 } 1261 1262 /* Pairs with refcount_set in enable_device */ 1263 ib_device_put(device); 1264 wait_for_completion(&device->unreg_completion); 1265 1266 /* 1267 * compat devices must be removed after device refcount drops to zero. 1268 * Otherwise init_net() may add more compatdevs after removing compat 1269 * devices and before device is disabled. 1270 */ 1271 remove_compat_devs(device); 1272 } 1273 1274 /* 1275 * An enabled device is visible to all clients and to all the public facing 1276 * APIs that return a device pointer. This always returns with a new get, even 1277 * if it fails. 1278 */ 1279 static int enable_device_and_get(struct ib_device *device) 1280 { 1281 struct ib_client *client; 1282 unsigned long index; 1283 int ret = 0; 1284 1285 /* 1286 * One ref belongs to the xa and the other belongs to this 1287 * thread. This is needed to guard against parallel unregistration. 1288 */ 1289 refcount_set(&device->refcount, 2); 1290 down_write(&devices_rwsem); 1291 xa_set_mark(&devices, device->index, DEVICE_REGISTERED); 1292 1293 /* 1294 * By using downgrade_write() we ensure that no other thread can clear 1295 * DEVICE_REGISTERED while we are completing the client setup. 1296 */ 1297 downgrade_write(&devices_rwsem); 1298 1299 if (device->ops.enable_driver) { 1300 ret = device->ops.enable_driver(device); 1301 if (ret) 1302 goto out; 1303 } 1304 1305 down_read(&clients_rwsem); 1306 xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { 1307 ret = add_client_context(device, client); 1308 if (ret) 1309 break; 1310 } 1311 up_read(&clients_rwsem); 1312 if (!ret) 1313 ret = add_compat_devs(device); 1314 out: 1315 up_read(&devices_rwsem); 1316 return ret; 1317 } 1318 1319 /** 1320 * ib_register_device - Register an IB device with IB core 1321 * @device:Device to register 1322 * 1323 * Low-level drivers use ib_register_device() to register their 1324 * devices with the IB core. All registered clients will receive a 1325 * callback for each device that is added. @device must be allocated 1326 * with ib_alloc_device(). 1327 * 1328 * If the driver uses ops.dealloc_driver and calls any ib_unregister_device() 1329 * asynchronously then the device pointer may become freed as soon as this 1330 * function returns. 1331 */ 1332 int ib_register_device(struct ib_device *device, const char *name) 1333 { 1334 int ret; 1335 1336 ret = assign_name(device, name); 1337 if (ret) 1338 return ret; 1339 1340 ret = setup_device(device); 1341 if (ret) 1342 return ret; 1343 1344 ret = ib_cache_setup_one(device); 1345 if (ret) { 1346 dev_warn(&device->dev, 1347 "Couldn't set up InfiniBand P_Key/GID cache\n"); 1348 return ret; 1349 } 1350 1351 ib_device_register_rdmacg(device); 1352 1353 rdma_counter_init(device); 1354 1355 /* 1356 * Ensure that ADD uevent is not fired because it 1357 * is too early amd device is not initialized yet. 
1358 */ 1359 dev_set_uevent_suppress(&device->dev, true); 1360 ret = device_add(&device->dev); 1361 if (ret) 1362 goto cg_cleanup; 1363 1364 ret = ib_device_register_sysfs(device); 1365 if (ret) { 1366 dev_warn(&device->dev, 1367 "Couldn't register device with driver model\n"); 1368 goto dev_cleanup; 1369 } 1370 1371 ret = enable_device_and_get(device); 1372 dev_set_uevent_suppress(&device->dev, false); 1373 /* Mark for userspace that device is ready */ 1374 kobject_uevent(&device->dev.kobj, KOBJ_ADD); 1375 if (ret) { 1376 void (*dealloc_fn)(struct ib_device *); 1377 1378 /* 1379 * If we hit this error flow then we don't want to 1380 * automatically dealloc the device since the caller is 1381 * expected to call ib_dealloc_device() after 1382 * ib_register_device() fails. This is tricky due to the 1383 * possibility for a parallel unregistration along with this 1384 * error flow. Since we have a refcount here we know any 1385 * parallel flow is stopped in disable_device and will see the 1386 * NULL pointers, causing the responsibility to 1387 * ib_dealloc_device() to revert back to this thread. 1388 */ 1389 dealloc_fn = device->ops.dealloc_driver; 1390 device->ops.dealloc_driver = NULL; 1391 ib_device_put(device); 1392 __ib_unregister_device(device); 1393 device->ops.dealloc_driver = dealloc_fn; 1394 return ret; 1395 } 1396 ib_device_put(device); 1397 1398 return 0; 1399 1400 dev_cleanup: 1401 device_del(&device->dev); 1402 cg_cleanup: 1403 dev_set_uevent_suppress(&device->dev, false); 1404 ib_device_unregister_rdmacg(device); 1405 ib_cache_cleanup_one(device); 1406 return ret; 1407 } 1408 EXPORT_SYMBOL(ib_register_device); 1409 1410 /* Callers must hold a get on the device. */ 1411 static void __ib_unregister_device(struct ib_device *ib_dev) 1412 { 1413 /* 1414 * We have a registration lock so that all the calls to unregister are 1415 * fully fenced, once any unregister returns the device is truely 1416 * unregistered even if multiple callers are unregistering it at the 1417 * same time. This also interacts with the registration flow and 1418 * provides sane semantics if register and unregister are racing. 1419 */ 1420 mutex_lock(&ib_dev->unregistration_lock); 1421 if (!refcount_read(&ib_dev->refcount)) 1422 goto out; 1423 1424 disable_device(ib_dev); 1425 1426 /* Expedite removing unregistered pointers from the hash table */ 1427 free_netdevs(ib_dev); 1428 1429 ib_device_unregister_sysfs(ib_dev); 1430 device_del(&ib_dev->dev); 1431 ib_device_unregister_rdmacg(ib_dev); 1432 ib_cache_cleanup_one(ib_dev); 1433 1434 /* 1435 * Drivers using the new flow may not call ib_dealloc_device except 1436 * in error unwind prior to registration success. 1437 */ 1438 if (ib_dev->ops.dealloc_driver) { 1439 WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); 1440 ib_dealloc_device(ib_dev); 1441 } 1442 out: 1443 mutex_unlock(&ib_dev->unregistration_lock); 1444 } 1445 1446 /** 1447 * ib_unregister_device - Unregister an IB device 1448 * @device: The device to unregister 1449 * 1450 * Unregister an IB device. All clients will receive a remove callback. 1451 * 1452 * Callers should call this routine only once, and protect against races with 1453 * registration. Typically it should only be called as part of a remove 1454 * callback in an implementation of driver core's struct device_driver and 1455 * related. 1456 * 1457 * If ops.dealloc_driver is used then ib_dev will be freed upon return from 1458 * this function. 
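 *
 * A typical pairing in a driver looks like this (illustrative sketch;
 * "struct foo_dev", its "ib_dev" member and the "foo%d" name are
 * hypothetical):
 *
 *	struct foo_dev *foo = ib_alloc_device(foo_dev, ib_dev);
 *
 *	ret = ib_register_device(&foo->ib_dev, "foo%d");
 *	...
 *	ib_unregister_device(&foo->ib_dev);
 *	ib_dealloc_device(&foo->ib_dev);
 *
 * where the final ib_dealloc_device() call is only made by drivers that do
 * not provide ops.dealloc_driver.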
1459 */ 1460 void ib_unregister_device(struct ib_device *ib_dev) 1461 { 1462 get_device(&ib_dev->dev); 1463 __ib_unregister_device(ib_dev); 1464 put_device(&ib_dev->dev); 1465 } 1466 EXPORT_SYMBOL(ib_unregister_device); 1467 1468 /** 1469 * ib_unregister_device_and_put - Unregister a device while holding a 'get' 1470 * device: The device to unregister 1471 * 1472 * This is the same as ib_unregister_device(), except it includes an internal 1473 * ib_device_put() that should match a 'get' obtained by the caller. 1474 * 1475 * It is safe to call this routine concurrently from multiple threads while 1476 * holding the 'get'. When the function returns the device is fully 1477 * unregistered. 1478 * 1479 * Drivers using this flow MUST use the driver_unregister callback to clean up 1480 * their resources associated with the device and dealloc it. 1481 */ 1482 void ib_unregister_device_and_put(struct ib_device *ib_dev) 1483 { 1484 WARN_ON(!ib_dev->ops.dealloc_driver); 1485 get_device(&ib_dev->dev); 1486 ib_device_put(ib_dev); 1487 __ib_unregister_device(ib_dev); 1488 put_device(&ib_dev->dev); 1489 } 1490 EXPORT_SYMBOL(ib_unregister_device_and_put); 1491 1492 /** 1493 * ib_unregister_driver - Unregister all IB devices for a driver 1494 * @driver_id: The driver to unregister 1495 * 1496 * This implements a fence for device unregistration. It only returns once all 1497 * devices associated with the driver_id have fully completed their 1498 * unregistration and returned from ib_unregister_device*(). 1499 * 1500 * If device's are not yet unregistered it goes ahead and starts unregistering 1501 * them. 1502 * 1503 * This does not block creation of new devices with the given driver_id, that 1504 * is the responsibility of the caller. 1505 */ 1506 void ib_unregister_driver(enum rdma_driver_id driver_id) 1507 { 1508 struct ib_device *ib_dev; 1509 unsigned long index; 1510 1511 down_read(&devices_rwsem); 1512 xa_for_each (&devices, index, ib_dev) { 1513 if (ib_dev->ops.driver_id != driver_id) 1514 continue; 1515 1516 get_device(&ib_dev->dev); 1517 up_read(&devices_rwsem); 1518 1519 WARN_ON(!ib_dev->ops.dealloc_driver); 1520 __ib_unregister_device(ib_dev); 1521 1522 put_device(&ib_dev->dev); 1523 down_read(&devices_rwsem); 1524 } 1525 up_read(&devices_rwsem); 1526 } 1527 EXPORT_SYMBOL(ib_unregister_driver); 1528 1529 static void ib_unregister_work(struct work_struct *work) 1530 { 1531 struct ib_device *ib_dev = 1532 container_of(work, struct ib_device, unregistration_work); 1533 1534 __ib_unregister_device(ib_dev); 1535 put_device(&ib_dev->dev); 1536 } 1537 1538 /** 1539 * ib_unregister_device_queued - Unregister a device using a work queue 1540 * device: The device to unregister 1541 * 1542 * This schedules an asynchronous unregistration using a WQ for the device. A 1543 * driver should use this to avoid holding locks while doing unregistration, 1544 * such as holding the RTNL lock. 1545 * 1546 * Drivers using this API must use ib_unregister_driver before module unload 1547 * to ensure that all scheduled unregistrations have completed. 1548 */ 1549 void ib_unregister_device_queued(struct ib_device *ib_dev) 1550 { 1551 WARN_ON(!refcount_read(&ib_dev->refcount)); 1552 WARN_ON(!ib_dev->ops.dealloc_driver); 1553 get_device(&ib_dev->dev); 1554 if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) 1555 put_device(&ib_dev->dev); 1556 } 1557 EXPORT_SYMBOL(ib_unregister_device_queued); 1558 1559 /* 1560 * The caller must pass in a device that has the kref held and the refcount 1561 * released. 
If the device is in cur_net and still registered then it is moved 1562 * into net. 1563 */ 1564 static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, 1565 struct net *net) 1566 { 1567 int ret2 = -EINVAL; 1568 int ret; 1569 1570 mutex_lock(&device->unregistration_lock); 1571 1572 /* 1573 * If a device not under ib_device_get() or if the unregistration_lock 1574 * is not held, the namespace can be changed, or it can be unregistered. 1575 * Check again under the lock. 1576 */ 1577 if (refcount_read(&device->refcount) == 0 || 1578 !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { 1579 ret = -ENODEV; 1580 goto out; 1581 } 1582 1583 kobject_uevent(&device->dev.kobj, KOBJ_REMOVE); 1584 disable_device(device); 1585 1586 /* 1587 * At this point no one can be using the device, so it is safe to 1588 * change the namespace. 1589 */ 1590 write_pnet(&device->coredev.rdma_net, net); 1591 1592 down_read(&devices_rwsem); 1593 /* 1594 * Currently rdma devices are system wide unique. So the device name 1595 * is guaranteed free in the new namespace. Publish the new namespace 1596 * at the sysfs level. 1597 */ 1598 ret = device_rename(&device->dev, dev_name(&device->dev)); 1599 up_read(&devices_rwsem); 1600 if (ret) { 1601 dev_warn(&device->dev, 1602 "%s: Couldn't rename device after namespace change\n", 1603 __func__); 1604 /* Try and put things back and re-enable the device */ 1605 write_pnet(&device->coredev.rdma_net, cur_net); 1606 } 1607 1608 ret2 = enable_device_and_get(device); 1609 if (ret2) { 1610 /* 1611 * This shouldn't really happen, but if it does, let the user 1612 * retry at later point. So don't disable the device. 1613 */ 1614 dev_warn(&device->dev, 1615 "%s: Couldn't re-enable device after namespace change\n", 1616 __func__); 1617 } 1618 kobject_uevent(&device->dev.kobj, KOBJ_ADD); 1619 1620 ib_device_put(device); 1621 out: 1622 mutex_unlock(&device->unregistration_lock); 1623 if (ret) 1624 return ret; 1625 return ret2; 1626 } 1627 1628 int ib_device_set_netns_put(struct sk_buff *skb, 1629 struct ib_device *dev, u32 ns_fd) 1630 { 1631 struct net *net; 1632 int ret; 1633 1634 net = get_net_ns_by_fd(ns_fd); 1635 if (IS_ERR(net)) { 1636 ret = PTR_ERR(net); 1637 goto net_err; 1638 } 1639 1640 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { 1641 ret = -EPERM; 1642 goto ns_err; 1643 } 1644 1645 /* 1646 * Currently supported only for those providers which support 1647 * disassociation and don't do port specific sysfs init. Once a 1648 * port_cleanup infrastructure is implemented, this limitation will be 1649 * removed. 1650 */ 1651 if (!dev->ops.disassociate_ucontext || dev->ops.init_port || 1652 ib_devices_shared_netns) { 1653 ret = -EOPNOTSUPP; 1654 goto ns_err; 1655 } 1656 1657 get_device(&dev->dev); 1658 ib_device_put(dev); 1659 ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); 1660 put_device(&dev->dev); 1661 1662 put_net(net); 1663 return ret; 1664 1665 ns_err: 1666 put_net(net); 1667 net_err: 1668 ib_device_put(dev); 1669 return ret; 1670 } 1671 1672 static struct pernet_operations rdma_dev_net_ops = { 1673 .init = rdma_dev_init_net, 1674 .exit = rdma_dev_exit_net, 1675 .id = &rdma_dev_net_id, 1676 .size = sizeof(struct rdma_dev_net), 1677 }; 1678 1679 static int assign_client_id(struct ib_client *client) 1680 { 1681 int ret; 1682 1683 down_write(&clients_rwsem); 1684 /* 1685 * The add/remove callbacks must be called in FIFO/LIFO order. To 1686 * achieve this we assign client_ids so they are sorted in 1687 * registration order. 
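 *
 * For example (sketch; clientA and clientB are placeholders), if clientA
 * registers before clientB:
 *
 *	ib_register_client(&clientA);
 *	ib_register_client(&clientB);
 *
 * then clientA gets the lower client_id, so on every device clientA.add()
 * runs before clientB.add(), and device unregistration tears down clientB
 * before clientA.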
1688 */ 1689 client->client_id = highest_client_id; 1690 ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); 1691 if (ret) 1692 goto out; 1693 1694 highest_client_id++; 1695 xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); 1696 1697 out: 1698 up_write(&clients_rwsem); 1699 return ret; 1700 } 1701 1702 static void remove_client_id(struct ib_client *client) 1703 { 1704 down_write(&clients_rwsem); 1705 xa_erase(&clients, client->client_id); 1706 for (; highest_client_id; highest_client_id--) 1707 if (xa_load(&clients, highest_client_id - 1)) 1708 break; 1709 up_write(&clients_rwsem); 1710 } 1711 1712 /** 1713 * ib_register_client - Register an IB client 1714 * @client:Client to register 1715 * 1716 * Upper level users of the IB drivers can use ib_register_client() to 1717 * register callbacks for IB device addition and removal. When an IB 1718 * device is added, each registered client's add method will be called 1719 * (in the order the clients were registered), and when a device is 1720 * removed, each client's remove method will be called (in the reverse 1721 * order that clients were registered). In addition, when 1722 * ib_register_client() is called, the client will receive an add 1723 * callback for all devices already registered. 1724 */ 1725 int ib_register_client(struct ib_client *client) 1726 { 1727 struct ib_device *device; 1728 unsigned long index; 1729 int ret; 1730 1731 refcount_set(&client->uses, 1); 1732 init_completion(&client->uses_zero); 1733 ret = assign_client_id(client); 1734 if (ret) 1735 return ret; 1736 1737 down_read(&devices_rwsem); 1738 xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { 1739 ret = add_client_context(device, client); 1740 if (ret) { 1741 up_read(&devices_rwsem); 1742 ib_unregister_client(client); 1743 return ret; 1744 } 1745 } 1746 up_read(&devices_rwsem); 1747 return 0; 1748 } 1749 EXPORT_SYMBOL(ib_register_client); 1750 1751 /** 1752 * ib_unregister_client - Unregister an IB client 1753 * @client:Client to unregister 1754 * 1755 * Upper level users use ib_unregister_client() to remove their client 1756 * registration. When ib_unregister_client() is called, the client 1757 * will receive a remove callback for each IB device still registered. 1758 * 1759 * This is a full fence, once it returns no client callbacks will be called, 1760 * or are running in another thread. 1761 */ 1762 void ib_unregister_client(struct ib_client *client) 1763 { 1764 struct ib_device *device; 1765 unsigned long index; 1766 1767 down_write(&clients_rwsem); 1768 ib_client_put(client); 1769 xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); 1770 up_write(&clients_rwsem); 1771 1772 /* We do not want to have locks while calling client->remove() */ 1773 rcu_read_lock(); 1774 xa_for_each (&devices, index, device) { 1775 if (!ib_device_try_get(device)) 1776 continue; 1777 rcu_read_unlock(); 1778 1779 remove_client_context(device, client->client_id); 1780 1781 ib_device_put(device); 1782 rcu_read_lock(); 1783 } 1784 rcu_read_unlock(); 1785 1786 /* 1787 * remove_client_context() is not a fence, it can return even though a 1788 * removal is ongoing. Wait until all removals are completed. 
1789 */ 1790 wait_for_completion(&client->uses_zero); 1791 remove_client_id(client); 1792 } 1793 EXPORT_SYMBOL(ib_unregister_client); 1794 1795 static int __ib_get_global_client_nl_info(const char *client_name, 1796 struct ib_client_nl_info *res) 1797 { 1798 struct ib_client *client; 1799 unsigned long index; 1800 int ret = -ENOENT; 1801 1802 down_read(&clients_rwsem); 1803 xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { 1804 if (strcmp(client->name, client_name) != 0) 1805 continue; 1806 if (!client->get_global_nl_info) { 1807 ret = -EOPNOTSUPP; 1808 break; 1809 } 1810 ret = client->get_global_nl_info(res); 1811 if (WARN_ON(ret == -ENOENT)) 1812 ret = -EINVAL; 1813 if (!ret && res->cdev) 1814 get_device(res->cdev); 1815 break; 1816 } 1817 up_read(&clients_rwsem); 1818 return ret; 1819 } 1820 1821 static int __ib_get_client_nl_info(struct ib_device *ibdev, 1822 const char *client_name, 1823 struct ib_client_nl_info *res) 1824 { 1825 unsigned long index; 1826 void *client_data; 1827 int ret = -ENOENT; 1828 1829 down_read(&ibdev->client_data_rwsem); 1830 xan_for_each_marked (&ibdev->client_data, index, client_data, 1831 CLIENT_DATA_REGISTERED) { 1832 struct ib_client *client = xa_load(&clients, index); 1833 1834 if (!client || strcmp(client->name, client_name) != 0) 1835 continue; 1836 if (!client->get_nl_info) { 1837 ret = -EOPNOTSUPP; 1838 break; 1839 } 1840 ret = client->get_nl_info(ibdev, client_data, res); 1841 if (WARN_ON(ret == -ENOENT)) 1842 ret = -EINVAL; 1843 1844 /* 1845 * The cdev is guaranteed valid as long as we are inside the 1846 * client_data_rwsem as remove_one can't be called. Keep it 1847 * valid for the caller. 1848 */ 1849 if (!ret && res->cdev) 1850 get_device(res->cdev); 1851 break; 1852 } 1853 up_read(&ibdev->client_data_rwsem); 1854 1855 return ret; 1856 } 1857 1858 /** 1859 * ib_get_client_nl_info - Fetch the nl_info from a client 1860 * @device - IB device 1861 * @client_name - Name of the client 1862 * @res - Result of the query 1863 */ 1864 int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, 1865 struct ib_client_nl_info *res) 1866 { 1867 int ret; 1868 1869 if (ibdev) 1870 ret = __ib_get_client_nl_info(ibdev, client_name, res); 1871 else 1872 ret = __ib_get_global_client_nl_info(client_name, res); 1873 #ifdef CONFIG_MODULES 1874 if (ret == -ENOENT) { 1875 request_module("rdma-client-%s", client_name); 1876 if (ibdev) 1877 ret = __ib_get_client_nl_info(ibdev, client_name, res); 1878 else 1879 ret = __ib_get_global_client_nl_info(client_name, res); 1880 } 1881 #endif 1882 if (ret) { 1883 if (ret == -ENOENT) 1884 return -EOPNOTSUPP; 1885 return ret; 1886 } 1887 1888 if (WARN_ON(!res->cdev)) 1889 return -EINVAL; 1890 return 0; 1891 } 1892 1893 /** 1894 * ib_set_client_data - Set IB client context 1895 * @device:Device to set context for 1896 * @client:Client to set context for 1897 * @data:Context to set 1898 * 1899 * ib_set_client_data() sets client context data that can be retrieved with 1900 * ib_get_client_data(). This can only be called while the client is 1901 * registered to the device, once the ib_client remove() callback returns this 1902 * cannot be called. 
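 *
 * A minimal client sketch (the "foo" names and struct foo_ctx are
 * hypothetical):
 *
 *	static struct ib_client foo_client;
 *
 *	static void foo_add_one(struct ib_device *device)
 *	{
 *		struct foo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 *
 *		if (!ctx)
 *			return;
 *		ib_set_client_data(device, &foo_client, ctx);
 *	}
 *
 *	static void foo_remove_one(struct ib_device *device, void *client_data)
 *	{
 *		kfree(client_data);
 *	}
 *
 *	static struct ib_client foo_client = {
 *		.name	= "foo",
 *		.add	= foo_add_one,
 *		.remove	= foo_remove_one,
 *	};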
1903 */ 1904 void ib_set_client_data(struct ib_device *device, struct ib_client *client, 1905 void *data) 1906 { 1907 void *rc; 1908 1909 if (WARN_ON(IS_ERR(data))) 1910 data = NULL; 1911 1912 rc = xa_store(&device->client_data, client->client_id, data, 1913 GFP_KERNEL); 1914 WARN_ON(xa_is_err(rc)); 1915 } 1916 EXPORT_SYMBOL(ib_set_client_data); 1917 1918 /** 1919 * ib_register_event_handler - Register an IB event handler 1920 * @event_handler:Handler to register 1921 * 1922 * ib_register_event_handler() registers an event handler that will be 1923 * called back when asynchronous IB events occur (as defined in 1924 * chapter 11 of the InfiniBand Architecture Specification). This 1925 * callback may occur in interrupt context. 1926 */ 1927 void ib_register_event_handler(struct ib_event_handler *event_handler) 1928 { 1929 unsigned long flags; 1930 1931 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); 1932 list_add_tail(&event_handler->list, 1933 &event_handler->device->event_handler_list); 1934 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); 1935 } 1936 EXPORT_SYMBOL(ib_register_event_handler); 1937 1938 /** 1939 * ib_unregister_event_handler - Unregister an event handler 1940 * @event_handler:Handler to unregister 1941 * 1942 * Unregister an event handler registered with 1943 * ib_register_event_handler(). 1944 */ 1945 void ib_unregister_event_handler(struct ib_event_handler *event_handler) 1946 { 1947 unsigned long flags; 1948 1949 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); 1950 list_del(&event_handler->list); 1951 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); 1952 } 1953 EXPORT_SYMBOL(ib_unregister_event_handler); 1954 1955 /** 1956 * ib_dispatch_event - Dispatch an asynchronous event 1957 * @event:Event to dispatch 1958 * 1959 * Low-level drivers must call ib_dispatch_event() to dispatch the 1960 * event to all registered event handlers when an asynchronous event 1961 * occurs. 1962 */ 1963 void ib_dispatch_event(struct ib_event *event) 1964 { 1965 unsigned long flags; 1966 struct ib_event_handler *handler; 1967 1968 spin_lock_irqsave(&event->device->event_handler_lock, flags); 1969 1970 list_for_each_entry(handler, &event->device->event_handler_list, list) 1971 handler->handler(handler, event); 1972 1973 spin_unlock_irqrestore(&event->device->event_handler_lock, flags); 1974 } 1975 EXPORT_SYMBOL(ib_dispatch_event); 1976 1977 /** 1978 * ib_query_port - Query IB port attributes 1979 * @device:Device to query 1980 * @port_num:Port number to query 1981 * @port_attr:Port attributes 1982 * 1983 * ib_query_port() returns the attributes of a port through the 1984 * @port_attr pointer. 
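 *
 * Example (sketch):
 *
 *	struct ib_port_attr attr;
 *
 *	if (!ib_query_port(device, port_num, &attr))
 *		pr_info("port %u state %d\n", port_num, attr.state);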
/**
 * ib_query_port - Query IB port attributes
 * @device:Device to query
 * @port_num:Port number to query
 * @port_attr:Port attributes
 *
 * ib_query_port() returns the attributes of a port through the
 * @port_attr pointer.
 */
int ib_query_port(struct ib_device *device,
		  u8 port_num,
		  struct ib_port_attr *port_attr)
{
	union ib_gid gid;
	int err;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	memset(port_attr, 0, sizeof(*port_attr));
	err = device->ops.query_port(device, port_num, port_attr);
	if (err || port_attr->subnet_prefix)
		return err;

	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
		return 0;

	err = device->ops.query_gid(device, port_num, 0, &gid);
	if (err)
		return err;

	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
	return 0;
}
EXPORT_SYMBOL(ib_query_port);

static void add_ndev_hash(struct ib_port_data *pdata)
{
	unsigned long flags;

	might_sleep();

	spin_lock_irqsave(&ndev_hash_lock, flags);
	if (hash_hashed(&pdata->ndev_hash_link)) {
		hash_del_rcu(&pdata->ndev_hash_link);
		spin_unlock_irqrestore(&ndev_hash_lock, flags);
		/*
		 * We cannot do hash_add_rcu after a hash_del_rcu until the
		 * grace period
		 */
		synchronize_rcu();
		spin_lock_irqsave(&ndev_hash_lock, flags);
	}
	if (pdata->netdev)
		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
			     (uintptr_t)pdata->netdev);
	spin_unlock_irqrestore(&ndev_hash_lock, flags);
}

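/*
 * Illustrative sketch: a typical ib_query_port() caller checks the port
 * state and attributes returned above before using the port. The ibdev
 * and port variables are hypothetical.
 *
 *	struct ib_port_attr attr;
 *	int err;
 *
 *	err = ib_query_port(ibdev, port, &attr);
 *	if (err)
 *		return err;
 *	if (attr.state != IB_PORT_ACTIVE)
 *		return -ENETDOWN;
 *	pr_debug("port %u lid 0x%x mtu %d\n", port, attr.lid, attr.max_mtu);
 */
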
/**
 * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
 * @ib_dev: Device to modify
 * @ndev: net_device to affiliate, may be NULL
 * @port: IB port the net_device is connected to
 *
 * Drivers should use this to link the ib_device to a netdev so the netdev
 * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
 * affiliated with any port.
 *
 * The caller must ensure that the given ndev is not unregistered or
 * unregistering, and that either the ib_device is unregistered or
 * ib_device_set_netdev() is called with NULL when the ndev sends a
 * NETDEV_UNREGISTER event.
 */
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
			 unsigned int port)
{
	struct net_device *old_ndev;
	struct ib_port_data *pdata;
	unsigned long flags;
	int ret;

	/*
	 * Drivers wish to call this before ib_register_device(), so we have
	 * to set up the port data early.
	 */
	ret = alloc_port_data(ib_dev);
	if (ret)
		return ret;

	if (!rdma_is_port_valid(ib_dev, port))
		return -EINVAL;

	pdata = &ib_dev->port_data[port];
	spin_lock_irqsave(&pdata->netdev_lock, flags);
	old_ndev = rcu_dereference_protected(
		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
	if (old_ndev == ndev) {
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
		return 0;
	}

	if (ndev)
		dev_hold(ndev);
	rcu_assign_pointer(pdata->netdev, ndev);
	spin_unlock_irqrestore(&pdata->netdev_lock, flags);

	add_ndev_hash(pdata);
	if (old_ndev)
		dev_put(old_ndev);

	return 0;
}
EXPORT_SYMBOL(ib_device_set_netdev);

static void free_netdevs(struct ib_device *ib_dev)
{
	unsigned long flags;
	unsigned int port;

	if (!ib_dev->port_data)
		return;

	rdma_for_each_port (ib_dev, port) {
		struct ib_port_data *pdata = &ib_dev->port_data[port];
		struct net_device *ndev;

		spin_lock_irqsave(&pdata->netdev_lock, flags);
		ndev = rcu_dereference_protected(
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (ndev) {
			spin_lock(&ndev_hash_lock);
			hash_del_rcu(&pdata->ndev_hash_link);
			spin_unlock(&ndev_hash_lock);

			/*
			 * If this is the last dev_put there is still a
			 * synchronize_rcu before the netdev is kfreed, so we
			 * can continue to rely on unlocked pointer
			 * comparisons after the put
			 */
			rcu_assign_pointer(pdata->netdev, NULL);
			dev_put(ndev);
		}
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
	}
}

struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
					unsigned int port)
{
	struct ib_port_data *pdata;
	struct net_device *res;

	if (!rdma_is_port_valid(ib_dev, port))
		return NULL;

	pdata = &ib_dev->port_data[port];

	/*
	 * New drivers should use ib_device_set_netdev() not the legacy
	 * get_netdev().
	 */
	if (ib_dev->ops.get_netdev)
		res = ib_dev->ops.get_netdev(ib_dev, port);
	else {
		spin_lock(&pdata->netdev_lock);
		res = rcu_dereference_protected(
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (res)
			dev_hold(res);
		spin_unlock(&pdata->netdev_lock);
	}

	/*
	 * If we are starting to unregister, expedite things by preventing
	 * propagation of an unregistering netdev.
	 */
	if (res && res->reg_state != NETREG_REGISTERED) {
		dev_put(res);
		return NULL;
	}

	return res;
}

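/*
 * Illustrative sketch: a RoCE provider driver typically calls
 * ib_device_set_netdev() once per port before registering the device, and
 * again with a NULL ndev when the underlying netdev goes away. The "drv"
 * structure and its members are hypothetical.
 *
 *	// at device creation, before ib_register_device()
 *	err = ib_device_set_netdev(&drv->ib_dev, drv->netdev, 1);
 *	if (err)
 *		goto err_free;
 *
 *	// on NETDEV_UNREGISTER for drv->netdev
 *	ib_device_set_netdev(&drv->ib_dev, NULL, 1);
 */
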
/**
 * ib_device_get_by_netdev - Find an IB device associated with a netdev
 * @ndev: netdev to locate
 * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
 *
 * Find and hold an ib_device that is associated with a netdev via
 * ib_device_set_netdev(). The caller must call ib_device_put() on the
 * returned pointer.
 */
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
					  enum rdma_driver_id driver_id)
{
	struct ib_device *res = NULL;
	struct ib_port_data *cur;

	rcu_read_lock();
	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
				    (uintptr_t)ndev) {
		if (rcu_access_pointer(cur->netdev) == ndev &&
		    (driver_id == RDMA_DRIVER_UNKNOWN ||
		     cur->ib_dev->ops.driver_id == driver_id) &&
		    ib_device_try_get(cur->ib_dev)) {
			res = cur->ib_dev;
			break;
		}
	}
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(ib_device_get_by_netdev);

/**
 * ib_enum_roce_netdev - enumerate all RoCE ports
 * @ib_dev: IB device we want to query
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each found RoCE port
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates all of the physical RoCE ports of ib_dev which are related to
 * a netdevice and calls cb() on each port for which filter() returns
 * non-zero.
 */
void ib_enum_roce_netdev(struct ib_device *ib_dev,
			 roce_netdev_filter filter,
			 void *filter_cookie,
			 roce_netdev_callback cb,
			 void *cookie)
{
	unsigned int port;

	rdma_for_each_port (ib_dev, port)
		if (rdma_protocol_roce(ib_dev, port)) {
			struct net_device *idev =
				ib_device_get_netdev(ib_dev, port);

			if (filter(ib_dev, port, idev, filter_cookie))
				cb(ib_dev, port, idev, cookie);

			if (idev)
				dev_put(idev);
		}
}

/**
 * ib_enum_all_roce_netdevs - enumerate all RoCE devices
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each found RoCE port
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates the physical ports of all RoCE devices which are related to
 * netdevices and calls cb() on each port for which filter() returns
 * non-zero.
 */
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
			      void *filter_cookie,
			      roce_netdev_callback cb,
			      void *cookie)
{
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
	up_read(&devices_rwsem);
}

/**
 * ib_enum_all_devs - enumerate all ib_devices
 * @nldev_cb: Callback to call for each found ib_device
 * @skb: Netlink message buffer being filled in
 * @cb: Netlink callback context
 *
 * Enumerates all ib_devices and calls the callback on each device.
 */
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
		     struct netlink_callback *cb)
{
	unsigned long index;
	struct ib_device *dev;
	unsigned int idx = 0;
	int ret = 0;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		if (!rdma_dev_access_netns(dev, sock_net(skb->sk)))
			continue;

		ret = nldev_cb(dev, skb, cb, idx);
		if (ret)
			break;
		idx++;
	}
	up_read(&devices_rwsem);
	return ret;
}

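/*
 * Illustrative sketch: resolving the RDMA device behind a netdev and
 * dropping the reference again. "ndev" is a hypothetical net_device the
 * caller already holds.
 *
 *	struct ib_device *ibdev;
 *
 *	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
 *	if (!ibdev)
 *		return -ENODEV;
 *	pr_debug("netdev %s is backed by %s\n", ndev->name,
 *		 dev_name(&ibdev->dev));
 *	ib_device_put(ibdev);
 */
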
/**
 * ib_query_pkey - Get P_Key table entry
 * @device:Device to query
 * @port_num:Port number to query
 * @index:P_Key table index to query
 * @pkey:Returned P_Key
 *
 * ib_query_pkey() fetches the specified P_Key table entry.
 */
int ib_query_pkey(struct ib_device *device,
		  u8 port_num, u16 index, u16 *pkey)
{
	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);

/**
 * ib_modify_device - Change IB device attributes
 * @device:Device to modify
 * @device_modify_mask:Mask of attributes to change
 * @device_modify:New attribute values
 *
 * ib_modify_device() changes a device's attributes as specified by
 * the @device_modify_mask and @device_modify structure.
 */
int ib_modify_device(struct ib_device *device,
		     int device_modify_mask,
		     struct ib_device_modify *device_modify)
{
	if (!device->ops.modify_device)
		return -ENOSYS;

	return device->ops.modify_device(device, device_modify_mask,
					 device_modify);
}
EXPORT_SYMBOL(ib_modify_device);

/**
 * ib_modify_port - Modifies the attributes for the specified port.
 * @device: The device to modify.
 * @port_num: The number of the port to modify.
 * @port_modify_mask: Mask used to specify which attributes of the port
 *   to change.
 * @port_modify: New attribute values for the port.
 *
 * ib_modify_port() changes a port's attributes as specified by the
 * @port_modify_mask and @port_modify structure.
 */
int ib_modify_port(struct ib_device *device,
		   u8 port_num, int port_modify_mask,
		   struct ib_port_modify *port_modify)
{
	int rc;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	if (device->ops.modify_port)
		rc = device->ops.modify_port(device, port_num,
					     port_modify_mask,
					     port_modify);
	else
		rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
	return rc;
}
EXPORT_SYMBOL(ib_modify_port);

/**
 * ib_find_gid - Returns the port number and GID table index where
 *   a specified GID value occurs. It searches only for the IB link layer.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the GID table where the GID was found. This
 *   parameter may be NULL.
 */
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
		u8 *port_num, u16 *index)
{
	union ib_gid tmp_gid;
	unsigned int port;
	int ret, i;

	rdma_for_each_port (device, port) {
		if (!rdma_protocol_ib(device, port))
			continue;

		for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
		     ++i) {
			ret = rdma_query_gid(device, port, i, &tmp_gid);
			if (ret)
				return ret;
			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
				*port_num = port;
				if (index)
					*index = i;
				return 0;
			}
		}
	}

	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);

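/*
 * Illustrative sketch: advertising a port capability bit via
 * ib_modify_port(), the way a management client might announce CM support.
 * The ibdev and port variables are hypothetical.
 *
 *	struct ib_port_modify port_modify = {
 *		.set_port_cap_mask = IB_PORT_CM_SUP,
 *	};
 *	int err;
 *
 *	err = ib_modify_port(ibdev, port, 0, &port_modify);
 *	if (err && err != -ENOSYS)
 *		pr_warn("failed to set CM_SUP on port %u: %d\n", port, err);
 */
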
/**
 * ib_find_pkey - Returns the PKey table index where a specified
 *   PKey value occurs.
 * @device: The device to query.
 * @port_num: The port number of the device to search for the PKey.
 * @pkey: The PKey value to search for.
 * @index: The index into the PKey table where the PKey was found.
 */
int ib_find_pkey(struct ib_device *device,
		 u8 port_num, u16 pkey, u16 *index)
{
	int ret, i;
	u16 tmp_pkey;
	int partial_ix = -1;

	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
	     ++i) {
		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
		if (ret)
			return ret;
		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
			/* if there is a full-member pkey take it */
			if (tmp_pkey & 0x8000) {
				*index = i;
				return 0;
			}
			if (partial_ix < 0)
				partial_ix = i;
		}
	}

	/* no full-member, if exists take the limited */
	if (partial_ix >= 0) {
		*index = partial_ix;
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);

/**
 * ib_get_net_dev_by_params() - Return the appropriate net_dev
 * for a received CM request
 * @dev: An RDMA device on which the request has been received.
 * @port: Port number on the RDMA device.
 * @pkey: The Pkey the request came on.
 * @gid: A GID that the net_dev uses to communicate.
 * @addr: Contains the IP address that the request specified as its
 *   destination.
 */
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
					    u8 port,
					    u16 pkey,
					    const union ib_gid *gid,
					    const struct sockaddr *addr)
{
	struct net_device *net_dev = NULL;
	unsigned long index;
	void *client_data;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	/*
	 * Holding the read side guarantees that the client will not become
	 * unregistered while we are calling get_net_dev_by_params()
	 */
	down_read(&dev->client_data_rwsem);
	xan_for_each_marked (&dev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || !client->get_net_dev_by_params)
			continue;

		net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
							addr, client_data);
		if (net_dev)
			break;
	}
	up_read(&dev->client_data_rwsem);

	return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);

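/*
 * Illustrative sketch: looking up the table index of the default PKey.
 * ib_find_pkey() prefers a full-membership entry (bit 15 set) and falls
 * back to a limited-membership match, as implemented above. The ibdev and
 * port variables are hypothetical.
 *
 *	u16 pkey_index;
 *	int err;
 *
 *	err = ib_find_pkey(ibdev, port, 0xffff, &pkey_index);
 *	if (err)
 *		pr_warn("default PKey not found on port %u\n", port);
 */
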
void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
{
	struct ib_device_ops *dev_ops = &dev->ops;
#define SET_DEVICE_OP(ptr, name) \
	do { \
		if (ops->name) \
			if (!((ptr)->name)) \
				(ptr)->name = ops->name; \
	} while (0)

#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)

	if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
		WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
			dev_ops->driver_id != ops->driver_id);
		dev_ops->driver_id = ops->driver_id;
	}
	if (ops->owner) {
		WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
		dev_ops->owner = ops->owner;
	}
	if (ops->uverbs_abi_ver)
		dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;

	dev_ops->uverbs_no_driver_id_binding |=
		ops->uverbs_no_driver_id_binding;

	SET_DEVICE_OP(dev_ops, add_gid);
	SET_DEVICE_OP(dev_ops, advise_mr);
	SET_DEVICE_OP(dev_ops, alloc_dm);
	SET_DEVICE_OP(dev_ops, alloc_fmr);
	SET_DEVICE_OP(dev_ops, alloc_hw_stats);
	SET_DEVICE_OP(dev_ops, alloc_mr);
	SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
	SET_DEVICE_OP(dev_ops, alloc_mw);
	SET_DEVICE_OP(dev_ops, alloc_pd);
	SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
	SET_DEVICE_OP(dev_ops, alloc_ucontext);
	SET_DEVICE_OP(dev_ops, alloc_xrcd);
	SET_DEVICE_OP(dev_ops, attach_mcast);
	SET_DEVICE_OP(dev_ops, check_mr_status);
	SET_DEVICE_OP(dev_ops, counter_alloc_stats);
	SET_DEVICE_OP(dev_ops, counter_bind_qp);
	SET_DEVICE_OP(dev_ops, counter_dealloc);
	SET_DEVICE_OP(dev_ops, counter_unbind_qp);
	SET_DEVICE_OP(dev_ops, counter_update_stats);
	SET_DEVICE_OP(dev_ops, create_ah);
	SET_DEVICE_OP(dev_ops, create_counters);
	SET_DEVICE_OP(dev_ops, create_cq);
	SET_DEVICE_OP(dev_ops, create_flow);
	SET_DEVICE_OP(dev_ops, create_flow_action_esp);
	SET_DEVICE_OP(dev_ops, create_qp);
	SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
	SET_DEVICE_OP(dev_ops, create_srq);
	SET_DEVICE_OP(dev_ops, create_wq);
	SET_DEVICE_OP(dev_ops, dealloc_dm);
	SET_DEVICE_OP(dev_ops, dealloc_driver);
	SET_DEVICE_OP(dev_ops, dealloc_fmr);
	SET_DEVICE_OP(dev_ops, dealloc_mw);
	SET_DEVICE_OP(dev_ops, dealloc_pd);
	SET_DEVICE_OP(dev_ops, dealloc_ucontext);
	SET_DEVICE_OP(dev_ops, dealloc_xrcd);
	SET_DEVICE_OP(dev_ops, del_gid);
	SET_DEVICE_OP(dev_ops, dereg_mr);
	SET_DEVICE_OP(dev_ops, destroy_ah);
	SET_DEVICE_OP(dev_ops, destroy_counters);
	SET_DEVICE_OP(dev_ops, destroy_cq);
	SET_DEVICE_OP(dev_ops, destroy_flow);
	SET_DEVICE_OP(dev_ops, destroy_flow_action);
	SET_DEVICE_OP(dev_ops, destroy_qp);
	SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
	SET_DEVICE_OP(dev_ops, destroy_srq);
	SET_DEVICE_OP(dev_ops, destroy_wq);
	SET_DEVICE_OP(dev_ops, detach_mcast);
	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
	SET_DEVICE_OP(dev_ops, drain_rq);
	SET_DEVICE_OP(dev_ops, drain_sq);
	SET_DEVICE_OP(dev_ops, enable_driver);
	SET_DEVICE_OP(dev_ops, fill_res_entry);
	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
	SET_DEVICE_OP(dev_ops, get_dma_mr);
	SET_DEVICE_OP(dev_ops, get_hw_stats);
	SET_DEVICE_OP(dev_ops, get_link_layer);
	SET_DEVICE_OP(dev_ops, get_netdev);
	SET_DEVICE_OP(dev_ops, get_port_immutable);
	SET_DEVICE_OP(dev_ops, get_vector_affinity);
	SET_DEVICE_OP(dev_ops, get_vf_config);
	SET_DEVICE_OP(dev_ops, get_vf_stats);
	SET_DEVICE_OP(dev_ops, init_port);
	SET_DEVICE_OP(dev_ops, iw_accept);
	SET_DEVICE_OP(dev_ops, iw_add_ref);
	SET_DEVICE_OP(dev_ops, iw_connect);
	SET_DEVICE_OP(dev_ops, iw_create_listen);
	SET_DEVICE_OP(dev_ops, iw_destroy_listen);
	SET_DEVICE_OP(dev_ops, iw_get_qp);
	SET_DEVICE_OP(dev_ops, iw_reject);
	SET_DEVICE_OP(dev_ops, iw_rem_ref);
	SET_DEVICE_OP(dev_ops, map_mr_sg);
	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
	SET_DEVICE_OP(dev_ops, map_phys_fmr);
	SET_DEVICE_OP(dev_ops, mmap);
	SET_DEVICE_OP(dev_ops, modify_ah);
	SET_DEVICE_OP(dev_ops, modify_cq);
	SET_DEVICE_OP(dev_ops, modify_device);
	SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
	SET_DEVICE_OP(dev_ops, modify_port);
	SET_DEVICE_OP(dev_ops, modify_qp);
	SET_DEVICE_OP(dev_ops, modify_srq);
	SET_DEVICE_OP(dev_ops, modify_wq);
	SET_DEVICE_OP(dev_ops, peek_cq);
	SET_DEVICE_OP(dev_ops, poll_cq);
	SET_DEVICE_OP(dev_ops, post_recv);
	SET_DEVICE_OP(dev_ops, post_send);
	SET_DEVICE_OP(dev_ops, post_srq_recv);
	SET_DEVICE_OP(dev_ops, process_mad);
	SET_DEVICE_OP(dev_ops, query_ah);
	SET_DEVICE_OP(dev_ops, query_device);
	SET_DEVICE_OP(dev_ops, query_gid);
	SET_DEVICE_OP(dev_ops, query_pkey);
	SET_DEVICE_OP(dev_ops, query_port);
	SET_DEVICE_OP(dev_ops, query_qp);
	SET_DEVICE_OP(dev_ops, query_srq);
	SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
	SET_DEVICE_OP(dev_ops, read_counters);
	SET_DEVICE_OP(dev_ops, reg_dm_mr);
	SET_DEVICE_OP(dev_ops, reg_user_mr);
	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
	SET_DEVICE_OP(dev_ops, req_notify_cq);
	SET_DEVICE_OP(dev_ops, rereg_user_mr);
	SET_DEVICE_OP(dev_ops, resize_cq);
	SET_DEVICE_OP(dev_ops, set_vf_guid);
	SET_DEVICE_OP(dev_ops, set_vf_link_state);
	SET_DEVICE_OP(dev_ops, unmap_fmr);

	SET_OBJ_SIZE(dev_ops, ib_ah);
	SET_OBJ_SIZE(dev_ops, ib_cq);
	SET_OBJ_SIZE(dev_ops, ib_pd);
	SET_OBJ_SIZE(dev_ops, ib_srq);
	SET_OBJ_SIZE(dev_ops, ib_ucontext);
}
EXPORT_SYMBOL(ib_set_device_ops);

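/*
 * Illustrative sketch: a provider driver fills in a const ops table and
 * merges it into its ib_device before registration; SET_DEVICE_OP() above
 * only copies callbacks that are not already set. The "example_*" names
 * are hypothetical.
 *
 *	static const struct ib_device_ops example_dev_ops = {
 *		.owner = THIS_MODULE,
 *		.driver_id = RDMA_DRIVER_UNKNOWN, // a real driver uses its own id
 *		.query_device = example_query_device,
 *		.query_port = example_query_port,
 *	};
 *
 *	ib_set_device_ops(&edev->ib_dev, &example_dev_ops);
 */
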
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
	[RDMA_NL_LS_OP_RESOLVE] = {
		.doit = ib_nl_handle_resolve_resp,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NL_LS_OP_SET_TIMEOUT] = {
		.doit = ib_nl_handle_set_timeout,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NL_LS_OP_IP_RESOLVE] = {
		.doit = ib_nl_handle_ip_res_resp,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};

static int __init ib_core_init(void)
{
	int ret;

	ib_wq = alloc_workqueue("infiniband", 0, 0);
	if (!ib_wq)
		return -ENOMEM;

	ib_comp_wq = alloc_workqueue("ib-comp-wq",
			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
	if (!ib_comp_wq) {
		ret = -ENOMEM;
		goto err;
	}

	ib_comp_unbound_wq =
		alloc_workqueue("ib-comp-unb-wq",
				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
	if (!ib_comp_unbound_wq) {
		ret = -ENOMEM;
		goto err_comp;
	}

	ret = class_register(&ib_class);
	if (ret) {
		pr_warn("Couldn't create InfiniBand device class\n");
		goto err_comp_unbound;
	}

	ret = rdma_nl_init();
	if (ret) {
		pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
		goto err_sysfs;
	}

	ret = addr_init();
	if (ret) {
		pr_warn("Couldn't init IB address resolution\n");
		goto err_ibnl;
	}

	ret = ib_mad_init();
	if (ret) {
		pr_warn("Couldn't init IB MAD\n");
		goto err_addr;
	}

	ret = ib_sa_init();
	if (ret) {
		pr_warn("Couldn't init SA\n");
		goto err_mad;
	}

	ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
	if (ret) {
		pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
		goto err_sa;
	}

	ret = register_pernet_device(&rdma_dev_net_ops);
	if (ret) {
		pr_warn("Couldn't init compat dev. ret %d\n", ret);
		goto err_compat;
	}

	nldev_init();
	rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
	roce_gid_mgmt_init();

	return 0;

err_compat:
	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
	ib_sa_cleanup();
err_mad:
	ib_mad_cleanup();
err_addr:
	addr_cleanup();
err_ibnl:
	rdma_nl_exit();
err_sysfs:
	class_unregister(&ib_class);
err_comp_unbound:
	destroy_workqueue(ib_comp_unbound_wq);
err_comp:
	destroy_workqueue(ib_comp_wq);
err:
	destroy_workqueue(ib_wq);
	return ret;
}

static void __exit ib_core_cleanup(void)
{
	roce_gid_mgmt_cleanup();
	nldev_exit();
	rdma_nl_unregister(RDMA_NL_LS);
	unregister_pernet_device(&rdma_dev_net_ops);
	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
	ib_sa_cleanup();
	ib_mad_cleanup();
	addr_cleanup();
	rdma_nl_exit();
	class_unregister(&ib_class);
	destroy_workqueue(ib_comp_unbound_wq);
	destroy_workqueue(ib_comp_wq);
	/* Make sure that any pending umem accounting work is done. */
	destroy_workqueue(ib_wq);
	flush_workqueue(system_unbound_wq);
	WARN_ON(!xa_empty(&clients));
	WARN_ON(!xa_empty(&devices));
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);

/*
 * ib_core relies on the netdev stack registering the net_ns_type_operations
 * ns kobject type before ib_core initialization.
 */
fs_initcall(ib_core_init);
module_exit(ib_core_cleanup);