/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
#include <net/ip6_route.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18

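/*
 * Human-readable names for RDMA CM events, indexed by event code.  The
 * table is meant to be used through rdma_event_msg(); for example, a
 * consumer's event handler might log:
 *
 *	pr_info("cma: %s (%d)\n", rdma_event_msg(event->event), event->event);
 *
 * Codes without an entry fall back to "unrecognized event".
 */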
static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR] = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ",
	[RDMA_CM_EVENT_ROUTE_ERROR] = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR] = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE] = "unreachable",
	[RDMA_CM_EVENT_REJECTED] = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED] = "established",
	[RDMA_CM_EVENT_DISCONNECTED] = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE] = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);

const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
						int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return ibcm_reject_msg(reason);

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return iwcm_reject_msg(reason);

	WARN_ON_ONCE(1);
	return "unrecognized transport";
}
EXPORT_SYMBOL(rdma_reject_msg);

bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return reason == IB_CM_REJ_CONSUMER_DEFINED;

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return reason == -ECONNREFUSED;

	WARN_ON_ONCE(1);
	return false;
}
EXPORT_SYMBOL(rdma_is_consumer_reject);

const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
				      struct rdma_cm_event *ev, u8 *data_len)
{
	const void *p;

	if (rdma_is_consumer_reject(id, ev->status)) {
		*data_len = ev->param.conn.private_data_len;
		p = ev->param.conn.private_data;
	} else {
		*data_len = 0;
		p = NULL;
	}
	return p;
}
EXPORT_SYMBOL(rdma_consumer_reject_data);

static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cma_client = {
	.name = "cma",
	.add = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;

struct cma_pernet {
	struct idr tcp_ps;
	struct idr udp_ps;
	struct idr ipoib_ps;
	struct idr ib_ps;
};

static struct cma_pernet *cma_pernet(struct net *net)
{
	return net_generic(net, cma_pernet_id);
}

static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
{
	struct cma_pernet *pernet = cma_pernet(net);

	switch (ps) {
	case RDMA_PS_TCP:
		return &pernet->tcp_ps;
	case RDMA_PS_UDP:
		return &pernet->udp_ps;
	case RDMA_PS_IPOIB:
		return &pernet->ipoib_ps;
	case RDMA_PS_IB:
		return &pernet->ib_ps;
	default:
		return NULL;
	}
}

struct cma_device {
	struct list_head list;
	struct ib_device *device;
	struct completion comp;
	atomic_t refcount;
	struct list_head id_list;
	enum ib_gid_type *default_gid_type;
	u8 *default_roce_tos;
};

struct rdma_bind_list {
	enum rdma_port_space ps;
	struct hlist_head owners;
	unsigned short port;
};

struct class_port_info_context {
	struct ib_class_port_info *class_port_info;
	struct ib_device *device;
	struct completion done;
	struct ib_sa_query *sa_query;
	u8 port_num;
};

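/*
 * Port-space bookkeeping.  Each network namespace gets its own set of
 * idr trees (one per RDMA port space: TCP, UDP, IPOIB and IB), keyed by
 * port number and holding the rdma_bind_list that owns that port.  The
 * helpers below are thin wrappers around idr_alloc()/idr_find()/
 * idr_remove() on the idr selected by cma_pernet_idr().
 */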
static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
			struct rdma_bind_list *bind_list, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}

static struct rdma_bind_list *cma_ps_find(struct net *net,
					  enum rdma_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_find(idr, snum);
}

static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	idr_remove(idr, snum);
}

enum {
	CMA_OPTION_AFONLY,
};

void cma_ref_dev(struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;

	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_ref_dev(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

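/*
 * Per-port defaults.  cma_device keeps one default GID type and one
 * default RoCE type-of-service value per port; both arrays are indexed
 * by (port - rdma_start_port(device)), so port 1 of a device whose
 * first port is 1 lands at index 0.  Setting a default GID type is only
 * allowed if roce_gid_type_mask_support() reports it for that port.
 */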
int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
			     u8 default_roce_tos)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
		default_roce_tos;

	return 0;
}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at anytime, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */
struct rdma_id_private {
	struct rdma_cm_id id;

	struct rdma_bind_list *bind_list;
	struct hlist_node node;
	struct list_head list; /* listen_any_list or cma_device.list */
	struct list_head listen_list; /* per device listens */
	struct cma_device *cma_dev;
	struct list_head mc_list;

	int internal_id;
	enum rdma_cm_state state;
	spinlock_t lock;
	struct mutex qp_mutex;

	struct completion comp;
	atomic_t refcount;
	struct mutex handler_mutex;

	int backlog;
	int timeout_ms;
	struct ib_sa_query *query;
	int query_id;
	union {
		struct ib_cm_id *ib;
		struct iw_cm_id *iw;
	} cm_id;

	u32 seq_num;
	u32 qkey;
	u32 qp_num;
	pid_t owner;
	u32 options;
	u8 srq;
	u8 tos;
	bool tos_set;
	u8 reuseaddr;
	u8 afonly;
	enum ib_gid_type gid_type;
};

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head list;
	void *context;
	struct sockaddr_storage addr;
	struct kref mcref;
	bool igmp_joined;
	u8 join_state;
};

struct cma_work {
	struct work_struct work;
	struct rdma_id_private *id;
	enum rdma_cm_state old_state;
	enum rdma_cm_state new_state;
	struct rdma_cm_event event;
};

struct cma_ndev_work {
	struct work_struct work;
	struct rdma_id_private *id;
	struct rdma_cm_event event;
};

struct iboe_mcast_work {
	struct work_struct work;
	struct rdma_id_private *id;
	struct cma_multicast *mc;
};

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__be32 pad[3];
		__be32 addr;
	} ip4;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version; /* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00

struct cma_req_info {
	struct ib_device *device;
	int port;
	union ib_gid local_gid;
	__be64 service_id;
	u16 pkey;
	bool has_gid:1;
};

static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

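/*
 * The CMA private-data header stores the IP version in the high nibble
 * of ip_version (bits 7:4), leaving the low nibble untouched.  For
 * example, an IPv4 header is marked with cma_set_ip_ver(hdr, 4), which
 * yields ip_version == 0x40, and cma_get_ip_ver() then returns 4.
 */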
static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
	struct in_device *in_dev = NULL;

	if (ndev) {
		rtnl_lock();
		in_dev = __in_dev_get_rtnl(ndev);
		if (in_dev) {
			if (join)
				ip_mc_inc_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
			else
				ip_mc_dec_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
		}
		rtnl_unlock();
	}
	return (in_dev) ? 0 : -ENODEV;
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->gid_type = 0;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr, NULL);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
		ret = 0;
	}

	return ret;
}

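/*
 * cma_validate_port() checks that a candidate device/port can actually
 * carry the address being resolved: the link type must match (IB ports
 * only accept ARPHRD_INFINIBAND addresses and vice versa), and the GID
 * must be present in the port's GID table.  For RoCE, a loopback bound
 * interface is replaced by the netdev reported by the driver's
 * get_netdev() hook before the cached-GID lookup.
 */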
static inline int cma_validate_port(struct ib_device *device, u8 port,
				    enum ib_gid_type gid_type,
				    union ib_gid *gid, int dev_type,
				    int bound_if_index)
{
	int ret = -ENODEV;
	struct net_device *ndev = NULL;

	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ret;

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ret;

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(&init_net, bound_if_index);
		if (ndev && ndev->flags & IFF_LOOPBACK) {
			pr_info("detected loopback device\n");
			dev_put(ndev);

			if (!device->get_netdev)
				return -EOPNOTSUPP;

			ndev = device->get_netdev(device, port);
			if (!ndev)
				return -ENODEV;
		}
	} else {
		gid_type = IB_GID_TYPE_IB;
	}

	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
					 ndev, NULL);

	if (ndev)
		dev_put(ndev);

	return ret;
}

static int cma_acquire_dev(struct rdma_id_private *id_priv,
			   struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid, iboe_gid, *gidp;
	int ret = -ENODEV;
	u8 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	mutex_lock(&lock);
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);

	if (listen_id_priv) {
		cma_dev = listen_id_priv->cma_dev;
		port = listen_id_priv->id.port_num;
		gidp = rdma_protocol_roce(cma_dev->device, port) ?
		       &iboe_gid : &gid;

		ret = cma_validate_port(cma_dev->device, port,
					rdma_protocol_ib(cma_dev->device, port) ?
					IB_GID_TYPE_IB :
					listen_id_priv->gid_type, gidp,
					dev_addr->dev_type,
					dev_addr->bound_dev_if);
		if (!ret) {
			id_priv->id.port_num = port;
			goto out;
		}
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv &&
			    listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
						rdma_protocol_ib(cma_dev->device, port) ?
						IB_GID_TYPE_IB :
						cma_dev->default_gid_type[port - 1],
						gidp, dev_addr->dev_type,
						dev_addr->bound_dev_if);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

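/*
 * AF_IB destinations are resolved directly against the local GID
 * tables: an exact GID match on any port wins immediately, otherwise
 * the first active port whose GID shares the destination's subnet
 * prefix (and which carries the requested P_Key) is remembered as a
 * fallback.
 */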
/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	u16 pkey, index;
	u8 p;
	enum ib_port_state port_state;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
				continue;
			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
						       &gid, NULL);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
				    dgid->global.subnet_prefix) &&
				    port_state == IB_PORT_ACTIVE) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}

	if (!cma_dev)
		return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *rdma_create_id(struct net *net,
				  rdma_cm_event_handler event_handler,
				  void *context, enum rdma_port_space ps,
				  enum ib_qp_type qp_type)
{
	struct rdma_id_private *id_priv;

	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	id_priv->owner = task_pid_nr(current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	id_priv->tos_set = false;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = get_net(net);

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);

static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

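/*
 * A UD QP is not driven through a CM handshake, so cma_init_ud_qp()
 * walks it all the way to RTS up front (INIT -> RTR -> RTS).  Connected
 * QPs, by contrast, are only moved to INIT here; the RTR/RTS
 * transitions happen later from cma_modify_qp_rtr()/cma_modify_qp_rts()
 * as the connection is established.
 */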
static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	union ib_gid sgid;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
			   qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
	if (ret)
		goto out;

	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

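/*
 * For IB, the pkey index programmed into the QP is looked up from the
 * pkey carried in the bound address; RoCE ports always use the default
 * 0xffff pkey.  UD-style IDs additionally get a qkey: RDMA_UDP_QKEY for
 * the UDP and IB port spaces, or the multicast group's qkey for IPoIB.
 */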
static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
		pkey = 0xffff;
	else
		pkey = ib_addr_get_pkey(dev_addr);

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (id_priv->id.qp_type == IB_QPT_UD) {
		ret = cma_set_qkey(id_priv, 0);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		      int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);

		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
	} else
		ret = -ENOSYS;

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_any_addr(struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
	if (src->sa_family != dst->sa_family)
		return -1;

	switch (src->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
	case AF_INET6:
		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
				     &((struct sockaddr_in6 *) dst)->sin6_addr);
	default:
		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
				   &((struct sockaddr_ib *) dst)->sib_addr);
	}
}

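/*
 * For AF_INET/AF_INET6 the port is simply sin_port/sin6_port.  An AF_IB
 * address has no separate port field: the "port" is the low 16 bits of
 * the service ID, selected through sib_sid & sib_sid_mask.  For
 * example, with the low 16 bits of sib_sid equal to 0x12b7 and an
 * all-ones mask, cma_port() returns htons(0x12b7).
 */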
static __be16 cma_port(struct sockaddr *addr)
{
	struct sockaddr_ib *sib;

	switch (addr->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) addr)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *) addr)->sin6_port;
	case AF_IB:
		sib = (struct sockaddr_ib *) addr;
		return htons((u16) (be64_to_cpu(sib->sib_sid) &
				    be64_to_cpu(sib->sib_sid_mask)));
	default:
		return 0;
	}
}

static inline int cma_any_port(struct sockaddr *addr)
{
	return !cma_port(addr);
}

static void cma_save_ib_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_sa_path_rec *path)
{
	struct sockaddr_ib *listen_ib, *ib;

	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
	if (src_addr) {
		ib = (struct sockaddr_ib *)src_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->sgid, 16);
			ib->sib_sid = path->service_id;
			ib->sib_scope_id = 0;
		} else {
			ib->sib_pkey = listen_ib->sib_pkey;
			ib->sib_flowinfo = listen_ib->sib_flowinfo;
			ib->sib_addr = listen_ib->sib_addr;
			ib->sib_sid = listen_ib->sib_sid;
			ib->sib_scope_id = listen_ib->sib_scope_id;
		}
		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	}
	if (dst_addr) {
		ib = (struct sockaddr_ib *)dst_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->dgid, 16);
		}
	}
}

static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
	}
}

static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
	}
}

static u16 cma_port_from_service_id(__be64 service_id)
{
	return (u16)be64_to_cpu(service_id);
}

static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

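/*
 * Incoming request addresses are recovered from two possible places:
 * for AF_IB listeners the source/destination come from the (SIDR) REQ's
 * path record or from the listener itself, while for IP listeners they
 * are decoded from the cma_hdr that the active side placed at the start
 * of the CM private data, plus the port carried in the service ID.
 */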
static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family == AF_IB) {
		if (ib_event->event == IB_CM_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id,
					 ib_event->param.req_rcvd.primary_path);
		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		return 0;
	}

	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}

static int cma_save_req_info(const struct ib_cm_event *ib_event,
			     struct cma_req_info *req)
{
	const struct ib_cm_req_event_param *req_param =
		&ib_event->param.req_rcvd;
	const struct ib_cm_sidr_req_event_param *sidr_param =
		&ib_event->param.sidr_req_rcvd;

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		req->device = req_param->listen_id->device;
		req->port = req_param->port;
		memcpy(&req->local_gid, &req_param->primary_path->sgid,
		       sizeof(req->local_gid));
		req->has_gid = true;
		req->service_id = req_param->primary_path->service_id;
		req->pkey = be16_to_cpu(req_param->primary_path->pkey);
		if (req->pkey != req_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    req_param->bth_pkey, req->pkey);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		req->device = sidr_param->listen_id->device;
		req->port = sidr_param->port;
		req->has_gid = false;
		req->service_id = sidr_param->service_id;
		req->pkey = sidr_param->pkey;
		if (req->pkey != sidr_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    sidr_param->bth_pkey, req->pkey);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static bool validate_ipv4_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
	__be32 daddr = dst_addr->sin_addr.s_addr,
	       saddr = src_addr->sin_addr.s_addr;
	struct fib_result res;
	struct flowi4 fl4;
	int err;
	bool ret;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
	    ipv4_is_loopback(saddr))
		return false;

	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_iif = net_dev->ifindex;
	fl4.daddr = daddr;
	fl4.saddr = saddr;

	rcu_read_lock();
	err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
	ret = err == 0 && FIB_RES_DEV(res) == net_dev;
	rcu_read_unlock();

	return ret;
}

static bool validate_ipv6_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#if IS_ENABLED(CONFIG_IPV6)
	const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
			   IPV6_ADDR_LINKLOCAL;
	struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
					 &src_addr->sin6_addr, net_dev->ifindex,
					 strict);
	bool ret;

	if (!rt)
		return false;

	ret = rt->rt6i_idev->dev == net_dev;
	ip6_rt_put(rt);

	return ret;
#else
	return false;
#endif
}

static bool validate_net_dev(struct net_device *net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;

	switch (daddr->sa_family) {
	case AF_INET:
		return saddr->sa_family == AF_INET &&
		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);

	case AF_INET6:
		return saddr->sa_family == AF_INET6 &&
		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);

	default:
		return false;
	}
}

static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
					  const struct cma_req_info *req)
{
	struct sockaddr_storage listen_addr_storage, src_addr_storage;
	struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
			*src_addr = (struct sockaddr *)&src_addr_storage;
	struct net_device *net_dev;
	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
					   gid, listen_addr);
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
		dev_put(net_dev);
		return ERR_PTR(-EHOSTUNREACH);
	}

	return net_dev;
}

static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const struct cma_hdr *hdr)
{
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
	enum rdma_transport_type transport =
		rdma_node_get_transport(device->node_type);

	return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const int port_num = id->port_num ?: rdma_start_port(device);

	return cma_protocol_roce_dev_port(device, port_num);
}

static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const struct net_device *net_dev,
			      u8 port_num)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (!net_dev)
		/* This request is an AF_IB request or a RoCE request */
		return (!id->port_num || id->port_num == port_num) &&
		       (addr->src_addr.ss_family == AF_IB ||
			cma_protocol_roce_dev_port(id->device, port_num));

	return !addr->dev_addr.bound_dev_if ||
	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
		addr->dev_addr.bound_dev_if == net_dev->ifindex);
}

static struct rdma_id_private *cma_find_listener(
		const struct rdma_bind_list *bind_list,
		const struct ib_cm_id *cm_id,
		const struct ib_cm_event *ib_event,
		const struct cma_req_info *req,
		const struct net_device *net_dev)
{
	struct rdma_id_private *id_priv, *id_priv_dev;

	if (!bind_list)
		return ERR_PTR(-EINVAL);

	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
		if (cma_match_private_data(id_priv, ib_event->private_data)) {
			if (id_priv->id.device == cm_id->device &&
			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
				return id_priv;
			list_for_each_entry(id_priv_dev,
					    &id_priv->listen_list,
					    listen_list) {
				if (id_priv_dev->id.device == cm_id->device &&
				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
					return id_priv_dev;
			}
		}
	}

	return ERR_PTR(-EINVAL);
}

static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
						 struct ib_cm_event *ib_event,
						 struct net_device **net_dev)
{
	struct cma_req_info req;
	struct rdma_bind_list *bind_list;
	struct rdma_id_private *id_priv;
	int err;

	err = cma_save_req_info(ib_event, &req);
	if (err)
		return ERR_PTR(err);

	*net_dev = cma_get_net_dev(ib_event, &req);
	if (IS_ERR(*net_dev)) {
		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
			/* Assuming the protocol is AF_IB */
			*net_dev = NULL;
		} else if (cma_protocol_roce_dev_port(req.device, req.port)) {
			/* TODO find the net dev matching the request parameters
			 * through the RoCE GID table */
			*net_dev = NULL;
		} else {
			return ERR_CAST(*net_dev);
		}
	}

	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
				rdma_ps_from_service_id(req.service_id),
				cma_port_from_service_id(req.service_id));
	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
	if (IS_ERR(id_priv) && *net_dev) {
		dev_put(*net_dev);
		*net_dev = NULL;
	}

	return id_priv;
}

static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
{
	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}

static void cma_cancel_route(struct rdma_id_private *id_priv)
{
	if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
		if (id_priv->query)
			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
	}
}

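/*
 * Tearing down a wildcard listen also tears down every per-device
 * child listen that was spawned for it.  The loop below drops the
 * global lock around rdma_destroy_id(), since destroying a child id
 * re-acquires that lock (e.g. via cma_release_dev()); the child is
 * unlinked from both lists first so device removal cannot free it a
 * second time.
 */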
static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
	struct rdma_id_private *dev_id_priv;

	/*
	 * Remove from listen_any_list to prevent added devices from spawning
	 * additional listen requests.
	 */
	mutex_lock(&lock);
	list_del(&id_priv->list);

	while (!list_empty(&id_priv->listen_list)) {
		dev_id_priv = list_entry(id_priv->listen_list.next,
					 struct rdma_id_private, listen_list);
		/* sync with device removal to avoid duplicate destruction */
		list_del_init(&dev_id_priv->list);
		list_del(&dev_id_priv->listen_list);
		mutex_unlock(&lock);

		rdma_destroy_id(&dev_id_priv->id);
		mutex_lock(&lock);
	}
	mutex_unlock(&lock);
}

static void cma_cancel_operation(struct rdma_id_private *id_priv,
				 enum rdma_cm_state state)
{
	switch (state) {
	case RDMA_CM_ADDR_QUERY:
		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
		break;
	case RDMA_CM_ROUTE_QUERY:
		cma_cancel_route(id_priv);
		break;
	case RDMA_CM_LISTEN:
		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
			cma_cancel_listens(id_priv);
		break;
	default:
		break;
	}
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
	struct rdma_bind_list *bind_list = id_priv->bind_list;
	struct net *net = id_priv->id.route.addr.dev_addr.net;

	if (!bind_list)
		return;

	mutex_lock(&lock);
	hlist_del(&id_priv->node);
	if (hlist_empty(&bind_list->owners)) {
		cma_ps_remove(net, bind_list->ps, bind_list->port);
		kfree(bind_list);
	}
	mutex_unlock(&lock);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
	struct cma_multicast *mc;

	while (!list_empty(&id_priv->mc_list)) {
		mc = container_of(id_priv->mc_list.next,
				  struct cma_multicast, list);
		list_del(&mc->list);
		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
				      id_priv->id.port_num)) {
			ib_sa_free_multicast(mc->multicast.ib);
			kfree(mc);
		} else {
			if (mc->igmp_joined) {
				struct rdma_dev_addr *dev_addr =
					&id_priv->id.route.addr.dev_addr;
				struct net_device *ndev = NULL;

				if (dev_addr->bound_dev_if)
					ndev = dev_get_by_index(&init_net,
								dev_addr->bound_dev_if);
				if (ndev) {
					cma_igmp_send(ndev,
						      &mc->multicast.ib->rec.mgid,
						      false);
					dev_put(ndev);
				}
			}
			kref_put(&mc->mcref, release_mc);
		}
	}
}

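/*
 * Destroying an id proceeds in a fixed order: mark the id DESTROYING
 * and cancel whatever operation its old state implies, wait for any
 * running event handler to drop handler_mutex, tear down the IB/iWARP
 * CM id, leave multicast groups and release the device, then release
 * the bound port and wait for the last reference before freeing.
 */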
void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum rdma_cm_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
	cma_cancel_operation(id_priv, state);

	/*
	 * Wait for any active callback to finish.  New callbacks will find
	 * the id_priv state set to destroying and abort.
	 */
	mutex_lock(&id_priv->handler_mutex);
	mutex_unlock(&id_priv->handler_mutex);

	if (id_priv->cma_dev) {
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
				ib_destroy_cm_id(id_priv->cm_id.ib);
		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.iw)
				iw_destroy_cm_id(id_priv->cm_id.iw);
		}
		cma_leave_mc_groups(id_priv);
		cma_release_dev(id_priv);
	}

	cma_release_port(id_priv);
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	if (id_priv->internal_id)
		cma_deref_id(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);
	put_net(id_priv->id.route.addr.dev_addr.net);
	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
	int ret;

	ret = cma_modify_qp_rtr(id_priv, NULL);
	if (ret)
		goto reject;

	ret = cma_modify_qp_rts(id_priv, NULL);
	if (ret)
		goto reject;

	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
	if (ret)
		goto reject;

	return 0;
reject:
	pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
	cma_modify_qp_err(id_priv);
	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
		       NULL, 0, NULL, 0);
	return ret;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
				   struct ib_cm_rep_event_param *rep_data,
				   void *private_data)
{
	event->param.conn.private_data = private_data;
	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
	event->param.conn.responder_resources = rep_data->responder_resources;
	event->param.conn.initiator_depth = rep_data->initiator_depth;
	event->param.conn.flow_control = rep_data->flow_control;
	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
	event->param.conn.srq = rep_data->srq;
	event->param.conn.qp_num = rep_data->remote_qpn;
}

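/*
 * Event delivery convention used by the handlers below: the consumer's
 * rdma_cm event handler is called with handler_mutex held, and a
 * non-zero return value tells the CMA to destroy the id on the
 * consumer's behalf (the cm_id pointer is cleared first so the CM id is
 * not destroyed twice).
 */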
static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event;
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_CONNECT) ||
	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_DISCONNECT))
		goto out;

	memset(&event, 0, sizeof event);
	switch (ib_event->event) {
	case IB_CM_REQ_ERROR:
	case IB_CM_REP_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_REP_RECEIVED:
		if (id_priv->id.qp) {
			event.status = cma_rep_recv(id_priv);
			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
						     RDMA_CM_EVENT_ESTABLISHED;
		} else {
			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
		}
		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
				       ib_event->private_data);
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	case IB_CM_DREQ_ERROR:
		event.status = -ETIMEDOUT; /* fall through */
	case IB_CM_DREQ_RECEIVED:
	case IB_CM_DREP_RECEIVED:
		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
				   RDMA_CM_DISCONNECT))
			goto out;
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
		break;
	case IB_CM_MRA_RECEIVED:
		/* ignore event */
		goto out;
	case IB_CM_REJ_RECEIVED:
		pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
								ib_event->param.rej_rcvd.reason));
		cma_modify_qp_err(id_priv);
		event.status = ib_event->param.rej_rcvd.reason;
		event.event = RDMA_CM_EVENT_REJECTED;
		event.param.conn.private_data = ib_event->private_data;
		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
		break;
	default:
		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
					       struct ib_cm_event *ib_event,
					       struct net_device *net_dev)
{
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	struct rdma_route *rt;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	const __be64 service_id =
		ib_event->param.req_rcvd.primary_path->service_id;
	int ret;

	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
			    listen_id->event_handler, listen_id->context,
			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family, service_id))
		goto err;

	rt = &id->route;
	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
			       GFP_KERNEL);
	if (!rt->path_rec)
		goto err;

	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
	if (rt->num_paths == 2)
		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

	if (net_dev) {
		ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
		if (ret)
			goto err;
	} else {
		if (!cma_protocol_roce(listen_id) &&
		    cma_any_addr(cma_src_addr(id_priv))) {
			rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
			rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
			ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
		} else if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
			if (ret)
				goto err;
		}
	}
	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;

err:
	rdma_destroy_id(id);
	return NULL;
}

static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
					      struct ib_cm_event *ib_event,
					      struct net_device *net_dev)
{
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	struct net *net = listen_id->route.addr.dev_addr.net;
	int ret;

	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
			    listen_id->ps, IB_QPT_UD);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family,
			      ib_event->param.sidr_req_rcvd.service_id))
		goto err;

	if (net_dev) {
		ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
		if (ret)
			goto err;
	} else {
		if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv),
						 &id->route.addr.dev_addr);
			if (ret)
				goto err;
		}
	}

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;
err:
	rdma_destroy_id(id);
	return NULL;
}

static void cma_set_req_event_data(struct rdma_cm_event *event,
				   struct ib_cm_req_event_param *req_data,
				   void *private_data, int offset)
{
	event->param.conn.private_data = private_data + offset;
	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
	event->param.conn.responder_resources = req_data->responder_resources;
	event->param.conn.initiator_depth = req_data->initiator_depth;
	event->param.conn.flow_control = req_data->flow_control;
	event->param.conn.retry_count = req_data->retry_count;
	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
	event->param.conn.srq = req_data->srq;
	event->param.conn.qp_num = req_data->remote_qpn;
}

static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
{
	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
		 (id->qp_type == IB_QPT_UD)) ||
		(!id->qp_type));
}

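/*
 * cma_req_handler() runs for every incoming IB CM REQ/SIDR REQ: it maps
 * the request back to a listening rdma_cm_id, spawns a child id
 * (cma_new_conn_id() or cma_new_udp_id()), binds it to a device, and
 * then reports RDMA_CM_EVENT_CONNECT_REQUEST to the listener.  For
 * connected QP types an MRA is sent while the consumer decides, so the
 * remote peer does not time out the REQ.
 */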
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *listen_id, *conn_id = NULL;
	struct rdma_cm_event event;
	struct net_device *net_dev;
	int offset, ret;

	listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
	if (IS_ERR(listen_id))
		return PTR_ERR(listen_id);

	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
		ret = -EINVAL;
		goto net_dev_put;
	}

	mutex_lock(&listen_id->handler_mutex);
	if (listen_id->state != RDMA_CM_LISTEN) {
		ret = -ECONNABORTED;
		goto err1;
	}

	memset(&event, 0, sizeof event);
	offset = cma_user_data_offset(listen_id);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
		conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
		event.param.ud.private_data = ib_event->private_data + offset;
		event.param.ud.private_data_len =
			IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
	} else {
		conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
				       ib_event->private_data, offset);
	}
	if (!conn_id) {
		ret = -ENOMEM;
		goto err1;
	}

	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
	ret = cma_acquire_dev(conn_id, listen_id);
	if (ret)
		goto err2;

	conn_id->cm_id.ib = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_ib_handler;

	/*
	 * Protect against the user destroying conn_id from another thread
	 * until we're done accessing it.
	 */
	atomic_inc(&conn_id->refcount);
	ret = conn_id->id.event_handler(&conn_id->id, &event);
	if (ret)
		goto err3;
	/*
	 * Acquire mutex to prevent user executing rdma_destroy_id()
	 * while we're accessing the cm_id.
	 */
	mutex_lock(&lock);
	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
	    (conn_id->id.qp_type != IB_QPT_UD))
		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
	mutex_unlock(&lock);
	mutex_unlock(&conn_id->handler_mutex);
	mutex_unlock(&listen_id->handler_mutex);
	cma_deref_id(conn_id);
	if (net_dev)
		dev_put(net_dev);
	return 0;

err3:
	cma_deref_id(conn_id);
	/* Destroy the CM ID by returning a non-zero value. */
	conn_id->cm_id.ib = NULL;
err2:
	cma_exch(conn_id, RDMA_CM_DESTROYING);
	mutex_unlock(&conn_id->handler_mutex);
err1:
	mutex_unlock(&listen_id->handler_mutex);
	if (conn_id)
		rdma_destroy_id(&conn_id->id);

net_dev_put:
	if (net_dev)
		dev_put(net_dev);

	return ret;
}

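/*
 * For IP-based port spaces the service ID packs the port space into
 * bits 16-31 and the port into bits 0-15.  For example, RDMA_PS_TCP
 * (0x0106) with port 4791 (0x12b7) yields a service ID of
 * 0x00000000010612b7; rdma_ps_from_service_id() and
 * cma_port_from_service_id() recover the two halves.  AF_IB addresses
 * instead carry the full 64-bit service ID in sib_sid.
 */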
*/ 2019 conn_id->cm_id.ib = NULL; 2020 err2: 2021 cma_exch(conn_id, RDMA_CM_DESTROYING); 2022 mutex_unlock(&conn_id->handler_mutex); 2023 err1: 2024 mutex_unlock(&listen_id->handler_mutex); 2025 if (conn_id) 2026 rdma_destroy_id(&conn_id->id); 2027 2028 net_dev_put: 2029 if (net_dev) 2030 dev_put(net_dev); 2031 2032 return ret; 2033 } 2034 2035 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2036 { 2037 if (addr->sa_family == AF_IB) 2038 return ((struct sockaddr_ib *) addr)->sib_sid; 2039 2040 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2041 } 2042 EXPORT_SYMBOL(rdma_get_service_id); 2043 2044 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2045 { 2046 struct rdma_id_private *id_priv = iw_id->context; 2047 struct rdma_cm_event event; 2048 int ret = 0; 2049 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2050 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2051 2052 mutex_lock(&id_priv->handler_mutex); 2053 if (id_priv->state != RDMA_CM_CONNECT) 2054 goto out; 2055 2056 memset(&event, 0, sizeof event); 2057 switch (iw_event->event) { 2058 case IW_CM_EVENT_CLOSE: 2059 event.event = RDMA_CM_EVENT_DISCONNECTED; 2060 break; 2061 case IW_CM_EVENT_CONNECT_REPLY: 2062 memcpy(cma_src_addr(id_priv), laddr, 2063 rdma_addr_size(laddr)); 2064 memcpy(cma_dst_addr(id_priv), raddr, 2065 rdma_addr_size(raddr)); 2066 switch (iw_event->status) { 2067 case 0: 2068 event.event = RDMA_CM_EVENT_ESTABLISHED; 2069 event.param.conn.initiator_depth = iw_event->ird; 2070 event.param.conn.responder_resources = iw_event->ord; 2071 break; 2072 case -ECONNRESET: 2073 case -ECONNREFUSED: 2074 event.event = RDMA_CM_EVENT_REJECTED; 2075 break; 2076 case -ETIMEDOUT: 2077 event.event = RDMA_CM_EVENT_UNREACHABLE; 2078 break; 2079 default: 2080 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2081 break; 2082 } 2083 break; 2084 case IW_CM_EVENT_ESTABLISHED: 2085 event.event = RDMA_CM_EVENT_ESTABLISHED; 2086 event.param.conn.initiator_depth = iw_event->ird; 2087 event.param.conn.responder_resources = iw_event->ord; 2088 break; 2089 default: 2090 BUG_ON(1); 2091 } 2092 2093 event.status = iw_event->status; 2094 event.param.conn.private_data = iw_event->private_data; 2095 event.param.conn.private_data_len = iw_event->private_data_len; 2096 ret = id_priv->id.event_handler(&id_priv->id, &event); 2097 if (ret) { 2098 /* Destroy the CM ID by returning a non-zero value. 
*/ 2099 id_priv->cm_id.iw = NULL; 2100 cma_exch(id_priv, RDMA_CM_DESTROYING); 2101 mutex_unlock(&id_priv->handler_mutex); 2102 rdma_destroy_id(&id_priv->id); 2103 return ret; 2104 } 2105 2106 out: 2107 mutex_unlock(&id_priv->handler_mutex); 2108 return ret; 2109 } 2110 2111 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2112 struct iw_cm_event *iw_event) 2113 { 2114 struct rdma_cm_id *new_cm_id; 2115 struct rdma_id_private *listen_id, *conn_id; 2116 struct rdma_cm_event event; 2117 int ret = -ECONNABORTED; 2118 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2119 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2120 2121 listen_id = cm_id->context; 2122 2123 mutex_lock(&listen_id->handler_mutex); 2124 if (listen_id->state != RDMA_CM_LISTEN) 2125 goto out; 2126 2127 /* Create a new RDMA id for the new IW CM ID */ 2128 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2129 listen_id->id.event_handler, 2130 listen_id->id.context, 2131 RDMA_PS_TCP, IB_QPT_RC); 2132 if (IS_ERR(new_cm_id)) { 2133 ret = -ENOMEM; 2134 goto out; 2135 } 2136 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2137 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2138 conn_id->state = RDMA_CM_CONNECT; 2139 2140 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); 2141 if (ret) { 2142 mutex_unlock(&conn_id->handler_mutex); 2143 rdma_destroy_id(new_cm_id); 2144 goto out; 2145 } 2146 2147 ret = cma_acquire_dev(conn_id, listen_id); 2148 if (ret) { 2149 mutex_unlock(&conn_id->handler_mutex); 2150 rdma_destroy_id(new_cm_id); 2151 goto out; 2152 } 2153 2154 conn_id->cm_id.iw = cm_id; 2155 cm_id->context = conn_id; 2156 cm_id->cm_handler = cma_iw_handler; 2157 2158 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2159 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2160 2161 memset(&event, 0, sizeof event); 2162 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2163 event.param.conn.private_data = iw_event->private_data; 2164 event.param.conn.private_data_len = iw_event->private_data_len; 2165 event.param.conn.initiator_depth = iw_event->ird; 2166 event.param.conn.responder_resources = iw_event->ord; 2167 2168 /* 2169 * Protect against the user destroying conn_id from another thread 2170 * until we're done accessing it. 
2171 */ 2172 atomic_inc(&conn_id->refcount); 2173 ret = conn_id->id.event_handler(&conn_id->id, &event); 2174 if (ret) { 2175 /* User wants to destroy the CM ID */ 2176 conn_id->cm_id.iw = NULL; 2177 cma_exch(conn_id, RDMA_CM_DESTROYING); 2178 mutex_unlock(&conn_id->handler_mutex); 2179 cma_deref_id(conn_id); 2180 rdma_destroy_id(&conn_id->id); 2181 goto out; 2182 } 2183 2184 mutex_unlock(&conn_id->handler_mutex); 2185 cma_deref_id(conn_id); 2186 2187 out: 2188 mutex_unlock(&listen_id->handler_mutex); 2189 return ret; 2190 } 2191 2192 static int cma_ib_listen(struct rdma_id_private *id_priv) 2193 { 2194 struct sockaddr *addr; 2195 struct ib_cm_id *id; 2196 __be64 svc_id; 2197 2198 addr = cma_src_addr(id_priv); 2199 svc_id = rdma_get_service_id(&id_priv->id, addr); 2200 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2201 if (IS_ERR(id)) 2202 return PTR_ERR(id); 2203 id_priv->cm_id.ib = id; 2204 2205 return 0; 2206 } 2207 2208 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2209 { 2210 int ret; 2211 struct iw_cm_id *id; 2212 2213 id = iw_create_cm_id(id_priv->id.device, 2214 iw_conn_req_handler, 2215 id_priv); 2216 if (IS_ERR(id)) 2217 return PTR_ERR(id); 2218 2219 id->tos = id_priv->tos; 2220 id_priv->cm_id.iw = id; 2221 2222 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2223 rdma_addr_size(cma_src_addr(id_priv))); 2224 2225 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2226 2227 if (ret) { 2228 iw_destroy_cm_id(id_priv->cm_id.iw); 2229 id_priv->cm_id.iw = NULL; 2230 } 2231 2232 return ret; 2233 } 2234 2235 static int cma_listen_handler(struct rdma_cm_id *id, 2236 struct rdma_cm_event *event) 2237 { 2238 struct rdma_id_private *id_priv = id->context; 2239 2240 id->context = id_priv->id.context; 2241 id->event_handler = id_priv->id.event_handler; 2242 return id_priv->id.event_handler(id, event); 2243 } 2244 2245 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2246 struct cma_device *cma_dev) 2247 { 2248 struct rdma_id_private *dev_id_priv; 2249 struct rdma_cm_id *id; 2250 struct net *net = id_priv->id.route.addr.dev_addr.net; 2251 int ret; 2252 2253 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2254 return; 2255 2256 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2257 id_priv->id.qp_type); 2258 if (IS_ERR(id)) 2259 return; 2260 2261 dev_id_priv = container_of(id, struct rdma_id_private, id); 2262 2263 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2264 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2265 rdma_addr_size(cma_src_addr(id_priv))); 2266 2267 _cma_attach_to_dev(dev_id_priv, cma_dev); 2268 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2269 atomic_inc(&id_priv->refcount); 2270 dev_id_priv->internal_id = 1; 2271 dev_id_priv->afonly = id_priv->afonly; 2272 2273 ret = rdma_listen(id, id_priv->backlog); 2274 if (ret) 2275 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2276 ret, cma_dev->device->name); 2277 } 2278 2279 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2280 { 2281 struct cma_device *cma_dev; 2282 2283 mutex_lock(&lock); 2284 list_add_tail(&id_priv->list, &listen_any_list); 2285 list_for_each_entry(cma_dev, &dev_list, list) 2286 cma_listen_on_dev(id_priv, cma_dev); 2287 mutex_unlock(&lock); 2288 } 2289 2290 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2291 { 2292 struct rdma_id_private *id_priv; 2293 2294 id_priv = container_of(id, struct rdma_id_private, id); 2295 
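	/*
	 * Illustrative consumer usage of this function (a sketch; cm_id and
	 * the values are hypothetical): a ULP sets the service type after the
	 * address is resolved and before resolving the route, since the value
	 * feeds path_rec.qos_class on IB and the SL/traffic-class selection
	 * in cma_resolve_iboe_route() on RoCE:
	 *
	 *	rdma_set_service_type(cm_id, 0x88);	value is the IP ToS byte
	 *	ret = rdma_resolve_route(cm_id, 2000);	timeout in ms
	 *
	 * 0x88 here is only an example (DSCP AF41 shifted into the ToS byte).
	 */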
id_priv->tos = (u8) tos; 2296 id_priv->tos_set = true; 2297 } 2298 EXPORT_SYMBOL(rdma_set_service_type); 2299 2300 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, 2301 void *context) 2302 { 2303 struct cma_work *work = context; 2304 struct rdma_route *route; 2305 2306 route = &work->id->id.route; 2307 2308 if (!status) { 2309 route->num_paths = 1; 2310 *route->path_rec = *path_rec; 2311 } else { 2312 work->old_state = RDMA_CM_ROUTE_QUERY; 2313 work->new_state = RDMA_CM_ADDR_RESOLVED; 2314 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2315 work->event.status = status; 2316 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", 2317 status); 2318 } 2319 2320 queue_work(cma_wq, &work->work); 2321 } 2322 2323 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2324 struct cma_work *work) 2325 { 2326 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2327 struct ib_sa_path_rec path_rec; 2328 ib_sa_comp_mask comp_mask; 2329 struct sockaddr_in6 *sin6; 2330 struct sockaddr_ib *sib; 2331 2332 memset(&path_rec, 0, sizeof path_rec); 2333 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2334 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2335 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2336 path_rec.numb_path = 1; 2337 path_rec.reversible = 1; 2338 path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 2339 2340 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2341 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2342 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2343 2344 switch (cma_family(id_priv)) { 2345 case AF_INET: 2346 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2347 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2348 break; 2349 case AF_INET6: 2350 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2351 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2352 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2353 break; 2354 case AF_IB: 2355 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2356 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2357 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2358 break; 2359 } 2360 2361 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2362 id_priv->id.port_num, &path_rec, 2363 comp_mask, timeout_ms, 2364 GFP_KERNEL, cma_query_handler, 2365 work, &id_priv->query); 2366 2367 return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; 2368 } 2369 2370 static void cma_work_handler(struct work_struct *_work) 2371 { 2372 struct cma_work *work = container_of(_work, struct cma_work, work); 2373 struct rdma_id_private *id_priv = work->id; 2374 int destroy = 0; 2375 2376 mutex_lock(&id_priv->handler_mutex); 2377 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2378 goto out; 2379 2380 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2381 cma_exch(id_priv, RDMA_CM_DESTROYING); 2382 destroy = 1; 2383 } 2384 out: 2385 mutex_unlock(&id_priv->handler_mutex); 2386 cma_deref_id(id_priv); 2387 if (destroy) 2388 rdma_destroy_id(&id_priv->id); 2389 kfree(work); 2390 } 2391 2392 static void cma_ndev_work_handler(struct work_struct *_work) 2393 { 2394 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 2395 struct rdma_id_private *id_priv = work->id; 2396 int destroy = 0; 2397 2398 mutex_lock(&id_priv->handler_mutex); 2399 if (id_priv->state == RDMA_CM_DESTROYING || 2400 id_priv->state == RDMA_CM_DEVICE_REMOVAL) 2401 goto out; 2402 2403 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2404 cma_exch(id_priv, RDMA_CM_DESTROYING); 2405 destroy = 1; 2406 } 2407 2408 out: 2409 mutex_unlock(&id_priv->handler_mutex); 2410 cma_deref_id(id_priv); 2411 if (destroy) 2412 rdma_destroy_id(&id_priv->id); 2413 kfree(work); 2414 } 2415 2416 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2417 { 2418 struct rdma_route *route = &id_priv->id.route; 2419 struct cma_work *work; 2420 int ret; 2421 2422 work = kzalloc(sizeof *work, GFP_KERNEL); 2423 if (!work) 2424 return -ENOMEM; 2425 2426 work->id = id_priv; 2427 INIT_WORK(&work->work, cma_work_handler); 2428 work->old_state = RDMA_CM_ROUTE_QUERY; 2429 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2430 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2431 2432 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2433 if (!route->path_rec) { 2434 ret = -ENOMEM; 2435 goto err1; 2436 } 2437 2438 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2439 if (ret) 2440 goto err2; 2441 2442 return 0; 2443 err2: 2444 kfree(route->path_rec); 2445 route->path_rec = NULL; 2446 err1: 2447 kfree(work); 2448 return ret; 2449 } 2450 2451 int rdma_set_ib_paths(struct rdma_cm_id *id, 2452 struct ib_sa_path_rec *path_rec, int num_paths) 2453 { 2454 struct rdma_id_private *id_priv; 2455 int ret; 2456 2457 id_priv = container_of(id, struct rdma_id_private, id); 2458 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2459 RDMA_CM_ROUTE_RESOLVED)) 2460 return -EINVAL; 2461 2462 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2463 GFP_KERNEL); 2464 if (!id->route.path_rec) { 2465 ret = -ENOMEM; 2466 goto err; 2467 } 2468 2469 id->route.num_paths = num_paths; 2470 return 0; 2471 err: 2472 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2473 return ret; 2474 } 2475 EXPORT_SYMBOL(rdma_set_ib_paths); 2476 2477 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2478 { 2479 struct cma_work *work; 2480 2481 work = kzalloc(sizeof *work, GFP_KERNEL); 2482 if (!work) 2483 return -ENOMEM; 2484 2485 work->id = id_priv; 2486 INIT_WORK(&work->work, cma_work_handler); 2487 work->old_state = RDMA_CM_ROUTE_QUERY; 2488 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2489 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2490 queue_work(cma_wq, &work->work); 2491 return 0; 2492 } 2493 2494 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2495 { 
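	/*
	 * Worked example (illustrative): the ToS byte is first mapped to a
	 * socket priority via rt_tos2priority(). If the real device has
	 * traffic classes configured, the prio-to-TC map gives the SL
	 * directly. Otherwise, on a VLAN device the 802.1p egress map is
	 * consulted; e.g. with an egress-qos-map entry translating priority 4
	 * to PCP 5 (ip link ... type vlan ... egress-qos-map 4:5), a ToS
	 * whose rt_tos2priority() result is 4 yields SL 5 here. With neither
	 * configured, the SL falls back to 0.
	 */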
2496 int prio; 2497 struct net_device *dev; 2498 2499 prio = rt_tos2priority(tos); 2500 dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; 2501 if (dev->num_tc) 2502 return netdev_get_prio_tc_map(dev, prio); 2503 2504 #if IS_ENABLED(CONFIG_VLAN_8021Q) 2505 if (is_vlan_dev(ndev)) 2506 return (vlan_dev_get_egress_qos_mask(ndev, prio) & 2507 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 2508 #endif 2509 return 0; 2510 } 2511 2512 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2513 unsigned long supported_gids, 2514 enum ib_gid_type default_gid) 2515 { 2516 if ((network_type == RDMA_NETWORK_IPV4 || 2517 network_type == RDMA_NETWORK_IPV6) && 2518 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2519 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2520 2521 return default_gid; 2522 } 2523 2524 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2525 { 2526 struct rdma_route *route = &id_priv->id.route; 2527 struct rdma_addr *addr = &route->addr; 2528 struct cma_work *work; 2529 int ret; 2530 struct net_device *ndev = NULL; 2531 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 2532 rdma_start_port(id_priv->cma_dev->device)]; 2533 u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos; 2534 2535 2536 work = kzalloc(sizeof *work, GFP_KERNEL); 2537 if (!work) 2538 return -ENOMEM; 2539 2540 work->id = id_priv; 2541 INIT_WORK(&work->work, cma_work_handler); 2542 2543 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2544 if (!route->path_rec) { 2545 ret = -ENOMEM; 2546 goto err1; 2547 } 2548 2549 route->num_paths = 1; 2550 2551 if (addr->dev_addr.bound_dev_if) { 2552 unsigned long supported_gids; 2553 2554 ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); 2555 if (!ndev) { 2556 ret = -ENODEV; 2557 goto err2; 2558 } 2559 2560 if (ndev->flags & IFF_LOOPBACK) { 2561 dev_put(ndev); 2562 if (!id_priv->id.device->get_netdev) { 2563 ret = -EOPNOTSUPP; 2564 goto err2; 2565 } 2566 2567 ndev = id_priv->id.device->get_netdev(id_priv->id.device, 2568 id_priv->id.port_num); 2569 if (!ndev) { 2570 ret = -ENODEV; 2571 goto err2; 2572 } 2573 } 2574 2575 route->path_rec->net = &init_net; 2576 route->path_rec->ifindex = ndev->ifindex; 2577 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2578 id_priv->id.port_num); 2579 route->path_rec->gid_type = 2580 cma_route_gid_type(addr->dev_addr.network, 2581 supported_gids, 2582 id_priv->gid_type); 2583 } 2584 if (!ndev) { 2585 ret = -ENODEV; 2586 goto err2; 2587 } 2588 2589 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2590 2591 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2592 &route->path_rec->sgid); 2593 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2594 &route->path_rec->dgid); 2595 2596 /* Use the hint from IP Stack to select GID Type */ 2597 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2598 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2599 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2600 /* TODO: get the hoplimit from the inet/inet6 device */ 2601 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2602 else 2603 route->path_rec->hop_limit = 1; 2604 route->path_rec->reversible = 1; 2605 route->path_rec->pkey = cpu_to_be16(0xffff); 2606 route->path_rec->mtu_selector = IB_SA_EQ; 2607 route->path_rec->sl = iboe_tos_to_sl(ndev, tos); 2608 route->path_rec->traffic_class = tos; 2609 
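	/*
	 * Note (illustrative): the path MTU is derived from the netdev MTU
	 * with the RoCE transport headers subtracted, so a standard 1500-byte
	 * Ethernet MTU typically ends up as IB_MTU_1024 and a 9000-byte jumbo
	 * MTU as IB_MTU_4096. An MTU too small to carry any IB MTU makes
	 * iboe_get_mtu() return 0, which is rejected with -EINVAL just below.
	 */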
route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 2610 route->path_rec->rate_selector = IB_SA_EQ; 2611 route->path_rec->rate = iboe_get_rate(ndev); 2612 dev_put(ndev); 2613 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2614 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2615 if (!route->path_rec->mtu) { 2616 ret = -EINVAL; 2617 goto err2; 2618 } 2619 2620 work->old_state = RDMA_CM_ROUTE_QUERY; 2621 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2622 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2623 work->event.status = 0; 2624 2625 queue_work(cma_wq, &work->work); 2626 2627 return 0; 2628 2629 err2: 2630 kfree(route->path_rec); 2631 route->path_rec = NULL; 2632 err1: 2633 kfree(work); 2634 return ret; 2635 } 2636 2637 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2638 { 2639 struct rdma_id_private *id_priv; 2640 int ret; 2641 2642 id_priv = container_of(id, struct rdma_id_private, id); 2643 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2644 return -EINVAL; 2645 2646 atomic_inc(&id_priv->refcount); 2647 if (rdma_cap_ib_sa(id->device, id->port_num)) 2648 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2649 else if (rdma_protocol_roce(id->device, id->port_num)) 2650 ret = cma_resolve_iboe_route(id_priv); 2651 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2652 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2653 else 2654 ret = -ENOSYS; 2655 2656 if (ret) 2657 goto err; 2658 2659 return 0; 2660 err: 2661 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2662 cma_deref_id(id_priv); 2663 return ret; 2664 } 2665 EXPORT_SYMBOL(rdma_resolve_route); 2666 2667 static void cma_set_loopback(struct sockaddr *addr) 2668 { 2669 switch (addr->sa_family) { 2670 case AF_INET: 2671 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2672 break; 2673 case AF_INET6: 2674 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2675 0, 0, 0, htonl(1)); 2676 break; 2677 default: 2678 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2679 0, 0, 0, htonl(1)); 2680 break; 2681 } 2682 } 2683 2684 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2685 { 2686 struct cma_device *cma_dev, *cur_dev; 2687 union ib_gid gid; 2688 enum ib_port_state port_state; 2689 u16 pkey; 2690 int ret; 2691 u8 p; 2692 2693 cma_dev = NULL; 2694 mutex_lock(&lock); 2695 list_for_each_entry(cur_dev, &dev_list, list) { 2696 if (cma_family(id_priv) == AF_IB && 2697 !rdma_cap_ib_cm(cur_dev->device, 1)) 2698 continue; 2699 2700 if (!cma_dev) 2701 cma_dev = cur_dev; 2702 2703 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2704 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && 2705 port_state == IB_PORT_ACTIVE) { 2706 cma_dev = cur_dev; 2707 goto port_found; 2708 } 2709 } 2710 } 2711 2712 if (!cma_dev) { 2713 ret = -ENODEV; 2714 goto out; 2715 } 2716 2717 p = 1; 2718 2719 port_found: 2720 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2721 if (ret) 2722 goto out; 2723 2724 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2725 if (ret) 2726 goto out; 2727 2728 id_priv->id.route.addr.dev_addr.dev_type = 2729 (rdma_protocol_ib(cma_dev->device, p)) ? 
2730 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2731 2732 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2733 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2734 id_priv->id.port_num = p; 2735 cma_attach_to_dev(id_priv, cma_dev); 2736 cma_set_loopback(cma_src_addr(id_priv)); 2737 out: 2738 mutex_unlock(&lock); 2739 return ret; 2740 } 2741 2742 static void addr_handler(int status, struct sockaddr *src_addr, 2743 struct rdma_dev_addr *dev_addr, void *context) 2744 { 2745 struct rdma_id_private *id_priv = context; 2746 struct rdma_cm_event event; 2747 2748 memset(&event, 0, sizeof event); 2749 mutex_lock(&id_priv->handler_mutex); 2750 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2751 RDMA_CM_ADDR_RESOLVED)) 2752 goto out; 2753 2754 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2755 if (!status && !id_priv->cma_dev) { 2756 status = cma_acquire_dev(id_priv, NULL); 2757 if (status) 2758 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2759 status); 2760 } else { 2761 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); 2762 } 2763 2764 if (status) { 2765 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2766 RDMA_CM_ADDR_BOUND)) 2767 goto out; 2768 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2769 event.status = status; 2770 } else 2771 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2772 2773 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2774 cma_exch(id_priv, RDMA_CM_DESTROYING); 2775 mutex_unlock(&id_priv->handler_mutex); 2776 cma_deref_id(id_priv); 2777 rdma_destroy_id(&id_priv->id); 2778 return; 2779 } 2780 out: 2781 mutex_unlock(&id_priv->handler_mutex); 2782 cma_deref_id(id_priv); 2783 } 2784 2785 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2786 { 2787 struct cma_work *work; 2788 union ib_gid gid; 2789 int ret; 2790 2791 work = kzalloc(sizeof *work, GFP_KERNEL); 2792 if (!work) 2793 return -ENOMEM; 2794 2795 if (!id_priv->cma_dev) { 2796 ret = cma_bind_loopback(id_priv); 2797 if (ret) 2798 goto err; 2799 } 2800 2801 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2802 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2803 2804 work->id = id_priv; 2805 INIT_WORK(&work->work, cma_work_handler); 2806 work->old_state = RDMA_CM_ADDR_QUERY; 2807 work->new_state = RDMA_CM_ADDR_RESOLVED; 2808 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2809 queue_work(cma_wq, &work->work); 2810 return 0; 2811 err: 2812 kfree(work); 2813 return ret; 2814 } 2815 2816 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2817 { 2818 struct cma_work *work; 2819 int ret; 2820 2821 work = kzalloc(sizeof *work, GFP_KERNEL); 2822 if (!work) 2823 return -ENOMEM; 2824 2825 if (!id_priv->cma_dev) { 2826 ret = cma_resolve_ib_dev(id_priv); 2827 if (ret) 2828 goto err; 2829 } 2830 2831 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2832 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2833 2834 work->id = id_priv; 2835 INIT_WORK(&work->work, cma_work_handler); 2836 work->old_state = RDMA_CM_ADDR_QUERY; 2837 work->new_state = RDMA_CM_ADDR_RESOLVED; 2838 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2839 queue_work(cma_wq, &work->work); 2840 return 0; 2841 err: 2842 kfree(work); 2843 return ret; 2844 } 2845 2846 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2847 struct sockaddr *dst_addr) 2848 { 2849 if (!src_addr || !src_addr->sa_family) { 2850 src_addr = (struct sockaddr *) 
&id->route.addr.src_addr; 2851 src_addr->sa_family = dst_addr->sa_family; 2852 if (IS_ENABLED(CONFIG_IPV6) && 2853 dst_addr->sa_family == AF_INET6) { 2854 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2855 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2856 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2857 if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 2858 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2859 } else if (dst_addr->sa_family == AF_IB) { 2860 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2861 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2862 } 2863 } 2864 return rdma_bind_addr(id, src_addr); 2865 } 2866 2867 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2868 struct sockaddr *dst_addr, int timeout_ms) 2869 { 2870 struct rdma_id_private *id_priv; 2871 int ret; 2872 2873 id_priv = container_of(id, struct rdma_id_private, id); 2874 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2875 if (id_priv->state == RDMA_CM_IDLE) { 2876 ret = cma_bind_addr(id, src_addr, dst_addr); 2877 if (ret) { 2878 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2879 return ret; 2880 } 2881 } 2882 2883 if (cma_family(id_priv) != dst_addr->sa_family) { 2884 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2885 return -EINVAL; 2886 } 2887 2888 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 2889 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2890 return -EINVAL; 2891 } 2892 2893 atomic_inc(&id_priv->refcount); 2894 if (cma_any_addr(dst_addr)) { 2895 ret = cma_resolve_loopback(id_priv); 2896 } else { 2897 if (dst_addr->sa_family == AF_IB) { 2898 ret = cma_resolve_ib_addr(id_priv); 2899 } else { 2900 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2901 dst_addr, &id->route.addr.dev_addr, 2902 timeout_ms, addr_handler, id_priv); 2903 } 2904 } 2905 if (ret) 2906 goto err; 2907 2908 return 0; 2909 err: 2910 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2911 cma_deref_id(id_priv); 2912 return ret; 2913 } 2914 EXPORT_SYMBOL(rdma_resolve_addr); 2915 2916 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2917 { 2918 struct rdma_id_private *id_priv; 2919 unsigned long flags; 2920 int ret; 2921 2922 id_priv = container_of(id, struct rdma_id_private, id); 2923 spin_lock_irqsave(&id_priv->lock, flags); 2924 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2925 id_priv->reuseaddr = reuse; 2926 ret = 0; 2927 } else { 2928 ret = -EINVAL; 2929 } 2930 spin_unlock_irqrestore(&id_priv->lock, flags); 2931 return ret; 2932 } 2933 EXPORT_SYMBOL(rdma_set_reuseaddr); 2934 2935 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2936 { 2937 struct rdma_id_private *id_priv; 2938 unsigned long flags; 2939 int ret; 2940 2941 id_priv = container_of(id, struct rdma_id_private, id); 2942 spin_lock_irqsave(&id_priv->lock, flags); 2943 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2944 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2945 id_priv->afonly = afonly; 2946 ret = 0; 2947 } else { 2948 ret = -EINVAL; 2949 } 2950 spin_unlock_irqrestore(&id_priv->lock, flags); 2951 return ret; 2952 } 2953 EXPORT_SYMBOL(rdma_set_afonly); 2954 2955 static void cma_bind_port(struct rdma_bind_list *bind_list, 2956 struct rdma_id_private *id_priv) 2957 { 2958 struct sockaddr *addr; 2959 struct sockaddr_ib *sib; 2960 u64 sid, mask; 2961 __be16 port; 2962 2963 addr = cma_src_addr(id_priv); 2964 port = 
htons(bind_list->port); 2965 2966 switch (addr->sa_family) { 2967 case AF_INET: 2968 ((struct sockaddr_in *) addr)->sin_port = port; 2969 break; 2970 case AF_INET6: 2971 ((struct sockaddr_in6 *) addr)->sin6_port = port; 2972 break; 2973 case AF_IB: 2974 sib = (struct sockaddr_ib *) addr; 2975 sid = be64_to_cpu(sib->sib_sid); 2976 mask = be64_to_cpu(sib->sib_sid_mask); 2977 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 2978 sib->sib_sid_mask = cpu_to_be64(~0ULL); 2979 break; 2980 } 2981 id_priv->bind_list = bind_list; 2982 hlist_add_head(&id_priv->node, &bind_list->owners); 2983 } 2984 2985 static int cma_alloc_port(enum rdma_port_space ps, 2986 struct rdma_id_private *id_priv, unsigned short snum) 2987 { 2988 struct rdma_bind_list *bind_list; 2989 int ret; 2990 2991 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 2992 if (!bind_list) 2993 return -ENOMEM; 2994 2995 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 2996 snum); 2997 if (ret < 0) 2998 goto err; 2999 3000 bind_list->ps = ps; 3001 bind_list->port = (unsigned short)ret; 3002 cma_bind_port(bind_list, id_priv); 3003 return 0; 3004 err: 3005 kfree(bind_list); 3006 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3007 } 3008 3009 static int cma_port_is_unique(struct rdma_bind_list *bind_list, 3010 struct rdma_id_private *id_priv) 3011 { 3012 struct rdma_id_private *cur_id; 3013 struct sockaddr *daddr = cma_dst_addr(id_priv); 3014 struct sockaddr *saddr = cma_src_addr(id_priv); 3015 __be16 dport = cma_port(daddr); 3016 3017 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3018 struct sockaddr *cur_daddr = cma_dst_addr(cur_id); 3019 struct sockaddr *cur_saddr = cma_src_addr(cur_id); 3020 __be16 cur_dport = cma_port(cur_daddr); 3021 3022 if (id_priv == cur_id) 3023 continue; 3024 3025 /* different dest port -> unique */ 3026 if (!cma_any_port(cur_daddr) && 3027 (dport != cur_dport)) 3028 continue; 3029 3030 /* different src address -> unique */ 3031 if (!cma_any_addr(saddr) && 3032 !cma_any_addr(cur_saddr) && 3033 cma_addr_cmp(saddr, cur_saddr)) 3034 continue; 3035 3036 /* different dst address -> unique */ 3037 if (!cma_any_addr(cur_daddr) && 3038 cma_addr_cmp(daddr, cur_daddr)) 3039 continue; 3040 3041 return -EADDRNOTAVAIL; 3042 } 3043 return 0; 3044 } 3045 3046 static int cma_alloc_any_port(enum rdma_port_space ps, 3047 struct rdma_id_private *id_priv) 3048 { 3049 static unsigned int last_used_port; 3050 int low, high, remaining; 3051 unsigned int rover; 3052 struct net *net = id_priv->id.route.addr.dev_addr.net; 3053 3054 inet_get_local_port_range(net, &low, &high); 3055 remaining = (high - low) + 1; 3056 rover = prandom_u32() % remaining + low; 3057 retry: 3058 if (last_used_port != rover) { 3059 struct rdma_bind_list *bind_list; 3060 int ret; 3061 3062 bind_list = cma_ps_find(net, ps, (unsigned short)rover); 3063 3064 if (!bind_list) { 3065 ret = cma_alloc_port(ps, id_priv, rover); 3066 } else { 3067 ret = cma_port_is_unique(bind_list, id_priv); 3068 if (!ret) 3069 cma_bind_port(bind_list, id_priv); 3070 } 3071 /* 3072 * Remember previously used port number in order to avoid 3073 * re-using same port immediately after it is closed. 3074 */ 3075 if (!ret) 3076 last_used_port = rover; 3077 if (ret != -EADDRNOTAVAIL) 3078 return ret; 3079 } 3080 if (--remaining) { 3081 rover++; 3082 if ((rover < low) || (rover > high)) 3083 rover = low; 3084 goto retry; 3085 } 3086 return -EADDRNOTAVAIL; 3087 } 3088 3089 /* 3090 * Check that the requested port is available. 
This is called when trying to 3091 * bind to a specific port, or when trying to listen on a bound port. In 3092 * the latter case, the provided id_priv may already be on the bind_list, but 3093 * we still need to check that it's okay to start listening. 3094 */ 3095 static int cma_check_port(struct rdma_bind_list *bind_list, 3096 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3097 { 3098 struct rdma_id_private *cur_id; 3099 struct sockaddr *addr, *cur_addr; 3100 3101 addr = cma_src_addr(id_priv); 3102 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3103 if (id_priv == cur_id) 3104 continue; 3105 3106 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3107 cur_id->reuseaddr) 3108 continue; 3109 3110 cur_addr = cma_src_addr(cur_id); 3111 if (id_priv->afonly && cur_id->afonly && 3112 (addr->sa_family != cur_addr->sa_family)) 3113 continue; 3114 3115 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3116 return -EADDRNOTAVAIL; 3117 3118 if (!cma_addr_cmp(addr, cur_addr)) 3119 return -EADDRINUSE; 3120 } 3121 return 0; 3122 } 3123 3124 static int cma_use_port(enum rdma_port_space ps, 3125 struct rdma_id_private *id_priv) 3126 { 3127 struct rdma_bind_list *bind_list; 3128 unsigned short snum; 3129 int ret; 3130 3131 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3132 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 3133 return -EACCES; 3134 3135 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3136 if (!bind_list) { 3137 ret = cma_alloc_port(ps, id_priv, snum); 3138 } else { 3139 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3140 if (!ret) 3141 cma_bind_port(bind_list, id_priv); 3142 } 3143 return ret; 3144 } 3145 3146 static int cma_bind_listen(struct rdma_id_private *id_priv) 3147 { 3148 struct rdma_bind_list *bind_list = id_priv->bind_list; 3149 int ret = 0; 3150 3151 mutex_lock(&lock); 3152 if (bind_list->owners.first->next) 3153 ret = cma_check_port(bind_list, id_priv, 0); 3154 mutex_unlock(&lock); 3155 return ret; 3156 } 3157 3158 static enum rdma_port_space cma_select_inet_ps( 3159 struct rdma_id_private *id_priv) 3160 { 3161 switch (id_priv->id.ps) { 3162 case RDMA_PS_TCP: 3163 case RDMA_PS_UDP: 3164 case RDMA_PS_IPOIB: 3165 case RDMA_PS_IB: 3166 return id_priv->id.ps; 3167 default: 3168 3169 return 0; 3170 } 3171 } 3172 3173 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3174 { 3175 enum rdma_port_space ps = 0; 3176 struct sockaddr_ib *sib; 3177 u64 sid_ps, mask, sid; 3178 3179 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3180 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3181 sid = be64_to_cpu(sib->sib_sid) & mask; 3182 3183 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3184 sid_ps = RDMA_IB_IP_PS_IB; 3185 ps = RDMA_PS_IB; 3186 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3187 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3188 sid_ps = RDMA_IB_IP_PS_TCP; 3189 ps = RDMA_PS_TCP; 3190 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3191 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3192 sid_ps = RDMA_IB_IP_PS_UDP; 3193 ps = RDMA_PS_UDP; 3194 } 3195 3196 if (ps) { 3197 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3198 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3199 be64_to_cpu(sib->sib_sid_mask)); 3200 } 3201 return ps; 3202 } 3203 3204 static int cma_get_port(struct rdma_id_private *id_priv) 3205 { 3206 enum rdma_port_space ps; 3207 int ret; 3208 
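	/*
	 * Worked example (illustrative, assuming the usual port-space
	 * constants from rdma_cm.h/rdma_cm_ib.h): for an IPv4/IPv6 id on
	 * RDMA_PS_TCP (0x0106) bound to port 5000, rdma_get_service_id()
	 * above yields the big-endian encoding of
	 * (0x0106ULL << 16) + 5000 = 0x0000000001061388. An AF_IB id requests
	 * the same thing by presetting sib_sid = RDMA_IB_IP_PS_TCP | 5000
	 * with sib_sid_mask = RDMA_IB_IP_PS_MASK, which cma_select_ib_ps()
	 * above resolves back to RDMA_PS_TCP.
	 */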
3209 if (cma_family(id_priv) != AF_IB) 3210 ps = cma_select_inet_ps(id_priv); 3211 else 3212 ps = cma_select_ib_ps(id_priv); 3213 if (!ps) 3214 return -EPROTONOSUPPORT; 3215 3216 mutex_lock(&lock); 3217 if (cma_any_port(cma_src_addr(id_priv))) 3218 ret = cma_alloc_any_port(ps, id_priv); 3219 else 3220 ret = cma_use_port(ps, id_priv); 3221 mutex_unlock(&lock); 3222 3223 return ret; 3224 } 3225 3226 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3227 struct sockaddr *addr) 3228 { 3229 #if IS_ENABLED(CONFIG_IPV6) 3230 struct sockaddr_in6 *sin6; 3231 3232 if (addr->sa_family != AF_INET6) 3233 return 0; 3234 3235 sin6 = (struct sockaddr_in6 *) addr; 3236 3237 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 3238 return 0; 3239 3240 if (!sin6->sin6_scope_id) 3241 return -EINVAL; 3242 3243 dev_addr->bound_dev_if = sin6->sin6_scope_id; 3244 #endif 3245 return 0; 3246 } 3247 3248 int rdma_listen(struct rdma_cm_id *id, int backlog) 3249 { 3250 struct rdma_id_private *id_priv; 3251 int ret; 3252 3253 id_priv = container_of(id, struct rdma_id_private, id); 3254 if (id_priv->state == RDMA_CM_IDLE) { 3255 id->route.addr.src_addr.ss_family = AF_INET; 3256 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3257 if (ret) 3258 return ret; 3259 } 3260 3261 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3262 return -EINVAL; 3263 3264 if (id_priv->reuseaddr) { 3265 ret = cma_bind_listen(id_priv); 3266 if (ret) 3267 goto err; 3268 } 3269 3270 id_priv->backlog = backlog; 3271 if (id->device) { 3272 if (rdma_cap_ib_cm(id->device, 1)) { 3273 ret = cma_ib_listen(id_priv); 3274 if (ret) 3275 goto err; 3276 } else if (rdma_cap_iw_cm(id->device, 1)) { 3277 ret = cma_iw_listen(id_priv, backlog); 3278 if (ret) 3279 goto err; 3280 } else { 3281 ret = -ENOSYS; 3282 goto err; 3283 } 3284 } else 3285 cma_listen_on_all(id_priv); 3286 3287 return 0; 3288 err: 3289 id_priv->backlog = 0; 3290 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3291 return ret; 3292 } 3293 EXPORT_SYMBOL(rdma_listen); 3294 3295 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3296 { 3297 struct rdma_id_private *id_priv; 3298 int ret; 3299 struct sockaddr *daddr; 3300 3301 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3302 addr->sa_family != AF_IB) 3303 return -EAFNOSUPPORT; 3304 3305 id_priv = container_of(id, struct rdma_id_private, id); 3306 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3307 return -EINVAL; 3308 3309 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3310 if (ret) 3311 goto err1; 3312 3313 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3314 if (!cma_any_addr(addr)) { 3315 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3316 if (ret) 3317 goto err1; 3318 3319 ret = cma_acquire_dev(id_priv, NULL); 3320 if (ret) 3321 goto err1; 3322 } 3323 3324 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3325 if (addr->sa_family == AF_INET) 3326 id_priv->afonly = 1; 3327 #if IS_ENABLED(CONFIG_IPV6) 3328 else if (addr->sa_family == AF_INET6) { 3329 struct net *net = id_priv->id.route.addr.dev_addr.net; 3330 3331 id_priv->afonly = net->ipv6.sysctl.bindv6only; 3332 } 3333 #endif 3334 } 3335 ret = cma_get_port(id_priv); 3336 if (ret) 3337 goto err2; 3338 3339 daddr = cma_dst_addr(id_priv); 3340 daddr->sa_family = addr->sa_family; 3341 3342 return 0; 3343 err2: 3344 if (id_priv->cma_dev) 3345 cma_release_dev(id_priv); 3346 err1: 3347 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3348 return ret; 
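	/*
	 * Illustrative server-side usage of rdma_bind_addr()/rdma_listen()
	 * (a sketch, not part of this file; error handling omitted and cm_id
	 * is the ULP's listening id):
	 *
	 *	struct sockaddr_in sin = {
	 *		.sin_family = AF_INET,
	 *		.sin_port = htons(5000),
	 *		.sin_addr.s_addr = htonl(INADDR_ANY),
	 *	};
	 *
	 *	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	 *	if (!ret)
	 *		ret = rdma_listen(cm_id, 16);
	 *
	 * Binding to the wildcard address defers device binding, so the
	 * listen is replicated onto every RDMA device via cma_listen_on_all()
	 * above; incoming requests then surface as
	 * RDMA_CM_EVENT_CONNECT_REQUEST events on freshly created ids.
	 */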
3349 } 3350 EXPORT_SYMBOL(rdma_bind_addr); 3351 3352 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3353 { 3354 struct cma_hdr *cma_hdr; 3355 3356 cma_hdr = hdr; 3357 cma_hdr->cma_version = CMA_VERSION; 3358 if (cma_family(id_priv) == AF_INET) { 3359 struct sockaddr_in *src4, *dst4; 3360 3361 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3362 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3363 3364 cma_set_ip_ver(cma_hdr, 4); 3365 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3366 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3367 cma_hdr->port = src4->sin_port; 3368 } else if (cma_family(id_priv) == AF_INET6) { 3369 struct sockaddr_in6 *src6, *dst6; 3370 3371 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3372 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3373 3374 cma_set_ip_ver(cma_hdr, 6); 3375 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3376 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3377 cma_hdr->port = src6->sin6_port; 3378 } 3379 return 0; 3380 } 3381 3382 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3383 struct ib_cm_event *ib_event) 3384 { 3385 struct rdma_id_private *id_priv = cm_id->context; 3386 struct rdma_cm_event event; 3387 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3388 int ret = 0; 3389 3390 mutex_lock(&id_priv->handler_mutex); 3391 if (id_priv->state != RDMA_CM_CONNECT) 3392 goto out; 3393 3394 memset(&event, 0, sizeof event); 3395 switch (ib_event->event) { 3396 case IB_CM_SIDR_REQ_ERROR: 3397 event.event = RDMA_CM_EVENT_UNREACHABLE; 3398 event.status = -ETIMEDOUT; 3399 break; 3400 case IB_CM_SIDR_REP_RECEIVED: 3401 event.param.ud.private_data = ib_event->private_data; 3402 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3403 if (rep->status != IB_SIDR_SUCCESS) { 3404 event.event = RDMA_CM_EVENT_UNREACHABLE; 3405 event.status = ib_event->param.sidr_rep_rcvd.status; 3406 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", 3407 event.status); 3408 break; 3409 } 3410 ret = cma_set_qkey(id_priv, rep->qkey); 3411 if (ret) { 3412 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); 3413 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3414 event.status = ret; 3415 break; 3416 } 3417 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, 3418 id_priv->id.route.path_rec, 3419 &event.param.ud.ah_attr); 3420 event.param.ud.qp_num = rep->qpn; 3421 event.param.ud.qkey = rep->qkey; 3422 event.event = RDMA_CM_EVENT_ESTABLISHED; 3423 event.status = 0; 3424 break; 3425 default: 3426 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3427 ib_event->event); 3428 goto out; 3429 } 3430 3431 ret = id_priv->id.event_handler(&id_priv->id, &event); 3432 if (ret) { 3433 /* Destroy the CM ID by returning a non-zero value. 
*/ 3434 id_priv->cm_id.ib = NULL; 3435 cma_exch(id_priv, RDMA_CM_DESTROYING); 3436 mutex_unlock(&id_priv->handler_mutex); 3437 rdma_destroy_id(&id_priv->id); 3438 return ret; 3439 } 3440 out: 3441 mutex_unlock(&id_priv->handler_mutex); 3442 return ret; 3443 } 3444 3445 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3446 struct rdma_conn_param *conn_param) 3447 { 3448 struct ib_cm_sidr_req_param req; 3449 struct ib_cm_id *id; 3450 void *private_data; 3451 int offset, ret; 3452 3453 memset(&req, 0, sizeof req); 3454 offset = cma_user_data_offset(id_priv); 3455 req.private_data_len = offset + conn_param->private_data_len; 3456 if (req.private_data_len < conn_param->private_data_len) 3457 return -EINVAL; 3458 3459 if (req.private_data_len) { 3460 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3461 if (!private_data) 3462 return -ENOMEM; 3463 } else { 3464 private_data = NULL; 3465 } 3466 3467 if (conn_param->private_data && conn_param->private_data_len) 3468 memcpy(private_data + offset, conn_param->private_data, 3469 conn_param->private_data_len); 3470 3471 if (private_data) { 3472 ret = cma_format_hdr(private_data, id_priv); 3473 if (ret) 3474 goto out; 3475 req.private_data = private_data; 3476 } 3477 3478 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3479 id_priv); 3480 if (IS_ERR(id)) { 3481 ret = PTR_ERR(id); 3482 goto out; 3483 } 3484 id_priv->cm_id.ib = id; 3485 3486 req.path = id_priv->id.route.path_rec; 3487 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3488 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3489 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3490 3491 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3492 if (ret) { 3493 ib_destroy_cm_id(id_priv->cm_id.ib); 3494 id_priv->cm_id.ib = NULL; 3495 } 3496 out: 3497 kfree(private_data); 3498 return ret; 3499 } 3500 3501 static int cma_connect_ib(struct rdma_id_private *id_priv, 3502 struct rdma_conn_param *conn_param) 3503 { 3504 struct ib_cm_req_param req; 3505 struct rdma_route *route; 3506 void *private_data; 3507 struct ib_cm_id *id; 3508 int offset, ret; 3509 3510 memset(&req, 0, sizeof req); 3511 offset = cma_user_data_offset(id_priv); 3512 req.private_data_len = offset + conn_param->private_data_len; 3513 if (req.private_data_len < conn_param->private_data_len) 3514 return -EINVAL; 3515 3516 if (req.private_data_len) { 3517 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3518 if (!private_data) 3519 return -ENOMEM; 3520 } else { 3521 private_data = NULL; 3522 } 3523 3524 if (conn_param->private_data && conn_param->private_data_len) 3525 memcpy(private_data + offset, conn_param->private_data, 3526 conn_param->private_data_len); 3527 3528 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3529 if (IS_ERR(id)) { 3530 ret = PTR_ERR(id); 3531 goto out; 3532 } 3533 id_priv->cm_id.ib = id; 3534 3535 route = &id_priv->id.route; 3536 if (private_data) { 3537 ret = cma_format_hdr(private_data, id_priv); 3538 if (ret) 3539 goto out; 3540 req.private_data = private_data; 3541 } 3542 3543 req.primary_path = &route->path_rec[0]; 3544 if (route->num_paths == 2) 3545 req.alternate_path = &route->path_rec[1]; 3546 3547 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3548 req.qp_num = id_priv->qp_num; 3549 req.qp_type = id_priv->id.qp_type; 3550 req.starting_psn = id_priv->seq_num; 3551 req.responder_resources = conn_param->responder_resources; 3552 req.initiator_depth = conn_param->initiator_depth; 3553 
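	/*
	 * Note (illustrative): the ULP's private data was copied 'offset'
	 * bytes into the REQ, after the cma_hdr written by cma_format_hdr()
	 * above, and the receiver strips the same offset again in
	 * cma_set_req_event_data(). So the space actually available to a ULP
	 * on an IPv4/IPv6 id is IB_CM_REQ_PRIVATE_DATA_SIZE minus that
	 * header; for AF_IB ids the offset is 0 and the full REQ private
	 * data area is passed through.
	 */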
req.flow_control = conn_param->flow_control; 3554 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3555 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3556 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3557 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3558 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3559 req.srq = id_priv->srq ? 1 : 0; 3560 3561 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3562 out: 3563 if (ret && !IS_ERR(id)) { 3564 ib_destroy_cm_id(id); 3565 id_priv->cm_id.ib = NULL; 3566 } 3567 3568 kfree(private_data); 3569 return ret; 3570 } 3571 3572 static int cma_connect_iw(struct rdma_id_private *id_priv, 3573 struct rdma_conn_param *conn_param) 3574 { 3575 struct iw_cm_id *cm_id; 3576 int ret; 3577 struct iw_cm_conn_param iw_param; 3578 3579 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3580 if (IS_ERR(cm_id)) 3581 return PTR_ERR(cm_id); 3582 3583 cm_id->tos = id_priv->tos; 3584 id_priv->cm_id.iw = cm_id; 3585 3586 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3587 rdma_addr_size(cma_src_addr(id_priv))); 3588 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3589 rdma_addr_size(cma_dst_addr(id_priv))); 3590 3591 ret = cma_modify_qp_rtr(id_priv, conn_param); 3592 if (ret) 3593 goto out; 3594 3595 if (conn_param) { 3596 iw_param.ord = conn_param->initiator_depth; 3597 iw_param.ird = conn_param->responder_resources; 3598 iw_param.private_data = conn_param->private_data; 3599 iw_param.private_data_len = conn_param->private_data_len; 3600 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3601 } else { 3602 memset(&iw_param, 0, sizeof iw_param); 3603 iw_param.qpn = id_priv->qp_num; 3604 } 3605 ret = iw_cm_connect(cm_id, &iw_param); 3606 out: 3607 if (ret) { 3608 iw_destroy_cm_id(cm_id); 3609 id_priv->cm_id.iw = NULL; 3610 } 3611 return ret; 3612 } 3613 3614 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3615 { 3616 struct rdma_id_private *id_priv; 3617 int ret; 3618 3619 id_priv = container_of(id, struct rdma_id_private, id); 3620 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3621 return -EINVAL; 3622 3623 if (!id->qp) { 3624 id_priv->qp_num = conn_param->qp_num; 3625 id_priv->srq = conn_param->srq; 3626 } 3627 3628 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3629 if (id->qp_type == IB_QPT_UD) 3630 ret = cma_resolve_ib_udp(id_priv, conn_param); 3631 else 3632 ret = cma_connect_ib(id_priv, conn_param); 3633 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3634 ret = cma_connect_iw(id_priv, conn_param); 3635 else 3636 ret = -ENOSYS; 3637 if (ret) 3638 goto err; 3639 3640 return 0; 3641 err: 3642 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3643 return ret; 3644 } 3645 EXPORT_SYMBOL(rdma_connect); 3646 3647 static int cma_accept_ib(struct rdma_id_private *id_priv, 3648 struct rdma_conn_param *conn_param) 3649 { 3650 struct ib_cm_rep_param rep; 3651 int ret; 3652 3653 ret = cma_modify_qp_rtr(id_priv, conn_param); 3654 if (ret) 3655 goto out; 3656 3657 ret = cma_modify_qp_rts(id_priv, conn_param); 3658 if (ret) 3659 goto out; 3660 3661 memset(&rep, 0, sizeof rep); 3662 rep.qp_num = id_priv->qp_num; 3663 rep.starting_psn = id_priv->seq_num; 3664 rep.private_data = conn_param->private_data; 3665 rep.private_data_len = conn_param->private_data_len; 3666 rep.responder_resources = conn_param->responder_resources; 3667 rep.initiator_depth = conn_param->initiator_depth; 3668 rep.failover_accepted = 0; 3669 
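	/*
	 * Illustrative client-side flow around rdma_connect() above (a
	 * sketch, not part of this file; cm_id, dst and conn_param are the
	 * ULP's own, and each step is driven from the previous event):
	 *
	 *	rdma_resolve_addr(cm_id, NULL, dst, 2000);
	 *		then RDMA_CM_EVENT_ADDR_RESOLVED
	 *	rdma_resolve_route(cm_id, 2000);
	 *		then RDMA_CM_EVENT_ROUTE_RESOLVED
	 *	rdma_connect(cm_id, &conn_param);
	 *		then RDMA_CM_EVENT_ESTABLISHED (or REJECTED/UNREACHABLE)
	 *
	 * rdma_connect() only accepts ids in RDMA_CM_ROUTE_RESOLVED state,
	 * which is why the cma_comp_exch() check above returns -EINVAL for
	 * anything else.
	 */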
rep.flow_control = conn_param->flow_control; 3670 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3671 rep.srq = id_priv->srq ? 1 : 0; 3672 3673 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3674 out: 3675 return ret; 3676 } 3677 3678 static int cma_accept_iw(struct rdma_id_private *id_priv, 3679 struct rdma_conn_param *conn_param) 3680 { 3681 struct iw_cm_conn_param iw_param; 3682 int ret; 3683 3684 if (!conn_param) 3685 return -EINVAL; 3686 3687 ret = cma_modify_qp_rtr(id_priv, conn_param); 3688 if (ret) 3689 return ret; 3690 3691 iw_param.ord = conn_param->initiator_depth; 3692 iw_param.ird = conn_param->responder_resources; 3693 iw_param.private_data = conn_param->private_data; 3694 iw_param.private_data_len = conn_param->private_data_len; 3695 if (id_priv->id.qp) { 3696 iw_param.qpn = id_priv->qp_num; 3697 } else 3698 iw_param.qpn = conn_param->qp_num; 3699 3700 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3701 } 3702 3703 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3704 enum ib_cm_sidr_status status, u32 qkey, 3705 const void *private_data, int private_data_len) 3706 { 3707 struct ib_cm_sidr_rep_param rep; 3708 int ret; 3709 3710 memset(&rep, 0, sizeof rep); 3711 rep.status = status; 3712 if (status == IB_SIDR_SUCCESS) { 3713 ret = cma_set_qkey(id_priv, qkey); 3714 if (ret) 3715 return ret; 3716 rep.qp_num = id_priv->qp_num; 3717 rep.qkey = id_priv->qkey; 3718 } 3719 rep.private_data = private_data; 3720 rep.private_data_len = private_data_len; 3721 3722 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3723 } 3724 3725 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3726 { 3727 struct rdma_id_private *id_priv; 3728 int ret; 3729 3730 id_priv = container_of(id, struct rdma_id_private, id); 3731 3732 id_priv->owner = task_pid_nr(current); 3733 3734 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3735 return -EINVAL; 3736 3737 if (!id->qp && conn_param) { 3738 id_priv->qp_num = conn_param->qp_num; 3739 id_priv->srq = conn_param->srq; 3740 } 3741 3742 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3743 if (id->qp_type == IB_QPT_UD) { 3744 if (conn_param) 3745 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3746 conn_param->qkey, 3747 conn_param->private_data, 3748 conn_param->private_data_len); 3749 else 3750 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3751 0, NULL, 0); 3752 } else { 3753 if (conn_param) 3754 ret = cma_accept_ib(id_priv, conn_param); 3755 else 3756 ret = cma_rep_recv(id_priv); 3757 } 3758 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3759 ret = cma_accept_iw(id_priv, conn_param); 3760 else 3761 ret = -ENOSYS; 3762 3763 if (ret) 3764 goto reject; 3765 3766 return 0; 3767 reject: 3768 cma_modify_qp_err(id_priv); 3769 rdma_reject(id, NULL, 0); 3770 return ret; 3771 } 3772 EXPORT_SYMBOL(rdma_accept); 3773 3774 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3775 { 3776 struct rdma_id_private *id_priv; 3777 int ret; 3778 3779 id_priv = container_of(id, struct rdma_id_private, id); 3780 if (!id_priv->cm_id.ib) 3781 return -EINVAL; 3782 3783 switch (id->device->node_type) { 3784 case RDMA_NODE_IB_CA: 3785 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3786 break; 3787 default: 3788 ret = 0; 3789 break; 3790 } 3791 return ret; 3792 } 3793 EXPORT_SYMBOL(rdma_notify); 3794 3795 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3796 u8 private_data_len) 3797 { 3798 struct rdma_id_private *id_priv; 3799 int ret; 3800 3801 id_priv = container_of(id, struct rdma_id_private, 
id); 3802 if (!id_priv->cm_id.ib) 3803 return -EINVAL; 3804 3805 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3806 if (id->qp_type == IB_QPT_UD) 3807 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3808 private_data, private_data_len); 3809 else 3810 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3811 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3812 0, private_data, private_data_len); 3813 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3814 ret = iw_cm_reject(id_priv->cm_id.iw, 3815 private_data, private_data_len); 3816 } else 3817 ret = -ENOSYS; 3818 3819 return ret; 3820 } 3821 EXPORT_SYMBOL(rdma_reject); 3822 3823 int rdma_disconnect(struct rdma_cm_id *id) 3824 { 3825 struct rdma_id_private *id_priv; 3826 int ret; 3827 3828 id_priv = container_of(id, struct rdma_id_private, id); 3829 if (!id_priv->cm_id.ib) 3830 return -EINVAL; 3831 3832 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3833 ret = cma_modify_qp_err(id_priv); 3834 if (ret) 3835 goto out; 3836 /* Initiate or respond to a disconnect. */ 3837 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3838 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3839 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3840 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3841 } else 3842 ret = -EINVAL; 3843 3844 out: 3845 return ret; 3846 } 3847 EXPORT_SYMBOL(rdma_disconnect); 3848 3849 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3850 { 3851 struct rdma_id_private *id_priv; 3852 struct cma_multicast *mc = multicast->context; 3853 struct rdma_cm_event event; 3854 int ret = 0; 3855 3856 id_priv = mc->id_priv; 3857 mutex_lock(&id_priv->handler_mutex); 3858 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3859 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3860 goto out; 3861 3862 if (!status) 3863 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3864 else 3865 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", 3866 status); 3867 mutex_lock(&id_priv->qp_mutex); 3868 if (!status && id_priv->id.qp) { 3869 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3870 be16_to_cpu(multicast->rec.mlid)); 3871 if (status) 3872 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. 
status %d\n", 3873 status); 3874 } 3875 mutex_unlock(&id_priv->qp_mutex); 3876 3877 memset(&event, 0, sizeof event); 3878 event.status = status; 3879 event.param.ud.private_data = mc->context; 3880 if (!status) { 3881 struct rdma_dev_addr *dev_addr = 3882 &id_priv->id.route.addr.dev_addr; 3883 struct net_device *ndev = 3884 dev_get_by_index(&init_net, dev_addr->bound_dev_if); 3885 enum ib_gid_type gid_type = 3886 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3887 rdma_start_port(id_priv->cma_dev->device)]; 3888 3889 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3890 ib_init_ah_from_mcmember(id_priv->id.device, 3891 id_priv->id.port_num, &multicast->rec, 3892 ndev, gid_type, 3893 &event.param.ud.ah_attr); 3894 event.param.ud.qp_num = 0xFFFFFF; 3895 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3896 if (ndev) 3897 dev_put(ndev); 3898 } else 3899 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3900 3901 ret = id_priv->id.event_handler(&id_priv->id, &event); 3902 if (ret) { 3903 cma_exch(id_priv, RDMA_CM_DESTROYING); 3904 mutex_unlock(&id_priv->handler_mutex); 3905 rdma_destroy_id(&id_priv->id); 3906 return 0; 3907 } 3908 3909 out: 3910 mutex_unlock(&id_priv->handler_mutex); 3911 return 0; 3912 } 3913 3914 static void cma_set_mgid(struct rdma_id_private *id_priv, 3915 struct sockaddr *addr, union ib_gid *mgid) 3916 { 3917 unsigned char mc_map[MAX_ADDR_LEN]; 3918 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3919 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 3920 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 3921 3922 if (cma_any_addr(addr)) { 3923 memset(mgid, 0, sizeof *mgid); 3924 } else if ((addr->sa_family == AF_INET6) && 3925 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 3926 0xFF10A01B)) { 3927 /* IPv6 address is an SA assigned MGID. 
*/ 3928 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3929 } else if (addr->sa_family == AF_IB) { 3930 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); 3931 } else if ((addr->sa_family == AF_INET6)) { 3932 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 3933 if (id_priv->id.ps == RDMA_PS_UDP) 3934 mc_map[7] = 0x01; /* Use RDMA CM signature */ 3935 *mgid = *(union ib_gid *) (mc_map + 4); 3936 } else { 3937 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); 3938 if (id_priv->id.ps == RDMA_PS_UDP) 3939 mc_map[7] = 0x01; /* Use RDMA CM signature */ 3940 *mgid = *(union ib_gid *) (mc_map + 4); 3941 } 3942 } 3943 3944 static void cma_query_sa_classport_info_cb(int status, 3945 struct ib_class_port_info *rec, 3946 void *context) 3947 { 3948 struct class_port_info_context *cb_ctx = context; 3949 3950 WARN_ON(!context); 3951 3952 if (status || !rec) { 3953 pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", 3954 cb_ctx->device->name, cb_ctx->port_num, status); 3955 goto out; 3956 } 3957 3958 memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); 3959 3960 out: 3961 complete(&cb_ctx->done); 3962 } 3963 3964 static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, 3965 struct ib_class_port_info *class_port_info) 3966 { 3967 struct class_port_info_context *cb_ctx; 3968 int ret; 3969 3970 cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); 3971 if (!cb_ctx) 3972 return -ENOMEM; 3973 3974 cb_ctx->device = device; 3975 cb_ctx->class_port_info = class_port_info; 3976 cb_ctx->port_num = port_num; 3977 init_completion(&cb_ctx->done); 3978 3979 ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, 3980 CMA_QUERY_CLASSPORT_INFO_TIMEOUT, 3981 GFP_KERNEL, cma_query_sa_classport_info_cb, 3982 cb_ctx, &cb_ctx->sa_query); 3983 if (ret < 0) { 3984 pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", 3985 device->name, port_num, ret); 3986 goto out; 3987 } 3988 3989 wait_for_completion(&cb_ctx->done); 3990 3991 out: 3992 kfree(cb_ctx); 3993 return ret; 3994 } 3995 3996 static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 3997 struct cma_multicast *mc) 3998 { 3999 struct ib_sa_mcmember_rec rec; 4000 struct ib_class_port_info class_port_info; 4001 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4002 ib_sa_comp_mask comp_mask; 4003 int ret; 4004 4005 ib_addr_get_mgid(dev_addr, &rec.mgid); 4006 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, 4007 &rec.mgid, &rec); 4008 if (ret) 4009 return ret; 4010 4011 ret = cma_set_qkey(id_priv, 0); 4012 if (ret) 4013 return ret; 4014 4015 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 4016 rec.qkey = cpu_to_be32(id_priv->qkey); 4017 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 4018 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 4019 rec.join_state = mc->join_state; 4020 4021 if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { 4022 ret = cma_query_sa_classport_info(id_priv->id.device, 4023 id_priv->id.port_num, 4024 &class_port_info); 4025 4026 if (ret) 4027 return ret; 4028 4029 if (!(ib_get_cpi_capmask2(&class_port_info) & 4030 IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { 4031 pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 4032 "RDMA CM: SM doesn't support Send Only Full Member option\n", 4033 id_priv->id.device->name, id_priv->id.port_num); 4034 return -EOPNOTSUPP; 4035 } 4036 } 4037 4038 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID 

static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct ib_class_port_info class_port_info;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
		ret = cma_query_sa_classport_info(id_priv->id.device,
						  id_priv->id.port_num,
						  &class_port_info);

		if (ret)
			return ret;

		if (!(ib_get_cpi_capmask2(&class_port_info) &
		      IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
			pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
				"RDMA CM: SM doesn't support Send Only Full Member option\n",
				id_priv->id.device->name, id_priv->id.port_num);
			return -EOPNOTSUPP;
		}
	}

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	if (id_priv->id.ps == RDMA_PS_IPOIB)
		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU |
			     IB_SA_MCMEMBER_REC_HOP_LIMIT;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, GFP_KERNEL,
						cma_ib_mc_handler, mc);
	return PTR_ERR_OR_ZERO(mc->multicast.ib);
}

static void iboe_mcast_work_handler(struct work_struct *work)
{
	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
	struct cma_multicast *mc = mw->mc;
	struct ib_sa_multicast *m = mc->multicast.ib;

	mc->multicast.ib->context = mc;
	cma_ib_mc_handler(0, m);
	kref_put(&mc->mcref, release_mc);
	kfree(mw);
}

static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		mgid->raw[0] = 0xff;
		mgid->raw[1] = 0x0e;
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}
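
/*
 * Illustration of the mapping above (not used by the code): an IPv4 group
 * such as 239.1.1.1 (0xef010101) is embedded, in network byte order, in the
 * low 32 bits of the MGID, giving ff0e:0000:0000:0000:0000:ffff:ef01:0101.
 */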

static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	if (addr->sa_family == AF_INET) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}
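
/*
 * A minimal consumer sketch for the exported multicast API below
 * (illustrative only; "my_id" and "my_ctx" are hypothetical, and the cm_id
 * is assumed to already be bound or address-resolved, as
 * rdma_join_multicast() checks).  FULLMEMBER_JOIN is assumed to come from
 * the join-state enum in <rdma/ib_sa.h>:
 *
 *	struct sockaddr_in maddr = {
 *		.sin_family	 = AF_INET,
 *		.sin_addr.s_addr = cpu_to_be32(0xef010101),
 *	};
 *	int ret;
 *
 *	ret = rdma_join_multicast(my_id, (struct sockaddr *)&maddr,
 *				  BIT(FULLMEMBER_JOIN), my_ctx);
 *	if (ret)
 *		goto err;
 *	...
 *	rdma_leave_multicast(my_id, (struct sockaddr *)&maddr);
 *
 * The join itself completes asynchronously; its outcome is delivered to the
 * cm_id's event handler as RDMA_CM_EVENT_MULTICAST_JOIN or
 * RDMA_CM_EVENT_MULTICAST_ERROR.
 */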

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(&init_net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);
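
/*
 * Bonding failover handling: cma_netdev_callback() below listens for
 * NETDEV_BONDING_FAILOVER on bond masters and, for every cm_id bound to
 * that interface whose cached source MAC no longer matches the netdev,
 * queues an RDMA_CM_EVENT_ADDR_CHANGE so the consumer can re-resolve its
 * address.
 */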

static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr;
	struct cma_ndev_work *work;

	dev_addr = &id_priv->id.route.addr.dev_addr;

	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
	    (net_eq(dev_net(ndev), dev_addr->net)) &&
	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
			ndev->name, &id_priv->id);
		work = kzalloc(sizeof *work, GFP_KERNEL);
		if (!work)
			return -ENOMEM;

		INIT_WORK(&work->work, cma_ndev_work_handler);
		work->id = id_priv;
		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
		atomic_inc(&id_priv->refcount);
		queue_work(cma_wq, &work->work);
	}

	return 0;
}

static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
			       void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_BONDING_FAILOVER)
		return NOTIFY_DONE;

	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
		return NOTIFY_DONE;

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list)
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			ret = cma_netdev_change(ndev, id_priv);
			if (ret)
				goto out;
		}

out:
	mutex_unlock(&lock);
	return ret;
}

static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		cma_dev->default_gid_type[i - rdma_start_port(device)] =
			find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);

	return;
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlmsghdr *nlh;
	struct rdma_cm_id_stats *id_stats;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id = NULL;
	struct cma_device *cma_dev;
	int i_dev = 0, i_id = 0;

	/*
	 * We export all of the IDs as a sequence of messages.  Each
	 * ID gets its own netlink message.
	 */
	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list) {
		if (i_dev < cb->args[0]) {
			i_dev++;
			continue;
		}

		i_id = 0;
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			if (i_id < cb->args[1]) {
				i_id++;
				continue;
			}

			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
						sizeof *id_stats, RDMA_NL_RDMA_CM,
						RDMA_NL_RDMA_CM_ID_STATS,
						NLM_F_MULTI);
			if (!id_stats)
				goto out;

			memset(id_stats, 0, sizeof *id_stats);
			id = &id_priv->id;
			id_stats->node_type = id->route.addr.dev_addr.dev_type;
			id_stats->port_num = id->port_num;
			id_stats->bound_dev_if =
				id->route.addr.dev_addr.bound_dev_if;

			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_src_addr(id_priv)),
					  cma_src_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
				goto out;
			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_src_addr(id_priv)),
					  cma_dst_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
				goto out;

			id_stats->pid = id_priv->owner;
			id_stats->port_space = id->ps;
			id_stats->cm_state = id_priv->state;
			id_stats->qp_num = id_priv->qp_num;
			id_stats->qp_type = id->qp_type;

			i_id++;
		}

		cb->args[1] = 0;
		i_dev++;
	}

out:
	mutex_unlock(&lock);
	cb->args[0] = i_dev;
	cb->args[1] = i_id;

	return skb->len;
}

static const struct ibnl_client_cbs cma_cb_table[] = {
	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats,
				       .module = THIS_MODULE },
};

static int cma_init_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);

	return 0;
}

static void cma_exit_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
}

static struct pernet_operations cma_pernet_operations = {
	.init = cma_init_net,
	.exit = cma_exit_net,
	.id = &cma_pernet_id,
	.size = sizeof(struct cma_pernet),
};
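
/*
 * Module bring-up below registers, in order: the ordered workqueue, the
 * per-net port-space state, the SA and address-resolution clients plus the
 * netdevice notifier, the IB client, and finally the netlink callback table
 * and the configfs interface.  cma_cleanup() unwinds the same steps in
 * reverse.
 */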

static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&cma_pernet_operations);
	if (ret)
		goto err_wq;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);
	register_netdevice_notifier(&cma_nb);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
			    cma_cb_table))
		pr_warn("RDMA CMA: failed to add netlink callback\n");
	cma_configfs_init();

	return 0;

err:
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ibnl_remove_client(RDMA_NL_RDMA_CM);
	ib_unregister_client(&cma_client);
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
	destroy_workqueue(cma_wq);
}

module_init(cma_init);
module_exit(cma_cleanup);