/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
#include <net/ip6_route.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include "core_priv.h"
#include "cma_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);

const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
						int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return ibcm_reject_msg(reason);

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return iwcm_reject_msg(reason);

	WARN_ON_ONCE(1);
	return "unrecognized transport";
}
EXPORT_SYMBOL(rdma_reject_msg);

bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return reason == IB_CM_REJ_CONSUMER_DEFINED;

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return reason == -ECONNREFUSED;

	WARN_ON_ONCE(1);
	return false;
}
EXPORT_SYMBOL(rdma_is_consumer_reject);

const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
				      struct rdma_cm_event *ev, u8 *data_len)
{
	const void *p;

	if (rdma_is_consumer_reject(id, ev->status)) {
		*data_len = ev->param.conn.private_data_len;
		p = ev->param.conn.private_data;
	} else {
		*data_len = 0;
		p = NULL;
	}
	return p;
}
EXPORT_SYMBOL(rdma_consumer_reject_data);
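/*
 * Usage sketch (illustrative only, not part of this file): a ULP event
 * handler can combine the helpers above to report a peer reject and
 * fetch any consumer-supplied reject payload. The handler name
 * "my_cm_handler" is hypothetical.
 *
 *	static int my_cm_handler(struct rdma_cm_id *id,
 *				 struct rdma_cm_event *ev)
 *	{
 *		const void *data;
 *		u8 len;
 *
 *		if (ev->event == RDMA_CM_EVENT_REJECTED) {
 *			pr_info("reject: %s\n",
 *				rdma_reject_msg(id, ev->status));
 *			data = rdma_consumer_reject_data(id, ev, &len);
 *			(data and len are valid only for consumer rejects)
 *		}
 *		return 0;
 *	}
 */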
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;

struct cma_pernet {
	struct idr tcp_ps;
	struct idr udp_ps;
	struct idr ipoib_ps;
	struct idr ib_ps;
};

static struct cma_pernet *cma_pernet(struct net *net)
{
	return net_generic(net, cma_pernet_id);
}

static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
{
	struct cma_pernet *pernet = cma_pernet(net);

	switch (ps) {
	case RDMA_PS_TCP:
		return &pernet->tcp_ps;
	case RDMA_PS_UDP:
		return &pernet->udp_ps;
	case RDMA_PS_IPOIB:
		return &pernet->ipoib_ps;
	case RDMA_PS_IB:
		return &pernet->ib_ps;
	default:
		return NULL;
	}
}

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
	struct completion	comp;
	atomic_t		refcount;
	struct list_head	id_list;
	enum ib_gid_type	*default_gid_type;
	u8			*default_roce_tos;
};

struct rdma_bind_list {
	enum rdma_ucm_port_space ps;
	struct hlist_head	owners;
	unsigned short		port;
};

struct class_port_info_context {
	struct ib_class_port_info	*class_port_info;
	struct ib_device		*device;
	struct completion		done;
	struct ib_sa_query		*sa_query;
	u8				port_num;
};

static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
			struct rdma_bind_list *bind_list, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}

static struct rdma_bind_list *cma_ps_find(struct net *net,
					  enum rdma_ucm_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_find(idr, snum);
}

static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
			  int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	idr_remove(idr, snum);
}

enum {
	CMA_OPTION_AFONLY,
};

void cma_ref_dev(struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;

	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_ref_dev(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
			     u8 default_roce_tos)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
		default_roce_tos;

	return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at anytime, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
	bool			igmp_joined;
	u8			join_state;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum rdma_cm_state	old_state;
	enum rdma_cm_state	new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	 work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__be32 pad[3];
		__be32 addr;
	} ip4;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00

struct cma_req_info {
	struct sockaddr_storage listen_addr_storage;
	struct sockaddr_storage src_addr_storage;
	struct ib_device *device;
	int port;
	union ib_gid local_gid;
	__be64 service_id;
	u16 pkey;
	bool has_gid:1;
};

static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
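/*
 * Worked example (illustrative only): the nibble packing above keeps
 * the IP version in bits 7:4 of ip_version. With a zero-initialized
 * header, cma_set_ip_ver(&hdr, 4) stores 0x40 and cma_get_ip_ver(&hdr)
 * then returns 4; the low nibble is preserved. Receivers use this
 * nibble in cma_save_ip_info() below to choose the IPv4 or IPv6
 * decoding of the private-data header. "hdr" is a hypothetical local:
 *
 *	struct cma_hdr hdr = { .cma_version = CMA_VERSION };
 *
 *	cma_set_ip_ver(&hdr, 4);
 *	WARN_ON(cma_get_ip_ver(&hdr) != 4);
 */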
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
	struct in_device *in_dev = NULL;

	if (ndev) {
		rtnl_lock();
		in_dev = __in_dev_get_rtnl(ndev);
		if (in_dev) {
			if (join)
				ip_mc_inc_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
			else
				ip_mc_dec_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
		}
		rtnl_unlock();
	}
	return (in_dev) ? 0 : -ENODEV;
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->gid_type = 0;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
	id_priv->res.type = RDMA_RESTRACK_CM_ID;
	rdma_restrack_add(&id_priv->res);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
		ret = 0;
	}

	return ret;
}

static inline int cma_validate_port(struct ib_device *device, u8 port,
				    enum ib_gid_type gid_type,
				    union ib_gid *gid,
				    struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int bound_if_index = dev_addr->bound_dev_if;
	int dev_type = dev_addr->dev_type;
	struct net_device *ndev = NULL;
	int ret = -ENODEV;

	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ret;

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ret;

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(dev_addr->net, bound_if_index);
		if (!ndev)
			return ret;
	} else {
		gid_type = IB_GID_TYPE_IB;
	}

	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
					 ndev, NULL);

	if (ndev)
		dev_put(ndev);

	return ret;
}

static int cma_acquire_dev(struct rdma_id_private *id_priv,
			   struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid, iboe_gid, *gidp;
	int ret = -ENODEV;
	u8 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	mutex_lock(&lock);
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);

	if (listen_id_priv) {
		cma_dev = listen_id_priv->cma_dev;
		port = listen_id_priv->id.port_num;
		gidp = rdma_protocol_roce(cma_dev->device, port) ?
		       &iboe_gid : &gid;

		ret = cma_validate_port(cma_dev->device, port,
					rdma_protocol_ib(cma_dev->device, port) ?
					IB_GID_TYPE_IB :
					listen_id_priv->gid_type, gidp,
					id_priv);
		if (!ret) {
			id_priv->id.port_num = port;
			goto out;
		}
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv &&
			    listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
						rdma_protocol_ib(cma_dev->device, port) ?
						IB_GID_TYPE_IB :
						cma_dev->default_gid_type[port - 1],
						gidp, id_priv);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	u16 pkey, index;
	u8 p;
	enum ib_port_state port_state;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
				continue;
			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
						       &gid, NULL);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
						 dgid->global.subnet_prefix) &&
				    port_state == IB_PORT_ACTIVE) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}

	if (!cma_dev)
		return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *__rdma_create_id(struct net *net,
				    rdma_cm_event_handler event_handler,
				    void *context, enum rdma_ucm_port_space ps,
				    enum ib_qp_type qp_type, const char *caller)
{
	struct rdma_id_private *id_priv;

	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	if (caller)
		id_priv->res.kern_name = caller;
	else
		rdma_restrack_set_task(&id_priv->res, current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	id_priv->tos_set = false;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = get_net(net);
	id_priv->seq_num &= 0x00ffffff;

	return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);
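/*
 * Usage sketch (an assumption, not part of this file): kernel ULPs do
 * not call __rdma_create_id() directly; they use the rdma_create_id()
 * wrapper, which supplies the caller name for resource tracking. Error
 * handling is elided and "my_cm_handler"/"my_ctx" are hypothetical.
 *
 *	struct rdma_cm_id *id;
 *
 *	id = rdma_create_id(&init_net, my_cm_handler, my_ctx,
 *			    RDMA_PS_TCP, IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	...
 *	rdma_destroy_id(id);
 */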
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);
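/*
 * Usage sketch (illustrative, with hypothetical cq/pd variables): once
 * a route is resolved, a ULP typically lets the CM create and
 * initialize its QP. rdma_create_qp() moves a connected QP to INIT
 * (and a UD QP all the way to RTS via cma_init_ud_qp() above).
 *
 *	struct ib_qp_init_attr attr = {
 *		.qp_type = IB_QPT_RC,
 *		.send_cq = cq,
 *		.recv_cq = cq,
 *		.cap	 = { .max_send_wr = 16, .max_recv_wr = 16,
 *			     .max_send_sge = 1, .max_recv_sge = 1 },
 *	};
 *
 *	ret = rdma_create_qp(id, pd, &attr);
 */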
static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
		pkey = 0xffff;
	else
		pkey = ib_addr_get_pkey(dev_addr);

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (id_priv->id.qp_type == IB_QPT_UD) {
		ret = cma_set_qkey(id_priv, 0);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		      int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);

		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
		qp_attr->port_num = id_priv->id.port_num;
		*qp_attr_mask |= IB_QP_PORT;
	} else
		ret = -ENOSYS;

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_any_addr(struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
	if (src->sa_family != dst->sa_family)
		return -1;

	switch (src->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
	case AF_INET6:
		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
				     &((struct sockaddr_in6 *) dst)->sin6_addr);
	default:
		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
				   &((struct sockaddr_ib *) dst)->sib_addr);
	}
}

static __be16 cma_port(struct sockaddr *addr)
{
	struct sockaddr_ib *sib;

	switch (addr->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) addr)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *) addr)->sin6_port;
	case AF_IB:
		sib = (struct sockaddr_ib *) addr;
		return htons((u16) (be64_to_cpu(sib->sib_sid) &
				    be64_to_cpu(sib->sib_sid_mask)));
	default:
		return 0;
	}
}

static inline int cma_any_port(struct sockaddr *addr)
{
	return !cma_port(addr);
}

static void cma_save_ib_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct sa_path_rec *path)
{
	struct sockaddr_ib *listen_ib, *ib;

	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
	if (src_addr) {
		ib = (struct sockaddr_ib *)src_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->sgid, 16);
			ib->sib_sid = path->service_id;
			ib->sib_scope_id = 0;
		} else {
			ib->sib_pkey = listen_ib->sib_pkey;
			ib->sib_flowinfo = listen_ib->sib_flowinfo;
			ib->sib_addr = listen_ib->sib_addr;
			ib->sib_sid = listen_ib->sib_sid;
			ib->sib_scope_id = listen_ib->sib_scope_id;
		}
		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	}
	if (dst_addr) {
		ib = (struct sockaddr_ib *)dst_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->dgid, 16);
		}
	}
}

static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
	}
}

static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
	}
}

static u16 cma_port_from_service_id(__be64 service_id)
{
	return (u16)be64_to_cpu(service_id);
}

static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family == AF_IB) {
		if (ib_event->event == IB_CM_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id,
					 ib_event->param.req_rcvd.primary_path);
		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		return 0;
	}

	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}

static int cma_save_req_info(const struct ib_cm_event *ib_event,
			     struct cma_req_info *req)
{
	const struct ib_cm_req_event_param *req_param =
		&ib_event->param.req_rcvd;
	const struct ib_cm_sidr_req_event_param *sidr_param =
		&ib_event->param.sidr_req_rcvd;

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		req->device	= req_param->listen_id->device;
		req->port	= req_param->port;
		memcpy(&req->local_gid, &req_param->primary_path->sgid,
		       sizeof(req->local_gid));
		req->has_gid	= true;
		req->service_id = req_param->primary_path->service_id;
		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
		if (req->pkey != req_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    req_param->bth_pkey, req->pkey);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		req->device	= sidr_param->listen_id->device;
		req->port	= sidr_param->port;
		req->has_gid	= false;
		req->service_id	= sidr_param->service_id;
		req->pkey	= sidr_param->pkey;
		if (req->pkey != sidr_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    sidr_param->bth_pkey, req->pkey);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static bool validate_ipv4_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
	__be32 daddr = dst_addr->sin_addr.s_addr,
	       saddr = src_addr->sin_addr.s_addr;
	struct fib_result res;
	struct flowi4 fl4;
	int err;
	bool ret;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
	    ipv4_is_loopback(saddr))
		return false;

	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_iif = net_dev->ifindex;
	fl4.daddr = daddr;
	fl4.saddr = saddr;

	rcu_read_lock();
	err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
	ret = err == 0 && FIB_RES_DEV(res) == net_dev;
	rcu_read_unlock();

	return ret;
}

static bool validate_ipv6_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#if IS_ENABLED(CONFIG_IPV6)
	const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
			   IPV6_ADDR_LINKLOCAL;
	struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
					 &src_addr->sin6_addr, net_dev->ifindex,
					 NULL, strict);
	bool ret;

	if (!rt)
		return false;

	ret = rt->rt6i_idev->dev == net_dev;
	ip6_rt_put(rt);

	return ret;
#else
	return false;
#endif
}

static bool validate_net_dev(struct net_device *net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;

	switch (daddr->sa_family) {
	case AF_INET:
		return saddr->sa_family == AF_INET &&
		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);

	case AF_INET6:
		return saddr->sa_family == AF_INET6 &&
		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);

	default:
		return false;
	}
}

static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
					  struct cma_req_info *req)
{
	struct sockaddr *listen_addr =
			(struct sockaddr *)&req->listen_addr_storage;
	struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage;
	struct net_device *net_dev;
	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
					   gid, listen_addr);
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	return net_dev;
}

static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const struct cma_hdr *hdr)
{
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const int port_num = id->port_num ?: rdma_start_port(device);

	return rdma_protocol_roce(device, port_num);
}

static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const struct net_device *net_dev,
			      u8 port_num)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (!net_dev)
		/* This request is an AF_IB request or a RoCE request */
		return (!id->port_num || id->port_num == port_num) &&
		       (addr->src_addr.ss_family == AF_IB ||
			rdma_protocol_roce(id->device, port_num));

	return !addr->dev_addr.bound_dev_if ||
	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
		addr->dev_addr.bound_dev_if == net_dev->ifindex);
}

static struct rdma_id_private *cma_find_listener(
		const struct rdma_bind_list *bind_list,
		const struct ib_cm_id *cm_id,
		const struct ib_cm_event *ib_event,
		const struct cma_req_info *req,
		const struct net_device *net_dev)
{
	struct rdma_id_private *id_priv, *id_priv_dev;

	if (!bind_list)
		return ERR_PTR(-EINVAL);

	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
		if (cma_match_private_data(id_priv, ib_event->private_data)) {
			if (id_priv->id.device == cm_id->device &&
			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
				return id_priv;
			list_for_each_entry(id_priv_dev,
					    &id_priv->listen_list,
					    listen_list) {
				if (id_priv_dev->id.device == cm_id->device &&
				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
					return id_priv_dev;
			}
		}
	}

	return ERR_PTR(-EINVAL);
}

static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
						 struct ib_cm_event *ib_event,
						 struct net_device **net_dev)
{
	struct cma_req_info req;
	struct rdma_bind_list *bind_list;
	struct rdma_id_private *id_priv;
	int err;

	err = cma_save_req_info(ib_event, &req);
	if (err)
		return ERR_PTR(err);

	*net_dev = cma_get_net_dev(ib_event, &req);
	if (IS_ERR(*net_dev)) {
		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
			/* Assuming the protocol is AF_IB */
			*net_dev = NULL;
		} else if (rdma_protocol_roce(req.device, req.port)) {
			/* TODO find the net dev matching the request parameters
			 * through the RoCE GID table */
			*net_dev = NULL;
		} else {
			return ERR_CAST(*net_dev);
		}
	}

	/*
	 * The net namespace might be getting deleted while the route and
	 * cm_id lookups are in progress. Therefore, perform the netdevice
	 * validation and the cm_id lookup under the RCU lock.
	 * The RCU lock, together with the netdevice state check, synchronizes
	 * with a netdevice migrating to a different net namespace and also
	 * ensures that the net namespace is not deleted while the lookup is
	 * in progress.
	 * If the device state is not IFF_UP, its properties such as ifindex
	 * and nd_net cannot be trusted to remain valid without the RCU lock.
	 * net/core/dev.c change_net_namespace() ensures to synchronize with
	 * ongoing operations on a net device after the device is closed using
	 * synchronize_net().
	 */
	rcu_read_lock();
	if (*net_dev) {
		/*
		 * If the netdevice is down, it is likely that it is
		 * administratively down or it might be migrating to a
		 * different namespace. In that case avoid further processing,
		 * as the net namespace or ifindex may change.
		 */
		if (((*net_dev)->flags & IFF_UP) == 0) {
			id_priv = ERR_PTR(-EHOSTUNREACH);
			goto err;
		}

		if (!validate_net_dev(*net_dev,
				 (struct sockaddr *)&req.listen_addr_storage,
				 (struct sockaddr *)&req.src_addr_storage)) {
			id_priv = ERR_PTR(-EHOSTUNREACH);
			goto err;
		}
	}

	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
				rdma_ps_from_service_id(req.service_id),
				cma_port_from_service_id(req.service_id));
	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
err:
	rcu_read_unlock();
	if (IS_ERR(id_priv) && *net_dev) {
		dev_put(*net_dev);
		*net_dev = NULL;
	}
	return id_priv;
}

static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
{
	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}
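/*
 * Worked example (derived from the definitions above): struct cma_hdr
 * is 36 bytes (1 + 1 + 2 + 16 + 16). For an IP-based flow those 36
 * bytes are carved out of the CM REQ private data, so of the 92-byte
 * IB_CM_REQ_PRIVATE_DATA_SIZE only 56 bytes remain for the consumer.
 * An AF_IB flow carries no cma_hdr (offset 0) and keeps all 92 bytes.
 */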
0 : sizeof(struct cma_hdr); 1534 } 1535 1536 static void cma_cancel_route(struct rdma_id_private *id_priv) 1537 { 1538 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1539 if (id_priv->query) 1540 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1541 } 1542 } 1543 1544 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1545 { 1546 struct rdma_id_private *dev_id_priv; 1547 1548 /* 1549 * Remove from listen_any_list to prevent added devices from spawning 1550 * additional listen requests. 1551 */ 1552 mutex_lock(&lock); 1553 list_del(&id_priv->list); 1554 1555 while (!list_empty(&id_priv->listen_list)) { 1556 dev_id_priv = list_entry(id_priv->listen_list.next, 1557 struct rdma_id_private, listen_list); 1558 /* sync with device removal to avoid duplicate destruction */ 1559 list_del_init(&dev_id_priv->list); 1560 list_del(&dev_id_priv->listen_list); 1561 mutex_unlock(&lock); 1562 1563 rdma_destroy_id(&dev_id_priv->id); 1564 mutex_lock(&lock); 1565 } 1566 mutex_unlock(&lock); 1567 } 1568 1569 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1570 enum rdma_cm_state state) 1571 { 1572 switch (state) { 1573 case RDMA_CM_ADDR_QUERY: 1574 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1575 break; 1576 case RDMA_CM_ROUTE_QUERY: 1577 cma_cancel_route(id_priv); 1578 break; 1579 case RDMA_CM_LISTEN: 1580 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1581 cma_cancel_listens(id_priv); 1582 break; 1583 default: 1584 break; 1585 } 1586 } 1587 1588 static void cma_release_port(struct rdma_id_private *id_priv) 1589 { 1590 struct rdma_bind_list *bind_list = id_priv->bind_list; 1591 struct net *net = id_priv->id.route.addr.dev_addr.net; 1592 1593 if (!bind_list) 1594 return; 1595 1596 mutex_lock(&lock); 1597 hlist_del(&id_priv->node); 1598 if (hlist_empty(&bind_list->owners)) { 1599 cma_ps_remove(net, bind_list->ps, bind_list->port); 1600 kfree(bind_list); 1601 } 1602 mutex_unlock(&lock); 1603 } 1604 1605 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1606 { 1607 struct cma_multicast *mc; 1608 1609 while (!list_empty(&id_priv->mc_list)) { 1610 mc = container_of(id_priv->mc_list.next, 1611 struct cma_multicast, list); 1612 list_del(&mc->list); 1613 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1614 id_priv->id.port_num)) { 1615 ib_sa_free_multicast(mc->multicast.ib); 1616 kfree(mc); 1617 } else { 1618 if (mc->igmp_joined) { 1619 struct rdma_dev_addr *dev_addr = 1620 &id_priv->id.route.addr.dev_addr; 1621 struct net_device *ndev = NULL; 1622 1623 if (dev_addr->bound_dev_if) 1624 ndev = dev_get_by_index(&init_net, 1625 dev_addr->bound_dev_if); 1626 if (ndev) { 1627 cma_igmp_send(ndev, 1628 &mc->multicast.ib->rec.mgid, 1629 false); 1630 dev_put(ndev); 1631 } 1632 } 1633 kref_put(&mc->mcref, release_mc); 1634 } 1635 } 1636 } 1637 1638 void rdma_destroy_id(struct rdma_cm_id *id) 1639 { 1640 struct rdma_id_private *id_priv; 1641 enum rdma_cm_state state; 1642 1643 id_priv = container_of(id, struct rdma_id_private, id); 1644 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1645 cma_cancel_operation(id_priv, state); 1646 1647 /* 1648 * Wait for any active callback to finish. New callbacks will find 1649 * the id_priv state set to destroying and abort. 
1650 */ 1651 mutex_lock(&id_priv->handler_mutex); 1652 mutex_unlock(&id_priv->handler_mutex); 1653 1654 if (id_priv->cma_dev) { 1655 rdma_restrack_del(&id_priv->res); 1656 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1657 if (id_priv->cm_id.ib) 1658 ib_destroy_cm_id(id_priv->cm_id.ib); 1659 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1660 if (id_priv->cm_id.iw) 1661 iw_destroy_cm_id(id_priv->cm_id.iw); 1662 } 1663 cma_leave_mc_groups(id_priv); 1664 cma_release_dev(id_priv); 1665 } 1666 1667 cma_release_port(id_priv); 1668 cma_deref_id(id_priv); 1669 wait_for_completion(&id_priv->comp); 1670 1671 if (id_priv->internal_id) 1672 cma_deref_id(id_priv->id.context); 1673 1674 kfree(id_priv->id.route.path_rec); 1675 put_net(id_priv->id.route.addr.dev_addr.net); 1676 kfree(id_priv); 1677 } 1678 EXPORT_SYMBOL(rdma_destroy_id); 1679 1680 static int cma_rep_recv(struct rdma_id_private *id_priv) 1681 { 1682 int ret; 1683 1684 ret = cma_modify_qp_rtr(id_priv, NULL); 1685 if (ret) 1686 goto reject; 1687 1688 ret = cma_modify_qp_rts(id_priv, NULL); 1689 if (ret) 1690 goto reject; 1691 1692 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1693 if (ret) 1694 goto reject; 1695 1696 return 0; 1697 reject: 1698 pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); 1699 cma_modify_qp_err(id_priv); 1700 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1701 NULL, 0, NULL, 0); 1702 return ret; 1703 } 1704 1705 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1706 struct ib_cm_rep_event_param *rep_data, 1707 void *private_data) 1708 { 1709 event->param.conn.private_data = private_data; 1710 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1711 event->param.conn.responder_resources = rep_data->responder_resources; 1712 event->param.conn.initiator_depth = rep_data->initiator_depth; 1713 event->param.conn.flow_control = rep_data->flow_control; 1714 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1715 event->param.conn.srq = rep_data->srq; 1716 event->param.conn.qp_num = rep_data->remote_qpn; 1717 } 1718 1719 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1720 { 1721 struct rdma_id_private *id_priv = cm_id->context; 1722 struct rdma_cm_event event; 1723 int ret = 0; 1724 1725 mutex_lock(&id_priv->handler_mutex); 1726 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1727 id_priv->state != RDMA_CM_CONNECT) || 1728 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1729 id_priv->state != RDMA_CM_DISCONNECT)) 1730 goto out; 1731 1732 memset(&event, 0, sizeof event); 1733 switch (ib_event->event) { 1734 case IB_CM_REQ_ERROR: 1735 case IB_CM_REP_ERROR: 1736 event.event = RDMA_CM_EVENT_UNREACHABLE; 1737 event.status = -ETIMEDOUT; 1738 break; 1739 case IB_CM_REP_RECEIVED: 1740 if (cma_comp(id_priv, RDMA_CM_CONNECT) && 1741 (id_priv->id.qp_type != IB_QPT_UD)) 1742 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1743 if (id_priv->id.qp) { 1744 event.status = cma_rep_recv(id_priv); 1745 event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : 1746 RDMA_CM_EVENT_ESTABLISHED; 1747 } else { 1748 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1749 } 1750 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1751 ib_event->private_data); 1752 break; 1753 case IB_CM_RTU_RECEIVED: 1754 case IB_CM_USER_ESTABLISHED: 1755 event.event = RDMA_CM_EVENT_ESTABLISHED; 1756 break; 1757 case IB_CM_DREQ_ERROR: 1758 event.status = -ETIMEDOUT; /* fall through */ 1759 case IB_CM_DREQ_RECEIVED: 1760 case IB_CM_DREP_RECEIVED: 1761 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1762 RDMA_CM_DISCONNECT)) 1763 goto out; 1764 event.event = RDMA_CM_EVENT_DISCONNECTED; 1765 break; 1766 case IB_CM_TIMEWAIT_EXIT: 1767 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1768 break; 1769 case IB_CM_MRA_RECEIVED: 1770 /* ignore event */ 1771 goto out; 1772 case IB_CM_REJ_RECEIVED: 1773 pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id, 1774 ib_event->param.rej_rcvd.reason)); 1775 cma_modify_qp_err(id_priv); 1776 event.status = ib_event->param.rej_rcvd.reason; 1777 event.event = RDMA_CM_EVENT_REJECTED; 1778 event.param.conn.private_data = ib_event->private_data; 1779 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1780 break; 1781 default: 1782 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 1783 ib_event->event); 1784 goto out; 1785 } 1786 1787 ret = id_priv->id.event_handler(&id_priv->id, &event); 1788 if (ret) { 1789 /* Destroy the CM ID by returning a non-zero value. */ 1790 id_priv->cm_id.ib = NULL; 1791 cma_exch(id_priv, RDMA_CM_DESTROYING); 1792 mutex_unlock(&id_priv->handler_mutex); 1793 rdma_destroy_id(&id_priv->id); 1794 return ret; 1795 } 1796 out: 1797 mutex_unlock(&id_priv->handler_mutex); 1798 return ret; 1799 } 1800 1801 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 1802 struct ib_cm_event *ib_event, 1803 struct net_device *net_dev) 1804 { 1805 struct rdma_id_private *listen_id_priv; 1806 struct rdma_id_private *id_priv; 1807 struct rdma_cm_id *id; 1808 struct rdma_route *rt; 1809 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1810 struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; 1811 const __be64 service_id = 1812 ib_event->param.req_rcvd.primary_path->service_id; 1813 int ret; 1814 1815 listen_id_priv = container_of(listen_id, struct rdma_id_private, id); 1816 id = __rdma_create_id(listen_id->route.addr.dev_addr.net, 1817 listen_id->event_handler, listen_id->context, 1818 listen_id->ps, ib_event->param.req_rcvd.qp_type, 1819 listen_id_priv->res.kern_name); 1820 if (IS_ERR(id)) 1821 return NULL; 1822 1823 id_priv = container_of(id, struct rdma_id_private, id); 1824 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1825 (struct sockaddr *)&id->route.addr.dst_addr, 1826 listen_id, ib_event, ss_family, service_id)) 1827 goto err; 1828 1829 rt = &id->route; 1830 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; 1831 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 1832 GFP_KERNEL); 1833 if (!rt->path_rec) 1834 goto err; 1835 1836 rt->path_rec[0] = *path; 1837 if (rt->num_paths == 2) 1838 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1839 1840 if (net_dev) { 1841 rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 1842 } else { 1843 if (!cma_protocol_roce(listen_id) && 1844 cma_any_addr(cma_src_addr(id_priv))) { 1845 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 1846 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 1847 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1848 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 1849 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 1850 if (ret) 1851 goto err; 1852 } 1853 } 1854 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1855 1856 id_priv->state = RDMA_CM_CONNECT; 1857 return id_priv; 1858 1859 err: 1860 rdma_destroy_id(id); 1861 return NULL; 1862 } 1863 1864 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 1865 struct ib_cm_event *ib_event, 1866 struct net_device *net_dev) 1867 { 1868 struct rdma_id_private *listen_id_priv; 1869 struct rdma_id_private *id_priv; 1870 struct rdma_cm_id *id; 1871 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1872 struct net *net = listen_id->route.addr.dev_addr.net; 1873 int ret; 1874 1875 listen_id_priv = container_of(listen_id, struct rdma_id_private, id); 1876 id = __rdma_create_id(net, listen_id->event_handler, listen_id->context, 1877 listen_id->ps, IB_QPT_UD, 1878 listen_id_priv->res.kern_name); 1879 if (IS_ERR(id)) 1880 return NULL; 1881 1882 id_priv = container_of(id, struct rdma_id_private, id); 1883 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1884 (struct sockaddr *)&id->route.addr.dst_addr, 1885 listen_id, ib_event, ss_family, 1886 ib_event->param.sidr_req_rcvd.service_id)) 1887 goto err; 1888 1889 if (net_dev) { 1890 rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 1891 } else { 1892 if (!cma_any_addr(cma_src_addr(id_priv))) { 1893 ret = cma_translate_addr(cma_src_addr(id_priv), 1894 &id->route.addr.dev_addr); 1895 if (ret) 1896 goto err; 1897 } 1898 } 1899 1900 id_priv->state = RDMA_CM_CONNECT; 1901 return id_priv; 1902 err: 1903 rdma_destroy_id(id); 1904 return NULL; 1905 } 1906 1907 static void cma_set_req_event_data(struct rdma_cm_event *event, 1908 struct ib_cm_req_event_param *req_data, 1909 void *private_data, int offset) 1910 { 1911 event->param.conn.private_data = private_data + offset; 1912 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 1913 event->param.conn.responder_resources = req_data->responder_resources; 1914 event->param.conn.initiator_depth = req_data->initiator_depth; 1915 event->param.conn.flow_control = req_data->flow_control; 1916 event->param.conn.retry_count = req_data->retry_count; 1917 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 1918 event->param.conn.srq = req_data->srq; 1919 event->param.conn.qp_num = req_data->remote_qpn; 1920 } 1921 1922 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 1923 { 1924 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 1925 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 1926 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 1927 (id->qp_type == IB_QPT_UD)) || 1928 (!id->qp_type)); 1929 } 1930 1931 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event 
*ib_event) 1932 { 1933 struct rdma_id_private *listen_id, *conn_id = NULL; 1934 struct rdma_cm_event event; 1935 struct net_device *net_dev; 1936 u8 offset; 1937 int ret; 1938 1939 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 1940 if (IS_ERR(listen_id)) 1941 return PTR_ERR(listen_id); 1942 1943 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 1944 ret = -EINVAL; 1945 goto net_dev_put; 1946 } 1947 1948 mutex_lock(&listen_id->handler_mutex); 1949 if (listen_id->state != RDMA_CM_LISTEN) { 1950 ret = -ECONNABORTED; 1951 goto err1; 1952 } 1953 1954 memset(&event, 0, sizeof event); 1955 offset = cma_user_data_offset(listen_id); 1956 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1957 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1958 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 1959 event.param.ud.private_data = ib_event->private_data + offset; 1960 event.param.ud.private_data_len = 1961 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 1962 } else { 1963 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 1964 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 1965 ib_event->private_data, offset); 1966 } 1967 if (!conn_id) { 1968 ret = -ENOMEM; 1969 goto err1; 1970 } 1971 1972 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1973 ret = cma_acquire_dev(conn_id, listen_id); 1974 if (ret) 1975 goto err2; 1976 1977 conn_id->cm_id.ib = cm_id; 1978 cm_id->context = conn_id; 1979 cm_id->cm_handler = cma_ib_handler; 1980 1981 /* 1982 * Protect against the user destroying conn_id from another thread 1983 * until we're done accessing it. 1984 */ 1985 atomic_inc(&conn_id->refcount); 1986 ret = conn_id->id.event_handler(&conn_id->id, &event); 1987 if (ret) 1988 goto err3; 1989 /* 1990 * Acquire mutex to prevent user executing rdma_destroy_id() 1991 * while we're accessing the cm_id. 1992 */ 1993 mutex_lock(&lock); 1994 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 1995 (conn_id->id.qp_type != IB_QPT_UD)) 1996 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1997 mutex_unlock(&lock); 1998 mutex_unlock(&conn_id->handler_mutex); 1999 mutex_unlock(&listen_id->handler_mutex); 2000 cma_deref_id(conn_id); 2001 if (net_dev) 2002 dev_put(net_dev); 2003 return 0; 2004 2005 err3: 2006 cma_deref_id(conn_id); 2007 /* Destroy the CM ID by returning a non-zero value. 
*/ 2008 conn_id->cm_id.ib = NULL; 2009 err2: 2010 cma_exch(conn_id, RDMA_CM_DESTROYING); 2011 mutex_unlock(&conn_id->handler_mutex); 2012 err1: 2013 mutex_unlock(&listen_id->handler_mutex); 2014 if (conn_id) 2015 rdma_destroy_id(&conn_id->id); 2016 2017 net_dev_put: 2018 if (net_dev) 2019 dev_put(net_dev); 2020 2021 return ret; 2022 } 2023 2024 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2025 { 2026 if (addr->sa_family == AF_IB) 2027 return ((struct sockaddr_ib *) addr)->sib_sid; 2028 2029 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2030 } 2031 EXPORT_SYMBOL(rdma_get_service_id); 2032 2033 void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, 2034 union ib_gid *dgid) 2035 { 2036 struct rdma_addr *addr = &cm_id->route.addr; 2037 2038 if (!cm_id->device) { 2039 if (sgid) 2040 memset(sgid, 0, sizeof(*sgid)); 2041 if (dgid) 2042 memset(dgid, 0, sizeof(*dgid)); 2043 return; 2044 } 2045 2046 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) { 2047 if (sgid) 2048 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid); 2049 if (dgid) 2050 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid); 2051 } else { 2052 if (sgid) 2053 rdma_addr_get_sgid(&addr->dev_addr, sgid); 2054 if (dgid) 2055 rdma_addr_get_dgid(&addr->dev_addr, dgid); 2056 } 2057 } 2058 EXPORT_SYMBOL(rdma_read_gids); 2059 2060 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2061 { 2062 struct rdma_id_private *id_priv = iw_id->context; 2063 struct rdma_cm_event event; 2064 int ret = 0; 2065 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2066 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2067 2068 mutex_lock(&id_priv->handler_mutex); 2069 if (id_priv->state != RDMA_CM_CONNECT) 2070 goto out; 2071 2072 memset(&event, 0, sizeof event); 2073 switch (iw_event->event) { 2074 case IW_CM_EVENT_CLOSE: 2075 event.event = RDMA_CM_EVENT_DISCONNECTED; 2076 break; 2077 case IW_CM_EVENT_CONNECT_REPLY: 2078 memcpy(cma_src_addr(id_priv), laddr, 2079 rdma_addr_size(laddr)); 2080 memcpy(cma_dst_addr(id_priv), raddr, 2081 rdma_addr_size(raddr)); 2082 switch (iw_event->status) { 2083 case 0: 2084 event.event = RDMA_CM_EVENT_ESTABLISHED; 2085 event.param.conn.initiator_depth = iw_event->ird; 2086 event.param.conn.responder_resources = iw_event->ord; 2087 break; 2088 case -ECONNRESET: 2089 case -ECONNREFUSED: 2090 event.event = RDMA_CM_EVENT_REJECTED; 2091 break; 2092 case -ETIMEDOUT: 2093 event.event = RDMA_CM_EVENT_UNREACHABLE; 2094 break; 2095 default: 2096 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2097 break; 2098 } 2099 break; 2100 case IW_CM_EVENT_ESTABLISHED: 2101 event.event = RDMA_CM_EVENT_ESTABLISHED; 2102 event.param.conn.initiator_depth = iw_event->ird; 2103 event.param.conn.responder_resources = iw_event->ord; 2104 break; 2105 default: 2106 BUG_ON(1); 2107 } 2108 2109 event.status = iw_event->status; 2110 event.param.conn.private_data = iw_event->private_data; 2111 event.param.conn.private_data_len = iw_event->private_data_len; 2112 ret = id_priv->id.event_handler(&id_priv->id, &event); 2113 if (ret) { 2114 /* Destroy the CM ID by returning a non-zero value. 
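 * The handler_mutex must be dropped before rdma_destroy_id() below, since
 * destroy takes that mutex while waiting for running handlers to finish.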
*/ 2115 id_priv->cm_id.iw = NULL; 2116 cma_exch(id_priv, RDMA_CM_DESTROYING); 2117 mutex_unlock(&id_priv->handler_mutex); 2118 rdma_destroy_id(&id_priv->id); 2119 return ret; 2120 } 2121 2122 out: 2123 mutex_unlock(&id_priv->handler_mutex); 2124 return ret; 2125 } 2126 2127 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2128 struct iw_cm_event *iw_event) 2129 { 2130 struct rdma_cm_id *new_cm_id; 2131 struct rdma_id_private *listen_id, *conn_id; 2132 struct rdma_cm_event event; 2133 int ret = -ECONNABORTED; 2134 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2135 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2136 2137 listen_id = cm_id->context; 2138 2139 mutex_lock(&listen_id->handler_mutex); 2140 if (listen_id->state != RDMA_CM_LISTEN) 2141 goto out; 2142 2143 /* Create a new RDMA id for the new IW CM ID */ 2144 new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2145 listen_id->id.event_handler, 2146 listen_id->id.context, 2147 RDMA_PS_TCP, IB_QPT_RC, 2148 listen_id->res.kern_name); 2149 if (IS_ERR(new_cm_id)) { 2150 ret = -ENOMEM; 2151 goto out; 2152 } 2153 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2154 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2155 conn_id->state = RDMA_CM_CONNECT; 2156 2157 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2158 if (ret) { 2159 mutex_unlock(&conn_id->handler_mutex); 2160 rdma_destroy_id(new_cm_id); 2161 goto out; 2162 } 2163 2164 ret = cma_acquire_dev(conn_id, listen_id); 2165 if (ret) { 2166 mutex_unlock(&conn_id->handler_mutex); 2167 rdma_destroy_id(new_cm_id); 2168 goto out; 2169 } 2170 2171 conn_id->cm_id.iw = cm_id; 2172 cm_id->context = conn_id; 2173 cm_id->cm_handler = cma_iw_handler; 2174 2175 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2176 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2177 2178 memset(&event, 0, sizeof event); 2179 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2180 event.param.conn.private_data = iw_event->private_data; 2181 event.param.conn.private_data_len = iw_event->private_data_len; 2182 event.param.conn.initiator_depth = iw_event->ird; 2183 event.param.conn.responder_resources = iw_event->ord; 2184 2185 /* 2186 * Protect against the user destroying conn_id from another thread 2187 * until we're done accessing it. 
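 * The reference taken below is dropped via cma_deref_id() once the event
 * handler has returned.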
2188 */ 2189 atomic_inc(&conn_id->refcount); 2190 ret = conn_id->id.event_handler(&conn_id->id, &event); 2191 if (ret) { 2192 /* User wants to destroy the CM ID */ 2193 conn_id->cm_id.iw = NULL; 2194 cma_exch(conn_id, RDMA_CM_DESTROYING); 2195 mutex_unlock(&conn_id->handler_mutex); 2196 cma_deref_id(conn_id); 2197 rdma_destroy_id(&conn_id->id); 2198 goto out; 2199 } 2200 2201 mutex_unlock(&conn_id->handler_mutex); 2202 cma_deref_id(conn_id); 2203 2204 out: 2205 mutex_unlock(&listen_id->handler_mutex); 2206 return ret; 2207 } 2208 2209 static int cma_ib_listen(struct rdma_id_private *id_priv) 2210 { 2211 struct sockaddr *addr; 2212 struct ib_cm_id *id; 2213 __be64 svc_id; 2214 2215 addr = cma_src_addr(id_priv); 2216 svc_id = rdma_get_service_id(&id_priv->id, addr); 2217 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2218 if (IS_ERR(id)) 2219 return PTR_ERR(id); 2220 id_priv->cm_id.ib = id; 2221 2222 return 0; 2223 } 2224 2225 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2226 { 2227 int ret; 2228 struct iw_cm_id *id; 2229 2230 id = iw_create_cm_id(id_priv->id.device, 2231 iw_conn_req_handler, 2232 id_priv); 2233 if (IS_ERR(id)) 2234 return PTR_ERR(id); 2235 2236 id->tos = id_priv->tos; 2237 id_priv->cm_id.iw = id; 2238 2239 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2240 rdma_addr_size(cma_src_addr(id_priv))); 2241 2242 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2243 2244 if (ret) { 2245 iw_destroy_cm_id(id_priv->cm_id.iw); 2246 id_priv->cm_id.iw = NULL; 2247 } 2248 2249 return ret; 2250 } 2251 2252 static int cma_listen_handler(struct rdma_cm_id *id, 2253 struct rdma_cm_event *event) 2254 { 2255 struct rdma_id_private *id_priv = id->context; 2256 2257 id->context = id_priv->id.context; 2258 id->event_handler = id_priv->id.event_handler; 2259 return id_priv->id.event_handler(id, event); 2260 } 2261 2262 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2263 struct cma_device *cma_dev) 2264 { 2265 struct rdma_id_private *dev_id_priv; 2266 struct rdma_cm_id *id; 2267 struct net *net = id_priv->id.route.addr.dev_addr.net; 2268 int ret; 2269 2270 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2271 return; 2272 2273 id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2274 id_priv->id.qp_type, id_priv->res.kern_name); 2275 if (IS_ERR(id)) 2276 return; 2277 2278 dev_id_priv = container_of(id, struct rdma_id_private, id); 2279 2280 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2281 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2282 rdma_addr_size(cma_src_addr(id_priv))); 2283 2284 _cma_attach_to_dev(dev_id_priv, cma_dev); 2285 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2286 atomic_inc(&id_priv->refcount); 2287 dev_id_priv->internal_id = 1; 2288 dev_id_priv->afonly = id_priv->afonly; 2289 2290 ret = rdma_listen(id, id_priv->backlog); 2291 if (ret) 2292 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2293 ret, cma_dev->device->name); 2294 } 2295 2296 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2297 { 2298 struct cma_device *cma_dev; 2299 2300 mutex_lock(&lock); 2301 list_add_tail(&id_priv->list, &listen_any_list); 2302 list_for_each_entry(cma_dev, &dev_list, list) 2303 cma_listen_on_dev(id_priv, cma_dev); 2304 mutex_unlock(&lock); 2305 } 2306 2307 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2308 { 2309 struct rdma_id_private *id_priv; 2310 2311 id_priv = container_of(id, struct 
rdma_id_private, id); 2312 id_priv->tos = (u8) tos; 2313 id_priv->tos_set = true; 2314 } 2315 EXPORT_SYMBOL(rdma_set_service_type); 2316 2317 static void cma_query_handler(int status, struct sa_path_rec *path_rec, 2318 void *context) 2319 { 2320 struct cma_work *work = context; 2321 struct rdma_route *route; 2322 2323 route = &work->id->id.route; 2324 2325 if (!status) { 2326 route->num_paths = 1; 2327 *route->path_rec = *path_rec; 2328 } else { 2329 work->old_state = RDMA_CM_ROUTE_QUERY; 2330 work->new_state = RDMA_CM_ADDR_RESOLVED; 2331 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2332 work->event.status = status; 2333 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", 2334 status); 2335 } 2336 2337 queue_work(cma_wq, &work->work); 2338 } 2339 2340 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2341 struct cma_work *work) 2342 { 2343 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2344 struct sa_path_rec path_rec; 2345 ib_sa_comp_mask comp_mask; 2346 struct sockaddr_in6 *sin6; 2347 struct sockaddr_ib *sib; 2348 2349 memset(&path_rec, 0, sizeof path_rec); 2350 2351 if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num)) 2352 path_rec.rec_type = SA_PATH_REC_TYPE_OPA; 2353 else 2354 path_rec.rec_type = SA_PATH_REC_TYPE_IB; 2355 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2356 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2357 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2358 path_rec.numb_path = 1; 2359 path_rec.reversible = 1; 2360 path_rec.service_id = rdma_get_service_id(&id_priv->id, 2361 cma_dst_addr(id_priv)); 2362 2363 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2364 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2365 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2366 2367 switch (cma_family(id_priv)) { 2368 case AF_INET: 2369 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2370 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2371 break; 2372 case AF_INET6: 2373 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2374 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2375 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2376 break; 2377 case AF_IB: 2378 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2379 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2380 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2381 break; 2382 } 2383 2384 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2385 id_priv->id.port_num, &path_rec, 2386 comp_mask, timeout_ms, 2387 GFP_KERNEL, cma_query_handler, 2388 work, &id_priv->query); 2389 2390 return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; 2391 } 2392 2393 static void cma_work_handler(struct work_struct *_work) 2394 { 2395 struct cma_work *work = container_of(_work, struct cma_work, work); 2396 struct rdma_id_private *id_priv = work->id; 2397 int destroy = 0; 2398 2399 mutex_lock(&id_priv->handler_mutex); 2400 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2401 goto out; 2402 2403 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2404 cma_exch(id_priv, RDMA_CM_DESTROYING); 2405 destroy = 1; 2406 } 2407 out: 2408 mutex_unlock(&id_priv->handler_mutex); 2409 cma_deref_id(id_priv); 2410 if (destroy) 2411 rdma_destroy_id(&id_priv->id); 2412 kfree(work); 2413 } 2414 2415 static void cma_ndev_work_handler(struct work_struct *_work) 2416 { 2417 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 2418 struct rdma_id_private *id_priv = work->id; 2419 int destroy = 0; 2420 2421 mutex_lock(&id_priv->handler_mutex); 2422 if (id_priv->state == RDMA_CM_DESTROYING || 2423 id_priv->state == RDMA_CM_DEVICE_REMOVAL) 2424 goto out; 2425 2426 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2427 cma_exch(id_priv, RDMA_CM_DESTROYING); 2428 destroy = 1; 2429 } 2430 2431 out: 2432 mutex_unlock(&id_priv->handler_mutex); 2433 cma_deref_id(id_priv); 2434 if (destroy) 2435 rdma_destroy_id(&id_priv->id); 2436 kfree(work); 2437 } 2438 2439 static void cma_init_resolve_route_work(struct cma_work *work, 2440 struct rdma_id_private *id_priv) 2441 { 2442 work->id = id_priv; 2443 INIT_WORK(&work->work, cma_work_handler); 2444 work->old_state = RDMA_CM_ROUTE_QUERY; 2445 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2446 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2447 } 2448 2449 static void cma_init_resolve_addr_work(struct cma_work *work, 2450 struct rdma_id_private *id_priv) 2451 { 2452 work->id = id_priv; 2453 INIT_WORK(&work->work, cma_work_handler); 2454 work->old_state = RDMA_CM_ADDR_QUERY; 2455 work->new_state = RDMA_CM_ADDR_RESOLVED; 2456 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2457 } 2458 2459 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2460 { 2461 struct rdma_route *route = &id_priv->id.route; 2462 struct cma_work *work; 2463 int ret; 2464 2465 work = kzalloc(sizeof *work, GFP_KERNEL); 2466 if (!work) 2467 return -ENOMEM; 2468 2469 cma_init_resolve_route_work(work, id_priv); 2470 2471 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2472 if (!route->path_rec) { 2473 ret = -ENOMEM; 2474 goto err1; 2475 } 2476 2477 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2478 if (ret) 2479 goto err2; 2480 2481 return 0; 2482 err2: 2483 kfree(route->path_rec); 2484 route->path_rec = NULL; 2485 err1: 2486 kfree(work); 2487 return ret; 2488 } 2489 2490 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2491 unsigned long supported_gids, 2492 enum ib_gid_type default_gid) 2493 { 2494 if ((network_type == RDMA_NETWORK_IPV4 || 2495 network_type == RDMA_NETWORK_IPV6) && 2496 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2497 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2498 2499 return default_gid; 2500 } 2501 2502 /* 2503 * cma_iboe_set_path_rec_l2_fields() is a helper function which sets the 2504 * path record type based on the GID type. 2505 * It also sets up the other L2 fields of the path record, including the 2506 * destination MAC address and netdev ifindex. 2507 * It returns the netdev of the bound interface for this path record entry.
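 * The caller must release the returned netdev with dev_put(). A minimal
 * usage sketch, mirroring the callers below (rdma_set_ib_path() and
 * cma_resolve_iboe_route()):
 *
 *	ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
 *	if (!ndev)
 *		return -ENODEV;
 *	dev_put(ndev);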
2508 */ 2509 static struct net_device * 2510 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) 2511 { 2512 struct rdma_route *route = &id_priv->id.route; 2513 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; 2514 struct rdma_addr *addr = &route->addr; 2515 unsigned long supported_gids; 2516 struct net_device *ndev; 2517 2518 if (!addr->dev_addr.bound_dev_if) 2519 return NULL; 2520 2521 ndev = dev_get_by_index(addr->dev_addr.net, 2522 addr->dev_addr.bound_dev_if); 2523 if (!ndev) 2524 return NULL; 2525 2526 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2527 id_priv->id.port_num); 2528 gid_type = cma_route_gid_type(addr->dev_addr.network, 2529 supported_gids, 2530 id_priv->gid_type); 2531 /* Use the hint from IP Stack to select GID Type */ 2532 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2533 gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2534 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); 2535 2536 route->path_rec->roce.route_resolved = true; 2537 sa_path_set_ndev(route->path_rec, addr->dev_addr.net); 2538 sa_path_set_ifindex(route->path_rec, ndev->ifindex); 2539 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); 2540 return ndev; 2541 } 2542 2543 int rdma_set_ib_path(struct rdma_cm_id *id, 2544 struct sa_path_rec *path_rec) 2545 { 2546 struct rdma_id_private *id_priv; 2547 struct net_device *ndev; 2548 int ret; 2549 2550 id_priv = container_of(id, struct rdma_id_private, id); 2551 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2552 RDMA_CM_ROUTE_RESOLVED)) 2553 return -EINVAL; 2554 2555 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec), 2556 GFP_KERNEL); 2557 if (!id->route.path_rec) { 2558 ret = -ENOMEM; 2559 goto err; 2560 } 2561 2562 if (rdma_protocol_roce(id->device, id->port_num)) { 2563 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2564 if (!ndev) { 2565 ret = -ENODEV; 2566 goto err_free; 2567 } 2568 dev_put(ndev); 2569 } 2570 2571 id->route.num_paths = 1; 2572 return 0; 2573 2574 err_free: 2575 kfree(id->route.path_rec); 2576 id->route.path_rec = NULL; 2577 err: 2578 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2579 return ret; 2580 } 2581 EXPORT_SYMBOL(rdma_set_ib_path); 2582 2583 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2584 { 2585 struct cma_work *work; 2586 2587 work = kzalloc(sizeof *work, GFP_KERNEL); 2588 if (!work) 2589 return -ENOMEM; 2590 2591 cma_init_resolve_route_work(work, id_priv); 2592 queue_work(cma_wq, &work->work); 2593 return 0; 2594 } 2595 2596 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2597 { 2598 int prio; 2599 struct net_device *dev; 2600 2601 prio = rt_tos2priority(tos); 2602 dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; 2603 if (dev->num_tc) 2604 return netdev_get_prio_tc_map(dev, prio); 2605 2606 #if IS_ENABLED(CONFIG_VLAN_8021Q) 2607 if (is_vlan_dev(ndev)) 2608 return (vlan_dev_get_egress_qos_mask(ndev, prio) & 2609 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 2610 #endif 2611 return 0; 2612 } 2613 2614 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2615 { 2616 struct rdma_route *route = &id_priv->id.route; 2617 struct rdma_addr *addr = &route->addr; 2618 struct cma_work *work; 2619 int ret; 2620 struct net_device *ndev; 2621 2622 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 2623 rdma_start_port(id_priv->cma_dev->device)]; 2624 u8 tos = id_priv->tos_set ? 
id_priv->tos : default_roce_tos; 2625 2626 2627 work = kzalloc(sizeof *work, GFP_KERNEL); 2628 if (!work) 2629 return -ENOMEM; 2630 2631 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2632 if (!route->path_rec) { 2633 ret = -ENOMEM; 2634 goto err1; 2635 } 2636 2637 route->num_paths = 1; 2638 2639 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2640 if (!ndev) { 2641 ret = -ENODEV; 2642 goto err2; 2643 } 2644 2645 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2646 &route->path_rec->sgid); 2647 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2648 &route->path_rec->dgid); 2649 2650 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2651 /* TODO: get the hoplimit from the inet/inet6 device */ 2652 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2653 else 2654 route->path_rec->hop_limit = 1; 2655 route->path_rec->reversible = 1; 2656 route->path_rec->pkey = cpu_to_be16(0xffff); 2657 route->path_rec->mtu_selector = IB_SA_EQ; 2658 route->path_rec->sl = iboe_tos_to_sl(ndev, tos); 2659 route->path_rec->traffic_class = tos; 2660 route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 2661 route->path_rec->rate_selector = IB_SA_EQ; 2662 route->path_rec->rate = iboe_get_rate(ndev); 2663 dev_put(ndev); 2664 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2665 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2666 if (!route->path_rec->mtu) { 2667 ret = -EINVAL; 2668 goto err2; 2669 } 2670 2671 cma_init_resolve_route_work(work, id_priv); 2672 queue_work(cma_wq, &work->work); 2673 2674 return 0; 2675 2676 err2: 2677 kfree(route->path_rec); 2678 route->path_rec = NULL; 2679 err1: 2680 kfree(work); 2681 return ret; 2682 } 2683 2684 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2685 { 2686 struct rdma_id_private *id_priv; 2687 int ret; 2688 2689 id_priv = container_of(id, struct rdma_id_private, id); 2690 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2691 return -EINVAL; 2692 2693 atomic_inc(&id_priv->refcount); 2694 if (rdma_cap_ib_sa(id->device, id->port_num)) 2695 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2696 else if (rdma_protocol_roce(id->device, id->port_num)) 2697 ret = cma_resolve_iboe_route(id_priv); 2698 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2699 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2700 else 2701 ret = -ENOSYS; 2702 2703 if (ret) 2704 goto err; 2705 2706 return 0; 2707 err: 2708 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2709 cma_deref_id(id_priv); 2710 return ret; 2711 } 2712 EXPORT_SYMBOL(rdma_resolve_route); 2713 2714 static void cma_set_loopback(struct sockaddr *addr) 2715 { 2716 switch (addr->sa_family) { 2717 case AF_INET: 2718 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2719 break; 2720 case AF_INET6: 2721 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2722 0, 0, 0, htonl(1)); 2723 break; 2724 default: 2725 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2726 0, 0, 0, htonl(1)); 2727 break; 2728 } 2729 } 2730 2731 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2732 { 2733 struct cma_device *cma_dev, *cur_dev; 2734 union ib_gid gid; 2735 enum ib_port_state port_state; 2736 u16 pkey; 2737 int ret; 2738 u8 p; 2739 2740 cma_dev = NULL; 2741 mutex_lock(&lock); 2742 list_for_each_entry(cur_dev, &dev_list, list) { 2743 if (cma_family(id_priv) == AF_IB && 2744 !rdma_cap_ib_cm(cur_dev->device, 1)) 2745 continue; 2746 2747 if (!cma_dev) 
2748 cma_dev = cur_dev; 2749 2750 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2751 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && 2752 port_state == IB_PORT_ACTIVE) { 2753 cma_dev = cur_dev; 2754 goto port_found; 2755 } 2756 } 2757 } 2758 2759 if (!cma_dev) { 2760 ret = -ENODEV; 2761 goto out; 2762 } 2763 2764 p = 1; 2765 2766 port_found: 2767 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2768 if (ret) 2769 goto out; 2770 2771 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2772 if (ret) 2773 goto out; 2774 2775 id_priv->id.route.addr.dev_addr.dev_type = 2776 (rdma_protocol_ib(cma_dev->device, p)) ? 2777 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2778 2779 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2780 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2781 id_priv->id.port_num = p; 2782 cma_attach_to_dev(id_priv, cma_dev); 2783 cma_set_loopback(cma_src_addr(id_priv)); 2784 out: 2785 mutex_unlock(&lock); 2786 return ret; 2787 } 2788 2789 static void addr_handler(int status, struct sockaddr *src_addr, 2790 struct rdma_dev_addr *dev_addr, void *context) 2791 { 2792 struct rdma_id_private *id_priv = context; 2793 struct rdma_cm_event event; 2794 2795 memset(&event, 0, sizeof event); 2796 mutex_lock(&id_priv->handler_mutex); 2797 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2798 RDMA_CM_ADDR_RESOLVED)) 2799 goto out; 2800 2801 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2802 if (!status && !id_priv->cma_dev) { 2803 status = cma_acquire_dev(id_priv, NULL); 2804 if (status) 2805 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2806 status); 2807 } else if (status) { 2808 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); 2809 } 2810 2811 if (status) { 2812 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2813 RDMA_CM_ADDR_BOUND)) 2814 goto out; 2815 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2816 event.status = status; 2817 } else 2818 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2819 2820 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2821 cma_exch(id_priv, RDMA_CM_DESTROYING); 2822 mutex_unlock(&id_priv->handler_mutex); 2823 cma_deref_id(id_priv); 2824 rdma_destroy_id(&id_priv->id); 2825 return; 2826 } 2827 out: 2828 mutex_unlock(&id_priv->handler_mutex); 2829 cma_deref_id(id_priv); 2830 } 2831 2832 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2833 { 2834 struct cma_work *work; 2835 union ib_gid gid; 2836 int ret; 2837 2838 work = kzalloc(sizeof *work, GFP_KERNEL); 2839 if (!work) 2840 return -ENOMEM; 2841 2842 if (!id_priv->cma_dev) { 2843 ret = cma_bind_loopback(id_priv); 2844 if (ret) 2845 goto err; 2846 } 2847 2848 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2849 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2850 2851 cma_init_resolve_addr_work(work, id_priv); 2852 queue_work(cma_wq, &work->work); 2853 return 0; 2854 err: 2855 kfree(work); 2856 return ret; 2857 } 2858 2859 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2860 { 2861 struct cma_work *work; 2862 int ret; 2863 2864 work = kzalloc(sizeof *work, GFP_KERNEL); 2865 if (!work) 2866 return -ENOMEM; 2867 2868 if (!id_priv->cma_dev) { 2869 ret = cma_resolve_ib_dev(id_priv); 2870 if (ret) 2871 goto err; 2872 } 2873 2874 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2875 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2876 2877 cma_init_resolve_addr_work(work,
id_priv); 2878 queue_work(cma_wq, &work->work); 2879 return 0; 2880 err: 2881 kfree(work); 2882 return ret; 2883 } 2884 2885 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2886 struct sockaddr *dst_addr) 2887 { 2888 if (!src_addr || !src_addr->sa_family) { 2889 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2890 src_addr->sa_family = dst_addr->sa_family; 2891 if (IS_ENABLED(CONFIG_IPV6) && 2892 dst_addr->sa_family == AF_INET6) { 2893 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2894 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2895 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2896 if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 2897 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2898 } else if (dst_addr->sa_family == AF_IB) { 2899 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2900 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2901 } 2902 } 2903 return rdma_bind_addr(id, src_addr); 2904 } 2905 2906 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2907 struct sockaddr *dst_addr, int timeout_ms) 2908 { 2909 struct rdma_id_private *id_priv; 2910 int ret; 2911 2912 id_priv = container_of(id, struct rdma_id_private, id); 2913 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2914 if (id_priv->state == RDMA_CM_IDLE) { 2915 ret = cma_bind_addr(id, src_addr, dst_addr); 2916 if (ret) { 2917 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2918 return ret; 2919 } 2920 } 2921 2922 if (cma_family(id_priv) != dst_addr->sa_family) { 2923 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2924 return -EINVAL; 2925 } 2926 2927 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 2928 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2929 return -EINVAL; 2930 } 2931 2932 atomic_inc(&id_priv->refcount); 2933 if (cma_any_addr(dst_addr)) { 2934 ret = cma_resolve_loopback(id_priv); 2935 } else { 2936 if (dst_addr->sa_family == AF_IB) { 2937 ret = cma_resolve_ib_addr(id_priv); 2938 } else { 2939 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2940 dst_addr, &id->route.addr.dev_addr, 2941 timeout_ms, addr_handler, id_priv); 2942 } 2943 } 2944 if (ret) 2945 goto err; 2946 2947 return 0; 2948 err: 2949 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2950 cma_deref_id(id_priv); 2951 return ret; 2952 } 2953 EXPORT_SYMBOL(rdma_resolve_addr); 2954 2955 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2956 { 2957 struct rdma_id_private *id_priv; 2958 unsigned long flags; 2959 int ret; 2960 2961 id_priv = container_of(id, struct rdma_id_private, id); 2962 spin_lock_irqsave(&id_priv->lock, flags); 2963 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2964 id_priv->reuseaddr = reuse; 2965 ret = 0; 2966 } else { 2967 ret = -EINVAL; 2968 } 2969 spin_unlock_irqrestore(&id_priv->lock, flags); 2970 return ret; 2971 } 2972 EXPORT_SYMBOL(rdma_set_reuseaddr); 2973 2974 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2975 { 2976 struct rdma_id_private *id_priv; 2977 unsigned long flags; 2978 int ret; 2979 2980 id_priv = container_of(id, struct rdma_id_private, id); 2981 spin_lock_irqsave(&id_priv->lock, flags); 2982 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2983 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2984 id_priv->afonly = afonly; 2985 ret = 0; 2986 } else { 2987 ret = -EINVAL; 2988 } 2989 spin_unlock_irqrestore(&id_priv->lock, flags); 2990 
return ret; 2991 } 2992 EXPORT_SYMBOL(rdma_set_afonly); 2993 2994 static void cma_bind_port(struct rdma_bind_list *bind_list, 2995 struct rdma_id_private *id_priv) 2996 { 2997 struct sockaddr *addr; 2998 struct sockaddr_ib *sib; 2999 u64 sid, mask; 3000 __be16 port; 3001 3002 addr = cma_src_addr(id_priv); 3003 port = htons(bind_list->port); 3004 3005 switch (addr->sa_family) { 3006 case AF_INET: 3007 ((struct sockaddr_in *) addr)->sin_port = port; 3008 break; 3009 case AF_INET6: 3010 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3011 break; 3012 case AF_IB: 3013 sib = (struct sockaddr_ib *) addr; 3014 sid = be64_to_cpu(sib->sib_sid); 3015 mask = be64_to_cpu(sib->sib_sid_mask); 3016 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3017 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3018 break; 3019 } 3020 id_priv->bind_list = bind_list; 3021 hlist_add_head(&id_priv->node, &bind_list->owners); 3022 } 3023 3024 static int cma_alloc_port(enum rdma_ucm_port_space ps, 3025 struct rdma_id_private *id_priv, unsigned short snum) 3026 { 3027 struct rdma_bind_list *bind_list; 3028 int ret; 3029 3030 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3031 if (!bind_list) 3032 return -ENOMEM; 3033 3034 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3035 snum); 3036 if (ret < 0) 3037 goto err; 3038 3039 bind_list->ps = ps; 3040 bind_list->port = (unsigned short)ret; 3041 cma_bind_port(bind_list, id_priv); 3042 return 0; 3043 err: 3044 kfree(bind_list); 3045 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3046 } 3047 3048 static int cma_port_is_unique(struct rdma_bind_list *bind_list, 3049 struct rdma_id_private *id_priv) 3050 { 3051 struct rdma_id_private *cur_id; 3052 struct sockaddr *daddr = cma_dst_addr(id_priv); 3053 struct sockaddr *saddr = cma_src_addr(id_priv); 3054 __be16 dport = cma_port(daddr); 3055 3056 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3057 struct sockaddr *cur_daddr = cma_dst_addr(cur_id); 3058 struct sockaddr *cur_saddr = cma_src_addr(cur_id); 3059 __be16 cur_dport = cma_port(cur_daddr); 3060 3061 if (id_priv == cur_id) 3062 continue; 3063 3064 /* different dest port -> unique */ 3065 if (!cma_any_port(daddr) && 3066 !cma_any_port(cur_daddr) && 3067 (dport != cur_dport)) 3068 continue; 3069 3070 /* different src address -> unique */ 3071 if (!cma_any_addr(saddr) && 3072 !cma_any_addr(cur_saddr) && 3073 cma_addr_cmp(saddr, cur_saddr)) 3074 continue; 3075 3076 /* different dst address -> unique */ 3077 if (!cma_any_addr(daddr) && 3078 !cma_any_addr(cur_daddr) && 3079 cma_addr_cmp(daddr, cur_daddr)) 3080 continue; 3081 3082 return -EADDRNOTAVAIL; 3083 } 3084 return 0; 3085 } 3086 3087 static int cma_alloc_any_port(enum rdma_ucm_port_space ps, 3088 struct rdma_id_private *id_priv) 3089 { 3090 static unsigned int last_used_port; 3091 int low, high, remaining; 3092 unsigned int rover; 3093 struct net *net = id_priv->id.route.addr.dev_addr.net; 3094 3095 inet_get_local_port_range(net, &low, &high); 3096 remaining = (high - low) + 1; 3097 rover = prandom_u32() % remaining + low; 3098 retry: 3099 if (last_used_port != rover) { 3100 struct rdma_bind_list *bind_list; 3101 int ret; 3102 3103 bind_list = cma_ps_find(net, ps, (unsigned short)rover); 3104 3105 if (!bind_list) { 3106 ret = cma_alloc_port(ps, id_priv, rover); 3107 } else { 3108 ret = cma_port_is_unique(bind_list, id_priv); 3109 if (!ret) 3110 cma_bind_port(bind_list, id_priv); 3111 } 3112 /* 3113 * Remember previously used port number in order to avoid 3114 * re-using same port 
immediately after it is closed. 3115 */ 3116 if (!ret) 3117 last_used_port = rover; 3118 if (ret != -EADDRNOTAVAIL) 3119 return ret; 3120 } 3121 if (--remaining) { 3122 rover++; 3123 if ((rover < low) || (rover > high)) 3124 rover = low; 3125 goto retry; 3126 } 3127 return -EADDRNOTAVAIL; 3128 } 3129 3130 /* 3131 * Check that the requested port is available. This is called when trying to 3132 * bind to a specific port, or when trying to listen on a bound port. In 3133 * the latter case, the provided id_priv may already be on the bind_list, but 3134 * we still need to check that it's okay to start listening. 3135 */ 3136 static int cma_check_port(struct rdma_bind_list *bind_list, 3137 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3138 { 3139 struct rdma_id_private *cur_id; 3140 struct sockaddr *addr, *cur_addr; 3141 3142 addr = cma_src_addr(id_priv); 3143 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3144 if (id_priv == cur_id) 3145 continue; 3146 3147 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3148 cur_id->reuseaddr) 3149 continue; 3150 3151 cur_addr = cma_src_addr(cur_id); 3152 if (id_priv->afonly && cur_id->afonly && 3153 (addr->sa_family != cur_addr->sa_family)) 3154 continue; 3155 3156 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3157 return -EADDRNOTAVAIL; 3158 3159 if (!cma_addr_cmp(addr, cur_addr)) 3160 return -EADDRINUSE; 3161 } 3162 return 0; 3163 } 3164 3165 static int cma_use_port(enum rdma_ucm_port_space ps, 3166 struct rdma_id_private *id_priv) 3167 { 3168 struct rdma_bind_list *bind_list; 3169 unsigned short snum; 3170 int ret; 3171 3172 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3173 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 3174 return -EACCES; 3175 3176 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3177 if (!bind_list) { 3178 ret = cma_alloc_port(ps, id_priv, snum); 3179 } else { 3180 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3181 if (!ret) 3182 cma_bind_port(bind_list, id_priv); 3183 } 3184 return ret; 3185 } 3186 3187 static int cma_bind_listen(struct rdma_id_private *id_priv) 3188 { 3189 struct rdma_bind_list *bind_list = id_priv->bind_list; 3190 int ret = 0; 3191 3192 mutex_lock(&lock); 3193 if (bind_list->owners.first->next) 3194 ret = cma_check_port(bind_list, id_priv, 0); 3195 mutex_unlock(&lock); 3196 return ret; 3197 } 3198 3199 static enum rdma_ucm_port_space 3200 cma_select_inet_ps(struct rdma_id_private *id_priv) 3201 { 3202 switch (id_priv->id.ps) { 3203 case RDMA_PS_TCP: 3204 case RDMA_PS_UDP: 3205 case RDMA_PS_IPOIB: 3206 case RDMA_PS_IB: 3207 return id_priv->id.ps; 3208 default: 3209 3210 return 0; 3211 } 3212 } 3213 3214 static enum rdma_ucm_port_space 3215 cma_select_ib_ps(struct rdma_id_private *id_priv) 3216 { 3217 enum rdma_ucm_port_space ps = 0; 3218 struct sockaddr_ib *sib; 3219 u64 sid_ps, mask, sid; 3220 3221 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3222 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3223 sid = be64_to_cpu(sib->sib_sid) & mask; 3224 3225 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3226 sid_ps = RDMA_IB_IP_PS_IB; 3227 ps = RDMA_PS_IB; 3228 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3229 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3230 sid_ps = RDMA_IB_IP_PS_TCP; 3231 ps = RDMA_PS_TCP; 3232 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3233 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3234 sid_ps = RDMA_IB_IP_PS_UDP; 
3235 ps = RDMA_PS_UDP; 3236 } 3237 3238 if (ps) { 3239 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3240 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3241 be64_to_cpu(sib->sib_sid_mask)); 3242 } 3243 return ps; 3244 } 3245 3246 static int cma_get_port(struct rdma_id_private *id_priv) 3247 { 3248 enum rdma_ucm_port_space ps; 3249 int ret; 3250 3251 if (cma_family(id_priv) != AF_IB) 3252 ps = cma_select_inet_ps(id_priv); 3253 else 3254 ps = cma_select_ib_ps(id_priv); 3255 if (!ps) 3256 return -EPROTONOSUPPORT; 3257 3258 mutex_lock(&lock); 3259 if (cma_any_port(cma_src_addr(id_priv))) 3260 ret = cma_alloc_any_port(ps, id_priv); 3261 else 3262 ret = cma_use_port(ps, id_priv); 3263 mutex_unlock(&lock); 3264 3265 return ret; 3266 } 3267 3268 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3269 struct sockaddr *addr) 3270 { 3271 #if IS_ENABLED(CONFIG_IPV6) 3272 struct sockaddr_in6 *sin6; 3273 3274 if (addr->sa_family != AF_INET6) 3275 return 0; 3276 3277 sin6 = (struct sockaddr_in6 *) addr; 3278 3279 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 3280 return 0; 3281 3282 if (!sin6->sin6_scope_id) 3283 return -EINVAL; 3284 3285 dev_addr->bound_dev_if = sin6->sin6_scope_id; 3286 #endif 3287 return 0; 3288 } 3289 3290 int rdma_listen(struct rdma_cm_id *id, int backlog) 3291 { 3292 struct rdma_id_private *id_priv; 3293 int ret; 3294 3295 id_priv = container_of(id, struct rdma_id_private, id); 3296 if (id_priv->state == RDMA_CM_IDLE) { 3297 id->route.addr.src_addr.ss_family = AF_INET; 3298 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3299 if (ret) 3300 return ret; 3301 } 3302 3303 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3304 return -EINVAL; 3305 3306 if (id_priv->reuseaddr) { 3307 ret = cma_bind_listen(id_priv); 3308 if (ret) 3309 goto err; 3310 } 3311 3312 id_priv->backlog = backlog; 3313 if (id->device) { 3314 if (rdma_cap_ib_cm(id->device, 1)) { 3315 ret = cma_ib_listen(id_priv); 3316 if (ret) 3317 goto err; 3318 } else if (rdma_cap_iw_cm(id->device, 1)) { 3319 ret = cma_iw_listen(id_priv, backlog); 3320 if (ret) 3321 goto err; 3322 } else { 3323 ret = -ENOSYS; 3324 goto err; 3325 } 3326 } else 3327 cma_listen_on_all(id_priv); 3328 3329 return 0; 3330 err: 3331 id_priv->backlog = 0; 3332 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3333 return ret; 3334 } 3335 EXPORT_SYMBOL(rdma_listen); 3336 3337 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3338 { 3339 struct rdma_id_private *id_priv; 3340 int ret; 3341 struct sockaddr *daddr; 3342 3343 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3344 addr->sa_family != AF_IB) 3345 return -EAFNOSUPPORT; 3346 3347 id_priv = container_of(id, struct rdma_id_private, id); 3348 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3349 return -EINVAL; 3350 3351 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3352 if (ret) 3353 goto err1; 3354 3355 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3356 if (!cma_any_addr(addr)) { 3357 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3358 if (ret) 3359 goto err1; 3360 3361 ret = cma_acquire_dev(id_priv, NULL); 3362 if (ret) 3363 goto err1; 3364 } 3365 3366 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3367 if (addr->sa_family == AF_INET) 3368 id_priv->afonly = 1; 3369 #if IS_ENABLED(CONFIG_IPV6) 3370 else if (addr->sa_family == AF_INET6) { 3371 struct net *net = id_priv->id.route.addr.dev_addr.net; 3372 3373 
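/* Default to the namespace's bindv6only sysctl, matching IPv6 socket semantics for AF_INET6 binds */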
id_priv->afonly = net->ipv6.sysctl.bindv6only; 3374 } 3375 #endif 3376 } 3377 daddr = cma_dst_addr(id_priv); 3378 daddr->sa_family = addr->sa_family; 3379 3380 ret = cma_get_port(id_priv); 3381 if (ret) 3382 goto err2; 3383 3384 return 0; 3385 err2: 3386 if (id_priv->cma_dev) { 3387 rdma_restrack_del(&id_priv->res); 3388 cma_release_dev(id_priv); 3389 } 3390 err1: 3391 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3392 return ret; 3393 } 3394 EXPORT_SYMBOL(rdma_bind_addr); 3395 3396 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3397 { 3398 struct cma_hdr *cma_hdr; 3399 3400 cma_hdr = hdr; 3401 cma_hdr->cma_version = CMA_VERSION; 3402 if (cma_family(id_priv) == AF_INET) { 3403 struct sockaddr_in *src4, *dst4; 3404 3405 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3406 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3407 3408 cma_set_ip_ver(cma_hdr, 4); 3409 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3410 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3411 cma_hdr->port = src4->sin_port; 3412 } else if (cma_family(id_priv) == AF_INET6) { 3413 struct sockaddr_in6 *src6, *dst6; 3414 3415 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3416 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3417 3418 cma_set_ip_ver(cma_hdr, 6); 3419 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3420 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3421 cma_hdr->port = src6->sin6_port; 3422 } 3423 return 0; 3424 } 3425 3426 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3427 struct ib_cm_event *ib_event) 3428 { 3429 struct rdma_id_private *id_priv = cm_id->context; 3430 struct rdma_cm_event event; 3431 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3432 int ret = 0; 3433 3434 mutex_lock(&id_priv->handler_mutex); 3435 if (id_priv->state != RDMA_CM_CONNECT) 3436 goto out; 3437 3438 memset(&event, 0, sizeof event); 3439 switch (ib_event->event) { 3440 case IB_CM_SIDR_REQ_ERROR: 3441 event.event = RDMA_CM_EVENT_UNREACHABLE; 3442 event.status = -ETIMEDOUT; 3443 break; 3444 case IB_CM_SIDR_REP_RECEIVED: 3445 event.param.ud.private_data = ib_event->private_data; 3446 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3447 if (rep->status != IB_SIDR_SUCCESS) { 3448 event.event = RDMA_CM_EVENT_UNREACHABLE; 3449 event.status = ib_event->param.sidr_rep_rcvd.status; 3450 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", 3451 event.status); 3452 break; 3453 } 3454 ret = cma_set_qkey(id_priv, rep->qkey); 3455 if (ret) { 3456 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); 3457 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3458 event.status = ret; 3459 break; 3460 } 3461 ib_init_ah_attr_from_path(id_priv->id.device, 3462 id_priv->id.port_num, 3463 id_priv->id.route.path_rec, 3464 &event.param.ud.ah_attr); 3465 event.param.ud.qp_num = rep->qpn; 3466 event.param.ud.qkey = rep->qkey; 3467 event.event = RDMA_CM_EVENT_ESTABLISHED; 3468 event.status = 0; 3469 break; 3470 default: 3471 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3472 ib_event->event); 3473 goto out; 3474 } 3475 3476 ret = id_priv->id.event_handler(&id_priv->id, &event); 3477 if (ret) { 3478 /* Destroy the CM ID by returning a non-zero value. 
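 * As in cma_req_handler(): ib_cm frees the id once this handler returns
 * non-zero, so drop our cm_id.ib pointer before rdma_destroy_id() runs.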
*/ 3479 id_priv->cm_id.ib = NULL; 3480 cma_exch(id_priv, RDMA_CM_DESTROYING); 3481 mutex_unlock(&id_priv->handler_mutex); 3482 rdma_destroy_id(&id_priv->id); 3483 return ret; 3484 } 3485 out: 3486 mutex_unlock(&id_priv->handler_mutex); 3487 return ret; 3488 } 3489 3490 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3491 struct rdma_conn_param *conn_param) 3492 { 3493 struct ib_cm_sidr_req_param req; 3494 struct ib_cm_id *id; 3495 void *private_data; 3496 u8 offset; 3497 int ret; 3498 3499 memset(&req, 0, sizeof req); 3500 offset = cma_user_data_offset(id_priv); 3501 req.private_data_len = offset + conn_param->private_data_len; 3502 if (req.private_data_len < conn_param->private_data_len) 3503 return -EINVAL; 3504 3505 if (req.private_data_len) { 3506 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3507 if (!private_data) 3508 return -ENOMEM; 3509 } else { 3510 private_data = NULL; 3511 } 3512 3513 if (conn_param->private_data && conn_param->private_data_len) 3514 memcpy(private_data + offset, conn_param->private_data, 3515 conn_param->private_data_len); 3516 3517 if (private_data) { 3518 ret = cma_format_hdr(private_data, id_priv); 3519 if (ret) 3520 goto out; 3521 req.private_data = private_data; 3522 } 3523 3524 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3525 id_priv); 3526 if (IS_ERR(id)) { 3527 ret = PTR_ERR(id); 3528 goto out; 3529 } 3530 id_priv->cm_id.ib = id; 3531 3532 req.path = id_priv->id.route.path_rec; 3533 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3534 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3535 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3536 3537 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3538 if (ret) { 3539 ib_destroy_cm_id(id_priv->cm_id.ib); 3540 id_priv->cm_id.ib = NULL; 3541 } 3542 out: 3543 kfree(private_data); 3544 return ret; 3545 } 3546 3547 static int cma_connect_ib(struct rdma_id_private *id_priv, 3548 struct rdma_conn_param *conn_param) 3549 { 3550 struct ib_cm_req_param req; 3551 struct rdma_route *route; 3552 void *private_data; 3553 struct ib_cm_id *id; 3554 u8 offset; 3555 int ret; 3556 3557 memset(&req, 0, sizeof req); 3558 offset = cma_user_data_offset(id_priv); 3559 req.private_data_len = offset + conn_param->private_data_len; 3560 if (req.private_data_len < conn_param->private_data_len) 3561 return -EINVAL; 3562 3563 if (req.private_data_len) { 3564 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3565 if (!private_data) 3566 return -ENOMEM; 3567 } else { 3568 private_data = NULL; 3569 } 3570 3571 if (conn_param->private_data && conn_param->private_data_len) 3572 memcpy(private_data + offset, conn_param->private_data, 3573 conn_param->private_data_len); 3574 3575 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3576 if (IS_ERR(id)) { 3577 ret = PTR_ERR(id); 3578 goto out; 3579 } 3580 id_priv->cm_id.ib = id; 3581 3582 route = &id_priv->id.route; 3583 if (private_data) { 3584 ret = cma_format_hdr(private_data, id_priv); 3585 if (ret) 3586 goto out; 3587 req.private_data = private_data; 3588 } 3589 3590 req.primary_path = &route->path_rec[0]; 3591 if (route->num_paths == 2) 3592 req.alternate_path = &route->path_rec[1]; 3593 3594 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3595 req.qp_num = id_priv->qp_num; 3596 req.qp_type = id_priv->id.qp_type; 3597 req.starting_psn = id_priv->seq_num; 3598 req.responder_resources = conn_param->responder_resources; 3599 req.initiator_depth = 
conn_param->initiator_depth; 3600 req.flow_control = conn_param->flow_control; 3601 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3602 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3603 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3604 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3605 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3606 req.srq = id_priv->srq ? 1 : 0; 3607 3608 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3609 out: 3610 if (ret && !IS_ERR(id)) { 3611 ib_destroy_cm_id(id); 3612 id_priv->cm_id.ib = NULL; 3613 } 3614 3615 kfree(private_data); 3616 return ret; 3617 } 3618 3619 static int cma_connect_iw(struct rdma_id_private *id_priv, 3620 struct rdma_conn_param *conn_param) 3621 { 3622 struct iw_cm_id *cm_id; 3623 int ret; 3624 struct iw_cm_conn_param iw_param; 3625 3626 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3627 if (IS_ERR(cm_id)) 3628 return PTR_ERR(cm_id); 3629 3630 cm_id->tos = id_priv->tos; 3631 id_priv->cm_id.iw = cm_id; 3632 3633 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3634 rdma_addr_size(cma_src_addr(id_priv))); 3635 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3636 rdma_addr_size(cma_dst_addr(id_priv))); 3637 3638 ret = cma_modify_qp_rtr(id_priv, conn_param); 3639 if (ret) 3640 goto out; 3641 3642 if (conn_param) { 3643 iw_param.ord = conn_param->initiator_depth; 3644 iw_param.ird = conn_param->responder_resources; 3645 iw_param.private_data = conn_param->private_data; 3646 iw_param.private_data_len = conn_param->private_data_len; 3647 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3648 } else { 3649 memset(&iw_param, 0, sizeof iw_param); 3650 iw_param.qpn = id_priv->qp_num; 3651 } 3652 ret = iw_cm_connect(cm_id, &iw_param); 3653 out: 3654 if (ret) { 3655 iw_destroy_cm_id(cm_id); 3656 id_priv->cm_id.iw = NULL; 3657 } 3658 return ret; 3659 } 3660 3661 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3662 { 3663 struct rdma_id_private *id_priv; 3664 int ret; 3665 3666 id_priv = container_of(id, struct rdma_id_private, id); 3667 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3668 return -EINVAL; 3669 3670 if (!id->qp) { 3671 id_priv->qp_num = conn_param->qp_num; 3672 id_priv->srq = conn_param->srq; 3673 } 3674 3675 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3676 if (id->qp_type == IB_QPT_UD) 3677 ret = cma_resolve_ib_udp(id_priv, conn_param); 3678 else 3679 ret = cma_connect_ib(id_priv, conn_param); 3680 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3681 ret = cma_connect_iw(id_priv, conn_param); 3682 else 3683 ret = -ENOSYS; 3684 if (ret) 3685 goto err; 3686 3687 return 0; 3688 err: 3689 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3690 return ret; 3691 } 3692 EXPORT_SYMBOL(rdma_connect); 3693 3694 static int cma_accept_ib(struct rdma_id_private *id_priv, 3695 struct rdma_conn_param *conn_param) 3696 { 3697 struct ib_cm_rep_param rep; 3698 int ret; 3699 3700 ret = cma_modify_qp_rtr(id_priv, conn_param); 3701 if (ret) 3702 goto out; 3703 3704 ret = cma_modify_qp_rts(id_priv, conn_param); 3705 if (ret) 3706 goto out; 3707 3708 memset(&rep, 0, sizeof rep); 3709 rep.qp_num = id_priv->qp_num; 3710 rep.starting_psn = id_priv->seq_num; 3711 rep.private_data = conn_param->private_data; 3712 rep.private_data_len = conn_param->private_data_len; 3713 rep.responder_resources = conn_param->responder_resources; 3714 rep.initiator_depth = conn_param->initiator_depth; 3715 
rep.failover_accepted = 0; 3716 rep.flow_control = conn_param->flow_control; 3717 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3718 rep.srq = id_priv->srq ? 1 : 0; 3719 3720 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3721 out: 3722 return ret; 3723 } 3724 3725 static int cma_accept_iw(struct rdma_id_private *id_priv, 3726 struct rdma_conn_param *conn_param) 3727 { 3728 struct iw_cm_conn_param iw_param; 3729 int ret; 3730 3731 if (!conn_param) 3732 return -EINVAL; 3733 3734 ret = cma_modify_qp_rtr(id_priv, conn_param); 3735 if (ret) 3736 return ret; 3737 3738 iw_param.ord = conn_param->initiator_depth; 3739 iw_param.ird = conn_param->responder_resources; 3740 iw_param.private_data = conn_param->private_data; 3741 iw_param.private_data_len = conn_param->private_data_len; 3742 if (id_priv->id.qp) { 3743 iw_param.qpn = id_priv->qp_num; 3744 } else 3745 iw_param.qpn = conn_param->qp_num; 3746 3747 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3748 } 3749 3750 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3751 enum ib_cm_sidr_status status, u32 qkey, 3752 const void *private_data, int private_data_len) 3753 { 3754 struct ib_cm_sidr_rep_param rep; 3755 int ret; 3756 3757 memset(&rep, 0, sizeof rep); 3758 rep.status = status; 3759 if (status == IB_SIDR_SUCCESS) { 3760 ret = cma_set_qkey(id_priv, qkey); 3761 if (ret) 3762 return ret; 3763 rep.qp_num = id_priv->qp_num; 3764 rep.qkey = id_priv->qkey; 3765 } 3766 rep.private_data = private_data; 3767 rep.private_data_len = private_data_len; 3768 3769 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3770 } 3771 3772 int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, 3773 const char *caller) 3774 { 3775 struct rdma_id_private *id_priv; 3776 int ret; 3777 3778 id_priv = container_of(id, struct rdma_id_private, id); 3779 3780 if (caller) 3781 id_priv->res.kern_name = caller; 3782 else 3783 rdma_restrack_set_task(&id_priv->res, current); 3784 3785 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3786 return -EINVAL; 3787 3788 if (!id->qp && conn_param) { 3789 id_priv->qp_num = conn_param->qp_num; 3790 id_priv->srq = conn_param->srq; 3791 } 3792 3793 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3794 if (id->qp_type == IB_QPT_UD) { 3795 if (conn_param) 3796 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3797 conn_param->qkey, 3798 conn_param->private_data, 3799 conn_param->private_data_len); 3800 else 3801 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3802 0, NULL, 0); 3803 } else { 3804 if (conn_param) 3805 ret = cma_accept_ib(id_priv, conn_param); 3806 else 3807 ret = cma_rep_recv(id_priv); 3808 } 3809 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3810 ret = cma_accept_iw(id_priv, conn_param); 3811 else 3812 ret = -ENOSYS; 3813 3814 if (ret) 3815 goto reject; 3816 3817 return 0; 3818 reject: 3819 cma_modify_qp_err(id_priv); 3820 rdma_reject(id, NULL, 0); 3821 return ret; 3822 } 3823 EXPORT_SYMBOL(__rdma_accept); 3824 3825 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3826 { 3827 struct rdma_id_private *id_priv; 3828 int ret; 3829 3830 id_priv = container_of(id, struct rdma_id_private, id); 3831 if (!id_priv->cm_id.ib) 3832 return -EINVAL; 3833 3834 switch (id->device->node_type) { 3835 case RDMA_NODE_IB_CA: 3836 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3837 break; 3838 default: 3839 ret = 0; 3840 break; 3841 } 3842 return ret; 3843 } 3844 EXPORT_SYMBOL(rdma_notify); 3845 3846 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3847 u8 
private_data_len) 3848 { 3849 struct rdma_id_private *id_priv; 3850 int ret; 3851 3852 id_priv = container_of(id, struct rdma_id_private, id); 3853 if (!id_priv->cm_id.ib) 3854 return -EINVAL; 3855 3856 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3857 if (id->qp_type == IB_QPT_UD) 3858 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3859 private_data, private_data_len); 3860 else 3861 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3862 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3863 0, private_data, private_data_len); 3864 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3865 ret = iw_cm_reject(id_priv->cm_id.iw, 3866 private_data, private_data_len); 3867 } else 3868 ret = -ENOSYS; 3869 3870 return ret; 3871 } 3872 EXPORT_SYMBOL(rdma_reject); 3873 3874 int rdma_disconnect(struct rdma_cm_id *id) 3875 { 3876 struct rdma_id_private *id_priv; 3877 int ret; 3878 3879 id_priv = container_of(id, struct rdma_id_private, id); 3880 if (!id_priv->cm_id.ib) 3881 return -EINVAL; 3882 3883 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3884 ret = cma_modify_qp_err(id_priv); 3885 if (ret) 3886 goto out; 3887 /* Initiate or respond to a disconnect. */ 3888 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3889 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3890 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3891 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3892 } else 3893 ret = -EINVAL; 3894 3895 out: 3896 return ret; 3897 } 3898 EXPORT_SYMBOL(rdma_disconnect); 3899 3900 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3901 { 3902 struct rdma_id_private *id_priv; 3903 struct cma_multicast *mc = multicast->context; 3904 struct rdma_cm_event event; 3905 int ret = 0; 3906 3907 id_priv = mc->id_priv; 3908 mutex_lock(&id_priv->handler_mutex); 3909 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3910 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3911 goto out; 3912 3913 if (!status) 3914 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3915 else 3916 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", 3917 status); 3918 mutex_lock(&id_priv->qp_mutex); 3919 if (!status && id_priv->id.qp) { 3920 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3921 be16_to_cpu(multicast->rec.mlid)); 3922 if (status) 3923 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. 
status %d\n", 3924 status); 3925 } 3926 mutex_unlock(&id_priv->qp_mutex); 3927 3928 memset(&event, 0, sizeof event); 3929 event.status = status; 3930 event.param.ud.private_data = mc->context; 3931 if (!status) { 3932 struct rdma_dev_addr *dev_addr = 3933 &id_priv->id.route.addr.dev_addr; 3934 struct net_device *ndev = 3935 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 3936 enum ib_gid_type gid_type = 3937 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3938 rdma_start_port(id_priv->cma_dev->device)]; 3939 3940 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3941 ret = ib_init_ah_from_mcmember(id_priv->id.device, 3942 id_priv->id.port_num, 3943 &multicast->rec, 3944 ndev, gid_type, 3945 &event.param.ud.ah_attr); 3946 if (ret) 3947 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3948 3949 event.param.ud.qp_num = 0xFFFFFF; 3950 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3951 if (ndev) 3952 dev_put(ndev); 3953 } else 3954 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3955 3956 ret = id_priv->id.event_handler(&id_priv->id, &event); 3957 if (ret) { 3958 cma_exch(id_priv, RDMA_CM_DESTROYING); 3959 mutex_unlock(&id_priv->handler_mutex); 3960 rdma_destroy_id(&id_priv->id); 3961 return 0; 3962 } 3963 3964 out: 3965 mutex_unlock(&id_priv->handler_mutex); 3966 return 0; 3967 } 3968 3969 static void cma_set_mgid(struct rdma_id_private *id_priv, 3970 struct sockaddr *addr, union ib_gid *mgid) 3971 { 3972 unsigned char mc_map[MAX_ADDR_LEN]; 3973 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3974 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 3975 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 3976 3977 if (cma_any_addr(addr)) { 3978 memset(mgid, 0, sizeof *mgid); 3979 } else if ((addr->sa_family == AF_INET6) && 3980 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 3981 0xFF10A01B)) { 3982 /* IPv6 address is an SA assigned MGID. 
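 * (it matches the ff1x:a01b:: prefix checked above) and is copied
 * verbatim into the MGID.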
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
	    (!ib_sa_sendonly_fullmem_support(&sa_client,
					     id_priv->id.device,
					     id_priv->id.port_num))) {
		pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
			"RDMA CM: SM doesn't support Send Only Full Member option\n",
			id_priv->id.device->name, id_priv->id.port_num);
		return -EOPNOTSUPP;
	}

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	if (id_priv->id.ps == RDMA_PS_IPOIB)
		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU |
			     IB_SA_MCMEMBER_REC_HOP_LIMIT;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, GFP_KERNEL,
						cma_ib_mc_handler, mc);
	return PTR_ERR_OR_ZERO(mc->multicast.ib);
}

static void iboe_mcast_work_handler(struct work_struct *work)
{
	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
	struct cma_multicast *mc = mw->mc;
	struct ib_sa_multicast *m = mc->multicast.ib;

	mc->multicast.ib->context = mc;
	cma_ib_mc_handler(0, m);
	kref_put(&mc->mcref, release_mc);
	kfree(mw);
}

static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
			      enum ib_gid_type gid_type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		mgid->raw[0] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
		mgid->raw[1] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}

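/*
 * "Join" a multicast group on a RoCE port.  RoCE has no subnet
 * administrator, so the join is emulated in software: the MGID is
 * synthesized from the IP address, rate and MTU are taken from the
 * bound netdev, IGMP membership is sent for IPv4 RoCEv2 groups, and a
 * work item invokes cma_ib_mc_handler(0, ...) to deliver the usual
 * MULTICAST_JOIN event to the consumer.
 */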
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);

	if (addr->sa_family == AF_INET) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	if (!id->device)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

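/*
 * A minimal, hypothetical consumer sketch (not part of this file):
 * after address resolution completes on a UD-style id, a join pairs
 * with a leave on the same address, e.g.:
 *
 *	struct sockaddr_in maddr = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(0xe0010203),	/\* 224.1.2.3 *\/
 *	};
 *
 *	ret = rdma_join_multicast(id, (struct sockaddr *)&maddr,
 *				  BIT(FULLMEMBER_JOIN), my_ctx);
 *	...
 *	rdma_leave_multicast(id, (struct sockaddr *)&maddr);
 *
 * The join result arrives asynchronously as RDMA_CM_EVENT_MULTICAST_JOIN
 * (or MULTICAST_ERROR) on the id's event handler; the join state
 * constants are the ib_sa.h values, and my_ctx is an arbitrary cookie
 * echoed back in event.param.ud.private_data.
 */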
void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);

static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr;
	struct cma_ndev_work *work;

	dev_addr = &id_priv->id.route.addr.dev_addr;

	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
	    (net_eq(dev_net(ndev), dev_addr->net)) &&
	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
			ndev->name, &id_priv->id);
		work = kzalloc(sizeof *work, GFP_KERNEL);
		if (!work)
			return -ENOMEM;

		INIT_WORK(&work->work, cma_ndev_work_handler);
		work->id = id_priv;
		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
		atomic_inc(&id_priv->refcount);
		queue_work(cma_wq, &work->work);
	}

	return 0;
}

static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
			       void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_BONDING_FAILOVER)
		return NOTIFY_DONE;

	if (!netif_is_bond_master(ndev))
		return NOTIFY_DONE;

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list)
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			ret = cma_netdev_change(ndev, id_priv);
			if (ret)
				goto out;
		}

out:
	mutex_unlock(&lock);
	return ret;
}

static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

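/*
 * IB client "add" callback, invoked once per registered ib_device.  It
 * allocates the per-device cma_dev bookkeeping, picks a default GID
 * type for each port (preferring RoCEv2 where the port supports it),
 * and replays any wildcard listens onto the new device.
 */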
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

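/*
 * IB client "remove" callback.  Detaches the cma_dev from the global
 * device list, tears down every id still bound to it (delivering
 * RDMA_CM_EVENT_DEVICE_REMOVAL first), waits for the last reference to
 * drop, and only then frees the per-device state.
 */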
static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlmsghdr *nlh;
	struct rdma_cm_id_stats *id_stats;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id = NULL;
	struct cma_device *cma_dev;
	int i_dev = 0, i_id = 0;

	/*
	 * We export all of the IDs as a sequence of messages.  Each
	 * ID gets its own netlink message.
	 */
	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list) {
		if (i_dev < cb->args[0]) {
			i_dev++;
			continue;
		}

		i_id = 0;
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			if (i_id < cb->args[1]) {
				i_id++;
				continue;
			}

			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
						sizeof *id_stats, RDMA_NL_RDMA_CM,
						RDMA_NL_RDMA_CM_ID_STATS,
						NLM_F_MULTI);
			if (!id_stats)
				goto out;

			memset(id_stats, 0, sizeof *id_stats);
			id = &id_priv->id;
			id_stats->node_type = id->route.addr.dev_addr.dev_type;
			id_stats->port_num = id->port_num;
			id_stats->bound_dev_if =
				id->route.addr.dev_addr.bound_dev_if;

			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_src_addr(id_priv)),
					  cma_src_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
				goto out;
			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_dst_addr(id_priv)),
					  cma_dst_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
				goto out;

			id_stats->pid = task_pid_vnr(id_priv->res.task);
			id_stats->port_space = id->ps;
			id_stats->cm_state = id_priv->state;
			id_stats->qp_num = id_priv->qp_num;
			id_stats->qp_type = id->qp_type;

			i_id++;
			nlmsg_end(skb, nlh);
		}

		cb->args[1] = 0;
		i_dev++;
	}

out:
	mutex_unlock(&lock);
	cb->args[0] = i_dev;
	cb->args[1] = i_id;

	return skb->len;
}

static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};

static int cma_init_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);

	return 0;
}

static void cma_exit_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
}

static struct pernet_operations cma_pernet_operations = {
	.init = cma_init_net,
	.exit = cma_exit_net,
	.id = &cma_pernet_id,
	.size = sizeof(struct cma_pernet),
};

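/*
 * Module init.  Registration order matters: the ordered workqueue and
 * the per-net port-space IDRs must exist before any client callback
 * can run, so they come first; the error path unwinds in reverse.
 */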
static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&cma_pernet_operations);
	if (ret)
		goto err_wq;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);
	register_netdevice_notifier(&cma_nb);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
	cma_configfs_init();

	return 0;

err:
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	rdma_nl_unregister(RDMA_NL_RDMA_CM);
	ib_unregister_client(&cma_client);
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
	destroy_workqueue(cma_wq);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);

module_init(cma_init);
module_exit(cma_cleanup);