/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
#include <net/ip6_route.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include "core_priv.h"
#include "cma_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved ",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);

const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
						int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return ibcm_reject_msg(reason);

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return iwcm_reject_msg(reason);

	WARN_ON_ONCE(1);
	return "unrecognized transport";
}
EXPORT_SYMBOL(rdma_reject_msg);

bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return reason == IB_CM_REJ_CONSUMER_DEFINED;

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return reason == -ECONNREFUSED;

	WARN_ON_ONCE(1);
	return false;
}
EXPORT_SYMBOL(rdma_is_consumer_reject);

const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
				      struct rdma_cm_event *ev, u8 *data_len)
{
	const void *p;

	if (rdma_is_consumer_reject(id, ev->status)) {
		*data_len = ev->param.conn.private_data_len;
		p = ev->param.conn.private_data;
	} else {
		*data_len = 0;
		p = NULL;
	}
	return p;
}
EXPORT_SYMBOL(rdma_consumer_reject_data);

static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;

struct cma_pernet {
	struct idr tcp_ps;
	struct idr udp_ps;
	struct idr ipoib_ps;
	struct idr ib_ps;
};

static struct cma_pernet *cma_pernet(struct net *net)
{
	return net_generic(net, cma_pernet_id);
}

static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
{
	struct cma_pernet *pernet = cma_pernet(net);

	switch (ps) {
	case RDMA_PS_TCP:
		return &pernet->tcp_ps;
	case RDMA_PS_UDP:
		return &pernet->udp_ps;
	case RDMA_PS_IPOIB:
		return &pernet->ipoib_ps;
	case RDMA_PS_IB:
		return &pernet->ib_ps;
	default:
		return NULL;
	}
}

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
	struct completion	comp;
	atomic_t		refcount;
	struct list_head	id_list;
	enum ib_gid_type	*default_gid_type;
	u8			*default_roce_tos;
};

struct rdma_bind_list {
	enum rdma_ucm_port_space ps;
	struct hlist_head	owners;
	unsigned short		port;
};

struct class_port_info_context {
	struct ib_class_port_info	*class_port_info;
	struct ib_device		*device;
	struct completion		done;
	struct ib_sa_query		*sa_query;
	u8				port_num;
};

static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
			struct rdma_bind_list *bind_list, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}

static struct rdma_bind_list *cma_ps_find(struct net *net,
					  enum rdma_ucm_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_find(idr, snum);
}

static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
			  int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	idr_remove(idr, snum);
}

enum {
	CMA_OPTION_AFONLY,
};

void cma_ref_dev(struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;

	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_ref_dev(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
			     u8 default_roce_tos)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
		default_roce_tos;

	return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at any time, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
	bool			igmp_joined;
	u8			join_state;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum rdma_cm_state	old_state;
	enum rdma_cm_state	new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__be32 pad[3];
		__be32 addr;
	} ip4;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00

struct cma_req_info {
	struct ib_device *device;
	int port;
	union ib_gid local_gid;
	__be64 service_id;
	u16 pkey;
	bool has_gid:1;
};

static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
	struct in_device *in_dev = NULL;

	if (ndev) {
		rtnl_lock();
		in_dev = __in_dev_get_rtnl(ndev);
		if (in_dev) {
			if (join)
				ip_mc_inc_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
			else
				ip_mc_dec_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
		}
		rtnl_unlock();
	}
	return (in_dev) ? 0 : -ENODEV;
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->gid_type = 0;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
	id_priv->res.type = RDMA_RESTRACK_CM_ID;
	rdma_restrack_add(&id_priv->res);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
		ret = 0;
	}

	return ret;
}

static inline int cma_validate_port(struct ib_device *device, u8 port,
				    enum ib_gid_type gid_type,
				    union ib_gid *gid,
				    struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int bound_if_index = dev_addr->bound_dev_if;
	int dev_type = dev_addr->dev_type;
	struct net_device *ndev = NULL;
	int ret = -ENODEV;

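	/*
	 * The bound address's device type must match the port's protocol:
	 * an InfiniBand (IPoIB) address can only be validated against an IB
	 * port, and a non-IB address must not land on an IB port.
	 */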
	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ret;

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ret;

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(dev_addr->net, bound_if_index);
		if (!ndev)
			return ret;
	} else {
		gid_type = IB_GID_TYPE_IB;
	}

	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
					 ndev, NULL);

	if (ndev)
		dev_put(ndev);

	return ret;
}

static int cma_acquire_dev(struct rdma_id_private *id_priv,
			   struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid, iboe_gid, *gidp;
	int ret = -ENODEV;
	u8 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	mutex_lock(&lock);
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);

	if (listen_id_priv) {
		cma_dev = listen_id_priv->cma_dev;
		port = listen_id_priv->id.port_num;
		gidp = rdma_protocol_roce(cma_dev->device, port) ?
		       &iboe_gid : &gid;

		ret = cma_validate_port(cma_dev->device, port,
					rdma_protocol_ib(cma_dev->device, port) ?
					IB_GID_TYPE_IB :
					listen_id_priv->gid_type, gidp,
					id_priv);
		if (!ret) {
			id_priv->id.port_num = port;
			goto out;
		}
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv &&
			    listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
						rdma_protocol_ib(cma_dev->device, port) ?
						IB_GID_TYPE_IB :
						cma_dev->default_gid_type[port - 1],
						gidp, id_priv);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	u16 pkey, index;
	u8 p;
	enum ib_port_state port_state;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
				continue;
			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
						       &gid, NULL);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
						 dgid->global.subnet_prefix) &&
				    port_state == IB_PORT_ACTIVE) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}

	if (!cma_dev)
		return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *__rdma_create_id(struct net *net,
				    rdma_cm_event_handler event_handler,
				    void *context, enum rdma_ucm_port_space ps,
				    enum ib_qp_type qp_type, const char *caller)
{
	struct rdma_id_private *id_priv;

	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	if (caller)
		id_priv->res.kern_name = caller;
	else
		rdma_restrack_set_task(&id_priv->res, current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	id_priv->tos_set = false;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = get_net(net);
	id_priv->seq_num &= 0x00ffffff;

	return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);

static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
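	/* Unlike UD QPs, connected QPs are only moved to INIT here; the
	 * later INIT -> RTR -> RTS transitions are driven during connection
	 * establishment (see cma_modify_qp_rtr() and cma_modify_qp_rts()).
	 */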
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	union ib_gid sgid;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
			   rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
			   &sgid, NULL);
	if (ret)
		goto out;

	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
		pkey = 0xffff;
	else
		pkey = ib_addr_get_pkey(dev_addr);

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (id_priv->id.qp_type == IB_QPT_UD) {
		ret = cma_set_qkey(id_priv, 0);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		      int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);

		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
		qp_attr->port_num = id_priv->id.port_num;
		*qp_attr_mask |= IB_QP_PORT;
	} else
		ret = -ENOSYS;

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_any_addr(struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
	if (src->sa_family != dst->sa_family)
		return -1;

	switch (src->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
	case AF_INET6:
		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
				     &((struct sockaddr_in6 *) dst)->sin6_addr);
	default:
		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
				   &((struct sockaddr_ib *) dst)->sib_addr);
	}
}

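/*
 * Return the port in network byte order.  For AF_IB addresses the port is
 * carried in the low 16 bits of the service ID, selected by the supplied
 * service ID mask.
 */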
static __be16 cma_port(struct sockaddr *addr)
{
	struct sockaddr_ib *sib;

	switch (addr->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) addr)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *) addr)->sin6_port;
	case AF_IB:
		sib = (struct sockaddr_ib *) addr;
		return htons((u16) (be64_to_cpu(sib->sib_sid) &
				    be64_to_cpu(sib->sib_sid_mask)));
	default:
		return 0;
	}
}

static inline int cma_any_port(struct sockaddr *addr)
{
	return !cma_port(addr);
}

static void cma_save_ib_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct sa_path_rec *path)
{
	struct sockaddr_ib *listen_ib, *ib;

	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
	if (src_addr) {
		ib = (struct sockaddr_ib *)src_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->sgid, 16);
			ib->sib_sid = path->service_id;
			ib->sib_scope_id = 0;
		} else {
			ib->sib_pkey = listen_ib->sib_pkey;
			ib->sib_flowinfo = listen_ib->sib_flowinfo;
			ib->sib_addr = listen_ib->sib_addr;
			ib->sib_sid = listen_ib->sib_sid;
			ib->sib_scope_id = listen_ib->sib_scope_id;
		}
		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	}
	if (dst_addr) {
		ib = (struct sockaddr_ib *)dst_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->dgid, 16);
		}
	}
}

static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
	}
}

static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
	}
}

static u16 cma_port_from_service_id(__be64 service_id)
{
	return (u16)be64_to_cpu(service_id);
}

static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family == AF_IB) {
		if (ib_event->event == IB_CM_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id,
					 ib_event->param.req_rcvd.primary_path);
		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		return 0;
	}

	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}

static int cma_save_req_info(const struct ib_cm_event *ib_event,
			     struct cma_req_info *req)
{
	const struct ib_cm_req_event_param *req_param =
		&ib_event->param.req_rcvd;
	const struct ib_cm_sidr_req_event_param *sidr_param =
		&ib_event->param.sidr_req_rcvd;

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		req->device = req_param->listen_id->device;
		req->port = req_param->port;
		memcpy(&req->local_gid, &req_param->primary_path->sgid,
		       sizeof(req->local_gid));
		req->has_gid = true;
		req->service_id = req_param->primary_path->service_id;
		req->pkey = be16_to_cpu(req_param->primary_path->pkey);
		if (req->pkey != req_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    req_param->bth_pkey, req->pkey);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		req->device = sidr_param->listen_id->device;
		req->port = sidr_param->port;
		req->has_gid = false;
		req->service_id = sidr_param->service_id;
		req->pkey = sidr_param->pkey;
		if (req->pkey != sidr_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    sidr_param->bth_pkey, req->pkey);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static bool validate_ipv4_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
	__be32 daddr = dst_addr->sin_addr.s_addr,
	       saddr = src_addr->sin_addr.s_addr;
	struct fib_result res;
	struct flowi4 fl4;
	int err;
	bool ret;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
	    ipv4_is_loopback(saddr))
		return false;

	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_iif = net_dev->ifindex;
	fl4.daddr = daddr;
	fl4.saddr = saddr;

	rcu_read_lock();
	err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
	ret = err == 0 && FIB_RES_DEV(res) == net_dev;
	rcu_read_unlock();

	return ret;
}

static bool validate_ipv6_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#if IS_ENABLED(CONFIG_IPV6)
	const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
			   IPV6_ADDR_LINKLOCAL;
	struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
					 &src_addr->sin6_addr, net_dev->ifindex,
					 NULL, strict);
	bool ret;

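	/* The route back to the peer must exist and resolve to the device
	 * the request arrived on; otherwise treat the peer as unreachable.
	 */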
	if (!rt)
		return false;

	ret = rt->rt6i_idev->dev == net_dev;
	ip6_rt_put(rt);

	return ret;
#else
	return false;
#endif
}

static bool validate_net_dev(struct net_device *net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;

	switch (daddr->sa_family) {
	case AF_INET:
		return saddr->sa_family == AF_INET &&
		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);

	case AF_INET6:
		return saddr->sa_family == AF_INET6 &&
		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);

	default:
		return false;
	}
}

static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
					  const struct cma_req_info *req)
{
	struct sockaddr_storage listen_addr_storage, src_addr_storage;
	struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
			*src_addr = (struct sockaddr *)&src_addr_storage;
	struct net_device *net_dev;
	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
					   gid, listen_addr);
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
		dev_put(net_dev);
		return ERR_PTR(-EHOSTUNREACH);
	}

	return net_dev;
}

static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const struct cma_hdr *hdr)
{
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const int port_num = id->port_num ?: rdma_start_port(device);

	return rdma_protocol_roce(device, port_num);
}

static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const struct net_device *net_dev,
			      u8 port_num)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (!net_dev)
		/* This request is an AF_IB request or a RoCE request */
		return (!id->port_num || id->port_num == port_num) &&
		       (addr->src_addr.ss_family == AF_IB ||
			rdma_protocol_roce(id->device, port_num));

	return !addr->dev_addr.bound_dev_if ||
	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
		addr->dev_addr.bound_dev_if == net_dev->ifindex);
}

static struct rdma_id_private *cma_find_listener(
		const struct rdma_bind_list *bind_list,
		const struct ib_cm_id *cm_id,
		const struct ib_cm_event *ib_event,
		const struct cma_req_info *req,
		const struct net_device *net_dev)
{
	struct rdma_id_private *id_priv, *id_priv_dev;

	if (!bind_list)
		return ERR_PTR(-EINVAL);

	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
		if (cma_match_private_data(id_priv, ib_event->private_data)) {
			if (id_priv->id.device == cm_id->device &&
			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
				return id_priv;
			list_for_each_entry(id_priv_dev,
					    &id_priv->listen_list,
					    listen_list) {
				if (id_priv_dev->id.device == cm_id->device &&
				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
					return id_priv_dev;
			}
		}
	}

	return ERR_PTR(-EINVAL);
}

static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
						 struct ib_cm_event *ib_event,
						 struct net_device **net_dev)
{
	struct cma_req_info req;
	struct rdma_bind_list *bind_list;
	struct rdma_id_private *id_priv;
	int err;

	err = cma_save_req_info(ib_event, &req);
	if (err)
		return ERR_PTR(err);

	*net_dev = cma_get_net_dev(ib_event, &req);
	if (IS_ERR(*net_dev)) {
		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
			/* Assuming the protocol is AF_IB */
			*net_dev = NULL;
		} else if (rdma_protocol_roce(req.device, req.port)) {
			/* TODO find the net dev matching the request parameters
			 * through the RoCE GID table */
			*net_dev = NULL;
		} else {
			return ERR_CAST(*net_dev);
		}
	}

	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
				rdma_ps_from_service_id(req.service_id),
				cma_port_from_service_id(req.service_id));
	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
	if (IS_ERR(id_priv) && *net_dev) {
		dev_put(*net_dev);
		*net_dev = NULL;
	}

	return id_priv;
}

static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
{
	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}

static void cma_cancel_route(struct rdma_id_private *id_priv)
{
	if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
		if (id_priv->query)
			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
	}
}

static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
	struct rdma_id_private *dev_id_priv;

	/*
	 * Remove from listen_any_list to prevent added devices from spawning
	 * additional listen requests.
	 */
	mutex_lock(&lock);
	list_del(&id_priv->list);

	while (!list_empty(&id_priv->listen_list)) {
		dev_id_priv = list_entry(id_priv->listen_list.next,
					 struct rdma_id_private, listen_list);
		/* sync with device removal to avoid duplicate destruction */
		list_del_init(&dev_id_priv->list);
		list_del(&dev_id_priv->listen_list);
		mutex_unlock(&lock);

		rdma_destroy_id(&dev_id_priv->id);
		mutex_lock(&lock);
	}
	mutex_unlock(&lock);
}

static void cma_cancel_operation(struct rdma_id_private *id_priv,
				 enum rdma_cm_state state)
{
	switch (state) {
	case RDMA_CM_ADDR_QUERY:
		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
		break;
	case RDMA_CM_ROUTE_QUERY:
		cma_cancel_route(id_priv);
		break;
	case RDMA_CM_LISTEN:
		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
			cma_cancel_listens(id_priv);
		break;
	default:
		break;
	}
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
	struct rdma_bind_list *bind_list = id_priv->bind_list;
	struct net *net = id_priv->id.route.addr.dev_addr.net;

	if (!bind_list)
		return;

	mutex_lock(&lock);
	hlist_del(&id_priv->node);
	if (hlist_empty(&bind_list->owners)) {
		cma_ps_remove(net, bind_list->ps, bind_list->port);
		kfree(bind_list);
	}
	mutex_unlock(&lock);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
	struct cma_multicast *mc;

	while (!list_empty(&id_priv->mc_list)) {
		mc = container_of(id_priv->mc_list.next,
				  struct cma_multicast, list);
		list_del(&mc->list);
		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
				      id_priv->id.port_num)) {
			ib_sa_free_multicast(mc->multicast.ib);
			kfree(mc);
		} else {
			if (mc->igmp_joined) {
				struct rdma_dev_addr *dev_addr =
					&id_priv->id.route.addr.dev_addr;
				struct net_device *ndev = NULL;

				if (dev_addr->bound_dev_if)
					ndev = dev_get_by_index(&init_net,
								dev_addr->bound_dev_if);
				if (ndev) {
					cma_igmp_send(ndev,
						      &mc->multicast.ib->rec.mgid,
						      false);
					dev_put(ndev);
				}
			}
			kref_put(&mc->mcref, release_mc);
		}
	}
}

void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum rdma_cm_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
	cma_cancel_operation(id_priv, state);

	/*
	 * Wait for any active callback to finish.  New callbacks will find
	 * the id_priv state set to destroying and abort.
	 */
	mutex_lock(&id_priv->handler_mutex);
	mutex_unlock(&id_priv->handler_mutex);

	if (id_priv->cma_dev) {
		rdma_restrack_del(&id_priv->res);
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
				ib_destroy_cm_id(id_priv->cm_id.ib);
		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.iw)
				iw_destroy_cm_id(id_priv->cm_id.iw);
		}
		cma_leave_mc_groups(id_priv);
		cma_release_dev(id_priv);
	}

	cma_release_port(id_priv);
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	if (id_priv->internal_id)
		cma_deref_id(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);
	put_net(id_priv->id.route.addr.dev_addr.net);
	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
	int ret;

	ret = cma_modify_qp_rtr(id_priv, NULL);
	if (ret)
		goto reject;

	ret = cma_modify_qp_rts(id_priv, NULL);
	if (ret)
		goto reject;

	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
	if (ret)
		goto reject;

	return 0;
reject:
	pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
	cma_modify_qp_err(id_priv);
	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
		       NULL, 0, NULL, 0);
	return ret;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
				   struct ib_cm_rep_event_param *rep_data,
				   void *private_data)
{
	event->param.conn.private_data = private_data;
	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
	event->param.conn.responder_resources = rep_data->responder_resources;
	event->param.conn.initiator_depth = rep_data->initiator_depth;
	event->param.conn.flow_control = rep_data->flow_control;
	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
	event->param.conn.srq = rep_data->srq;
	event->param.conn.qp_num = rep_data->remote_qpn;
}

static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event;
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_CONNECT) ||
	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_DISCONNECT))
		goto out;

	memset(&event, 0, sizeof event);
	switch (ib_event->event) {
	case IB_CM_REQ_ERROR:
	case IB_CM_REP_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_REP_RECEIVED:
		if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
		    (id_priv->id.qp_type != IB_QPT_UD))
			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
		if (id_priv->id.qp) {
			event.status = cma_rep_recv(id_priv);
			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
						     RDMA_CM_EVENT_ESTABLISHED;
		} else {
			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
		}
		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
				       ib_event->private_data);
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	case IB_CM_DREQ_ERROR:
		event.status = -ETIMEDOUT; /* fall through */
	case IB_CM_DREQ_RECEIVED:
	case IB_CM_DREP_RECEIVED:
		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
				   RDMA_CM_DISCONNECT))
			goto out;
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
		break;
	case IB_CM_MRA_RECEIVED:
		/* ignore event */
		goto out;
	case IB_CM_REJ_RECEIVED:
		pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
										ib_event->param.rej_rcvd.reason));
		cma_modify_qp_err(id_priv);
		event.status = ib_event->param.rej_rcvd.reason;
		event.event = RDMA_CM_EVENT_REJECTED;
		event.param.conn.private_data = ib_event->private_data;
		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
		break;
	default:
		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
					       struct ib_cm_event *ib_event,
					       struct net_device *net_dev)
{
	struct rdma_id_private *listen_id_priv;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	struct rdma_route *rt;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
	const __be64 service_id =
		ib_event->param.req_rcvd.primary_path->service_id;
	int ret;

	listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
			      listen_id->event_handler, listen_id->context,
			      listen_id->ps, ib_event->param.req_rcvd.qp_type,
			      listen_id_priv->res.kern_name);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family, service_id))
		goto err;

	rt = &id->route;
	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
			       GFP_KERNEL);
	if (!rt->path_rec)
		goto err;

	rt->path_rec[0] = *path;
	if (rt->num_paths == 2)
		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

	if (net_dev) {
		rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
	} else {
		if (!cma_protocol_roce(listen_id) &&
		    cma_any_addr(cma_src_addr(id_priv))) {
			rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
			rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
			ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
		} else if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
			if (ret)
				goto err;
		}
	}
	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;

err:
	rdma_destroy_id(id);
	return NULL;
}

static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
					      struct ib_cm_event *ib_event,
					      struct net_device *net_dev)
{
	struct rdma_id_private *listen_id_priv;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	struct net *net = listen_id->route.addr.dev_addr.net;
	int ret;

	listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
			      listen_id->ps, IB_QPT_UD,
			      listen_id_priv->res.kern_name);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family,
			      ib_event->param.sidr_req_rcvd.service_id))
		goto err;

	if (net_dev) {
		rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
	} else {
		if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv),
						 &id->route.addr.dev_addr);
			if (ret)
				goto err;
		}
	}

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;
err:
	rdma_destroy_id(id);
	return NULL;
}

static void cma_set_req_event_data(struct rdma_cm_event *event,
				   struct ib_cm_req_event_param *req_data,
				   void *private_data, int offset)
{
	event->param.conn.private_data = private_data + offset;
	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
	event->param.conn.responder_resources = req_data->responder_resources;
	event->param.conn.initiator_depth = req_data->initiator_depth;
	event->param.conn.flow_control = req_data->flow_control;
	event->param.conn.retry_count = req_data->retry_count;
	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
	event->param.conn.srq = req_data->srq;
	event->param.conn.qp_num = req_data->remote_qpn;
}

static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
{
	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
		 (id->qp_type == IB_QPT_UD)) ||
		(!id->qp_type));
}

static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *listen_id, *conn_id = NULL;
	struct rdma_cm_event event;
	struct net_device *net_dev;
	u8 offset;
	int ret;

	listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
	if (IS_ERR(listen_id))
		return PTR_ERR(listen_id);

	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
		ret = -EINVAL;
		goto net_dev_put;
	}

	mutex_lock(&listen_id->handler_mutex);
	if (listen_id->state != RDMA_CM_LISTEN) {
		ret = -ECONNABORTED;
		goto err1;
	}

	memset(&event, 0, sizeof event);
	offset = cma_user_data_offset(listen_id);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
		conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
		event.param.ud.private_data = ib_event->private_data + offset;
		event.param.ud.private_data_len =
			IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
	} else {
		conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
				       ib_event->private_data, offset);
	}
	if (!conn_id) {
		ret = -ENOMEM;
		goto err1;
	}

	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
	ret = cma_acquire_dev(conn_id, listen_id);
	if (ret)
		goto err2;

	conn_id->cm_id.ib = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_ib_handler;

	/*
	 * Protect against the user destroying conn_id from another thread
	 * until we're done accessing it.
	 */
	atomic_inc(&conn_id->refcount);
	ret = conn_id->id.event_handler(&conn_id->id, &event);
	if (ret)
		goto err3;
	/*
	 * Acquire mutex to prevent user executing rdma_destroy_id()
	 * while we're accessing the cm_id.
	 */
	mutex_lock(&lock);
	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
	    (conn_id->id.qp_type != IB_QPT_UD))
		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
	mutex_unlock(&lock);
	mutex_unlock(&conn_id->handler_mutex);
	mutex_unlock(&listen_id->handler_mutex);
	cma_deref_id(conn_id);
	if (net_dev)
		dev_put(net_dev);
	return 0;

err3:
	cma_deref_id(conn_id);
	/* Destroy the CM ID by returning a non-zero value. */
*/ 1982 conn_id->cm_id.ib = NULL; 1983 err2: 1984 cma_exch(conn_id, RDMA_CM_DESTROYING); 1985 mutex_unlock(&conn_id->handler_mutex); 1986 err1: 1987 mutex_unlock(&listen_id->handler_mutex); 1988 if (conn_id) 1989 rdma_destroy_id(&conn_id->id); 1990 1991 net_dev_put: 1992 if (net_dev) 1993 dev_put(net_dev); 1994 1995 return ret; 1996 } 1997 1998 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 1999 { 2000 if (addr->sa_family == AF_IB) 2001 return ((struct sockaddr_ib *) addr)->sib_sid; 2002 2003 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2004 } 2005 EXPORT_SYMBOL(rdma_get_service_id); 2006 2007 void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, 2008 union ib_gid *dgid) 2009 { 2010 struct rdma_addr *addr = &cm_id->route.addr; 2011 2012 if (!cm_id->device) { 2013 if (sgid) 2014 memset(sgid, 0, sizeof(*sgid)); 2015 if (dgid) 2016 memset(dgid, 0, sizeof(*dgid)); 2017 return; 2018 } 2019 2020 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) { 2021 if (sgid) 2022 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid); 2023 if (dgid) 2024 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid); 2025 } else { 2026 if (sgid) 2027 rdma_addr_get_sgid(&addr->dev_addr, sgid); 2028 if (dgid) 2029 rdma_addr_get_dgid(&addr->dev_addr, dgid); 2030 } 2031 } 2032 EXPORT_SYMBOL(rdma_read_gids); 2033 2034 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2035 { 2036 struct rdma_id_private *id_priv = iw_id->context; 2037 struct rdma_cm_event event; 2038 int ret = 0; 2039 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2040 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2041 2042 mutex_lock(&id_priv->handler_mutex); 2043 if (id_priv->state != RDMA_CM_CONNECT) 2044 goto out; 2045 2046 memset(&event, 0, sizeof event); 2047 switch (iw_event->event) { 2048 case IW_CM_EVENT_CLOSE: 2049 event.event = RDMA_CM_EVENT_DISCONNECTED; 2050 break; 2051 case IW_CM_EVENT_CONNECT_REPLY: 2052 memcpy(cma_src_addr(id_priv), laddr, 2053 rdma_addr_size(laddr)); 2054 memcpy(cma_dst_addr(id_priv), raddr, 2055 rdma_addr_size(raddr)); 2056 switch (iw_event->status) { 2057 case 0: 2058 event.event = RDMA_CM_EVENT_ESTABLISHED; 2059 event.param.conn.initiator_depth = iw_event->ird; 2060 event.param.conn.responder_resources = iw_event->ord; 2061 break; 2062 case -ECONNRESET: 2063 case -ECONNREFUSED: 2064 event.event = RDMA_CM_EVENT_REJECTED; 2065 break; 2066 case -ETIMEDOUT: 2067 event.event = RDMA_CM_EVENT_UNREACHABLE; 2068 break; 2069 default: 2070 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2071 break; 2072 } 2073 break; 2074 case IW_CM_EVENT_ESTABLISHED: 2075 event.event = RDMA_CM_EVENT_ESTABLISHED; 2076 event.param.conn.initiator_depth = iw_event->ird; 2077 event.param.conn.responder_resources = iw_event->ord; 2078 break; 2079 default: 2080 BUG_ON(1); 2081 } 2082 2083 event.status = iw_event->status; 2084 event.param.conn.private_data = iw_event->private_data; 2085 event.param.conn.private_data_len = iw_event->private_data_len; 2086 ret = id_priv->id.event_handler(&id_priv->id, &event); 2087 if (ret) { 2088 /* Destroy the CM ID by returning a non-zero value. 
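 * As in the IB handlers, a non-zero return hands cm_id back to the
 * iw_cm layer for destruction; only the rdma_cm_id is destroyed
 * explicitly below.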
*/ 2089 id_priv->cm_id.iw = NULL; 2090 cma_exch(id_priv, RDMA_CM_DESTROYING); 2091 mutex_unlock(&id_priv->handler_mutex); 2092 rdma_destroy_id(&id_priv->id); 2093 return ret; 2094 } 2095 2096 out: 2097 mutex_unlock(&id_priv->handler_mutex); 2098 return ret; 2099 } 2100 2101 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2102 struct iw_cm_event *iw_event) 2103 { 2104 struct rdma_cm_id *new_cm_id; 2105 struct rdma_id_private *listen_id, *conn_id; 2106 struct rdma_cm_event event; 2107 int ret = -ECONNABORTED; 2108 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2109 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2110 2111 listen_id = cm_id->context; 2112 2113 mutex_lock(&listen_id->handler_mutex); 2114 if (listen_id->state != RDMA_CM_LISTEN) 2115 goto out; 2116 2117 /* Create a new RDMA id for the new IW CM ID */ 2118 new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2119 listen_id->id.event_handler, 2120 listen_id->id.context, 2121 RDMA_PS_TCP, IB_QPT_RC, 2122 listen_id->res.kern_name); 2123 if (IS_ERR(new_cm_id)) { 2124 ret = -ENOMEM; 2125 goto out; 2126 } 2127 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2128 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2129 conn_id->state = RDMA_CM_CONNECT; 2130 2131 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2132 if (ret) { 2133 mutex_unlock(&conn_id->handler_mutex); 2134 rdma_destroy_id(new_cm_id); 2135 goto out; 2136 } 2137 2138 ret = cma_acquire_dev(conn_id, listen_id); 2139 if (ret) { 2140 mutex_unlock(&conn_id->handler_mutex); 2141 rdma_destroy_id(new_cm_id); 2142 goto out; 2143 } 2144 2145 conn_id->cm_id.iw = cm_id; 2146 cm_id->context = conn_id; 2147 cm_id->cm_handler = cma_iw_handler; 2148 2149 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2150 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2151 2152 memset(&event, 0, sizeof event); 2153 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2154 event.param.conn.private_data = iw_event->private_data; 2155 event.param.conn.private_data_len = iw_event->private_data_len; 2156 event.param.conn.initiator_depth = iw_event->ird; 2157 event.param.conn.responder_resources = iw_event->ord; 2158 2159 /* 2160 * Protect against the user destroying conn_id from another thread 2161 * until we're done accessing it. 
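 * The reference taken here is dropped again via cma_deref_id() after
 * the event handler returns, on both the success and error paths.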
2162 */ 2163 atomic_inc(&conn_id->refcount); 2164 ret = conn_id->id.event_handler(&conn_id->id, &event); 2165 if (ret) { 2166 /* User wants to destroy the CM ID */ 2167 conn_id->cm_id.iw = NULL; 2168 cma_exch(conn_id, RDMA_CM_DESTROYING); 2169 mutex_unlock(&conn_id->handler_mutex); 2170 cma_deref_id(conn_id); 2171 rdma_destroy_id(&conn_id->id); 2172 goto out; 2173 } 2174 2175 mutex_unlock(&conn_id->handler_mutex); 2176 cma_deref_id(conn_id); 2177 2178 out: 2179 mutex_unlock(&listen_id->handler_mutex); 2180 return ret; 2181 } 2182 2183 static int cma_ib_listen(struct rdma_id_private *id_priv) 2184 { 2185 struct sockaddr *addr; 2186 struct ib_cm_id *id; 2187 __be64 svc_id; 2188 2189 addr = cma_src_addr(id_priv); 2190 svc_id = rdma_get_service_id(&id_priv->id, addr); 2191 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2192 if (IS_ERR(id)) 2193 return PTR_ERR(id); 2194 id_priv->cm_id.ib = id; 2195 2196 return 0; 2197 } 2198 2199 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2200 { 2201 int ret; 2202 struct iw_cm_id *id; 2203 2204 id = iw_create_cm_id(id_priv->id.device, 2205 iw_conn_req_handler, 2206 id_priv); 2207 if (IS_ERR(id)) 2208 return PTR_ERR(id); 2209 2210 id->tos = id_priv->tos; 2211 id_priv->cm_id.iw = id; 2212 2213 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2214 rdma_addr_size(cma_src_addr(id_priv))); 2215 2216 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2217 2218 if (ret) { 2219 iw_destroy_cm_id(id_priv->cm_id.iw); 2220 id_priv->cm_id.iw = NULL; 2221 } 2222 2223 return ret; 2224 } 2225 2226 static int cma_listen_handler(struct rdma_cm_id *id, 2227 struct rdma_cm_event *event) 2228 { 2229 struct rdma_id_private *id_priv = id->context; 2230 2231 id->context = id_priv->id.context; 2232 id->event_handler = id_priv->id.event_handler; 2233 return id_priv->id.event_handler(id, event); 2234 } 2235 2236 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2237 struct cma_device *cma_dev) 2238 { 2239 struct rdma_id_private *dev_id_priv; 2240 struct rdma_cm_id *id; 2241 struct net *net = id_priv->id.route.addr.dev_addr.net; 2242 int ret; 2243 2244 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2245 return; 2246 2247 id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2248 id_priv->id.qp_type, id_priv->res.kern_name); 2249 if (IS_ERR(id)) 2250 return; 2251 2252 dev_id_priv = container_of(id, struct rdma_id_private, id); 2253 2254 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2255 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2256 rdma_addr_size(cma_src_addr(id_priv))); 2257 2258 _cma_attach_to_dev(dev_id_priv, cma_dev); 2259 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2260 atomic_inc(&id_priv->refcount); 2261 dev_id_priv->internal_id = 1; 2262 dev_id_priv->afonly = id_priv->afonly; 2263 2264 ret = rdma_listen(id, id_priv->backlog); 2265 if (ret) 2266 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2267 ret, cma_dev->device->name); 2268 } 2269 2270 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2271 { 2272 struct cma_device *cma_dev; 2273 2274 mutex_lock(&lock); 2275 list_add_tail(&id_priv->list, &listen_any_list); 2276 list_for_each_entry(cma_dev, &dev_list, list) 2277 cma_listen_on_dev(id_priv, cma_dev); 2278 mutex_unlock(&lock); 2279 } 2280 2281 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2282 { 2283 struct rdma_id_private *id_priv; 2284 2285 id_priv = container_of(id, struct 
rdma_id_private, id); 2286 id_priv->tos = (u8) tos; 2287 id_priv->tos_set = true; 2288 } 2289 EXPORT_SYMBOL(rdma_set_service_type); 2290 2291 static void cma_query_handler(int status, struct sa_path_rec *path_rec, 2292 void *context) 2293 { 2294 struct cma_work *work = context; 2295 struct rdma_route *route; 2296 2297 route = &work->id->id.route; 2298 2299 if (!status) { 2300 route->num_paths = 1; 2301 *route->path_rec = *path_rec; 2302 } else { 2303 work->old_state = RDMA_CM_ROUTE_QUERY; 2304 work->new_state = RDMA_CM_ADDR_RESOLVED; 2305 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2306 work->event.status = status; 2307 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", 2308 status); 2309 } 2310 2311 queue_work(cma_wq, &work->work); 2312 } 2313 2314 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2315 struct cma_work *work) 2316 { 2317 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2318 struct sa_path_rec path_rec; 2319 ib_sa_comp_mask comp_mask; 2320 struct sockaddr_in6 *sin6; 2321 struct sockaddr_ib *sib; 2322 2323 memset(&path_rec, 0, sizeof path_rec); 2324 2325 if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num)) 2326 path_rec.rec_type = SA_PATH_REC_TYPE_OPA; 2327 else 2328 path_rec.rec_type = SA_PATH_REC_TYPE_IB; 2329 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2330 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2331 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2332 path_rec.numb_path = 1; 2333 path_rec.reversible = 1; 2334 path_rec.service_id = rdma_get_service_id(&id_priv->id, 2335 cma_dst_addr(id_priv)); 2336 2337 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2338 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2339 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2340 2341 switch (cma_family(id_priv)) { 2342 case AF_INET: 2343 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2344 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2345 break; 2346 case AF_INET6: 2347 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2348 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2349 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2350 break; 2351 case AF_IB: 2352 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2353 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2354 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2355 break; 2356 } 2357 2358 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2359 id_priv->id.port_num, &path_rec, 2360 comp_mask, timeout_ms, 2361 GFP_KERNEL, cma_query_handler, 2362 work, &id_priv->query); 2363 2364 return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; 2365 } 2366 2367 static void cma_work_handler(struct work_struct *_work) 2368 { 2369 struct cma_work *work = container_of(_work, struct cma_work, work); 2370 struct rdma_id_private *id_priv = work->id; 2371 int destroy = 0; 2372 2373 mutex_lock(&id_priv->handler_mutex); 2374 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2375 goto out; 2376 2377 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2378 cma_exch(id_priv, RDMA_CM_DESTROYING); 2379 destroy = 1; 2380 } 2381 out: 2382 mutex_unlock(&id_priv->handler_mutex); 2383 cma_deref_id(id_priv); 2384 if (destroy) 2385 rdma_destroy_id(&id_priv->id); 2386 kfree(work); 2387 } 2388 2389 static void cma_ndev_work_handler(struct work_struct *_work) 2390 { 2391 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 2392 struct rdma_id_private *id_priv = work->id; 2393 int destroy = 0; 2394 2395 mutex_lock(&id_priv->handler_mutex); 2396 if (id_priv->state == RDMA_CM_DESTROYING || 2397 id_priv->state == RDMA_CM_DEVICE_REMOVAL) 2398 goto out; 2399 2400 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2401 cma_exch(id_priv, RDMA_CM_DESTROYING); 2402 destroy = 1; 2403 } 2404 2405 out: 2406 mutex_unlock(&id_priv->handler_mutex); 2407 cma_deref_id(id_priv); 2408 if (destroy) 2409 rdma_destroy_id(&id_priv->id); 2410 kfree(work); 2411 } 2412 2413 static void cma_init_resolve_route_work(struct cma_work *work, 2414 struct rdma_id_private *id_priv) 2415 { 2416 work->id = id_priv; 2417 INIT_WORK(&work->work, cma_work_handler); 2418 work->old_state = RDMA_CM_ROUTE_QUERY; 2419 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2420 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2421 } 2422 2423 static void cma_init_resolve_addr_work(struct cma_work *work, 2424 struct rdma_id_private *id_priv) 2425 { 2426 work->id = id_priv; 2427 INIT_WORK(&work->work, cma_work_handler); 2428 work->old_state = RDMA_CM_ADDR_QUERY; 2429 work->new_state = RDMA_CM_ADDR_RESOLVED; 2430 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2431 } 2432 2433 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2434 { 2435 struct rdma_route *route = &id_priv->id.route; 2436 struct cma_work *work; 2437 int ret; 2438 2439 work = kzalloc(sizeof *work, GFP_KERNEL); 2440 if (!work) 2441 return -ENOMEM; 2442 2443 cma_init_resolve_route_work(work, id_priv); 2444 2445 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2446 if (!route->path_rec) { 2447 ret = -ENOMEM; 2448 goto err1; 2449 } 2450 2451 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2452 if (ret) 2453 goto err2; 2454 2455 return 0; 2456 err2: 2457 kfree(route->path_rec); 2458 route->path_rec = NULL; 2459 err1: 2460 kfree(work); 2461 return ret; 2462 } 2463 2464 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2465 unsigned long supported_gids, 2466 enum ib_gid_type default_gid) 2467 { 2468 if ((network_type == RDMA_NETWORK_IPV4 || 2469 network_type == RDMA_NETWORK_IPV6) && 2470 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2471 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2472 2473 return default_gid; 2474 } 2475 2476 /* 2477 * cma_iboe_set_path_rec_l2_fields() is helper function which sets 2478 * path record type based on GID type. 2479 * It also sets up other L2 fields which includes destination mac address 2480 * netdev ifindex, of the path record. 2481 * It returns the netdev of the bound interface for this path record entry. 
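 * A NULL return means no bound interface (or no matching netdev) was
 * found; callers that do get a netdev back are expected to dev_put()
 * it once they are done with it.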
2482 */ 2483 static struct net_device * 2484 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) 2485 { 2486 struct rdma_route *route = &id_priv->id.route; 2487 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; 2488 struct rdma_addr *addr = &route->addr; 2489 unsigned long supported_gids; 2490 struct net_device *ndev; 2491 2492 if (!addr->dev_addr.bound_dev_if) 2493 return NULL; 2494 2495 ndev = dev_get_by_index(addr->dev_addr.net, 2496 addr->dev_addr.bound_dev_if); 2497 if (!ndev) 2498 return NULL; 2499 2500 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2501 id_priv->id.port_num); 2502 gid_type = cma_route_gid_type(addr->dev_addr.network, 2503 supported_gids, 2504 id_priv->gid_type); 2505 /* Use the hint from IP Stack to select GID Type */ 2506 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2507 gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2508 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); 2509 2510 route->path_rec->roce.route_resolved = true; 2511 sa_path_set_ndev(route->path_rec, addr->dev_addr.net); 2512 sa_path_set_ifindex(route->path_rec, ndev->ifindex); 2513 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); 2514 return ndev; 2515 } 2516 2517 int rdma_set_ib_path(struct rdma_cm_id *id, 2518 struct sa_path_rec *path_rec) 2519 { 2520 struct rdma_id_private *id_priv; 2521 struct net_device *ndev; 2522 int ret; 2523 2524 id_priv = container_of(id, struct rdma_id_private, id); 2525 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2526 RDMA_CM_ROUTE_RESOLVED)) 2527 return -EINVAL; 2528 2529 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec), 2530 GFP_KERNEL); 2531 if (!id->route.path_rec) { 2532 ret = -ENOMEM; 2533 goto err; 2534 } 2535 2536 if (rdma_protocol_roce(id->device, id->port_num)) { 2537 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2538 if (!ndev) { 2539 ret = -ENODEV; 2540 goto err_free; 2541 } 2542 dev_put(ndev); 2543 } 2544 2545 id->route.num_paths = 1; 2546 return 0; 2547 2548 err_free: 2549 kfree(id->route.path_rec); 2550 id->route.path_rec = NULL; 2551 err: 2552 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2553 return ret; 2554 } 2555 EXPORT_SYMBOL(rdma_set_ib_path); 2556 2557 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2558 { 2559 struct cma_work *work; 2560 2561 work = kzalloc(sizeof *work, GFP_KERNEL); 2562 if (!work) 2563 return -ENOMEM; 2564 2565 cma_init_resolve_route_work(work, id_priv); 2566 queue_work(cma_wq, &work->work); 2567 return 0; 2568 } 2569 2570 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2571 { 2572 int prio; 2573 struct net_device *dev; 2574 2575 prio = rt_tos2priority(tos); 2576 dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; 2577 if (dev->num_tc) 2578 return netdev_get_prio_tc_map(dev, prio); 2579 2580 #if IS_ENABLED(CONFIG_VLAN_8021Q) 2581 if (is_vlan_dev(ndev)) 2582 return (vlan_dev_get_egress_qos_mask(ndev, prio) & 2583 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 2584 #endif 2585 return 0; 2586 } 2587 2588 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2589 { 2590 struct rdma_route *route = &id_priv->id.route; 2591 struct rdma_addr *addr = &route->addr; 2592 struct cma_work *work; 2593 int ret; 2594 struct net_device *ndev; 2595 2596 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 2597 rdma_start_port(id_priv->cma_dev->device)]; 2598 u8 tos = id_priv->tos_set ? 
id_priv->tos : default_roce_tos; 2599 2600 2601 work = kzalloc(sizeof *work, GFP_KERNEL); 2602 if (!work) 2603 return -ENOMEM; 2604 2605 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2606 if (!route->path_rec) { 2607 ret = -ENOMEM; 2608 goto err1; 2609 } 2610 2611 route->num_paths = 1; 2612 2613 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2614 if (!ndev) { 2615 ret = -ENODEV; 2616 goto err2; 2617 } 2618 2619 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2620 &route->path_rec->sgid); 2621 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2622 &route->path_rec->dgid); 2623 2624 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2625 /* TODO: get the hoplimit from the inet/inet6 device */ 2626 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2627 else 2628 route->path_rec->hop_limit = 1; 2629 route->path_rec->reversible = 1; 2630 route->path_rec->pkey = cpu_to_be16(0xffff); 2631 route->path_rec->mtu_selector = IB_SA_EQ; 2632 route->path_rec->sl = iboe_tos_to_sl(ndev, tos); 2633 route->path_rec->traffic_class = tos; 2634 route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 2635 route->path_rec->rate_selector = IB_SA_EQ; 2636 route->path_rec->rate = iboe_get_rate(ndev); 2637 dev_put(ndev); 2638 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2639 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2640 if (!route->path_rec->mtu) { 2641 ret = -EINVAL; 2642 goto err2; 2643 } 2644 2645 cma_init_resolve_route_work(work, id_priv); 2646 queue_work(cma_wq, &work->work); 2647 2648 return 0; 2649 2650 err2: 2651 kfree(route->path_rec); 2652 route->path_rec = NULL; 2653 err1: 2654 kfree(work); 2655 return ret; 2656 } 2657 2658 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2659 { 2660 struct rdma_id_private *id_priv; 2661 int ret; 2662 2663 id_priv = container_of(id, struct rdma_id_private, id); 2664 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2665 return -EINVAL; 2666 2667 atomic_inc(&id_priv->refcount); 2668 if (rdma_cap_ib_sa(id->device, id->port_num)) 2669 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2670 else if (rdma_protocol_roce(id->device, id->port_num)) 2671 ret = cma_resolve_iboe_route(id_priv); 2672 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2673 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2674 else 2675 ret = -ENOSYS; 2676 2677 if (ret) 2678 goto err; 2679 2680 return 0; 2681 err: 2682 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2683 cma_deref_id(id_priv); 2684 return ret; 2685 } 2686 EXPORT_SYMBOL(rdma_resolve_route); 2687 2688 static void cma_set_loopback(struct sockaddr *addr) 2689 { 2690 switch (addr->sa_family) { 2691 case AF_INET: 2692 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2693 break; 2694 case AF_INET6: 2695 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2696 0, 0, 0, htonl(1)); 2697 break; 2698 default: 2699 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2700 0, 0, 0, htonl(1)); 2701 break; 2702 } 2703 } 2704 2705 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2706 { 2707 struct cma_device *cma_dev, *cur_dev; 2708 union ib_gid gid; 2709 enum ib_port_state port_state; 2710 u16 pkey; 2711 int ret; 2712 u8 p; 2713 2714 cma_dev = NULL; 2715 mutex_lock(&lock); 2716 list_for_each_entry(cur_dev, &dev_list, list) { 2717 if (cma_family(id_priv) == AF_IB && 2718 !rdma_cap_ib_cm(cur_dev->device, 1)) 2719 continue; 2720 2721 if (!cma_dev) 
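/*
 * Remember the first compatible device as a fallback in case no
 * port is found in the ACTIVE state below.
 */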
2722 cma_dev = cur_dev; 2723 2724 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2725 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && 2726 port_state == IB_PORT_ACTIVE) { 2727 cma_dev = cur_dev; 2728 goto port_found; 2729 } 2730 } 2731 } 2732 2733 if (!cma_dev) { 2734 ret = -ENODEV; 2735 goto out; 2736 } 2737 2738 p = 1; 2739 2740 port_found: 2741 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2742 if (ret) 2743 goto out; 2744 2745 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2746 if (ret) 2747 goto out; 2748 2749 id_priv->id.route.addr.dev_addr.dev_type = 2750 (rdma_protocol_ib(cma_dev->device, p)) ? 2751 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2752 2753 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2754 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2755 id_priv->id.port_num = p; 2756 cma_attach_to_dev(id_priv, cma_dev); 2757 cma_set_loopback(cma_src_addr(id_priv)); 2758 out: 2759 mutex_unlock(&lock); 2760 return ret; 2761 } 2762 2763 static void addr_handler(int status, struct sockaddr *src_addr, 2764 struct rdma_dev_addr *dev_addr, void *context) 2765 { 2766 struct rdma_id_private *id_priv = context; 2767 struct rdma_cm_event event; 2768 2769 memset(&event, 0, sizeof event); 2770 mutex_lock(&id_priv->handler_mutex); 2771 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2772 RDMA_CM_ADDR_RESOLVED)) 2773 goto out; 2774 2775 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2776 if (!status && !id_priv->cma_dev) { 2777 status = cma_acquire_dev(id_priv, NULL); 2778 if (status) 2779 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2780 status); 2781 } else { 2782 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); 2783 } 2784 2785 if (status) { 2786 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2787 RDMA_CM_ADDR_BOUND)) 2788 goto out; 2789 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2790 event.status = status; 2791 } else 2792 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2793 2794 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2795 cma_exch(id_priv, RDMA_CM_DESTROYING); 2796 mutex_unlock(&id_priv->handler_mutex); 2797 cma_deref_id(id_priv); 2798 rdma_destroy_id(&id_priv->id); 2799 return; 2800 } 2801 out: 2802 mutex_unlock(&id_priv->handler_mutex); 2803 cma_deref_id(id_priv); 2804 } 2805 2806 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2807 { 2808 struct cma_work *work; 2809 union ib_gid gid; 2810 int ret; 2811 2812 work = kzalloc(sizeof *work, GFP_KERNEL); 2813 if (!work) 2814 return -ENOMEM; 2815 2816 if (!id_priv->cma_dev) { 2817 ret = cma_bind_loopback(id_priv); 2818 if (ret) 2819 goto err; 2820 } 2821 2822 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2823 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2824 2825 cma_init_resolve_addr_work(work, id_priv); 2826 queue_work(cma_wq, &work->work); 2827 return 0; 2828 err: 2829 kfree(work); 2830 return ret; 2831 } 2832 2833 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2834 { 2835 struct cma_work *work; 2836 int ret; 2837 2838 work = kzalloc(sizeof *work, GFP_KERNEL); 2839 if (!work) 2840 return -ENOMEM; 2841 2842 if (!id_priv->cma_dev) { 2843 ret = cma_resolve_ib_dev(id_priv); 2844 if (ret) 2845 goto err; 2846 } 2847 2848 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2849 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2850 2851 cma_init_resolve_addr_work(work, 
id_priv); 2852 queue_work(cma_wq, &work->work); 2853 return 0; 2854 err: 2855 kfree(work); 2856 return ret; 2857 } 2858 2859 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2860 struct sockaddr *dst_addr) 2861 { 2862 if (!src_addr || !src_addr->sa_family) { 2863 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2864 src_addr->sa_family = dst_addr->sa_family; 2865 if (IS_ENABLED(CONFIG_IPV6) && 2866 dst_addr->sa_family == AF_INET6) { 2867 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2868 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2869 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2870 if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 2871 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2872 } else if (dst_addr->sa_family == AF_IB) { 2873 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2874 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2875 } 2876 } 2877 return rdma_bind_addr(id, src_addr); 2878 } 2879 2880 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2881 struct sockaddr *dst_addr, int timeout_ms) 2882 { 2883 struct rdma_id_private *id_priv; 2884 int ret; 2885 2886 id_priv = container_of(id, struct rdma_id_private, id); 2887 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2888 if (id_priv->state == RDMA_CM_IDLE) { 2889 ret = cma_bind_addr(id, src_addr, dst_addr); 2890 if (ret) { 2891 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2892 return ret; 2893 } 2894 } 2895 2896 if (cma_family(id_priv) != dst_addr->sa_family) { 2897 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2898 return -EINVAL; 2899 } 2900 2901 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 2902 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2903 return -EINVAL; 2904 } 2905 2906 atomic_inc(&id_priv->refcount); 2907 if (cma_any_addr(dst_addr)) { 2908 ret = cma_resolve_loopback(id_priv); 2909 } else { 2910 if (dst_addr->sa_family == AF_IB) { 2911 ret = cma_resolve_ib_addr(id_priv); 2912 } else { 2913 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2914 dst_addr, &id->route.addr.dev_addr, 2915 timeout_ms, addr_handler, id_priv); 2916 } 2917 } 2918 if (ret) 2919 goto err; 2920 2921 return 0; 2922 err: 2923 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2924 cma_deref_id(id_priv); 2925 return ret; 2926 } 2927 EXPORT_SYMBOL(rdma_resolve_addr); 2928 2929 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2930 { 2931 struct rdma_id_private *id_priv; 2932 unsigned long flags; 2933 int ret; 2934 2935 id_priv = container_of(id, struct rdma_id_private, id); 2936 spin_lock_irqsave(&id_priv->lock, flags); 2937 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2938 id_priv->reuseaddr = reuse; 2939 ret = 0; 2940 } else { 2941 ret = -EINVAL; 2942 } 2943 spin_unlock_irqrestore(&id_priv->lock, flags); 2944 return ret; 2945 } 2946 EXPORT_SYMBOL(rdma_set_reuseaddr); 2947 2948 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2949 { 2950 struct rdma_id_private *id_priv; 2951 unsigned long flags; 2952 int ret; 2953 2954 id_priv = container_of(id, struct rdma_id_private, id); 2955 spin_lock_irqsave(&id_priv->lock, flags); 2956 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2957 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2958 id_priv->afonly = afonly; 2959 ret = 0; 2960 } else { 2961 ret = -EINVAL; 2962 } 2963 spin_unlock_irqrestore(&id_priv->lock, flags); 2964 
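/*
 * The state check above means the address-family restriction can only
 * be changed while the ID is still idle or merely address-bound, i.e.
 * before rdma_resolve_addr() or rdma_listen() has been called.
 *
 * Illustrative sketch only (my_cm_handler and sin6 are placeholder
 * names, error handling omitted):
 *
 *	id = rdma_create_id(&init_net, my_cm_handler, NULL,
 *			    RDMA_PS_TCP, IB_QPT_RC);
 *	rdma_set_afonly(id, 1);
 *	rdma_bind_addr(id, (struct sockaddr *)&sin6);
 *	rdma_listen(id, 16);
 */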
return ret; 2965 } 2966 EXPORT_SYMBOL(rdma_set_afonly); 2967 2968 static void cma_bind_port(struct rdma_bind_list *bind_list, 2969 struct rdma_id_private *id_priv) 2970 { 2971 struct sockaddr *addr; 2972 struct sockaddr_ib *sib; 2973 u64 sid, mask; 2974 __be16 port; 2975 2976 addr = cma_src_addr(id_priv); 2977 port = htons(bind_list->port); 2978 2979 switch (addr->sa_family) { 2980 case AF_INET: 2981 ((struct sockaddr_in *) addr)->sin_port = port; 2982 break; 2983 case AF_INET6: 2984 ((struct sockaddr_in6 *) addr)->sin6_port = port; 2985 break; 2986 case AF_IB: 2987 sib = (struct sockaddr_ib *) addr; 2988 sid = be64_to_cpu(sib->sib_sid); 2989 mask = be64_to_cpu(sib->sib_sid_mask); 2990 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 2991 sib->sib_sid_mask = cpu_to_be64(~0ULL); 2992 break; 2993 } 2994 id_priv->bind_list = bind_list; 2995 hlist_add_head(&id_priv->node, &bind_list->owners); 2996 } 2997 2998 static int cma_alloc_port(enum rdma_ucm_port_space ps, 2999 struct rdma_id_private *id_priv, unsigned short snum) 3000 { 3001 struct rdma_bind_list *bind_list; 3002 int ret; 3003 3004 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3005 if (!bind_list) 3006 return -ENOMEM; 3007 3008 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3009 snum); 3010 if (ret < 0) 3011 goto err; 3012 3013 bind_list->ps = ps; 3014 bind_list->port = (unsigned short)ret; 3015 cma_bind_port(bind_list, id_priv); 3016 return 0; 3017 err: 3018 kfree(bind_list); 3019 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3020 } 3021 3022 static int cma_port_is_unique(struct rdma_bind_list *bind_list, 3023 struct rdma_id_private *id_priv) 3024 { 3025 struct rdma_id_private *cur_id; 3026 struct sockaddr *daddr = cma_dst_addr(id_priv); 3027 struct sockaddr *saddr = cma_src_addr(id_priv); 3028 __be16 dport = cma_port(daddr); 3029 3030 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3031 struct sockaddr *cur_daddr = cma_dst_addr(cur_id); 3032 struct sockaddr *cur_saddr = cma_src_addr(cur_id); 3033 __be16 cur_dport = cma_port(cur_daddr); 3034 3035 if (id_priv == cur_id) 3036 continue; 3037 3038 /* different dest port -> unique */ 3039 if (!cma_any_port(daddr) && 3040 !cma_any_port(cur_daddr) && 3041 (dport != cur_dport)) 3042 continue; 3043 3044 /* different src address -> unique */ 3045 if (!cma_any_addr(saddr) && 3046 !cma_any_addr(cur_saddr) && 3047 cma_addr_cmp(saddr, cur_saddr)) 3048 continue; 3049 3050 /* different dst address -> unique */ 3051 if (!cma_any_addr(daddr) && 3052 !cma_any_addr(cur_daddr) && 3053 cma_addr_cmp(daddr, cur_daddr)) 3054 continue; 3055 3056 return -EADDRNOTAVAIL; 3057 } 3058 return 0; 3059 } 3060 3061 static int cma_alloc_any_port(enum rdma_ucm_port_space ps, 3062 struct rdma_id_private *id_priv) 3063 { 3064 static unsigned int last_used_port; 3065 int low, high, remaining; 3066 unsigned int rover; 3067 struct net *net = id_priv->id.route.addr.dev_addr.net; 3068 3069 inet_get_local_port_range(net, &low, &high); 3070 remaining = (high - low) + 1; 3071 rover = prandom_u32() % remaining + low; 3072 retry: 3073 if (last_used_port != rover) { 3074 struct rdma_bind_list *bind_list; 3075 int ret; 3076 3077 bind_list = cma_ps_find(net, ps, (unsigned short)rover); 3078 3079 if (!bind_list) { 3080 ret = cma_alloc_port(ps, id_priv, rover); 3081 } else { 3082 ret = cma_port_is_unique(bind_list, id_priv); 3083 if (!ret) 3084 cma_bind_port(bind_list, id_priv); 3085 } 3086 /* 3087 * Remember previously used port number in order to avoid 3088 * re-using same port 
immediately after it is closed. 3089 */ 3090 if (!ret) 3091 last_used_port = rover; 3092 if (ret != -EADDRNOTAVAIL) 3093 return ret; 3094 } 3095 if (--remaining) { 3096 rover++; 3097 if ((rover < low) || (rover > high)) 3098 rover = low; 3099 goto retry; 3100 } 3101 return -EADDRNOTAVAIL; 3102 } 3103 3104 /* 3105 * Check that the requested port is available. This is called when trying to 3106 * bind to a specific port, or when trying to listen on a bound port. In 3107 * the latter case, the provided id_priv may already be on the bind_list, but 3108 * we still need to check that it's okay to start listening. 3109 */ 3110 static int cma_check_port(struct rdma_bind_list *bind_list, 3111 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3112 { 3113 struct rdma_id_private *cur_id; 3114 struct sockaddr *addr, *cur_addr; 3115 3116 addr = cma_src_addr(id_priv); 3117 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3118 if (id_priv == cur_id) 3119 continue; 3120 3121 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3122 cur_id->reuseaddr) 3123 continue; 3124 3125 cur_addr = cma_src_addr(cur_id); 3126 if (id_priv->afonly && cur_id->afonly && 3127 (addr->sa_family != cur_addr->sa_family)) 3128 continue; 3129 3130 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3131 return -EADDRNOTAVAIL; 3132 3133 if (!cma_addr_cmp(addr, cur_addr)) 3134 return -EADDRINUSE; 3135 } 3136 return 0; 3137 } 3138 3139 static int cma_use_port(enum rdma_ucm_port_space ps, 3140 struct rdma_id_private *id_priv) 3141 { 3142 struct rdma_bind_list *bind_list; 3143 unsigned short snum; 3144 int ret; 3145 3146 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3147 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 3148 return -EACCES; 3149 3150 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3151 if (!bind_list) { 3152 ret = cma_alloc_port(ps, id_priv, snum); 3153 } else { 3154 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3155 if (!ret) 3156 cma_bind_port(bind_list, id_priv); 3157 } 3158 return ret; 3159 } 3160 3161 static int cma_bind_listen(struct rdma_id_private *id_priv) 3162 { 3163 struct rdma_bind_list *bind_list = id_priv->bind_list; 3164 int ret = 0; 3165 3166 mutex_lock(&lock); 3167 if (bind_list->owners.first->next) 3168 ret = cma_check_port(bind_list, id_priv, 0); 3169 mutex_unlock(&lock); 3170 return ret; 3171 } 3172 3173 static enum rdma_ucm_port_space 3174 cma_select_inet_ps(struct rdma_id_private *id_priv) 3175 { 3176 switch (id_priv->id.ps) { 3177 case RDMA_PS_TCP: 3178 case RDMA_PS_UDP: 3179 case RDMA_PS_IPOIB: 3180 case RDMA_PS_IB: 3181 return id_priv->id.ps; 3182 default: 3183 3184 return 0; 3185 } 3186 } 3187 3188 static enum rdma_ucm_port_space 3189 cma_select_ib_ps(struct rdma_id_private *id_priv) 3190 { 3191 enum rdma_ucm_port_space ps = 0; 3192 struct sockaddr_ib *sib; 3193 u64 sid_ps, mask, sid; 3194 3195 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3196 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3197 sid = be64_to_cpu(sib->sib_sid) & mask; 3198 3199 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3200 sid_ps = RDMA_IB_IP_PS_IB; 3201 ps = RDMA_PS_IB; 3202 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3203 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3204 sid_ps = RDMA_IB_IP_PS_TCP; 3205 ps = RDMA_PS_TCP; 3206 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3207 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3208 sid_ps = RDMA_IB_IP_PS_UDP; 
3209 ps = RDMA_PS_UDP; 3210 } 3211 3212 if (ps) { 3213 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3214 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3215 be64_to_cpu(sib->sib_sid_mask)); 3216 } 3217 return ps; 3218 } 3219 3220 static int cma_get_port(struct rdma_id_private *id_priv) 3221 { 3222 enum rdma_ucm_port_space ps; 3223 int ret; 3224 3225 if (cma_family(id_priv) != AF_IB) 3226 ps = cma_select_inet_ps(id_priv); 3227 else 3228 ps = cma_select_ib_ps(id_priv); 3229 if (!ps) 3230 return -EPROTONOSUPPORT; 3231 3232 mutex_lock(&lock); 3233 if (cma_any_port(cma_src_addr(id_priv))) 3234 ret = cma_alloc_any_port(ps, id_priv); 3235 else 3236 ret = cma_use_port(ps, id_priv); 3237 mutex_unlock(&lock); 3238 3239 return ret; 3240 } 3241 3242 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3243 struct sockaddr *addr) 3244 { 3245 #if IS_ENABLED(CONFIG_IPV6) 3246 struct sockaddr_in6 *sin6; 3247 3248 if (addr->sa_family != AF_INET6) 3249 return 0; 3250 3251 sin6 = (struct sockaddr_in6 *) addr; 3252 3253 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 3254 return 0; 3255 3256 if (!sin6->sin6_scope_id) 3257 return -EINVAL; 3258 3259 dev_addr->bound_dev_if = sin6->sin6_scope_id; 3260 #endif 3261 return 0; 3262 } 3263 3264 int rdma_listen(struct rdma_cm_id *id, int backlog) 3265 { 3266 struct rdma_id_private *id_priv; 3267 int ret; 3268 3269 id_priv = container_of(id, struct rdma_id_private, id); 3270 if (id_priv->state == RDMA_CM_IDLE) { 3271 id->route.addr.src_addr.ss_family = AF_INET; 3272 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3273 if (ret) 3274 return ret; 3275 } 3276 3277 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3278 return -EINVAL; 3279 3280 if (id_priv->reuseaddr) { 3281 ret = cma_bind_listen(id_priv); 3282 if (ret) 3283 goto err; 3284 } 3285 3286 id_priv->backlog = backlog; 3287 if (id->device) { 3288 if (rdma_cap_ib_cm(id->device, 1)) { 3289 ret = cma_ib_listen(id_priv); 3290 if (ret) 3291 goto err; 3292 } else if (rdma_cap_iw_cm(id->device, 1)) { 3293 ret = cma_iw_listen(id_priv, backlog); 3294 if (ret) 3295 goto err; 3296 } else { 3297 ret = -ENOSYS; 3298 goto err; 3299 } 3300 } else 3301 cma_listen_on_all(id_priv); 3302 3303 return 0; 3304 err: 3305 id_priv->backlog = 0; 3306 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3307 return ret; 3308 } 3309 EXPORT_SYMBOL(rdma_listen); 3310 3311 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3312 { 3313 struct rdma_id_private *id_priv; 3314 int ret; 3315 struct sockaddr *daddr; 3316 3317 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3318 addr->sa_family != AF_IB) 3319 return -EAFNOSUPPORT; 3320 3321 id_priv = container_of(id, struct rdma_id_private, id); 3322 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3323 return -EINVAL; 3324 3325 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3326 if (ret) 3327 goto err1; 3328 3329 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3330 if (!cma_any_addr(addr)) { 3331 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3332 if (ret) 3333 goto err1; 3334 3335 ret = cma_acquire_dev(id_priv, NULL); 3336 if (ret) 3337 goto err1; 3338 } 3339 3340 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3341 if (addr->sa_family == AF_INET) 3342 id_priv->afonly = 1; 3343 #if IS_ENABLED(CONFIG_IPV6) 3344 else if (addr->sa_family == AF_INET6) { 3345 struct net *net = id_priv->id.route.addr.dev_addr.net; 3346 3347 
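/*
 * The user did not request AFONLY explicitly, so mirror the
 * namespace's bindv6only sysctl, matching the default behaviour
 * of a plain AF_INET6 socket.
 */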
id_priv->afonly = net->ipv6.sysctl.bindv6only; 3348 } 3349 #endif 3350 } 3351 daddr = cma_dst_addr(id_priv); 3352 daddr->sa_family = addr->sa_family; 3353 3354 ret = cma_get_port(id_priv); 3355 if (ret) 3356 goto err2; 3357 3358 return 0; 3359 err2: 3360 if (id_priv->cma_dev) { 3361 rdma_restrack_del(&id_priv->res); 3362 cma_release_dev(id_priv); 3363 } 3364 err1: 3365 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3366 return ret; 3367 } 3368 EXPORT_SYMBOL(rdma_bind_addr); 3369 3370 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3371 { 3372 struct cma_hdr *cma_hdr; 3373 3374 cma_hdr = hdr; 3375 cma_hdr->cma_version = CMA_VERSION; 3376 if (cma_family(id_priv) == AF_INET) { 3377 struct sockaddr_in *src4, *dst4; 3378 3379 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3380 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3381 3382 cma_set_ip_ver(cma_hdr, 4); 3383 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3384 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3385 cma_hdr->port = src4->sin_port; 3386 } else if (cma_family(id_priv) == AF_INET6) { 3387 struct sockaddr_in6 *src6, *dst6; 3388 3389 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3390 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3391 3392 cma_set_ip_ver(cma_hdr, 6); 3393 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3394 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3395 cma_hdr->port = src6->sin6_port; 3396 } 3397 return 0; 3398 } 3399 3400 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3401 struct ib_cm_event *ib_event) 3402 { 3403 struct rdma_id_private *id_priv = cm_id->context; 3404 struct rdma_cm_event event; 3405 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3406 int ret = 0; 3407 3408 mutex_lock(&id_priv->handler_mutex); 3409 if (id_priv->state != RDMA_CM_CONNECT) 3410 goto out; 3411 3412 memset(&event, 0, sizeof event); 3413 switch (ib_event->event) { 3414 case IB_CM_SIDR_REQ_ERROR: 3415 event.event = RDMA_CM_EVENT_UNREACHABLE; 3416 event.status = -ETIMEDOUT; 3417 break; 3418 case IB_CM_SIDR_REP_RECEIVED: 3419 event.param.ud.private_data = ib_event->private_data; 3420 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3421 if (rep->status != IB_SIDR_SUCCESS) { 3422 event.event = RDMA_CM_EVENT_UNREACHABLE; 3423 event.status = ib_event->param.sidr_rep_rcvd.status; 3424 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", 3425 event.status); 3426 break; 3427 } 3428 ret = cma_set_qkey(id_priv, rep->qkey); 3429 if (ret) { 3430 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); 3431 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3432 event.status = ret; 3433 break; 3434 } 3435 ib_init_ah_attr_from_path(id_priv->id.device, 3436 id_priv->id.port_num, 3437 id_priv->id.route.path_rec, 3438 &event.param.ud.ah_attr); 3439 event.param.ud.qp_num = rep->qpn; 3440 event.param.ud.qkey = rep->qkey; 3441 event.event = RDMA_CM_EVENT_ESTABLISHED; 3442 event.status = 0; 3443 break; 3444 default: 3445 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3446 ib_event->event); 3447 goto out; 3448 } 3449 3450 ret = id_priv->id.event_handler(&id_priv->id, &event); 3451 if (ret) { 3452 /* Destroy the CM ID by returning a non-zero value. 
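 * Clearing cm_id.ib first keeps rdma_destroy_id() below from trying to
 * destroy it again; the ib_cm reclaims cm_id once this handler returns
 * non-zero.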
*/ 3453 id_priv->cm_id.ib = NULL; 3454 cma_exch(id_priv, RDMA_CM_DESTROYING); 3455 mutex_unlock(&id_priv->handler_mutex); 3456 rdma_destroy_id(&id_priv->id); 3457 return ret; 3458 } 3459 out: 3460 mutex_unlock(&id_priv->handler_mutex); 3461 return ret; 3462 } 3463 3464 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3465 struct rdma_conn_param *conn_param) 3466 { 3467 struct ib_cm_sidr_req_param req; 3468 struct ib_cm_id *id; 3469 void *private_data; 3470 u8 offset; 3471 int ret; 3472 3473 memset(&req, 0, sizeof req); 3474 offset = cma_user_data_offset(id_priv); 3475 req.private_data_len = offset + conn_param->private_data_len; 3476 if (req.private_data_len < conn_param->private_data_len) 3477 return -EINVAL; 3478 3479 if (req.private_data_len) { 3480 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3481 if (!private_data) 3482 return -ENOMEM; 3483 } else { 3484 private_data = NULL; 3485 } 3486 3487 if (conn_param->private_data && conn_param->private_data_len) 3488 memcpy(private_data + offset, conn_param->private_data, 3489 conn_param->private_data_len); 3490 3491 if (private_data) { 3492 ret = cma_format_hdr(private_data, id_priv); 3493 if (ret) 3494 goto out; 3495 req.private_data = private_data; 3496 } 3497 3498 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3499 id_priv); 3500 if (IS_ERR(id)) { 3501 ret = PTR_ERR(id); 3502 goto out; 3503 } 3504 id_priv->cm_id.ib = id; 3505 3506 req.path = id_priv->id.route.path_rec; 3507 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3508 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3509 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3510 3511 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3512 if (ret) { 3513 ib_destroy_cm_id(id_priv->cm_id.ib); 3514 id_priv->cm_id.ib = NULL; 3515 } 3516 out: 3517 kfree(private_data); 3518 return ret; 3519 } 3520 3521 static int cma_connect_ib(struct rdma_id_private *id_priv, 3522 struct rdma_conn_param *conn_param) 3523 { 3524 struct ib_cm_req_param req; 3525 struct rdma_route *route; 3526 void *private_data; 3527 struct ib_cm_id *id; 3528 u8 offset; 3529 int ret; 3530 3531 memset(&req, 0, sizeof req); 3532 offset = cma_user_data_offset(id_priv); 3533 req.private_data_len = offset + conn_param->private_data_len; 3534 if (req.private_data_len < conn_param->private_data_len) 3535 return -EINVAL; 3536 3537 if (req.private_data_len) { 3538 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3539 if (!private_data) 3540 return -ENOMEM; 3541 } else { 3542 private_data = NULL; 3543 } 3544 3545 if (conn_param->private_data && conn_param->private_data_len) 3546 memcpy(private_data + offset, conn_param->private_data, 3547 conn_param->private_data_len); 3548 3549 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3550 if (IS_ERR(id)) { 3551 ret = PTR_ERR(id); 3552 goto out; 3553 } 3554 id_priv->cm_id.ib = id; 3555 3556 route = &id_priv->id.route; 3557 if (private_data) { 3558 ret = cma_format_hdr(private_data, id_priv); 3559 if (ret) 3560 goto out; 3561 req.private_data = private_data; 3562 } 3563 3564 req.primary_path = &route->path_rec[0]; 3565 if (route->num_paths == 2) 3566 req.alternate_path = &route->path_rec[1]; 3567 3568 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3569 req.qp_num = id_priv->qp_num; 3570 req.qp_type = id_priv->id.qp_type; 3571 req.starting_psn = id_priv->seq_num; 3572 req.responder_resources = conn_param->responder_resources; 3573 req.initiator_depth = 
conn_param->initiator_depth; 3574 req.flow_control = conn_param->flow_control; 3575 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3576 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3577 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3578 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3579 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3580 req.srq = id_priv->srq ? 1 : 0; 3581 3582 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3583 out: 3584 if (ret && !IS_ERR(id)) { 3585 ib_destroy_cm_id(id); 3586 id_priv->cm_id.ib = NULL; 3587 } 3588 3589 kfree(private_data); 3590 return ret; 3591 } 3592 3593 static int cma_connect_iw(struct rdma_id_private *id_priv, 3594 struct rdma_conn_param *conn_param) 3595 { 3596 struct iw_cm_id *cm_id; 3597 int ret; 3598 struct iw_cm_conn_param iw_param; 3599 3600 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3601 if (IS_ERR(cm_id)) 3602 return PTR_ERR(cm_id); 3603 3604 cm_id->tos = id_priv->tos; 3605 id_priv->cm_id.iw = cm_id; 3606 3607 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3608 rdma_addr_size(cma_src_addr(id_priv))); 3609 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3610 rdma_addr_size(cma_dst_addr(id_priv))); 3611 3612 ret = cma_modify_qp_rtr(id_priv, conn_param); 3613 if (ret) 3614 goto out; 3615 3616 if (conn_param) { 3617 iw_param.ord = conn_param->initiator_depth; 3618 iw_param.ird = conn_param->responder_resources; 3619 iw_param.private_data = conn_param->private_data; 3620 iw_param.private_data_len = conn_param->private_data_len; 3621 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3622 } else { 3623 memset(&iw_param, 0, sizeof iw_param); 3624 iw_param.qpn = id_priv->qp_num; 3625 } 3626 ret = iw_cm_connect(cm_id, &iw_param); 3627 out: 3628 if (ret) { 3629 iw_destroy_cm_id(cm_id); 3630 id_priv->cm_id.iw = NULL; 3631 } 3632 return ret; 3633 } 3634 3635 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3636 { 3637 struct rdma_id_private *id_priv; 3638 int ret; 3639 3640 id_priv = container_of(id, struct rdma_id_private, id); 3641 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3642 return -EINVAL; 3643 3644 if (!id->qp) { 3645 id_priv->qp_num = conn_param->qp_num; 3646 id_priv->srq = conn_param->srq; 3647 } 3648 3649 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3650 if (id->qp_type == IB_QPT_UD) 3651 ret = cma_resolve_ib_udp(id_priv, conn_param); 3652 else 3653 ret = cma_connect_ib(id_priv, conn_param); 3654 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3655 ret = cma_connect_iw(id_priv, conn_param); 3656 else 3657 ret = -ENOSYS; 3658 if (ret) 3659 goto err; 3660 3661 return 0; 3662 err: 3663 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3664 return ret; 3665 } 3666 EXPORT_SYMBOL(rdma_connect); 3667 3668 static int cma_accept_ib(struct rdma_id_private *id_priv, 3669 struct rdma_conn_param *conn_param) 3670 { 3671 struct ib_cm_rep_param rep; 3672 int ret; 3673 3674 ret = cma_modify_qp_rtr(id_priv, conn_param); 3675 if (ret) 3676 goto out; 3677 3678 ret = cma_modify_qp_rts(id_priv, conn_param); 3679 if (ret) 3680 goto out; 3681 3682 memset(&rep, 0, sizeof rep); 3683 rep.qp_num = id_priv->qp_num; 3684 rep.starting_psn = id_priv->seq_num; 3685 rep.private_data = conn_param->private_data; 3686 rep.private_data_len = conn_param->private_data_len; 3687 rep.responder_resources = conn_param->responder_resources; 3688 rep.initiator_depth = conn_param->initiator_depth; 3689 
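/*
 * Failover is never accepted here; flow control and the RNR retry
 * count come from the consumer's parameters, while srq advertises
 * whether this side's QP is attached to an SRQ.
 */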
rep.failover_accepted = 0; 3690 rep.flow_control = conn_param->flow_control; 3691 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3692 rep.srq = id_priv->srq ? 1 : 0; 3693 3694 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3695 out: 3696 return ret; 3697 } 3698 3699 static int cma_accept_iw(struct rdma_id_private *id_priv, 3700 struct rdma_conn_param *conn_param) 3701 { 3702 struct iw_cm_conn_param iw_param; 3703 int ret; 3704 3705 if (!conn_param) 3706 return -EINVAL; 3707 3708 ret = cma_modify_qp_rtr(id_priv, conn_param); 3709 if (ret) 3710 return ret; 3711 3712 iw_param.ord = conn_param->initiator_depth; 3713 iw_param.ird = conn_param->responder_resources; 3714 iw_param.private_data = conn_param->private_data; 3715 iw_param.private_data_len = conn_param->private_data_len; 3716 if (id_priv->id.qp) { 3717 iw_param.qpn = id_priv->qp_num; 3718 } else 3719 iw_param.qpn = conn_param->qp_num; 3720 3721 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3722 } 3723 3724 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3725 enum ib_cm_sidr_status status, u32 qkey, 3726 const void *private_data, int private_data_len) 3727 { 3728 struct ib_cm_sidr_rep_param rep; 3729 int ret; 3730 3731 memset(&rep, 0, sizeof rep); 3732 rep.status = status; 3733 if (status == IB_SIDR_SUCCESS) { 3734 ret = cma_set_qkey(id_priv, qkey); 3735 if (ret) 3736 return ret; 3737 rep.qp_num = id_priv->qp_num; 3738 rep.qkey = id_priv->qkey; 3739 } 3740 rep.private_data = private_data; 3741 rep.private_data_len = private_data_len; 3742 3743 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3744 } 3745 3746 int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, 3747 const char *caller) 3748 { 3749 struct rdma_id_private *id_priv; 3750 int ret; 3751 3752 id_priv = container_of(id, struct rdma_id_private, id); 3753 3754 if (caller) 3755 id_priv->res.kern_name = caller; 3756 else 3757 rdma_restrack_set_task(&id_priv->res, current); 3758 3759 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3760 return -EINVAL; 3761 3762 if (!id->qp && conn_param) { 3763 id_priv->qp_num = conn_param->qp_num; 3764 id_priv->srq = conn_param->srq; 3765 } 3766 3767 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3768 if (id->qp_type == IB_QPT_UD) { 3769 if (conn_param) 3770 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3771 conn_param->qkey, 3772 conn_param->private_data, 3773 conn_param->private_data_len); 3774 else 3775 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3776 0, NULL, 0); 3777 } else { 3778 if (conn_param) 3779 ret = cma_accept_ib(id_priv, conn_param); 3780 else 3781 ret = cma_rep_recv(id_priv); 3782 } 3783 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3784 ret = cma_accept_iw(id_priv, conn_param); 3785 else 3786 ret = -ENOSYS; 3787 3788 if (ret) 3789 goto reject; 3790 3791 return 0; 3792 reject: 3793 cma_modify_qp_err(id_priv); 3794 rdma_reject(id, NULL, 0); 3795 return ret; 3796 } 3797 EXPORT_SYMBOL(__rdma_accept); 3798 3799 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3800 { 3801 struct rdma_id_private *id_priv; 3802 int ret; 3803 3804 id_priv = container_of(id, struct rdma_id_private, id); 3805 if (!id_priv->cm_id.ib) 3806 return -EINVAL; 3807 3808 switch (id->device->node_type) { 3809 case RDMA_NODE_IB_CA: 3810 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3811 break; 3812 default: 3813 ret = 0; 3814 break; 3815 } 3816 return ret; 3817 } 3818 EXPORT_SYMBOL(rdma_notify); 3819 3820 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3821 u8 
private_data_len) 3822 { 3823 struct rdma_id_private *id_priv; 3824 int ret; 3825 3826 id_priv = container_of(id, struct rdma_id_private, id); 3827 if (!id_priv->cm_id.ib) 3828 return -EINVAL; 3829 3830 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3831 if (id->qp_type == IB_QPT_UD) 3832 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3833 private_data, private_data_len); 3834 else 3835 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3836 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3837 0, private_data, private_data_len); 3838 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3839 ret = iw_cm_reject(id_priv->cm_id.iw, 3840 private_data, private_data_len); 3841 } else 3842 ret = -ENOSYS; 3843 3844 return ret; 3845 } 3846 EXPORT_SYMBOL(rdma_reject); 3847 3848 int rdma_disconnect(struct rdma_cm_id *id) 3849 { 3850 struct rdma_id_private *id_priv; 3851 int ret; 3852 3853 id_priv = container_of(id, struct rdma_id_private, id); 3854 if (!id_priv->cm_id.ib) 3855 return -EINVAL; 3856 3857 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3858 ret = cma_modify_qp_err(id_priv); 3859 if (ret) 3860 goto out; 3861 /* Initiate or respond to a disconnect. */ 3862 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3863 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3864 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3865 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3866 } else 3867 ret = -EINVAL; 3868 3869 out: 3870 return ret; 3871 } 3872 EXPORT_SYMBOL(rdma_disconnect); 3873 3874 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3875 { 3876 struct rdma_id_private *id_priv; 3877 struct cma_multicast *mc = multicast->context; 3878 struct rdma_cm_event event; 3879 int ret = 0; 3880 3881 id_priv = mc->id_priv; 3882 mutex_lock(&id_priv->handler_mutex); 3883 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3884 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3885 goto out; 3886 3887 if (!status) 3888 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3889 else 3890 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", 3891 status); 3892 mutex_lock(&id_priv->qp_mutex); 3893 if (!status && id_priv->id.qp) { 3894 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3895 be16_to_cpu(multicast->rec.mlid)); 3896 if (status) 3897 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. 
status %d\n", 3898 status); 3899 } 3900 mutex_unlock(&id_priv->qp_mutex); 3901 3902 memset(&event, 0, sizeof event); 3903 event.status = status; 3904 event.param.ud.private_data = mc->context; 3905 if (!status) { 3906 struct rdma_dev_addr *dev_addr = 3907 &id_priv->id.route.addr.dev_addr; 3908 struct net_device *ndev = 3909 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 3910 enum ib_gid_type gid_type = 3911 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3912 rdma_start_port(id_priv->cma_dev->device)]; 3913 3914 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3915 ret = ib_init_ah_from_mcmember(id_priv->id.device, 3916 id_priv->id.port_num, 3917 &multicast->rec, 3918 ndev, gid_type, 3919 &event.param.ud.ah_attr); 3920 if (ret) 3921 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3922 3923 event.param.ud.qp_num = 0xFFFFFF; 3924 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3925 if (ndev) 3926 dev_put(ndev); 3927 } else 3928 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3929 3930 ret = id_priv->id.event_handler(&id_priv->id, &event); 3931 if (ret) { 3932 cma_exch(id_priv, RDMA_CM_DESTROYING); 3933 mutex_unlock(&id_priv->handler_mutex); 3934 rdma_destroy_id(&id_priv->id); 3935 return 0; 3936 } 3937 3938 out: 3939 mutex_unlock(&id_priv->handler_mutex); 3940 return 0; 3941 } 3942 3943 static void cma_set_mgid(struct rdma_id_private *id_priv, 3944 struct sockaddr *addr, union ib_gid *mgid) 3945 { 3946 unsigned char mc_map[MAX_ADDR_LEN]; 3947 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3948 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 3949 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 3950 3951 if (cma_any_addr(addr)) { 3952 memset(mgid, 0, sizeof *mgid); 3953 } else if ((addr->sa_family == AF_INET6) && 3954 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 3955 0xFF10A01B)) { 3956 /* IPv6 address is an SA assigned MGID. 
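 * The check above matches the ff1x:a01b signature prefix while
 * ignoring the multicast scope nibble, so the address can be used
 * verbatim as the MGID.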
static void cma_set_mgid(struct rdma_id_private *id_priv,
			 struct sockaddr *addr, union ib_gid *mgid)
{
	unsigned char mc_map[MAX_ADDR_LEN];
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if ((addr->sa_family == AF_INET6) &&
		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
								 0xFF10A01B)) {
		/* IPv6 address is an SA assigned MGID. */
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_IB) {
		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	} else {
		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	}
}

static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
	    (!ib_sa_sendonly_fullmem_support(&sa_client,
					     id_priv->id.device,
					     id_priv->id.port_num))) {
		pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
			"RDMA CM: SM doesn't support Send Only Full Member option\n",
			id_priv->id.device->name, id_priv->id.port_num);
		return -EOPNOTSUPP;
	}

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	if (id_priv->id.ps == RDMA_PS_IPOIB)
		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU |
			     IB_SA_MCMEMBER_REC_HOP_LIMIT;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, GFP_KERNEL,
						cma_ib_mc_handler, mc);
	return PTR_ERR_OR_ZERO(mc->multicast.ib);
}

static void iboe_mcast_work_handler(struct work_struct *work)
{
	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
	struct cma_multicast *mc = mw->mc;
	struct ib_sa_multicast *m = mc->multicast.ib;

	mc->multicast.ib->context = mc;
	cma_ib_mc_handler(0, m);
	kref_put(&mc->mcref, release_mc);
	kfree(mw);
}

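/*
 * Build the MGID used for a RoCE (IBoE) join: a wildcard address gives the
 * zero MGID, an IPv6 address is copied in as-is, and an IPv4 address is
 * embedded in a mapped MGID whose first two bytes are ff:0e for RoCE v1
 * but zero for RoCE v2 (IB_GID_TYPE_ROCE_UDP_ENCAP), with 0xff 0xff at
 * bytes 10-11 and the IPv4 address occupying the last four bytes.
 */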
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
			      enum ib_gid_type gid_type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		mgid->raw[0] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
		mgid->raw[1] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}

static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);

	if (addr->sa_family == AF_INET) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

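/*
 * A rough sketch of how a UD consumer might drive this interface, assuming
 * only the rdma_cm/ib core calls named below (error handling omitted, and
 * the AH-creation call shown is only one possible choice):
 *
 *	rdma_join_multicast(id, mcast_addr, join_state, ctx);
 *	... on RDMA_CM_EVENT_MULTICAST_JOIN in the event handler:
 *		build an AH from event->param.ud.ah_attr (e.g. rdma_create_ah()),
 *		send to remote QPN event->param.ud.qp_num (0xFFFFFF for multicast)
 *		using qkey event->param.ud.qkey ...
 *	rdma_leave_multicast(id, mcast_addr);
 */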
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	if (!id->device)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);

static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr;
	struct cma_ndev_work *work;

	dev_addr = &id_priv->id.route.addr.dev_addr;

	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
	    (net_eq(dev_net(ndev), dev_addr->net)) &&
	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
			ndev->name, &id_priv->id);
		work = kzalloc(sizeof *work, GFP_KERNEL);
		if (!work)
			return -ENOMEM;

		INIT_WORK(&work->work, cma_ndev_work_handler);
		work->id = id_priv;
		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
		atomic_inc(&id_priv->refcount);
		queue_work(cma_wq, &work->work);
	}

	return 0;
}

static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
			       void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_BONDING_FAILOVER)
		return NOTIFY_DONE;

	if (!netif_is_bond_master(ndev))
		return NOTIFY_DONE;

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list)
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			ret = cma_netdev_change(ndev, id_priv);
			if (ret)
				goto out;
		}

out:
	mutex_unlock(&lock);
	return ret;
}

static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

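/*
 * ib_client add callback: allocate per-device CMA state, pick a default GID
 * type for every port (preferring CMA_PREFERRED_ROCE_GID_TYPE, i.e. RoCE v2,
 * when the port supports it), zero the default RoCE ToS, and replay any
 * wildcard listens onto the newly added device.
 */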
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

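/*
 * Flush every id still bound to a departing device: each one is reported
 * RDMA_CM_EVENT_DEVICE_REMOVAL via cma_remove_id_dev(), internal listen ids
 * and ids whose handler returns nonzero are destroyed here, and the function
 * then drops its device reference and waits on the completion so the device
 * cannot go away while ids still hold it.
 */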
static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlmsghdr *nlh;
	struct rdma_cm_id_stats *id_stats;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id = NULL;
	struct cma_device *cma_dev;
	int i_dev = 0, i_id = 0;

	/*
	 * We export all of the IDs as a sequence of messages.  Each
	 * ID gets its own netlink message.
	 */
	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list) {
		if (i_dev < cb->args[0]) {
			i_dev++;
			continue;
		}

		i_id = 0;
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			if (i_id < cb->args[1]) {
				i_id++;
				continue;
			}

			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
						sizeof *id_stats, RDMA_NL_RDMA_CM,
						RDMA_NL_RDMA_CM_ID_STATS,
						NLM_F_MULTI);
			if (!id_stats)
				goto out;

			memset(id_stats, 0, sizeof *id_stats);
			id = &id_priv->id;
			id_stats->node_type = id->route.addr.dev_addr.dev_type;
			id_stats->port_num = id->port_num;
			id_stats->bound_dev_if =
				id->route.addr.dev_addr.bound_dev_if;

			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_src_addr(id_priv)),
					  cma_src_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
				goto out;
			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_dst_addr(id_priv)),
					  cma_dst_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
				goto out;

			id_stats->pid = task_pid_vnr(id_priv->res.task);
			id_stats->port_space = id->ps;
			id_stats->cm_state = id_priv->state;
			id_stats->qp_num = id_priv->qp_num;
			id_stats->qp_type = id->qp_type;

			i_id++;
			nlmsg_end(skb, nlh);
		}

		cb->args[1] = 0;
		i_dev++;
	}

out:
	mutex_unlock(&lock);
	cb->args[0] = i_dev;
	cb->args[1] = i_id;

	return skb->len;
}

static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};

static int cma_init_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);

	return 0;
}

static void cma_exit_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
}

static struct pernet_operations cma_pernet_operations = {
	.init = cma_init_net,
	.exit = cma_exit_net,
	.id = &cma_pernet_id,
	.size = sizeof(struct cma_pernet),
};

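/*
 * Module bring-up order: ordered workqueue, per-net port-space IDRs, SA and
 * address-resolution clients plus the netdevice notifier, then the ib_client
 * registration that triggers cma_add_one() for already-present devices, and
 * finally the RDMA netlink table and configfs hooks.  cma_cleanup() below
 * unwinds the same steps in reverse.
 */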
static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&cma_pernet_operations);
	if (ret)
		goto err_wq;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);
	register_netdevice_notifier(&cma_nb);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
	cma_configfs_init();

	return 0;

err:
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
err_wq:
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	rdma_nl_unregister(RDMA_NL_RDMA_CM);
	ib_unregister_client(&cma_client);
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
	destroy_workqueue(cma_wq);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);

module_init(cma_init);
module_exit(cma_cleanup);