1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #include <linux/completion.h> 37 #include <linux/in.h> 38 #include <linux/in6.h> 39 #include <linux/mutex.h> 40 #include <linux/random.h> 41 #include <linux/igmp.h> 42 #include <linux/idr.h> 43 #include <linux/inetdevice.h> 44 #include <linux/slab.h> 45 #include <linux/module.h> 46 #include <net/route.h> 47 48 #include <net/net_namespace.h> 49 #include <net/netns/generic.h> 50 #include <net/tcp.h> 51 #include <net/ipv6.h> 52 #include <net/ip_fib.h> 53 #include <net/ip6_route.h> 54 55 #include <rdma/rdma_cm.h> 56 #include <rdma/rdma_cm_ib.h> 57 #include <rdma/rdma_netlink.h> 58 #include <rdma/ib.h> 59 #include <rdma/ib_cache.h> 60 #include <rdma/ib_cm.h> 61 #include <rdma/ib_sa.h> 62 #include <rdma/iw_cm.h> 63 64 #include "core_priv.h" 65 66 MODULE_AUTHOR("Sean Hefty"); 67 MODULE_DESCRIPTION("Generic RDMA CM Agent"); 68 MODULE_LICENSE("Dual BSD/GPL"); 69 70 #define CMA_CM_RESPONSE_TIMEOUT 20 71 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 72 #define CMA_MAX_CM_RETRIES 15 73 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) 74 #define CMA_IBOE_PACKET_LIFETIME 18 75 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP 76 77 static const char * const cma_events[] = { 78 [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", 79 [RDMA_CM_EVENT_ADDR_ERROR] = "address error", 80 [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", 81 [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", 82 [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", 83 [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", 84 [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", 85 [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", 86 [RDMA_CM_EVENT_REJECTED] = "rejected", 87 [RDMA_CM_EVENT_ESTABLISHED] = "established", 88 [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", 89 [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", 90 [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", 91 [RDMA_CM_EVENT_MULTICAST_ERROR] = 
"multicast error", 92 [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", 93 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", 94 }; 95 96 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) 97 { 98 size_t index = event; 99 100 return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 101 cma_events[index] : "unrecognized event"; 102 } 103 EXPORT_SYMBOL(rdma_event_msg); 104 105 const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, 106 int reason) 107 { 108 if (rdma_ib_or_roce(id->device, id->port_num)) 109 return ibcm_reject_msg(reason); 110 111 if (rdma_protocol_iwarp(id->device, id->port_num)) 112 return iwcm_reject_msg(reason); 113 114 WARN_ON_ONCE(1); 115 return "unrecognized transport"; 116 } 117 EXPORT_SYMBOL(rdma_reject_msg); 118 119 bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) 120 { 121 if (rdma_ib_or_roce(id->device, id->port_num)) 122 return reason == IB_CM_REJ_CONSUMER_DEFINED; 123 124 if (rdma_protocol_iwarp(id->device, id->port_num)) 125 return reason == -ECONNREFUSED; 126 127 WARN_ON_ONCE(1); 128 return false; 129 } 130 EXPORT_SYMBOL(rdma_is_consumer_reject); 131 132 const void *rdma_consumer_reject_data(struct rdma_cm_id *id, 133 struct rdma_cm_event *ev, u8 *data_len) 134 { 135 const void *p; 136 137 if (rdma_is_consumer_reject(id, ev->status)) { 138 *data_len = ev->param.conn.private_data_len; 139 p = ev->param.conn.private_data; 140 } else { 141 *data_len = 0; 142 p = NULL; 143 } 144 return p; 145 } 146 EXPORT_SYMBOL(rdma_consumer_reject_data); 147 148 static void cma_add_one(struct ib_device *device); 149 static void cma_remove_one(struct ib_device *device, void *client_data); 150 151 static struct ib_client cma_client = { 152 .name = "cma", 153 .add = cma_add_one, 154 .remove = cma_remove_one 155 }; 156 157 static struct ib_sa_client sa_client; 158 static struct rdma_addr_client addr_client; 159 static LIST_HEAD(dev_list); 160 static LIST_HEAD(listen_any_list); 161 static DEFINE_MUTEX(lock); 162 static struct workqueue_struct *cma_wq; 163 static unsigned int cma_pernet_id; 164 165 struct cma_pernet { 166 struct idr tcp_ps; 167 struct idr udp_ps; 168 struct idr ipoib_ps; 169 struct idr ib_ps; 170 }; 171 172 static struct cma_pernet *cma_pernet(struct net *net) 173 { 174 return net_generic(net, cma_pernet_id); 175 } 176 177 static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps) 178 { 179 struct cma_pernet *pernet = cma_pernet(net); 180 181 switch (ps) { 182 case RDMA_PS_TCP: 183 return &pernet->tcp_ps; 184 case RDMA_PS_UDP: 185 return &pernet->udp_ps; 186 case RDMA_PS_IPOIB: 187 return &pernet->ipoib_ps; 188 case RDMA_PS_IB: 189 return &pernet->ib_ps; 190 default: 191 return NULL; 192 } 193 } 194 195 struct cma_device { 196 struct list_head list; 197 struct ib_device *device; 198 struct completion comp; 199 atomic_t refcount; 200 struct list_head id_list; 201 enum ib_gid_type *default_gid_type; 202 u8 *default_roce_tos; 203 }; 204 205 struct rdma_bind_list { 206 enum rdma_port_space ps; 207 struct hlist_head owners; 208 unsigned short port; 209 }; 210 211 struct class_port_info_context { 212 struct ib_class_port_info *class_port_info; 213 struct ib_device *device; 214 struct completion done; 215 struct ib_sa_query *sa_query; 216 u8 port_num; 217 }; 218 219 static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, 220 struct rdma_bind_list *bind_list, int snum) 221 { 222 struct idr *idr = cma_pernet_idr(net, ps); 223 224 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); 
225 } 226 227 static struct rdma_bind_list *cma_ps_find(struct net *net, 228 enum rdma_port_space ps, int snum) 229 { 230 struct idr *idr = cma_pernet_idr(net, ps); 231 232 return idr_find(idr, snum); 233 } 234 235 static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum) 236 { 237 struct idr *idr = cma_pernet_idr(net, ps); 238 239 idr_remove(idr, snum); 240 } 241 242 enum { 243 CMA_OPTION_AFONLY, 244 }; 245 246 void cma_ref_dev(struct cma_device *cma_dev) 247 { 248 atomic_inc(&cma_dev->refcount); 249 } 250 251 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, 252 void *cookie) 253 { 254 struct cma_device *cma_dev; 255 struct cma_device *found_cma_dev = NULL; 256 257 mutex_lock(&lock); 258 259 list_for_each_entry(cma_dev, &dev_list, list) 260 if (filter(cma_dev->device, cookie)) { 261 found_cma_dev = cma_dev; 262 break; 263 } 264 265 if (found_cma_dev) 266 cma_ref_dev(found_cma_dev); 267 mutex_unlock(&lock); 268 return found_cma_dev; 269 } 270 271 int cma_get_default_gid_type(struct cma_device *cma_dev, 272 unsigned int port) 273 { 274 if (!rdma_is_port_valid(cma_dev->device, port)) 275 return -EINVAL; 276 277 return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; 278 } 279 280 int cma_set_default_gid_type(struct cma_device *cma_dev, 281 unsigned int port, 282 enum ib_gid_type default_gid_type) 283 { 284 unsigned long supported_gids; 285 286 if (!rdma_is_port_valid(cma_dev->device, port)) 287 return -EINVAL; 288 289 supported_gids = roce_gid_type_mask_support(cma_dev->device, port); 290 291 if (!(supported_gids & 1 << default_gid_type)) 292 return -EINVAL; 293 294 cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = 295 default_gid_type; 296 297 return 0; 298 } 299 300 int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port) 301 { 302 if (!rdma_is_port_valid(cma_dev->device, port)) 303 return -EINVAL; 304 305 return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)]; 306 } 307 308 int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port, 309 u8 default_roce_tos) 310 { 311 if (!rdma_is_port_valid(cma_dev->device, port)) 312 return -EINVAL; 313 314 cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] = 315 default_roce_tos; 316 317 return 0; 318 } 319 struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) 320 { 321 return cma_dev->device; 322 } 323 324 /* 325 * Device removal can occur at anytime, so we need extra handling to 326 * serialize notifying the user of device removal with other callbacks. 327 * We do this by disabling removal notification while a callback is in process, 328 * and reporting it after the callback completes. 
329 */ 330 struct rdma_id_private { 331 struct rdma_cm_id id; 332 333 struct rdma_bind_list *bind_list; 334 struct hlist_node node; 335 struct list_head list; /* listen_any_list or cma_device.list */ 336 struct list_head listen_list; /* per device listens */ 337 struct cma_device *cma_dev; 338 struct list_head mc_list; 339 340 int internal_id; 341 enum rdma_cm_state state; 342 spinlock_t lock; 343 struct mutex qp_mutex; 344 345 struct completion comp; 346 atomic_t refcount; 347 struct mutex handler_mutex; 348 349 int backlog; 350 int timeout_ms; 351 struct ib_sa_query *query; 352 int query_id; 353 union { 354 struct ib_cm_id *ib; 355 struct iw_cm_id *iw; 356 } cm_id; 357 358 u32 seq_num; 359 u32 qkey; 360 u32 qp_num; 361 pid_t owner; 362 u32 options; 363 u8 srq; 364 u8 tos; 365 bool tos_set; 366 u8 reuseaddr; 367 u8 afonly; 368 enum ib_gid_type gid_type; 369 }; 370 371 struct cma_multicast { 372 struct rdma_id_private *id_priv; 373 union { 374 struct ib_sa_multicast *ib; 375 } multicast; 376 struct list_head list; 377 void *context; 378 struct sockaddr_storage addr; 379 struct kref mcref; 380 bool igmp_joined; 381 u8 join_state; 382 }; 383 384 struct cma_work { 385 struct work_struct work; 386 struct rdma_id_private *id; 387 enum rdma_cm_state old_state; 388 enum rdma_cm_state new_state; 389 struct rdma_cm_event event; 390 }; 391 392 struct cma_ndev_work { 393 struct work_struct work; 394 struct rdma_id_private *id; 395 struct rdma_cm_event event; 396 }; 397 398 struct iboe_mcast_work { 399 struct work_struct work; 400 struct rdma_id_private *id; 401 struct cma_multicast *mc; 402 }; 403 404 union cma_ip_addr { 405 struct in6_addr ip6; 406 struct { 407 __be32 pad[3]; 408 __be32 addr; 409 } ip4; 410 }; 411 412 struct cma_hdr { 413 u8 cma_version; 414 u8 ip_version; /* IP version: 7:4 */ 415 __be16 port; 416 union cma_ip_addr src_addr; 417 union cma_ip_addr dst_addr; 418 }; 419 420 #define CMA_VERSION 0x00 421 422 struct cma_req_info { 423 struct ib_device *device; 424 int port; 425 union ib_gid local_gid; 426 __be64 service_id; 427 u16 pkey; 428 bool has_gid:1; 429 }; 430 431 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) 432 { 433 unsigned long flags; 434 int ret; 435 436 spin_lock_irqsave(&id_priv->lock, flags); 437 ret = (id_priv->state == comp); 438 spin_unlock_irqrestore(&id_priv->lock, flags); 439 return ret; 440 } 441 442 static int cma_comp_exch(struct rdma_id_private *id_priv, 443 enum rdma_cm_state comp, enum rdma_cm_state exch) 444 { 445 unsigned long flags; 446 int ret; 447 448 spin_lock_irqsave(&id_priv->lock, flags); 449 if ((ret = (id_priv->state == comp))) 450 id_priv->state = exch; 451 spin_unlock_irqrestore(&id_priv->lock, flags); 452 return ret; 453 } 454 455 static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, 456 enum rdma_cm_state exch) 457 { 458 unsigned long flags; 459 enum rdma_cm_state old; 460 461 spin_lock_irqsave(&id_priv->lock, flags); 462 old = id_priv->state; 463 id_priv->state = exch; 464 spin_unlock_irqrestore(&id_priv->lock, flags); 465 return old; 466 } 467 468 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) 469 { 470 return hdr->ip_version >> 4; 471 } 472 473 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 474 { 475 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 476 } 477 478 static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) 479 { 480 struct in_device *in_dev = NULL; 481 482 if (ndev) { 483 rtnl_lock(); 484 in_dev = 
__in_dev_get_rtnl(ndev); 485 if (in_dev) { 486 if (join) 487 ip_mc_inc_group(in_dev, 488 *(__be32 *)(mgid->raw + 12)); 489 else 490 ip_mc_dec_group(in_dev, 491 *(__be32 *)(mgid->raw + 12)); 492 } 493 rtnl_unlock(); 494 } 495 return (in_dev) ? 0 : -ENODEV; 496 } 497 498 static void _cma_attach_to_dev(struct rdma_id_private *id_priv, 499 struct cma_device *cma_dev) 500 { 501 cma_ref_dev(cma_dev); 502 id_priv->cma_dev = cma_dev; 503 id_priv->gid_type = 0; 504 id_priv->id.device = cma_dev->device; 505 id_priv->id.route.addr.dev_addr.transport = 506 rdma_node_get_transport(cma_dev->device->node_type); 507 list_add_tail(&id_priv->list, &cma_dev->id_list); 508 } 509 510 static void cma_attach_to_dev(struct rdma_id_private *id_priv, 511 struct cma_device *cma_dev) 512 { 513 _cma_attach_to_dev(id_priv, cma_dev); 514 id_priv->gid_type = 515 cma_dev->default_gid_type[id_priv->id.port_num - 516 rdma_start_port(cma_dev->device)]; 517 } 518 519 void cma_deref_dev(struct cma_device *cma_dev) 520 { 521 if (atomic_dec_and_test(&cma_dev->refcount)) 522 complete(&cma_dev->comp); 523 } 524 525 static inline void release_mc(struct kref *kref) 526 { 527 struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); 528 529 kfree(mc->multicast.ib); 530 kfree(mc); 531 } 532 533 static void cma_release_dev(struct rdma_id_private *id_priv) 534 { 535 mutex_lock(&lock); 536 list_del(&id_priv->list); 537 cma_deref_dev(id_priv->cma_dev); 538 id_priv->cma_dev = NULL; 539 mutex_unlock(&lock); 540 } 541 542 static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 543 { 544 return (struct sockaddr *) &id_priv->id.route.addr.src_addr; 545 } 546 547 static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 548 { 549 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 550 } 551 552 static inline unsigned short cma_family(struct rdma_id_private *id_priv) 553 { 554 return id_priv->id.route.addr.src_addr.ss_family; 555 } 556 557 static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) 558 { 559 struct ib_sa_mcmember_rec rec; 560 int ret = 0; 561 562 if (id_priv->qkey) { 563 if (qkey && id_priv->qkey != qkey) 564 return -EINVAL; 565 return 0; 566 } 567 568 if (qkey) { 569 id_priv->qkey = qkey; 570 return 0; 571 } 572 573 switch (id_priv->id.ps) { 574 case RDMA_PS_UDP: 575 case RDMA_PS_IB: 576 id_priv->qkey = RDMA_UDP_QKEY; 577 break; 578 case RDMA_PS_IPOIB: 579 ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); 580 ret = ib_sa_get_mcmember_rec(id_priv->id.device, 581 id_priv->id.port_num, &rec.mgid, 582 &rec); 583 if (!ret) 584 id_priv->qkey = be32_to_cpu(rec.qkey); 585 break; 586 default: 587 break; 588 } 589 return ret; 590 } 591 592 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 593 { 594 dev_addr->dev_type = ARPHRD_INFINIBAND; 595 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); 596 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); 597 } 598 599 static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 600 { 601 int ret; 602 603 if (addr->sa_family != AF_IB) { 604 ret = rdma_translate_ip(addr, dev_addr); 605 } else { 606 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 607 ret = 0; 608 } 609 610 return ret; 611 } 612 613 static inline int cma_validate_port(struct ib_device *device, u8 port, 614 enum ib_gid_type gid_type, 615 union ib_gid *gid, 616 struct rdma_id_private *id_priv) 617 { 618 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 
619 int bound_if_index = dev_addr->bound_dev_if; 620 int dev_type = dev_addr->dev_type; 621 struct net_device *ndev = NULL; 622 int ret = -ENODEV; 623 624 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 625 return ret; 626 627 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 628 return ret; 629 630 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { 631 ndev = dev_get_by_index(dev_addr->net, bound_if_index); 632 if (!ndev) 633 return ret; 634 } else { 635 gid_type = IB_GID_TYPE_IB; 636 } 637 638 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, 639 ndev, NULL); 640 641 if (ndev) 642 dev_put(ndev); 643 644 return ret; 645 } 646 647 static int cma_acquire_dev(struct rdma_id_private *id_priv, 648 struct rdma_id_private *listen_id_priv) 649 { 650 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 651 struct cma_device *cma_dev; 652 union ib_gid gid, iboe_gid, *gidp; 653 int ret = -ENODEV; 654 u8 port; 655 656 if (dev_addr->dev_type != ARPHRD_INFINIBAND && 657 id_priv->id.ps == RDMA_PS_IPOIB) 658 return -EINVAL; 659 660 mutex_lock(&lock); 661 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 662 &iboe_gid); 663 664 memcpy(&gid, dev_addr->src_dev_addr + 665 rdma_addr_gid_offset(dev_addr), sizeof gid); 666 667 if (listen_id_priv) { 668 cma_dev = listen_id_priv->cma_dev; 669 port = listen_id_priv->id.port_num; 670 gidp = rdma_protocol_roce(cma_dev->device, port) ? 671 &iboe_gid : &gid; 672 673 ret = cma_validate_port(cma_dev->device, port, 674 rdma_protocol_ib(cma_dev->device, port) ? 675 IB_GID_TYPE_IB : 676 listen_id_priv->gid_type, gidp, 677 id_priv); 678 if (!ret) { 679 id_priv->id.port_num = port; 680 goto out; 681 } 682 } 683 684 list_for_each_entry(cma_dev, &dev_list, list) { 685 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 686 if (listen_id_priv && 687 listen_id_priv->cma_dev == cma_dev && 688 listen_id_priv->id.port_num == port) 689 continue; 690 691 gidp = rdma_protocol_roce(cma_dev->device, port) ? 692 &iboe_gid : &gid; 693 694 ret = cma_validate_port(cma_dev->device, port, 695 rdma_protocol_ib(cma_dev->device, port) ? 696 IB_GID_TYPE_IB : 697 cma_dev->default_gid_type[port - 1], 698 gidp, id_priv); 699 if (!ret) { 700 id_priv->id.port_num = port; 701 goto out; 702 } 703 } 704 } 705 706 out: 707 if (!ret) 708 cma_attach_to_dev(id_priv, cma_dev); 709 710 mutex_unlock(&lock); 711 return ret; 712 } 713 714 /* 715 * Select the source IB device and address to reach the destination IB address. 
716 */ 717 static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) 718 { 719 struct cma_device *cma_dev, *cur_dev; 720 struct sockaddr_ib *addr; 721 union ib_gid gid, sgid, *dgid; 722 u16 pkey, index; 723 u8 p; 724 enum ib_port_state port_state; 725 int i; 726 727 cma_dev = NULL; 728 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); 729 dgid = (union ib_gid *) &addr->sib_addr; 730 pkey = ntohs(addr->sib_pkey); 731 732 list_for_each_entry(cur_dev, &dev_list, list) { 733 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 734 if (!rdma_cap_af_ib(cur_dev->device, p)) 735 continue; 736 737 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) 738 continue; 739 740 if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) 741 continue; 742 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, 743 &gid, NULL); 744 i++) { 745 if (!memcmp(&gid, dgid, sizeof(gid))) { 746 cma_dev = cur_dev; 747 sgid = gid; 748 id_priv->id.port_num = p; 749 goto found; 750 } 751 752 if (!cma_dev && (gid.global.subnet_prefix == 753 dgid->global.subnet_prefix) && 754 port_state == IB_PORT_ACTIVE) { 755 cma_dev = cur_dev; 756 sgid = gid; 757 id_priv->id.port_num = p; 758 } 759 } 760 } 761 } 762 763 if (!cma_dev) 764 return -ENODEV; 765 766 found: 767 cma_attach_to_dev(id_priv, cma_dev); 768 addr = (struct sockaddr_ib *) cma_src_addr(id_priv); 769 memcpy(&addr->sib_addr, &sgid, sizeof sgid); 770 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); 771 return 0; 772 } 773 774 static void cma_deref_id(struct rdma_id_private *id_priv) 775 { 776 if (atomic_dec_and_test(&id_priv->refcount)) 777 complete(&id_priv->comp); 778 } 779 780 struct rdma_cm_id *rdma_create_id(struct net *net, 781 rdma_cm_event_handler event_handler, 782 void *context, enum rdma_port_space ps, 783 enum ib_qp_type qp_type) 784 { 785 struct rdma_id_private *id_priv; 786 787 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); 788 if (!id_priv) 789 return ERR_PTR(-ENOMEM); 790 791 id_priv->owner = task_pid_nr(current); 792 id_priv->state = RDMA_CM_IDLE; 793 id_priv->id.context = context; 794 id_priv->id.event_handler = event_handler; 795 id_priv->id.ps = ps; 796 id_priv->id.qp_type = qp_type; 797 id_priv->tos_set = false; 798 spin_lock_init(&id_priv->lock); 799 mutex_init(&id_priv->qp_mutex); 800 init_completion(&id_priv->comp); 801 atomic_set(&id_priv->refcount, 1); 802 mutex_init(&id_priv->handler_mutex); 803 INIT_LIST_HEAD(&id_priv->listen_list); 804 INIT_LIST_HEAD(&id_priv->mc_list); 805 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 806 id_priv->id.route.addr.dev_addr.net = get_net(net); 807 id_priv->seq_num &= 0x00ffffff; 808 809 return &id_priv->id; 810 } 811 EXPORT_SYMBOL(rdma_create_id); 812 813 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 814 { 815 struct ib_qp_attr qp_attr; 816 int qp_attr_mask, ret; 817 818 qp_attr.qp_state = IB_QPS_INIT; 819 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 820 if (ret) 821 return ret; 822 823 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 824 if (ret) 825 return ret; 826 827 qp_attr.qp_state = IB_QPS_RTR; 828 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 829 if (ret) 830 return ret; 831 832 qp_attr.qp_state = IB_QPS_RTS; 833 qp_attr.sq_psn = 0; 834 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 835 836 return ret; 837 } 838 839 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 840 { 841 struct ib_qp_attr qp_attr; 842 int qp_attr_mask, ret; 843 844 qp_attr.qp_state = IB_QPS_INIT; 
845 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 846 if (ret) 847 return ret; 848 849 return ib_modify_qp(qp, &qp_attr, qp_attr_mask); 850 } 851 852 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 853 struct ib_qp_init_attr *qp_init_attr) 854 { 855 struct rdma_id_private *id_priv; 856 struct ib_qp *qp; 857 int ret; 858 859 id_priv = container_of(id, struct rdma_id_private, id); 860 if (id->device != pd->device) 861 return -EINVAL; 862 863 qp_init_attr->port_num = id->port_num; 864 qp = ib_create_qp(pd, qp_init_attr); 865 if (IS_ERR(qp)) 866 return PTR_ERR(qp); 867 868 if (id->qp_type == IB_QPT_UD) 869 ret = cma_init_ud_qp(id_priv, qp); 870 else 871 ret = cma_init_conn_qp(id_priv, qp); 872 if (ret) 873 goto err; 874 875 id->qp = qp; 876 id_priv->qp_num = qp->qp_num; 877 id_priv->srq = (qp->srq != NULL); 878 return 0; 879 err: 880 ib_destroy_qp(qp); 881 return ret; 882 } 883 EXPORT_SYMBOL(rdma_create_qp); 884 885 void rdma_destroy_qp(struct rdma_cm_id *id) 886 { 887 struct rdma_id_private *id_priv; 888 889 id_priv = container_of(id, struct rdma_id_private, id); 890 mutex_lock(&id_priv->qp_mutex); 891 ib_destroy_qp(id_priv->id.qp); 892 id_priv->id.qp = NULL; 893 mutex_unlock(&id_priv->qp_mutex); 894 } 895 EXPORT_SYMBOL(rdma_destroy_qp); 896 897 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, 898 struct rdma_conn_param *conn_param) 899 { 900 struct ib_qp_attr qp_attr; 901 int qp_attr_mask, ret; 902 union ib_gid sgid; 903 904 mutex_lock(&id_priv->qp_mutex); 905 if (!id_priv->id.qp) { 906 ret = 0; 907 goto out; 908 } 909 910 /* Need to update QP attributes from default values. */ 911 qp_attr.qp_state = IB_QPS_INIT; 912 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 913 if (ret) 914 goto out; 915 916 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 917 if (ret) 918 goto out; 919 920 qp_attr.qp_state = IB_QPS_RTR; 921 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 922 if (ret) 923 goto out; 924 925 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 926 rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index, 927 &sgid, NULL); 928 if (ret) 929 goto out; 930 931 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 932 933 if (conn_param) 934 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 935 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 936 out: 937 mutex_unlock(&id_priv->qp_mutex); 938 return ret; 939 } 940 941 static int cma_modify_qp_rts(struct rdma_id_private *id_priv, 942 struct rdma_conn_param *conn_param) 943 { 944 struct ib_qp_attr qp_attr; 945 int qp_attr_mask, ret; 946 947 mutex_lock(&id_priv->qp_mutex); 948 if (!id_priv->id.qp) { 949 ret = 0; 950 goto out; 951 } 952 953 qp_attr.qp_state = IB_QPS_RTS; 954 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 955 if (ret) 956 goto out; 957 958 if (conn_param) 959 qp_attr.max_rd_atomic = conn_param->initiator_depth; 960 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 961 out: 962 mutex_unlock(&id_priv->qp_mutex); 963 return ret; 964 } 965 966 static int cma_modify_qp_err(struct rdma_id_private *id_priv) 967 { 968 struct ib_qp_attr qp_attr; 969 int ret; 970 971 mutex_lock(&id_priv->qp_mutex); 972 if (!id_priv->id.qp) { 973 ret = 0; 974 goto out; 975 } 976 977 qp_attr.qp_state = IB_QPS_ERR; 978 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 979 out: 980 mutex_unlock(&id_priv->qp_mutex); 981 return ret; 982 } 983 984 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 985 struct 
ib_qp_attr *qp_attr, int *qp_attr_mask) 986 { 987 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 988 int ret; 989 u16 pkey; 990 991 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) 992 pkey = 0xffff; 993 else 994 pkey = ib_addr_get_pkey(dev_addr); 995 996 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 997 pkey, &qp_attr->pkey_index); 998 if (ret) 999 return ret; 1000 1001 qp_attr->port_num = id_priv->id.port_num; 1002 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 1003 1004 if (id_priv->id.qp_type == IB_QPT_UD) { 1005 ret = cma_set_qkey(id_priv, 0); 1006 if (ret) 1007 return ret; 1008 1009 qp_attr->qkey = id_priv->qkey; 1010 *qp_attr_mask |= IB_QP_QKEY; 1011 } else { 1012 qp_attr->qp_access_flags = 0; 1013 *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 1014 } 1015 return 0; 1016 } 1017 1018 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 1019 int *qp_attr_mask) 1020 { 1021 struct rdma_id_private *id_priv; 1022 int ret = 0; 1023 1024 id_priv = container_of(id, struct rdma_id_private, id); 1025 if (rdma_cap_ib_cm(id->device, id->port_num)) { 1026 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) 1027 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 1028 else 1029 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 1030 qp_attr_mask); 1031 1032 if (qp_attr->qp_state == IB_QPS_RTR) 1033 qp_attr->rq_psn = id_priv->seq_num; 1034 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 1035 if (!id_priv->cm_id.iw) { 1036 qp_attr->qp_access_flags = 0; 1037 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1038 } else 1039 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 1040 qp_attr_mask); 1041 qp_attr->port_num = id_priv->id.port_num; 1042 *qp_attr_mask |= IB_QP_PORT; 1043 } else 1044 ret = -ENOSYS; 1045 1046 return ret; 1047 } 1048 EXPORT_SYMBOL(rdma_init_qp_attr); 1049 1050 static inline int cma_zero_addr(struct sockaddr *addr) 1051 { 1052 switch (addr->sa_family) { 1053 case AF_INET: 1054 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 1055 case AF_INET6: 1056 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); 1057 case AF_IB: 1058 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); 1059 default: 1060 return 0; 1061 } 1062 } 1063 1064 static inline int cma_loopback_addr(struct sockaddr *addr) 1065 { 1066 switch (addr->sa_family) { 1067 case AF_INET: 1068 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); 1069 case AF_INET6: 1070 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); 1071 case AF_IB: 1072 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); 1073 default: 1074 return 0; 1075 } 1076 } 1077 1078 static inline int cma_any_addr(struct sockaddr *addr) 1079 { 1080 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1081 } 1082 1083 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) 1084 { 1085 if (src->sa_family != dst->sa_family) 1086 return -1; 1087 1088 switch (src->sa_family) { 1089 case AF_INET: 1090 return ((struct sockaddr_in *) src)->sin_addr.s_addr != 1091 ((struct sockaddr_in *) dst)->sin_addr.s_addr; 1092 case AF_INET6: 1093 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, 1094 &((struct sockaddr_in6 *) dst)->sin6_addr); 1095 default: 1096 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, 1097 &((struct sockaddr_ib *) dst)->sib_addr); 1098 } 1099 } 1100 1101 static __be16 cma_port(struct sockaddr *addr) 1102 { 1103 struct 
sockaddr_ib *sib; 1104 1105 switch (addr->sa_family) { 1106 case AF_INET: 1107 return ((struct sockaddr_in *) addr)->sin_port; 1108 case AF_INET6: 1109 return ((struct sockaddr_in6 *) addr)->sin6_port; 1110 case AF_IB: 1111 sib = (struct sockaddr_ib *) addr; 1112 return htons((u16) (be64_to_cpu(sib->sib_sid) & 1113 be64_to_cpu(sib->sib_sid_mask))); 1114 default: 1115 return 0; 1116 } 1117 } 1118 1119 static inline int cma_any_port(struct sockaddr *addr) 1120 { 1121 return !cma_port(addr); 1122 } 1123 1124 static void cma_save_ib_info(struct sockaddr *src_addr, 1125 struct sockaddr *dst_addr, 1126 struct rdma_cm_id *listen_id, 1127 struct sa_path_rec *path) 1128 { 1129 struct sockaddr_ib *listen_ib, *ib; 1130 1131 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; 1132 if (src_addr) { 1133 ib = (struct sockaddr_ib *)src_addr; 1134 ib->sib_family = AF_IB; 1135 if (path) { 1136 ib->sib_pkey = path->pkey; 1137 ib->sib_flowinfo = path->flow_label; 1138 memcpy(&ib->sib_addr, &path->sgid, 16); 1139 ib->sib_sid = path->service_id; 1140 ib->sib_scope_id = 0; 1141 } else { 1142 ib->sib_pkey = listen_ib->sib_pkey; 1143 ib->sib_flowinfo = listen_ib->sib_flowinfo; 1144 ib->sib_addr = listen_ib->sib_addr; 1145 ib->sib_sid = listen_ib->sib_sid; 1146 ib->sib_scope_id = listen_ib->sib_scope_id; 1147 } 1148 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); 1149 } 1150 if (dst_addr) { 1151 ib = (struct sockaddr_ib *)dst_addr; 1152 ib->sib_family = AF_IB; 1153 if (path) { 1154 ib->sib_pkey = path->pkey; 1155 ib->sib_flowinfo = path->flow_label; 1156 memcpy(&ib->sib_addr, &path->dgid, 16); 1157 } 1158 } 1159 } 1160 1161 static void cma_save_ip4_info(struct sockaddr_in *src_addr, 1162 struct sockaddr_in *dst_addr, 1163 struct cma_hdr *hdr, 1164 __be16 local_port) 1165 { 1166 if (src_addr) { 1167 *src_addr = (struct sockaddr_in) { 1168 .sin_family = AF_INET, 1169 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1170 .sin_port = local_port, 1171 }; 1172 } 1173 1174 if (dst_addr) { 1175 *dst_addr = (struct sockaddr_in) { 1176 .sin_family = AF_INET, 1177 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1178 .sin_port = hdr->port, 1179 }; 1180 } 1181 } 1182 1183 static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, 1184 struct sockaddr_in6 *dst_addr, 1185 struct cma_hdr *hdr, 1186 __be16 local_port) 1187 { 1188 if (src_addr) { 1189 *src_addr = (struct sockaddr_in6) { 1190 .sin6_family = AF_INET6, 1191 .sin6_addr = hdr->dst_addr.ip6, 1192 .sin6_port = local_port, 1193 }; 1194 } 1195 1196 if (dst_addr) { 1197 *dst_addr = (struct sockaddr_in6) { 1198 .sin6_family = AF_INET6, 1199 .sin6_addr = hdr->src_addr.ip6, 1200 .sin6_port = hdr->port, 1201 }; 1202 } 1203 } 1204 1205 static u16 cma_port_from_service_id(__be64 service_id) 1206 { 1207 return (u16)be64_to_cpu(service_id); 1208 } 1209 1210 static int cma_save_ip_info(struct sockaddr *src_addr, 1211 struct sockaddr *dst_addr, 1212 struct ib_cm_event *ib_event, 1213 __be64 service_id) 1214 { 1215 struct cma_hdr *hdr; 1216 __be16 port; 1217 1218 hdr = ib_event->private_data; 1219 if (hdr->cma_version != CMA_VERSION) 1220 return -EINVAL; 1221 1222 port = htons(cma_port_from_service_id(service_id)); 1223 1224 switch (cma_get_ip_ver(hdr)) { 1225 case 4: 1226 cma_save_ip4_info((struct sockaddr_in *)src_addr, 1227 (struct sockaddr_in *)dst_addr, hdr, port); 1228 break; 1229 case 6: 1230 cma_save_ip6_info((struct sockaddr_in6 *)src_addr, 1231 (struct sockaddr_in6 *)dst_addr, hdr, port); 1232 break; 1233 default: 1234 return -EAFNOSUPPORT; 1235 } 1236 1237 
return 0; 1238 } 1239 1240 static int cma_save_net_info(struct sockaddr *src_addr, 1241 struct sockaddr *dst_addr, 1242 struct rdma_cm_id *listen_id, 1243 struct ib_cm_event *ib_event, 1244 sa_family_t sa_family, __be64 service_id) 1245 { 1246 if (sa_family == AF_IB) { 1247 if (ib_event->event == IB_CM_REQ_RECEIVED) 1248 cma_save_ib_info(src_addr, dst_addr, listen_id, 1249 ib_event->param.req_rcvd.primary_path); 1250 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1251 cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); 1252 return 0; 1253 } 1254 1255 return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); 1256 } 1257 1258 static int cma_save_req_info(const struct ib_cm_event *ib_event, 1259 struct cma_req_info *req) 1260 { 1261 const struct ib_cm_req_event_param *req_param = 1262 &ib_event->param.req_rcvd; 1263 const struct ib_cm_sidr_req_event_param *sidr_param = 1264 &ib_event->param.sidr_req_rcvd; 1265 1266 switch (ib_event->event) { 1267 case IB_CM_REQ_RECEIVED: 1268 req->device = req_param->listen_id->device; 1269 req->port = req_param->port; 1270 memcpy(&req->local_gid, &req_param->primary_path->sgid, 1271 sizeof(req->local_gid)); 1272 req->has_gid = true; 1273 req->service_id = req_param->primary_path->service_id; 1274 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1275 if (req->pkey != req_param->bth_pkey) 1276 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1277 "RDMA CMA: in the future this may cause the request to be dropped\n", 1278 req_param->bth_pkey, req->pkey); 1279 break; 1280 case IB_CM_SIDR_REQ_RECEIVED: 1281 req->device = sidr_param->listen_id->device; 1282 req->port = sidr_param->port; 1283 req->has_gid = false; 1284 req->service_id = sidr_param->service_id; 1285 req->pkey = sidr_param->pkey; 1286 if (req->pkey != sidr_param->bth_pkey) 1287 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1288 "RDMA CMA: in the future this may cause the request to be dropped\n", 1289 sidr_param->bth_pkey, req->pkey); 1290 break; 1291 default: 1292 return -EINVAL; 1293 } 1294 1295 return 0; 1296 } 1297 1298 static bool validate_ipv4_net_dev(struct net_device *net_dev, 1299 const struct sockaddr_in *dst_addr, 1300 const struct sockaddr_in *src_addr) 1301 { 1302 __be32 daddr = dst_addr->sin_addr.s_addr, 1303 saddr = src_addr->sin_addr.s_addr; 1304 struct fib_result res; 1305 struct flowi4 fl4; 1306 int err; 1307 bool ret; 1308 1309 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1310 ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || 1311 ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || 1312 ipv4_is_loopback(saddr)) 1313 return false; 1314 1315 memset(&fl4, 0, sizeof(fl4)); 1316 fl4.flowi4_iif = net_dev->ifindex; 1317 fl4.daddr = daddr; 1318 fl4.saddr = saddr; 1319 1320 rcu_read_lock(); 1321 err = fib_lookup(dev_net(net_dev), &fl4, &res, 0); 1322 ret = err == 0 && FIB_RES_DEV(res) == net_dev; 1323 rcu_read_unlock(); 1324 1325 return ret; 1326 } 1327 1328 static bool validate_ipv6_net_dev(struct net_device *net_dev, 1329 const struct sockaddr_in6 *dst_addr, 1330 const struct sockaddr_in6 *src_addr) 1331 { 1332 #if IS_ENABLED(CONFIG_IPV6) 1333 const int strict = ipv6_addr_type(&dst_addr->sin6_addr) & 1334 IPV6_ADDR_LINKLOCAL; 1335 struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, 1336 &src_addr->sin6_addr, net_dev->ifindex, 1337 strict); 1338 bool ret; 1339 1340 if (!rt) 1341 return false; 1342 1343 ret = rt->rt6i_idev->dev == 
net_dev; 1344 ip6_rt_put(rt); 1345 1346 return ret; 1347 #else 1348 return false; 1349 #endif 1350 } 1351 1352 static bool validate_net_dev(struct net_device *net_dev, 1353 const struct sockaddr *daddr, 1354 const struct sockaddr *saddr) 1355 { 1356 const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; 1357 const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; 1358 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1359 const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; 1360 1361 switch (daddr->sa_family) { 1362 case AF_INET: 1363 return saddr->sa_family == AF_INET && 1364 validate_ipv4_net_dev(net_dev, daddr4, saddr4); 1365 1366 case AF_INET6: 1367 return saddr->sa_family == AF_INET6 && 1368 validate_ipv6_net_dev(net_dev, daddr6, saddr6); 1369 1370 default: 1371 return false; 1372 } 1373 } 1374 1375 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, 1376 const struct cma_req_info *req) 1377 { 1378 struct sockaddr_storage listen_addr_storage, src_addr_storage; 1379 struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, 1380 *src_addr = (struct sockaddr *)&src_addr_storage; 1381 struct net_device *net_dev; 1382 const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; 1383 int err; 1384 1385 err = cma_save_ip_info(listen_addr, src_addr, ib_event, 1386 req->service_id); 1387 if (err) 1388 return ERR_PTR(err); 1389 1390 net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, 1391 gid, listen_addr); 1392 if (!net_dev) 1393 return ERR_PTR(-ENODEV); 1394 1395 if (!validate_net_dev(net_dev, listen_addr, src_addr)) { 1396 dev_put(net_dev); 1397 return ERR_PTR(-EHOSTUNREACH); 1398 } 1399 1400 return net_dev; 1401 } 1402 1403 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) 1404 { 1405 return (be64_to_cpu(service_id) >> 16) & 0xffff; 1406 } 1407 1408 static bool cma_match_private_data(struct rdma_id_private *id_priv, 1409 const struct cma_hdr *hdr) 1410 { 1411 struct sockaddr *addr = cma_src_addr(id_priv); 1412 __be32 ip4_addr; 1413 struct in6_addr ip6_addr; 1414 1415 if (cma_any_addr(addr) && !id_priv->afonly) 1416 return true; 1417 1418 switch (addr->sa_family) { 1419 case AF_INET: 1420 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1421 if (cma_get_ip_ver(hdr) != 4) 1422 return false; 1423 if (!cma_any_addr(addr) && 1424 hdr->dst_addr.ip4.addr != ip4_addr) 1425 return false; 1426 break; 1427 case AF_INET6: 1428 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1429 if (cma_get_ip_ver(hdr) != 6) 1430 return false; 1431 if (!cma_any_addr(addr) && 1432 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1433 return false; 1434 break; 1435 case AF_IB: 1436 return true; 1437 default: 1438 return false; 1439 } 1440 1441 return true; 1442 } 1443 1444 static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) 1445 { 1446 enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); 1447 enum rdma_transport_type transport = 1448 rdma_node_get_transport(device->node_type); 1449 1450 return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; 1451 } 1452 1453 static bool cma_protocol_roce(const struct rdma_cm_id *id) 1454 { 1455 struct ib_device *device = id->device; 1456 const int port_num = id->port_num ?: rdma_start_port(device); 1457 1458 return cma_protocol_roce_dev_port(device, port_num); 1459 } 1460 1461 static bool cma_match_net_dev(const struct rdma_cm_id *id, 1462 const struct net_device 
*net_dev, 1463 u8 port_num) 1464 { 1465 const struct rdma_addr *addr = &id->route.addr; 1466 1467 if (!net_dev) 1468 /* This request is an AF_IB request or a RoCE request */ 1469 return (!id->port_num || id->port_num == port_num) && 1470 (addr->src_addr.ss_family == AF_IB || 1471 cma_protocol_roce_dev_port(id->device, port_num)); 1472 1473 return !addr->dev_addr.bound_dev_if || 1474 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1475 addr->dev_addr.bound_dev_if == net_dev->ifindex); 1476 } 1477 1478 static struct rdma_id_private *cma_find_listener( 1479 const struct rdma_bind_list *bind_list, 1480 const struct ib_cm_id *cm_id, 1481 const struct ib_cm_event *ib_event, 1482 const struct cma_req_info *req, 1483 const struct net_device *net_dev) 1484 { 1485 struct rdma_id_private *id_priv, *id_priv_dev; 1486 1487 if (!bind_list) 1488 return ERR_PTR(-EINVAL); 1489 1490 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1491 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1492 if (id_priv->id.device == cm_id->device && 1493 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1494 return id_priv; 1495 list_for_each_entry(id_priv_dev, 1496 &id_priv->listen_list, 1497 listen_list) { 1498 if (id_priv_dev->id.device == cm_id->device && 1499 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1500 return id_priv_dev; 1501 } 1502 } 1503 } 1504 1505 return ERR_PTR(-EINVAL); 1506 } 1507 1508 static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1509 struct ib_cm_event *ib_event, 1510 struct net_device **net_dev) 1511 { 1512 struct cma_req_info req; 1513 struct rdma_bind_list *bind_list; 1514 struct rdma_id_private *id_priv; 1515 int err; 1516 1517 err = cma_save_req_info(ib_event, &req); 1518 if (err) 1519 return ERR_PTR(err); 1520 1521 *net_dev = cma_get_net_dev(ib_event, &req); 1522 if (IS_ERR(*net_dev)) { 1523 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1524 /* Assuming the protocol is AF_IB */ 1525 *net_dev = NULL; 1526 } else if (cma_protocol_roce_dev_port(req.device, req.port)) { 1527 /* TODO find the net dev matching the request parameters 1528 * through the RoCE GID table */ 1529 *net_dev = NULL; 1530 } else { 1531 return ERR_CAST(*net_dev); 1532 } 1533 } 1534 1535 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, 1536 rdma_ps_from_service_id(req.service_id), 1537 cma_port_from_service_id(req.service_id)); 1538 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1539 if (IS_ERR(id_priv) && *net_dev) { 1540 dev_put(*net_dev); 1541 *net_dev = NULL; 1542 } 1543 1544 return id_priv; 1545 } 1546 1547 static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv) 1548 { 1549 return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); 1550 } 1551 1552 static void cma_cancel_route(struct rdma_id_private *id_priv) 1553 { 1554 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1555 if (id_priv->query) 1556 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1557 } 1558 } 1559 1560 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1561 { 1562 struct rdma_id_private *dev_id_priv; 1563 1564 /* 1565 * Remove from listen_any_list to prevent added devices from spawning 1566 * additional listen requests. 
1567 */ 1568 mutex_lock(&lock); 1569 list_del(&id_priv->list); 1570 1571 while (!list_empty(&id_priv->listen_list)) { 1572 dev_id_priv = list_entry(id_priv->listen_list.next, 1573 struct rdma_id_private, listen_list); 1574 /* sync with device removal to avoid duplicate destruction */ 1575 list_del_init(&dev_id_priv->list); 1576 list_del(&dev_id_priv->listen_list); 1577 mutex_unlock(&lock); 1578 1579 rdma_destroy_id(&dev_id_priv->id); 1580 mutex_lock(&lock); 1581 } 1582 mutex_unlock(&lock); 1583 } 1584 1585 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1586 enum rdma_cm_state state) 1587 { 1588 switch (state) { 1589 case RDMA_CM_ADDR_QUERY: 1590 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1591 break; 1592 case RDMA_CM_ROUTE_QUERY: 1593 cma_cancel_route(id_priv); 1594 break; 1595 case RDMA_CM_LISTEN: 1596 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1597 cma_cancel_listens(id_priv); 1598 break; 1599 default: 1600 break; 1601 } 1602 } 1603 1604 static void cma_release_port(struct rdma_id_private *id_priv) 1605 { 1606 struct rdma_bind_list *bind_list = id_priv->bind_list; 1607 struct net *net = id_priv->id.route.addr.dev_addr.net; 1608 1609 if (!bind_list) 1610 return; 1611 1612 mutex_lock(&lock); 1613 hlist_del(&id_priv->node); 1614 if (hlist_empty(&bind_list->owners)) { 1615 cma_ps_remove(net, bind_list->ps, bind_list->port); 1616 kfree(bind_list); 1617 } 1618 mutex_unlock(&lock); 1619 } 1620 1621 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1622 { 1623 struct cma_multicast *mc; 1624 1625 while (!list_empty(&id_priv->mc_list)) { 1626 mc = container_of(id_priv->mc_list.next, 1627 struct cma_multicast, list); 1628 list_del(&mc->list); 1629 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1630 id_priv->id.port_num)) { 1631 ib_sa_free_multicast(mc->multicast.ib); 1632 kfree(mc); 1633 } else { 1634 if (mc->igmp_joined) { 1635 struct rdma_dev_addr *dev_addr = 1636 &id_priv->id.route.addr.dev_addr; 1637 struct net_device *ndev = NULL; 1638 1639 if (dev_addr->bound_dev_if) 1640 ndev = dev_get_by_index(&init_net, 1641 dev_addr->bound_dev_if); 1642 if (ndev) { 1643 cma_igmp_send(ndev, 1644 &mc->multicast.ib->rec.mgid, 1645 false); 1646 dev_put(ndev); 1647 } 1648 } 1649 kref_put(&mc->mcref, release_mc); 1650 } 1651 } 1652 } 1653 1654 void rdma_destroy_id(struct rdma_cm_id *id) 1655 { 1656 struct rdma_id_private *id_priv; 1657 enum rdma_cm_state state; 1658 1659 id_priv = container_of(id, struct rdma_id_private, id); 1660 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1661 cma_cancel_operation(id_priv, state); 1662 1663 /* 1664 * Wait for any active callback to finish. New callbacks will find 1665 * the id_priv state set to destroying and abort. 
1666 */ 1667 mutex_lock(&id_priv->handler_mutex); 1668 mutex_unlock(&id_priv->handler_mutex); 1669 1670 if (id_priv->cma_dev) { 1671 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1672 if (id_priv->cm_id.ib) 1673 ib_destroy_cm_id(id_priv->cm_id.ib); 1674 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1675 if (id_priv->cm_id.iw) 1676 iw_destroy_cm_id(id_priv->cm_id.iw); 1677 } 1678 cma_leave_mc_groups(id_priv); 1679 cma_release_dev(id_priv); 1680 } 1681 1682 cma_release_port(id_priv); 1683 cma_deref_id(id_priv); 1684 wait_for_completion(&id_priv->comp); 1685 1686 if (id_priv->internal_id) 1687 cma_deref_id(id_priv->id.context); 1688 1689 kfree(id_priv->id.route.path_rec); 1690 put_net(id_priv->id.route.addr.dev_addr.net); 1691 kfree(id_priv); 1692 } 1693 EXPORT_SYMBOL(rdma_destroy_id); 1694 1695 static int cma_rep_recv(struct rdma_id_private *id_priv) 1696 { 1697 int ret; 1698 1699 ret = cma_modify_qp_rtr(id_priv, NULL); 1700 if (ret) 1701 goto reject; 1702 1703 ret = cma_modify_qp_rts(id_priv, NULL); 1704 if (ret) 1705 goto reject; 1706 1707 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1708 if (ret) 1709 goto reject; 1710 1711 return 0; 1712 reject: 1713 pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); 1714 cma_modify_qp_err(id_priv); 1715 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1716 NULL, 0, NULL, 0); 1717 return ret; 1718 } 1719 1720 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1721 struct ib_cm_rep_event_param *rep_data, 1722 void *private_data) 1723 { 1724 event->param.conn.private_data = private_data; 1725 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1726 event->param.conn.responder_resources = rep_data->responder_resources; 1727 event->param.conn.initiator_depth = rep_data->initiator_depth; 1728 event->param.conn.flow_control = rep_data->flow_control; 1729 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1730 event->param.conn.srq = rep_data->srq; 1731 event->param.conn.qp_num = rep_data->remote_qpn; 1732 } 1733 1734 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1735 { 1736 struct rdma_id_private *id_priv = cm_id->context; 1737 struct rdma_cm_event event; 1738 int ret = 0; 1739 1740 mutex_lock(&id_priv->handler_mutex); 1741 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1742 id_priv->state != RDMA_CM_CONNECT) || 1743 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1744 id_priv->state != RDMA_CM_DISCONNECT)) 1745 goto out; 1746 1747 memset(&event, 0, sizeof event); 1748 switch (ib_event->event) { 1749 case IB_CM_REQ_ERROR: 1750 case IB_CM_REP_ERROR: 1751 event.event = RDMA_CM_EVENT_UNREACHABLE; 1752 event.status = -ETIMEDOUT; 1753 break; 1754 case IB_CM_REP_RECEIVED: 1755 if (cma_comp(id_priv, RDMA_CM_CONNECT) && 1756 (id_priv->id.qp_type != IB_QPT_UD)) 1757 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1758 if (id_priv->id.qp) { 1759 event.status = cma_rep_recv(id_priv); 1760 event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : 1761 RDMA_CM_EVENT_ESTABLISHED; 1762 } else { 1763 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1764 } 1765 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1766 ib_event->private_data); 1767 break; 1768 case IB_CM_RTU_RECEIVED: 1769 case IB_CM_USER_ESTABLISHED: 1770 event.event = RDMA_CM_EVENT_ESTABLISHED; 1771 break; 1772 case IB_CM_DREQ_ERROR: 1773 event.status = -ETIMEDOUT; /* fall through */ 1774 case IB_CM_DREQ_RECEIVED: 1775 case IB_CM_DREP_RECEIVED: 1776 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1777 RDMA_CM_DISCONNECT)) 1778 goto out; 1779 event.event = RDMA_CM_EVENT_DISCONNECTED; 1780 break; 1781 case IB_CM_TIMEWAIT_EXIT: 1782 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1783 break; 1784 case IB_CM_MRA_RECEIVED: 1785 /* ignore event */ 1786 goto out; 1787 case IB_CM_REJ_RECEIVED: 1788 pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id, 1789 ib_event->param.rej_rcvd.reason)); 1790 cma_modify_qp_err(id_priv); 1791 event.status = ib_event->param.rej_rcvd.reason; 1792 event.event = RDMA_CM_EVENT_REJECTED; 1793 event.param.conn.private_data = ib_event->private_data; 1794 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1795 break; 1796 default: 1797 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 1798 ib_event->event); 1799 goto out; 1800 } 1801 1802 ret = id_priv->id.event_handler(&id_priv->id, &event); 1803 if (ret) { 1804 /* Destroy the CM ID by returning a non-zero value. */ 1805 id_priv->cm_id.ib = NULL; 1806 cma_exch(id_priv, RDMA_CM_DESTROYING); 1807 mutex_unlock(&id_priv->handler_mutex); 1808 rdma_destroy_id(&id_priv->id); 1809 return ret; 1810 } 1811 out: 1812 mutex_unlock(&id_priv->handler_mutex); 1813 return ret; 1814 } 1815 1816 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 1817 struct ib_cm_event *ib_event, 1818 struct net_device *net_dev) 1819 { 1820 struct rdma_id_private *id_priv; 1821 struct rdma_cm_id *id; 1822 struct rdma_route *rt; 1823 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1824 struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; 1825 const __be64 service_id = 1826 ib_event->param.req_rcvd.primary_path->service_id; 1827 int ret; 1828 1829 id = rdma_create_id(listen_id->route.addr.dev_addr.net, 1830 listen_id->event_handler, listen_id->context, 1831 listen_id->ps, ib_event->param.req_rcvd.qp_type); 1832 if (IS_ERR(id)) 1833 return NULL; 1834 1835 id_priv = container_of(id, struct rdma_id_private, id); 1836 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1837 (struct sockaddr *)&id->route.addr.dst_addr, 1838 listen_id, ib_event, ss_family, service_id)) 1839 goto err; 1840 1841 rt = &id->route; 1842 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; 1843 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 1844 GFP_KERNEL); 1845 if (!rt->path_rec) 1846 goto err; 1847 1848 rt->path_rec[0] = *path; 1849 if (rt->num_paths == 2) 1850 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1851 1852 if (net_dev) { 1853 rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 1854 } else { 1855 if (!cma_protocol_roce(listen_id) && 1856 cma_any_addr(cma_src_addr(id_priv))) { 1857 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 1858 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 1859 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1860 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 1861 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 1862 if (ret) 1863 goto err; 1864 } 1865 } 1866 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1867 1868 id_priv->state = RDMA_CM_CONNECT; 1869 return id_priv; 1870 1871 err: 1872 rdma_destroy_id(id); 1873 return NULL; 1874 } 1875 1876 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 1877 struct ib_cm_event *ib_event, 1878 struct net_device *net_dev) 1879 { 1880 struct rdma_id_private *id_priv; 1881 struct rdma_cm_id *id; 1882 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1883 struct net *net = listen_id->route.addr.dev_addr.net; 1884 int ret; 1885 1886 id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 1887 listen_id->ps, IB_QPT_UD); 1888 if (IS_ERR(id)) 1889 return NULL; 1890 1891 id_priv = container_of(id, struct rdma_id_private, id); 1892 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1893 (struct sockaddr *)&id->route.addr.dst_addr, 1894 listen_id, ib_event, ss_family, 1895 ib_event->param.sidr_req_rcvd.service_id)) 1896 goto err; 1897 1898 if (net_dev) { 1899 rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 1900 } else { 1901 if (!cma_any_addr(cma_src_addr(id_priv))) { 1902 ret = cma_translate_addr(cma_src_addr(id_priv), 1903 &id->route.addr.dev_addr); 1904 if (ret) 1905 goto err; 1906 } 1907 } 1908 1909 id_priv->state = RDMA_CM_CONNECT; 1910 return id_priv; 1911 err: 1912 rdma_destroy_id(id); 1913 return NULL; 1914 } 1915 1916 static void cma_set_req_event_data(struct rdma_cm_event *event, 1917 struct ib_cm_req_event_param *req_data, 1918 void *private_data, int offset) 1919 { 1920 event->param.conn.private_data = private_data + offset; 1921 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 1922 event->param.conn.responder_resources = req_data->responder_resources; 1923 event->param.conn.initiator_depth = req_data->initiator_depth; 1924 event->param.conn.flow_control = req_data->flow_control; 1925 event->param.conn.retry_count = req_data->retry_count; 1926 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 1927 event->param.conn.srq = req_data->srq; 1928 event->param.conn.qp_num = req_data->remote_qpn; 1929 } 1930 1931 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 1932 { 1933 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 1934 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 1935 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 1936 (id->qp_type == IB_QPT_UD)) || 1937 (!id->qp_type)); 1938 } 1939 1940 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1941 { 1942 struct rdma_id_private *listen_id, *conn_id = NULL; 1943 struct rdma_cm_event event; 1944 struct net_device *net_dev; 1945 u8 offset; 1946 
int ret; 1947 1948 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 1949 if (IS_ERR(listen_id)) 1950 return PTR_ERR(listen_id); 1951 1952 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 1953 ret = -EINVAL; 1954 goto net_dev_put; 1955 } 1956 1957 mutex_lock(&listen_id->handler_mutex); 1958 if (listen_id->state != RDMA_CM_LISTEN) { 1959 ret = -ECONNABORTED; 1960 goto err1; 1961 } 1962 1963 memset(&event, 0, sizeof event); 1964 offset = cma_user_data_offset(listen_id); 1965 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1966 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1967 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 1968 event.param.ud.private_data = ib_event->private_data + offset; 1969 event.param.ud.private_data_len = 1970 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 1971 } else { 1972 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 1973 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 1974 ib_event->private_data, offset); 1975 } 1976 if (!conn_id) { 1977 ret = -ENOMEM; 1978 goto err1; 1979 } 1980 1981 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1982 ret = cma_acquire_dev(conn_id, listen_id); 1983 if (ret) 1984 goto err2; 1985 1986 conn_id->cm_id.ib = cm_id; 1987 cm_id->context = conn_id; 1988 cm_id->cm_handler = cma_ib_handler; 1989 1990 /* 1991 * Protect against the user destroying conn_id from another thread 1992 * until we're done accessing it. 1993 */ 1994 atomic_inc(&conn_id->refcount); 1995 ret = conn_id->id.event_handler(&conn_id->id, &event); 1996 if (ret) 1997 goto err3; 1998 /* 1999 * Acquire mutex to prevent user executing rdma_destroy_id() 2000 * while we're accessing the cm_id. 2001 */ 2002 mutex_lock(&lock); 2003 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 2004 (conn_id->id.qp_type != IB_QPT_UD)) 2005 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2006 mutex_unlock(&lock); 2007 mutex_unlock(&conn_id->handler_mutex); 2008 mutex_unlock(&listen_id->handler_mutex); 2009 cma_deref_id(conn_id); 2010 if (net_dev) 2011 dev_put(net_dev); 2012 return 0; 2013 2014 err3: 2015 cma_deref_id(conn_id); 2016 /* Destroy the CM ID by returning a non-zero value. 
*/ 2017 conn_id->cm_id.ib = NULL; 2018 err2: 2019 cma_exch(conn_id, RDMA_CM_DESTROYING); 2020 mutex_unlock(&conn_id->handler_mutex); 2021 err1: 2022 mutex_unlock(&listen_id->handler_mutex); 2023 if (conn_id) 2024 rdma_destroy_id(&conn_id->id); 2025 2026 net_dev_put: 2027 if (net_dev) 2028 dev_put(net_dev); 2029 2030 return ret; 2031 } 2032 2033 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2034 { 2035 if (addr->sa_family == AF_IB) 2036 return ((struct sockaddr_ib *) addr)->sib_sid; 2037 2038 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2039 } 2040 EXPORT_SYMBOL(rdma_get_service_id); 2041 2042 void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, 2043 union ib_gid *dgid) 2044 { 2045 struct rdma_addr *addr = &cm_id->route.addr; 2046 2047 if (!cm_id->device) { 2048 if (sgid) 2049 memset(sgid, 0, sizeof(*sgid)); 2050 if (dgid) 2051 memset(dgid, 0, sizeof(*dgid)); 2052 return; 2053 } 2054 2055 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) { 2056 if (sgid) 2057 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid); 2058 if (dgid) 2059 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid); 2060 } else { 2061 if (sgid) 2062 rdma_addr_get_sgid(&addr->dev_addr, sgid); 2063 if (dgid) 2064 rdma_addr_get_dgid(&addr->dev_addr, dgid); 2065 } 2066 } 2067 EXPORT_SYMBOL(rdma_read_gids); 2068 2069 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2070 { 2071 struct rdma_id_private *id_priv = iw_id->context; 2072 struct rdma_cm_event event; 2073 int ret = 0; 2074 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2075 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2076 2077 mutex_lock(&id_priv->handler_mutex); 2078 if (id_priv->state != RDMA_CM_CONNECT) 2079 goto out; 2080 2081 memset(&event, 0, sizeof event); 2082 switch (iw_event->event) { 2083 case IW_CM_EVENT_CLOSE: 2084 event.event = RDMA_CM_EVENT_DISCONNECTED; 2085 break; 2086 case IW_CM_EVENT_CONNECT_REPLY: 2087 memcpy(cma_src_addr(id_priv), laddr, 2088 rdma_addr_size(laddr)); 2089 memcpy(cma_dst_addr(id_priv), raddr, 2090 rdma_addr_size(raddr)); 2091 switch (iw_event->status) { 2092 case 0: 2093 event.event = RDMA_CM_EVENT_ESTABLISHED; 2094 event.param.conn.initiator_depth = iw_event->ird; 2095 event.param.conn.responder_resources = iw_event->ord; 2096 break; 2097 case -ECONNRESET: 2098 case -ECONNREFUSED: 2099 event.event = RDMA_CM_EVENT_REJECTED; 2100 break; 2101 case -ETIMEDOUT: 2102 event.event = RDMA_CM_EVENT_UNREACHABLE; 2103 break; 2104 default: 2105 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2106 break; 2107 } 2108 break; 2109 case IW_CM_EVENT_ESTABLISHED: 2110 event.event = RDMA_CM_EVENT_ESTABLISHED; 2111 event.param.conn.initiator_depth = iw_event->ird; 2112 event.param.conn.responder_resources = iw_event->ord; 2113 break; 2114 default: 2115 BUG_ON(1); 2116 } 2117 2118 event.status = iw_event->status; 2119 event.param.conn.private_data = iw_event->private_data; 2120 event.param.conn.private_data_len = iw_event->private_data_len; 2121 ret = id_priv->id.event_handler(&id_priv->id, &event); 2122 if (ret) { 2123 /* Destroy the CM ID by returning a non-zero value. 
*/ 2124 id_priv->cm_id.iw = NULL; 2125 cma_exch(id_priv, RDMA_CM_DESTROYING); 2126 mutex_unlock(&id_priv->handler_mutex); 2127 rdma_destroy_id(&id_priv->id); 2128 return ret; 2129 } 2130 2131 out: 2132 mutex_unlock(&id_priv->handler_mutex); 2133 return ret; 2134 } 2135 2136 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2137 struct iw_cm_event *iw_event) 2138 { 2139 struct rdma_cm_id *new_cm_id; 2140 struct rdma_id_private *listen_id, *conn_id; 2141 struct rdma_cm_event event; 2142 int ret = -ECONNABORTED; 2143 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2144 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2145 2146 listen_id = cm_id->context; 2147 2148 mutex_lock(&listen_id->handler_mutex); 2149 if (listen_id->state != RDMA_CM_LISTEN) 2150 goto out; 2151 2152 /* Create a new RDMA id for the new IW CM ID */ 2153 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2154 listen_id->id.event_handler, 2155 listen_id->id.context, 2156 RDMA_PS_TCP, IB_QPT_RC); 2157 if (IS_ERR(new_cm_id)) { 2158 ret = -ENOMEM; 2159 goto out; 2160 } 2161 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2162 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2163 conn_id->state = RDMA_CM_CONNECT; 2164 2165 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2166 if (ret) { 2167 mutex_unlock(&conn_id->handler_mutex); 2168 rdma_destroy_id(new_cm_id); 2169 goto out; 2170 } 2171 2172 ret = cma_acquire_dev(conn_id, listen_id); 2173 if (ret) { 2174 mutex_unlock(&conn_id->handler_mutex); 2175 rdma_destroy_id(new_cm_id); 2176 goto out; 2177 } 2178 2179 conn_id->cm_id.iw = cm_id; 2180 cm_id->context = conn_id; 2181 cm_id->cm_handler = cma_iw_handler; 2182 2183 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2184 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2185 2186 memset(&event, 0, sizeof event); 2187 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2188 event.param.conn.private_data = iw_event->private_data; 2189 event.param.conn.private_data_len = iw_event->private_data_len; 2190 event.param.conn.initiator_depth = iw_event->ird; 2191 event.param.conn.responder_resources = iw_event->ord; 2192 2193 /* 2194 * Protect against the user destroying conn_id from another thread 2195 * until we're done accessing it. 
2196 */ 2197 atomic_inc(&conn_id->refcount); 2198 ret = conn_id->id.event_handler(&conn_id->id, &event); 2199 if (ret) { 2200 /* User wants to destroy the CM ID */ 2201 conn_id->cm_id.iw = NULL; 2202 cma_exch(conn_id, RDMA_CM_DESTROYING); 2203 mutex_unlock(&conn_id->handler_mutex); 2204 cma_deref_id(conn_id); 2205 rdma_destroy_id(&conn_id->id); 2206 goto out; 2207 } 2208 2209 mutex_unlock(&conn_id->handler_mutex); 2210 cma_deref_id(conn_id); 2211 2212 out: 2213 mutex_unlock(&listen_id->handler_mutex); 2214 return ret; 2215 } 2216 2217 static int cma_ib_listen(struct rdma_id_private *id_priv) 2218 { 2219 struct sockaddr *addr; 2220 struct ib_cm_id *id; 2221 __be64 svc_id; 2222 2223 addr = cma_src_addr(id_priv); 2224 svc_id = rdma_get_service_id(&id_priv->id, addr); 2225 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2226 if (IS_ERR(id)) 2227 return PTR_ERR(id); 2228 id_priv->cm_id.ib = id; 2229 2230 return 0; 2231 } 2232 2233 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2234 { 2235 int ret; 2236 struct iw_cm_id *id; 2237 2238 id = iw_create_cm_id(id_priv->id.device, 2239 iw_conn_req_handler, 2240 id_priv); 2241 if (IS_ERR(id)) 2242 return PTR_ERR(id); 2243 2244 id->tos = id_priv->tos; 2245 id_priv->cm_id.iw = id; 2246 2247 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2248 rdma_addr_size(cma_src_addr(id_priv))); 2249 2250 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2251 2252 if (ret) { 2253 iw_destroy_cm_id(id_priv->cm_id.iw); 2254 id_priv->cm_id.iw = NULL; 2255 } 2256 2257 return ret; 2258 } 2259 2260 static int cma_listen_handler(struct rdma_cm_id *id, 2261 struct rdma_cm_event *event) 2262 { 2263 struct rdma_id_private *id_priv = id->context; 2264 2265 id->context = id_priv->id.context; 2266 id->event_handler = id_priv->id.event_handler; 2267 return id_priv->id.event_handler(id, event); 2268 } 2269 2270 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2271 struct cma_device *cma_dev) 2272 { 2273 struct rdma_id_private *dev_id_priv; 2274 struct rdma_cm_id *id; 2275 struct net *net = id_priv->id.route.addr.dev_addr.net; 2276 int ret; 2277 2278 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2279 return; 2280 2281 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2282 id_priv->id.qp_type); 2283 if (IS_ERR(id)) 2284 return; 2285 2286 dev_id_priv = container_of(id, struct rdma_id_private, id); 2287 2288 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2289 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2290 rdma_addr_size(cma_src_addr(id_priv))); 2291 2292 _cma_attach_to_dev(dev_id_priv, cma_dev); 2293 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2294 atomic_inc(&id_priv->refcount); 2295 dev_id_priv->internal_id = 1; 2296 dev_id_priv->afonly = id_priv->afonly; 2297 2298 ret = rdma_listen(id, id_priv->backlog); 2299 if (ret) 2300 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2301 ret, cma_dev->device->name); 2302 } 2303 2304 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2305 { 2306 struct cma_device *cma_dev; 2307 2308 mutex_lock(&lock); 2309 list_add_tail(&id_priv->list, &listen_any_list); 2310 list_for_each_entry(cma_dev, &dev_list, list) 2311 cma_listen_on_dev(id_priv, cma_dev); 2312 mutex_unlock(&lock); 2313 } 2314 2315 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2316 { 2317 struct rdma_id_private *id_priv; 2318 2319 id_priv = container_of(id, struct rdma_id_private, id); 2320 
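/* The type of service recorded below is consumed during route resolution: cma_query_ib_route() copies it into the path record qos_class for AF_INET, while cma_resolve_iboe_route() maps it to an SL with iboe_tos_to_sl() and uses it as the RoCE traffic class. */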
id_priv->tos = (u8) tos; 2321 id_priv->tos_set = true; 2322 } 2323 EXPORT_SYMBOL(rdma_set_service_type); 2324 2325 static void cma_query_handler(int status, struct sa_path_rec *path_rec, 2326 void *context) 2327 { 2328 struct cma_work *work = context; 2329 struct rdma_route *route; 2330 2331 route = &work->id->id.route; 2332 2333 if (!status) { 2334 route->num_paths = 1; 2335 *route->path_rec = *path_rec; 2336 } else { 2337 work->old_state = RDMA_CM_ROUTE_QUERY; 2338 work->new_state = RDMA_CM_ADDR_RESOLVED; 2339 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2340 work->event.status = status; 2341 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", 2342 status); 2343 } 2344 2345 queue_work(cma_wq, &work->work); 2346 } 2347 2348 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2349 struct cma_work *work) 2350 { 2351 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2352 struct sa_path_rec path_rec; 2353 ib_sa_comp_mask comp_mask; 2354 struct sockaddr_in6 *sin6; 2355 struct sockaddr_ib *sib; 2356 2357 memset(&path_rec, 0, sizeof path_rec); 2358 2359 if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num)) 2360 path_rec.rec_type = SA_PATH_REC_TYPE_OPA; 2361 else 2362 path_rec.rec_type = SA_PATH_REC_TYPE_IB; 2363 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2364 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2365 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2366 path_rec.numb_path = 1; 2367 path_rec.reversible = 1; 2368 path_rec.service_id = rdma_get_service_id(&id_priv->id, 2369 cma_dst_addr(id_priv)); 2370 2371 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2372 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2373 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2374 2375 switch (cma_family(id_priv)) { 2376 case AF_INET: 2377 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2378 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2379 break; 2380 case AF_INET6: 2381 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2382 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2383 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2384 break; 2385 case AF_IB: 2386 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2387 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2388 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2389 break; 2390 } 2391 2392 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2393 id_priv->id.port_num, &path_rec, 2394 comp_mask, timeout_ms, 2395 GFP_KERNEL, cma_query_handler, 2396 work, &id_priv->query); 2397 2398 return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; 2399 } 2400 2401 static void cma_work_handler(struct work_struct *_work) 2402 { 2403 struct cma_work *work = container_of(_work, struct cma_work, work); 2404 struct rdma_id_private *id_priv = work->id; 2405 int destroy = 0; 2406 2407 mutex_lock(&id_priv->handler_mutex); 2408 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2409 goto out; 2410 2411 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2412 cma_exch(id_priv, RDMA_CM_DESTROYING); 2413 destroy = 1; 2414 } 2415 out: 2416 mutex_unlock(&id_priv->handler_mutex); 2417 cma_deref_id(id_priv); 2418 if (destroy) 2419 rdma_destroy_id(&id_priv->id); 2420 kfree(work); 2421 } 2422 2423 static void cma_ndev_work_handler(struct work_struct *_work) 2424 { 2425 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 2426 struct rdma_id_private *id_priv = work->id; 2427 int destroy = 0; 2428 2429 mutex_lock(&id_priv->handler_mutex); 2430 if (id_priv->state == RDMA_CM_DESTROYING || 2431 id_priv->state == RDMA_CM_DEVICE_REMOVAL) 2432 goto out; 2433 2434 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2435 cma_exch(id_priv, RDMA_CM_DESTROYING); 2436 destroy = 1; 2437 } 2438 2439 out: 2440 mutex_unlock(&id_priv->handler_mutex); 2441 cma_deref_id(id_priv); 2442 if (destroy) 2443 rdma_destroy_id(&id_priv->id); 2444 kfree(work); 2445 } 2446 2447 static void cma_init_resolve_route_work(struct cma_work *work, 2448 struct rdma_id_private *id_priv) 2449 { 2450 work->id = id_priv; 2451 INIT_WORK(&work->work, cma_work_handler); 2452 work->old_state = RDMA_CM_ROUTE_QUERY; 2453 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2454 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2455 } 2456 2457 static void cma_init_resolve_addr_work(struct cma_work *work, 2458 struct rdma_id_private *id_priv) 2459 { 2460 work->id = id_priv; 2461 INIT_WORK(&work->work, cma_work_handler); 2462 work->old_state = RDMA_CM_ADDR_QUERY; 2463 work->new_state = RDMA_CM_ADDR_RESOLVED; 2464 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2465 } 2466 2467 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2468 { 2469 struct rdma_route *route = &id_priv->id.route; 2470 struct cma_work *work; 2471 int ret; 2472 2473 work = kzalloc(sizeof *work, GFP_KERNEL); 2474 if (!work) 2475 return -ENOMEM; 2476 2477 cma_init_resolve_route_work(work, id_priv); 2478 2479 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2480 if (!route->path_rec) { 2481 ret = -ENOMEM; 2482 goto err1; 2483 } 2484 2485 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2486 if (ret) 2487 goto err2; 2488 2489 return 0; 2490 err2: 2491 kfree(route->path_rec); 2492 route->path_rec = NULL; 2493 err1: 2494 kfree(work); 2495 return ret; 2496 } 2497 2498 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2499 unsigned long supported_gids, 2500 enum ib_gid_type default_gid) 2501 { 2502 if ((network_type == RDMA_NETWORK_IPV4 || 2503 network_type == RDMA_NETWORK_IPV6) && 2504 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2505 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2506 2507 return default_gid; 2508 } 2509 2510 /* 2511 * cma_iboe_set_path_rec_l2_fields() is a helper function that sets the 2512 * path record type based on the GID type. 2513 * It also sets up the other L2 fields of the path record, including the 2514 * destination MAC address and the netdev ifindex. 2515 * It returns the netdev of the bound interface for this path record entry.
2516 */ 2517 static struct net_device * 2518 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) 2519 { 2520 struct rdma_route *route = &id_priv->id.route; 2521 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; 2522 struct rdma_addr *addr = &route->addr; 2523 unsigned long supported_gids; 2524 struct net_device *ndev; 2525 2526 if (!addr->dev_addr.bound_dev_if) 2527 return NULL; 2528 2529 ndev = dev_get_by_index(addr->dev_addr.net, 2530 addr->dev_addr.bound_dev_if); 2531 if (!ndev) 2532 return NULL; 2533 2534 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2535 id_priv->id.port_num); 2536 gid_type = cma_route_gid_type(addr->dev_addr.network, 2537 supported_gids, 2538 id_priv->gid_type); 2539 /* Use the hint from IP Stack to select GID Type */ 2540 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2541 gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2542 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); 2543 2544 sa_path_set_ndev(route->path_rec, addr->dev_addr.net); 2545 sa_path_set_ifindex(route->path_rec, ndev->ifindex); 2546 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); 2547 return ndev; 2548 } 2549 2550 int rdma_set_ib_path(struct rdma_cm_id *id, 2551 struct sa_path_rec *path_rec) 2552 { 2553 struct rdma_id_private *id_priv; 2554 struct net_device *ndev; 2555 int ret; 2556 2557 id_priv = container_of(id, struct rdma_id_private, id); 2558 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2559 RDMA_CM_ROUTE_RESOLVED)) 2560 return -EINVAL; 2561 2562 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec), 2563 GFP_KERNEL); 2564 if (!id->route.path_rec) { 2565 ret = -ENOMEM; 2566 goto err; 2567 } 2568 2569 if (rdma_protocol_roce(id->device, id->port_num)) { 2570 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2571 if (!ndev) { 2572 ret = -ENODEV; 2573 goto err_free; 2574 } 2575 dev_put(ndev); 2576 } 2577 2578 id->route.num_paths = 1; 2579 return 0; 2580 2581 err_free: 2582 kfree(id->route.path_rec); 2583 id->route.path_rec = NULL; 2584 err: 2585 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2586 return ret; 2587 } 2588 EXPORT_SYMBOL(rdma_set_ib_path); 2589 2590 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2591 { 2592 struct cma_work *work; 2593 2594 work = kzalloc(sizeof *work, GFP_KERNEL); 2595 if (!work) 2596 return -ENOMEM; 2597 2598 cma_init_resolve_route_work(work, id_priv); 2599 queue_work(cma_wq, &work->work); 2600 return 0; 2601 } 2602 2603 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2604 { 2605 int prio; 2606 struct net_device *dev; 2607 2608 prio = rt_tos2priority(tos); 2609 dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; 2610 if (dev->num_tc) 2611 return netdev_get_prio_tc_map(dev, prio); 2612 2613 #if IS_ENABLED(CONFIG_VLAN_8021Q) 2614 if (is_vlan_dev(ndev)) 2615 return (vlan_dev_get_egress_qos_mask(ndev, prio) & 2616 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 2617 #endif 2618 return 0; 2619 } 2620 2621 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2622 { 2623 struct rdma_route *route = &id_priv->id.route; 2624 struct rdma_addr *addr = &route->addr; 2625 struct cma_work *work; 2626 int ret; 2627 struct net_device *ndev; 2628 2629 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 2630 rdma_start_port(id_priv->cma_dev->device)]; 2631 u8 tos = id_priv->tos_set ? 
id_priv->tos : default_roce_tos; 2632 2633 2634 work = kzalloc(sizeof *work, GFP_KERNEL); 2635 if (!work) 2636 return -ENOMEM; 2637 2638 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2639 if (!route->path_rec) { 2640 ret = -ENOMEM; 2641 goto err1; 2642 } 2643 2644 route->num_paths = 1; 2645 2646 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2647 if (!ndev) { 2648 ret = -ENODEV; 2649 goto err2; 2650 } 2651 2652 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2653 &route->path_rec->sgid); 2654 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2655 &route->path_rec->dgid); 2656 2657 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2658 /* TODO: get the hoplimit from the inet/inet6 device */ 2659 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2660 else 2661 route->path_rec->hop_limit = 1; 2662 route->path_rec->reversible = 1; 2663 route->path_rec->pkey = cpu_to_be16(0xffff); 2664 route->path_rec->mtu_selector = IB_SA_EQ; 2665 route->path_rec->sl = iboe_tos_to_sl(ndev, tos); 2666 route->path_rec->traffic_class = tos; 2667 route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 2668 route->path_rec->rate_selector = IB_SA_EQ; 2669 route->path_rec->rate = iboe_get_rate(ndev); 2670 dev_put(ndev); 2671 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2672 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2673 if (!route->path_rec->mtu) { 2674 ret = -EINVAL; 2675 goto err2; 2676 } 2677 2678 cma_init_resolve_route_work(work, id_priv); 2679 queue_work(cma_wq, &work->work); 2680 2681 return 0; 2682 2683 err2: 2684 kfree(route->path_rec); 2685 route->path_rec = NULL; 2686 err1: 2687 kfree(work); 2688 return ret; 2689 } 2690 2691 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2692 { 2693 struct rdma_id_private *id_priv; 2694 int ret; 2695 2696 id_priv = container_of(id, struct rdma_id_private, id); 2697 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2698 return -EINVAL; 2699 2700 atomic_inc(&id_priv->refcount); 2701 if (rdma_cap_ib_sa(id->device, id->port_num)) 2702 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2703 else if (rdma_protocol_roce(id->device, id->port_num)) 2704 ret = cma_resolve_iboe_route(id_priv); 2705 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2706 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2707 else 2708 ret = -ENOSYS; 2709 2710 if (ret) 2711 goto err; 2712 2713 return 0; 2714 err: 2715 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2716 cma_deref_id(id_priv); 2717 return ret; 2718 } 2719 EXPORT_SYMBOL(rdma_resolve_route); 2720 2721 static void cma_set_loopback(struct sockaddr *addr) 2722 { 2723 switch (addr->sa_family) { 2724 case AF_INET: 2725 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2726 break; 2727 case AF_INET6: 2728 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2729 0, 0, 0, htonl(1)); 2730 break; 2731 default: 2732 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2733 0, 0, 0, htonl(1)); 2734 break; 2735 } 2736 } 2737 2738 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2739 { 2740 struct cma_device *cma_dev, *cur_dev; 2741 union ib_gid gid; 2742 enum ib_port_state port_state; 2743 u16 pkey; 2744 int ret; 2745 u8 p; 2746 2747 cma_dev = NULL; 2748 mutex_lock(&lock); 2749 list_for_each_entry(cur_dev, &dev_list, list) { 2750 if (cma_family(id_priv) == AF_IB && 2751 !rdma_cap_ib_cm(cur_dev->device, 1)) 2752 continue; 2753 2754 if (!cma_dev) 
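/* remember the first compatible device as a fallback if no active port is found */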
2755 cma_dev = cur_dev; 2756 2757 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2758 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && 2759 port_state == IB_PORT_ACTIVE) { 2760 cma_dev = cur_dev; 2761 goto port_found; 2762 } 2763 } 2764 } 2765 2766 if (!cma_dev) { 2767 ret = -ENODEV; 2768 goto out; 2769 } 2770 2771 p = 1; 2772 2773 port_found: 2774 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2775 if (ret) 2776 goto out; 2777 2778 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2779 if (ret) 2780 goto out; 2781 2782 id_priv->id.route.addr.dev_addr.dev_type = 2783 (rdma_protocol_ib(cma_dev->device, p)) ? 2784 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2785 2786 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2787 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2788 id_priv->id.port_num = p; 2789 cma_attach_to_dev(id_priv, cma_dev); 2790 cma_set_loopback(cma_src_addr(id_priv)); 2791 out: 2792 mutex_unlock(&lock); 2793 return ret; 2794 } 2795 2796 static void addr_handler(int status, struct sockaddr *src_addr, 2797 struct rdma_dev_addr *dev_addr, void *context) 2798 { 2799 struct rdma_id_private *id_priv = context; 2800 struct rdma_cm_event event; 2801 2802 memset(&event, 0, sizeof event); 2803 mutex_lock(&id_priv->handler_mutex); 2804 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2805 RDMA_CM_ADDR_RESOLVED)) 2806 goto out; 2807 2808 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2809 if (!status && !id_priv->cma_dev) { 2810 status = cma_acquire_dev(id_priv, NULL); 2811 if (status) 2812 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2813 status); 2814 } else { 2815 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); 2816 } 2817 2818 if (status) { 2819 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2820 RDMA_CM_ADDR_BOUND)) 2821 goto out; 2822 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2823 event.status = status; 2824 } else 2825 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2826 2827 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2828 cma_exch(id_priv, RDMA_CM_DESTROYING); 2829 mutex_unlock(&id_priv->handler_mutex); 2830 cma_deref_id(id_priv); 2831 rdma_destroy_id(&id_priv->id); 2832 return; 2833 } 2834 out: 2835 mutex_unlock(&id_priv->handler_mutex); 2836 cma_deref_id(id_priv); 2837 } 2838 2839 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2840 { 2841 struct cma_work *work; 2842 union ib_gid gid; 2843 int ret; 2844 2845 work = kzalloc(sizeof *work, GFP_KERNEL); 2846 if (!work) 2847 return -ENOMEM; 2848 2849 if (!id_priv->cma_dev) { 2850 ret = cma_bind_loopback(id_priv); 2851 if (ret) 2852 goto err; 2853 } 2854 2855 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2856 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2857 2858 cma_init_resolve_addr_work(work, id_priv); 2859 queue_work(cma_wq, &work->work); 2860 return 0; 2861 err: 2862 kfree(work); 2863 return ret; 2864 } 2865 2866 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2867 { 2868 struct cma_work *work; 2869 int ret; 2870 2871 work = kzalloc(sizeof *work, GFP_KERNEL); 2872 if (!work) 2873 return -ENOMEM; 2874 2875 if (!id_priv->cma_dev) { 2876 ret = cma_resolve_ib_dev(id_priv); 2877 if (ret) 2878 goto err; 2879 } 2880 2881 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2882 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2883 2884 cma_init_resolve_addr_work(work, 
id_priv); 2885 queue_work(cma_wq, &work->work); 2886 return 0; 2887 err: 2888 kfree(work); 2889 return ret; 2890 } 2891 2892 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2893 struct sockaddr *dst_addr) 2894 { 2895 if (!src_addr || !src_addr->sa_family) { 2896 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2897 src_addr->sa_family = dst_addr->sa_family; 2898 if (IS_ENABLED(CONFIG_IPV6) && 2899 dst_addr->sa_family == AF_INET6) { 2900 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2901 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2902 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2903 if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 2904 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2905 } else if (dst_addr->sa_family == AF_IB) { 2906 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2907 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2908 } 2909 } 2910 return rdma_bind_addr(id, src_addr); 2911 } 2912 2913 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2914 struct sockaddr *dst_addr, int timeout_ms) 2915 { 2916 struct rdma_id_private *id_priv; 2917 int ret; 2918 2919 id_priv = container_of(id, struct rdma_id_private, id); 2920 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2921 if (id_priv->state == RDMA_CM_IDLE) { 2922 ret = cma_bind_addr(id, src_addr, dst_addr); 2923 if (ret) { 2924 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2925 return ret; 2926 } 2927 } 2928 2929 if (cma_family(id_priv) != dst_addr->sa_family) { 2930 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2931 return -EINVAL; 2932 } 2933 2934 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 2935 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2936 return -EINVAL; 2937 } 2938 2939 atomic_inc(&id_priv->refcount); 2940 if (cma_any_addr(dst_addr)) { 2941 ret = cma_resolve_loopback(id_priv); 2942 } else { 2943 if (dst_addr->sa_family == AF_IB) { 2944 ret = cma_resolve_ib_addr(id_priv); 2945 } else { 2946 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2947 dst_addr, &id->route.addr.dev_addr, 2948 timeout_ms, addr_handler, id_priv); 2949 } 2950 } 2951 if (ret) 2952 goto err; 2953 2954 return 0; 2955 err: 2956 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2957 cma_deref_id(id_priv); 2958 return ret; 2959 } 2960 EXPORT_SYMBOL(rdma_resolve_addr); 2961 2962 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2963 { 2964 struct rdma_id_private *id_priv; 2965 unsigned long flags; 2966 int ret; 2967 2968 id_priv = container_of(id, struct rdma_id_private, id); 2969 spin_lock_irqsave(&id_priv->lock, flags); 2970 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2971 id_priv->reuseaddr = reuse; 2972 ret = 0; 2973 } else { 2974 ret = -EINVAL; 2975 } 2976 spin_unlock_irqrestore(&id_priv->lock, flags); 2977 return ret; 2978 } 2979 EXPORT_SYMBOL(rdma_set_reuseaddr); 2980 2981 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2982 { 2983 struct rdma_id_private *id_priv; 2984 unsigned long flags; 2985 int ret; 2986 2987 id_priv = container_of(id, struct rdma_id_private, id); 2988 spin_lock_irqsave(&id_priv->lock, flags); 2989 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2990 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2991 id_priv->afonly = afonly; 2992 ret = 0; 2993 } else { 2994 ret = -EINVAL; 2995 } 2996 spin_unlock_irqrestore(&id_priv->lock, flags); 2997 
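/* afonly mirrors the IPV6_V6ONLY socket option: when two ids both have it set, cma_check_port() ignores owners bound to a different address family, so IPv4 and IPv6 listeners can share a port number. */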
return ret; 2998 } 2999 EXPORT_SYMBOL(rdma_set_afonly); 3000 3001 static void cma_bind_port(struct rdma_bind_list *bind_list, 3002 struct rdma_id_private *id_priv) 3003 { 3004 struct sockaddr *addr; 3005 struct sockaddr_ib *sib; 3006 u64 sid, mask; 3007 __be16 port; 3008 3009 addr = cma_src_addr(id_priv); 3010 port = htons(bind_list->port); 3011 3012 switch (addr->sa_family) { 3013 case AF_INET: 3014 ((struct sockaddr_in *) addr)->sin_port = port; 3015 break; 3016 case AF_INET6: 3017 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3018 break; 3019 case AF_IB: 3020 sib = (struct sockaddr_ib *) addr; 3021 sid = be64_to_cpu(sib->sib_sid); 3022 mask = be64_to_cpu(sib->sib_sid_mask); 3023 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3024 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3025 break; 3026 } 3027 id_priv->bind_list = bind_list; 3028 hlist_add_head(&id_priv->node, &bind_list->owners); 3029 } 3030 3031 static int cma_alloc_port(enum rdma_port_space ps, 3032 struct rdma_id_private *id_priv, unsigned short snum) 3033 { 3034 struct rdma_bind_list *bind_list; 3035 int ret; 3036 3037 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3038 if (!bind_list) 3039 return -ENOMEM; 3040 3041 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3042 snum); 3043 if (ret < 0) 3044 goto err; 3045 3046 bind_list->ps = ps; 3047 bind_list->port = (unsigned short)ret; 3048 cma_bind_port(bind_list, id_priv); 3049 return 0; 3050 err: 3051 kfree(bind_list); 3052 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3053 } 3054 3055 static int cma_port_is_unique(struct rdma_bind_list *bind_list, 3056 struct rdma_id_private *id_priv) 3057 { 3058 struct rdma_id_private *cur_id; 3059 struct sockaddr *daddr = cma_dst_addr(id_priv); 3060 struct sockaddr *saddr = cma_src_addr(id_priv); 3061 __be16 dport = cma_port(daddr); 3062 3063 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3064 struct sockaddr *cur_daddr = cma_dst_addr(cur_id); 3065 struct sockaddr *cur_saddr = cma_src_addr(cur_id); 3066 __be16 cur_dport = cma_port(cur_daddr); 3067 3068 if (id_priv == cur_id) 3069 continue; 3070 3071 /* different dest port -> unique */ 3072 if (!cma_any_port(cur_daddr) && 3073 (dport != cur_dport)) 3074 continue; 3075 3076 /* different src address -> unique */ 3077 if (!cma_any_addr(saddr) && 3078 !cma_any_addr(cur_saddr) && 3079 cma_addr_cmp(saddr, cur_saddr)) 3080 continue; 3081 3082 /* different dst address -> unique */ 3083 if (!cma_any_addr(cur_daddr) && 3084 cma_addr_cmp(daddr, cur_daddr)) 3085 continue; 3086 3087 return -EADDRNOTAVAIL; 3088 } 3089 return 0; 3090 } 3091 3092 static int cma_alloc_any_port(enum rdma_port_space ps, 3093 struct rdma_id_private *id_priv) 3094 { 3095 static unsigned int last_used_port; 3096 int low, high, remaining; 3097 unsigned int rover; 3098 struct net *net = id_priv->id.route.addr.dev_addr.net; 3099 3100 inet_get_local_port_range(net, &low, &high); 3101 remaining = (high - low) + 1; 3102 rover = prandom_u32() % remaining + low; 3103 retry: 3104 if (last_used_port != rover) { 3105 struct rdma_bind_list *bind_list; 3106 int ret; 3107 3108 bind_list = cma_ps_find(net, ps, (unsigned short)rover); 3109 3110 if (!bind_list) { 3111 ret = cma_alloc_port(ps, id_priv, rover); 3112 } else { 3113 ret = cma_port_is_unique(bind_list, id_priv); 3114 if (!ret) 3115 cma_bind_port(bind_list, id_priv); 3116 } 3117 /* 3118 * Remember previously used port number in order to avoid 3119 * re-using same port immediately after it is closed. 
3120 */ 3121 if (!ret) 3122 last_used_port = rover; 3123 if (ret != -EADDRNOTAVAIL) 3124 return ret; 3125 } 3126 if (--remaining) { 3127 rover++; 3128 if ((rover < low) || (rover > high)) 3129 rover = low; 3130 goto retry; 3131 } 3132 return -EADDRNOTAVAIL; 3133 } 3134 3135 /* 3136 * Check that the requested port is available. This is called when trying to 3137 * bind to a specific port, or when trying to listen on a bound port. In 3138 * the latter case, the provided id_priv may already be on the bind_list, but 3139 * we still need to check that it's okay to start listening. 3140 */ 3141 static int cma_check_port(struct rdma_bind_list *bind_list, 3142 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3143 { 3144 struct rdma_id_private *cur_id; 3145 struct sockaddr *addr, *cur_addr; 3146 3147 addr = cma_src_addr(id_priv); 3148 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3149 if (id_priv == cur_id) 3150 continue; 3151 3152 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3153 cur_id->reuseaddr) 3154 continue; 3155 3156 cur_addr = cma_src_addr(cur_id); 3157 if (id_priv->afonly && cur_id->afonly && 3158 (addr->sa_family != cur_addr->sa_family)) 3159 continue; 3160 3161 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3162 return -EADDRNOTAVAIL; 3163 3164 if (!cma_addr_cmp(addr, cur_addr)) 3165 return -EADDRINUSE; 3166 } 3167 return 0; 3168 } 3169 3170 static int cma_use_port(enum rdma_port_space ps, 3171 struct rdma_id_private *id_priv) 3172 { 3173 struct rdma_bind_list *bind_list; 3174 unsigned short snum; 3175 int ret; 3176 3177 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3178 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 3179 return -EACCES; 3180 3181 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3182 if (!bind_list) { 3183 ret = cma_alloc_port(ps, id_priv, snum); 3184 } else { 3185 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3186 if (!ret) 3187 cma_bind_port(bind_list, id_priv); 3188 } 3189 return ret; 3190 } 3191 3192 static int cma_bind_listen(struct rdma_id_private *id_priv) 3193 { 3194 struct rdma_bind_list *bind_list = id_priv->bind_list; 3195 int ret = 0; 3196 3197 mutex_lock(&lock); 3198 if (bind_list->owners.first->next) 3199 ret = cma_check_port(bind_list, id_priv, 0); 3200 mutex_unlock(&lock); 3201 return ret; 3202 } 3203 3204 static enum rdma_port_space cma_select_inet_ps( 3205 struct rdma_id_private *id_priv) 3206 { 3207 switch (id_priv->id.ps) { 3208 case RDMA_PS_TCP: 3209 case RDMA_PS_UDP: 3210 case RDMA_PS_IPOIB: 3211 case RDMA_PS_IB: 3212 return id_priv->id.ps; 3213 default: 3214 3215 return 0; 3216 } 3217 } 3218 3219 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3220 { 3221 enum rdma_port_space ps = 0; 3222 struct sockaddr_ib *sib; 3223 u64 sid_ps, mask, sid; 3224 3225 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3226 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3227 sid = be64_to_cpu(sib->sib_sid) & mask; 3228 3229 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3230 sid_ps = RDMA_IB_IP_PS_IB; 3231 ps = RDMA_PS_IB; 3232 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3233 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3234 sid_ps = RDMA_IB_IP_PS_TCP; 3235 ps = RDMA_PS_TCP; 3236 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3237 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3238 sid_ps = RDMA_IB_IP_PS_UDP; 3239 ps = RDMA_PS_UDP; 3240 } 3241 3242 if (ps) { 3243 
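/* Fold the selected port space prefix and the 16-bit port number back into the AF_IB service ID, and widen the mask so it covers the whole RDMA_IB_IP_PS_MASK prefix. */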
sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3244 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3245 be64_to_cpu(sib->sib_sid_mask)); 3246 } 3247 return ps; 3248 } 3249 3250 static int cma_get_port(struct rdma_id_private *id_priv) 3251 { 3252 enum rdma_port_space ps; 3253 int ret; 3254 3255 if (cma_family(id_priv) != AF_IB) 3256 ps = cma_select_inet_ps(id_priv); 3257 else 3258 ps = cma_select_ib_ps(id_priv); 3259 if (!ps) 3260 return -EPROTONOSUPPORT; 3261 3262 mutex_lock(&lock); 3263 if (cma_any_port(cma_src_addr(id_priv))) 3264 ret = cma_alloc_any_port(ps, id_priv); 3265 else 3266 ret = cma_use_port(ps, id_priv); 3267 mutex_unlock(&lock); 3268 3269 return ret; 3270 } 3271 3272 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3273 struct sockaddr *addr) 3274 { 3275 #if IS_ENABLED(CONFIG_IPV6) 3276 struct sockaddr_in6 *sin6; 3277 3278 if (addr->sa_family != AF_INET6) 3279 return 0; 3280 3281 sin6 = (struct sockaddr_in6 *) addr; 3282 3283 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 3284 return 0; 3285 3286 if (!sin6->sin6_scope_id) 3287 return -EINVAL; 3288 3289 dev_addr->bound_dev_if = sin6->sin6_scope_id; 3290 #endif 3291 return 0; 3292 } 3293 3294 int rdma_listen(struct rdma_cm_id *id, int backlog) 3295 { 3296 struct rdma_id_private *id_priv; 3297 int ret; 3298 3299 id_priv = container_of(id, struct rdma_id_private, id); 3300 if (id_priv->state == RDMA_CM_IDLE) { 3301 id->route.addr.src_addr.ss_family = AF_INET; 3302 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3303 if (ret) 3304 return ret; 3305 } 3306 3307 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3308 return -EINVAL; 3309 3310 if (id_priv->reuseaddr) { 3311 ret = cma_bind_listen(id_priv); 3312 if (ret) 3313 goto err; 3314 } 3315 3316 id_priv->backlog = backlog; 3317 if (id->device) { 3318 if (rdma_cap_ib_cm(id->device, 1)) { 3319 ret = cma_ib_listen(id_priv); 3320 if (ret) 3321 goto err; 3322 } else if (rdma_cap_iw_cm(id->device, 1)) { 3323 ret = cma_iw_listen(id_priv, backlog); 3324 if (ret) 3325 goto err; 3326 } else { 3327 ret = -ENOSYS; 3328 goto err; 3329 } 3330 } else 3331 cma_listen_on_all(id_priv); 3332 3333 return 0; 3334 err: 3335 id_priv->backlog = 0; 3336 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3337 return ret; 3338 } 3339 EXPORT_SYMBOL(rdma_listen); 3340 3341 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3342 { 3343 struct rdma_id_private *id_priv; 3344 int ret; 3345 struct sockaddr *daddr; 3346 3347 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3348 addr->sa_family != AF_IB) 3349 return -EAFNOSUPPORT; 3350 3351 id_priv = container_of(id, struct rdma_id_private, id); 3352 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3353 return -EINVAL; 3354 3355 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3356 if (ret) 3357 goto err1; 3358 3359 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3360 if (!cma_any_addr(addr)) { 3361 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3362 if (ret) 3363 goto err1; 3364 3365 ret = cma_acquire_dev(id_priv, NULL); 3366 if (ret) 3367 goto err1; 3368 } 3369 3370 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3371 if (addr->sa_family == AF_INET) 3372 id_priv->afonly = 1; 3373 #if IS_ENABLED(CONFIG_IPV6) 3374 else if (addr->sa_family == AF_INET6) { 3375 struct net *net = id_priv->id.route.addr.dev_addr.net; 3376 3377 id_priv->afonly = net->ipv6.sysctl.bindv6only; 3378 } 3379 #endif 3380 } 
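/* Reserve the port inside the id's RDMA port space: cma_get_port() selects the port space, then either claims the requested port via cma_use_port() or, when no port was given, picks an ephemeral one via cma_alloc_any_port(). */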
3381 ret = cma_get_port(id_priv); 3382 if (ret) 3383 goto err2; 3384 3385 daddr = cma_dst_addr(id_priv); 3386 daddr->sa_family = addr->sa_family; 3387 3388 return 0; 3389 err2: 3390 if (id_priv->cma_dev) 3391 cma_release_dev(id_priv); 3392 err1: 3393 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3394 return ret; 3395 } 3396 EXPORT_SYMBOL(rdma_bind_addr); 3397 3398 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3399 { 3400 struct cma_hdr *cma_hdr; 3401 3402 cma_hdr = hdr; 3403 cma_hdr->cma_version = CMA_VERSION; 3404 if (cma_family(id_priv) == AF_INET) { 3405 struct sockaddr_in *src4, *dst4; 3406 3407 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3408 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3409 3410 cma_set_ip_ver(cma_hdr, 4); 3411 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3412 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3413 cma_hdr->port = src4->sin_port; 3414 } else if (cma_family(id_priv) == AF_INET6) { 3415 struct sockaddr_in6 *src6, *dst6; 3416 3417 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3418 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3419 3420 cma_set_ip_ver(cma_hdr, 6); 3421 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3422 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3423 cma_hdr->port = src6->sin6_port; 3424 } 3425 return 0; 3426 } 3427 3428 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3429 struct ib_cm_event *ib_event) 3430 { 3431 struct rdma_id_private *id_priv = cm_id->context; 3432 struct rdma_cm_event event; 3433 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3434 int ret = 0; 3435 3436 mutex_lock(&id_priv->handler_mutex); 3437 if (id_priv->state != RDMA_CM_CONNECT) 3438 goto out; 3439 3440 memset(&event, 0, sizeof event); 3441 switch (ib_event->event) { 3442 case IB_CM_SIDR_REQ_ERROR: 3443 event.event = RDMA_CM_EVENT_UNREACHABLE; 3444 event.status = -ETIMEDOUT; 3445 break; 3446 case IB_CM_SIDR_REP_RECEIVED: 3447 event.param.ud.private_data = ib_event->private_data; 3448 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3449 if (rep->status != IB_SIDR_SUCCESS) { 3450 event.event = RDMA_CM_EVENT_UNREACHABLE; 3451 event.status = ib_event->param.sidr_rep_rcvd.status; 3452 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", 3453 event.status); 3454 break; 3455 } 3456 ret = cma_set_qkey(id_priv, rep->qkey); 3457 if (ret) { 3458 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); 3459 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3460 event.status = ret; 3461 break; 3462 } 3463 ib_init_ah_attr_from_path(id_priv->id.device, 3464 id_priv->id.port_num, 3465 id_priv->id.route.path_rec, 3466 &event.param.ud.ah_attr); 3467 event.param.ud.qp_num = rep->qpn; 3468 event.param.ud.qkey = rep->qkey; 3469 event.event = RDMA_CM_EVENT_ESTABLISHED; 3470 event.status = 0; 3471 break; 3472 default: 3473 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3474 ib_event->event); 3475 goto out; 3476 } 3477 3478 ret = id_priv->id.event_handler(&id_priv->id, &event); 3479 if (ret) { 3480 /* Destroy the CM ID by returning a non-zero value. 
*/ 3481 id_priv->cm_id.ib = NULL; 3482 cma_exch(id_priv, RDMA_CM_DESTROYING); 3483 mutex_unlock(&id_priv->handler_mutex); 3484 rdma_destroy_id(&id_priv->id); 3485 return ret; 3486 } 3487 out: 3488 mutex_unlock(&id_priv->handler_mutex); 3489 return ret; 3490 } 3491 3492 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3493 struct rdma_conn_param *conn_param) 3494 { 3495 struct ib_cm_sidr_req_param req; 3496 struct ib_cm_id *id; 3497 void *private_data; 3498 u8 offset; 3499 int ret; 3500 3501 memset(&req, 0, sizeof req); 3502 offset = cma_user_data_offset(id_priv); 3503 req.private_data_len = offset + conn_param->private_data_len; 3504 if (req.private_data_len < conn_param->private_data_len) 3505 return -EINVAL; 3506 3507 if (req.private_data_len) { 3508 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3509 if (!private_data) 3510 return -ENOMEM; 3511 } else { 3512 private_data = NULL; 3513 } 3514 3515 if (conn_param->private_data && conn_param->private_data_len) 3516 memcpy(private_data + offset, conn_param->private_data, 3517 conn_param->private_data_len); 3518 3519 if (private_data) { 3520 ret = cma_format_hdr(private_data, id_priv); 3521 if (ret) 3522 goto out; 3523 req.private_data = private_data; 3524 } 3525 3526 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3527 id_priv); 3528 if (IS_ERR(id)) { 3529 ret = PTR_ERR(id); 3530 goto out; 3531 } 3532 id_priv->cm_id.ib = id; 3533 3534 req.path = id_priv->id.route.path_rec; 3535 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3536 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3537 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3538 3539 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3540 if (ret) { 3541 ib_destroy_cm_id(id_priv->cm_id.ib); 3542 id_priv->cm_id.ib = NULL; 3543 } 3544 out: 3545 kfree(private_data); 3546 return ret; 3547 } 3548 3549 static int cma_connect_ib(struct rdma_id_private *id_priv, 3550 struct rdma_conn_param *conn_param) 3551 { 3552 struct ib_cm_req_param req; 3553 struct rdma_route *route; 3554 void *private_data; 3555 struct ib_cm_id *id; 3556 u8 offset; 3557 int ret; 3558 3559 memset(&req, 0, sizeof req); 3560 offset = cma_user_data_offset(id_priv); 3561 req.private_data_len = offset + conn_param->private_data_len; 3562 if (req.private_data_len < conn_param->private_data_len) 3563 return -EINVAL; 3564 3565 if (req.private_data_len) { 3566 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3567 if (!private_data) 3568 return -ENOMEM; 3569 } else { 3570 private_data = NULL; 3571 } 3572 3573 if (conn_param->private_data && conn_param->private_data_len) 3574 memcpy(private_data + offset, conn_param->private_data, 3575 conn_param->private_data_len); 3576 3577 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3578 if (IS_ERR(id)) { 3579 ret = PTR_ERR(id); 3580 goto out; 3581 } 3582 id_priv->cm_id.ib = id; 3583 3584 route = &id_priv->id.route; 3585 if (private_data) { 3586 ret = cma_format_hdr(private_data, id_priv); 3587 if (ret) 3588 goto out; 3589 req.private_data = private_data; 3590 } 3591 3592 req.primary_path = &route->path_rec[0]; 3593 if (route->num_paths == 2) 3594 req.alternate_path = &route->path_rec[1]; 3595 3596 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3597 req.qp_num = id_priv->qp_num; 3598 req.qp_type = id_priv->id.qp_type; 3599 req.starting_psn = id_priv->seq_num; 3600 req.responder_resources = conn_param->responder_resources; 3601 req.initiator_depth = 
conn_param->initiator_depth; 3602 req.flow_control = conn_param->flow_control; 3603 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3604 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3605 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3606 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3607 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3608 req.srq = id_priv->srq ? 1 : 0; 3609 3610 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3611 out: 3612 if (ret && !IS_ERR(id)) { 3613 ib_destroy_cm_id(id); 3614 id_priv->cm_id.ib = NULL; 3615 } 3616 3617 kfree(private_data); 3618 return ret; 3619 } 3620 3621 static int cma_connect_iw(struct rdma_id_private *id_priv, 3622 struct rdma_conn_param *conn_param) 3623 { 3624 struct iw_cm_id *cm_id; 3625 int ret; 3626 struct iw_cm_conn_param iw_param; 3627 3628 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3629 if (IS_ERR(cm_id)) 3630 return PTR_ERR(cm_id); 3631 3632 cm_id->tos = id_priv->tos; 3633 id_priv->cm_id.iw = cm_id; 3634 3635 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3636 rdma_addr_size(cma_src_addr(id_priv))); 3637 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3638 rdma_addr_size(cma_dst_addr(id_priv))); 3639 3640 ret = cma_modify_qp_rtr(id_priv, conn_param); 3641 if (ret) 3642 goto out; 3643 3644 if (conn_param) { 3645 iw_param.ord = conn_param->initiator_depth; 3646 iw_param.ird = conn_param->responder_resources; 3647 iw_param.private_data = conn_param->private_data; 3648 iw_param.private_data_len = conn_param->private_data_len; 3649 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3650 } else { 3651 memset(&iw_param, 0, sizeof iw_param); 3652 iw_param.qpn = id_priv->qp_num; 3653 } 3654 ret = iw_cm_connect(cm_id, &iw_param); 3655 out: 3656 if (ret) { 3657 iw_destroy_cm_id(cm_id); 3658 id_priv->cm_id.iw = NULL; 3659 } 3660 return ret; 3661 } 3662 3663 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3664 { 3665 struct rdma_id_private *id_priv; 3666 int ret; 3667 3668 id_priv = container_of(id, struct rdma_id_private, id); 3669 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3670 return -EINVAL; 3671 3672 if (!id->qp) { 3673 id_priv->qp_num = conn_param->qp_num; 3674 id_priv->srq = conn_param->srq; 3675 } 3676 3677 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3678 if (id->qp_type == IB_QPT_UD) 3679 ret = cma_resolve_ib_udp(id_priv, conn_param); 3680 else 3681 ret = cma_connect_ib(id_priv, conn_param); 3682 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3683 ret = cma_connect_iw(id_priv, conn_param); 3684 else 3685 ret = -ENOSYS; 3686 if (ret) 3687 goto err; 3688 3689 return 0; 3690 err: 3691 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3692 return ret; 3693 } 3694 EXPORT_SYMBOL(rdma_connect); 3695 3696 static int cma_accept_ib(struct rdma_id_private *id_priv, 3697 struct rdma_conn_param *conn_param) 3698 { 3699 struct ib_cm_rep_param rep; 3700 int ret; 3701 3702 ret = cma_modify_qp_rtr(id_priv, conn_param); 3703 if (ret) 3704 goto out; 3705 3706 ret = cma_modify_qp_rts(id_priv, conn_param); 3707 if (ret) 3708 goto out; 3709 3710 memset(&rep, 0, sizeof rep); 3711 rep.qp_num = id_priv->qp_num; 3712 rep.starting_psn = id_priv->seq_num; 3713 rep.private_data = conn_param->private_data; 3714 rep.private_data_len = conn_param->private_data_len; 3715 rep.responder_resources = conn_param->responder_resources; 3716 rep.initiator_depth = conn_param->initiator_depth; 3717 
rep.failover_accepted = 0; 3718 rep.flow_control = conn_param->flow_control; 3719 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3720 rep.srq = id_priv->srq ? 1 : 0; 3721 3722 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3723 out: 3724 return ret; 3725 } 3726 3727 static int cma_accept_iw(struct rdma_id_private *id_priv, 3728 struct rdma_conn_param *conn_param) 3729 { 3730 struct iw_cm_conn_param iw_param; 3731 int ret; 3732 3733 if (!conn_param) 3734 return -EINVAL; 3735 3736 ret = cma_modify_qp_rtr(id_priv, conn_param); 3737 if (ret) 3738 return ret; 3739 3740 iw_param.ord = conn_param->initiator_depth; 3741 iw_param.ird = conn_param->responder_resources; 3742 iw_param.private_data = conn_param->private_data; 3743 iw_param.private_data_len = conn_param->private_data_len; 3744 if (id_priv->id.qp) { 3745 iw_param.qpn = id_priv->qp_num; 3746 } else 3747 iw_param.qpn = conn_param->qp_num; 3748 3749 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3750 } 3751 3752 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3753 enum ib_cm_sidr_status status, u32 qkey, 3754 const void *private_data, int private_data_len) 3755 { 3756 struct ib_cm_sidr_rep_param rep; 3757 int ret; 3758 3759 memset(&rep, 0, sizeof rep); 3760 rep.status = status; 3761 if (status == IB_SIDR_SUCCESS) { 3762 ret = cma_set_qkey(id_priv, qkey); 3763 if (ret) 3764 return ret; 3765 rep.qp_num = id_priv->qp_num; 3766 rep.qkey = id_priv->qkey; 3767 } 3768 rep.private_data = private_data; 3769 rep.private_data_len = private_data_len; 3770 3771 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3772 } 3773 3774 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3775 { 3776 struct rdma_id_private *id_priv; 3777 int ret; 3778 3779 id_priv = container_of(id, struct rdma_id_private, id); 3780 3781 id_priv->owner = task_pid_nr(current); 3782 3783 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3784 return -EINVAL; 3785 3786 if (!id->qp && conn_param) { 3787 id_priv->qp_num = conn_param->qp_num; 3788 id_priv->srq = conn_param->srq; 3789 } 3790 3791 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3792 if (id->qp_type == IB_QPT_UD) { 3793 if (conn_param) 3794 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3795 conn_param->qkey, 3796 conn_param->private_data, 3797 conn_param->private_data_len); 3798 else 3799 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3800 0, NULL, 0); 3801 } else { 3802 if (conn_param) 3803 ret = cma_accept_ib(id_priv, conn_param); 3804 else 3805 ret = cma_rep_recv(id_priv); 3806 } 3807 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3808 ret = cma_accept_iw(id_priv, conn_param); 3809 else 3810 ret = -ENOSYS; 3811 3812 if (ret) 3813 goto reject; 3814 3815 return 0; 3816 reject: 3817 cma_modify_qp_err(id_priv); 3818 rdma_reject(id, NULL, 0); 3819 return ret; 3820 } 3821 EXPORT_SYMBOL(rdma_accept); 3822 3823 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3824 { 3825 struct rdma_id_private *id_priv; 3826 int ret; 3827 3828 id_priv = container_of(id, struct rdma_id_private, id); 3829 if (!id_priv->cm_id.ib) 3830 return -EINVAL; 3831 3832 switch (id->device->node_type) { 3833 case RDMA_NODE_IB_CA: 3834 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3835 break; 3836 default: 3837 ret = 0; 3838 break; 3839 } 3840 return ret; 3841 } 3842 EXPORT_SYMBOL(rdma_notify); 3843 3844 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3845 u8 private_data_len) 3846 { 3847 struct rdma_id_private *id_priv; 3848 int ret; 3849 3850 id_priv = 
		container_of(id, struct rdma_id_private, id);
	if (!id_priv->cm_id.ib)
		return -EINVAL;

	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (id->qp_type == IB_QPT_UD)
			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
						private_data, private_data_len);
		else
			ret = ib_send_cm_rej(id_priv->cm_id.ib,
					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
					     0, private_data, private_data_len);
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		ret = iw_cm_reject(id_priv->cm_id.iw,
				   private_data, private_data_len);
	} else
		ret = -ENOSYS;

	return ret;
}
EXPORT_SYMBOL(rdma_reject);

int rdma_disconnect(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!id_priv->cm_id.ib)
		return -EINVAL;

	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		ret = cma_modify_qp_err(id_priv);
		if (ret)
			goto out;
		/* Initiate or respond to a disconnect. */
		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
	} else
		ret = -EINVAL;

out:
	return ret;
}
EXPORT_SYMBOL(rdma_disconnect);

/*
 * Completion handler for a multicast join.  On success, resolve the qkey,
 * attach any QP bound to the ID and report RDMA_CM_EVENT_MULTICAST_JOIN;
 * otherwise report RDMA_CM_EVENT_MULTICAST_ERROR.
 */
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc = multicast->context;
	struct rdma_cm_event event;
	int ret = 0;

	id_priv = mc->id_priv;
	mutex_lock(&id_priv->handler_mutex);
	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
		goto out;

	if (!status)
		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
	else
		pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
				     status);
	mutex_lock(&id_priv->qp_mutex);
	if (!status && id_priv->id.qp) {
		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
					 be16_to_cpu(multicast->rec.mlid));
		if (status)
			pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
					     status);
	}
	mutex_unlock(&id_priv->qp_mutex);

	memset(&event, 0, sizeof event);
	event.status = status;
	event.param.ud.private_data = mc->context;
	if (!status) {
		struct rdma_dev_addr *dev_addr =
			&id_priv->id.route.addr.dev_addr;
		struct net_device *ndev =
			dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		enum ib_gid_type gid_type =
			id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
			rdma_start_port(id_priv->cma_dev->device)];

		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
		ib_init_ah_from_mcmember(id_priv->id.device,
					 id_priv->id.port_num, &multicast->rec,
					 ndev, gid_type,
					 &event.param.ud.ah_attr);
		event.param.ud.qp_num = 0xFFFFFF;
		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
		if (ndev)
			dev_put(ndev);
	} else
		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return 0;
	}

out:
	mutex_unlock(&id_priv->handler_mutex);
	return 0;
}

/* Derive the IB multicast GID implied by the consumer-supplied address. */
static void cma_set_mgid(struct rdma_id_private *id_priv,
			 struct sockaddr *addr, union ib_gid *mgid)
{
	unsigned char mc_map[MAX_ADDR_LEN];
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if ((addr->sa_family == AF_INET6) &&
		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
								 0xFF10A01B)) {
		/* IPv6 address is an SA assigned MGID. */
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_IB) {
		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	} else {
		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	}
}

/* Join an IB multicast group by querying and joining through the SA. */
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
	    (!ib_sa_sendonly_fullmem_support(&sa_client,
					     id_priv->id.device,
					     id_priv->id.port_num))) {
		pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
			"RDMA CM: SM doesn't support Send Only Full Member option\n",
			id_priv->id.device->name, id_priv->id.port_num);
		return -EOPNOTSUPP;
	}

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	if (id_priv->id.ps == RDMA_PS_IPOIB)
		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU |
			     IB_SA_MCMEMBER_REC_HOP_LIMIT;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, GFP_KERNEL,
						cma_ib_mc_handler, mc);
	return PTR_ERR_OR_ZERO(mc->multicast.ib);
}

/*
 * RoCE joins need no SA interaction; complete them from a work item so the
 * consumer still observes the join asynchronously.
 */
static void iboe_mcast_work_handler(struct work_struct *work)
{
	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
	struct cma_multicast *mc = mw->mc;
	struct ib_sa_multicast *m = mc->multicast.ib;

	mc->multicast.ib->context = mc;
	cma_ib_mc_handler(0, m);
	kref_put(&mc->mcref, release_mc);
	kfree(mw);
}

/* Build the RoCE multicast GID for the given address and GID type. */
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
			      enum ib_gid_type gid_type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		mgid->raw[0] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
		mgid->raw[1] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}

/*
 * Join a RoCE (IBoE) multicast group.  The member record is built locally;
 * IPv4 joins on RoCE v2 also issue an IGMP join unless the join is send-only.
 */
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);

	if (addr->sa_family == AF_INET) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);

/* Queue an ADDR_CHANGE event if the ID's source MAC no longer matches ndev. */
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr;
	struct cma_ndev_work *work;

	dev_addr = &id_priv->id.route.addr.dev_addr;

	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
	    (net_eq(dev_net(ndev), dev_addr->net)) &&
	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
			ndev->name, &id_priv->id);
		work = kzalloc(sizeof *work, GFP_KERNEL);
		if (!work)
			return -ENOMEM;

		INIT_WORK(&work->work, cma_ndev_work_handler);
		work->id = id_priv;
		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
		atomic_inc(&id_priv->refcount);
		queue_work(cma_wq, &work->work);
	}

	return 0;
}

/* Propagate bonding failover events to every ID bound to the bond master. */
static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
			       void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_BONDING_FAILOVER)
		return NOTIFY_DONE;

	if (!netif_is_bond_master(ndev))
		return NOTIFY_DONE;

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list)
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			ret = cma_netdev_change(ndev, id_priv);
			if (ret)
				goto out;
		}

out:
	mutex_unlock(&lock);
	return ret;
}

static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

/*
 * ib_client add callback: allocate per-device state, pick a default GID type
 * for each port, and extend any wildcard listens to the new device.
 */
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);

	return;
}

/*
 * Report DEVICE_REMOVAL to one ID.  A nonzero return tells the caller that
 * the consumer wants the ID destroyed.
 */
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

/* Detach every ID from the departing device and wait for the last reference. */
static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

/* ib_client remove callback. */
static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

/* Netlink dump callback for RDMA_NL_RDMA_CM_ID_STATS. */
static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlmsghdr *nlh;
	struct rdma_cm_id_stats *id_stats;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id = NULL;
	struct cma_device *cma_dev;
	int i_dev = 0, i_id = 0;

	/*
	 * We export all of the IDs as a sequence of messages.  Each
	 * ID gets its own netlink message.
	 */
	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list) {
		if (i_dev < cb->args[0]) {
			i_dev++;
			continue;
		}

		i_id = 0;
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			if (i_id < cb->args[1]) {
				i_id++;
				continue;
			}

			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
						sizeof *id_stats, RDMA_NL_RDMA_CM,
						RDMA_NL_RDMA_CM_ID_STATS,
						NLM_F_MULTI);
			if (!id_stats)
				goto out;

			memset(id_stats, 0, sizeof *id_stats);
			id = &id_priv->id;
			id_stats->node_type = id->route.addr.dev_addr.dev_type;
			id_stats->port_num = id->port_num;
			id_stats->bound_dev_if =
				id->route.addr.dev_addr.bound_dev_if;

			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_src_addr(id_priv)),
					  cma_src_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
				goto out;
			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_dst_addr(id_priv)),
					  cma_dst_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
				goto out;

			id_stats->pid = id_priv->owner;
			id_stats->port_space = id->ps;
			id_stats->cm_state = id_priv->state;
			id_stats->qp_num = id_priv->qp_num;
			id_stats->qp_type = id->qp_type;

			i_id++;
			nlmsg_end(skb, nlh);
		}

		cb->args[1] = 0;
		i_dev++;
	}

out:
	mutex_unlock(&lock);
	cb->args[0] = i_dev;
	cb->args[1] = i_id;

	return skb->len;
}

static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};

/* Per-network-namespace port space setup and teardown. */
static int cma_init_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);

	return 0;
}

static void cma_exit_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
}

static struct pernet_operations cma_pernet_operations = {
	.init = cma_init_net,
	.exit = cma_exit_net,
	.id = &cma_pernet_id,
	.size = sizeof(struct cma_pernet),
};

static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&cma_pernet_operations);
	if (ret)
		goto err_wq;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);
	register_netdevice_notifier(&cma_nb);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
	cma_configfs_init();

	return 0;

err:
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
err_wq:
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	rdma_nl_unregister(RDMA_NL_RDMA_CM);
	ib_unregister_client(&cma_client);
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
	destroy_workqueue(cma_wq);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);

module_init(cma_init);
module_exit(cma_cleanup);
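
/*
 * Illustrative sketch only (not used by this module): the consumer-side call
 * order that the multicast support above expects, using the public rdma_cm
 * API from <rdma/rdma_cm.h>.  The names my_event_handler, my_ctx and
 * mcast_addr are placeholders and error handling is omitted.
 *
 *	struct rdma_cm_id *id;
 *
 *	id = rdma_create_id(&init_net, my_event_handler, my_ctx,
 *			    RDMA_PS_UDP, IB_QPT_UD);
 *	rdma_resolve_addr(id, NULL, (struct sockaddr *)&mcast_addr, 2000);
 *	// wait for RDMA_CM_EVENT_ADDR_RESOLVED in my_event_handler
 *	rdma_join_multicast(id, (struct sockaddr *)&mcast_addr,
 *			    BIT(FULLMEMBER_JOIN), my_ctx);
 *	// RDMA_CM_EVENT_MULTICAST_JOIN delivers the AH attributes, QPN and qkey
 *	...
 *	rdma_leave_multicast(id, (struct sockaddr *)&mcast_addr);
 *	rdma_destroy_id(id);
 */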