1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #include <linux/completion.h> 37 #include <linux/in.h> 38 #include <linux/in6.h> 39 #include <linux/mutex.h> 40 #include <linux/random.h> 41 #include <linux/igmp.h> 42 #include <linux/idr.h> 43 #include <linux/inetdevice.h> 44 #include <linux/slab.h> 45 #include <linux/module.h> 46 #include <net/route.h> 47 48 #include <net/net_namespace.h> 49 #include <net/netns/generic.h> 50 #include <net/tcp.h> 51 #include <net/ipv6.h> 52 #include <net/ip_fib.h> 53 #include <net/ip6_route.h> 54 55 #include <rdma/rdma_cm.h> 56 #include <rdma/rdma_cm_ib.h> 57 #include <rdma/rdma_netlink.h> 58 #include <rdma/ib.h> 59 #include <rdma/ib_cache.h> 60 #include <rdma/ib_cm.h> 61 #include <rdma/ib_sa.h> 62 #include <rdma/iw_cm.h> 63 64 #include "core_priv.h" 65 66 MODULE_AUTHOR("Sean Hefty"); 67 MODULE_DESCRIPTION("Generic RDMA CM Agent"); 68 MODULE_LICENSE("Dual BSD/GPL"); 69 70 #define CMA_CM_RESPONSE_TIMEOUT 20 71 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 72 #define CMA_MAX_CM_RETRIES 15 73 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) 74 #define CMA_IBOE_PACKET_LIFETIME 18 75 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP 76 77 static const char * const cma_events[] = { 78 [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", 79 [RDMA_CM_EVENT_ADDR_ERROR] = "address error", 80 [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", 81 [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", 82 [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", 83 [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", 84 [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", 85 [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", 86 [RDMA_CM_EVENT_REJECTED] = "rejected", 87 [RDMA_CM_EVENT_ESTABLISHED] = "established", 88 [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", 89 [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", 90 [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", 91 [RDMA_CM_EVENT_MULTICAST_ERROR] = 
"multicast error", 92 [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", 93 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", 94 }; 95 96 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) 97 { 98 size_t index = event; 99 100 return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 101 cma_events[index] : "unrecognized event"; 102 } 103 EXPORT_SYMBOL(rdma_event_msg); 104 105 const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, 106 int reason) 107 { 108 if (rdma_ib_or_roce(id->device, id->port_num)) 109 return ibcm_reject_msg(reason); 110 111 if (rdma_protocol_iwarp(id->device, id->port_num)) 112 return iwcm_reject_msg(reason); 113 114 WARN_ON_ONCE(1); 115 return "unrecognized transport"; 116 } 117 EXPORT_SYMBOL(rdma_reject_msg); 118 119 bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) 120 { 121 if (rdma_ib_or_roce(id->device, id->port_num)) 122 return reason == IB_CM_REJ_CONSUMER_DEFINED; 123 124 if (rdma_protocol_iwarp(id->device, id->port_num)) 125 return reason == -ECONNREFUSED; 126 127 WARN_ON_ONCE(1); 128 return false; 129 } 130 EXPORT_SYMBOL(rdma_is_consumer_reject); 131 132 const void *rdma_consumer_reject_data(struct rdma_cm_id *id, 133 struct rdma_cm_event *ev, u8 *data_len) 134 { 135 const void *p; 136 137 if (rdma_is_consumer_reject(id, ev->status)) { 138 *data_len = ev->param.conn.private_data_len; 139 p = ev->param.conn.private_data; 140 } else { 141 *data_len = 0; 142 p = NULL; 143 } 144 return p; 145 } 146 EXPORT_SYMBOL(rdma_consumer_reject_data); 147 148 static void cma_add_one(struct ib_device *device); 149 static void cma_remove_one(struct ib_device *device, void *client_data); 150 151 static struct ib_client cma_client = { 152 .name = "cma", 153 .add = cma_add_one, 154 .remove = cma_remove_one 155 }; 156 157 static struct ib_sa_client sa_client; 158 static struct rdma_addr_client addr_client; 159 static LIST_HEAD(dev_list); 160 static LIST_HEAD(listen_any_list); 161 static DEFINE_MUTEX(lock); 162 static struct workqueue_struct *cma_wq; 163 static unsigned int cma_pernet_id; 164 165 struct cma_pernet { 166 struct idr tcp_ps; 167 struct idr udp_ps; 168 struct idr ipoib_ps; 169 struct idr ib_ps; 170 }; 171 172 static struct cma_pernet *cma_pernet(struct net *net) 173 { 174 return net_generic(net, cma_pernet_id); 175 } 176 177 static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps) 178 { 179 struct cma_pernet *pernet = cma_pernet(net); 180 181 switch (ps) { 182 case RDMA_PS_TCP: 183 return &pernet->tcp_ps; 184 case RDMA_PS_UDP: 185 return &pernet->udp_ps; 186 case RDMA_PS_IPOIB: 187 return &pernet->ipoib_ps; 188 case RDMA_PS_IB: 189 return &pernet->ib_ps; 190 default: 191 return NULL; 192 } 193 } 194 195 struct cma_device { 196 struct list_head list; 197 struct ib_device *device; 198 struct completion comp; 199 atomic_t refcount; 200 struct list_head id_list; 201 enum ib_gid_type *default_gid_type; 202 u8 *default_roce_tos; 203 }; 204 205 struct rdma_bind_list { 206 enum rdma_port_space ps; 207 struct hlist_head owners; 208 unsigned short port; 209 }; 210 211 struct class_port_info_context { 212 struct ib_class_port_info *class_port_info; 213 struct ib_device *device; 214 struct completion done; 215 struct ib_sa_query *sa_query; 216 u8 port_num; 217 }; 218 219 static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, 220 struct rdma_bind_list *bind_list, int snum) 221 { 222 struct idr *idr = cma_pernet_idr(net, ps); 223 224 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); 
225 } 226 227 static struct rdma_bind_list *cma_ps_find(struct net *net, 228 enum rdma_port_space ps, int snum) 229 { 230 struct idr *idr = cma_pernet_idr(net, ps); 231 232 return idr_find(idr, snum); 233 } 234 235 static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum) 236 { 237 struct idr *idr = cma_pernet_idr(net, ps); 238 239 idr_remove(idr, snum); 240 } 241 242 enum { 243 CMA_OPTION_AFONLY, 244 }; 245 246 void cma_ref_dev(struct cma_device *cma_dev) 247 { 248 atomic_inc(&cma_dev->refcount); 249 } 250 251 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, 252 void *cookie) 253 { 254 struct cma_device *cma_dev; 255 struct cma_device *found_cma_dev = NULL; 256 257 mutex_lock(&lock); 258 259 list_for_each_entry(cma_dev, &dev_list, list) 260 if (filter(cma_dev->device, cookie)) { 261 found_cma_dev = cma_dev; 262 break; 263 } 264 265 if (found_cma_dev) 266 cma_ref_dev(found_cma_dev); 267 mutex_unlock(&lock); 268 return found_cma_dev; 269 } 270 271 int cma_get_default_gid_type(struct cma_device *cma_dev, 272 unsigned int port) 273 { 274 if (!rdma_is_port_valid(cma_dev->device, port)) 275 return -EINVAL; 276 277 return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; 278 } 279 280 int cma_set_default_gid_type(struct cma_device *cma_dev, 281 unsigned int port, 282 enum ib_gid_type default_gid_type) 283 { 284 unsigned long supported_gids; 285 286 if (!rdma_is_port_valid(cma_dev->device, port)) 287 return -EINVAL; 288 289 supported_gids = roce_gid_type_mask_support(cma_dev->device, port); 290 291 if (!(supported_gids & 1 << default_gid_type)) 292 return -EINVAL; 293 294 cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = 295 default_gid_type; 296 297 return 0; 298 } 299 300 int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port) 301 { 302 if (!rdma_is_port_valid(cma_dev->device, port)) 303 return -EINVAL; 304 305 return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)]; 306 } 307 308 int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port, 309 u8 default_roce_tos) 310 { 311 if (!rdma_is_port_valid(cma_dev->device, port)) 312 return -EINVAL; 313 314 cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] = 315 default_roce_tos; 316 317 return 0; 318 } 319 struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) 320 { 321 return cma_dev->device; 322 } 323 324 /* 325 * Device removal can occur at anytime, so we need extra handling to 326 * serialize notifying the user of device removal with other callbacks. 327 * We do this by disabling removal notification while a callback is in process, 328 * and reporting it after the callback completes. 
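 *
 * A rough sketch of that pattern, using the handler_mutex declared in
 * struct rdma_id_private below (illustrative only, not a separate
 * helper in this file):
 *
 *	mutex_lock(&id_priv->handler_mutex);
 *	ret = id_priv->id.event_handler(&id_priv->id, &event);
 *	mutex_unlock(&id_priv->handler_mutex);
 *
 * The device-removal path takes the same handler_mutex before it
 * reports RDMA_CM_EVENT_DEVICE_REMOVAL, so removal is never signalled
 * while another callback on the same id is still running.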
329 */ 330 struct rdma_id_private { 331 struct rdma_cm_id id; 332 333 struct rdma_bind_list *bind_list; 334 struct hlist_node node; 335 struct list_head list; /* listen_any_list or cma_device.list */ 336 struct list_head listen_list; /* per device listens */ 337 struct cma_device *cma_dev; 338 struct list_head mc_list; 339 340 int internal_id; 341 enum rdma_cm_state state; 342 spinlock_t lock; 343 struct mutex qp_mutex; 344 345 struct completion comp; 346 atomic_t refcount; 347 struct mutex handler_mutex; 348 349 int backlog; 350 int timeout_ms; 351 struct ib_sa_query *query; 352 int query_id; 353 union { 354 struct ib_cm_id *ib; 355 struct iw_cm_id *iw; 356 } cm_id; 357 358 u32 seq_num; 359 u32 qkey; 360 u32 qp_num; 361 pid_t owner; 362 u32 options; 363 u8 srq; 364 u8 tos; 365 bool tos_set; 366 u8 reuseaddr; 367 u8 afonly; 368 enum ib_gid_type gid_type; 369 }; 370 371 struct cma_multicast { 372 struct rdma_id_private *id_priv; 373 union { 374 struct ib_sa_multicast *ib; 375 } multicast; 376 struct list_head list; 377 void *context; 378 struct sockaddr_storage addr; 379 struct kref mcref; 380 bool igmp_joined; 381 u8 join_state; 382 }; 383 384 struct cma_work { 385 struct work_struct work; 386 struct rdma_id_private *id; 387 enum rdma_cm_state old_state; 388 enum rdma_cm_state new_state; 389 struct rdma_cm_event event; 390 }; 391 392 struct cma_ndev_work { 393 struct work_struct work; 394 struct rdma_id_private *id; 395 struct rdma_cm_event event; 396 }; 397 398 struct iboe_mcast_work { 399 struct work_struct work; 400 struct rdma_id_private *id; 401 struct cma_multicast *mc; 402 }; 403 404 union cma_ip_addr { 405 struct in6_addr ip6; 406 struct { 407 __be32 pad[3]; 408 __be32 addr; 409 } ip4; 410 }; 411 412 struct cma_hdr { 413 u8 cma_version; 414 u8 ip_version; /* IP version: 7:4 */ 415 __be16 port; 416 union cma_ip_addr src_addr; 417 union cma_ip_addr dst_addr; 418 }; 419 420 #define CMA_VERSION 0x00 421 422 struct cma_req_info { 423 struct ib_device *device; 424 int port; 425 union ib_gid local_gid; 426 __be64 service_id; 427 u16 pkey; 428 bool has_gid:1; 429 }; 430 431 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) 432 { 433 unsigned long flags; 434 int ret; 435 436 spin_lock_irqsave(&id_priv->lock, flags); 437 ret = (id_priv->state == comp); 438 spin_unlock_irqrestore(&id_priv->lock, flags); 439 return ret; 440 } 441 442 static int cma_comp_exch(struct rdma_id_private *id_priv, 443 enum rdma_cm_state comp, enum rdma_cm_state exch) 444 { 445 unsigned long flags; 446 int ret; 447 448 spin_lock_irqsave(&id_priv->lock, flags); 449 if ((ret = (id_priv->state == comp))) 450 id_priv->state = exch; 451 spin_unlock_irqrestore(&id_priv->lock, flags); 452 return ret; 453 } 454 455 static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, 456 enum rdma_cm_state exch) 457 { 458 unsigned long flags; 459 enum rdma_cm_state old; 460 461 spin_lock_irqsave(&id_priv->lock, flags); 462 old = id_priv->state; 463 id_priv->state = exch; 464 spin_unlock_irqrestore(&id_priv->lock, flags); 465 return old; 466 } 467 468 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) 469 { 470 return hdr->ip_version >> 4; 471 } 472 473 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 474 { 475 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 476 } 477 478 static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) 479 { 480 struct in_device *in_dev = NULL; 481 482 if (ndev) { 483 rtnl_lock(); 484 in_dev = 
__in_dev_get_rtnl(ndev); 485 if (in_dev) { 486 if (join) 487 ip_mc_inc_group(in_dev, 488 *(__be32 *)(mgid->raw + 12)); 489 else 490 ip_mc_dec_group(in_dev, 491 *(__be32 *)(mgid->raw + 12)); 492 } 493 rtnl_unlock(); 494 } 495 return (in_dev) ? 0 : -ENODEV; 496 } 497 498 static void _cma_attach_to_dev(struct rdma_id_private *id_priv, 499 struct cma_device *cma_dev) 500 { 501 cma_ref_dev(cma_dev); 502 id_priv->cma_dev = cma_dev; 503 id_priv->gid_type = 0; 504 id_priv->id.device = cma_dev->device; 505 id_priv->id.route.addr.dev_addr.transport = 506 rdma_node_get_transport(cma_dev->device->node_type); 507 list_add_tail(&id_priv->list, &cma_dev->id_list); 508 } 509 510 static void cma_attach_to_dev(struct rdma_id_private *id_priv, 511 struct cma_device *cma_dev) 512 { 513 _cma_attach_to_dev(id_priv, cma_dev); 514 id_priv->gid_type = 515 cma_dev->default_gid_type[id_priv->id.port_num - 516 rdma_start_port(cma_dev->device)]; 517 } 518 519 void cma_deref_dev(struct cma_device *cma_dev) 520 { 521 if (atomic_dec_and_test(&cma_dev->refcount)) 522 complete(&cma_dev->comp); 523 } 524 525 static inline void release_mc(struct kref *kref) 526 { 527 struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); 528 529 kfree(mc->multicast.ib); 530 kfree(mc); 531 } 532 533 static void cma_release_dev(struct rdma_id_private *id_priv) 534 { 535 mutex_lock(&lock); 536 list_del(&id_priv->list); 537 cma_deref_dev(id_priv->cma_dev); 538 id_priv->cma_dev = NULL; 539 mutex_unlock(&lock); 540 } 541 542 static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 543 { 544 return (struct sockaddr *) &id_priv->id.route.addr.src_addr; 545 } 546 547 static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 548 { 549 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 550 } 551 552 static inline unsigned short cma_family(struct rdma_id_private *id_priv) 553 { 554 return id_priv->id.route.addr.src_addr.ss_family; 555 } 556 557 static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) 558 { 559 struct ib_sa_mcmember_rec rec; 560 int ret = 0; 561 562 if (id_priv->qkey) { 563 if (qkey && id_priv->qkey != qkey) 564 return -EINVAL; 565 return 0; 566 } 567 568 if (qkey) { 569 id_priv->qkey = qkey; 570 return 0; 571 } 572 573 switch (id_priv->id.ps) { 574 case RDMA_PS_UDP: 575 case RDMA_PS_IB: 576 id_priv->qkey = RDMA_UDP_QKEY; 577 break; 578 case RDMA_PS_IPOIB: 579 ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); 580 ret = ib_sa_get_mcmember_rec(id_priv->id.device, 581 id_priv->id.port_num, &rec.mgid, 582 &rec); 583 if (!ret) 584 id_priv->qkey = be32_to_cpu(rec.qkey); 585 break; 586 default: 587 break; 588 } 589 return ret; 590 } 591 592 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 593 { 594 dev_addr->dev_type = ARPHRD_INFINIBAND; 595 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); 596 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); 597 } 598 599 static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 600 { 601 int ret; 602 603 if (addr->sa_family != AF_IB) { 604 ret = rdma_translate_ip(addr, dev_addr); 605 } else { 606 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 607 ret = 0; 608 } 609 610 return ret; 611 } 612 613 static inline int cma_validate_port(struct ib_device *device, u8 port, 614 enum ib_gid_type gid_type, 615 union ib_gid *gid, 616 struct rdma_id_private *id_priv) 617 { 618 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 
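	/*
	 * A port "validates" if the resolved address is usable through it:
	 * the link-layer type recorded in dev_addr must match the port's
	 * protocol (ARPHRD_INFINIBAND for IB ports, Ethernet for RoCE
	 * ports), and the source GID must be present in that port's GID
	 * table.  For RoCE the lookup below is additionally scoped to the
	 * bound net_device; for IB the GID type is forced to
	 * IB_GID_TYPE_IB.
	 */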
619 int bound_if_index = dev_addr->bound_dev_if; 620 int dev_type = dev_addr->dev_type; 621 struct net_device *ndev = NULL; 622 int ret = -ENODEV; 623 624 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 625 return ret; 626 627 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 628 return ret; 629 630 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { 631 ndev = dev_get_by_index(dev_addr->net, bound_if_index); 632 if (!ndev) 633 return ret; 634 } else { 635 gid_type = IB_GID_TYPE_IB; 636 } 637 638 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, 639 ndev, NULL); 640 641 if (ndev) 642 dev_put(ndev); 643 644 return ret; 645 } 646 647 static int cma_acquire_dev(struct rdma_id_private *id_priv, 648 struct rdma_id_private *listen_id_priv) 649 { 650 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 651 struct cma_device *cma_dev; 652 union ib_gid gid, iboe_gid, *gidp; 653 int ret = -ENODEV; 654 u8 port; 655 656 if (dev_addr->dev_type != ARPHRD_INFINIBAND && 657 id_priv->id.ps == RDMA_PS_IPOIB) 658 return -EINVAL; 659 660 mutex_lock(&lock); 661 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 662 &iboe_gid); 663 664 memcpy(&gid, dev_addr->src_dev_addr + 665 rdma_addr_gid_offset(dev_addr), sizeof gid); 666 667 if (listen_id_priv) { 668 cma_dev = listen_id_priv->cma_dev; 669 port = listen_id_priv->id.port_num; 670 gidp = rdma_protocol_roce(cma_dev->device, port) ? 671 &iboe_gid : &gid; 672 673 ret = cma_validate_port(cma_dev->device, port, 674 rdma_protocol_ib(cma_dev->device, port) ? 675 IB_GID_TYPE_IB : 676 listen_id_priv->gid_type, gidp, 677 id_priv); 678 if (!ret) { 679 id_priv->id.port_num = port; 680 goto out; 681 } 682 } 683 684 list_for_each_entry(cma_dev, &dev_list, list) { 685 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 686 if (listen_id_priv && 687 listen_id_priv->cma_dev == cma_dev && 688 listen_id_priv->id.port_num == port) 689 continue; 690 691 gidp = rdma_protocol_roce(cma_dev->device, port) ? 692 &iboe_gid : &gid; 693 694 ret = cma_validate_port(cma_dev->device, port, 695 rdma_protocol_ib(cma_dev->device, port) ? 696 IB_GID_TYPE_IB : 697 cma_dev->default_gid_type[port - 1], 698 gidp, id_priv); 699 if (!ret) { 700 id_priv->id.port_num = port; 701 goto out; 702 } 703 } 704 } 705 706 out: 707 if (!ret) 708 cma_attach_to_dev(id_priv, cma_dev); 709 710 mutex_unlock(&lock); 711 return ret; 712 } 713 714 /* 715 * Select the source IB device and address to reach the destination IB address. 
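 *
 * The search below prefers an exact GID match: if some local port owns
 * the destination GID itself, that port is chosen immediately.  Failing
 * that, the first ACTIVE port that carries the requested P_Key and
 * shares the destination's subnet prefix is remembered and used as the
 * source.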
716 */ 717 static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) 718 { 719 struct cma_device *cma_dev, *cur_dev; 720 struct sockaddr_ib *addr; 721 union ib_gid gid, sgid, *dgid; 722 u16 pkey, index; 723 u8 p; 724 enum ib_port_state port_state; 725 int i; 726 727 cma_dev = NULL; 728 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); 729 dgid = (union ib_gid *) &addr->sib_addr; 730 pkey = ntohs(addr->sib_pkey); 731 732 list_for_each_entry(cur_dev, &dev_list, list) { 733 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 734 if (!rdma_cap_af_ib(cur_dev->device, p)) 735 continue; 736 737 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) 738 continue; 739 740 if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) 741 continue; 742 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, 743 &gid, NULL); 744 i++) { 745 if (!memcmp(&gid, dgid, sizeof(gid))) { 746 cma_dev = cur_dev; 747 sgid = gid; 748 id_priv->id.port_num = p; 749 goto found; 750 } 751 752 if (!cma_dev && (gid.global.subnet_prefix == 753 dgid->global.subnet_prefix) && 754 port_state == IB_PORT_ACTIVE) { 755 cma_dev = cur_dev; 756 sgid = gid; 757 id_priv->id.port_num = p; 758 } 759 } 760 } 761 } 762 763 if (!cma_dev) 764 return -ENODEV; 765 766 found: 767 cma_attach_to_dev(id_priv, cma_dev); 768 addr = (struct sockaddr_ib *) cma_src_addr(id_priv); 769 memcpy(&addr->sib_addr, &sgid, sizeof sgid); 770 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); 771 return 0; 772 } 773 774 static void cma_deref_id(struct rdma_id_private *id_priv) 775 { 776 if (atomic_dec_and_test(&id_priv->refcount)) 777 complete(&id_priv->comp); 778 } 779 780 struct rdma_cm_id *rdma_create_id(struct net *net, 781 rdma_cm_event_handler event_handler, 782 void *context, enum rdma_port_space ps, 783 enum ib_qp_type qp_type) 784 { 785 struct rdma_id_private *id_priv; 786 787 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); 788 if (!id_priv) 789 return ERR_PTR(-ENOMEM); 790 791 id_priv->owner = task_pid_nr(current); 792 id_priv->state = RDMA_CM_IDLE; 793 id_priv->id.context = context; 794 id_priv->id.event_handler = event_handler; 795 id_priv->id.ps = ps; 796 id_priv->id.qp_type = qp_type; 797 id_priv->tos_set = false; 798 spin_lock_init(&id_priv->lock); 799 mutex_init(&id_priv->qp_mutex); 800 init_completion(&id_priv->comp); 801 atomic_set(&id_priv->refcount, 1); 802 mutex_init(&id_priv->handler_mutex); 803 INIT_LIST_HEAD(&id_priv->listen_list); 804 INIT_LIST_HEAD(&id_priv->mc_list); 805 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 806 id_priv->id.route.addr.dev_addr.net = get_net(net); 807 id_priv->seq_num &= 0x00ffffff; 808 809 return &id_priv->id; 810 } 811 EXPORT_SYMBOL(rdma_create_id); 812 813 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 814 { 815 struct ib_qp_attr qp_attr; 816 int qp_attr_mask, ret; 817 818 qp_attr.qp_state = IB_QPS_INIT; 819 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 820 if (ret) 821 return ret; 822 823 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 824 if (ret) 825 return ret; 826 827 qp_attr.qp_state = IB_QPS_RTR; 828 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 829 if (ret) 830 return ret; 831 832 qp_attr.qp_state = IB_QPS_RTS; 833 qp_attr.sq_psn = 0; 834 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 835 836 return ret; 837 } 838 839 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 840 { 841 struct ib_qp_attr qp_attr; 842 int qp_attr_mask, ret; 843 844 qp_attr.qp_state = IB_QPS_INIT; 
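	/*
	 * Connected QPs are only brought to INIT here; the RTR and RTS
	 * transitions are driven later by the CM exchange through
	 * cma_modify_qp_rtr() and cma_modify_qp_rts().  Compare with
	 * cma_init_ud_qp() above, which walks a UD QP all the way to RTS
	 * immediately, since no connection handshake follows.
	 */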
845 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 846 if (ret) 847 return ret; 848 849 return ib_modify_qp(qp, &qp_attr, qp_attr_mask); 850 } 851 852 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 853 struct ib_qp_init_attr *qp_init_attr) 854 { 855 struct rdma_id_private *id_priv; 856 struct ib_qp *qp; 857 int ret; 858 859 id_priv = container_of(id, struct rdma_id_private, id); 860 if (id->device != pd->device) 861 return -EINVAL; 862 863 qp_init_attr->port_num = id->port_num; 864 qp = ib_create_qp(pd, qp_init_attr); 865 if (IS_ERR(qp)) 866 return PTR_ERR(qp); 867 868 if (id->qp_type == IB_QPT_UD) 869 ret = cma_init_ud_qp(id_priv, qp); 870 else 871 ret = cma_init_conn_qp(id_priv, qp); 872 if (ret) 873 goto err; 874 875 id->qp = qp; 876 id_priv->qp_num = qp->qp_num; 877 id_priv->srq = (qp->srq != NULL); 878 return 0; 879 err: 880 ib_destroy_qp(qp); 881 return ret; 882 } 883 EXPORT_SYMBOL(rdma_create_qp); 884 885 void rdma_destroy_qp(struct rdma_cm_id *id) 886 { 887 struct rdma_id_private *id_priv; 888 889 id_priv = container_of(id, struct rdma_id_private, id); 890 mutex_lock(&id_priv->qp_mutex); 891 ib_destroy_qp(id_priv->id.qp); 892 id_priv->id.qp = NULL; 893 mutex_unlock(&id_priv->qp_mutex); 894 } 895 EXPORT_SYMBOL(rdma_destroy_qp); 896 897 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, 898 struct rdma_conn_param *conn_param) 899 { 900 struct ib_qp_attr qp_attr; 901 int qp_attr_mask, ret; 902 union ib_gid sgid; 903 904 mutex_lock(&id_priv->qp_mutex); 905 if (!id_priv->id.qp) { 906 ret = 0; 907 goto out; 908 } 909 910 /* Need to update QP attributes from default values. */ 911 qp_attr.qp_state = IB_QPS_INIT; 912 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 913 if (ret) 914 goto out; 915 916 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 917 if (ret) 918 goto out; 919 920 qp_attr.qp_state = IB_QPS_RTR; 921 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 922 if (ret) 923 goto out; 924 925 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 926 rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index, 927 &sgid, NULL); 928 if (ret) 929 goto out; 930 931 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 932 933 if (conn_param) 934 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 935 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 936 out: 937 mutex_unlock(&id_priv->qp_mutex); 938 return ret; 939 } 940 941 static int cma_modify_qp_rts(struct rdma_id_private *id_priv, 942 struct rdma_conn_param *conn_param) 943 { 944 struct ib_qp_attr qp_attr; 945 int qp_attr_mask, ret; 946 947 mutex_lock(&id_priv->qp_mutex); 948 if (!id_priv->id.qp) { 949 ret = 0; 950 goto out; 951 } 952 953 qp_attr.qp_state = IB_QPS_RTS; 954 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 955 if (ret) 956 goto out; 957 958 if (conn_param) 959 qp_attr.max_rd_atomic = conn_param->initiator_depth; 960 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 961 out: 962 mutex_unlock(&id_priv->qp_mutex); 963 return ret; 964 } 965 966 static int cma_modify_qp_err(struct rdma_id_private *id_priv) 967 { 968 struct ib_qp_attr qp_attr; 969 int ret; 970 971 mutex_lock(&id_priv->qp_mutex); 972 if (!id_priv->id.qp) { 973 ret = 0; 974 goto out; 975 } 976 977 qp_attr.qp_state = IB_QPS_ERR; 978 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 979 out: 980 mutex_unlock(&id_priv->qp_mutex); 981 return ret; 982 } 983 984 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 985 struct 
ib_qp_attr *qp_attr, int *qp_attr_mask) 986 { 987 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 988 int ret; 989 u16 pkey; 990 991 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) 992 pkey = 0xffff; 993 else 994 pkey = ib_addr_get_pkey(dev_addr); 995 996 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 997 pkey, &qp_attr->pkey_index); 998 if (ret) 999 return ret; 1000 1001 qp_attr->port_num = id_priv->id.port_num; 1002 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 1003 1004 if (id_priv->id.qp_type == IB_QPT_UD) { 1005 ret = cma_set_qkey(id_priv, 0); 1006 if (ret) 1007 return ret; 1008 1009 qp_attr->qkey = id_priv->qkey; 1010 *qp_attr_mask |= IB_QP_QKEY; 1011 } else { 1012 qp_attr->qp_access_flags = 0; 1013 *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 1014 } 1015 return 0; 1016 } 1017 1018 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 1019 int *qp_attr_mask) 1020 { 1021 struct rdma_id_private *id_priv; 1022 int ret = 0; 1023 1024 id_priv = container_of(id, struct rdma_id_private, id); 1025 if (rdma_cap_ib_cm(id->device, id->port_num)) { 1026 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) 1027 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 1028 else 1029 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 1030 qp_attr_mask); 1031 1032 if (qp_attr->qp_state == IB_QPS_RTR) 1033 qp_attr->rq_psn = id_priv->seq_num; 1034 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 1035 if (!id_priv->cm_id.iw) { 1036 qp_attr->qp_access_flags = 0; 1037 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1038 } else 1039 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 1040 qp_attr_mask); 1041 qp_attr->port_num = id_priv->id.port_num; 1042 *qp_attr_mask |= IB_QP_PORT; 1043 } else 1044 ret = -ENOSYS; 1045 1046 return ret; 1047 } 1048 EXPORT_SYMBOL(rdma_init_qp_attr); 1049 1050 static inline int cma_zero_addr(struct sockaddr *addr) 1051 { 1052 switch (addr->sa_family) { 1053 case AF_INET: 1054 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 1055 case AF_INET6: 1056 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); 1057 case AF_IB: 1058 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); 1059 default: 1060 return 0; 1061 } 1062 } 1063 1064 static inline int cma_loopback_addr(struct sockaddr *addr) 1065 { 1066 switch (addr->sa_family) { 1067 case AF_INET: 1068 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); 1069 case AF_INET6: 1070 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); 1071 case AF_IB: 1072 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); 1073 default: 1074 return 0; 1075 } 1076 } 1077 1078 static inline int cma_any_addr(struct sockaddr *addr) 1079 { 1080 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1081 } 1082 1083 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) 1084 { 1085 if (src->sa_family != dst->sa_family) 1086 return -1; 1087 1088 switch (src->sa_family) { 1089 case AF_INET: 1090 return ((struct sockaddr_in *) src)->sin_addr.s_addr != 1091 ((struct sockaddr_in *) dst)->sin_addr.s_addr; 1092 case AF_INET6: 1093 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, 1094 &((struct sockaddr_in6 *) dst)->sin6_addr); 1095 default: 1096 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, 1097 &((struct sockaddr_ib *) dst)->sib_addr); 1098 } 1099 } 1100 1101 static __be16 cma_port(struct sockaddr *addr) 1102 { 1103 struct 
sockaddr_ib *sib; 1104 1105 switch (addr->sa_family) { 1106 case AF_INET: 1107 return ((struct sockaddr_in *) addr)->sin_port; 1108 case AF_INET6: 1109 return ((struct sockaddr_in6 *) addr)->sin6_port; 1110 case AF_IB: 1111 sib = (struct sockaddr_ib *) addr; 1112 return htons((u16) (be64_to_cpu(sib->sib_sid) & 1113 be64_to_cpu(sib->sib_sid_mask))); 1114 default: 1115 return 0; 1116 } 1117 } 1118 1119 static inline int cma_any_port(struct sockaddr *addr) 1120 { 1121 return !cma_port(addr); 1122 } 1123 1124 static void cma_save_ib_info(struct sockaddr *src_addr, 1125 struct sockaddr *dst_addr, 1126 struct rdma_cm_id *listen_id, 1127 struct sa_path_rec *path) 1128 { 1129 struct sockaddr_ib *listen_ib, *ib; 1130 1131 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; 1132 if (src_addr) { 1133 ib = (struct sockaddr_ib *)src_addr; 1134 ib->sib_family = AF_IB; 1135 if (path) { 1136 ib->sib_pkey = path->pkey; 1137 ib->sib_flowinfo = path->flow_label; 1138 memcpy(&ib->sib_addr, &path->sgid, 16); 1139 ib->sib_sid = path->service_id; 1140 ib->sib_scope_id = 0; 1141 } else { 1142 ib->sib_pkey = listen_ib->sib_pkey; 1143 ib->sib_flowinfo = listen_ib->sib_flowinfo; 1144 ib->sib_addr = listen_ib->sib_addr; 1145 ib->sib_sid = listen_ib->sib_sid; 1146 ib->sib_scope_id = listen_ib->sib_scope_id; 1147 } 1148 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); 1149 } 1150 if (dst_addr) { 1151 ib = (struct sockaddr_ib *)dst_addr; 1152 ib->sib_family = AF_IB; 1153 if (path) { 1154 ib->sib_pkey = path->pkey; 1155 ib->sib_flowinfo = path->flow_label; 1156 memcpy(&ib->sib_addr, &path->dgid, 16); 1157 } 1158 } 1159 } 1160 1161 static void cma_save_ip4_info(struct sockaddr_in *src_addr, 1162 struct sockaddr_in *dst_addr, 1163 struct cma_hdr *hdr, 1164 __be16 local_port) 1165 { 1166 if (src_addr) { 1167 *src_addr = (struct sockaddr_in) { 1168 .sin_family = AF_INET, 1169 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1170 .sin_port = local_port, 1171 }; 1172 } 1173 1174 if (dst_addr) { 1175 *dst_addr = (struct sockaddr_in) { 1176 .sin_family = AF_INET, 1177 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1178 .sin_port = hdr->port, 1179 }; 1180 } 1181 } 1182 1183 static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, 1184 struct sockaddr_in6 *dst_addr, 1185 struct cma_hdr *hdr, 1186 __be16 local_port) 1187 { 1188 if (src_addr) { 1189 *src_addr = (struct sockaddr_in6) { 1190 .sin6_family = AF_INET6, 1191 .sin6_addr = hdr->dst_addr.ip6, 1192 .sin6_port = local_port, 1193 }; 1194 } 1195 1196 if (dst_addr) { 1197 *dst_addr = (struct sockaddr_in6) { 1198 .sin6_family = AF_INET6, 1199 .sin6_addr = hdr->src_addr.ip6, 1200 .sin6_port = hdr->port, 1201 }; 1202 } 1203 } 1204 1205 static u16 cma_port_from_service_id(__be64 service_id) 1206 { 1207 return (u16)be64_to_cpu(service_id); 1208 } 1209 1210 static int cma_save_ip_info(struct sockaddr *src_addr, 1211 struct sockaddr *dst_addr, 1212 struct ib_cm_event *ib_event, 1213 __be64 service_id) 1214 { 1215 struct cma_hdr *hdr; 1216 __be16 port; 1217 1218 hdr = ib_event->private_data; 1219 if (hdr->cma_version != CMA_VERSION) 1220 return -EINVAL; 1221 1222 port = htons(cma_port_from_service_id(service_id)); 1223 1224 switch (cma_get_ip_ver(hdr)) { 1225 case 4: 1226 cma_save_ip4_info((struct sockaddr_in *)src_addr, 1227 (struct sockaddr_in *)dst_addr, hdr, port); 1228 break; 1229 case 6: 1230 cma_save_ip6_info((struct sockaddr_in6 *)src_addr, 1231 (struct sockaddr_in6 *)dst_addr, hdr, port); 1232 break; 1233 default: 1234 return -EAFNOSUPPORT; 1235 } 1236 1237 
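	/*
	 * Only the addresses travel in the cma_hdr private data; the port
	 * was recovered above from the low 16 bits of the IB service ID
	 * (see cma_port_from_service_id()).
	 */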
return 0; 1238 } 1239 1240 static int cma_save_net_info(struct sockaddr *src_addr, 1241 struct sockaddr *dst_addr, 1242 struct rdma_cm_id *listen_id, 1243 struct ib_cm_event *ib_event, 1244 sa_family_t sa_family, __be64 service_id) 1245 { 1246 if (sa_family == AF_IB) { 1247 if (ib_event->event == IB_CM_REQ_RECEIVED) 1248 cma_save_ib_info(src_addr, dst_addr, listen_id, 1249 ib_event->param.req_rcvd.primary_path); 1250 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1251 cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); 1252 return 0; 1253 } 1254 1255 return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); 1256 } 1257 1258 static int cma_save_req_info(const struct ib_cm_event *ib_event, 1259 struct cma_req_info *req) 1260 { 1261 const struct ib_cm_req_event_param *req_param = 1262 &ib_event->param.req_rcvd; 1263 const struct ib_cm_sidr_req_event_param *sidr_param = 1264 &ib_event->param.sidr_req_rcvd; 1265 1266 switch (ib_event->event) { 1267 case IB_CM_REQ_RECEIVED: 1268 req->device = req_param->listen_id->device; 1269 req->port = req_param->port; 1270 memcpy(&req->local_gid, &req_param->primary_path->sgid, 1271 sizeof(req->local_gid)); 1272 req->has_gid = true; 1273 req->service_id = req_param->primary_path->service_id; 1274 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1275 if (req->pkey != req_param->bth_pkey) 1276 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1277 "RDMA CMA: in the future this may cause the request to be dropped\n", 1278 req_param->bth_pkey, req->pkey); 1279 break; 1280 case IB_CM_SIDR_REQ_RECEIVED: 1281 req->device = sidr_param->listen_id->device; 1282 req->port = sidr_param->port; 1283 req->has_gid = false; 1284 req->service_id = sidr_param->service_id; 1285 req->pkey = sidr_param->pkey; 1286 if (req->pkey != sidr_param->bth_pkey) 1287 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1288 "RDMA CMA: in the future this may cause the request to be dropped\n", 1289 sidr_param->bth_pkey, req->pkey); 1290 break; 1291 default: 1292 return -EINVAL; 1293 } 1294 1295 return 0; 1296 } 1297 1298 static bool validate_ipv4_net_dev(struct net_device *net_dev, 1299 const struct sockaddr_in *dst_addr, 1300 const struct sockaddr_in *src_addr) 1301 { 1302 __be32 daddr = dst_addr->sin_addr.s_addr, 1303 saddr = src_addr->sin_addr.s_addr; 1304 struct fib_result res; 1305 struct flowi4 fl4; 1306 int err; 1307 bool ret; 1308 1309 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1310 ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || 1311 ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || 1312 ipv4_is_loopback(saddr)) 1313 return false; 1314 1315 memset(&fl4, 0, sizeof(fl4)); 1316 fl4.flowi4_iif = net_dev->ifindex; 1317 fl4.daddr = daddr; 1318 fl4.saddr = saddr; 1319 1320 rcu_read_lock(); 1321 err = fib_lookup(dev_net(net_dev), &fl4, &res, 0); 1322 ret = err == 0 && FIB_RES_DEV(res) == net_dev; 1323 rcu_read_unlock(); 1324 1325 return ret; 1326 } 1327 1328 static bool validate_ipv6_net_dev(struct net_device *net_dev, 1329 const struct sockaddr_in6 *dst_addr, 1330 const struct sockaddr_in6 *src_addr) 1331 { 1332 #if IS_ENABLED(CONFIG_IPV6) 1333 const int strict = ipv6_addr_type(&dst_addr->sin6_addr) & 1334 IPV6_ADDR_LINKLOCAL; 1335 struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, 1336 &src_addr->sin6_addr, net_dev->ifindex, 1337 NULL, strict); 1338 bool ret; 1339 1340 if (!rt) 1341 return false; 1342 1343 ret = 
rt->rt6i_idev->dev == net_dev; 1344 ip6_rt_put(rt); 1345 1346 return ret; 1347 #else 1348 return false; 1349 #endif 1350 } 1351 1352 static bool validate_net_dev(struct net_device *net_dev, 1353 const struct sockaddr *daddr, 1354 const struct sockaddr *saddr) 1355 { 1356 const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; 1357 const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; 1358 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1359 const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; 1360 1361 switch (daddr->sa_family) { 1362 case AF_INET: 1363 return saddr->sa_family == AF_INET && 1364 validate_ipv4_net_dev(net_dev, daddr4, saddr4); 1365 1366 case AF_INET6: 1367 return saddr->sa_family == AF_INET6 && 1368 validate_ipv6_net_dev(net_dev, daddr6, saddr6); 1369 1370 default: 1371 return false; 1372 } 1373 } 1374 1375 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, 1376 const struct cma_req_info *req) 1377 { 1378 struct sockaddr_storage listen_addr_storage, src_addr_storage; 1379 struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, 1380 *src_addr = (struct sockaddr *)&src_addr_storage; 1381 struct net_device *net_dev; 1382 const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; 1383 int err; 1384 1385 err = cma_save_ip_info(listen_addr, src_addr, ib_event, 1386 req->service_id); 1387 if (err) 1388 return ERR_PTR(err); 1389 1390 net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, 1391 gid, listen_addr); 1392 if (!net_dev) 1393 return ERR_PTR(-ENODEV); 1394 1395 if (!validate_net_dev(net_dev, listen_addr, src_addr)) { 1396 dev_put(net_dev); 1397 return ERR_PTR(-EHOSTUNREACH); 1398 } 1399 1400 return net_dev; 1401 } 1402 1403 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) 1404 { 1405 return (be64_to_cpu(service_id) >> 16) & 0xffff; 1406 } 1407 1408 static bool cma_match_private_data(struct rdma_id_private *id_priv, 1409 const struct cma_hdr *hdr) 1410 { 1411 struct sockaddr *addr = cma_src_addr(id_priv); 1412 __be32 ip4_addr; 1413 struct in6_addr ip6_addr; 1414 1415 if (cma_any_addr(addr) && !id_priv->afonly) 1416 return true; 1417 1418 switch (addr->sa_family) { 1419 case AF_INET: 1420 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1421 if (cma_get_ip_ver(hdr) != 4) 1422 return false; 1423 if (!cma_any_addr(addr) && 1424 hdr->dst_addr.ip4.addr != ip4_addr) 1425 return false; 1426 break; 1427 case AF_INET6: 1428 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1429 if (cma_get_ip_ver(hdr) != 6) 1430 return false; 1431 if (!cma_any_addr(addr) && 1432 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1433 return false; 1434 break; 1435 case AF_IB: 1436 return true; 1437 default: 1438 return false; 1439 } 1440 1441 return true; 1442 } 1443 1444 static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) 1445 { 1446 enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); 1447 enum rdma_transport_type transport = 1448 rdma_node_get_transport(device->node_type); 1449 1450 return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; 1451 } 1452 1453 static bool cma_protocol_roce(const struct rdma_cm_id *id) 1454 { 1455 struct ib_device *device = id->device; 1456 const int port_num = id->port_num ?: rdma_start_port(device); 1457 1458 return cma_protocol_roce_dev_port(device, port_num); 1459 } 1460 1461 static bool cma_match_net_dev(const struct rdma_cm_id *id, 1462 
const struct net_device *net_dev, 1463 u8 port_num) 1464 { 1465 const struct rdma_addr *addr = &id->route.addr; 1466 1467 if (!net_dev) 1468 /* This request is an AF_IB request or a RoCE request */ 1469 return (!id->port_num || id->port_num == port_num) && 1470 (addr->src_addr.ss_family == AF_IB || 1471 cma_protocol_roce_dev_port(id->device, port_num)); 1472 1473 return !addr->dev_addr.bound_dev_if || 1474 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1475 addr->dev_addr.bound_dev_if == net_dev->ifindex); 1476 } 1477 1478 static struct rdma_id_private *cma_find_listener( 1479 const struct rdma_bind_list *bind_list, 1480 const struct ib_cm_id *cm_id, 1481 const struct ib_cm_event *ib_event, 1482 const struct cma_req_info *req, 1483 const struct net_device *net_dev) 1484 { 1485 struct rdma_id_private *id_priv, *id_priv_dev; 1486 1487 if (!bind_list) 1488 return ERR_PTR(-EINVAL); 1489 1490 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1491 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1492 if (id_priv->id.device == cm_id->device && 1493 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1494 return id_priv; 1495 list_for_each_entry(id_priv_dev, 1496 &id_priv->listen_list, 1497 listen_list) { 1498 if (id_priv_dev->id.device == cm_id->device && 1499 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1500 return id_priv_dev; 1501 } 1502 } 1503 } 1504 1505 return ERR_PTR(-EINVAL); 1506 } 1507 1508 static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1509 struct ib_cm_event *ib_event, 1510 struct net_device **net_dev) 1511 { 1512 struct cma_req_info req; 1513 struct rdma_bind_list *bind_list; 1514 struct rdma_id_private *id_priv; 1515 int err; 1516 1517 err = cma_save_req_info(ib_event, &req); 1518 if (err) 1519 return ERR_PTR(err); 1520 1521 *net_dev = cma_get_net_dev(ib_event, &req); 1522 if (IS_ERR(*net_dev)) { 1523 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1524 /* Assuming the protocol is AF_IB */ 1525 *net_dev = NULL; 1526 } else if (cma_protocol_roce_dev_port(req.device, req.port)) { 1527 /* TODO find the net dev matching the request parameters 1528 * through the RoCE GID table */ 1529 *net_dev = NULL; 1530 } else { 1531 return ERR_CAST(*net_dev); 1532 } 1533 } 1534 1535 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, 1536 rdma_ps_from_service_id(req.service_id), 1537 cma_port_from_service_id(req.service_id)); 1538 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1539 if (IS_ERR(id_priv) && *net_dev) { 1540 dev_put(*net_dev); 1541 *net_dev = NULL; 1542 } 1543 1544 return id_priv; 1545 } 1546 1547 static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv) 1548 { 1549 return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); 1550 } 1551 1552 static void cma_cancel_route(struct rdma_id_private *id_priv) 1553 { 1554 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1555 if (id_priv->query) 1556 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1557 } 1558 } 1559 1560 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1561 { 1562 struct rdma_id_private *dev_id_priv; 1563 1564 /* 1565 * Remove from listen_any_list to prevent added devices from spawning 1566 * additional listen requests. 
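 *
 * Each per-device listen id is destroyed with the global lock dropped:
 * rdma_destroy_id() ends up in cma_release_dev() and cma_release_port(),
 * which take the same lock, so holding it across the call would
 * deadlock.  The list_del_init() beforehand keeps device removal from
 * destroying the same id a second time.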
1567 */ 1568 mutex_lock(&lock); 1569 list_del(&id_priv->list); 1570 1571 while (!list_empty(&id_priv->listen_list)) { 1572 dev_id_priv = list_entry(id_priv->listen_list.next, 1573 struct rdma_id_private, listen_list); 1574 /* sync with device removal to avoid duplicate destruction */ 1575 list_del_init(&dev_id_priv->list); 1576 list_del(&dev_id_priv->listen_list); 1577 mutex_unlock(&lock); 1578 1579 rdma_destroy_id(&dev_id_priv->id); 1580 mutex_lock(&lock); 1581 } 1582 mutex_unlock(&lock); 1583 } 1584 1585 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1586 enum rdma_cm_state state) 1587 { 1588 switch (state) { 1589 case RDMA_CM_ADDR_QUERY: 1590 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1591 break; 1592 case RDMA_CM_ROUTE_QUERY: 1593 cma_cancel_route(id_priv); 1594 break; 1595 case RDMA_CM_LISTEN: 1596 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1597 cma_cancel_listens(id_priv); 1598 break; 1599 default: 1600 break; 1601 } 1602 } 1603 1604 static void cma_release_port(struct rdma_id_private *id_priv) 1605 { 1606 struct rdma_bind_list *bind_list = id_priv->bind_list; 1607 struct net *net = id_priv->id.route.addr.dev_addr.net; 1608 1609 if (!bind_list) 1610 return; 1611 1612 mutex_lock(&lock); 1613 hlist_del(&id_priv->node); 1614 if (hlist_empty(&bind_list->owners)) { 1615 cma_ps_remove(net, bind_list->ps, bind_list->port); 1616 kfree(bind_list); 1617 } 1618 mutex_unlock(&lock); 1619 } 1620 1621 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1622 { 1623 struct cma_multicast *mc; 1624 1625 while (!list_empty(&id_priv->mc_list)) { 1626 mc = container_of(id_priv->mc_list.next, 1627 struct cma_multicast, list); 1628 list_del(&mc->list); 1629 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1630 id_priv->id.port_num)) { 1631 ib_sa_free_multicast(mc->multicast.ib); 1632 kfree(mc); 1633 } else { 1634 if (mc->igmp_joined) { 1635 struct rdma_dev_addr *dev_addr = 1636 &id_priv->id.route.addr.dev_addr; 1637 struct net_device *ndev = NULL; 1638 1639 if (dev_addr->bound_dev_if) 1640 ndev = dev_get_by_index(&init_net, 1641 dev_addr->bound_dev_if); 1642 if (ndev) { 1643 cma_igmp_send(ndev, 1644 &mc->multicast.ib->rec.mgid, 1645 false); 1646 dev_put(ndev); 1647 } 1648 } 1649 kref_put(&mc->mcref, release_mc); 1650 } 1651 } 1652 } 1653 1654 void rdma_destroy_id(struct rdma_cm_id *id) 1655 { 1656 struct rdma_id_private *id_priv; 1657 enum rdma_cm_state state; 1658 1659 id_priv = container_of(id, struct rdma_id_private, id); 1660 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1661 cma_cancel_operation(id_priv, state); 1662 1663 /* 1664 * Wait for any active callback to finish. New callbacks will find 1665 * the id_priv state set to destroying and abort. 
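 *
 * The empty lock/unlock pair below is deliberate: a handler that is
 * already running holds handler_mutex, so acquiring it here cannot
 * succeed until that handler returns, while handlers that start after
 * the cma_exch() above observe RDMA_CM_DESTROYING and bail out.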
1666 */ 1667 mutex_lock(&id_priv->handler_mutex); 1668 mutex_unlock(&id_priv->handler_mutex); 1669 1670 if (id_priv->cma_dev) { 1671 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1672 if (id_priv->cm_id.ib) 1673 ib_destroy_cm_id(id_priv->cm_id.ib); 1674 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1675 if (id_priv->cm_id.iw) 1676 iw_destroy_cm_id(id_priv->cm_id.iw); 1677 } 1678 cma_leave_mc_groups(id_priv); 1679 cma_release_dev(id_priv); 1680 } 1681 1682 cma_release_port(id_priv); 1683 cma_deref_id(id_priv); 1684 wait_for_completion(&id_priv->comp); 1685 1686 if (id_priv->internal_id) 1687 cma_deref_id(id_priv->id.context); 1688 1689 kfree(id_priv->id.route.path_rec); 1690 put_net(id_priv->id.route.addr.dev_addr.net); 1691 kfree(id_priv); 1692 } 1693 EXPORT_SYMBOL(rdma_destroy_id); 1694 1695 static int cma_rep_recv(struct rdma_id_private *id_priv) 1696 { 1697 int ret; 1698 1699 ret = cma_modify_qp_rtr(id_priv, NULL); 1700 if (ret) 1701 goto reject; 1702 1703 ret = cma_modify_qp_rts(id_priv, NULL); 1704 if (ret) 1705 goto reject; 1706 1707 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1708 if (ret) 1709 goto reject; 1710 1711 return 0; 1712 reject: 1713 pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); 1714 cma_modify_qp_err(id_priv); 1715 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1716 NULL, 0, NULL, 0); 1717 return ret; 1718 } 1719 1720 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1721 struct ib_cm_rep_event_param *rep_data, 1722 void *private_data) 1723 { 1724 event->param.conn.private_data = private_data; 1725 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1726 event->param.conn.responder_resources = rep_data->responder_resources; 1727 event->param.conn.initiator_depth = rep_data->initiator_depth; 1728 event->param.conn.flow_control = rep_data->flow_control; 1729 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1730 event->param.conn.srq = rep_data->srq; 1731 event->param.conn.qp_num = rep_data->remote_qpn; 1732 } 1733 1734 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1735 { 1736 struct rdma_id_private *id_priv = cm_id->context; 1737 struct rdma_cm_event event; 1738 int ret = 0; 1739 1740 mutex_lock(&id_priv->handler_mutex); 1741 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1742 id_priv->state != RDMA_CM_CONNECT) || 1743 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1744 id_priv->state != RDMA_CM_DISCONNECT)) 1745 goto out; 1746 1747 memset(&event, 0, sizeof event); 1748 switch (ib_event->event) { 1749 case IB_CM_REQ_ERROR: 1750 case IB_CM_REP_ERROR: 1751 event.event = RDMA_CM_EVENT_UNREACHABLE; 1752 event.status = -ETIMEDOUT; 1753 break; 1754 case IB_CM_REP_RECEIVED: 1755 if (cma_comp(id_priv, RDMA_CM_CONNECT) && 1756 (id_priv->id.qp_type != IB_QPT_UD)) 1757 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1758 if (id_priv->id.qp) { 1759 event.status = cma_rep_recv(id_priv); 1760 event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : 1761 RDMA_CM_EVENT_ESTABLISHED; 1762 } else { 1763 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1764 } 1765 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1766 ib_event->private_data); 1767 break; 1768 case IB_CM_RTU_RECEIVED: 1769 case IB_CM_USER_ESTABLISHED: 1770 event.event = RDMA_CM_EVENT_ESTABLISHED; 1771 break; 1772 case IB_CM_DREQ_ERROR: 1773 event.status = -ETIMEDOUT; /* fall through */ 1774 case IB_CM_DREQ_RECEIVED: 1775 case IB_CM_DREP_RECEIVED: 1776 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1777 RDMA_CM_DISCONNECT)) 1778 goto out; 1779 event.event = RDMA_CM_EVENT_DISCONNECTED; 1780 break; 1781 case IB_CM_TIMEWAIT_EXIT: 1782 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1783 break; 1784 case IB_CM_MRA_RECEIVED: 1785 /* ignore event */ 1786 goto out; 1787 case IB_CM_REJ_RECEIVED: 1788 pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id, 1789 ib_event->param.rej_rcvd.reason)); 1790 cma_modify_qp_err(id_priv); 1791 event.status = ib_event->param.rej_rcvd.reason; 1792 event.event = RDMA_CM_EVENT_REJECTED; 1793 event.param.conn.private_data = ib_event->private_data; 1794 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1795 break; 1796 default: 1797 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 1798 ib_event->event); 1799 goto out; 1800 } 1801 1802 ret = id_priv->id.event_handler(&id_priv->id, &event); 1803 if (ret) { 1804 /* Destroy the CM ID by returning a non-zero value. */ 1805 id_priv->cm_id.ib = NULL; 1806 cma_exch(id_priv, RDMA_CM_DESTROYING); 1807 mutex_unlock(&id_priv->handler_mutex); 1808 rdma_destroy_id(&id_priv->id); 1809 return ret; 1810 } 1811 out: 1812 mutex_unlock(&id_priv->handler_mutex); 1813 return ret; 1814 } 1815 1816 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 1817 struct ib_cm_event *ib_event, 1818 struct net_device *net_dev) 1819 { 1820 struct rdma_id_private *id_priv; 1821 struct rdma_cm_id *id; 1822 struct rdma_route *rt; 1823 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1824 struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; 1825 const __be64 service_id = 1826 ib_event->param.req_rcvd.primary_path->service_id; 1827 int ret; 1828 1829 id = rdma_create_id(listen_id->route.addr.dev_addr.net, 1830 listen_id->event_handler, listen_id->context, 1831 listen_id->ps, ib_event->param.req_rcvd.qp_type); 1832 if (IS_ERR(id)) 1833 return NULL; 1834 1835 id_priv = container_of(id, struct rdma_id_private, id); 1836 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1837 (struct sockaddr *)&id->route.addr.dst_addr, 1838 listen_id, ib_event, ss_family, service_id)) 1839 goto err; 1840 1841 rt = &id->route; 1842 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; 1843 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 1844 GFP_KERNEL); 1845 if (!rt->path_rec) 1846 goto err; 1847 1848 rt->path_rec[0] = *path; 1849 if (rt->num_paths == 2) 1850 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1851 1852 if (net_dev) { 1853 rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 1854 } else { 1855 if (!cma_protocol_roce(listen_id) && 1856 cma_any_addr(cma_src_addr(id_priv))) { 1857 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 1858 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 1859 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1860 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 1861 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 1862 if (ret) 1863 goto err; 1864 } 1865 } 1866 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1867 1868 id_priv->state = RDMA_CM_CONNECT; 1869 return id_priv; 1870 1871 err: 1872 rdma_destroy_id(id); 1873 return NULL; 1874 } 1875 1876 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 1877 struct ib_cm_event *ib_event, 1878 struct net_device *net_dev) 1879 { 1880 struct rdma_id_private *id_priv; 1881 struct rdma_cm_id *id; 1882 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1883 struct net *net = listen_id->route.addr.dev_addr.net; 1884 int ret; 1885 1886 id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 1887 listen_id->ps, IB_QPT_UD); 1888 if (IS_ERR(id)) 1889 return NULL; 1890 1891 id_priv = container_of(id, struct rdma_id_private, id); 1892 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1893 (struct sockaddr *)&id->route.addr.dst_addr, 1894 listen_id, ib_event, ss_family, 1895 ib_event->param.sidr_req_rcvd.service_id)) 1896 goto err; 1897 1898 if (net_dev) { 1899 rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 1900 } else { 1901 if (!cma_any_addr(cma_src_addr(id_priv))) { 1902 ret = cma_translate_addr(cma_src_addr(id_priv), 1903 &id->route.addr.dev_addr); 1904 if (ret) 1905 goto err; 1906 } 1907 } 1908 1909 id_priv->state = RDMA_CM_CONNECT; 1910 return id_priv; 1911 err: 1912 rdma_destroy_id(id); 1913 return NULL; 1914 } 1915 1916 static void cma_set_req_event_data(struct rdma_cm_event *event, 1917 struct ib_cm_req_event_param *req_data, 1918 void *private_data, int offset) 1919 { 1920 event->param.conn.private_data = private_data + offset; 1921 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 1922 event->param.conn.responder_resources = req_data->responder_resources; 1923 event->param.conn.initiator_depth = req_data->initiator_depth; 1924 event->param.conn.flow_control = req_data->flow_control; 1925 event->param.conn.retry_count = req_data->retry_count; 1926 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 1927 event->param.conn.srq = req_data->srq; 1928 event->param.conn.qp_num = req_data->remote_qpn; 1929 } 1930 1931 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 1932 { 1933 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 1934 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 1935 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 1936 (id->qp_type == IB_QPT_UD)) || 1937 (!id->qp_type)); 1938 } 1939 1940 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1941 { 1942 struct rdma_id_private *listen_id, *conn_id = NULL; 1943 struct rdma_cm_event event; 1944 struct net_device *net_dev; 1945 u8 offset; 1946 
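	/*
	 * Overall flow: map the CM event back to the listening rdma_cm_id
	 * (cma_id_from_event), create a child id for the request
	 * (cma_new_udp_id or cma_new_conn_id), bind it to a device, and
	 * hand the CONNECT_REQUEST event to the listener's callback.
	 */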
int ret; 1947 1948 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 1949 if (IS_ERR(listen_id)) 1950 return PTR_ERR(listen_id); 1951 1952 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 1953 ret = -EINVAL; 1954 goto net_dev_put; 1955 } 1956 1957 mutex_lock(&listen_id->handler_mutex); 1958 if (listen_id->state != RDMA_CM_LISTEN) { 1959 ret = -ECONNABORTED; 1960 goto err1; 1961 } 1962 1963 memset(&event, 0, sizeof event); 1964 offset = cma_user_data_offset(listen_id); 1965 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1966 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1967 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 1968 event.param.ud.private_data = ib_event->private_data + offset; 1969 event.param.ud.private_data_len = 1970 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 1971 } else { 1972 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 1973 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 1974 ib_event->private_data, offset); 1975 } 1976 if (!conn_id) { 1977 ret = -ENOMEM; 1978 goto err1; 1979 } 1980 1981 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1982 ret = cma_acquire_dev(conn_id, listen_id); 1983 if (ret) 1984 goto err2; 1985 1986 conn_id->cm_id.ib = cm_id; 1987 cm_id->context = conn_id; 1988 cm_id->cm_handler = cma_ib_handler; 1989 1990 /* 1991 * Protect against the user destroying conn_id from another thread 1992 * until we're done accessing it. 1993 */ 1994 atomic_inc(&conn_id->refcount); 1995 ret = conn_id->id.event_handler(&conn_id->id, &event); 1996 if (ret) 1997 goto err3; 1998 /* 1999 * Acquire mutex to prevent user executing rdma_destroy_id() 2000 * while we're accessing the cm_id. 2001 */ 2002 mutex_lock(&lock); 2003 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 2004 (conn_id->id.qp_type != IB_QPT_UD)) 2005 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2006 mutex_unlock(&lock); 2007 mutex_unlock(&conn_id->handler_mutex); 2008 mutex_unlock(&listen_id->handler_mutex); 2009 cma_deref_id(conn_id); 2010 if (net_dev) 2011 dev_put(net_dev); 2012 return 0; 2013 2014 err3: 2015 cma_deref_id(conn_id); 2016 /* Destroy the CM ID by returning a non-zero value. 
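 * Returning non-zero from this handler makes the IB CM destroy the
 * ib_cm_id itself, so cm_id.ib is cleared first to keep rdma_destroy_id()
 * from calling ib_destroy_cm_id() on it a second time.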
*/ 2017 conn_id->cm_id.ib = NULL; 2018 err2: 2019 cma_exch(conn_id, RDMA_CM_DESTROYING); 2020 mutex_unlock(&conn_id->handler_mutex); 2021 err1: 2022 mutex_unlock(&listen_id->handler_mutex); 2023 if (conn_id) 2024 rdma_destroy_id(&conn_id->id); 2025 2026 net_dev_put: 2027 if (net_dev) 2028 dev_put(net_dev); 2029 2030 return ret; 2031 } 2032 2033 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2034 { 2035 if (addr->sa_family == AF_IB) 2036 return ((struct sockaddr_ib *) addr)->sib_sid; 2037 2038 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2039 } 2040 EXPORT_SYMBOL(rdma_get_service_id); 2041 2042 void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, 2043 union ib_gid *dgid) 2044 { 2045 struct rdma_addr *addr = &cm_id->route.addr; 2046 2047 if (!cm_id->device) { 2048 if (sgid) 2049 memset(sgid, 0, sizeof(*sgid)); 2050 if (dgid) 2051 memset(dgid, 0, sizeof(*dgid)); 2052 return; 2053 } 2054 2055 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) { 2056 if (sgid) 2057 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid); 2058 if (dgid) 2059 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid); 2060 } else { 2061 if (sgid) 2062 rdma_addr_get_sgid(&addr->dev_addr, sgid); 2063 if (dgid) 2064 rdma_addr_get_dgid(&addr->dev_addr, dgid); 2065 } 2066 } 2067 EXPORT_SYMBOL(rdma_read_gids); 2068 2069 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2070 { 2071 struct rdma_id_private *id_priv = iw_id->context; 2072 struct rdma_cm_event event; 2073 int ret = 0; 2074 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2075 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2076 2077 mutex_lock(&id_priv->handler_mutex); 2078 if (id_priv->state != RDMA_CM_CONNECT) 2079 goto out; 2080 2081 memset(&event, 0, sizeof event); 2082 switch (iw_event->event) { 2083 case IW_CM_EVENT_CLOSE: 2084 event.event = RDMA_CM_EVENT_DISCONNECTED; 2085 break; 2086 case IW_CM_EVENT_CONNECT_REPLY: 2087 memcpy(cma_src_addr(id_priv), laddr, 2088 rdma_addr_size(laddr)); 2089 memcpy(cma_dst_addr(id_priv), raddr, 2090 rdma_addr_size(raddr)); 2091 switch (iw_event->status) { 2092 case 0: 2093 event.event = RDMA_CM_EVENT_ESTABLISHED; 2094 event.param.conn.initiator_depth = iw_event->ird; 2095 event.param.conn.responder_resources = iw_event->ord; 2096 break; 2097 case -ECONNRESET: 2098 case -ECONNREFUSED: 2099 event.event = RDMA_CM_EVENT_REJECTED; 2100 break; 2101 case -ETIMEDOUT: 2102 event.event = RDMA_CM_EVENT_UNREACHABLE; 2103 break; 2104 default: 2105 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2106 break; 2107 } 2108 break; 2109 case IW_CM_EVENT_ESTABLISHED: 2110 event.event = RDMA_CM_EVENT_ESTABLISHED; 2111 event.param.conn.initiator_depth = iw_event->ird; 2112 event.param.conn.responder_resources = iw_event->ord; 2113 break; 2114 default: 2115 BUG_ON(1); 2116 } 2117 2118 event.status = iw_event->status; 2119 event.param.conn.private_data = iw_event->private_data; 2120 event.param.conn.private_data_len = iw_event->private_data_len; 2121 ret = id_priv->id.event_handler(&id_priv->id, &event); 2122 if (ret) { 2123 /* Destroy the CM ID by returning a non-zero value. 
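	 * Clearing cm_id.iw first means the rdma_destroy_id() call below will
	 * not call iw_destroy_cm_id() on the iw_cm_id whose handler is still
	 * running; the iw_cm layer tears it down when we return non-zero.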
*/ 2124 id_priv->cm_id.iw = NULL; 2125 cma_exch(id_priv, RDMA_CM_DESTROYING); 2126 mutex_unlock(&id_priv->handler_mutex); 2127 rdma_destroy_id(&id_priv->id); 2128 return ret; 2129 } 2130 2131 out: 2132 mutex_unlock(&id_priv->handler_mutex); 2133 return ret; 2134 } 2135 2136 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2137 struct iw_cm_event *iw_event) 2138 { 2139 struct rdma_cm_id *new_cm_id; 2140 struct rdma_id_private *listen_id, *conn_id; 2141 struct rdma_cm_event event; 2142 int ret = -ECONNABORTED; 2143 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2144 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2145 2146 listen_id = cm_id->context; 2147 2148 mutex_lock(&listen_id->handler_mutex); 2149 if (listen_id->state != RDMA_CM_LISTEN) 2150 goto out; 2151 2152 /* Create a new RDMA id for the new IW CM ID */ 2153 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2154 listen_id->id.event_handler, 2155 listen_id->id.context, 2156 RDMA_PS_TCP, IB_QPT_RC); 2157 if (IS_ERR(new_cm_id)) { 2158 ret = -ENOMEM; 2159 goto out; 2160 } 2161 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2162 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2163 conn_id->state = RDMA_CM_CONNECT; 2164 2165 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2166 if (ret) { 2167 mutex_unlock(&conn_id->handler_mutex); 2168 rdma_destroy_id(new_cm_id); 2169 goto out; 2170 } 2171 2172 ret = cma_acquire_dev(conn_id, listen_id); 2173 if (ret) { 2174 mutex_unlock(&conn_id->handler_mutex); 2175 rdma_destroy_id(new_cm_id); 2176 goto out; 2177 } 2178 2179 conn_id->cm_id.iw = cm_id; 2180 cm_id->context = conn_id; 2181 cm_id->cm_handler = cma_iw_handler; 2182 2183 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2184 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2185 2186 memset(&event, 0, sizeof event); 2187 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2188 event.param.conn.private_data = iw_event->private_data; 2189 event.param.conn.private_data_len = iw_event->private_data_len; 2190 event.param.conn.initiator_depth = iw_event->ird; 2191 event.param.conn.responder_resources = iw_event->ord; 2192 2193 /* 2194 * Protect against the user destroying conn_id from another thread 2195 * until we're done accessing it. 
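	 * The reference taken below is dropped with cma_deref_id() after the
	 * listener's event handler returns, on both the success and the error
	 * path.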
2196 */ 2197 atomic_inc(&conn_id->refcount); 2198 ret = conn_id->id.event_handler(&conn_id->id, &event); 2199 if (ret) { 2200 /* User wants to destroy the CM ID */ 2201 conn_id->cm_id.iw = NULL; 2202 cma_exch(conn_id, RDMA_CM_DESTROYING); 2203 mutex_unlock(&conn_id->handler_mutex); 2204 cma_deref_id(conn_id); 2205 rdma_destroy_id(&conn_id->id); 2206 goto out; 2207 } 2208 2209 mutex_unlock(&conn_id->handler_mutex); 2210 cma_deref_id(conn_id); 2211 2212 out: 2213 mutex_unlock(&listen_id->handler_mutex); 2214 return ret; 2215 } 2216 2217 static int cma_ib_listen(struct rdma_id_private *id_priv) 2218 { 2219 struct sockaddr *addr; 2220 struct ib_cm_id *id; 2221 __be64 svc_id; 2222 2223 addr = cma_src_addr(id_priv); 2224 svc_id = rdma_get_service_id(&id_priv->id, addr); 2225 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2226 if (IS_ERR(id)) 2227 return PTR_ERR(id); 2228 id_priv->cm_id.ib = id; 2229 2230 return 0; 2231 } 2232 2233 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2234 { 2235 int ret; 2236 struct iw_cm_id *id; 2237 2238 id = iw_create_cm_id(id_priv->id.device, 2239 iw_conn_req_handler, 2240 id_priv); 2241 if (IS_ERR(id)) 2242 return PTR_ERR(id); 2243 2244 id->tos = id_priv->tos; 2245 id_priv->cm_id.iw = id; 2246 2247 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2248 rdma_addr_size(cma_src_addr(id_priv))); 2249 2250 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2251 2252 if (ret) { 2253 iw_destroy_cm_id(id_priv->cm_id.iw); 2254 id_priv->cm_id.iw = NULL; 2255 } 2256 2257 return ret; 2258 } 2259 2260 static int cma_listen_handler(struct rdma_cm_id *id, 2261 struct rdma_cm_event *event) 2262 { 2263 struct rdma_id_private *id_priv = id->context; 2264 2265 id->context = id_priv->id.context; 2266 id->event_handler = id_priv->id.event_handler; 2267 return id_priv->id.event_handler(id, event); 2268 } 2269 2270 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2271 struct cma_device *cma_dev) 2272 { 2273 struct rdma_id_private *dev_id_priv; 2274 struct rdma_cm_id *id; 2275 struct net *net = id_priv->id.route.addr.dev_addr.net; 2276 int ret; 2277 2278 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2279 return; 2280 2281 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2282 id_priv->id.qp_type); 2283 if (IS_ERR(id)) 2284 return; 2285 2286 dev_id_priv = container_of(id, struct rdma_id_private, id); 2287 2288 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2289 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2290 rdma_addr_size(cma_src_addr(id_priv))); 2291 2292 _cma_attach_to_dev(dev_id_priv, cma_dev); 2293 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2294 atomic_inc(&id_priv->refcount); 2295 dev_id_priv->internal_id = 1; 2296 dev_id_priv->afonly = id_priv->afonly; 2297 2298 ret = rdma_listen(id, id_priv->backlog); 2299 if (ret) 2300 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2301 ret, cma_dev->device->name); 2302 } 2303 2304 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2305 { 2306 struct cma_device *cma_dev; 2307 2308 mutex_lock(&lock); 2309 list_add_tail(&id_priv->list, &listen_any_list); 2310 list_for_each_entry(cma_dev, &dev_list, list) 2311 cma_listen_on_dev(id_priv, cma_dev); 2312 mutex_unlock(&lock); 2313 } 2314 2315 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2316 { 2317 struct rdma_id_private *id_priv; 2318 2319 id_priv = container_of(id, struct rdma_id_private, id); 2320 
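	/*
	 * The value is only latched here; it is consumed once the route is
	 * resolved, e.g. mapped to an SL and traffic class for RoCE in
	 * cma_resolve_iboe_route(), or into the QoS class of the path query
	 * in cma_query_ib_route().
	 *
	 * Illustrative consumer usage (sketch only; handler, context and
	 * address names are placeholders, not part of this file):
	 *
	 *	id = rdma_create_id(net, my_handler, my_ctx, RDMA_PS_TCP,
	 *			    IB_QPT_RC);
	 *	rdma_set_service_type(id, tos);
	 *	rdma_resolve_addr(id, NULL, dst_addr, 2000);
	 */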
id_priv->tos = (u8) tos; 2321 id_priv->tos_set = true; 2322 } 2323 EXPORT_SYMBOL(rdma_set_service_type); 2324 2325 static void cma_query_handler(int status, struct sa_path_rec *path_rec, 2326 void *context) 2327 { 2328 struct cma_work *work = context; 2329 struct rdma_route *route; 2330 2331 route = &work->id->id.route; 2332 2333 if (!status) { 2334 route->num_paths = 1; 2335 *route->path_rec = *path_rec; 2336 } else { 2337 work->old_state = RDMA_CM_ROUTE_QUERY; 2338 work->new_state = RDMA_CM_ADDR_RESOLVED; 2339 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2340 work->event.status = status; 2341 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", 2342 status); 2343 } 2344 2345 queue_work(cma_wq, &work->work); 2346 } 2347 2348 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2349 struct cma_work *work) 2350 { 2351 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2352 struct sa_path_rec path_rec; 2353 ib_sa_comp_mask comp_mask; 2354 struct sockaddr_in6 *sin6; 2355 struct sockaddr_ib *sib; 2356 2357 memset(&path_rec, 0, sizeof path_rec); 2358 2359 if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num)) 2360 path_rec.rec_type = SA_PATH_REC_TYPE_OPA; 2361 else 2362 path_rec.rec_type = SA_PATH_REC_TYPE_IB; 2363 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2364 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2365 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2366 path_rec.numb_path = 1; 2367 path_rec.reversible = 1; 2368 path_rec.service_id = rdma_get_service_id(&id_priv->id, 2369 cma_dst_addr(id_priv)); 2370 2371 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2372 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2373 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2374 2375 switch (cma_family(id_priv)) { 2376 case AF_INET: 2377 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2378 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2379 break; 2380 case AF_INET6: 2381 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2382 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2383 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2384 break; 2385 case AF_IB: 2386 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2387 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2388 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2389 break; 2390 } 2391 2392 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2393 id_priv->id.port_num, &path_rec, 2394 comp_mask, timeout_ms, 2395 GFP_KERNEL, cma_query_handler, 2396 work, &id_priv->query); 2397 2398 return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; 2399 } 2400 2401 static void cma_work_handler(struct work_struct *_work) 2402 { 2403 struct cma_work *work = container_of(_work, struct cma_work, work); 2404 struct rdma_id_private *id_priv = work->id; 2405 int destroy = 0; 2406 2407 mutex_lock(&id_priv->handler_mutex); 2408 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2409 goto out; 2410 2411 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2412 cma_exch(id_priv, RDMA_CM_DESTROYING); 2413 destroy = 1; 2414 } 2415 out: 2416 mutex_unlock(&id_priv->handler_mutex); 2417 cma_deref_id(id_priv); 2418 if (destroy) 2419 rdma_destroy_id(&id_priv->id); 2420 kfree(work); 2421 } 2422 2423 static void cma_ndev_work_handler(struct work_struct *_work) 2424 { 2425 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 2426 struct rdma_id_private *id_priv = work->id; 2427 int destroy = 0; 2428 2429 mutex_lock(&id_priv->handler_mutex); 2430 if (id_priv->state == RDMA_CM_DESTROYING || 2431 id_priv->state == RDMA_CM_DEVICE_REMOVAL) 2432 goto out; 2433 2434 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2435 cma_exch(id_priv, RDMA_CM_DESTROYING); 2436 destroy = 1; 2437 } 2438 2439 out: 2440 mutex_unlock(&id_priv->handler_mutex); 2441 cma_deref_id(id_priv); 2442 if (destroy) 2443 rdma_destroy_id(&id_priv->id); 2444 kfree(work); 2445 } 2446 2447 static void cma_init_resolve_route_work(struct cma_work *work, 2448 struct rdma_id_private *id_priv) 2449 { 2450 work->id = id_priv; 2451 INIT_WORK(&work->work, cma_work_handler); 2452 work->old_state = RDMA_CM_ROUTE_QUERY; 2453 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2454 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2455 } 2456 2457 static void cma_init_resolve_addr_work(struct cma_work *work, 2458 struct rdma_id_private *id_priv) 2459 { 2460 work->id = id_priv; 2461 INIT_WORK(&work->work, cma_work_handler); 2462 work->old_state = RDMA_CM_ADDR_QUERY; 2463 work->new_state = RDMA_CM_ADDR_RESOLVED; 2464 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2465 } 2466 2467 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2468 { 2469 struct rdma_route *route = &id_priv->id.route; 2470 struct cma_work *work; 2471 int ret; 2472 2473 work = kzalloc(sizeof *work, GFP_KERNEL); 2474 if (!work) 2475 return -ENOMEM; 2476 2477 cma_init_resolve_route_work(work, id_priv); 2478 2479 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2480 if (!route->path_rec) { 2481 ret = -ENOMEM; 2482 goto err1; 2483 } 2484 2485 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2486 if (ret) 2487 goto err2; 2488 2489 return 0; 2490 err2: 2491 kfree(route->path_rec); 2492 route->path_rec = NULL; 2493 err1: 2494 kfree(work); 2495 return ret; 2496 } 2497 2498 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2499 unsigned long supported_gids, 2500 enum ib_gid_type default_gid) 2501 { 2502 if ((network_type == RDMA_NETWORK_IPV4 || 2503 network_type == RDMA_NETWORK_IPV6) && 2504 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2505 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2506 2507 return default_gid; 2508 } 2509 2510 /* 2511 * cma_iboe_set_path_rec_l2_fields() is helper function which sets 2512 * path record type based on GID type. 2513 * It also sets up other L2 fields which includes destination mac address 2514 * netdev ifindex, of the path record. 2515 * It returns the netdev of the bound interface for this path record entry. 
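 * A NULL return means no interface is bound or it could not be found;
 * otherwise the caller holds a reference on the returned net_device and
 * must release it with dev_put().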
2516 */ 2517 static struct net_device * 2518 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) 2519 { 2520 struct rdma_route *route = &id_priv->id.route; 2521 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; 2522 struct rdma_addr *addr = &route->addr; 2523 unsigned long supported_gids; 2524 struct net_device *ndev; 2525 2526 if (!addr->dev_addr.bound_dev_if) 2527 return NULL; 2528 2529 ndev = dev_get_by_index(addr->dev_addr.net, 2530 addr->dev_addr.bound_dev_if); 2531 if (!ndev) 2532 return NULL; 2533 2534 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2535 id_priv->id.port_num); 2536 gid_type = cma_route_gid_type(addr->dev_addr.network, 2537 supported_gids, 2538 id_priv->gid_type); 2539 /* Use the hint from IP Stack to select GID Type */ 2540 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2541 gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2542 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); 2543 2544 sa_path_set_ndev(route->path_rec, addr->dev_addr.net); 2545 sa_path_set_ifindex(route->path_rec, ndev->ifindex); 2546 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); 2547 return ndev; 2548 } 2549 2550 int rdma_set_ib_path(struct rdma_cm_id *id, 2551 struct sa_path_rec *path_rec) 2552 { 2553 struct rdma_id_private *id_priv; 2554 struct net_device *ndev; 2555 int ret; 2556 2557 id_priv = container_of(id, struct rdma_id_private, id); 2558 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2559 RDMA_CM_ROUTE_RESOLVED)) 2560 return -EINVAL; 2561 2562 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec), 2563 GFP_KERNEL); 2564 if (!id->route.path_rec) { 2565 ret = -ENOMEM; 2566 goto err; 2567 } 2568 2569 if (rdma_protocol_roce(id->device, id->port_num)) { 2570 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2571 if (!ndev) { 2572 ret = -ENODEV; 2573 goto err_free; 2574 } 2575 dev_put(ndev); 2576 } 2577 2578 id->route.num_paths = 1; 2579 return 0; 2580 2581 err_free: 2582 kfree(id->route.path_rec); 2583 id->route.path_rec = NULL; 2584 err: 2585 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2586 return ret; 2587 } 2588 EXPORT_SYMBOL(rdma_set_ib_path); 2589 2590 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2591 { 2592 struct cma_work *work; 2593 2594 work = kzalloc(sizeof *work, GFP_KERNEL); 2595 if (!work) 2596 return -ENOMEM; 2597 2598 cma_init_resolve_route_work(work, id_priv); 2599 queue_work(cma_wq, &work->work); 2600 return 0; 2601 } 2602 2603 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2604 { 2605 int prio; 2606 struct net_device *dev; 2607 2608 prio = rt_tos2priority(tos); 2609 dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; 2610 if (dev->num_tc) 2611 return netdev_get_prio_tc_map(dev, prio); 2612 2613 #if IS_ENABLED(CONFIG_VLAN_8021Q) 2614 if (is_vlan_dev(ndev)) 2615 return (vlan_dev_get_egress_qos_mask(ndev, prio) & 2616 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 2617 #endif 2618 return 0; 2619 } 2620 2621 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2622 { 2623 struct rdma_route *route = &id_priv->id.route; 2624 struct rdma_addr *addr = &route->addr; 2625 struct cma_work *work; 2626 int ret; 2627 struct net_device *ndev; 2628 2629 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 2630 rdma_start_port(id_priv->cma_dev->device)]; 2631 u8 tos = id_priv->tos_set ? 
id_priv->tos : default_roce_tos; 2632 2633 2634 work = kzalloc(sizeof *work, GFP_KERNEL); 2635 if (!work) 2636 return -ENOMEM; 2637 2638 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2639 if (!route->path_rec) { 2640 ret = -ENOMEM; 2641 goto err1; 2642 } 2643 2644 route->num_paths = 1; 2645 2646 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2647 if (!ndev) { 2648 ret = -ENODEV; 2649 goto err2; 2650 } 2651 2652 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2653 &route->path_rec->sgid); 2654 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2655 &route->path_rec->dgid); 2656 2657 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2658 /* TODO: get the hoplimit from the inet/inet6 device */ 2659 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2660 else 2661 route->path_rec->hop_limit = 1; 2662 route->path_rec->reversible = 1; 2663 route->path_rec->pkey = cpu_to_be16(0xffff); 2664 route->path_rec->mtu_selector = IB_SA_EQ; 2665 route->path_rec->sl = iboe_tos_to_sl(ndev, tos); 2666 route->path_rec->traffic_class = tos; 2667 route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 2668 route->path_rec->rate_selector = IB_SA_EQ; 2669 route->path_rec->rate = iboe_get_rate(ndev); 2670 dev_put(ndev); 2671 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2672 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2673 if (!route->path_rec->mtu) { 2674 ret = -EINVAL; 2675 goto err2; 2676 } 2677 2678 cma_init_resolve_route_work(work, id_priv); 2679 queue_work(cma_wq, &work->work); 2680 2681 return 0; 2682 2683 err2: 2684 kfree(route->path_rec); 2685 route->path_rec = NULL; 2686 err1: 2687 kfree(work); 2688 return ret; 2689 } 2690 2691 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2692 { 2693 struct rdma_id_private *id_priv; 2694 int ret; 2695 2696 id_priv = container_of(id, struct rdma_id_private, id); 2697 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2698 return -EINVAL; 2699 2700 atomic_inc(&id_priv->refcount); 2701 if (rdma_cap_ib_sa(id->device, id->port_num)) 2702 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2703 else if (rdma_protocol_roce(id->device, id->port_num)) 2704 ret = cma_resolve_iboe_route(id_priv); 2705 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2706 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2707 else 2708 ret = -ENOSYS; 2709 2710 if (ret) 2711 goto err; 2712 2713 return 0; 2714 err: 2715 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2716 cma_deref_id(id_priv); 2717 return ret; 2718 } 2719 EXPORT_SYMBOL(rdma_resolve_route); 2720 2721 static void cma_set_loopback(struct sockaddr *addr) 2722 { 2723 switch (addr->sa_family) { 2724 case AF_INET: 2725 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2726 break; 2727 case AF_INET6: 2728 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2729 0, 0, 0, htonl(1)); 2730 break; 2731 default: 2732 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2733 0, 0, 0, htonl(1)); 2734 break; 2735 } 2736 } 2737 2738 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2739 { 2740 struct cma_device *cma_dev, *cur_dev; 2741 union ib_gid gid; 2742 enum ib_port_state port_state; 2743 u16 pkey; 2744 int ret; 2745 u8 p; 2746 2747 cma_dev = NULL; 2748 mutex_lock(&lock); 2749 list_for_each_entry(cur_dev, &dev_list, list) { 2750 if (cma_family(id_priv) == AF_IB && 2751 !rdma_cap_ib_cm(cur_dev->device, 1)) 2752 continue; 2753 2754 if (!cma_dev) 
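			/*
			 * Remember the first eligible device as a fallback;
			 * it is replaced below if any device has a port in
			 * the ACTIVE state.
			 */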
2755 cma_dev = cur_dev; 2756 2757 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2758 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && 2759 port_state == IB_PORT_ACTIVE) { 2760 cma_dev = cur_dev; 2761 goto port_found; 2762 } 2763 } 2764 } 2765 2766 if (!cma_dev) { 2767 ret = -ENODEV; 2768 goto out; 2769 } 2770 2771 p = 1; 2772 2773 port_found: 2774 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2775 if (ret) 2776 goto out; 2777 2778 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2779 if (ret) 2780 goto out; 2781 2782 id_priv->id.route.addr.dev_addr.dev_type = 2783 (rdma_protocol_ib(cma_dev->device, p)) ? 2784 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2785 2786 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2787 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2788 id_priv->id.port_num = p; 2789 cma_attach_to_dev(id_priv, cma_dev); 2790 cma_set_loopback(cma_src_addr(id_priv)); 2791 out: 2792 mutex_unlock(&lock); 2793 return ret; 2794 } 2795 2796 static void addr_handler(int status, struct sockaddr *src_addr, 2797 struct rdma_dev_addr *dev_addr, void *context) 2798 { 2799 struct rdma_id_private *id_priv = context; 2800 struct rdma_cm_event event; 2801 2802 memset(&event, 0, sizeof event); 2803 mutex_lock(&id_priv->handler_mutex); 2804 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2805 RDMA_CM_ADDR_RESOLVED)) 2806 goto out; 2807 2808 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2809 if (!status && !id_priv->cma_dev) { 2810 status = cma_acquire_dev(id_priv, NULL); 2811 if (status) 2812 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2813 status); 2814 } else { 2815 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); 2816 } 2817 2818 if (status) { 2819 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2820 RDMA_CM_ADDR_BOUND)) 2821 goto out; 2822 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2823 event.status = status; 2824 } else 2825 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2826 2827 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2828 cma_exch(id_priv, RDMA_CM_DESTROYING); 2829 mutex_unlock(&id_priv->handler_mutex); 2830 cma_deref_id(id_priv); 2831 rdma_destroy_id(&id_priv->id); 2832 return; 2833 } 2834 out: 2835 mutex_unlock(&id_priv->handler_mutex); 2836 cma_deref_id(id_priv); 2837 } 2838 2839 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2840 { 2841 struct cma_work *work; 2842 union ib_gid gid; 2843 int ret; 2844 2845 work = kzalloc(sizeof *work, GFP_KERNEL); 2846 if (!work) 2847 return -ENOMEM; 2848 2849 if (!id_priv->cma_dev) { 2850 ret = cma_bind_loopback(id_priv); 2851 if (ret) 2852 goto err; 2853 } 2854 2855 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2856 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2857 2858 cma_init_resolve_addr_work(work, id_priv); 2859 queue_work(cma_wq, &work->work); 2860 return 0; 2861 err: 2862 kfree(work); 2863 return ret; 2864 } 2865 2866 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2867 { 2868 struct cma_work *work; 2869 int ret; 2870 2871 work = kzalloc(sizeof *work, GFP_KERNEL); 2872 if (!work) 2873 return -ENOMEM; 2874 2875 if (!id_priv->cma_dev) { 2876 ret = cma_resolve_ib_dev(id_priv); 2877 if (ret) 2878 goto err; 2879 } 2880 2881 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2882 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2883 2884 cma_init_resolve_addr_work(work, 
id_priv); 2885 queue_work(cma_wq, &work->work); 2886 return 0; 2887 err: 2888 kfree(work); 2889 return ret; 2890 } 2891 2892 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2893 struct sockaddr *dst_addr) 2894 { 2895 if (!src_addr || !src_addr->sa_family) { 2896 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2897 src_addr->sa_family = dst_addr->sa_family; 2898 if (IS_ENABLED(CONFIG_IPV6) && 2899 dst_addr->sa_family == AF_INET6) { 2900 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2901 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2902 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2903 if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 2904 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2905 } else if (dst_addr->sa_family == AF_IB) { 2906 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2907 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2908 } 2909 } 2910 return rdma_bind_addr(id, src_addr); 2911 } 2912 2913 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2914 struct sockaddr *dst_addr, int timeout_ms) 2915 { 2916 struct rdma_id_private *id_priv; 2917 int ret; 2918 2919 id_priv = container_of(id, struct rdma_id_private, id); 2920 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2921 if (id_priv->state == RDMA_CM_IDLE) { 2922 ret = cma_bind_addr(id, src_addr, dst_addr); 2923 if (ret) { 2924 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2925 return ret; 2926 } 2927 } 2928 2929 if (cma_family(id_priv) != dst_addr->sa_family) { 2930 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2931 return -EINVAL; 2932 } 2933 2934 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 2935 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 2936 return -EINVAL; 2937 } 2938 2939 atomic_inc(&id_priv->refcount); 2940 if (cma_any_addr(dst_addr)) { 2941 ret = cma_resolve_loopback(id_priv); 2942 } else { 2943 if (dst_addr->sa_family == AF_IB) { 2944 ret = cma_resolve_ib_addr(id_priv); 2945 } else { 2946 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2947 dst_addr, &id->route.addr.dev_addr, 2948 timeout_ms, addr_handler, id_priv); 2949 } 2950 } 2951 if (ret) 2952 goto err; 2953 2954 return 0; 2955 err: 2956 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2957 cma_deref_id(id_priv); 2958 return ret; 2959 } 2960 EXPORT_SYMBOL(rdma_resolve_addr); 2961 2962 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2963 { 2964 struct rdma_id_private *id_priv; 2965 unsigned long flags; 2966 int ret; 2967 2968 id_priv = container_of(id, struct rdma_id_private, id); 2969 spin_lock_irqsave(&id_priv->lock, flags); 2970 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2971 id_priv->reuseaddr = reuse; 2972 ret = 0; 2973 } else { 2974 ret = -EINVAL; 2975 } 2976 spin_unlock_irqrestore(&id_priv->lock, flags); 2977 return ret; 2978 } 2979 EXPORT_SYMBOL(rdma_set_reuseaddr); 2980 2981 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2982 { 2983 struct rdma_id_private *id_priv; 2984 unsigned long flags; 2985 int ret; 2986 2987 id_priv = container_of(id, struct rdma_id_private, id); 2988 spin_lock_irqsave(&id_priv->lock, flags); 2989 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2990 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2991 id_priv->afonly = afonly; 2992 ret = 0; 2993 } else { 2994 ret = -EINVAL; 2995 } 2996 spin_unlock_irqrestore(&id_priv->lock, flags); 2997 
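	/*
	 * The stored flag is honoured later by cma_check_port() when it
	 * decides whether two ids of different address families may share a
	 * port.  Sketch of typical use (placeholder names, not part of this
	 * file):
	 *
	 *	rdma_set_afonly(id, 1);
	 *	rdma_bind_addr(id, (struct sockaddr *)&sin6);
	 */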
return ret; 2998 } 2999 EXPORT_SYMBOL(rdma_set_afonly); 3000 3001 static void cma_bind_port(struct rdma_bind_list *bind_list, 3002 struct rdma_id_private *id_priv) 3003 { 3004 struct sockaddr *addr; 3005 struct sockaddr_ib *sib; 3006 u64 sid, mask; 3007 __be16 port; 3008 3009 addr = cma_src_addr(id_priv); 3010 port = htons(bind_list->port); 3011 3012 switch (addr->sa_family) { 3013 case AF_INET: 3014 ((struct sockaddr_in *) addr)->sin_port = port; 3015 break; 3016 case AF_INET6: 3017 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3018 break; 3019 case AF_IB: 3020 sib = (struct sockaddr_ib *) addr; 3021 sid = be64_to_cpu(sib->sib_sid); 3022 mask = be64_to_cpu(sib->sib_sid_mask); 3023 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3024 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3025 break; 3026 } 3027 id_priv->bind_list = bind_list; 3028 hlist_add_head(&id_priv->node, &bind_list->owners); 3029 } 3030 3031 static int cma_alloc_port(enum rdma_port_space ps, 3032 struct rdma_id_private *id_priv, unsigned short snum) 3033 { 3034 struct rdma_bind_list *bind_list; 3035 int ret; 3036 3037 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3038 if (!bind_list) 3039 return -ENOMEM; 3040 3041 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3042 snum); 3043 if (ret < 0) 3044 goto err; 3045 3046 bind_list->ps = ps; 3047 bind_list->port = (unsigned short)ret; 3048 cma_bind_port(bind_list, id_priv); 3049 return 0; 3050 err: 3051 kfree(bind_list); 3052 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3053 } 3054 3055 static int cma_port_is_unique(struct rdma_bind_list *bind_list, 3056 struct rdma_id_private *id_priv) 3057 { 3058 struct rdma_id_private *cur_id; 3059 struct sockaddr *daddr = cma_dst_addr(id_priv); 3060 struct sockaddr *saddr = cma_src_addr(id_priv); 3061 __be16 dport = cma_port(daddr); 3062 3063 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3064 struct sockaddr *cur_daddr = cma_dst_addr(cur_id); 3065 struct sockaddr *cur_saddr = cma_src_addr(cur_id); 3066 __be16 cur_dport = cma_port(cur_daddr); 3067 3068 if (id_priv == cur_id) 3069 continue; 3070 3071 /* different dest port -> unique */ 3072 if (!cma_any_port(daddr) && 3073 !cma_any_port(cur_daddr) && 3074 (dport != cur_dport)) 3075 continue; 3076 3077 /* different src address -> unique */ 3078 if (!cma_any_addr(saddr) && 3079 !cma_any_addr(cur_saddr) && 3080 cma_addr_cmp(saddr, cur_saddr)) 3081 continue; 3082 3083 /* different dst address -> unique */ 3084 if (!cma_any_addr(daddr) && 3085 !cma_any_addr(cur_daddr) && 3086 cma_addr_cmp(daddr, cur_daddr)) 3087 continue; 3088 3089 return -EADDRNOTAVAIL; 3090 } 3091 return 0; 3092 } 3093 3094 static int cma_alloc_any_port(enum rdma_port_space ps, 3095 struct rdma_id_private *id_priv) 3096 { 3097 static unsigned int last_used_port; 3098 int low, high, remaining; 3099 unsigned int rover; 3100 struct net *net = id_priv->id.route.addr.dev_addr.net; 3101 3102 inet_get_local_port_range(net, &low, &high); 3103 remaining = (high - low) + 1; 3104 rover = prandom_u32() % remaining + low; 3105 retry: 3106 if (last_used_port != rover) { 3107 struct rdma_bind_list *bind_list; 3108 int ret; 3109 3110 bind_list = cma_ps_find(net, ps, (unsigned short)rover); 3111 3112 if (!bind_list) { 3113 ret = cma_alloc_port(ps, id_priv, rover); 3114 } else { 3115 ret = cma_port_is_unique(bind_list, id_priv); 3116 if (!ret) 3117 cma_bind_port(bind_list, id_priv); 3118 } 3119 /* 3120 * Remember previously used port number in order to avoid 3121 * re-using same port immediately 
after it is closed. 3122 */ 3123 if (!ret) 3124 last_used_port = rover; 3125 if (ret != -EADDRNOTAVAIL) 3126 return ret; 3127 } 3128 if (--remaining) { 3129 rover++; 3130 if ((rover < low) || (rover > high)) 3131 rover = low; 3132 goto retry; 3133 } 3134 return -EADDRNOTAVAIL; 3135 } 3136 3137 /* 3138 * Check that the requested port is available. This is called when trying to 3139 * bind to a specific port, or when trying to listen on a bound port. In 3140 * the latter case, the provided id_priv may already be on the bind_list, but 3141 * we still need to check that it's okay to start listening. 3142 */ 3143 static int cma_check_port(struct rdma_bind_list *bind_list, 3144 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3145 { 3146 struct rdma_id_private *cur_id; 3147 struct sockaddr *addr, *cur_addr; 3148 3149 addr = cma_src_addr(id_priv); 3150 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3151 if (id_priv == cur_id) 3152 continue; 3153 3154 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3155 cur_id->reuseaddr) 3156 continue; 3157 3158 cur_addr = cma_src_addr(cur_id); 3159 if (id_priv->afonly && cur_id->afonly && 3160 (addr->sa_family != cur_addr->sa_family)) 3161 continue; 3162 3163 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3164 return -EADDRNOTAVAIL; 3165 3166 if (!cma_addr_cmp(addr, cur_addr)) 3167 return -EADDRINUSE; 3168 } 3169 return 0; 3170 } 3171 3172 static int cma_use_port(enum rdma_port_space ps, 3173 struct rdma_id_private *id_priv) 3174 { 3175 struct rdma_bind_list *bind_list; 3176 unsigned short snum; 3177 int ret; 3178 3179 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3180 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 3181 return -EACCES; 3182 3183 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3184 if (!bind_list) { 3185 ret = cma_alloc_port(ps, id_priv, snum); 3186 } else { 3187 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3188 if (!ret) 3189 cma_bind_port(bind_list, id_priv); 3190 } 3191 return ret; 3192 } 3193 3194 static int cma_bind_listen(struct rdma_id_private *id_priv) 3195 { 3196 struct rdma_bind_list *bind_list = id_priv->bind_list; 3197 int ret = 0; 3198 3199 mutex_lock(&lock); 3200 if (bind_list->owners.first->next) 3201 ret = cma_check_port(bind_list, id_priv, 0); 3202 mutex_unlock(&lock); 3203 return ret; 3204 } 3205 3206 static enum rdma_port_space cma_select_inet_ps( 3207 struct rdma_id_private *id_priv) 3208 { 3209 switch (id_priv->id.ps) { 3210 case RDMA_PS_TCP: 3211 case RDMA_PS_UDP: 3212 case RDMA_PS_IPOIB: 3213 case RDMA_PS_IB: 3214 return id_priv->id.ps; 3215 default: 3216 3217 return 0; 3218 } 3219 } 3220 3221 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3222 { 3223 enum rdma_port_space ps = 0; 3224 struct sockaddr_ib *sib; 3225 u64 sid_ps, mask, sid; 3226 3227 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3228 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3229 sid = be64_to_cpu(sib->sib_sid) & mask; 3230 3231 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3232 sid_ps = RDMA_IB_IP_PS_IB; 3233 ps = RDMA_PS_IB; 3234 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3235 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3236 sid_ps = RDMA_IB_IP_PS_TCP; 3237 ps = RDMA_PS_TCP; 3238 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3239 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3240 sid_ps = RDMA_IB_IP_PS_UDP; 3241 ps = RDMA_PS_UDP; 3242 } 3243 
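	/*
	 * If a port space was recognised above, rewrite the AF_IB service ID
	 * into its canonical form: the well-known port-space prefix in the
	 * upper bits with the 16-bit port number in the low bits, and widen
	 * the service ID mask accordingly.
	 */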
3244 if (ps) { 3245 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3246 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3247 be64_to_cpu(sib->sib_sid_mask)); 3248 } 3249 return ps; 3250 } 3251 3252 static int cma_get_port(struct rdma_id_private *id_priv) 3253 { 3254 enum rdma_port_space ps; 3255 int ret; 3256 3257 if (cma_family(id_priv) != AF_IB) 3258 ps = cma_select_inet_ps(id_priv); 3259 else 3260 ps = cma_select_ib_ps(id_priv); 3261 if (!ps) 3262 return -EPROTONOSUPPORT; 3263 3264 mutex_lock(&lock); 3265 if (cma_any_port(cma_src_addr(id_priv))) 3266 ret = cma_alloc_any_port(ps, id_priv); 3267 else 3268 ret = cma_use_port(ps, id_priv); 3269 mutex_unlock(&lock); 3270 3271 return ret; 3272 } 3273 3274 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3275 struct sockaddr *addr) 3276 { 3277 #if IS_ENABLED(CONFIG_IPV6) 3278 struct sockaddr_in6 *sin6; 3279 3280 if (addr->sa_family != AF_INET6) 3281 return 0; 3282 3283 sin6 = (struct sockaddr_in6 *) addr; 3284 3285 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 3286 return 0; 3287 3288 if (!sin6->sin6_scope_id) 3289 return -EINVAL; 3290 3291 dev_addr->bound_dev_if = sin6->sin6_scope_id; 3292 #endif 3293 return 0; 3294 } 3295 3296 int rdma_listen(struct rdma_cm_id *id, int backlog) 3297 { 3298 struct rdma_id_private *id_priv; 3299 int ret; 3300 3301 id_priv = container_of(id, struct rdma_id_private, id); 3302 if (id_priv->state == RDMA_CM_IDLE) { 3303 id->route.addr.src_addr.ss_family = AF_INET; 3304 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3305 if (ret) 3306 return ret; 3307 } 3308 3309 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3310 return -EINVAL; 3311 3312 if (id_priv->reuseaddr) { 3313 ret = cma_bind_listen(id_priv); 3314 if (ret) 3315 goto err; 3316 } 3317 3318 id_priv->backlog = backlog; 3319 if (id->device) { 3320 if (rdma_cap_ib_cm(id->device, 1)) { 3321 ret = cma_ib_listen(id_priv); 3322 if (ret) 3323 goto err; 3324 } else if (rdma_cap_iw_cm(id->device, 1)) { 3325 ret = cma_iw_listen(id_priv, backlog); 3326 if (ret) 3327 goto err; 3328 } else { 3329 ret = -ENOSYS; 3330 goto err; 3331 } 3332 } else 3333 cma_listen_on_all(id_priv); 3334 3335 return 0; 3336 err: 3337 id_priv->backlog = 0; 3338 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3339 return ret; 3340 } 3341 EXPORT_SYMBOL(rdma_listen); 3342 3343 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3344 { 3345 struct rdma_id_private *id_priv; 3346 int ret; 3347 struct sockaddr *daddr; 3348 3349 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3350 addr->sa_family != AF_IB) 3351 return -EAFNOSUPPORT; 3352 3353 id_priv = container_of(id, struct rdma_id_private, id); 3354 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3355 return -EINVAL; 3356 3357 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3358 if (ret) 3359 goto err1; 3360 3361 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3362 if (!cma_any_addr(addr)) { 3363 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3364 if (ret) 3365 goto err1; 3366 3367 ret = cma_acquire_dev(id_priv, NULL); 3368 if (ret) 3369 goto err1; 3370 } 3371 3372 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3373 if (addr->sa_family == AF_INET) 3374 id_priv->afonly = 1; 3375 #if IS_ENABLED(CONFIG_IPV6) 3376 else if (addr->sa_family == AF_INET6) { 3377 struct net *net = id_priv->id.route.addr.dev_addr.net; 3378 3379 id_priv->afonly = net->ipv6.sysctl.bindv6only; 3380 
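			/*
			 * Mirror normal socket behaviour: the IPv6 default
			 * follows the net.ipv6.bindv6only sysctl of the id's
			 * network namespace.
			 */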
} 3381 #endif 3382 } 3383 daddr = cma_dst_addr(id_priv); 3384 daddr->sa_family = addr->sa_family; 3385 3386 ret = cma_get_port(id_priv); 3387 if (ret) 3388 goto err2; 3389 3390 return 0; 3391 err2: 3392 if (id_priv->cma_dev) 3393 cma_release_dev(id_priv); 3394 err1: 3395 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3396 return ret; 3397 } 3398 EXPORT_SYMBOL(rdma_bind_addr); 3399 3400 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3401 { 3402 struct cma_hdr *cma_hdr; 3403 3404 cma_hdr = hdr; 3405 cma_hdr->cma_version = CMA_VERSION; 3406 if (cma_family(id_priv) == AF_INET) { 3407 struct sockaddr_in *src4, *dst4; 3408 3409 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3410 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3411 3412 cma_set_ip_ver(cma_hdr, 4); 3413 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3414 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3415 cma_hdr->port = src4->sin_port; 3416 } else if (cma_family(id_priv) == AF_INET6) { 3417 struct sockaddr_in6 *src6, *dst6; 3418 3419 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3420 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3421 3422 cma_set_ip_ver(cma_hdr, 6); 3423 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3424 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3425 cma_hdr->port = src6->sin6_port; 3426 } 3427 return 0; 3428 } 3429 3430 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3431 struct ib_cm_event *ib_event) 3432 { 3433 struct rdma_id_private *id_priv = cm_id->context; 3434 struct rdma_cm_event event; 3435 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3436 int ret = 0; 3437 3438 mutex_lock(&id_priv->handler_mutex); 3439 if (id_priv->state != RDMA_CM_CONNECT) 3440 goto out; 3441 3442 memset(&event, 0, sizeof event); 3443 switch (ib_event->event) { 3444 case IB_CM_SIDR_REQ_ERROR: 3445 event.event = RDMA_CM_EVENT_UNREACHABLE; 3446 event.status = -ETIMEDOUT; 3447 break; 3448 case IB_CM_SIDR_REP_RECEIVED: 3449 event.param.ud.private_data = ib_event->private_data; 3450 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3451 if (rep->status != IB_SIDR_SUCCESS) { 3452 event.event = RDMA_CM_EVENT_UNREACHABLE; 3453 event.status = ib_event->param.sidr_rep_rcvd.status; 3454 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", 3455 event.status); 3456 break; 3457 } 3458 ret = cma_set_qkey(id_priv, rep->qkey); 3459 if (ret) { 3460 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); 3461 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3462 event.status = ret; 3463 break; 3464 } 3465 ib_init_ah_attr_from_path(id_priv->id.device, 3466 id_priv->id.port_num, 3467 id_priv->id.route.path_rec, 3468 &event.param.ud.ah_attr); 3469 event.param.ud.qp_num = rep->qpn; 3470 event.param.ud.qkey = rep->qkey; 3471 event.event = RDMA_CM_EVENT_ESTABLISHED; 3472 event.status = 0; 3473 break; 3474 default: 3475 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3476 ib_event->event); 3477 goto out; 3478 } 3479 3480 ret = id_priv->id.event_handler(&id_priv->id, &event); 3481 if (ret) { 3482 /* Destroy the CM ID by returning a non-zero value. 
*/ 3483 id_priv->cm_id.ib = NULL; 3484 cma_exch(id_priv, RDMA_CM_DESTROYING); 3485 mutex_unlock(&id_priv->handler_mutex); 3486 rdma_destroy_id(&id_priv->id); 3487 return ret; 3488 } 3489 out: 3490 mutex_unlock(&id_priv->handler_mutex); 3491 return ret; 3492 } 3493 3494 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3495 struct rdma_conn_param *conn_param) 3496 { 3497 struct ib_cm_sidr_req_param req; 3498 struct ib_cm_id *id; 3499 void *private_data; 3500 u8 offset; 3501 int ret; 3502 3503 memset(&req, 0, sizeof req); 3504 offset = cma_user_data_offset(id_priv); 3505 req.private_data_len = offset + conn_param->private_data_len; 3506 if (req.private_data_len < conn_param->private_data_len) 3507 return -EINVAL; 3508 3509 if (req.private_data_len) { 3510 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3511 if (!private_data) 3512 return -ENOMEM; 3513 } else { 3514 private_data = NULL; 3515 } 3516 3517 if (conn_param->private_data && conn_param->private_data_len) 3518 memcpy(private_data + offset, conn_param->private_data, 3519 conn_param->private_data_len); 3520 3521 if (private_data) { 3522 ret = cma_format_hdr(private_data, id_priv); 3523 if (ret) 3524 goto out; 3525 req.private_data = private_data; 3526 } 3527 3528 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3529 id_priv); 3530 if (IS_ERR(id)) { 3531 ret = PTR_ERR(id); 3532 goto out; 3533 } 3534 id_priv->cm_id.ib = id; 3535 3536 req.path = id_priv->id.route.path_rec; 3537 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3538 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3539 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3540 3541 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3542 if (ret) { 3543 ib_destroy_cm_id(id_priv->cm_id.ib); 3544 id_priv->cm_id.ib = NULL; 3545 } 3546 out: 3547 kfree(private_data); 3548 return ret; 3549 } 3550 3551 static int cma_connect_ib(struct rdma_id_private *id_priv, 3552 struct rdma_conn_param *conn_param) 3553 { 3554 struct ib_cm_req_param req; 3555 struct rdma_route *route; 3556 void *private_data; 3557 struct ib_cm_id *id; 3558 u8 offset; 3559 int ret; 3560 3561 memset(&req, 0, sizeof req); 3562 offset = cma_user_data_offset(id_priv); 3563 req.private_data_len = offset + conn_param->private_data_len; 3564 if (req.private_data_len < conn_param->private_data_len) 3565 return -EINVAL; 3566 3567 if (req.private_data_len) { 3568 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3569 if (!private_data) 3570 return -ENOMEM; 3571 } else { 3572 private_data = NULL; 3573 } 3574 3575 if (conn_param->private_data && conn_param->private_data_len) 3576 memcpy(private_data + offset, conn_param->private_data, 3577 conn_param->private_data_len); 3578 3579 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3580 if (IS_ERR(id)) { 3581 ret = PTR_ERR(id); 3582 goto out; 3583 } 3584 id_priv->cm_id.ib = id; 3585 3586 route = &id_priv->id.route; 3587 if (private_data) { 3588 ret = cma_format_hdr(private_data, id_priv); 3589 if (ret) 3590 goto out; 3591 req.private_data = private_data; 3592 } 3593 3594 req.primary_path = &route->path_rec[0]; 3595 if (route->num_paths == 2) 3596 req.alternate_path = &route->path_rec[1]; 3597 3598 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3599 req.qp_num = id_priv->qp_num; 3600 req.qp_type = id_priv->id.qp_type; 3601 req.starting_psn = id_priv->seq_num; 3602 req.responder_resources = conn_param->responder_resources; 3603 req.initiator_depth = 
conn_param->initiator_depth; 3604 req.flow_control = conn_param->flow_control; 3605 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3606 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3607 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3608 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3609 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3610 req.srq = id_priv->srq ? 1 : 0; 3611 3612 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3613 out: 3614 if (ret && !IS_ERR(id)) { 3615 ib_destroy_cm_id(id); 3616 id_priv->cm_id.ib = NULL; 3617 } 3618 3619 kfree(private_data); 3620 return ret; 3621 } 3622 3623 static int cma_connect_iw(struct rdma_id_private *id_priv, 3624 struct rdma_conn_param *conn_param) 3625 { 3626 struct iw_cm_id *cm_id; 3627 int ret; 3628 struct iw_cm_conn_param iw_param; 3629 3630 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3631 if (IS_ERR(cm_id)) 3632 return PTR_ERR(cm_id); 3633 3634 cm_id->tos = id_priv->tos; 3635 id_priv->cm_id.iw = cm_id; 3636 3637 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3638 rdma_addr_size(cma_src_addr(id_priv))); 3639 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3640 rdma_addr_size(cma_dst_addr(id_priv))); 3641 3642 ret = cma_modify_qp_rtr(id_priv, conn_param); 3643 if (ret) 3644 goto out; 3645 3646 if (conn_param) { 3647 iw_param.ord = conn_param->initiator_depth; 3648 iw_param.ird = conn_param->responder_resources; 3649 iw_param.private_data = conn_param->private_data; 3650 iw_param.private_data_len = conn_param->private_data_len; 3651 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3652 } else { 3653 memset(&iw_param, 0, sizeof iw_param); 3654 iw_param.qpn = id_priv->qp_num; 3655 } 3656 ret = iw_cm_connect(cm_id, &iw_param); 3657 out: 3658 if (ret) { 3659 iw_destroy_cm_id(cm_id); 3660 id_priv->cm_id.iw = NULL; 3661 } 3662 return ret; 3663 } 3664 3665 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3666 { 3667 struct rdma_id_private *id_priv; 3668 int ret; 3669 3670 id_priv = container_of(id, struct rdma_id_private, id); 3671 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3672 return -EINVAL; 3673 3674 if (!id->qp) { 3675 id_priv->qp_num = conn_param->qp_num; 3676 id_priv->srq = conn_param->srq; 3677 } 3678 3679 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3680 if (id->qp_type == IB_QPT_UD) 3681 ret = cma_resolve_ib_udp(id_priv, conn_param); 3682 else 3683 ret = cma_connect_ib(id_priv, conn_param); 3684 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3685 ret = cma_connect_iw(id_priv, conn_param); 3686 else 3687 ret = -ENOSYS; 3688 if (ret) 3689 goto err; 3690 3691 return 0; 3692 err: 3693 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3694 return ret; 3695 } 3696 EXPORT_SYMBOL(rdma_connect); 3697 3698 static int cma_accept_ib(struct rdma_id_private *id_priv, 3699 struct rdma_conn_param *conn_param) 3700 { 3701 struct ib_cm_rep_param rep; 3702 int ret; 3703 3704 ret = cma_modify_qp_rtr(id_priv, conn_param); 3705 if (ret) 3706 goto out; 3707 3708 ret = cma_modify_qp_rts(id_priv, conn_param); 3709 if (ret) 3710 goto out; 3711 3712 memset(&rep, 0, sizeof rep); 3713 rep.qp_num = id_priv->qp_num; 3714 rep.starting_psn = id_priv->seq_num; 3715 rep.private_data = conn_param->private_data; 3716 rep.private_data_len = conn_param->private_data_len; 3717 rep.responder_resources = conn_param->responder_resources; 3718 rep.initiator_depth = conn_param->initiator_depth; 3719 
rep.failover_accepted = 0; 3720 rep.flow_control = conn_param->flow_control; 3721 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3722 rep.srq = id_priv->srq ? 1 : 0; 3723 3724 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3725 out: 3726 return ret; 3727 } 3728 3729 static int cma_accept_iw(struct rdma_id_private *id_priv, 3730 struct rdma_conn_param *conn_param) 3731 { 3732 struct iw_cm_conn_param iw_param; 3733 int ret; 3734 3735 if (!conn_param) 3736 return -EINVAL; 3737 3738 ret = cma_modify_qp_rtr(id_priv, conn_param); 3739 if (ret) 3740 return ret; 3741 3742 iw_param.ord = conn_param->initiator_depth; 3743 iw_param.ird = conn_param->responder_resources; 3744 iw_param.private_data = conn_param->private_data; 3745 iw_param.private_data_len = conn_param->private_data_len; 3746 if (id_priv->id.qp) { 3747 iw_param.qpn = id_priv->qp_num; 3748 } else 3749 iw_param.qpn = conn_param->qp_num; 3750 3751 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3752 } 3753 3754 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3755 enum ib_cm_sidr_status status, u32 qkey, 3756 const void *private_data, int private_data_len) 3757 { 3758 struct ib_cm_sidr_rep_param rep; 3759 int ret; 3760 3761 memset(&rep, 0, sizeof rep); 3762 rep.status = status; 3763 if (status == IB_SIDR_SUCCESS) { 3764 ret = cma_set_qkey(id_priv, qkey); 3765 if (ret) 3766 return ret; 3767 rep.qp_num = id_priv->qp_num; 3768 rep.qkey = id_priv->qkey; 3769 } 3770 rep.private_data = private_data; 3771 rep.private_data_len = private_data_len; 3772 3773 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3774 } 3775 3776 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3777 { 3778 struct rdma_id_private *id_priv; 3779 int ret; 3780 3781 id_priv = container_of(id, struct rdma_id_private, id); 3782 3783 id_priv->owner = task_pid_nr(current); 3784 3785 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3786 return -EINVAL; 3787 3788 if (!id->qp && conn_param) { 3789 id_priv->qp_num = conn_param->qp_num; 3790 id_priv->srq = conn_param->srq; 3791 } 3792 3793 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3794 if (id->qp_type == IB_QPT_UD) { 3795 if (conn_param) 3796 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3797 conn_param->qkey, 3798 conn_param->private_data, 3799 conn_param->private_data_len); 3800 else 3801 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3802 0, NULL, 0); 3803 } else { 3804 if (conn_param) 3805 ret = cma_accept_ib(id_priv, conn_param); 3806 else 3807 ret = cma_rep_recv(id_priv); 3808 } 3809 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3810 ret = cma_accept_iw(id_priv, conn_param); 3811 else 3812 ret = -ENOSYS; 3813 3814 if (ret) 3815 goto reject; 3816 3817 return 0; 3818 reject: 3819 cma_modify_qp_err(id_priv); 3820 rdma_reject(id, NULL, 0); 3821 return ret; 3822 } 3823 EXPORT_SYMBOL(rdma_accept); 3824 3825 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3826 { 3827 struct rdma_id_private *id_priv; 3828 int ret; 3829 3830 id_priv = container_of(id, struct rdma_id_private, id); 3831 if (!id_priv->cm_id.ib) 3832 return -EINVAL; 3833 3834 switch (id->device->node_type) { 3835 case RDMA_NODE_IB_CA: 3836 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3837 break; 3838 default: 3839 ret = 0; 3840 break; 3841 } 3842 return ret; 3843 } 3844 EXPORT_SYMBOL(rdma_notify); 3845 3846 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3847 u8 private_data_len) 3848 { 3849 struct rdma_id_private *id_priv; 3850 int ret; 3851 3852 id_priv = 
container_of(id, struct rdma_id_private, id); 3853 if (!id_priv->cm_id.ib) 3854 return -EINVAL; 3855 3856 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3857 if (id->qp_type == IB_QPT_UD) 3858 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3859 private_data, private_data_len); 3860 else 3861 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3862 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3863 0, private_data, private_data_len); 3864 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3865 ret = iw_cm_reject(id_priv->cm_id.iw, 3866 private_data, private_data_len); 3867 } else 3868 ret = -ENOSYS; 3869 3870 return ret; 3871 } 3872 EXPORT_SYMBOL(rdma_reject); 3873 3874 int rdma_disconnect(struct rdma_cm_id *id) 3875 { 3876 struct rdma_id_private *id_priv; 3877 int ret; 3878 3879 id_priv = container_of(id, struct rdma_id_private, id); 3880 if (!id_priv->cm_id.ib) 3881 return -EINVAL; 3882 3883 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3884 ret = cma_modify_qp_err(id_priv); 3885 if (ret) 3886 goto out; 3887 /* Initiate or respond to a disconnect. */ 3888 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3889 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3890 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3891 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3892 } else 3893 ret = -EINVAL; 3894 3895 out: 3896 return ret; 3897 } 3898 EXPORT_SYMBOL(rdma_disconnect); 3899 3900 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3901 { 3902 struct rdma_id_private *id_priv; 3903 struct cma_multicast *mc = multicast->context; 3904 struct rdma_cm_event event; 3905 int ret = 0; 3906 3907 id_priv = mc->id_priv; 3908 mutex_lock(&id_priv->handler_mutex); 3909 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3910 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3911 goto out; 3912 3913 if (!status) 3914 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3915 else 3916 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", 3917 status); 3918 mutex_lock(&id_priv->qp_mutex); 3919 if (!status && id_priv->id.qp) { 3920 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3921 be16_to_cpu(multicast->rec.mlid)); 3922 if (status) 3923 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. 
status %d\n", 3924 status); 3925 } 3926 mutex_unlock(&id_priv->qp_mutex); 3927 3928 memset(&event, 0, sizeof event); 3929 event.status = status; 3930 event.param.ud.private_data = mc->context; 3931 if (!status) { 3932 struct rdma_dev_addr *dev_addr = 3933 &id_priv->id.route.addr.dev_addr; 3934 struct net_device *ndev = 3935 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 3936 enum ib_gid_type gid_type = 3937 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3938 rdma_start_port(id_priv->cma_dev->device)]; 3939 3940 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3941 ib_init_ah_from_mcmember(id_priv->id.device, 3942 id_priv->id.port_num, &multicast->rec, 3943 ndev, gid_type, 3944 &event.param.ud.ah_attr); 3945 event.param.ud.qp_num = 0xFFFFFF; 3946 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3947 if (ndev) 3948 dev_put(ndev); 3949 } else 3950 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3951 3952 ret = id_priv->id.event_handler(&id_priv->id, &event); 3953 if (ret) { 3954 cma_exch(id_priv, RDMA_CM_DESTROYING); 3955 mutex_unlock(&id_priv->handler_mutex); 3956 rdma_destroy_id(&id_priv->id); 3957 return 0; 3958 } 3959 3960 out: 3961 mutex_unlock(&id_priv->handler_mutex); 3962 return 0; 3963 } 3964 3965 static void cma_set_mgid(struct rdma_id_private *id_priv, 3966 struct sockaddr *addr, union ib_gid *mgid) 3967 { 3968 unsigned char mc_map[MAX_ADDR_LEN]; 3969 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3970 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 3971 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 3972 3973 if (cma_any_addr(addr)) { 3974 memset(mgid, 0, sizeof *mgid); 3975 } else if ((addr->sa_family == AF_INET6) && 3976 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 3977 0xFF10A01B)) { 3978 /* IPv6 address is an SA assigned MGID. 
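		 * (the 0xA01B signature is already present in the upper
		 * word), so the address is used verbatim as the MGID rather
		 * than being mapped from the IP address.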
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_IB) {
		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
	} else if ((addr->sa_family == AF_INET6)) {
		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	} else {
		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	}
}

/*
 * Join an IB multicast group by submitting an MCMemberRecord to the
 * subnet administrator (SA) on behalf of this ID.
 */
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
	    (!ib_sa_sendonly_fullmem_support(&sa_client,
					     id_priv->id.device,
					     id_priv->id.port_num))) {
		pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
			"RDMA CM: SM doesn't support Send Only Full Member option\n",
			id_priv->id.device->name, id_priv->id.port_num);
		return -EOPNOTSUPP;
	}

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	if (id_priv->id.ps == RDMA_PS_IPOIB)
		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU |
			     IB_SA_MCMEMBER_REC_HOP_LIMIT;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, GFP_KERNEL,
						cma_ib_mc_handler, mc);
	return PTR_ERR_OR_ZERO(mc->multicast.ib);
}

/*
 * RoCE joins involve no SA transaction; the deferred work item completes
 * the join by invoking the common handler with a success status.
 */
static void iboe_mcast_work_handler(struct work_struct *work)
{
	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
	struct cma_multicast *mc = mw->mc;
	struct ib_sa_multicast *m = mc->multicast.ib;

	mc->multicast.ib->context = mc;
	cma_ib_mc_handler(0, m);
	kref_put(&mc->mcref, release_mc);
	kfree(mw);
}

/*
 * Build the MGID used on RoCE: IPv6 group addresses map directly, while
 * IPv4 groups get a synthetic MGID whose prefix depends on the GID type.
 */
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
			      enum ib_gid_type gid_type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		mgid->raw[0] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
		mgid->raw[1] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}

/*
 * Emulated multicast join for RoCE: fill in the ib_sa_multicast record
 * locally, send an IGMP join for IPv4 RoCE v2 groups unless the join is
 * send-only, and complete asynchronously via iboe_mcast_work_handler().
 */
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);

	if (addr->sa_family == AF_INET) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	if (!id->device)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

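	/* Dispatch to the transport-specific join path. */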
	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);

/*
 * Queue an ADDR_CHANGE event for an ID whose bound net_device no longer
 * carries the hardware address recorded in the ID, e.g. after a bonding
 * failover.
 */
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr;
	struct cma_ndev_work *work;

	dev_addr = &id_priv->id.route.addr.dev_addr;

	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
	    (net_eq(dev_net(ndev), dev_addr->net)) &&
	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
			ndev->name, &id_priv->id);
		work = kzalloc(sizeof *work, GFP_KERNEL);
		if (!work)
			return -ENOMEM;

		INIT_WORK(&work->work, cma_ndev_work_handler);
		work->id = id_priv;
		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
		atomic_inc(&id_priv->refcount);
		queue_work(cma_wq, &work->work);
	}

	return 0;
}

static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
			       void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_BONDING_FAILOVER)
		return NOTIFY_DONE;

	if (!netif_is_bond_master(ndev))
		return NOTIFY_DONE;

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list)
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			ret = cma_netdev_change(ndev, id_priv);
			if (ret)
				goto out;
		}

out:
	mutex_unlock(&lock);
	return ret;
}

static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

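/*
 * ib_client add callback: allocate per-device CM state, select a default
 * GID type for each port, and attach any wildcard listeners from
 * listen_any_list to the new device.
 */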
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);

	return;
}

/*
 * Report DEVICE_REMOVAL to the ID's event handler unless the ID is already
 * being destroyed.  A non-zero return tells the caller to destroy the ID.
 */
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

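		/*
		 * IDs marked internal_id are destroyed unconditionally;
		 * user-owned IDs are destroyed only if their handler returns
		 * non-zero from the DEVICE_REMOVAL event.
		 */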
		ret = id_priv->internal_id ? 1 :
		      cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlmsghdr *nlh;
	struct rdma_cm_id_stats *id_stats;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id = NULL;
	struct cma_device *cma_dev;
	int i_dev = 0, i_id = 0;

	/*
	 * We export all of the IDs as a sequence of messages.  Each
	 * ID gets its own netlink message.
	 */
	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list) {
		if (i_dev < cb->args[0]) {
			i_dev++;
			continue;
		}

		i_id = 0;
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			if (i_id < cb->args[1]) {
				i_id++;
				continue;
			}

			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
						sizeof *id_stats, RDMA_NL_RDMA_CM,
						RDMA_NL_RDMA_CM_ID_STATS,
						NLM_F_MULTI);
			if (!id_stats)
				goto out;

			memset(id_stats, 0, sizeof *id_stats);
			id = &id_priv->id;
			id_stats->node_type = id->route.addr.dev_addr.dev_type;
			id_stats->port_num = id->port_num;
			id_stats->bound_dev_if =
				id->route.addr.dev_addr.bound_dev_if;

			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_src_addr(id_priv)),
					  cma_src_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
				goto out;
			if (ibnl_put_attr(skb, nlh,
					  rdma_addr_size(cma_dst_addr(id_priv)),
					  cma_dst_addr(id_priv),
					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
				goto out;

			id_stats->pid = id_priv->owner;
			id_stats->port_space = id->ps;
			id_stats->cm_state = id_priv->state;
			id_stats->qp_num = id_priv->qp_num;
			id_stats->qp_type = id->qp_type;

			i_id++;
			nlmsg_end(skb, nlh);
		}

		cb->args[1] = 0;
		i_dev++;
	}

out:
	mutex_unlock(&lock);
	cb->args[0] = i_dev;
	cb->args[1] = i_id;

	return skb->len;
}

static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};

static int cma_init_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);

	return 0;
}

static void cma_exit_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
}

static struct pernet_operations cma_pernet_operations = {
	.init = cma_init_net,
	.exit = cma_exit_net,
	.id = &cma_pernet_id,
	.size = sizeof(struct cma_pernet),
};

static int __init cma_init(void)
{
	int ret;

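	/*
	 * An ordered workqueue executes at most one work item at a time, in
	 * queueing order; WQ_MEM_RECLAIM guarantees forward progress under
	 * memory pressure.
	 */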
	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&cma_pernet_operations);
	if (ret)
		goto err_wq;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);
	register_netdevice_notifier(&cma_nb);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
	cma_configfs_init();

	return 0;

err:
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
err_wq:
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	rdma_nl_unregister(RDMA_NL_RDMA_CM);
	ib_unregister_client(&cma_client);
	unregister_netdevice_notifier(&cma_nb);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
	destroy_workqueue(cma_wq);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);

module_init(cma_init);
module_exit(cma_cleanup);