// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
#include <net/ip6_route.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "cma_trace.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved ",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
			 union ib_gid *mgid);

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);

const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
						int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return ibcm_reject_msg(reason);

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return iwcm_reject_msg(reason);

	WARN_ON_ONCE(1);
	return "unrecognized transport";
}
EXPORT_SYMBOL(rdma_reject_msg);

/**
 * rdma_is_consumer_reject - return true if the consumer rejected the connect
 *			     request.
 * @id: Communication identifier that received the REJECT event.
 * @reason: Value returned in the REJECT event status field.
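 *
 * A minimal, illustrative sketch (not part of this file) of how a ULP event
 * handler might combine the reject helpers in this file; "ulp_cm_handler" is
 * a made-up name, the helpers are the real exported entry points:
 *
 *	static int ulp_cm_handler(struct rdma_cm_id *id,
 *				  struct rdma_cm_event *ev)
 *	{
 *		if (ev->event == RDMA_CM_EVENT_REJECTED) {
 *			u8 len;
 *			const void *pdata =
 *				rdma_consumer_reject_data(id, ev, &len);
 *
 *			pr_debug("rejected: %s (%u private byte(s) at %p)\n",
 *				 rdma_reject_msg(id, ev->status), len, pdata);
 *		}
 *		return 0;
 *	}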
101 */ 102 static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) 103 { 104 if (rdma_ib_or_roce(id->device, id->port_num)) 105 return reason == IB_CM_REJ_CONSUMER_DEFINED; 106 107 if (rdma_protocol_iwarp(id->device, id->port_num)) 108 return reason == -ECONNREFUSED; 109 110 WARN_ON_ONCE(1); 111 return false; 112 } 113 114 const void *rdma_consumer_reject_data(struct rdma_cm_id *id, 115 struct rdma_cm_event *ev, u8 *data_len) 116 { 117 const void *p; 118 119 if (rdma_is_consumer_reject(id, ev->status)) { 120 *data_len = ev->param.conn.private_data_len; 121 p = ev->param.conn.private_data; 122 } else { 123 *data_len = 0; 124 p = NULL; 125 } 126 return p; 127 } 128 EXPORT_SYMBOL(rdma_consumer_reject_data); 129 130 /** 131 * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id. 132 * @id: Communication Identifier 133 */ 134 struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id) 135 { 136 struct rdma_id_private *id_priv; 137 138 id_priv = container_of(id, struct rdma_id_private, id); 139 if (id->device->node_type == RDMA_NODE_RNIC) 140 return id_priv->cm_id.iw; 141 return NULL; 142 } 143 EXPORT_SYMBOL(rdma_iw_cm_id); 144 145 /** 146 * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack. 147 * @res: rdma resource tracking entry pointer 148 */ 149 struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res) 150 { 151 struct rdma_id_private *id_priv = 152 container_of(res, struct rdma_id_private, res); 153 154 return &id_priv->id; 155 } 156 EXPORT_SYMBOL(rdma_res_to_id); 157 158 static int cma_add_one(struct ib_device *device); 159 static void cma_remove_one(struct ib_device *device, void *client_data); 160 161 static struct ib_client cma_client = { 162 .name = "cma", 163 .add = cma_add_one, 164 .remove = cma_remove_one 165 }; 166 167 static struct ib_sa_client sa_client; 168 static LIST_HEAD(dev_list); 169 static LIST_HEAD(listen_any_list); 170 static DEFINE_MUTEX(lock); 171 static struct workqueue_struct *cma_wq; 172 static unsigned int cma_pernet_id; 173 174 struct cma_pernet { 175 struct xarray tcp_ps; 176 struct xarray udp_ps; 177 struct xarray ipoib_ps; 178 struct xarray ib_ps; 179 }; 180 181 static struct cma_pernet *cma_pernet(struct net *net) 182 { 183 return net_generic(net, cma_pernet_id); 184 } 185 186 static 187 struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps) 188 { 189 struct cma_pernet *pernet = cma_pernet(net); 190 191 switch (ps) { 192 case RDMA_PS_TCP: 193 return &pernet->tcp_ps; 194 case RDMA_PS_UDP: 195 return &pernet->udp_ps; 196 case RDMA_PS_IPOIB: 197 return &pernet->ipoib_ps; 198 case RDMA_PS_IB: 199 return &pernet->ib_ps; 200 default: 201 return NULL; 202 } 203 } 204 205 struct cma_device { 206 struct list_head list; 207 struct ib_device *device; 208 struct completion comp; 209 refcount_t refcount; 210 struct list_head id_list; 211 enum ib_gid_type *default_gid_type; 212 u8 *default_roce_tos; 213 }; 214 215 struct rdma_bind_list { 216 enum rdma_ucm_port_space ps; 217 struct hlist_head owners; 218 unsigned short port; 219 }; 220 221 static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps, 222 struct rdma_bind_list *bind_list, int snum) 223 { 224 struct xarray *xa = cma_pernet_xa(net, ps); 225 226 return xa_insert(xa, snum, bind_list, GFP_KERNEL); 227 } 228 229 static struct rdma_bind_list *cma_ps_find(struct net *net, 230 enum rdma_ucm_port_space ps, int snum) 231 { 232 struct xarray *xa = cma_pernet_xa(net, ps); 233 234 return xa_load(xa, snum); 235 } 236 237 static void cma_ps_remove(struct 
net *net, enum rdma_ucm_port_space ps,
			  int snum)
{
	struct xarray *xa = cma_pernet_xa(net, ps);

	xa_erase(xa, snum);
}

enum {
	CMA_OPTION_AFONLY,
};

void cma_dev_get(struct cma_device *cma_dev)
{
	refcount_inc(&cma_dev->refcount);
}

void cma_dev_put(struct cma_device *cma_dev)
{
	if (refcount_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;

	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_dev_get(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
			     u32 port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     u32 port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	if (default_gid_type == IB_GID_TYPE_IB &&
	    rdma_protocol_roce_eth_encap(cma_dev->device, port))
		default_gid_type = IB_GID_TYPE_ROCE;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

int cma_get_default_roce_tos(struct cma_device *cma_dev, u32 port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_roce_tos(struct cma_device *cma_dev, u32 port,
			     u8 default_roce_tos)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
		default_roce_tos;

	return 0;
}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at any time, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
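 *
 * A rough summary of how the code below realizes this: event handlers run
 * with the per-ID handler_mutex held (cma_cm_event_handler() asserts it),
 * and destroy_id_handler_unlock() moves the ID to RDMA_CM_DESTROYING under
 * that same mutex, so a removal report cannot overlap a callback that is
 * still executing.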
342 */ 343 344 struct cma_multicast { 345 struct rdma_id_private *id_priv; 346 union { 347 struct ib_sa_multicast *sa_mc; 348 struct { 349 struct work_struct work; 350 struct rdma_cm_event event; 351 } iboe_join; 352 }; 353 struct list_head list; 354 void *context; 355 struct sockaddr_storage addr; 356 u8 join_state; 357 }; 358 359 struct cma_work { 360 struct work_struct work; 361 struct rdma_id_private *id; 362 enum rdma_cm_state old_state; 363 enum rdma_cm_state new_state; 364 struct rdma_cm_event event; 365 }; 366 367 union cma_ip_addr { 368 struct in6_addr ip6; 369 struct { 370 __be32 pad[3]; 371 __be32 addr; 372 } ip4; 373 }; 374 375 struct cma_hdr { 376 u8 cma_version; 377 u8 ip_version; /* IP version: 7:4 */ 378 __be16 port; 379 union cma_ip_addr src_addr; 380 union cma_ip_addr dst_addr; 381 }; 382 383 #define CMA_VERSION 0x00 384 385 struct cma_req_info { 386 struct sockaddr_storage listen_addr_storage; 387 struct sockaddr_storage src_addr_storage; 388 struct ib_device *device; 389 union ib_gid local_gid; 390 __be64 service_id; 391 int port; 392 bool has_gid; 393 u16 pkey; 394 }; 395 396 static int cma_comp_exch(struct rdma_id_private *id_priv, 397 enum rdma_cm_state comp, enum rdma_cm_state exch) 398 { 399 unsigned long flags; 400 int ret; 401 402 /* 403 * The FSM uses a funny double locking where state is protected by both 404 * the handler_mutex and the spinlock. State is not allowed to change 405 * to/from a handler_mutex protected value without also holding 406 * handler_mutex. 407 */ 408 if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT) 409 lockdep_assert_held(&id_priv->handler_mutex); 410 411 spin_lock_irqsave(&id_priv->lock, flags); 412 if ((ret = (id_priv->state == comp))) 413 id_priv->state = exch; 414 spin_unlock_irqrestore(&id_priv->lock, flags); 415 return ret; 416 } 417 418 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) 419 { 420 return hdr->ip_version >> 4; 421 } 422 423 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 424 { 425 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 426 } 427 428 static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) 429 { 430 struct in_device *in_dev = NULL; 431 432 if (ndev) { 433 rtnl_lock(); 434 in_dev = __in_dev_get_rtnl(ndev); 435 if (in_dev) { 436 if (join) 437 ip_mc_inc_group(in_dev, 438 *(__be32 *)(mgid->raw + 12)); 439 else 440 ip_mc_dec_group(in_dev, 441 *(__be32 *)(mgid->raw + 12)); 442 } 443 rtnl_unlock(); 444 } 445 return (in_dev) ? 
0 : -ENODEV; 446 } 447 448 static void _cma_attach_to_dev(struct rdma_id_private *id_priv, 449 struct cma_device *cma_dev) 450 { 451 cma_dev_get(cma_dev); 452 id_priv->cma_dev = cma_dev; 453 id_priv->id.device = cma_dev->device; 454 id_priv->id.route.addr.dev_addr.transport = 455 rdma_node_get_transport(cma_dev->device->node_type); 456 list_add_tail(&id_priv->list, &cma_dev->id_list); 457 458 trace_cm_id_attach(id_priv, cma_dev->device); 459 } 460 461 static void cma_attach_to_dev(struct rdma_id_private *id_priv, 462 struct cma_device *cma_dev) 463 { 464 _cma_attach_to_dev(id_priv, cma_dev); 465 id_priv->gid_type = 466 cma_dev->default_gid_type[id_priv->id.port_num - 467 rdma_start_port(cma_dev->device)]; 468 } 469 470 static void cma_release_dev(struct rdma_id_private *id_priv) 471 { 472 mutex_lock(&lock); 473 list_del(&id_priv->list); 474 cma_dev_put(id_priv->cma_dev); 475 id_priv->cma_dev = NULL; 476 if (id_priv->id.route.addr.dev_addr.sgid_attr) { 477 rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); 478 id_priv->id.route.addr.dev_addr.sgid_attr = NULL; 479 } 480 mutex_unlock(&lock); 481 } 482 483 static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 484 { 485 return (struct sockaddr *) &id_priv->id.route.addr.src_addr; 486 } 487 488 static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 489 { 490 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 491 } 492 493 static inline unsigned short cma_family(struct rdma_id_private *id_priv) 494 { 495 return id_priv->id.route.addr.src_addr.ss_family; 496 } 497 498 static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) 499 { 500 struct ib_sa_mcmember_rec rec; 501 int ret = 0; 502 503 if (id_priv->qkey) { 504 if (qkey && id_priv->qkey != qkey) 505 return -EINVAL; 506 return 0; 507 } 508 509 if (qkey) { 510 id_priv->qkey = qkey; 511 return 0; 512 } 513 514 switch (id_priv->id.ps) { 515 case RDMA_PS_UDP: 516 case RDMA_PS_IB: 517 id_priv->qkey = RDMA_UDP_QKEY; 518 break; 519 case RDMA_PS_IPOIB: 520 ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); 521 ret = ib_sa_get_mcmember_rec(id_priv->id.device, 522 id_priv->id.port_num, &rec.mgid, 523 &rec); 524 if (!ret) 525 id_priv->qkey = be32_to_cpu(rec.qkey); 526 break; 527 default: 528 break; 529 } 530 return ret; 531 } 532 533 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 534 { 535 dev_addr->dev_type = ARPHRD_INFINIBAND; 536 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); 537 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); 538 } 539 540 static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 541 { 542 int ret; 543 544 if (addr->sa_family != AF_IB) { 545 ret = rdma_translate_ip(addr, dev_addr); 546 } else { 547 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 548 ret = 0; 549 } 550 551 return ret; 552 } 553 554 static const struct ib_gid_attr * 555 cma_validate_port(struct ib_device *device, u32 port, 556 enum ib_gid_type gid_type, 557 union ib_gid *gid, 558 struct rdma_id_private *id_priv) 559 { 560 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 561 int bound_if_index = dev_addr->bound_dev_if; 562 const struct ib_gid_attr *sgid_attr; 563 int dev_type = dev_addr->dev_type; 564 struct net_device *ndev = NULL; 565 566 if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) 567 return ERR_PTR(-ENODEV); 568 569 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 570 
		return ERR_PTR(-ENODEV);

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ERR_PTR(-ENODEV);

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(dev_addr->net, bound_if_index);
		if (!ndev)
			return ERR_PTR(-ENODEV);
	} else {
		gid_type = IB_GID_TYPE_IB;
	}

	sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
	if (ndev)
		dev_put(ndev);
	return sgid_attr;
}

static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
			       const struct ib_gid_attr *sgid_attr)
{
	WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}

/**
 * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
 * based on source ip address.
 * @id_priv: cm_id which should be bound to cma device
 *
 * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
 * based on source IP address. It returns 0 on success or error code otherwise.
 * It is applicable to active and passive side cm_id.
 */
static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	const struct ib_gid_attr *sgid_attr;
	union ib_gid gid, iboe_gid, *gidp;
	struct cma_device *cma_dev;
	enum ib_gid_type gid_type;
	int ret = -ENODEV;
	u32 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof(gid));

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list) {
		rdma_for_each_port (cma_dev->device, port) {
			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;
			gid_type = cma_dev->default_gid_type[port - 1];
			sgid_attr = cma_validate_port(cma_dev->device, port,
						      gid_type, gidp, id_priv);
			if (!IS_ERR(sgid_attr)) {
				id_priv->id.port_num = port;
				cma_bind_sgid_attr(id_priv, sgid_attr);
				cma_attach_to_dev(id_priv, cma_dev);
				ret = 0;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&lock);
	return ret;
}

/**
 * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
 * @id_priv: cm id to bind to cma device
 * @listen_id_priv: listener cm id to match against
 * @req: Pointer to req structure containing incoming
 *	 request information
 * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
 * rdma device matches for listen_id and incoming request. It also verifies
 * that a GID table entry is present for the source address.
 * Returns 0 on success, or an error code otherwise.
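 *
 * Roughly, the source GID it validates comes from two places: on RoCE ports
 * it is derived from the source IP address with rdma_ip2gid(), while on IB
 * it is copied out of dev_addr->src_dev_addr at rdma_addr_gid_offset().
 * Attaching to the listener's cma_dev happens under the global lock so that
 * readers of cma_dev->id_list (e.g. cma_netdev_callback()) stay consistent.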
657 */ 658 static int cma_ib_acquire_dev(struct rdma_id_private *id_priv, 659 const struct rdma_id_private *listen_id_priv, 660 struct cma_req_info *req) 661 { 662 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 663 const struct ib_gid_attr *sgid_attr; 664 enum ib_gid_type gid_type; 665 union ib_gid gid; 666 667 if (dev_addr->dev_type != ARPHRD_INFINIBAND && 668 id_priv->id.ps == RDMA_PS_IPOIB) 669 return -EINVAL; 670 671 if (rdma_protocol_roce(req->device, req->port)) 672 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 673 &gid); 674 else 675 memcpy(&gid, dev_addr->src_dev_addr + 676 rdma_addr_gid_offset(dev_addr), sizeof(gid)); 677 678 gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1]; 679 sgid_attr = cma_validate_port(req->device, req->port, 680 gid_type, &gid, id_priv); 681 if (IS_ERR(sgid_attr)) 682 return PTR_ERR(sgid_attr); 683 684 id_priv->id.port_num = req->port; 685 cma_bind_sgid_attr(id_priv, sgid_attr); 686 /* Need to acquire lock to protect against reader 687 * of cma_dev->id_list such as cma_netdev_callback() and 688 * cma_process_remove(). 689 */ 690 mutex_lock(&lock); 691 cma_attach_to_dev(id_priv, listen_id_priv->cma_dev); 692 mutex_unlock(&lock); 693 rdma_restrack_add(&id_priv->res); 694 return 0; 695 } 696 697 static int cma_iw_acquire_dev(struct rdma_id_private *id_priv, 698 const struct rdma_id_private *listen_id_priv) 699 { 700 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 701 const struct ib_gid_attr *sgid_attr; 702 struct cma_device *cma_dev; 703 enum ib_gid_type gid_type; 704 int ret = -ENODEV; 705 union ib_gid gid; 706 u32 port; 707 708 if (dev_addr->dev_type != ARPHRD_INFINIBAND && 709 id_priv->id.ps == RDMA_PS_IPOIB) 710 return -EINVAL; 711 712 memcpy(&gid, dev_addr->src_dev_addr + 713 rdma_addr_gid_offset(dev_addr), sizeof(gid)); 714 715 mutex_lock(&lock); 716 717 cma_dev = listen_id_priv->cma_dev; 718 port = listen_id_priv->id.port_num; 719 gid_type = listen_id_priv->gid_type; 720 sgid_attr = cma_validate_port(cma_dev->device, port, 721 gid_type, &gid, id_priv); 722 if (!IS_ERR(sgid_attr)) { 723 id_priv->id.port_num = port; 724 cma_bind_sgid_attr(id_priv, sgid_attr); 725 ret = 0; 726 goto out; 727 } 728 729 list_for_each_entry(cma_dev, &dev_list, list) { 730 rdma_for_each_port (cma_dev->device, port) { 731 if (listen_id_priv->cma_dev == cma_dev && 732 listen_id_priv->id.port_num == port) 733 continue; 734 735 gid_type = cma_dev->default_gid_type[port - 1]; 736 sgid_attr = cma_validate_port(cma_dev->device, port, 737 gid_type, &gid, id_priv); 738 if (!IS_ERR(sgid_attr)) { 739 id_priv->id.port_num = port; 740 cma_bind_sgid_attr(id_priv, sgid_attr); 741 ret = 0; 742 goto out; 743 } 744 } 745 } 746 747 out: 748 if (!ret) { 749 cma_attach_to_dev(id_priv, cma_dev); 750 rdma_restrack_add(&id_priv->res); 751 } 752 753 mutex_unlock(&lock); 754 return ret; 755 } 756 757 /* 758 * Select the source IB device and address to reach the destination IB address. 
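 *
 * In rough terms: every port of every registered cma_device is scanned; a
 * candidate port must support AF_IB, contain the requested P_Key and report
 * a readable port state.  The ID is then bound to the first GID-table entry
 * that either equals the destination GID outright or, on an ACTIVE port,
 * shares its subnet prefix.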
759 */ 760 static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) 761 { 762 struct cma_device *cma_dev, *cur_dev; 763 struct sockaddr_ib *addr; 764 union ib_gid gid, sgid, *dgid; 765 unsigned int p; 766 u16 pkey, index; 767 enum ib_port_state port_state; 768 int i; 769 770 cma_dev = NULL; 771 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); 772 dgid = (union ib_gid *) &addr->sib_addr; 773 pkey = ntohs(addr->sib_pkey); 774 775 mutex_lock(&lock); 776 list_for_each_entry(cur_dev, &dev_list, list) { 777 rdma_for_each_port (cur_dev->device, p) { 778 if (!rdma_cap_af_ib(cur_dev->device, p)) 779 continue; 780 781 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) 782 continue; 783 784 if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) 785 continue; 786 for (i = 0; !rdma_query_gid(cur_dev->device, 787 p, i, &gid); 788 i++) { 789 if (!memcmp(&gid, dgid, sizeof(gid))) { 790 cma_dev = cur_dev; 791 sgid = gid; 792 id_priv->id.port_num = p; 793 goto found; 794 } 795 796 if (!cma_dev && (gid.global.subnet_prefix == 797 dgid->global.subnet_prefix) && 798 port_state == IB_PORT_ACTIVE) { 799 cma_dev = cur_dev; 800 sgid = gid; 801 id_priv->id.port_num = p; 802 goto found; 803 } 804 } 805 } 806 } 807 mutex_unlock(&lock); 808 return -ENODEV; 809 810 found: 811 cma_attach_to_dev(id_priv, cma_dev); 812 rdma_restrack_add(&id_priv->res); 813 mutex_unlock(&lock); 814 addr = (struct sockaddr_ib *)cma_src_addr(id_priv); 815 memcpy(&addr->sib_addr, &sgid, sizeof(sgid)); 816 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); 817 return 0; 818 } 819 820 static void cma_id_get(struct rdma_id_private *id_priv) 821 { 822 refcount_inc(&id_priv->refcount); 823 } 824 825 static void cma_id_put(struct rdma_id_private *id_priv) 826 { 827 if (refcount_dec_and_test(&id_priv->refcount)) 828 complete(&id_priv->comp); 829 } 830 831 static struct rdma_id_private * 832 __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, 833 void *context, enum rdma_ucm_port_space ps, 834 enum ib_qp_type qp_type, const struct rdma_id_private *parent) 835 { 836 struct rdma_id_private *id_priv; 837 838 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); 839 if (!id_priv) 840 return ERR_PTR(-ENOMEM); 841 842 id_priv->state = RDMA_CM_IDLE; 843 id_priv->id.context = context; 844 id_priv->id.event_handler = event_handler; 845 id_priv->id.ps = ps; 846 id_priv->id.qp_type = qp_type; 847 id_priv->tos_set = false; 848 id_priv->timeout_set = false; 849 id_priv->min_rnr_timer_set = false; 850 id_priv->gid_type = IB_GID_TYPE_IB; 851 spin_lock_init(&id_priv->lock); 852 mutex_init(&id_priv->qp_mutex); 853 init_completion(&id_priv->comp); 854 refcount_set(&id_priv->refcount, 1); 855 mutex_init(&id_priv->handler_mutex); 856 INIT_LIST_HEAD(&id_priv->listen_list); 857 INIT_LIST_HEAD(&id_priv->mc_list); 858 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 859 id_priv->id.route.addr.dev_addr.net = get_net(net); 860 id_priv->seq_num &= 0x00ffffff; 861 862 rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); 863 if (parent) 864 rdma_restrack_parent_name(&id_priv->res, &parent->res); 865 866 return id_priv; 867 } 868 869 struct rdma_cm_id * 870 __rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler, 871 void *context, enum rdma_ucm_port_space ps, 872 enum ib_qp_type qp_type, const char *caller) 873 { 874 struct rdma_id_private *ret; 875 876 ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL); 877 if (IS_ERR(ret)) 878 return ERR_CAST(ret); 879 880 
rdma_restrack_set_name(&ret->res, caller); 881 return &ret->id; 882 } 883 EXPORT_SYMBOL(__rdma_create_kernel_id); 884 885 struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler, 886 void *context, 887 enum rdma_ucm_port_space ps, 888 enum ib_qp_type qp_type) 889 { 890 struct rdma_id_private *ret; 891 892 ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context, 893 ps, qp_type, NULL); 894 if (IS_ERR(ret)) 895 return ERR_CAST(ret); 896 897 rdma_restrack_set_name(&ret->res, NULL); 898 return &ret->id; 899 } 900 EXPORT_SYMBOL(rdma_create_user_id); 901 902 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 903 { 904 struct ib_qp_attr qp_attr; 905 int qp_attr_mask, ret; 906 907 qp_attr.qp_state = IB_QPS_INIT; 908 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 909 if (ret) 910 return ret; 911 912 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 913 if (ret) 914 return ret; 915 916 qp_attr.qp_state = IB_QPS_RTR; 917 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 918 if (ret) 919 return ret; 920 921 qp_attr.qp_state = IB_QPS_RTS; 922 qp_attr.sq_psn = 0; 923 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 924 925 return ret; 926 } 927 928 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 929 { 930 struct ib_qp_attr qp_attr; 931 int qp_attr_mask, ret; 932 933 qp_attr.qp_state = IB_QPS_INIT; 934 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 935 if (ret) 936 return ret; 937 938 return ib_modify_qp(qp, &qp_attr, qp_attr_mask); 939 } 940 941 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 942 struct ib_qp_init_attr *qp_init_attr) 943 { 944 struct rdma_id_private *id_priv; 945 struct ib_qp *qp; 946 int ret; 947 948 id_priv = container_of(id, struct rdma_id_private, id); 949 if (id->device != pd->device) { 950 ret = -EINVAL; 951 goto out_err; 952 } 953 954 qp_init_attr->port_num = id->port_num; 955 qp = ib_create_qp(pd, qp_init_attr); 956 if (IS_ERR(qp)) { 957 ret = PTR_ERR(qp); 958 goto out_err; 959 } 960 961 if (id->qp_type == IB_QPT_UD) 962 ret = cma_init_ud_qp(id_priv, qp); 963 else 964 ret = cma_init_conn_qp(id_priv, qp); 965 if (ret) 966 goto out_destroy; 967 968 id->qp = qp; 969 id_priv->qp_num = qp->qp_num; 970 id_priv->srq = (qp->srq != NULL); 971 trace_cm_qp_create(id_priv, pd, qp_init_attr, 0); 972 return 0; 973 out_destroy: 974 ib_destroy_qp(qp); 975 out_err: 976 trace_cm_qp_create(id_priv, pd, qp_init_attr, ret); 977 return ret; 978 } 979 EXPORT_SYMBOL(rdma_create_qp); 980 981 void rdma_destroy_qp(struct rdma_cm_id *id) 982 { 983 struct rdma_id_private *id_priv; 984 985 id_priv = container_of(id, struct rdma_id_private, id); 986 trace_cm_qp_destroy(id_priv); 987 mutex_lock(&id_priv->qp_mutex); 988 ib_destroy_qp(id_priv->id.qp); 989 id_priv->id.qp = NULL; 990 mutex_unlock(&id_priv->qp_mutex); 991 } 992 EXPORT_SYMBOL(rdma_destroy_qp); 993 994 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, 995 struct rdma_conn_param *conn_param) 996 { 997 struct ib_qp_attr qp_attr; 998 int qp_attr_mask, ret; 999 1000 mutex_lock(&id_priv->qp_mutex); 1001 if (!id_priv->id.qp) { 1002 ret = 0; 1003 goto out; 1004 } 1005 1006 /* Need to update QP attributes from default values. 
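 * The sequence below is INIT first (with the attribute mask supplied by
 * rdma_init_qp_attr()) and then RTR; when a conn_param is given, its
 * responder_resources value is applied as max_dest_rd_atomic for the RTR
 * transition.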
*/ 1007 qp_attr.qp_state = IB_QPS_INIT; 1008 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 1009 if (ret) 1010 goto out; 1011 1012 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 1013 if (ret) 1014 goto out; 1015 1016 qp_attr.qp_state = IB_QPS_RTR; 1017 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 1018 if (ret) 1019 goto out; 1020 1021 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 1022 1023 if (conn_param) 1024 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 1025 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 1026 out: 1027 mutex_unlock(&id_priv->qp_mutex); 1028 return ret; 1029 } 1030 1031 static int cma_modify_qp_rts(struct rdma_id_private *id_priv, 1032 struct rdma_conn_param *conn_param) 1033 { 1034 struct ib_qp_attr qp_attr; 1035 int qp_attr_mask, ret; 1036 1037 mutex_lock(&id_priv->qp_mutex); 1038 if (!id_priv->id.qp) { 1039 ret = 0; 1040 goto out; 1041 } 1042 1043 qp_attr.qp_state = IB_QPS_RTS; 1044 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 1045 if (ret) 1046 goto out; 1047 1048 if (conn_param) 1049 qp_attr.max_rd_atomic = conn_param->initiator_depth; 1050 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 1051 out: 1052 mutex_unlock(&id_priv->qp_mutex); 1053 return ret; 1054 } 1055 1056 static int cma_modify_qp_err(struct rdma_id_private *id_priv) 1057 { 1058 struct ib_qp_attr qp_attr; 1059 int ret; 1060 1061 mutex_lock(&id_priv->qp_mutex); 1062 if (!id_priv->id.qp) { 1063 ret = 0; 1064 goto out; 1065 } 1066 1067 qp_attr.qp_state = IB_QPS_ERR; 1068 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 1069 out: 1070 mutex_unlock(&id_priv->qp_mutex); 1071 return ret; 1072 } 1073 1074 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 1075 struct ib_qp_attr *qp_attr, int *qp_attr_mask) 1076 { 1077 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 1078 int ret; 1079 u16 pkey; 1080 1081 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) 1082 pkey = 0xffff; 1083 else 1084 pkey = ib_addr_get_pkey(dev_addr); 1085 1086 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 1087 pkey, &qp_attr->pkey_index); 1088 if (ret) 1089 return ret; 1090 1091 qp_attr->port_num = id_priv->id.port_num; 1092 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 1093 1094 if (id_priv->id.qp_type == IB_QPT_UD) { 1095 ret = cma_set_qkey(id_priv, 0); 1096 if (ret) 1097 return ret; 1098 1099 qp_attr->qkey = id_priv->qkey; 1100 *qp_attr_mask |= IB_QP_QKEY; 1101 } else { 1102 qp_attr->qp_access_flags = 0; 1103 *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 1104 } 1105 return 0; 1106 } 1107 1108 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 1109 int *qp_attr_mask) 1110 { 1111 struct rdma_id_private *id_priv; 1112 int ret = 0; 1113 1114 id_priv = container_of(id, struct rdma_id_private, id); 1115 if (rdma_cap_ib_cm(id->device, id->port_num)) { 1116 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) 1117 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 1118 else 1119 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 1120 qp_attr_mask); 1121 1122 if (qp_attr->qp_state == IB_QPS_RTR) 1123 qp_attr->rq_psn = id_priv->seq_num; 1124 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 1125 if (!id_priv->cm_id.iw) { 1126 qp_attr->qp_access_flags = 0; 1127 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1128 } else 1129 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 1130 qp_attr_mask); 1131 
qp_attr->port_num = id_priv->id.port_num; 1132 *qp_attr_mask |= IB_QP_PORT; 1133 } else { 1134 ret = -ENOSYS; 1135 } 1136 1137 if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set) 1138 qp_attr->timeout = id_priv->timeout; 1139 1140 if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set) 1141 qp_attr->min_rnr_timer = id_priv->min_rnr_timer; 1142 1143 return ret; 1144 } 1145 EXPORT_SYMBOL(rdma_init_qp_attr); 1146 1147 static inline bool cma_zero_addr(const struct sockaddr *addr) 1148 { 1149 switch (addr->sa_family) { 1150 case AF_INET: 1151 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 1152 case AF_INET6: 1153 return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr); 1154 case AF_IB: 1155 return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr); 1156 default: 1157 return false; 1158 } 1159 } 1160 1161 static inline bool cma_loopback_addr(const struct sockaddr *addr) 1162 { 1163 switch (addr->sa_family) { 1164 case AF_INET: 1165 return ipv4_is_loopback( 1166 ((struct sockaddr_in *)addr)->sin_addr.s_addr); 1167 case AF_INET6: 1168 return ipv6_addr_loopback( 1169 &((struct sockaddr_in6 *)addr)->sin6_addr); 1170 case AF_IB: 1171 return ib_addr_loopback( 1172 &((struct sockaddr_ib *)addr)->sib_addr); 1173 default: 1174 return false; 1175 } 1176 } 1177 1178 static inline bool cma_any_addr(const struct sockaddr *addr) 1179 { 1180 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1181 } 1182 1183 static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst) 1184 { 1185 if (src->sa_family != dst->sa_family) 1186 return -1; 1187 1188 switch (src->sa_family) { 1189 case AF_INET: 1190 return ((struct sockaddr_in *)src)->sin_addr.s_addr != 1191 ((struct sockaddr_in *)dst)->sin_addr.s_addr; 1192 case AF_INET6: { 1193 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src; 1194 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst; 1195 bool link_local; 1196 1197 if (ipv6_addr_cmp(&src_addr6->sin6_addr, 1198 &dst_addr6->sin6_addr)) 1199 return 1; 1200 link_local = ipv6_addr_type(&dst_addr6->sin6_addr) & 1201 IPV6_ADDR_LINKLOCAL; 1202 /* Link local must match their scope_ids */ 1203 return link_local ? 
(src_addr6->sin6_scope_id != 1204 dst_addr6->sin6_scope_id) : 1205 0; 1206 } 1207 1208 default: 1209 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, 1210 &((struct sockaddr_ib *) dst)->sib_addr); 1211 } 1212 } 1213 1214 static __be16 cma_port(const struct sockaddr *addr) 1215 { 1216 struct sockaddr_ib *sib; 1217 1218 switch (addr->sa_family) { 1219 case AF_INET: 1220 return ((struct sockaddr_in *) addr)->sin_port; 1221 case AF_INET6: 1222 return ((struct sockaddr_in6 *) addr)->sin6_port; 1223 case AF_IB: 1224 sib = (struct sockaddr_ib *) addr; 1225 return htons((u16) (be64_to_cpu(sib->sib_sid) & 1226 be64_to_cpu(sib->sib_sid_mask))); 1227 default: 1228 return 0; 1229 } 1230 } 1231 1232 static inline int cma_any_port(const struct sockaddr *addr) 1233 { 1234 return !cma_port(addr); 1235 } 1236 1237 static void cma_save_ib_info(struct sockaddr *src_addr, 1238 struct sockaddr *dst_addr, 1239 const struct rdma_cm_id *listen_id, 1240 const struct sa_path_rec *path) 1241 { 1242 struct sockaddr_ib *listen_ib, *ib; 1243 1244 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; 1245 if (src_addr) { 1246 ib = (struct sockaddr_ib *)src_addr; 1247 ib->sib_family = AF_IB; 1248 if (path) { 1249 ib->sib_pkey = path->pkey; 1250 ib->sib_flowinfo = path->flow_label; 1251 memcpy(&ib->sib_addr, &path->sgid, 16); 1252 ib->sib_sid = path->service_id; 1253 ib->sib_scope_id = 0; 1254 } else { 1255 ib->sib_pkey = listen_ib->sib_pkey; 1256 ib->sib_flowinfo = listen_ib->sib_flowinfo; 1257 ib->sib_addr = listen_ib->sib_addr; 1258 ib->sib_sid = listen_ib->sib_sid; 1259 ib->sib_scope_id = listen_ib->sib_scope_id; 1260 } 1261 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); 1262 } 1263 if (dst_addr) { 1264 ib = (struct sockaddr_ib *)dst_addr; 1265 ib->sib_family = AF_IB; 1266 if (path) { 1267 ib->sib_pkey = path->pkey; 1268 ib->sib_flowinfo = path->flow_label; 1269 memcpy(&ib->sib_addr, &path->dgid, 16); 1270 } 1271 } 1272 } 1273 1274 static void cma_save_ip4_info(struct sockaddr_in *src_addr, 1275 struct sockaddr_in *dst_addr, 1276 struct cma_hdr *hdr, 1277 __be16 local_port) 1278 { 1279 if (src_addr) { 1280 *src_addr = (struct sockaddr_in) { 1281 .sin_family = AF_INET, 1282 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1283 .sin_port = local_port, 1284 }; 1285 } 1286 1287 if (dst_addr) { 1288 *dst_addr = (struct sockaddr_in) { 1289 .sin_family = AF_INET, 1290 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1291 .sin_port = hdr->port, 1292 }; 1293 } 1294 } 1295 1296 static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, 1297 struct sockaddr_in6 *dst_addr, 1298 struct cma_hdr *hdr, 1299 __be16 local_port) 1300 { 1301 if (src_addr) { 1302 *src_addr = (struct sockaddr_in6) { 1303 .sin6_family = AF_INET6, 1304 .sin6_addr = hdr->dst_addr.ip6, 1305 .sin6_port = local_port, 1306 }; 1307 } 1308 1309 if (dst_addr) { 1310 *dst_addr = (struct sockaddr_in6) { 1311 .sin6_family = AF_INET6, 1312 .sin6_addr = hdr->src_addr.ip6, 1313 .sin6_port = hdr->port, 1314 }; 1315 } 1316 } 1317 1318 static u16 cma_port_from_service_id(__be64 service_id) 1319 { 1320 return (u16)be64_to_cpu(service_id); 1321 } 1322 1323 static int cma_save_ip_info(struct sockaddr *src_addr, 1324 struct sockaddr *dst_addr, 1325 const struct ib_cm_event *ib_event, 1326 __be64 service_id) 1327 { 1328 struct cma_hdr *hdr; 1329 __be16 port; 1330 1331 hdr = ib_event->private_data; 1332 if (hdr->cma_version != CMA_VERSION) 1333 return -EINVAL; 1334 1335 port = htons(cma_port_from_service_id(service_id)); 1336 1337 switch 
(cma_get_ip_ver(hdr)) { 1338 case 4: 1339 cma_save_ip4_info((struct sockaddr_in *)src_addr, 1340 (struct sockaddr_in *)dst_addr, hdr, port); 1341 break; 1342 case 6: 1343 cma_save_ip6_info((struct sockaddr_in6 *)src_addr, 1344 (struct sockaddr_in6 *)dst_addr, hdr, port); 1345 break; 1346 default: 1347 return -EAFNOSUPPORT; 1348 } 1349 1350 return 0; 1351 } 1352 1353 static int cma_save_net_info(struct sockaddr *src_addr, 1354 struct sockaddr *dst_addr, 1355 const struct rdma_cm_id *listen_id, 1356 const struct ib_cm_event *ib_event, 1357 sa_family_t sa_family, __be64 service_id) 1358 { 1359 if (sa_family == AF_IB) { 1360 if (ib_event->event == IB_CM_REQ_RECEIVED) 1361 cma_save_ib_info(src_addr, dst_addr, listen_id, 1362 ib_event->param.req_rcvd.primary_path); 1363 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1364 cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); 1365 return 0; 1366 } 1367 1368 return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); 1369 } 1370 1371 static int cma_save_req_info(const struct ib_cm_event *ib_event, 1372 struct cma_req_info *req) 1373 { 1374 const struct ib_cm_req_event_param *req_param = 1375 &ib_event->param.req_rcvd; 1376 const struct ib_cm_sidr_req_event_param *sidr_param = 1377 &ib_event->param.sidr_req_rcvd; 1378 1379 switch (ib_event->event) { 1380 case IB_CM_REQ_RECEIVED: 1381 req->device = req_param->listen_id->device; 1382 req->port = req_param->port; 1383 memcpy(&req->local_gid, &req_param->primary_path->sgid, 1384 sizeof(req->local_gid)); 1385 req->has_gid = true; 1386 req->service_id = req_param->primary_path->service_id; 1387 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1388 if (req->pkey != req_param->bth_pkey) 1389 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1390 "RDMA CMA: in the future this may cause the request to be dropped\n", 1391 req_param->bth_pkey, req->pkey); 1392 break; 1393 case IB_CM_SIDR_REQ_RECEIVED: 1394 req->device = sidr_param->listen_id->device; 1395 req->port = sidr_param->port; 1396 req->has_gid = false; 1397 req->service_id = sidr_param->service_id; 1398 req->pkey = sidr_param->pkey; 1399 if (req->pkey != sidr_param->bth_pkey) 1400 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1401 "RDMA CMA: in the future this may cause the request to be dropped\n", 1402 sidr_param->bth_pkey, req->pkey); 1403 break; 1404 default: 1405 return -EINVAL; 1406 } 1407 1408 return 0; 1409 } 1410 1411 static bool validate_ipv4_net_dev(struct net_device *net_dev, 1412 const struct sockaddr_in *dst_addr, 1413 const struct sockaddr_in *src_addr) 1414 { 1415 __be32 daddr = dst_addr->sin_addr.s_addr, 1416 saddr = src_addr->sin_addr.s_addr; 1417 struct fib_result res; 1418 struct flowi4 fl4; 1419 int err; 1420 bool ret; 1421 1422 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1423 ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || 1424 ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || 1425 ipv4_is_loopback(saddr)) 1426 return false; 1427 1428 memset(&fl4, 0, sizeof(fl4)); 1429 fl4.flowi4_iif = net_dev->ifindex; 1430 fl4.daddr = daddr; 1431 fl4.saddr = saddr; 1432 1433 rcu_read_lock(); 1434 err = fib_lookup(dev_net(net_dev), &fl4, &res, 0); 1435 ret = err == 0 && FIB_RES_DEV(res) == net_dev; 1436 rcu_read_unlock(); 1437 1438 return ret; 1439 } 1440 1441 static bool validate_ipv6_net_dev(struct net_device *net_dev, 1442 const struct sockaddr_in6 *dst_addr, 1443 const struct sockaddr_in6 *src_addr) 
1444 { 1445 #if IS_ENABLED(CONFIG_IPV6) 1446 const int strict = ipv6_addr_type(&dst_addr->sin6_addr) & 1447 IPV6_ADDR_LINKLOCAL; 1448 struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, 1449 &src_addr->sin6_addr, net_dev->ifindex, 1450 NULL, strict); 1451 bool ret; 1452 1453 if (!rt) 1454 return false; 1455 1456 ret = rt->rt6i_idev->dev == net_dev; 1457 ip6_rt_put(rt); 1458 1459 return ret; 1460 #else 1461 return false; 1462 #endif 1463 } 1464 1465 static bool validate_net_dev(struct net_device *net_dev, 1466 const struct sockaddr *daddr, 1467 const struct sockaddr *saddr) 1468 { 1469 const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; 1470 const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; 1471 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1472 const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; 1473 1474 switch (daddr->sa_family) { 1475 case AF_INET: 1476 return saddr->sa_family == AF_INET && 1477 validate_ipv4_net_dev(net_dev, daddr4, saddr4); 1478 1479 case AF_INET6: 1480 return saddr->sa_family == AF_INET6 && 1481 validate_ipv6_net_dev(net_dev, daddr6, saddr6); 1482 1483 default: 1484 return false; 1485 } 1486 } 1487 1488 static struct net_device * 1489 roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) 1490 { 1491 const struct ib_gid_attr *sgid_attr = NULL; 1492 struct net_device *ndev; 1493 1494 if (ib_event->event == IB_CM_REQ_RECEIVED) 1495 sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr; 1496 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1497 sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr; 1498 1499 if (!sgid_attr) 1500 return NULL; 1501 1502 rcu_read_lock(); 1503 ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr); 1504 if (IS_ERR(ndev)) 1505 ndev = NULL; 1506 else 1507 dev_hold(ndev); 1508 rcu_read_unlock(); 1509 return ndev; 1510 } 1511 1512 static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event, 1513 struct cma_req_info *req) 1514 { 1515 struct sockaddr *listen_addr = 1516 (struct sockaddr *)&req->listen_addr_storage; 1517 struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage; 1518 struct net_device *net_dev; 1519 const union ib_gid *gid = req->has_gid ? 
					  &req->local_gid : NULL;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	if (rdma_protocol_roce(req->device, req->port))
		net_dev = roce_get_net_dev_by_cm_event(ib_event);
	else
		net_dev = ib_get_net_dev_by_params(req->device, req->port,
						   req->pkey,
						   gid, listen_addr);
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	return net_dev;
}

static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const struct cma_hdr *hdr)
{
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const u32 port_num = id->port_num ?: rdma_start_port(device);

	return rdma_protocol_roce(device, port_num);
}

static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
{
	const struct sockaddr *daddr =
			(const struct sockaddr *)&req->listen_addr_storage;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;

	/* Returns true if the req is for IPv6 link local */
	return (daddr->sa_family == AF_INET6 &&
		(ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
}

static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const struct net_device *net_dev,
			      const struct cma_req_info *req)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (!net_dev)
		/* This request is an AF_IB request */
		return (!id->port_num || id->port_num == req->port) &&
		       (addr->src_addr.ss_family == AF_IB);

	/*
	 * If the request is not for IPv6 link local, allow matching
	 * request to any netdevice of the one or multiport rdma device.
	 */
	if (!cma_is_req_ipv6_ll(req))
		return true;
	/*
	 * Net namespaces must match, and if the listener is listening
	 * on a specific netdevice then the netdevice must match as well.
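	 * The check below encodes that as a boolean identity: it evaluates
	 * true when bound_dev_if is 0 (listener not tied to a netdevice, so
	 * the right-hand comparison is false as well, since a live ifindex
	 * is never 0) or when bound_dev_if equals net_dev->ifindex.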
1619 */ 1620 if (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1621 (!!addr->dev_addr.bound_dev_if == 1622 (addr->dev_addr.bound_dev_if == net_dev->ifindex))) 1623 return true; 1624 else 1625 return false; 1626 } 1627 1628 static struct rdma_id_private *cma_find_listener( 1629 const struct rdma_bind_list *bind_list, 1630 const struct ib_cm_id *cm_id, 1631 const struct ib_cm_event *ib_event, 1632 const struct cma_req_info *req, 1633 const struct net_device *net_dev) 1634 { 1635 struct rdma_id_private *id_priv, *id_priv_dev; 1636 1637 lockdep_assert_held(&lock); 1638 1639 if (!bind_list) 1640 return ERR_PTR(-EINVAL); 1641 1642 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1643 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1644 if (id_priv->id.device == cm_id->device && 1645 cma_match_net_dev(&id_priv->id, net_dev, req)) 1646 return id_priv; 1647 list_for_each_entry(id_priv_dev, 1648 &id_priv->listen_list, 1649 listen_list) { 1650 if (id_priv_dev->id.device == cm_id->device && 1651 cma_match_net_dev(&id_priv_dev->id, 1652 net_dev, req)) 1653 return id_priv_dev; 1654 } 1655 } 1656 } 1657 1658 return ERR_PTR(-EINVAL); 1659 } 1660 1661 static struct rdma_id_private * 1662 cma_ib_id_from_event(struct ib_cm_id *cm_id, 1663 const struct ib_cm_event *ib_event, 1664 struct cma_req_info *req, 1665 struct net_device **net_dev) 1666 { 1667 struct rdma_bind_list *bind_list; 1668 struct rdma_id_private *id_priv; 1669 int err; 1670 1671 err = cma_save_req_info(ib_event, req); 1672 if (err) 1673 return ERR_PTR(err); 1674 1675 *net_dev = cma_get_net_dev(ib_event, req); 1676 if (IS_ERR(*net_dev)) { 1677 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1678 /* Assuming the protocol is AF_IB */ 1679 *net_dev = NULL; 1680 } else { 1681 return ERR_CAST(*net_dev); 1682 } 1683 } 1684 1685 mutex_lock(&lock); 1686 /* 1687 * Net namespace might be getting deleted while route lookup, 1688 * cm_id lookup is in progress. Therefore, perform netdevice 1689 * validation, cm_id lookup under rcu lock. 1690 * RCU lock along with netdevice state check, synchronizes with 1691 * netdevice migrating to different net namespace and also avoids 1692 * case where net namespace doesn't get deleted while lookup is in 1693 * progress. 1694 * If the device state is not IFF_UP, its properties such as ifindex 1695 * and nd_net cannot be trusted to remain valid without rcu lock. 1696 * net/core/dev.c change_net_namespace() ensures to synchronize with 1697 * ongoing operations on net device after device is closed using 1698 * synchronize_net(). 1699 */ 1700 rcu_read_lock(); 1701 if (*net_dev) { 1702 /* 1703 * If netdevice is down, it is likely that it is administratively 1704 * down or it might be migrating to different namespace. 1705 * In that case avoid further processing, as the net namespace 1706 * or ifindex may change. 1707 */ 1708 if (((*net_dev)->flags & IFF_UP) == 0) { 1709 id_priv = ERR_PTR(-EHOSTUNREACH); 1710 goto err; 1711 } 1712 1713 if (!validate_net_dev(*net_dev, 1714 (struct sockaddr *)&req->listen_addr_storage, 1715 (struct sockaddr *)&req->src_addr_storage)) { 1716 id_priv = ERR_PTR(-EHOSTUNREACH); 1717 goto err; 1718 } 1719 } 1720 1721 bind_list = cma_ps_find(*net_dev ? 
dev_net(*net_dev) : &init_net, 1722 rdma_ps_from_service_id(req->service_id), 1723 cma_port_from_service_id(req->service_id)); 1724 id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev); 1725 err: 1726 rcu_read_unlock(); 1727 mutex_unlock(&lock); 1728 if (IS_ERR(id_priv) && *net_dev) { 1729 dev_put(*net_dev); 1730 *net_dev = NULL; 1731 } 1732 return id_priv; 1733 } 1734 1735 static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv) 1736 { 1737 return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); 1738 } 1739 1740 static void cma_cancel_route(struct rdma_id_private *id_priv) 1741 { 1742 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1743 if (id_priv->query) 1744 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1745 } 1746 } 1747 1748 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1749 { 1750 struct rdma_id_private *dev_id_priv; 1751 1752 /* 1753 * Remove from listen_any_list to prevent added devices from spawning 1754 * additional listen requests. 1755 */ 1756 mutex_lock(&lock); 1757 list_del(&id_priv->list); 1758 1759 while (!list_empty(&id_priv->listen_list)) { 1760 dev_id_priv = list_entry(id_priv->listen_list.next, 1761 struct rdma_id_private, listen_list); 1762 /* sync with device removal to avoid duplicate destruction */ 1763 list_del_init(&dev_id_priv->list); 1764 list_del(&dev_id_priv->listen_list); 1765 mutex_unlock(&lock); 1766 1767 rdma_destroy_id(&dev_id_priv->id); 1768 mutex_lock(&lock); 1769 } 1770 mutex_unlock(&lock); 1771 } 1772 1773 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1774 enum rdma_cm_state state) 1775 { 1776 switch (state) { 1777 case RDMA_CM_ADDR_QUERY: 1778 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1779 break; 1780 case RDMA_CM_ROUTE_QUERY: 1781 cma_cancel_route(id_priv); 1782 break; 1783 case RDMA_CM_LISTEN: 1784 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1785 cma_cancel_listens(id_priv); 1786 break; 1787 default: 1788 break; 1789 } 1790 } 1791 1792 static void cma_release_port(struct rdma_id_private *id_priv) 1793 { 1794 struct rdma_bind_list *bind_list = id_priv->bind_list; 1795 struct net *net = id_priv->id.route.addr.dev_addr.net; 1796 1797 if (!bind_list) 1798 return; 1799 1800 mutex_lock(&lock); 1801 hlist_del(&id_priv->node); 1802 if (hlist_empty(&bind_list->owners)) { 1803 cma_ps_remove(net, bind_list->ps, bind_list->port); 1804 kfree(bind_list); 1805 } 1806 mutex_unlock(&lock); 1807 } 1808 1809 static void destroy_mc(struct rdma_id_private *id_priv, 1810 struct cma_multicast *mc) 1811 { 1812 if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) 1813 ib_sa_free_multicast(mc->sa_mc); 1814 1815 if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { 1816 struct rdma_dev_addr *dev_addr = 1817 &id_priv->id.route.addr.dev_addr; 1818 struct net_device *ndev = NULL; 1819 1820 if (dev_addr->bound_dev_if) 1821 ndev = dev_get_by_index(dev_addr->net, 1822 dev_addr->bound_dev_if); 1823 if (ndev) { 1824 union ib_gid mgid; 1825 1826 cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, 1827 &mgid); 1828 cma_igmp_send(ndev, &mgid, false); 1829 dev_put(ndev); 1830 } 1831 1832 cancel_work_sync(&mc->iboe_join.work); 1833 } 1834 kfree(mc); 1835 } 1836 1837 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1838 { 1839 struct cma_multicast *mc; 1840 1841 while (!list_empty(&id_priv->mc_list)) { 1842 mc = list_first_entry(&id_priv->mc_list, struct cma_multicast, 1843 list); 1844 list_del(&mc->list); 1845 
		destroy_mc(id_priv, mc);
	}
}

static void _destroy_id(struct rdma_id_private *id_priv,
			enum rdma_cm_state state)
{
	cma_cancel_operation(id_priv, state);

	if (id_priv->cma_dev) {
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
				ib_destroy_cm_id(id_priv->cm_id.ib);
		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.iw)
				iw_destroy_cm_id(id_priv->cm_id.iw);
		}
		cma_leave_mc_groups(id_priv);
		cma_release_dev(id_priv);
	}

	cma_release_port(id_priv);
	cma_id_put(id_priv);
	wait_for_completion(&id_priv->comp);

	if (id_priv->internal_id)
		cma_id_put(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);

	put_net(id_priv->id.route.addr.dev_addr.net);
	rdma_restrack_del(&id_priv->res);
	kfree(id_priv);
}

/*
 * destroy an ID from within the handler_mutex. This ensures that no other
 * handlers can start running concurrently.
 */
static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
	__releases(&id_priv->handler_mutex)
{
	enum rdma_cm_state state;
	unsigned long flags;

	trace_cm_id_destroy(id_priv);

	/*
	 * Setting the state to destroyed under the handler mutex provides a
	 * fence against calling handler callbacks. If this is invoked due to
	 * the failure of a handler callback then it guarantees that no future
	 * handlers will be called.
	 */
	lockdep_assert_held(&id_priv->handler_mutex);
	spin_lock_irqsave(&id_priv->lock, flags);
	state = id_priv->state;
	id_priv->state = RDMA_CM_DESTROYING;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	mutex_unlock(&id_priv->handler_mutex);
	_destroy_id(id_priv, state);
}

void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv =
		container_of(id, struct rdma_id_private, id);

	mutex_lock(&id_priv->handler_mutex);
	destroy_id_handler_unlock(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
	int ret;

	ret = cma_modify_qp_rtr(id_priv, NULL);
	if (ret)
		goto reject;

	ret = cma_modify_qp_rts(id_priv, NULL);
	if (ret)
		goto reject;

	trace_cm_send_rtu(id_priv);
	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
	if (ret)
		goto reject;

	return 0;
reject:
	pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply.
status %d\n", ret); 1937 cma_modify_qp_err(id_priv); 1938 trace_cm_send_rej(id_priv); 1939 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1940 NULL, 0, NULL, 0); 1941 return ret; 1942 } 1943 1944 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1945 const struct ib_cm_rep_event_param *rep_data, 1946 void *private_data) 1947 { 1948 event->param.conn.private_data = private_data; 1949 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1950 event->param.conn.responder_resources = rep_data->responder_resources; 1951 event->param.conn.initiator_depth = rep_data->initiator_depth; 1952 event->param.conn.flow_control = rep_data->flow_control; 1953 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1954 event->param.conn.srq = rep_data->srq; 1955 event->param.conn.qp_num = rep_data->remote_qpn; 1956 1957 event->ece.vendor_id = rep_data->ece.vendor_id; 1958 event->ece.attr_mod = rep_data->ece.attr_mod; 1959 } 1960 1961 static int cma_cm_event_handler(struct rdma_id_private *id_priv, 1962 struct rdma_cm_event *event) 1963 { 1964 int ret; 1965 1966 lockdep_assert_held(&id_priv->handler_mutex); 1967 1968 trace_cm_event_handler(id_priv, event); 1969 ret = id_priv->id.event_handler(&id_priv->id, event); 1970 trace_cm_event_done(id_priv, event, ret); 1971 return ret; 1972 } 1973 1974 static int cma_ib_handler(struct ib_cm_id *cm_id, 1975 const struct ib_cm_event *ib_event) 1976 { 1977 struct rdma_id_private *id_priv = cm_id->context; 1978 struct rdma_cm_event event = {}; 1979 enum rdma_cm_state state; 1980 int ret; 1981 1982 mutex_lock(&id_priv->handler_mutex); 1983 state = READ_ONCE(id_priv->state); 1984 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1985 state != RDMA_CM_CONNECT) || 1986 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1987 state != RDMA_CM_DISCONNECT)) 1988 goto out; 1989 1990 switch (ib_event->event) { 1991 case IB_CM_REQ_ERROR: 1992 case IB_CM_REP_ERROR: 1993 event.event = RDMA_CM_EVENT_UNREACHABLE; 1994 event.status = -ETIMEDOUT; 1995 break; 1996 case IB_CM_REP_RECEIVED: 1997 if (state == RDMA_CM_CONNECT && 1998 (id_priv->id.qp_type != IB_QPT_UD)) { 1999 trace_cm_send_mra(id_priv); 2000 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2001 } 2002 if (id_priv->id.qp) { 2003 event.status = cma_rep_recv(id_priv); 2004 event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : 2005 RDMA_CM_EVENT_ESTABLISHED; 2006 } else { 2007 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 2008 } 2009 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 2010 ib_event->private_data); 2011 break; 2012 case IB_CM_RTU_RECEIVED: 2013 case IB_CM_USER_ESTABLISHED: 2014 event.event = RDMA_CM_EVENT_ESTABLISHED; 2015 break; 2016 case IB_CM_DREQ_ERROR: 2017 event.status = -ETIMEDOUT; 2018 fallthrough; 2019 case IB_CM_DREQ_RECEIVED: 2020 case IB_CM_DREP_RECEIVED: 2021 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 2022 RDMA_CM_DISCONNECT)) 2023 goto out; 2024 event.event = RDMA_CM_EVENT_DISCONNECTED; 2025 break; 2026 case IB_CM_TIMEWAIT_EXIT: 2027 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 2028 break; 2029 case IB_CM_MRA_RECEIVED: 2030 /* ignore event */ 2031 goto out; 2032 case IB_CM_REJ_RECEIVED: 2033 pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id, 2034 ib_event->param.rej_rcvd.reason)); 2035 cma_modify_qp_err(id_priv); 2036 event.status = ib_event->param.rej_rcvd.reason; 2037 event.event = RDMA_CM_EVENT_REJECTED; 2038 event.param.conn.private_data = ib_event->private_data; 2039 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 2040 break; 2041 default: 2042 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 2043 ib_event->event); 2044 goto out; 2045 } 2046 2047 ret = cma_cm_event_handler(id_priv, &event); 2048 if (ret) { 2049 /* Destroy the CM ID by returning a non-zero value. */ 2050 id_priv->cm_id.ib = NULL; 2051 destroy_id_handler_unlock(id_priv); 2052 return ret; 2053 } 2054 out: 2055 mutex_unlock(&id_priv->handler_mutex); 2056 return 0; 2057 } 2058 2059 static struct rdma_id_private * 2060 cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, 2061 const struct ib_cm_event *ib_event, 2062 struct net_device *net_dev) 2063 { 2064 struct rdma_id_private *listen_id_priv; 2065 struct rdma_id_private *id_priv; 2066 struct rdma_cm_id *id; 2067 struct rdma_route *rt; 2068 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2069 struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; 2070 const __be64 service_id = 2071 ib_event->param.req_rcvd.primary_path->service_id; 2072 int ret; 2073 2074 listen_id_priv = container_of(listen_id, struct rdma_id_private, id); 2075 id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net, 2076 listen_id->event_handler, listen_id->context, 2077 listen_id->ps, 2078 ib_event->param.req_rcvd.qp_type, 2079 listen_id_priv); 2080 if (IS_ERR(id_priv)) 2081 return NULL; 2082 2083 id = &id_priv->id; 2084 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2085 (struct sockaddr *)&id->route.addr.dst_addr, 2086 listen_id, ib_event, ss_family, service_id)) 2087 goto err; 2088 2089 rt = &id->route; 2090 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; 2091 rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec), 2092 GFP_KERNEL); 2093 if (!rt->path_rec) 2094 goto err; 2095 2096 rt->path_rec[0] = *path; 2097 if (rt->num_paths == 2) 2098 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 2099 2100 if (net_dev) { 2101 rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev); 2102 } else { 2103 if (!cma_protocol_roce(listen_id) && 2104 cma_any_addr(cma_src_addr(id_priv))) { 2105 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 2106 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 2107 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 2108 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 2109 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 2110 if (ret) 2111 goto err; 2112 } 2113 } 2114 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 2115 2116 id_priv->state = RDMA_CM_CONNECT; 2117 return id_priv; 2118 2119 err: 2120 rdma_destroy_id(id); 2121 return NULL; 2122 } 2123 2124 static struct rdma_id_private * 2125 cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, 2126 const struct ib_cm_event *ib_event, 2127 struct net_device *net_dev) 2128 { 2129 const struct rdma_id_private *listen_id_priv; 2130 struct rdma_id_private *id_priv; 2131 struct rdma_cm_id *id; 2132 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2133 struct net *net = listen_id->route.addr.dev_addr.net; 2134 int ret; 2135 2136 listen_id_priv = container_of(listen_id, struct rdma_id_private, id); 2137 id_priv = __rdma_create_id(net, listen_id->event_handler, 2138 listen_id->context, listen_id->ps, IB_QPT_UD, 2139 listen_id_priv); 2140 if (IS_ERR(id_priv)) 2141 return NULL; 2142 2143 id = &id_priv->id; 2144 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2145 (struct sockaddr *)&id->route.addr.dst_addr, 2146 listen_id, ib_event, ss_family, 2147 ib_event->param.sidr_req_rcvd.service_id)) 2148 goto err; 2149 2150 if (net_dev) { 2151 rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev); 2152 } else { 2153 if (!cma_any_addr(cma_src_addr(id_priv))) { 2154 ret = cma_translate_addr(cma_src_addr(id_priv), 2155 &id->route.addr.dev_addr); 2156 if (ret) 2157 goto err; 2158 } 2159 } 2160 2161 id_priv->state = RDMA_CM_CONNECT; 2162 return id_priv; 2163 err: 2164 rdma_destroy_id(id); 2165 return NULL; 2166 } 2167 2168 static void cma_set_req_event_data(struct rdma_cm_event *event, 2169 const struct ib_cm_req_event_param *req_data, 2170 void *private_data, int offset) 2171 { 2172 event->param.conn.private_data = private_data + offset; 2173 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 2174 event->param.conn.responder_resources = req_data->responder_resources; 2175 event->param.conn.initiator_depth = req_data->initiator_depth; 2176 event->param.conn.flow_control = req_data->flow_control; 2177 event->param.conn.retry_count = req_data->retry_count; 2178 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 2179 event->param.conn.srq = req_data->srq; 2180 event->param.conn.qp_num = req_data->remote_qpn; 2181 2182 event->ece.vendor_id = req_data->ece.vendor_id; 2183 event->ece.attr_mod = req_data->ece.attr_mod; 2184 } 2185 2186 static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id, 2187 const struct ib_cm_event *ib_event) 2188 { 2189 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 2190 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 2191 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 2192 (id->qp_type == 
IB_QPT_UD)) || 2193 (!id->qp_type)); 2194 } 2195 2196 static int cma_ib_req_handler(struct ib_cm_id *cm_id, 2197 const struct ib_cm_event *ib_event) 2198 { 2199 struct rdma_id_private *listen_id, *conn_id = NULL; 2200 struct rdma_cm_event event = {}; 2201 struct cma_req_info req = {}; 2202 struct net_device *net_dev; 2203 u8 offset; 2204 int ret; 2205 2206 listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev); 2207 if (IS_ERR(listen_id)) 2208 return PTR_ERR(listen_id); 2209 2210 trace_cm_req_handler(listen_id, ib_event->event); 2211 if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { 2212 ret = -EINVAL; 2213 goto net_dev_put; 2214 } 2215 2216 mutex_lock(&listen_id->handler_mutex); 2217 if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) { 2218 ret = -ECONNABORTED; 2219 goto err_unlock; 2220 } 2221 2222 offset = cma_user_data_offset(listen_id); 2223 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2224 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 2225 conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev); 2226 event.param.ud.private_data = ib_event->private_data + offset; 2227 event.param.ud.private_data_len = 2228 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 2229 } else { 2230 conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev); 2231 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 2232 ib_event->private_data, offset); 2233 } 2234 if (!conn_id) { 2235 ret = -ENOMEM; 2236 goto err_unlock; 2237 } 2238 2239 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2240 ret = cma_ib_acquire_dev(conn_id, listen_id, &req); 2241 if (ret) { 2242 destroy_id_handler_unlock(conn_id); 2243 goto err_unlock; 2244 } 2245 2246 conn_id->cm_id.ib = cm_id; 2247 cm_id->context = conn_id; 2248 cm_id->cm_handler = cma_ib_handler; 2249 2250 ret = cma_cm_event_handler(conn_id, &event); 2251 if (ret) { 2252 /* Destroy the CM ID by returning a non-zero value. 
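The IB CM core then frees the cm_id on its own, so clear the cma reference to it first so that _destroy_id() does not call ib_destroy_cm_id() on it a second time.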
*/ 2253 conn_id->cm_id.ib = NULL; 2254 mutex_unlock(&listen_id->handler_mutex); 2255 destroy_id_handler_unlock(conn_id); 2256 goto net_dev_put; 2257 } 2258 2259 if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT && 2260 conn_id->id.qp_type != IB_QPT_UD) { 2261 trace_cm_send_mra(cm_id->context); 2262 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2263 } 2264 mutex_unlock(&conn_id->handler_mutex); 2265 2266 err_unlock: 2267 mutex_unlock(&listen_id->handler_mutex); 2268 2269 net_dev_put: 2270 if (net_dev) 2271 dev_put(net_dev); 2272 2273 return ret; 2274 } 2275 2276 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2277 { 2278 if (addr->sa_family == AF_IB) 2279 return ((struct sockaddr_ib *) addr)->sib_sid; 2280 2281 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2282 } 2283 EXPORT_SYMBOL(rdma_get_service_id); 2284 2285 void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, 2286 union ib_gid *dgid) 2287 { 2288 struct rdma_addr *addr = &cm_id->route.addr; 2289 2290 if (!cm_id->device) { 2291 if (sgid) 2292 memset(sgid, 0, sizeof(*sgid)); 2293 if (dgid) 2294 memset(dgid, 0, sizeof(*dgid)); 2295 return; 2296 } 2297 2298 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) { 2299 if (sgid) 2300 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid); 2301 if (dgid) 2302 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid); 2303 } else { 2304 if (sgid) 2305 rdma_addr_get_sgid(&addr->dev_addr, sgid); 2306 if (dgid) 2307 rdma_addr_get_dgid(&addr->dev_addr, dgid); 2308 } 2309 } 2310 EXPORT_SYMBOL(rdma_read_gids); 2311 2312 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2313 { 2314 struct rdma_id_private *id_priv = iw_id->context; 2315 struct rdma_cm_event event = {}; 2316 int ret = 0; 2317 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2318 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2319 2320 mutex_lock(&id_priv->handler_mutex); 2321 if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) 2322 goto out; 2323 2324 switch (iw_event->event) { 2325 case IW_CM_EVENT_CLOSE: 2326 event.event = RDMA_CM_EVENT_DISCONNECTED; 2327 break; 2328 case IW_CM_EVENT_CONNECT_REPLY: 2329 memcpy(cma_src_addr(id_priv), laddr, 2330 rdma_addr_size(laddr)); 2331 memcpy(cma_dst_addr(id_priv), raddr, 2332 rdma_addr_size(raddr)); 2333 switch (iw_event->status) { 2334 case 0: 2335 event.event = RDMA_CM_EVENT_ESTABLISHED; 2336 event.param.conn.initiator_depth = iw_event->ird; 2337 event.param.conn.responder_resources = iw_event->ord; 2338 break; 2339 case -ECONNRESET: 2340 case -ECONNREFUSED: 2341 event.event = RDMA_CM_EVENT_REJECTED; 2342 break; 2343 case -ETIMEDOUT: 2344 event.event = RDMA_CM_EVENT_UNREACHABLE; 2345 break; 2346 default: 2347 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2348 break; 2349 } 2350 break; 2351 case IW_CM_EVENT_ESTABLISHED: 2352 event.event = RDMA_CM_EVENT_ESTABLISHED; 2353 event.param.conn.initiator_depth = iw_event->ird; 2354 event.param.conn.responder_resources = iw_event->ord; 2355 break; 2356 default: 2357 goto out; 2358 } 2359 2360 event.status = iw_event->status; 2361 event.param.conn.private_data = iw_event->private_data; 2362 event.param.conn.private_data_len = iw_event->private_data_len; 2363 ret = cma_cm_event_handler(id_priv, &event); 2364 if (ret) { 2365 /* Destroy the CM ID by returning a non-zero value. 
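The IW CM likewise frees the cm_id itself once a non-zero value is returned, so clear the cma reference first so that _destroy_id() does not call iw_destroy_cm_id() on it again.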
*/ 2366 id_priv->cm_id.iw = NULL; 2367 destroy_id_handler_unlock(id_priv); 2368 return ret; 2369 } 2370 2371 out: 2372 mutex_unlock(&id_priv->handler_mutex); 2373 return ret; 2374 } 2375 2376 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2377 struct iw_cm_event *iw_event) 2378 { 2379 struct rdma_id_private *listen_id, *conn_id; 2380 struct rdma_cm_event event = {}; 2381 int ret = -ECONNABORTED; 2382 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2383 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2384 2385 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2386 event.param.conn.private_data = iw_event->private_data; 2387 event.param.conn.private_data_len = iw_event->private_data_len; 2388 event.param.conn.initiator_depth = iw_event->ird; 2389 event.param.conn.responder_resources = iw_event->ord; 2390 2391 listen_id = cm_id->context; 2392 2393 mutex_lock(&listen_id->handler_mutex); 2394 if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) 2395 goto out; 2396 2397 /* Create a new RDMA id for the new IW CM ID */ 2398 conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2399 listen_id->id.event_handler, 2400 listen_id->id.context, RDMA_PS_TCP, 2401 IB_QPT_RC, listen_id); 2402 if (IS_ERR(conn_id)) { 2403 ret = -ENOMEM; 2404 goto out; 2405 } 2406 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2407 conn_id->state = RDMA_CM_CONNECT; 2408 2409 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2410 if (ret) { 2411 mutex_unlock(&listen_id->handler_mutex); 2412 destroy_id_handler_unlock(conn_id); 2413 return ret; 2414 } 2415 2416 ret = cma_iw_acquire_dev(conn_id, listen_id); 2417 if (ret) { 2418 mutex_unlock(&listen_id->handler_mutex); 2419 destroy_id_handler_unlock(conn_id); 2420 return ret; 2421 } 2422 2423 conn_id->cm_id.iw = cm_id; 2424 cm_id->context = conn_id; 2425 cm_id->cm_handler = cma_iw_handler; 2426 2427 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2428 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2429 2430 ret = cma_cm_event_handler(conn_id, &event); 2431 if (ret) { 2432 /* User wants to destroy the CM ID */ 2433 conn_id->cm_id.iw = NULL; 2434 mutex_unlock(&listen_id->handler_mutex); 2435 destroy_id_handler_unlock(conn_id); 2436 return ret; 2437 } 2438 2439 mutex_unlock(&conn_id->handler_mutex); 2440 2441 out: 2442 mutex_unlock(&listen_id->handler_mutex); 2443 return ret; 2444 } 2445 2446 static int cma_ib_listen(struct rdma_id_private *id_priv) 2447 { 2448 struct sockaddr *addr; 2449 struct ib_cm_id *id; 2450 __be64 svc_id; 2451 2452 addr = cma_src_addr(id_priv); 2453 svc_id = rdma_get_service_id(&id_priv->id, addr); 2454 id = ib_cm_insert_listen(id_priv->id.device, 2455 cma_ib_req_handler, svc_id); 2456 if (IS_ERR(id)) 2457 return PTR_ERR(id); 2458 id_priv->cm_id.ib = id; 2459 2460 return 0; 2461 } 2462 2463 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2464 { 2465 int ret; 2466 struct iw_cm_id *id; 2467 2468 id = iw_create_cm_id(id_priv->id.device, 2469 iw_conn_req_handler, 2470 id_priv); 2471 if (IS_ERR(id)) 2472 return PTR_ERR(id); 2473 2474 id->tos = id_priv->tos; 2475 id->tos_set = id_priv->tos_set; 2476 id->afonly = id_priv->afonly; 2477 id_priv->cm_id.iw = id; 2478 2479 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2480 rdma_addr_size(cma_src_addr(id_priv))); 2481 2482 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2483 2484 if (ret) { 2485 iw_destroy_cm_id(id_priv->cm_id.iw); 2486 id_priv->cm_id.iw = NULL; 2487 } 2488 
2489 return ret; 2490 } 2491 2492 static int cma_listen_handler(struct rdma_cm_id *id, 2493 struct rdma_cm_event *event) 2494 { 2495 struct rdma_id_private *id_priv = id->context; 2496 2497 /* Listening IDs are always destroyed on removal */ 2498 if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 2499 return -1; 2500 2501 id->context = id_priv->id.context; 2502 id->event_handler = id_priv->id.event_handler; 2503 trace_cm_event_handler(id_priv, event); 2504 return id_priv->id.event_handler(id, event); 2505 } 2506 2507 static int cma_listen_on_dev(struct rdma_id_private *id_priv, 2508 struct cma_device *cma_dev, 2509 struct rdma_id_private **to_destroy) 2510 { 2511 struct rdma_id_private *dev_id_priv; 2512 struct net *net = id_priv->id.route.addr.dev_addr.net; 2513 int ret; 2514 2515 lockdep_assert_held(&lock); 2516 2517 *to_destroy = NULL; 2518 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2519 return 0; 2520 2521 dev_id_priv = 2522 __rdma_create_id(net, cma_listen_handler, id_priv, 2523 id_priv->id.ps, id_priv->id.qp_type, id_priv); 2524 if (IS_ERR(dev_id_priv)) 2525 return PTR_ERR(dev_id_priv); 2526 2527 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2528 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2529 rdma_addr_size(cma_src_addr(id_priv))); 2530 2531 _cma_attach_to_dev(dev_id_priv, cma_dev); 2532 rdma_restrack_add(&dev_id_priv->res); 2533 cma_id_get(id_priv); 2534 dev_id_priv->internal_id = 1; 2535 dev_id_priv->afonly = id_priv->afonly; 2536 dev_id_priv->tos_set = id_priv->tos_set; 2537 dev_id_priv->tos = id_priv->tos; 2538 2539 ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); 2540 if (ret) 2541 goto err_listen; 2542 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2543 return 0; 2544 err_listen: 2545 /* Caller must destroy this after releasing lock */ 2546 *to_destroy = dev_id_priv; 2547 dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); 2548 return ret; 2549 } 2550 2551 static int cma_listen_on_all(struct rdma_id_private *id_priv) 2552 { 2553 struct rdma_id_private *to_destroy; 2554 struct cma_device *cma_dev; 2555 int ret; 2556 2557 mutex_lock(&lock); 2558 list_add_tail(&id_priv->list, &listen_any_list); 2559 list_for_each_entry(cma_dev, &dev_list, list) { 2560 ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); 2561 if (ret) { 2562 /* Prevent racing with cma_process_remove() */ 2563 if (to_destroy) 2564 list_del_init(&to_destroy->list); 2565 goto err_listen; 2566 } 2567 } 2568 mutex_unlock(&lock); 2569 return 0; 2570 2571 err_listen: 2572 list_del(&id_priv->list); 2573 mutex_unlock(&lock); 2574 if (to_destroy) 2575 rdma_destroy_id(&to_destroy->id); 2576 return ret; 2577 } 2578 2579 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2580 { 2581 struct rdma_id_private *id_priv; 2582 2583 id_priv = container_of(id, struct rdma_id_private, id); 2584 id_priv->tos = (u8) tos; 2585 id_priv->tos_set = true; 2586 } 2587 EXPORT_SYMBOL(rdma_set_service_type); 2588 2589 /** 2590 * rdma_set_ack_timeout() - Set the ack timeout of QP associated 2591 * with a connection identifier. 2592 * @id: Communication identifier to associated with service type. 2593 * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec. 2594 * 2595 * This function should be called before rdma_connect() on active side, 2596 * and on passive side before rdma_accept(). It is applicable to primary 2597 * path only. 
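For example, timeout = 14 encodes 4.096 usec * 2^14, i.e. roughly 67 msec of local ACK timeout.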
The timeout affects only the local side of the QP; it is not 2598 * negotiated with the remote side, and zero disables the timer. In case it is 2599 * set before rdma_resolve_route, the value will also be used to determine 2600 * PacketLifeTime for RoCE. 2601 * 2602 * Return: 0 for success 2603 */ 2604 int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) 2605 { 2606 struct rdma_id_private *id_priv; 2607 2608 if (id->qp_type != IB_QPT_RC) 2609 return -EINVAL; 2610 2611 id_priv = container_of(id, struct rdma_id_private, id); 2612 id_priv->timeout = timeout; 2613 id_priv->timeout_set = true; 2614 2615 return 0; 2616 } 2617 EXPORT_SYMBOL(rdma_set_ack_timeout); 2618 2619 /** 2620 * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the 2621 * QP associated with a connection identifier. 2622 * @id: Communication identifier to associate the minimum RNR timer with. 2623 * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK 2624 * Timer Field" in the IBTA specification. 2625 * 2626 * This function should be called before rdma_connect() on the active 2627 * side, and on the passive side before rdma_accept(). The timer value 2628 * will be associated with the local QP. When it receives a send it is 2629 * not ready to handle, typically if the receive queue is empty, an RNR 2630 * Retry NAK is returned to the requester with the min_rnr_timer 2631 * encoded. The requester will then wait at least the time specified 2632 * in the NAK before retrying. The default is zero, which translates 2633 * to a minimum RNR Timer value of 655 ms. 2634 * 2635 * Return: 0 for success 2636 */ 2637 int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer) 2638 { 2639 struct rdma_id_private *id_priv; 2640 2641 /* It is a five-bit value */ 2642 if (min_rnr_timer & 0xe0) 2643 return -EINVAL; 2644 2645 if (WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT)) 2646 return -EINVAL; 2647 2648 id_priv = container_of(id, struct rdma_id_private, id); 2649 id_priv->min_rnr_timer = min_rnr_timer; 2650 id_priv->min_rnr_timer_set = true; 2651 2652 return 0; 2653 } 2654 EXPORT_SYMBOL(rdma_set_min_rnr_timer); 2655 2656 static void cma_query_handler(int status, struct sa_path_rec *path_rec, 2657 void *context) 2658 { 2659 struct cma_work *work = context; 2660 struct rdma_route *route; 2661 2662 route = &work->id->id.route; 2663 2664 if (!status) { 2665 route->num_paths = 1; 2666 *route->path_rec = *path_rec; 2667 } else { 2668 work->old_state = RDMA_CM_ROUTE_QUERY; 2669 work->new_state = RDMA_CM_ADDR_RESOLVED; 2670 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2671 work->event.status = status; 2672 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. 
status %d\n", 2673 status); 2674 } 2675 2676 queue_work(cma_wq, &work->work); 2677 } 2678 2679 static int cma_query_ib_route(struct rdma_id_private *id_priv, 2680 unsigned long timeout_ms, struct cma_work *work) 2681 { 2682 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2683 struct sa_path_rec path_rec; 2684 ib_sa_comp_mask comp_mask; 2685 struct sockaddr_in6 *sin6; 2686 struct sockaddr_ib *sib; 2687 2688 memset(&path_rec, 0, sizeof path_rec); 2689 2690 if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num)) 2691 path_rec.rec_type = SA_PATH_REC_TYPE_OPA; 2692 else 2693 path_rec.rec_type = SA_PATH_REC_TYPE_IB; 2694 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2695 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2696 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2697 path_rec.numb_path = 1; 2698 path_rec.reversible = 1; 2699 path_rec.service_id = rdma_get_service_id(&id_priv->id, 2700 cma_dst_addr(id_priv)); 2701 2702 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2703 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2704 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2705 2706 switch (cma_family(id_priv)) { 2707 case AF_INET: 2708 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2709 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2710 break; 2711 case AF_INET6: 2712 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2713 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2714 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2715 break; 2716 case AF_IB: 2717 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2718 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2719 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2720 break; 2721 } 2722 2723 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2724 id_priv->id.port_num, &path_rec, 2725 comp_mask, timeout_ms, 2726 GFP_KERNEL, cma_query_handler, 2727 work, &id_priv->query); 2728 2729 return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; 2730 } 2731 2732 static void cma_iboe_join_work_handler(struct work_struct *work) 2733 { 2734 struct cma_multicast *mc = 2735 container_of(work, struct cma_multicast, iboe_join.work); 2736 struct rdma_cm_event *event = &mc->iboe_join.event; 2737 struct rdma_id_private *id_priv = mc->id_priv; 2738 int ret; 2739 2740 mutex_lock(&id_priv->handler_mutex); 2741 if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || 2742 READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) 2743 goto out_unlock; 2744 2745 ret = cma_cm_event_handler(id_priv, event); 2746 WARN_ON(ret); 2747 2748 out_unlock: 2749 mutex_unlock(&id_priv->handler_mutex); 2750 if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN) 2751 rdma_destroy_ah_attr(&event->param.ud.ah_attr); 2752 } 2753 2754 static void cma_work_handler(struct work_struct *_work) 2755 { 2756 struct cma_work *work = container_of(_work, struct cma_work, work); 2757 struct rdma_id_private *id_priv = work->id; 2758 2759 mutex_lock(&id_priv->handler_mutex); 2760 if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || 2761 READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) 2762 goto out_unlock; 2763 if (work->old_state != 0 || work->new_state != 0) { 2764 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2765 goto out_unlock; 2766 } 2767 2768 if (cma_cm_event_handler(id_priv, &work->event)) { 2769 cma_id_put(id_priv); 2770 destroy_id_handler_unlock(id_priv); 2771 goto out_free; 2772 } 2773 2774 out_unlock: 2775 mutex_unlock(&id_priv->handler_mutex); 2776 cma_id_put(id_priv); 2777 out_free: 2778 if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN) 2779 rdma_destroy_ah_attr(&work->event.param.ud.ah_attr); 2780 kfree(work); 2781 } 2782 2783 static void cma_init_resolve_route_work(struct cma_work *work, 2784 struct rdma_id_private *id_priv) 2785 { 2786 work->id = id_priv; 2787 INIT_WORK(&work->work, cma_work_handler); 2788 work->old_state = RDMA_CM_ROUTE_QUERY; 2789 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2790 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2791 } 2792 2793 static void enqueue_resolve_addr_work(struct cma_work *work, 2794 struct rdma_id_private *id_priv) 2795 { 2796 /* Balances with cma_id_put() in cma_work_handler */ 2797 cma_id_get(id_priv); 2798 2799 work->id = id_priv; 2800 INIT_WORK(&work->work, cma_work_handler); 2801 work->old_state = RDMA_CM_ADDR_QUERY; 2802 work->new_state = RDMA_CM_ADDR_RESOLVED; 2803 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2804 2805 queue_work(cma_wq, &work->work); 2806 } 2807 2808 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, 2809 unsigned long timeout_ms) 2810 { 2811 struct rdma_route *route = &id_priv->id.route; 2812 struct cma_work *work; 2813 int ret; 2814 2815 work = kzalloc(sizeof *work, GFP_KERNEL); 2816 if (!work) 2817 return -ENOMEM; 2818 2819 cma_init_resolve_route_work(work, id_priv); 2820 2821 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2822 if (!route->path_rec) { 2823 ret = -ENOMEM; 2824 goto err1; 2825 } 2826 2827 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2828 if (ret) 2829 goto err2; 2830 2831 return 0; 2832 err2: 2833 kfree(route->path_rec); 2834 route->path_rec = NULL; 2835 err1: 2836 kfree(work); 2837 return ret; 2838 } 2839 2840 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2841 unsigned long supported_gids, 2842 enum ib_gid_type default_gid) 2843 { 2844 if ((network_type == RDMA_NETWORK_IPV4 || 2845 network_type == RDMA_NETWORK_IPV6) && 2846 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, 
&supported_gids)) 2847 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2848 2849 return default_gid; 2850 } 2851 2852 /* 2853 * cma_iboe_set_path_rec_l2_fields() is helper function which sets 2854 * path record type based on GID type. 2855 * It also sets up other L2 fields which includes destination mac address 2856 * netdev ifindex, of the path record. 2857 * It returns the netdev of the bound interface for this path record entry. 2858 */ 2859 static struct net_device * 2860 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) 2861 { 2862 struct rdma_route *route = &id_priv->id.route; 2863 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; 2864 struct rdma_addr *addr = &route->addr; 2865 unsigned long supported_gids; 2866 struct net_device *ndev; 2867 2868 if (!addr->dev_addr.bound_dev_if) 2869 return NULL; 2870 2871 ndev = dev_get_by_index(addr->dev_addr.net, 2872 addr->dev_addr.bound_dev_if); 2873 if (!ndev) 2874 return NULL; 2875 2876 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2877 id_priv->id.port_num); 2878 gid_type = cma_route_gid_type(addr->dev_addr.network, 2879 supported_gids, 2880 id_priv->gid_type); 2881 /* Use the hint from IP Stack to select GID Type */ 2882 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2883 gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2884 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); 2885 2886 route->path_rec->roce.route_resolved = true; 2887 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); 2888 return ndev; 2889 } 2890 2891 int rdma_set_ib_path(struct rdma_cm_id *id, 2892 struct sa_path_rec *path_rec) 2893 { 2894 struct rdma_id_private *id_priv; 2895 struct net_device *ndev; 2896 int ret; 2897 2898 id_priv = container_of(id, struct rdma_id_private, id); 2899 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2900 RDMA_CM_ROUTE_RESOLVED)) 2901 return -EINVAL; 2902 2903 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec), 2904 GFP_KERNEL); 2905 if (!id->route.path_rec) { 2906 ret = -ENOMEM; 2907 goto err; 2908 } 2909 2910 if (rdma_protocol_roce(id->device, id->port_num)) { 2911 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 2912 if (!ndev) { 2913 ret = -ENODEV; 2914 goto err_free; 2915 } 2916 dev_put(ndev); 2917 } 2918 2919 id->route.num_paths = 1; 2920 return 0; 2921 2922 err_free: 2923 kfree(id->route.path_rec); 2924 id->route.path_rec = NULL; 2925 err: 2926 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2927 return ret; 2928 } 2929 EXPORT_SYMBOL(rdma_set_ib_path); 2930 2931 static int cma_resolve_iw_route(struct rdma_id_private *id_priv) 2932 { 2933 struct cma_work *work; 2934 2935 work = kzalloc(sizeof *work, GFP_KERNEL); 2936 if (!work) 2937 return -ENOMEM; 2938 2939 cma_init_resolve_route_work(work, id_priv); 2940 queue_work(cma_wq, &work->work); 2941 return 0; 2942 } 2943 2944 static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio) 2945 { 2946 struct net_device *dev; 2947 2948 dev = vlan_dev_real_dev(vlan_ndev); 2949 if (dev->num_tc) 2950 return netdev_get_prio_tc_map(dev, prio); 2951 2952 return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) & 2953 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 2954 } 2955 2956 struct iboe_prio_tc_map { 2957 int input_prio; 2958 int output_tc; 2959 bool found; 2960 }; 2961 2962 static int get_lower_vlan_dev_tc(struct net_device *dev, 2963 struct netdev_nested_priv *priv) 2964 { 2965 struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data; 2966 2967 if (is_vlan_dev(dev)) 2968 map->output_tc = 
get_vlan_ndev_tc(dev, map->input_prio); 2969 else if (dev->num_tc) 2970 map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio); 2971 else 2972 map->output_tc = 0; 2973 /* We are interested only in first level VLAN device, so always 2974 * return 1 to stop iterating over next level devices. 2975 */ 2976 map->found = true; 2977 return 1; 2978 } 2979 2980 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2981 { 2982 struct iboe_prio_tc_map prio_tc_map = {}; 2983 int prio = rt_tos2priority(tos); 2984 struct netdev_nested_priv priv; 2985 2986 /* If VLAN device, get it directly from the VLAN netdev */ 2987 if (is_vlan_dev(ndev)) 2988 return get_vlan_ndev_tc(ndev, prio); 2989 2990 prio_tc_map.input_prio = prio; 2991 priv.data = (void *)&prio_tc_map; 2992 rcu_read_lock(); 2993 netdev_walk_all_lower_dev_rcu(ndev, 2994 get_lower_vlan_dev_tc, 2995 &priv); 2996 rcu_read_unlock(); 2997 /* If map is found from lower device, use it; Otherwise 2998 * continue with the current netdevice to get priority to tc map. 2999 */ 3000 if (prio_tc_map.found) 3001 return prio_tc_map.output_tc; 3002 else if (ndev->num_tc) 3003 return netdev_get_prio_tc_map(ndev, prio); 3004 else 3005 return 0; 3006 } 3007 3008 static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv) 3009 { 3010 struct sockaddr_in6 *addr6; 3011 u16 dport, sport; 3012 u32 hash, fl; 3013 3014 addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv); 3015 fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK; 3016 if ((cma_family(id_priv) != AF_INET6) || !fl) { 3017 dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv))); 3018 sport = be16_to_cpu(cma_port(cma_src_addr(id_priv))); 3019 hash = (u32)sport * 31 + dport; 3020 fl = hash & IB_GRH_FLOWLABEL_MASK; 3021 } 3022 3023 return cpu_to_be32(fl); 3024 } 3025 3026 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 3027 { 3028 struct rdma_route *route = &id_priv->id.route; 3029 struct rdma_addr *addr = &route->addr; 3030 struct cma_work *work; 3031 int ret; 3032 struct net_device *ndev; 3033 3034 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 3035 rdma_start_port(id_priv->cma_dev->device)]; 3036 u8 tos = id_priv->tos_set ? 
id_priv->tos : default_roce_tos; 3037 3038 3039 work = kzalloc(sizeof *work, GFP_KERNEL); 3040 if (!work) 3041 return -ENOMEM; 3042 3043 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 3044 if (!route->path_rec) { 3045 ret = -ENOMEM; 3046 goto err1; 3047 } 3048 3049 route->num_paths = 1; 3050 3051 ndev = cma_iboe_set_path_rec_l2_fields(id_priv); 3052 if (!ndev) { 3053 ret = -ENODEV; 3054 goto err2; 3055 } 3056 3057 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 3058 &route->path_rec->sgid); 3059 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 3060 &route->path_rec->dgid); 3061 3062 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 3063 /* TODO: get the hoplimit from the inet/inet6 device */ 3064 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 3065 else 3066 route->path_rec->hop_limit = 1; 3067 route->path_rec->reversible = 1; 3068 route->path_rec->pkey = cpu_to_be16(0xffff); 3069 route->path_rec->mtu_selector = IB_SA_EQ; 3070 route->path_rec->sl = iboe_tos_to_sl(ndev, tos); 3071 route->path_rec->traffic_class = tos; 3072 route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 3073 route->path_rec->rate_selector = IB_SA_EQ; 3074 route->path_rec->rate = iboe_get_rate(ndev); 3075 dev_put(ndev); 3076 route->path_rec->packet_life_time_selector = IB_SA_EQ; 3077 /* In case ACK timeout is set, use this value to calculate 3078 * PacketLifeTime. As per IBTA 12.7.34, 3079 * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay). 3080 * Assuming a negligible local ACK delay, we can use 3081 * PacketLifeTime = local ACK timeout/2 3082 * as a reasonable approximation for RoCE networks. 3083 */ 3084 route->path_rec->packet_life_time = id_priv->timeout_set ? 3085 id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME; 3086 3087 if (!route->path_rec->mtu) { 3088 ret = -EINVAL; 3089 goto err2; 3090 } 3091 3092 if (rdma_protocol_roce_udp_encap(id_priv->id.device, 3093 id_priv->id.port_num)) 3094 route->path_rec->flow_label = 3095 cma_get_roce_udp_flow_label(id_priv); 3096 3097 cma_init_resolve_route_work(work, id_priv); 3098 queue_work(cma_wq, &work->work); 3099 3100 return 0; 3101 3102 err2: 3103 kfree(route->path_rec); 3104 route->path_rec = NULL; 3105 route->num_paths = 0; 3106 err1: 3107 kfree(work); 3108 return ret; 3109 } 3110 3111 int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) 3112 { 3113 struct rdma_id_private *id_priv; 3114 int ret; 3115 3116 id_priv = container_of(id, struct rdma_id_private, id); 3117 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 3118 return -EINVAL; 3119 3120 cma_id_get(id_priv); 3121 if (rdma_cap_ib_sa(id->device, id->port_num)) 3122 ret = cma_resolve_ib_route(id_priv, timeout_ms); 3123 else if (rdma_protocol_roce(id->device, id->port_num)) 3124 ret = cma_resolve_iboe_route(id_priv); 3125 else if (rdma_protocol_iwarp(id->device, id->port_num)) 3126 ret = cma_resolve_iw_route(id_priv); 3127 else 3128 ret = -ENOSYS; 3129 3130 if (ret) 3131 goto err; 3132 3133 return 0; 3134 err: 3135 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 3136 cma_id_put(id_priv); 3137 return ret; 3138 } 3139 EXPORT_SYMBOL(rdma_resolve_route); 3140 3141 static void cma_set_loopback(struct sockaddr *addr) 3142 { 3143 switch (addr->sa_family) { 3144 case AF_INET: 3145 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3146 break; 3147 case AF_INET6: 3148 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 3149 0, 0, 0, htonl(1)); 
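/* i.e. the IPv6 loopback address ::1 */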
3150 break; 3151 default: 3152 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 3153 0, 0, 0, htonl(1)); 3154 break; 3155 } 3156 } 3157 3158 static int cma_bind_loopback(struct rdma_id_private *id_priv) 3159 { 3160 struct cma_device *cma_dev, *cur_dev; 3161 union ib_gid gid; 3162 enum ib_port_state port_state; 3163 unsigned int p; 3164 u16 pkey; 3165 int ret; 3166 3167 cma_dev = NULL; 3168 mutex_lock(&lock); 3169 list_for_each_entry(cur_dev, &dev_list, list) { 3170 if (cma_family(id_priv) == AF_IB && 3171 !rdma_cap_ib_cm(cur_dev->device, 1)) 3172 continue; 3173 3174 if (!cma_dev) 3175 cma_dev = cur_dev; 3176 3177 rdma_for_each_port (cur_dev->device, p) { 3178 if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && 3179 port_state == IB_PORT_ACTIVE) { 3180 cma_dev = cur_dev; 3181 goto port_found; 3182 } 3183 } 3184 } 3185 3186 if (!cma_dev) { 3187 ret = -ENODEV; 3188 goto out; 3189 } 3190 3191 p = 1; 3192 3193 port_found: 3194 ret = rdma_query_gid(cma_dev->device, p, 0, &gid); 3195 if (ret) 3196 goto out; 3197 3198 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 3199 if (ret) 3200 goto out; 3201 3202 id_priv->id.route.addr.dev_addr.dev_type = 3203 (rdma_protocol_ib(cma_dev->device, p)) ? 3204 ARPHRD_INFINIBAND : ARPHRD_ETHER; 3205 3206 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 3207 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 3208 id_priv->id.port_num = p; 3209 cma_attach_to_dev(id_priv, cma_dev); 3210 rdma_restrack_add(&id_priv->res); 3211 cma_set_loopback(cma_src_addr(id_priv)); 3212 out: 3213 mutex_unlock(&lock); 3214 return ret; 3215 } 3216 3217 static void addr_handler(int status, struct sockaddr *src_addr, 3218 struct rdma_dev_addr *dev_addr, void *context) 3219 { 3220 struct rdma_id_private *id_priv = context; 3221 struct rdma_cm_event event = {}; 3222 struct sockaddr *addr; 3223 struct sockaddr_storage old_addr; 3224 3225 mutex_lock(&id_priv->handler_mutex); 3226 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 3227 RDMA_CM_ADDR_RESOLVED)) 3228 goto out; 3229 3230 /* 3231 * Store the previous src address, so that if we fail to acquire 3232 * matching rdma device, old address can be restored back, which helps 3233 * to cancel the cma listen operation correctly. 3234 */ 3235 addr = cma_src_addr(id_priv); 3236 memcpy(&old_addr, addr, rdma_addr_size(addr)); 3237 memcpy(addr, src_addr, rdma_addr_size(src_addr)); 3238 if (!status && !id_priv->cma_dev) { 3239 status = cma_acquire_dev_by_src_ip(id_priv); 3240 if (status) 3241 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 3242 status); 3243 rdma_restrack_add(&id_priv->res); 3244 } else if (status) { 3245 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. 
status %d\n", status); 3246 } 3247 3248 if (status) { 3249 memcpy(addr, &old_addr, 3250 rdma_addr_size((struct sockaddr *)&old_addr)); 3251 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 3252 RDMA_CM_ADDR_BOUND)) 3253 goto out; 3254 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3255 event.status = status; 3256 } else 3257 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 3258 3259 if (cma_cm_event_handler(id_priv, &event)) { 3260 destroy_id_handler_unlock(id_priv); 3261 return; 3262 } 3263 out: 3264 mutex_unlock(&id_priv->handler_mutex); 3265 } 3266 3267 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 3268 { 3269 struct cma_work *work; 3270 union ib_gid gid; 3271 int ret; 3272 3273 work = kzalloc(sizeof *work, GFP_KERNEL); 3274 if (!work) 3275 return -ENOMEM; 3276 3277 if (!id_priv->cma_dev) { 3278 ret = cma_bind_loopback(id_priv); 3279 if (ret) 3280 goto err; 3281 } 3282 3283 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 3284 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 3285 3286 enqueue_resolve_addr_work(work, id_priv); 3287 return 0; 3288 err: 3289 kfree(work); 3290 return ret; 3291 } 3292 3293 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 3294 { 3295 struct cma_work *work; 3296 int ret; 3297 3298 work = kzalloc(sizeof *work, GFP_KERNEL); 3299 if (!work) 3300 return -ENOMEM; 3301 3302 if (!id_priv->cma_dev) { 3303 ret = cma_resolve_ib_dev(id_priv); 3304 if (ret) 3305 goto err; 3306 } 3307 3308 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 3309 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 3310 3311 enqueue_resolve_addr_work(work, id_priv); 3312 return 0; 3313 err: 3314 kfree(work); 3315 return ret; 3316 } 3317 3318 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3319 const struct sockaddr *dst_addr) 3320 { 3321 if (!src_addr || !src_addr->sa_family) { 3322 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 3323 src_addr->sa_family = dst_addr->sa_family; 3324 if (IS_ENABLED(CONFIG_IPV6) && 3325 dst_addr->sa_family == AF_INET6) { 3326 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 3327 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 3328 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 3329 if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 3330 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 3331 } else if (dst_addr->sa_family == AF_IB) { 3332 ((struct sockaddr_ib *) src_addr)->sib_pkey = 3333 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 3334 } 3335 } 3336 return rdma_bind_addr(id, src_addr); 3337 } 3338 3339 /* 3340 * If required, resolve the source address for bind and leave the id_priv in 3341 * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior 3342 * calls made by ULP, a previously bound ID will not be re-bound and src_addr is 3343 * ignored. 
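 *
 * The ULP-facing entry point is rdma_resolve_addr() below; a typical kernel
 * ULP drives it roughly as follows (illustrative sketch only; my_handler,
 * ctx and dst are placeholders, and error handling is omitted):
 *
 *	id = rdma_create_id(&init_net, my_handler, ctx, RDMA_PS_TCP, IB_QPT_RC);
 *	ret = rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst, 2000);
 *
 * followed by rdma_resolve_route() once RDMA_CM_EVENT_ADDR_RESOLVED has been
 * delivered to my_handler.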
3344 */ 3345 static int resolve_prepare_src(struct rdma_id_private *id_priv, 3346 struct sockaddr *src_addr, 3347 const struct sockaddr *dst_addr) 3348 { 3349 int ret; 3350 3351 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 3352 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 3353 /* For a well behaved ULP state will be RDMA_CM_IDLE */ 3354 ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); 3355 if (ret) 3356 goto err_dst; 3357 if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, 3358 RDMA_CM_ADDR_QUERY))) { 3359 ret = -EINVAL; 3360 goto err_dst; 3361 } 3362 } 3363 3364 if (cma_family(id_priv) != dst_addr->sa_family) { 3365 ret = -EINVAL; 3366 goto err_state; 3367 } 3368 return 0; 3369 3370 err_state: 3371 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3372 err_dst: 3373 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); 3374 return ret; 3375 } 3376 3377 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3378 const struct sockaddr *dst_addr, unsigned long timeout_ms) 3379 { 3380 struct rdma_id_private *id_priv = 3381 container_of(id, struct rdma_id_private, id); 3382 int ret; 3383 3384 ret = resolve_prepare_src(id_priv, src_addr, dst_addr); 3385 if (ret) 3386 return ret; 3387 3388 if (cma_any_addr(dst_addr)) { 3389 ret = cma_resolve_loopback(id_priv); 3390 } else { 3391 if (dst_addr->sa_family == AF_IB) { 3392 ret = cma_resolve_ib_addr(id_priv); 3393 } else { 3394 ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, 3395 &id->route.addr.dev_addr, 3396 timeout_ms, addr_handler, 3397 false, id_priv); 3398 } 3399 } 3400 if (ret) 3401 goto err; 3402 3403 return 0; 3404 err: 3405 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3406 return ret; 3407 } 3408 EXPORT_SYMBOL(rdma_resolve_addr); 3409 3410 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 3411 { 3412 struct rdma_id_private *id_priv; 3413 unsigned long flags; 3414 int ret; 3415 3416 id_priv = container_of(id, struct rdma_id_private, id); 3417 spin_lock_irqsave(&id_priv->lock, flags); 3418 if ((reuse && id_priv->state != RDMA_CM_LISTEN) || 3419 id_priv->state == RDMA_CM_IDLE) { 3420 id_priv->reuseaddr = reuse; 3421 ret = 0; 3422 } else { 3423 ret = -EINVAL; 3424 } 3425 spin_unlock_irqrestore(&id_priv->lock, flags); 3426 return ret; 3427 } 3428 EXPORT_SYMBOL(rdma_set_reuseaddr); 3429 3430 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 3431 { 3432 struct rdma_id_private *id_priv; 3433 unsigned long flags; 3434 int ret; 3435 3436 id_priv = container_of(id, struct rdma_id_private, id); 3437 spin_lock_irqsave(&id_priv->lock, flags); 3438 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 3439 id_priv->options |= (1 << CMA_OPTION_AFONLY); 3440 id_priv->afonly = afonly; 3441 ret = 0; 3442 } else { 3443 ret = -EINVAL; 3444 } 3445 spin_unlock_irqrestore(&id_priv->lock, flags); 3446 return ret; 3447 } 3448 EXPORT_SYMBOL(rdma_set_afonly); 3449 3450 static void cma_bind_port(struct rdma_bind_list *bind_list, 3451 struct rdma_id_private *id_priv) 3452 { 3453 struct sockaddr *addr; 3454 struct sockaddr_ib *sib; 3455 u64 sid, mask; 3456 __be16 port; 3457 3458 lockdep_assert_held(&lock); 3459 3460 addr = cma_src_addr(id_priv); 3461 port = htons(bind_list->port); 3462 3463 switch (addr->sa_family) { 3464 case AF_INET: 3465 ((struct sockaddr_in *) addr)->sin_port = port; 3466 break; 3467 case AF_INET6: 3468 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3469 break; 3470 case AF_IB: 3471 sib = 
(struct sockaddr_ib *) addr; 3472 sid = be64_to_cpu(sib->sib_sid); 3473 mask = be64_to_cpu(sib->sib_sid_mask); 3474 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3475 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3476 break; 3477 } 3478 id_priv->bind_list = bind_list; 3479 hlist_add_head(&id_priv->node, &bind_list->owners); 3480 } 3481 3482 static int cma_alloc_port(enum rdma_ucm_port_space ps, 3483 struct rdma_id_private *id_priv, unsigned short snum) 3484 { 3485 struct rdma_bind_list *bind_list; 3486 int ret; 3487 3488 lockdep_assert_held(&lock); 3489 3490 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3491 if (!bind_list) 3492 return -ENOMEM; 3493 3494 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3495 snum); 3496 if (ret < 0) 3497 goto err; 3498 3499 bind_list->ps = ps; 3500 bind_list->port = snum; 3501 cma_bind_port(bind_list, id_priv); 3502 return 0; 3503 err: 3504 kfree(bind_list); 3505 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3506 } 3507 3508 static int cma_port_is_unique(struct rdma_bind_list *bind_list, 3509 struct rdma_id_private *id_priv) 3510 { 3511 struct rdma_id_private *cur_id; 3512 struct sockaddr *daddr = cma_dst_addr(id_priv); 3513 struct sockaddr *saddr = cma_src_addr(id_priv); 3514 __be16 dport = cma_port(daddr); 3515 3516 lockdep_assert_held(&lock); 3517 3518 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3519 struct sockaddr *cur_daddr = cma_dst_addr(cur_id); 3520 struct sockaddr *cur_saddr = cma_src_addr(cur_id); 3521 __be16 cur_dport = cma_port(cur_daddr); 3522 3523 if (id_priv == cur_id) 3524 continue; 3525 3526 /* different dest port -> unique */ 3527 if (!cma_any_port(daddr) && 3528 !cma_any_port(cur_daddr) && 3529 (dport != cur_dport)) 3530 continue; 3531 3532 /* different src address -> unique */ 3533 if (!cma_any_addr(saddr) && 3534 !cma_any_addr(cur_saddr) && 3535 cma_addr_cmp(saddr, cur_saddr)) 3536 continue; 3537 3538 /* different dst address -> unique */ 3539 if (!cma_any_addr(daddr) && 3540 !cma_any_addr(cur_daddr) && 3541 cma_addr_cmp(daddr, cur_daddr)) 3542 continue; 3543 3544 return -EADDRNOTAVAIL; 3545 } 3546 return 0; 3547 } 3548 3549 static int cma_alloc_any_port(enum rdma_ucm_port_space ps, 3550 struct rdma_id_private *id_priv) 3551 { 3552 static unsigned int last_used_port; 3553 int low, high, remaining; 3554 unsigned int rover; 3555 struct net *net = id_priv->id.route.addr.dev_addr.net; 3556 3557 lockdep_assert_held(&lock); 3558 3559 inet_get_local_port_range(net, &low, &high); 3560 remaining = (high - low) + 1; 3561 rover = prandom_u32() % remaining + low; 3562 retry: 3563 if (last_used_port != rover) { 3564 struct rdma_bind_list *bind_list; 3565 int ret; 3566 3567 bind_list = cma_ps_find(net, ps, (unsigned short)rover); 3568 3569 if (!bind_list) { 3570 ret = cma_alloc_port(ps, id_priv, rover); 3571 } else { 3572 ret = cma_port_is_unique(bind_list, id_priv); 3573 if (!ret) 3574 cma_bind_port(bind_list, id_priv); 3575 } 3576 /* 3577 * Remember previously used port number in order to avoid 3578 * re-using same port immediately after it is closed. 3579 */ 3580 if (!ret) 3581 last_used_port = rover; 3582 if (ret != -EADDRNOTAVAIL) 3583 return ret; 3584 } 3585 if (--remaining) { 3586 rover++; 3587 if ((rover < low) || (rover > high)) 3588 rover = low; 3589 goto retry; 3590 } 3591 return -EADDRNOTAVAIL; 3592 } 3593 3594 /* 3595 * Check that the requested port is available. This is called when trying to 3596 * bind to a specific port, or when trying to listen on a bound port. 
In 3597 * the latter case, the provided id_priv may already be on the bind_list, but 3598 * we still need to check that it's okay to start listening. 3599 */ 3600 static int cma_check_port(struct rdma_bind_list *bind_list, 3601 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3602 { 3603 struct rdma_id_private *cur_id; 3604 struct sockaddr *addr, *cur_addr; 3605 3606 lockdep_assert_held(&lock); 3607 3608 addr = cma_src_addr(id_priv); 3609 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3610 if (id_priv == cur_id) 3611 continue; 3612 3613 if (reuseaddr && cur_id->reuseaddr) 3614 continue; 3615 3616 cur_addr = cma_src_addr(cur_id); 3617 if (id_priv->afonly && cur_id->afonly && 3618 (addr->sa_family != cur_addr->sa_family)) 3619 continue; 3620 3621 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3622 return -EADDRNOTAVAIL; 3623 3624 if (!cma_addr_cmp(addr, cur_addr)) 3625 return -EADDRINUSE; 3626 } 3627 return 0; 3628 } 3629 3630 static int cma_use_port(enum rdma_ucm_port_space ps, 3631 struct rdma_id_private *id_priv) 3632 { 3633 struct rdma_bind_list *bind_list; 3634 unsigned short snum; 3635 int ret; 3636 3637 lockdep_assert_held(&lock); 3638 3639 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3640 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 3641 return -EACCES; 3642 3643 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3644 if (!bind_list) { 3645 ret = cma_alloc_port(ps, id_priv, snum); 3646 } else { 3647 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3648 if (!ret) 3649 cma_bind_port(bind_list, id_priv); 3650 } 3651 return ret; 3652 } 3653 3654 static enum rdma_ucm_port_space 3655 cma_select_inet_ps(struct rdma_id_private *id_priv) 3656 { 3657 switch (id_priv->id.ps) { 3658 case RDMA_PS_TCP: 3659 case RDMA_PS_UDP: 3660 case RDMA_PS_IPOIB: 3661 case RDMA_PS_IB: 3662 return id_priv->id.ps; 3663 default: 3664 3665 return 0; 3666 } 3667 } 3668 3669 static enum rdma_ucm_port_space 3670 cma_select_ib_ps(struct rdma_id_private *id_priv) 3671 { 3672 enum rdma_ucm_port_space ps = 0; 3673 struct sockaddr_ib *sib; 3674 u64 sid_ps, mask, sid; 3675 3676 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3677 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3678 sid = be64_to_cpu(sib->sib_sid) & mask; 3679 3680 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3681 sid_ps = RDMA_IB_IP_PS_IB; 3682 ps = RDMA_PS_IB; 3683 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3684 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3685 sid_ps = RDMA_IB_IP_PS_TCP; 3686 ps = RDMA_PS_TCP; 3687 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3688 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3689 sid_ps = RDMA_IB_IP_PS_UDP; 3690 ps = RDMA_PS_UDP; 3691 } 3692 3693 if (ps) { 3694 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3695 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3696 be64_to_cpu(sib->sib_sid_mask)); 3697 } 3698 return ps; 3699 } 3700 3701 static int cma_get_port(struct rdma_id_private *id_priv) 3702 { 3703 enum rdma_ucm_port_space ps; 3704 int ret; 3705 3706 if (cma_family(id_priv) != AF_IB) 3707 ps = cma_select_inet_ps(id_priv); 3708 else 3709 ps = cma_select_ib_ps(id_priv); 3710 if (!ps) 3711 return -EPROTONOSUPPORT; 3712 3713 mutex_lock(&lock); 3714 if (cma_any_port(cma_src_addr(id_priv))) 3715 ret = cma_alloc_any_port(ps, id_priv); 3716 else 3717 ret = cma_use_port(ps, id_priv); 3718 mutex_unlock(&lock); 3719 3720 
return ret; 3721 } 3722 3723 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3724 struct sockaddr *addr) 3725 { 3726 #if IS_ENABLED(CONFIG_IPV6) 3727 struct sockaddr_in6 *sin6; 3728 3729 if (addr->sa_family != AF_INET6) 3730 return 0; 3731 3732 sin6 = (struct sockaddr_in6 *) addr; 3733 3734 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 3735 return 0; 3736 3737 if (!sin6->sin6_scope_id) 3738 return -EINVAL; 3739 3740 dev_addr->bound_dev_if = sin6->sin6_scope_id; 3741 #endif 3742 return 0; 3743 } 3744 3745 int rdma_listen(struct rdma_cm_id *id, int backlog) 3746 { 3747 struct rdma_id_private *id_priv = 3748 container_of(id, struct rdma_id_private, id); 3749 int ret; 3750 3751 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) { 3752 /* For a well behaved ULP state will be RDMA_CM_IDLE */ 3753 id->route.addr.src_addr.ss_family = AF_INET; 3754 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3755 if (ret) 3756 return ret; 3757 if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, 3758 RDMA_CM_LISTEN))) 3759 return -EINVAL; 3760 } 3761 3762 /* 3763 * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable 3764 * any more, and has to be unique in the bind list. 3765 */ 3766 if (id_priv->reuseaddr) { 3767 mutex_lock(&lock); 3768 ret = cma_check_port(id_priv->bind_list, id_priv, 0); 3769 if (!ret) 3770 id_priv->reuseaddr = 0; 3771 mutex_unlock(&lock); 3772 if (ret) 3773 goto err; 3774 } 3775 3776 id_priv->backlog = backlog; 3777 if (id->device) { 3778 if (rdma_cap_ib_cm(id->device, 1)) { 3779 ret = cma_ib_listen(id_priv); 3780 if (ret) 3781 goto err; 3782 } else if (rdma_cap_iw_cm(id->device, 1)) { 3783 ret = cma_iw_listen(id_priv, backlog); 3784 if (ret) 3785 goto err; 3786 } else { 3787 ret = -ENOSYS; 3788 goto err; 3789 } 3790 } else { 3791 ret = cma_listen_on_all(id_priv); 3792 if (ret) 3793 goto err; 3794 } 3795 3796 return 0; 3797 err: 3798 id_priv->backlog = 0; 3799 /* 3800 * All the failure paths that lead here will not allow the req_handler's 3801 * to have run. 
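 * It is therefore safe to move the state back to RDMA_CM_ADDR_BOUND here
 * without racing against an incoming connection request.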
3802 */ 3803 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3804 return ret; 3805 } 3806 EXPORT_SYMBOL(rdma_listen); 3807 3808 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3809 { 3810 struct rdma_id_private *id_priv; 3811 int ret; 3812 struct sockaddr *daddr; 3813 3814 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3815 addr->sa_family != AF_IB) 3816 return -EAFNOSUPPORT; 3817 3818 id_priv = container_of(id, struct rdma_id_private, id); 3819 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3820 return -EINVAL; 3821 3822 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3823 if (ret) 3824 goto err1; 3825 3826 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3827 if (!cma_any_addr(addr)) { 3828 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3829 if (ret) 3830 goto err1; 3831 3832 ret = cma_acquire_dev_by_src_ip(id_priv); 3833 if (ret) 3834 goto err1; 3835 } 3836 3837 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3838 if (addr->sa_family == AF_INET) 3839 id_priv->afonly = 1; 3840 #if IS_ENABLED(CONFIG_IPV6) 3841 else if (addr->sa_family == AF_INET6) { 3842 struct net *net = id_priv->id.route.addr.dev_addr.net; 3843 3844 id_priv->afonly = net->ipv6.sysctl.bindv6only; 3845 } 3846 #endif 3847 } 3848 daddr = cma_dst_addr(id_priv); 3849 daddr->sa_family = addr->sa_family; 3850 3851 ret = cma_get_port(id_priv); 3852 if (ret) 3853 goto err2; 3854 3855 if (!cma_any_addr(addr)) 3856 rdma_restrack_add(&id_priv->res); 3857 return 0; 3858 err2: 3859 if (id_priv->cma_dev) 3860 cma_release_dev(id_priv); 3861 err1: 3862 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3863 return ret; 3864 } 3865 EXPORT_SYMBOL(rdma_bind_addr); 3866 3867 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3868 { 3869 struct cma_hdr *cma_hdr; 3870 3871 cma_hdr = hdr; 3872 cma_hdr->cma_version = CMA_VERSION; 3873 if (cma_family(id_priv) == AF_INET) { 3874 struct sockaddr_in *src4, *dst4; 3875 3876 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3877 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3878 3879 cma_set_ip_ver(cma_hdr, 4); 3880 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3881 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3882 cma_hdr->port = src4->sin_port; 3883 } else if (cma_family(id_priv) == AF_INET6) { 3884 struct sockaddr_in6 *src6, *dst6; 3885 3886 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3887 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3888 3889 cma_set_ip_ver(cma_hdr, 6); 3890 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3891 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3892 cma_hdr->port = src6->sin6_port; 3893 } 3894 return 0; 3895 } 3896 3897 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3898 const struct ib_cm_event *ib_event) 3899 { 3900 struct rdma_id_private *id_priv = cm_id->context; 3901 struct rdma_cm_event event = {}; 3902 const struct ib_cm_sidr_rep_event_param *rep = 3903 &ib_event->param.sidr_rep_rcvd; 3904 int ret; 3905 3906 mutex_lock(&id_priv->handler_mutex); 3907 if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) 3908 goto out; 3909 3910 switch (ib_event->event) { 3911 case IB_CM_SIDR_REQ_ERROR: 3912 event.event = RDMA_CM_EVENT_UNREACHABLE; 3913 event.status = -ETIMEDOUT; 3914 break; 3915 case IB_CM_SIDR_REP_RECEIVED: 3916 event.param.ud.private_data = ib_event->private_data; 3917 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3918 if (rep->status != IB_SIDR_SUCCESS) { 3919 event.event = 
RDMA_CM_EVENT_UNREACHABLE; 3920 event.status = ib_event->param.sidr_rep_rcvd.status; 3921 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", 3922 event.status); 3923 break; 3924 } 3925 ret = cma_set_qkey(id_priv, rep->qkey); 3926 if (ret) { 3927 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); 3928 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3929 event.status = ret; 3930 break; 3931 } 3932 ib_init_ah_attr_from_path(id_priv->id.device, 3933 id_priv->id.port_num, 3934 id_priv->id.route.path_rec, 3935 &event.param.ud.ah_attr, 3936 rep->sgid_attr); 3937 event.param.ud.qp_num = rep->qpn; 3938 event.param.ud.qkey = rep->qkey; 3939 event.event = RDMA_CM_EVENT_ESTABLISHED; 3940 event.status = 0; 3941 break; 3942 default: 3943 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3944 ib_event->event); 3945 goto out; 3946 } 3947 3948 ret = cma_cm_event_handler(id_priv, &event); 3949 3950 rdma_destroy_ah_attr(&event.param.ud.ah_attr); 3951 if (ret) { 3952 /* Destroy the CM ID by returning a non-zero value. */ 3953 id_priv->cm_id.ib = NULL; 3954 destroy_id_handler_unlock(id_priv); 3955 return ret; 3956 } 3957 out: 3958 mutex_unlock(&id_priv->handler_mutex); 3959 return 0; 3960 } 3961 3962 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3963 struct rdma_conn_param *conn_param) 3964 { 3965 struct ib_cm_sidr_req_param req; 3966 struct ib_cm_id *id; 3967 void *private_data; 3968 u8 offset; 3969 int ret; 3970 3971 memset(&req, 0, sizeof req); 3972 offset = cma_user_data_offset(id_priv); 3973 req.private_data_len = offset + conn_param->private_data_len; 3974 if (req.private_data_len < conn_param->private_data_len) 3975 return -EINVAL; 3976 3977 if (req.private_data_len) { 3978 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3979 if (!private_data) 3980 return -ENOMEM; 3981 } else { 3982 private_data = NULL; 3983 } 3984 3985 if (conn_param->private_data && conn_param->private_data_len) 3986 memcpy(private_data + offset, conn_param->private_data, 3987 conn_param->private_data_len); 3988 3989 if (private_data) { 3990 ret = cma_format_hdr(private_data, id_priv); 3991 if (ret) 3992 goto out; 3993 req.private_data = private_data; 3994 } 3995 3996 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3997 id_priv); 3998 if (IS_ERR(id)) { 3999 ret = PTR_ERR(id); 4000 goto out; 4001 } 4002 id_priv->cm_id.ib = id; 4003 4004 req.path = id_priv->id.route.path_rec; 4005 req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; 4006 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 4007 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 4008 req.max_cm_retries = CMA_MAX_CM_RETRIES; 4009 4010 trace_cm_send_sidr_req(id_priv); 4011 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 4012 if (ret) { 4013 ib_destroy_cm_id(id_priv->cm_id.ib); 4014 id_priv->cm_id.ib = NULL; 4015 } 4016 out: 4017 kfree(private_data); 4018 return ret; 4019 } 4020 4021 static int cma_connect_ib(struct rdma_id_private *id_priv, 4022 struct rdma_conn_param *conn_param) 4023 { 4024 struct ib_cm_req_param req; 4025 struct rdma_route *route; 4026 void *private_data; 4027 struct ib_cm_id *id; 4028 u8 offset; 4029 int ret; 4030 4031 memset(&req, 0, sizeof req); 4032 offset = cma_user_data_offset(id_priv); 4033 req.private_data_len = offset + conn_param->private_data_len; 4034 if (req.private_data_len < conn_param->private_data_len) 4035 return -EINVAL; 4036 4037 if (req.private_data_len) { 4038 private_data = kzalloc(req.private_data_len, 
GFP_ATOMIC); 4039 if (!private_data) 4040 return -ENOMEM; 4041 } else { 4042 private_data = NULL; 4043 } 4044 4045 if (conn_param->private_data && conn_param->private_data_len) 4046 memcpy(private_data + offset, conn_param->private_data, 4047 conn_param->private_data_len); 4048 4049 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 4050 if (IS_ERR(id)) { 4051 ret = PTR_ERR(id); 4052 goto out; 4053 } 4054 id_priv->cm_id.ib = id; 4055 4056 route = &id_priv->id.route; 4057 if (private_data) { 4058 ret = cma_format_hdr(private_data, id_priv); 4059 if (ret) 4060 goto out; 4061 req.private_data = private_data; 4062 } 4063 4064 req.primary_path = &route->path_rec[0]; 4065 if (route->num_paths == 2) 4066 req.alternate_path = &route->path_rec[1]; 4067 4068 req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; 4069 /* Alternate path SGID attribute currently unsupported */ 4070 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 4071 req.qp_num = id_priv->qp_num; 4072 req.qp_type = id_priv->id.qp_type; 4073 req.starting_psn = id_priv->seq_num; 4074 req.responder_resources = conn_param->responder_resources; 4075 req.initiator_depth = conn_param->initiator_depth; 4076 req.flow_control = conn_param->flow_control; 4077 req.retry_count = min_t(u8, 7, conn_param->retry_count); 4078 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 4079 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 4080 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 4081 req.max_cm_retries = CMA_MAX_CM_RETRIES; 4082 req.srq = id_priv->srq ? 1 : 0; 4083 req.ece.vendor_id = id_priv->ece.vendor_id; 4084 req.ece.attr_mod = id_priv->ece.attr_mod; 4085 4086 trace_cm_send_req(id_priv); 4087 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 4088 out: 4089 if (ret && !IS_ERR(id)) { 4090 ib_destroy_cm_id(id); 4091 id_priv->cm_id.ib = NULL; 4092 } 4093 4094 kfree(private_data); 4095 return ret; 4096 } 4097 4098 static int cma_connect_iw(struct rdma_id_private *id_priv, 4099 struct rdma_conn_param *conn_param) 4100 { 4101 struct iw_cm_id *cm_id; 4102 int ret; 4103 struct iw_cm_conn_param iw_param; 4104 4105 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 4106 if (IS_ERR(cm_id)) 4107 return PTR_ERR(cm_id); 4108 4109 cm_id->tos = id_priv->tos; 4110 cm_id->tos_set = id_priv->tos_set; 4111 id_priv->cm_id.iw = cm_id; 4112 4113 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 4114 rdma_addr_size(cma_src_addr(id_priv))); 4115 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 4116 rdma_addr_size(cma_dst_addr(id_priv))); 4117 4118 ret = cma_modify_qp_rtr(id_priv, conn_param); 4119 if (ret) 4120 goto out; 4121 4122 if (conn_param) { 4123 iw_param.ord = conn_param->initiator_depth; 4124 iw_param.ird = conn_param->responder_resources; 4125 iw_param.private_data = conn_param->private_data; 4126 iw_param.private_data_len = conn_param->private_data_len; 4127 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 4128 } else { 4129 memset(&iw_param, 0, sizeof iw_param); 4130 iw_param.qpn = id_priv->qp_num; 4131 } 4132 ret = iw_cm_connect(cm_id, &iw_param); 4133 out: 4134 if (ret) { 4135 iw_destroy_cm_id(cm_id); 4136 id_priv->cm_id.iw = NULL; 4137 } 4138 return ret; 4139 } 4140 4141 /** 4142 * rdma_connect_locked - Initiate an active connection request. 4143 * @id: Connection identifier to connect. 4144 * @conn_param: Connection information used for connected QPs. 
4145 * 4146 * Same as rdma_connect() but can only be called from the 4147 * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback. 4148 */ 4149 int rdma_connect_locked(struct rdma_cm_id *id, 4150 struct rdma_conn_param *conn_param) 4151 { 4152 struct rdma_id_private *id_priv = 4153 container_of(id, struct rdma_id_private, id); 4154 int ret; 4155 4156 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 4157 return -EINVAL; 4158 4159 if (!id->qp) { 4160 id_priv->qp_num = conn_param->qp_num; 4161 id_priv->srq = conn_param->srq; 4162 } 4163 4164 if (rdma_cap_ib_cm(id->device, id->port_num)) { 4165 if (id->qp_type == IB_QPT_UD) 4166 ret = cma_resolve_ib_udp(id_priv, conn_param); 4167 else 4168 ret = cma_connect_ib(id_priv, conn_param); 4169 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 4170 ret = cma_connect_iw(id_priv, conn_param); 4171 } else { 4172 ret = -ENOSYS; 4173 } 4174 if (ret) 4175 goto err_state; 4176 return 0; 4177 err_state: 4178 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 4179 return ret; 4180 } 4181 EXPORT_SYMBOL(rdma_connect_locked); 4182 4183 /** 4184 * rdma_connect - Initiate an active connection request. 4185 * @id: Connection identifier to connect. 4186 * @conn_param: Connection information used for connected QPs. 4187 * 4188 * Users must have resolved a route for the rdma_cm_id to connect with by having 4189 * called rdma_resolve_route before calling this routine. 4190 * 4191 * This call will either connect to a remote QP or obtain remote QP information 4192 * for unconnected rdma_cm_id's. The actual operation is based on the 4193 * rdma_cm_id's port space. 4194 */ 4195 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 4196 { 4197 struct rdma_id_private *id_priv = 4198 container_of(id, struct rdma_id_private, id); 4199 int ret; 4200 4201 mutex_lock(&id_priv->handler_mutex); 4202 ret = rdma_connect_locked(id, conn_param); 4203 mutex_unlock(&id_priv->handler_mutex); 4204 return ret; 4205 } 4206 EXPORT_SYMBOL(rdma_connect); 4207 4208 /** 4209 * rdma_connect_ece - Initiate an active connection request with ECE data. 4210 * @id: Connection identifier to connect. 4211 * @conn_param: Connection information used for connected QPs. 4212 * @ece: ECE parameters 4213 * 4214 * See rdma_connect() explanation. 
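 *
 * A minimal usage sketch (illustrative only; the vendor_id value, conn_param
 * setup and error label are assumptions of the example, not requirements of
 * this API):
 *
 *	struct rdma_ucm_ece ece = {
 *		.vendor_id = 0x123456,	(example OUI, purely illustrative)
 *		.attr_mod = 0,
 *	};
 *	int ret;
 *
 *	ret = rdma_connect_ece(id, &conn_param, &ece);
 *	if (ret)
 *		goto err;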
4215 */ 4216 int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, 4217 struct rdma_ucm_ece *ece) 4218 { 4219 struct rdma_id_private *id_priv = 4220 container_of(id, struct rdma_id_private, id); 4221 4222 id_priv->ece.vendor_id = ece->vendor_id; 4223 id_priv->ece.attr_mod = ece->attr_mod; 4224 4225 return rdma_connect(id, conn_param); 4226 } 4227 EXPORT_SYMBOL(rdma_connect_ece); 4228 4229 static int cma_accept_ib(struct rdma_id_private *id_priv, 4230 struct rdma_conn_param *conn_param) 4231 { 4232 struct ib_cm_rep_param rep; 4233 int ret; 4234 4235 ret = cma_modify_qp_rtr(id_priv, conn_param); 4236 if (ret) 4237 goto out; 4238 4239 ret = cma_modify_qp_rts(id_priv, conn_param); 4240 if (ret) 4241 goto out; 4242 4243 memset(&rep, 0, sizeof rep); 4244 rep.qp_num = id_priv->qp_num; 4245 rep.starting_psn = id_priv->seq_num; 4246 rep.private_data = conn_param->private_data; 4247 rep.private_data_len = conn_param->private_data_len; 4248 rep.responder_resources = conn_param->responder_resources; 4249 rep.initiator_depth = conn_param->initiator_depth; 4250 rep.failover_accepted = 0; 4251 rep.flow_control = conn_param->flow_control; 4252 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 4253 rep.srq = id_priv->srq ? 1 : 0; 4254 rep.ece.vendor_id = id_priv->ece.vendor_id; 4255 rep.ece.attr_mod = id_priv->ece.attr_mod; 4256 4257 trace_cm_send_rep(id_priv); 4258 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 4259 out: 4260 return ret; 4261 } 4262 4263 static int cma_accept_iw(struct rdma_id_private *id_priv, 4264 struct rdma_conn_param *conn_param) 4265 { 4266 struct iw_cm_conn_param iw_param; 4267 int ret; 4268 4269 if (!conn_param) 4270 return -EINVAL; 4271 4272 ret = cma_modify_qp_rtr(id_priv, conn_param); 4273 if (ret) 4274 return ret; 4275 4276 iw_param.ord = conn_param->initiator_depth; 4277 iw_param.ird = conn_param->responder_resources; 4278 iw_param.private_data = conn_param->private_data; 4279 iw_param.private_data_len = conn_param->private_data_len; 4280 if (id_priv->id.qp) 4281 iw_param.qpn = id_priv->qp_num; 4282 else 4283 iw_param.qpn = conn_param->qp_num; 4284 4285 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 4286 } 4287 4288 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 4289 enum ib_cm_sidr_status status, u32 qkey, 4290 const void *private_data, int private_data_len) 4291 { 4292 struct ib_cm_sidr_rep_param rep; 4293 int ret; 4294 4295 memset(&rep, 0, sizeof rep); 4296 rep.status = status; 4297 if (status == IB_SIDR_SUCCESS) { 4298 ret = cma_set_qkey(id_priv, qkey); 4299 if (ret) 4300 return ret; 4301 rep.qp_num = id_priv->qp_num; 4302 rep.qkey = id_priv->qkey; 4303 4304 rep.ece.vendor_id = id_priv->ece.vendor_id; 4305 rep.ece.attr_mod = id_priv->ece.attr_mod; 4306 } 4307 4308 rep.private_data = private_data; 4309 rep.private_data_len = private_data_len; 4310 4311 trace_cm_send_sidr_rep(id_priv); 4312 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 4313 } 4314 4315 /** 4316 * rdma_accept - Called to accept a connection request or response. 4317 * @id: Connection identifier associated with the request. 4318 * @conn_param: Information needed to establish the connection. This must be 4319 * provided if accepting a connection request. If accepting a connection 4320 * response, this parameter must be NULL. 4321 * 4322 * Typically, this routine is only called by the listener to accept a connection 4323 * request. 
It must also be called on the active side of a connection if the 4324 * user is performing their own QP transitions. 4325 * 4326 * In the case of error, a reject message is sent to the remote side and the 4327 * state of the qp associated with the id is modified to error, such that any 4328 * previously posted receive buffers would be flushed. 4329 * 4330 * This function is for use by kernel ULPs and must be called from under the 4331 * handler callback. 4332 */ 4333 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 4334 { 4335 struct rdma_id_private *id_priv = 4336 container_of(id, struct rdma_id_private, id); 4337 int ret; 4338 4339 lockdep_assert_held(&id_priv->handler_mutex); 4340 4341 if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) 4342 return -EINVAL; 4343 4344 if (!id->qp && conn_param) { 4345 id_priv->qp_num = conn_param->qp_num; 4346 id_priv->srq = conn_param->srq; 4347 } 4348 4349 if (rdma_cap_ib_cm(id->device, id->port_num)) { 4350 if (id->qp_type == IB_QPT_UD) { 4351 if (conn_param) 4352 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 4353 conn_param->qkey, 4354 conn_param->private_data, 4355 conn_param->private_data_len); 4356 else 4357 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 4358 0, NULL, 0); 4359 } else { 4360 if (conn_param) 4361 ret = cma_accept_ib(id_priv, conn_param); 4362 else 4363 ret = cma_rep_recv(id_priv); 4364 } 4365 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 4366 ret = cma_accept_iw(id_priv, conn_param); 4367 } else { 4368 ret = -ENOSYS; 4369 } 4370 if (ret) 4371 goto reject; 4372 4373 return 0; 4374 reject: 4375 cma_modify_qp_err(id_priv); 4376 rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); 4377 return ret; 4378 } 4379 EXPORT_SYMBOL(rdma_accept); 4380 4381 int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, 4382 struct rdma_ucm_ece *ece) 4383 { 4384 struct rdma_id_private *id_priv = 4385 container_of(id, struct rdma_id_private, id); 4386 4387 id_priv->ece.vendor_id = ece->vendor_id; 4388 id_priv->ece.attr_mod = ece->attr_mod; 4389 4390 return rdma_accept(id, conn_param); 4391 } 4392 EXPORT_SYMBOL(rdma_accept_ece); 4393 4394 void rdma_lock_handler(struct rdma_cm_id *id) 4395 { 4396 struct rdma_id_private *id_priv = 4397 container_of(id, struct rdma_id_private, id); 4398 4399 mutex_lock(&id_priv->handler_mutex); 4400 } 4401 EXPORT_SYMBOL(rdma_lock_handler); 4402 4403 void rdma_unlock_handler(struct rdma_cm_id *id) 4404 { 4405 struct rdma_id_private *id_priv = 4406 container_of(id, struct rdma_id_private, id); 4407 4408 mutex_unlock(&id_priv->handler_mutex); 4409 } 4410 EXPORT_SYMBOL(rdma_unlock_handler); 4411 4412 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 4413 { 4414 struct rdma_id_private *id_priv; 4415 int ret; 4416 4417 id_priv = container_of(id, struct rdma_id_private, id); 4418 if (!id_priv->cm_id.ib) 4419 return -EINVAL; 4420 4421 switch (id->device->node_type) { 4422 case RDMA_NODE_IB_CA: 4423 ret = ib_cm_notify(id_priv->cm_id.ib, event); 4424 break; 4425 default: 4426 ret = 0; 4427 break; 4428 } 4429 return ret; 4430 } 4431 EXPORT_SYMBOL(rdma_notify); 4432 4433 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 4434 u8 private_data_len, u8 reason) 4435 { 4436 struct rdma_id_private *id_priv; 4437 int ret; 4438 4439 id_priv = container_of(id, struct rdma_id_private, id); 4440 if (!id_priv->cm_id.ib) 4441 return -EINVAL; 4442 4443 if (rdma_cap_ib_cm(id->device, id->port_num)) { 4444 if (id->qp_type == IB_QPT_UD) { 4445 ret = 
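				/*
				 * UD (SIDR) connections are rejected by
				 * returning a SIDR REP with IB_SIDR_REJECT
				 * status instead of sending a CM REJ.
				 */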
cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 4446 private_data, private_data_len); 4447 } else { 4448 trace_cm_send_rej(id_priv); 4449 ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0, 4450 private_data, private_data_len); 4451 } 4452 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 4453 ret = iw_cm_reject(id_priv->cm_id.iw, 4454 private_data, private_data_len); 4455 } else { 4456 ret = -ENOSYS; 4457 } 4458 4459 return ret; 4460 } 4461 EXPORT_SYMBOL(rdma_reject); 4462 4463 int rdma_disconnect(struct rdma_cm_id *id) 4464 { 4465 struct rdma_id_private *id_priv; 4466 int ret; 4467 4468 id_priv = container_of(id, struct rdma_id_private, id); 4469 if (!id_priv->cm_id.ib) 4470 return -EINVAL; 4471 4472 if (rdma_cap_ib_cm(id->device, id->port_num)) { 4473 ret = cma_modify_qp_err(id_priv); 4474 if (ret) 4475 goto out; 4476 /* Initiate or respond to a disconnect. */ 4477 trace_cm_disconnect(id_priv); 4478 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { 4479 if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0)) 4480 trace_cm_sent_drep(id_priv); 4481 } else { 4482 trace_cm_sent_dreq(id_priv); 4483 } 4484 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 4485 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 4486 } else 4487 ret = -EINVAL; 4488 4489 out: 4490 return ret; 4491 } 4492 EXPORT_SYMBOL(rdma_disconnect); 4493 4494 static void cma_make_mc_event(int status, struct rdma_id_private *id_priv, 4495 struct ib_sa_multicast *multicast, 4496 struct rdma_cm_event *event, 4497 struct cma_multicast *mc) 4498 { 4499 struct rdma_dev_addr *dev_addr; 4500 enum ib_gid_type gid_type; 4501 struct net_device *ndev; 4502 4503 if (!status) 4504 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 4505 else 4506 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. 
status %d\n", 4507 status); 4508 4509 event->status = status; 4510 event->param.ud.private_data = mc->context; 4511 if (status) { 4512 event->event = RDMA_CM_EVENT_MULTICAST_ERROR; 4513 return; 4514 } 4515 4516 dev_addr = &id_priv->id.route.addr.dev_addr; 4517 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 4518 gid_type = 4519 id_priv->cma_dev 4520 ->default_gid_type[id_priv->id.port_num - 4521 rdma_start_port( 4522 id_priv->cma_dev->device)]; 4523 4524 event->event = RDMA_CM_EVENT_MULTICAST_JOIN; 4525 if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, 4526 &multicast->rec, ndev, gid_type, 4527 &event->param.ud.ah_attr)) { 4528 event->event = RDMA_CM_EVENT_MULTICAST_ERROR; 4529 goto out; 4530 } 4531 4532 event->param.ud.qp_num = 0xFFFFFF; 4533 event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 4534 4535 out: 4536 if (ndev) 4537 dev_put(ndev); 4538 } 4539 4540 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 4541 { 4542 struct cma_multicast *mc = multicast->context; 4543 struct rdma_id_private *id_priv = mc->id_priv; 4544 struct rdma_cm_event event = {}; 4545 int ret = 0; 4546 4547 mutex_lock(&id_priv->handler_mutex); 4548 if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL || 4549 READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING) 4550 goto out; 4551 4552 cma_make_mc_event(status, id_priv, multicast, &event, mc); 4553 ret = cma_cm_event_handler(id_priv, &event); 4554 rdma_destroy_ah_attr(&event.param.ud.ah_attr); 4555 WARN_ON(ret); 4556 4557 out: 4558 mutex_unlock(&id_priv->handler_mutex); 4559 return 0; 4560 } 4561 4562 static void cma_set_mgid(struct rdma_id_private *id_priv, 4563 struct sockaddr *addr, union ib_gid *mgid) 4564 { 4565 unsigned char mc_map[MAX_ADDR_LEN]; 4566 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4567 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 4568 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 4569 4570 if (cma_any_addr(addr)) { 4571 memset(mgid, 0, sizeof *mgid); 4572 } else if ((addr->sa_family == AF_INET6) && 4573 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 4574 0xFF10A01B)) { 4575 /* IPv6 address is an SA assigned MGID. 
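		 * Use it as the multicast GID as-is.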
*/ 4576 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4577 } else if (addr->sa_family == AF_IB) { 4578 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); 4579 } else if (addr->sa_family == AF_INET6) { 4580 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 4581 if (id_priv->id.ps == RDMA_PS_UDP) 4582 mc_map[7] = 0x01; /* Use RDMA CM signature */ 4583 *mgid = *(union ib_gid *) (mc_map + 4); 4584 } else { 4585 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); 4586 if (id_priv->id.ps == RDMA_PS_UDP) 4587 mc_map[7] = 0x01; /* Use RDMA CM signature */ 4588 *mgid = *(union ib_gid *) (mc_map + 4); 4589 } 4590 } 4591 4592 static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 4593 struct cma_multicast *mc) 4594 { 4595 struct ib_sa_mcmember_rec rec; 4596 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4597 ib_sa_comp_mask comp_mask; 4598 int ret; 4599 4600 ib_addr_get_mgid(dev_addr, &rec.mgid); 4601 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, 4602 &rec.mgid, &rec); 4603 if (ret) 4604 return ret; 4605 4606 ret = cma_set_qkey(id_priv, 0); 4607 if (ret) 4608 return ret; 4609 4610 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 4611 rec.qkey = cpu_to_be32(id_priv->qkey); 4612 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 4613 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 4614 rec.join_state = mc->join_state; 4615 4616 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 4617 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 4618 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | 4619 IB_SA_MCMEMBER_REC_FLOW_LABEL | 4620 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 4621 4622 if (id_priv->id.ps == RDMA_PS_IPOIB) 4623 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 4624 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 4625 IB_SA_MCMEMBER_REC_MTU_SELECTOR | 4626 IB_SA_MCMEMBER_REC_MTU | 4627 IB_SA_MCMEMBER_REC_HOP_LIMIT; 4628 4629 mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device, 4630 id_priv->id.port_num, &rec, comp_mask, 4631 GFP_KERNEL, cma_ib_mc_handler, mc); 4632 return PTR_ERR_OR_ZERO(mc->sa_mc); 4633 } 4634 4635 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, 4636 enum ib_gid_type gid_type) 4637 { 4638 struct sockaddr_in *sin = (struct sockaddr_in *)addr; 4639 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; 4640 4641 if (cma_any_addr(addr)) { 4642 memset(mgid, 0, sizeof *mgid); 4643 } else if (addr->sa_family == AF_INET6) { 4644 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4645 } else { 4646 mgid->raw[0] = 4647 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff; 4648 mgid->raw[1] = 4649 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 
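				/*
				 * RoCE v2 (UDP encap) keeps an IPv4-mapped
				 * style MGID (prefix zeroed), while RoCE v1
				 * uses the ff0e IPv6 multicast prefix.
				 */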
0 : 0x0e; 4650 mgid->raw[2] = 0; 4651 mgid->raw[3] = 0; 4652 mgid->raw[4] = 0; 4653 mgid->raw[5] = 0; 4654 mgid->raw[6] = 0; 4655 mgid->raw[7] = 0; 4656 mgid->raw[8] = 0; 4657 mgid->raw[9] = 0; 4658 mgid->raw[10] = 0xff; 4659 mgid->raw[11] = 0xff; 4660 *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; 4661 } 4662 } 4663 4664 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, 4665 struct cma_multicast *mc) 4666 { 4667 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4668 int err = 0; 4669 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 4670 struct net_device *ndev = NULL; 4671 struct ib_sa_multicast ib; 4672 enum ib_gid_type gid_type; 4673 bool send_only; 4674 4675 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); 4676 4677 if (cma_zero_addr(addr)) 4678 return -EINVAL; 4679 4680 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 4681 rdma_start_port(id_priv->cma_dev->device)]; 4682 cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); 4683 4684 ib.rec.pkey = cpu_to_be16(0xffff); 4685 if (id_priv->id.ps == RDMA_PS_UDP) 4686 ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 4687 4688 if (dev_addr->bound_dev_if) 4689 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 4690 if (!ndev) 4691 return -ENODEV; 4692 4693 ib.rec.rate = iboe_get_rate(ndev); 4694 ib.rec.hop_limit = 1; 4695 ib.rec.mtu = iboe_get_mtu(ndev->mtu); 4696 4697 if (addr->sa_family == AF_INET) { 4698 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 4699 ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; 4700 if (!send_only) { 4701 err = cma_igmp_send(ndev, &ib.rec.mgid, 4702 true); 4703 } 4704 } 4705 } else { 4706 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 4707 err = -ENOTSUPP; 4708 } 4709 dev_put(ndev); 4710 if (err || !ib.rec.mtu) 4711 return err ?: -EINVAL; 4712 4713 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 4714 &ib.rec.port_gid); 4715 INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); 4716 cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc); 4717 queue_work(cma_wq, &mc->iboe_join.work); 4718 return 0; 4719 } 4720 4721 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 4722 u8 join_state, void *context) 4723 { 4724 struct rdma_id_private *id_priv = 4725 container_of(id, struct rdma_id_private, id); 4726 struct cma_multicast *mc; 4727 int ret; 4728 4729 /* Not supported for kernel QPs */ 4730 if (WARN_ON(id->qp)) 4731 return -EINVAL; 4732 4733 /* ULP is calling this wrong. 
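	 * A join requires the ID to have a device and to be in
	 * RDMA_CM_ADDR_BOUND or RDMA_CM_ADDR_RESOLVED state.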
*/ 4734 if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND && 4735 READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) 4736 return -EINVAL; 4737 4738 mc = kzalloc(sizeof(*mc), GFP_KERNEL); 4739 if (!mc) 4740 return -ENOMEM; 4741 4742 memcpy(&mc->addr, addr, rdma_addr_size(addr)); 4743 mc->context = context; 4744 mc->id_priv = id_priv; 4745 mc->join_state = join_state; 4746 4747 if (rdma_protocol_roce(id->device, id->port_num)) { 4748 ret = cma_iboe_join_multicast(id_priv, mc); 4749 if (ret) 4750 goto out_err; 4751 } else if (rdma_cap_ib_mcast(id->device, id->port_num)) { 4752 ret = cma_join_ib_multicast(id_priv, mc); 4753 if (ret) 4754 goto out_err; 4755 } else { 4756 ret = -ENOSYS; 4757 goto out_err; 4758 } 4759 4760 spin_lock(&id_priv->lock); 4761 list_add(&mc->list, &id_priv->mc_list); 4762 spin_unlock(&id_priv->lock); 4763 4764 return 0; 4765 out_err: 4766 kfree(mc); 4767 return ret; 4768 } 4769 EXPORT_SYMBOL(rdma_join_multicast); 4770 4771 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) 4772 { 4773 struct rdma_id_private *id_priv; 4774 struct cma_multicast *mc; 4775 4776 id_priv = container_of(id, struct rdma_id_private, id); 4777 spin_lock_irq(&id_priv->lock); 4778 list_for_each_entry(mc, &id_priv->mc_list, list) { 4779 if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0) 4780 continue; 4781 list_del(&mc->list); 4782 spin_unlock_irq(&id_priv->lock); 4783 4784 WARN_ON(id_priv->cma_dev->device != id->device); 4785 destroy_mc(id_priv, mc); 4786 return; 4787 } 4788 spin_unlock_irq(&id_priv->lock); 4789 } 4790 EXPORT_SYMBOL(rdma_leave_multicast); 4791 4792 static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) 4793 { 4794 struct rdma_dev_addr *dev_addr; 4795 struct cma_work *work; 4796 4797 dev_addr = &id_priv->id.route.addr.dev_addr; 4798 4799 if ((dev_addr->bound_dev_if == ndev->ifindex) && 4800 (net_eq(dev_net(ndev), dev_addr->net)) && 4801 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { 4802 pr_info("RDMA CM addr change for ndev %s used by id %p\n", 4803 ndev->name, &id_priv->id); 4804 work = kzalloc(sizeof *work, GFP_KERNEL); 4805 if (!work) 4806 return -ENOMEM; 4807 4808 INIT_WORK(&work->work, cma_work_handler); 4809 work->id = id_priv; 4810 work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; 4811 cma_id_get(id_priv); 4812 queue_work(cma_wq, &work->work); 4813 } 4814 4815 return 0; 4816 } 4817 4818 static int cma_netdev_callback(struct notifier_block *self, unsigned long event, 4819 void *ptr) 4820 { 4821 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 4822 struct cma_device *cma_dev; 4823 struct rdma_id_private *id_priv; 4824 int ret = NOTIFY_DONE; 4825 4826 if (event != NETDEV_BONDING_FAILOVER) 4827 return NOTIFY_DONE; 4828 4829 if (!netif_is_bond_master(ndev)) 4830 return NOTIFY_DONE; 4831 4832 mutex_lock(&lock); 4833 list_for_each_entry(cma_dev, &dev_list, list) 4834 list_for_each_entry(id_priv, &cma_dev->id_list, list) { 4835 ret = cma_netdev_change(ndev, id_priv); 4836 if (ret) 4837 goto out; 4838 } 4839 4840 out: 4841 mutex_unlock(&lock); 4842 return ret; 4843 } 4844 4845 static struct notifier_block cma_nb = { 4846 .notifier_call = cma_netdev_callback 4847 }; 4848 4849 static void cma_send_device_removal_put(struct rdma_id_private *id_priv) 4850 { 4851 struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; 4852 enum rdma_cm_state state; 4853 unsigned long flags; 4854 4855 mutex_lock(&id_priv->handler_mutex); 4856 /* Record that we want to remove the device 
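	 * unless the ID is already being destroyed or removed, in which case
	 * that path owns the teardown.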
*/ 4857 spin_lock_irqsave(&id_priv->lock, flags); 4858 state = id_priv->state; 4859 if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) { 4860 spin_unlock_irqrestore(&id_priv->lock, flags); 4861 mutex_unlock(&id_priv->handler_mutex); 4862 cma_id_put(id_priv); 4863 return; 4864 } 4865 id_priv->state = RDMA_CM_DEVICE_REMOVAL; 4866 spin_unlock_irqrestore(&id_priv->lock, flags); 4867 4868 if (cma_cm_event_handler(id_priv, &event)) { 4869 /* 4870 * At this point the ULP promises it won't call 4871 * rdma_destroy_id() concurrently 4872 */ 4873 cma_id_put(id_priv); 4874 mutex_unlock(&id_priv->handler_mutex); 4875 trace_cm_id_destroy(id_priv); 4876 _destroy_id(id_priv, state); 4877 return; 4878 } 4879 mutex_unlock(&id_priv->handler_mutex); 4880 4881 /* 4882 * If this races with destroy then the thread that first assigns state 4883 * to a destroying does the cancel. 4884 */ 4885 cma_cancel_operation(id_priv, state); 4886 cma_id_put(id_priv); 4887 } 4888 4889 static void cma_process_remove(struct cma_device *cma_dev) 4890 { 4891 mutex_lock(&lock); 4892 while (!list_empty(&cma_dev->id_list)) { 4893 struct rdma_id_private *id_priv = list_first_entry( 4894 &cma_dev->id_list, struct rdma_id_private, list); 4895 4896 list_del(&id_priv->listen_list); 4897 list_del_init(&id_priv->list); 4898 cma_id_get(id_priv); 4899 mutex_unlock(&lock); 4900 4901 cma_send_device_removal_put(id_priv); 4902 4903 mutex_lock(&lock); 4904 } 4905 mutex_unlock(&lock); 4906 4907 cma_dev_put(cma_dev); 4908 wait_for_completion(&cma_dev->comp); 4909 } 4910 4911 static bool cma_supported(struct ib_device *device) 4912 { 4913 u32 i; 4914 4915 rdma_for_each_port(device, i) { 4916 if (rdma_cap_ib_cm(device, i) || rdma_cap_iw_cm(device, i)) 4917 return true; 4918 } 4919 return false; 4920 } 4921 4922 static int cma_add_one(struct ib_device *device) 4923 { 4924 struct rdma_id_private *to_destroy; 4925 struct cma_device *cma_dev; 4926 struct rdma_id_private *id_priv; 4927 unsigned long supported_gids = 0; 4928 int ret; 4929 u32 i; 4930 4931 if (!cma_supported(device)) 4932 return -EOPNOTSUPP; 4933 4934 cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); 4935 if (!cma_dev) 4936 return -ENOMEM; 4937 4938 cma_dev->device = device; 4939 cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, 4940 sizeof(*cma_dev->default_gid_type), 4941 GFP_KERNEL); 4942 if (!cma_dev->default_gid_type) { 4943 ret = -ENOMEM; 4944 goto free_cma_dev; 4945 } 4946 4947 cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, 4948 sizeof(*cma_dev->default_roce_tos), 4949 GFP_KERNEL); 4950 if (!cma_dev->default_roce_tos) { 4951 ret = -ENOMEM; 4952 goto free_gid_type; 4953 } 4954 4955 rdma_for_each_port (device, i) { 4956 supported_gids = roce_gid_type_mask_support(device, i); 4957 WARN_ON(!supported_gids); 4958 if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) 4959 cma_dev->default_gid_type[i - rdma_start_port(device)] = 4960 CMA_PREFERRED_ROCE_GID_TYPE; 4961 else 4962 cma_dev->default_gid_type[i - rdma_start_port(device)] = 4963 find_first_bit(&supported_gids, BITS_PER_LONG); 4964 cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; 4965 } 4966 4967 init_completion(&cma_dev->comp); 4968 refcount_set(&cma_dev->refcount, 1); 4969 INIT_LIST_HEAD(&cma_dev->id_list); 4970 ib_set_client_data(device, &cma_client, cma_dev); 4971 4972 mutex_lock(&lock); 4973 list_add_tail(&cma_dev->list, &dev_list); 4974 list_for_each_entry(id_priv, &listen_any_list, list) { 4975 ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); 4976 if (ret) 4977 goto 
free_listen; 4978 } 4979 mutex_unlock(&lock); 4980 4981 trace_cm_add_one(device); 4982 return 0; 4983 4984 free_listen: 4985 list_del(&cma_dev->list); 4986 mutex_unlock(&lock); 4987 4988 /* cma_process_remove() will delete to_destroy */ 4989 cma_process_remove(cma_dev); 4990 kfree(cma_dev->default_roce_tos); 4991 free_gid_type: 4992 kfree(cma_dev->default_gid_type); 4993 4994 free_cma_dev: 4995 kfree(cma_dev); 4996 return ret; 4997 } 4998 4999 static void cma_remove_one(struct ib_device *device, void *client_data) 5000 { 5001 struct cma_device *cma_dev = client_data; 5002 5003 trace_cm_remove_one(device); 5004 5005 mutex_lock(&lock); 5006 list_del(&cma_dev->list); 5007 mutex_unlock(&lock); 5008 5009 cma_process_remove(cma_dev); 5010 kfree(cma_dev->default_roce_tos); 5011 kfree(cma_dev->default_gid_type); 5012 kfree(cma_dev); 5013 } 5014 5015 static int cma_init_net(struct net *net) 5016 { 5017 struct cma_pernet *pernet = cma_pernet(net); 5018 5019 xa_init(&pernet->tcp_ps); 5020 xa_init(&pernet->udp_ps); 5021 xa_init(&pernet->ipoib_ps); 5022 xa_init(&pernet->ib_ps); 5023 5024 return 0; 5025 } 5026 5027 static void cma_exit_net(struct net *net) 5028 { 5029 struct cma_pernet *pernet = cma_pernet(net); 5030 5031 WARN_ON(!xa_empty(&pernet->tcp_ps)); 5032 WARN_ON(!xa_empty(&pernet->udp_ps)); 5033 WARN_ON(!xa_empty(&pernet->ipoib_ps)); 5034 WARN_ON(!xa_empty(&pernet->ib_ps)); 5035 } 5036 5037 static struct pernet_operations cma_pernet_operations = { 5038 .init = cma_init_net, 5039 .exit = cma_exit_net, 5040 .id = &cma_pernet_id, 5041 .size = sizeof(struct cma_pernet), 5042 }; 5043 5044 static int __init cma_init(void) 5045 { 5046 int ret; 5047 5048 /* 5049 * There is a rare lock ordering dependency in cma_netdev_callback() 5050 * that only happens when bonding is enabled. Teach lockdep that rtnl 5051 * must never be nested under lock so it can find these without having 5052 * to test with bonding. 5053 */ 5054 if (IS_ENABLED(CONFIG_LOCKDEP)) { 5055 rtnl_lock(); 5056 mutex_lock(&lock); 5057 mutex_unlock(&lock); 5058 rtnl_unlock(); 5059 } 5060 5061 cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); 5062 if (!cma_wq) 5063 return -ENOMEM; 5064 5065 ret = register_pernet_subsys(&cma_pernet_operations); 5066 if (ret) 5067 goto err_wq; 5068 5069 ib_sa_register_client(&sa_client); 5070 register_netdevice_notifier(&cma_nb); 5071 5072 ret = ib_register_client(&cma_client); 5073 if (ret) 5074 goto err; 5075 5076 ret = cma_configfs_init(); 5077 if (ret) 5078 goto err_ib; 5079 5080 return 0; 5081 5082 err_ib: 5083 ib_unregister_client(&cma_client); 5084 err: 5085 unregister_netdevice_notifier(&cma_nb); 5086 ib_sa_unregister_client(&sa_client); 5087 unregister_pernet_subsys(&cma_pernet_operations); 5088 err_wq: 5089 destroy_workqueue(cma_wq); 5090 return ret; 5091 } 5092 5093 static void __exit cma_cleanup(void) 5094 { 5095 cma_configfs_exit(); 5096 ib_unregister_client(&cma_client); 5097 unregister_netdevice_notifier(&cma_nb); 5098 ib_sa_unregister_client(&sa_client); 5099 unregister_pernet_subsys(&cma_pernet_operations); 5100 destroy_workqueue(cma_wq); 5101 } 5102 5103 module_init(cma_init); 5104 module_exit(cma_cleanup); 5105
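/*
 * Illustrative usage sketch only, not part of the module itself: a typical
 * kernel ULP active-side call sequence built on the entry points exported
 * above. Everything other than the rdma_* calls (the handler, context,
 * address and parameter variables) is an assumption of the sketch.
 *
 *	id = rdma_create_id(&init_net, my_cm_handler, my_ctx,
 *			    RDMA_PS_TCP, IB_QPT_RC);
 *	ret = rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst_addr, 2000);
 *	- on RDMA_CM_EVENT_ADDR_RESOLVED:  rdma_resolve_route(id, 2000);
 *	- on RDMA_CM_EVENT_ROUTE_RESOLVED: rdma_connect_locked(id, &conn_param);
 *	- on RDMA_CM_EVENT_ESTABLISHED:    begin I/O on the QP
 *	- on teardown: rdma_disconnect(id); rdma_destroy_id(id);
 */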