/*
 * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
 * Copyright (c) 2004 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <net/addrconf.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include "core_priv.h"

static const char * const ib_events[] = {
	[IB_EVENT_CQ_ERR]		= "CQ error",
	[IB_EVENT_QP_FATAL]		= "QP fatal error",
	[IB_EVENT_QP_REQ_ERR]		= "QP request error",
	[IB_EVENT_QP_ACCESS_ERR]	= "QP access error",
	[IB_EVENT_COMM_EST]		= "communication established",
	[IB_EVENT_SQ_DRAINED]		= "send queue drained",
	[IB_EVENT_PATH_MIG]		= "path migration successful",
	[IB_EVENT_PATH_MIG_ERR]		= "path migration error",
	[IB_EVENT_DEVICE_FATAL]		= "device fatal error",
	[IB_EVENT_PORT_ACTIVE]		= "port active",
	[IB_EVENT_PORT_ERR]		= "port error",
	[IB_EVENT_LID_CHANGE]		= "LID change",
	[IB_EVENT_PKEY_CHANGE]		= "P_key change",
	[IB_EVENT_SM_CHANGE]		= "SM change",
	[IB_EVENT_SRQ_ERR]		= "SRQ error",
	[IB_EVENT_SRQ_LIMIT_REACHED]	= "SRQ limit reached",
	[IB_EVENT_QP_LAST_WQE_REACHED]	= "last WQE reached",
	[IB_EVENT_CLIENT_REREGISTER]	= "client reregister",
	[IB_EVENT_GID_CHANGE]		= "GID changed",
};

const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
			ib_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(ib_event_msg);
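
/*
 * Illustrative sketch, not part of this file's exported API: a consumer's
 * asynchronous event handler (as registered with ib_register_event_handler())
 * might use ib_event_msg() when logging events.  The function name and the
 * log format are assumptions made only for this example.
 */
static void __maybe_unused example_async_event_handler(struct ib_event_handler *handler,
							struct ib_event *event)
{
	pr_info("%s: async event: %s\n",
		event->device->name, ib_event_msg(event->event));
}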

static const char * const wc_statuses[] = {
	[IB_WC_SUCCESS]			= "success",
	[IB_WC_LOC_LEN_ERR]		= "local length error",
	[IB_WC_LOC_QP_OP_ERR]		= "local QP operation error",
	[IB_WC_LOC_EEC_OP_ERR]		= "local EE context operation error",
	[IB_WC_LOC_PROT_ERR]		= "local protection error",
	[IB_WC_WR_FLUSH_ERR]		= "WR flushed",
	[IB_WC_MW_BIND_ERR]		= "memory management operation error",
	[IB_WC_BAD_RESP_ERR]		= "bad response error",
	[IB_WC_LOC_ACCESS_ERR]		= "local access error",
	[IB_WC_REM_INV_REQ_ERR]		= "invalid request error",
	[IB_WC_REM_ACCESS_ERR]		= "remote access error",
	[IB_WC_REM_OP_ERR]		= "remote operation error",
	[IB_WC_RETRY_EXC_ERR]		= "transport retry counter exceeded",
	[IB_WC_RNR_RETRY_EXC_ERR]	= "RNR retry counter exceeded",
	[IB_WC_LOC_RDD_VIOL_ERR]	= "local RDD violation error",
	[IB_WC_REM_INV_RD_REQ_ERR]	= "remote invalid RD request",
	[IB_WC_REM_ABORT_ERR]		= "operation aborted",
	[IB_WC_INV_EECN_ERR]		= "invalid EE context number",
	[IB_WC_INV_EEC_STATE_ERR]	= "invalid EE context state",
	[IB_WC_FATAL_ERR]		= "fatal error",
	[IB_WC_RESP_TIMEOUT_ERR]	= "response timeout error",
	[IB_WC_GENERAL_ERR]		= "general error",
};

const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
{
	size_t index = status;

	return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
			wc_statuses[index] : "unrecognized status";
}
EXPORT_SYMBOL(ib_wc_status_msg);

__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
	switch (rate) {
	case IB_RATE_2_5_GBPS: return  1;
	case IB_RATE_5_GBPS:   return  2;
	case IB_RATE_10_GBPS:  return  4;
	case IB_RATE_20_GBPS:  return  8;
	case IB_RATE_30_GBPS:  return 12;
	case IB_RATE_40_GBPS:  return 16;
	case IB_RATE_60_GBPS:  return 24;
	case IB_RATE_80_GBPS:  return 32;
	case IB_RATE_120_GBPS: return 48;
	default:	       return -1;
	}
}
EXPORT_SYMBOL(ib_rate_to_mult);

__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
	switch (mult) {
	case 1:  return IB_RATE_2_5_GBPS;
	case 2:  return IB_RATE_5_GBPS;
	case 4:  return IB_RATE_10_GBPS;
	case 8:  return IB_RATE_20_GBPS;
	case 12: return IB_RATE_30_GBPS;
	case 16: return IB_RATE_40_GBPS;
	case 24: return IB_RATE_60_GBPS;
	case 32: return IB_RATE_80_GBPS;
	case 48: return IB_RATE_120_GBPS;
	default: return IB_RATE_PORT_CURRENT;
	}
}
EXPORT_SYMBOL(mult_to_ib_rate);

__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
{
	switch (rate) {
	case IB_RATE_2_5_GBPS: return 2500;
	case IB_RATE_5_GBPS:   return 5000;
	case IB_RATE_10_GBPS:  return 10000;
	case IB_RATE_20_GBPS:  return 20000;
	case IB_RATE_30_GBPS:  return 30000;
	case IB_RATE_40_GBPS:  return 40000;
	case IB_RATE_60_GBPS:  return 60000;
	case IB_RATE_80_GBPS:  return 80000;
	case IB_RATE_120_GBPS: return 120000;
	case IB_RATE_14_GBPS:  return 14062;
	case IB_RATE_56_GBPS:  return 56250;
	case IB_RATE_112_GBPS: return 112500;
	case IB_RATE_168_GBPS: return 168750;
	case IB_RATE_25_GBPS:  return 25781;
	case IB_RATE_100_GBPS: return 103125;
	case IB_RATE_200_GBPS: return 206250;
	case IB_RATE_300_GBPS: return 309375;
	default:	       return -1;
	}
}
EXPORT_SYMBOL(ib_rate_to_mbps);
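
/*
 * Illustrative sketch (assumed consumer code, not used in this file): a
 * completion handler can turn an opaque ib_wc_status into readable text with
 * ib_wc_status_msg() when reporting failed work requests.
 */
static void __maybe_unused example_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status != IB_WC_SUCCESS)
			pr_err("wr_id %llu failed: %s\n",
			       (unsigned long long)wc.wr_id,
			       ib_wc_status_msg(wc.status));
	}
}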

__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
	switch (node_type) {
	case RDMA_NODE_IB_CA:
	case RDMA_NODE_IB_SWITCH:
	case RDMA_NODE_IB_ROUTER:
		return RDMA_TRANSPORT_IB;
	case RDMA_NODE_RNIC:
		return RDMA_TRANSPORT_IWARP;
	case RDMA_NODE_USNIC:
		return RDMA_TRANSPORT_USNIC;
	case RDMA_NODE_USNIC_UDP:
		return RDMA_TRANSPORT_USNIC_UDP;
	default:
		BUG();
		return 0;
	}
}
EXPORT_SYMBOL(rdma_node_get_transport);

enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
{
	if (device->get_link_layer)
		return device->get_link_layer(device, port_num);

	switch (rdma_node_get_transport(device->node_type)) {
	case RDMA_TRANSPORT_IB:
		return IB_LINK_LAYER_INFINIBAND;
	case RDMA_TRANSPORT_IWARP:
	case RDMA_TRANSPORT_USNIC:
	case RDMA_TRANSPORT_USNIC_UDP:
		return IB_LINK_LAYER_ETHERNET;
	default:
		return IB_LINK_LAYER_UNSPECIFIED;
	}
}
EXPORT_SYMBOL(rdma_port_get_link_layer);

/* Protection domains */

/**
 * ib_alloc_pd - Allocates an unused protection domain.
 * @device: The device on which to allocate the protection domain.
 *
 * A protection domain object provides an association between QPs, shared
 * receive queues, address handles, memory regions, and memory windows.
 *
 * Every PD has a local_dma_lkey which can be used as the lkey value for local
 * memory operations.
 */
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = device->alloc_pd(device, NULL, NULL);
	if (IS_ERR(pd))
		return pd;

	pd->device = device;
	pd->uobject = NULL;
	pd->local_mr = NULL;
	atomic_set(&pd->usecnt, 0);

	if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		pd->local_dma_lkey = device->local_dma_lkey;
	else {
		struct ib_mr *mr;

		mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(mr)) {
			ib_dealloc_pd(pd);
			return (struct ib_pd *)mr;
		}

		pd->local_mr = mr;
		pd->local_dma_lkey = pd->local_mr->lkey;
	}
	return pd;
}
EXPORT_SYMBOL(ib_alloc_pd);

/**
 * ib_dealloc_pd - Deallocates a protection domain.
 * @pd: The protection domain to deallocate.
 *
 * It is an error to call this function while any resources in the pd still
 * exist.  The caller is responsible for synchronously destroying them and
 * for guaranteeing that no new allocations will happen.
 */
void ib_dealloc_pd(struct ib_pd *pd)
{
	int ret;

	if (pd->local_mr) {
		ret = ib_dereg_mr(pd->local_mr);
		WARN_ON(ret);
		pd->local_mr = NULL;
	}

	/* uverbs manipulates usecnt with proper locking, while the kabi
	 * requires the caller to guarantee we can't race here.
	 */
	WARN_ON(atomic_read(&pd->usecnt));

	/* Making dealloc_pd a void return is a WIP, no driver should return
	 * an error here.
	 */
	ret = pd->device->dealloc_pd(pd);
	WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
EXPORT_SYMBOL(ib_dealloc_pd);
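
/*
 * Illustrative sketch (hypothetical helper, not in-tree code): a ULP usually
 * allocates one PD per ib_device when its client "add" callback runs and
 * frees it again on "remove", after every object created on the PD is gone.
 */
static void __maybe_unused example_pd_lifetime(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(device);
	if (IS_ERR(pd))
		return;

	/* ... create QPs/CQs/MRs against pd, use pd->local_dma_lkey ... */

	/* Only legal once all objects created on this PD have been destroyed. */
	ib_dealloc_pd(pd);
}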

/* Address handles */

struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
	struct ib_ah *ah;

	ah = pd->device->create_ah(pd, ah_attr);

	if (!IS_ERR(ah)) {
		ah->device  = pd->device;
		ah->pd      = pd;
		ah->uobject = NULL;
		atomic_inc(&pd->usecnt);
	}

	return ah;
}
EXPORT_SYMBOL(ib_create_ah);

static int ib_get_header_version(const union rdma_network_hdr *hdr)
{
	const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
	struct iphdr ip4h_checked;
	const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;

	/* If it's IPv6, the version must be 6, otherwise, the first
	 * 20 bytes (before the IPv4 header) are garbled.
	 */
	if (ip6h->version != 6)
		return (ip4h->version == 4) ? 4 : 0;
	/* version may be 6 or 4 because the first 20 bytes could be garbled */

	/* RoCE v2 requires no options, thus header length
	 * must be 5 words
	 */
	if (ip4h->ihl != 5)
		return 6;

	/* Verify checksum.
	 * We can't write on scattered buffers so we need to copy to
	 * temp buffer.
	 */
	memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
	ip4h_checked.check = 0;
	ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
	/* if IPv4 header checksum is OK, believe it */
	if (ip4h->check == ip4h_checked.check)
		return 4;
	return 6;
}

static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
						     u8 port_num,
						     const struct ib_grh *grh)
{
	int grh_version;

	if (rdma_protocol_ib(device, port_num))
		return RDMA_NETWORK_IB;

	grh_version = ib_get_header_version((union rdma_network_hdr *)grh);

	if (grh_version == 4)
		return RDMA_NETWORK_IPV4;

	if (grh->next_hdr == IPPROTO_UDP)
		return RDMA_NETWORK_IPV6;

	return RDMA_NETWORK_ROCE_V1;
}

struct find_gid_index_context {
	u16 vlan_id;
	enum ib_gid_type gid_type;
};

static bool find_gid_index(const union ib_gid *gid,
			   const struct ib_gid_attr *gid_attr,
			   void *context)
{
	struct find_gid_index_context *ctx =
		(struct find_gid_index_context *)context;

	if (ctx->gid_type != gid_attr->gid_type)
		return false;

	if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
	    (is_vlan_dev(gid_attr->ndev) &&
	     vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
		return false;

	return true;
}

static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
				   u16 vlan_id, const union ib_gid *sgid,
				   enum ib_gid_type gid_type,
				   u16 *gid_index)
{
	struct find_gid_index_context context = {.vlan_id = vlan_id,
						 .gid_type = gid_type};

	return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
				     &context, gid_index);
}

static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
				  enum rdma_network_type net_type,
				  union ib_gid *sgid, union ib_gid *dgid)
{
	struct sockaddr_in src_in;
	struct sockaddr_in dst_in;
	__be32 src_saddr, dst_saddr;

	if (!sgid || !dgid)
		return -EINVAL;

	if (net_type == RDMA_NETWORK_IPV4) {
		memcpy(&src_in.sin_addr.s_addr,
		       &hdr->roce4grh.saddr, 4);
		memcpy(&dst_in.sin_addr.s_addr,
		       &hdr->roce4grh.daddr, 4);
		src_saddr = src_in.sin_addr.s_addr;
		dst_saddr = dst_in.sin_addr.s_addr;
		ipv6_addr_set_v4mapped(src_saddr,
				       (struct in6_addr *)sgid);
		ipv6_addr_set_v4mapped(dst_saddr,
				       (struct in6_addr *)dgid);
		return 0;
	} else if (net_type == RDMA_NETWORK_IPV6 ||
		   net_type == RDMA_NETWORK_IB) {
		*dgid = hdr->ibgrh.dgid;
		*sgid = hdr->ibgrh.sgid;
		return 0;
	} else {
		return -EINVAL;
	}
}

int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
		       const struct ib_wc *wc, const struct ib_grh *grh,
		       struct ib_ah_attr *ah_attr)
{
	u32 flow_class;
	u16 gid_index;
	int ret;
	enum rdma_network_type net_type = RDMA_NETWORK_IB;
	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
	int hoplimit = 0xff;
	union ib_gid dgid;
	union ib_gid sgid;

	memset(ah_attr, 0, sizeof *ah_attr);
	if (rdma_cap_eth_ah(device, port_num)) {
		if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
			net_type = wc->network_hdr_type;
		else
			net_type = ib_get_net_type_by_grh(device, port_num, grh);
		gid_type = ib_network_to_gid_type(net_type);
	}
	ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
				     &sgid, &dgid);
	if (ret)
		return ret;

	if (rdma_protocol_roce(device, port_num)) {
		int if_index = 0;
		u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
				wc->vlan_id : 0xffff;
		struct net_device *idev;
		struct net_device *resolved_dev;

		if (!(wc->wc_flags & IB_WC_GRH))
			return -EPROTOTYPE;

		if (!device->get_netdev)
			return -EOPNOTSUPP;

		idev = device->get_netdev(device, port_num);
		if (!idev)
			return -ENODEV;

		ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
						   ah_attr->dmac,
						   wc->wc_flags & IB_WC_WITH_VLAN ?
						   NULL : &vlan_id,
						   &if_index, &hoplimit);
		if (ret) {
			dev_put(idev);
			return ret;
		}

		resolved_dev = dev_get_by_index(&init_net, if_index);
		if (!resolved_dev) {
			/* dev_get_by_index() can fail; don't dereference NULL. */
			dev_put(idev);
			return -ENODEV;
		}
		if (resolved_dev->flags & IFF_LOOPBACK) {
			dev_put(resolved_dev);
			resolved_dev = idev;
			dev_hold(resolved_dev);
		}
		rcu_read_lock();
		if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
								   resolved_dev))
			ret = -EHOSTUNREACH;
		rcu_read_unlock();
		dev_put(idev);
		dev_put(resolved_dev);
		if (ret)
			return ret;

		ret = get_sgid_index_from_eth(device, port_num, vlan_id,
					      &dgid, gid_type, &gid_index);
		if (ret)
			return ret;
	}

	ah_attr->dlid = wc->slid;
	ah_attr->sl = wc->sl;
	ah_attr->src_path_bits = wc->dlid_path_bits;
	ah_attr->port_num = port_num;

	if (wc->wc_flags & IB_WC_GRH) {
		ah_attr->ah_flags = IB_AH_GRH;
		ah_attr->grh.dgid = sgid;

		if (!rdma_cap_eth_ah(device, port_num)) {
			ret = ib_find_cached_gid_by_port(device, &dgid,
							 IB_GID_TYPE_IB,
							 port_num, NULL,
							 &gid_index);
			if (ret)
				return ret;
		}

		ah_attr->grh.sgid_index = (u8) gid_index;
		flow_class = be32_to_cpu(grh->version_tclass_flow);
		ah_attr->grh.flow_label = flow_class & 0xFFFFF;
		ah_attr->grh.hop_limit = hoplimit;
		ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
	}
	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_wc);

struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
				   const struct ib_grh *grh, u8 port_num)
{
	struct ib_ah_attr ah_attr;
	int ret;

	ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
	if (ret)
		return ERR_PTR(ret);

	return ib_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
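
/*
 * Illustrative sketch (assumed UD consumer): replying to the sender of a
 * received datagram only needs the work completion plus the 40-byte GRH
 * area that UD receive buffers reserve; ib_create_ah_from_wc() does the
 * rest.  "recv_buf" and the helper name are hypothetical.
 */
static struct ib_ah * __maybe_unused example_reply_ah(struct ib_pd *pd,
						      const struct ib_wc *wc,
						      void *recv_buf, u8 port_num)
{
	/* The GRH, when present, occupies the first 40 bytes of the buffer. */
	const struct ib_grh *grh = recv_buf;

	return ib_create_ah_from_wc(pd, wc, grh, port_num);
}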

int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
	return ah->device->modify_ah ?
		ah->device->modify_ah(ah, ah_attr) :
		-ENOSYS;
}
EXPORT_SYMBOL(ib_modify_ah);

int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
	return ah->device->query_ah ?
		ah->device->query_ah(ah, ah_attr) :
		-ENOSYS;
}
EXPORT_SYMBOL(ib_query_ah);

int ib_destroy_ah(struct ib_ah *ah)
{
	struct ib_pd *pd;
	int ret;

	pd = ah->pd;
	ret = ah->device->destroy_ah(ah);
	if (!ret)
		atomic_dec(&pd->usecnt);

	return ret;
}
EXPORT_SYMBOL(ib_destroy_ah);

/* Shared receive queues */

struct ib_srq *ib_create_srq(struct ib_pd *pd,
			     struct ib_srq_init_attr *srq_init_attr)
{
	struct ib_srq *srq;

	if (!pd->device->create_srq)
		return ERR_PTR(-ENOSYS);

	srq = pd->device->create_srq(pd, srq_init_attr, NULL);

	if (!IS_ERR(srq)) {
		srq->device	   = pd->device;
		srq->pd		   = pd;
		srq->uobject	   = NULL;
		srq->event_handler = srq_init_attr->event_handler;
		srq->srq_context   = srq_init_attr->srq_context;
		srq->srq_type	   = srq_init_attr->srq_type;
		if (srq->srq_type == IB_SRQT_XRC) {
			srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
			srq->ext.xrc.cq   = srq_init_attr->ext.xrc.cq;
			atomic_inc(&srq->ext.xrc.xrcd->usecnt);
			atomic_inc(&srq->ext.xrc.cq->usecnt);
		}
		atomic_inc(&pd->usecnt);
		atomic_set(&srq->usecnt, 0);
	}

	return srq;
}
EXPORT_SYMBOL(ib_create_srq);
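
/*
 * Illustrative sketch (hypothetical sizes and helper name): creating a basic,
 * non-XRC SRQ that several QPs can share as their receive queue.
 */
static struct ib_srq * __maybe_unused example_create_srq(struct ib_pd *pd)
{
	struct ib_srq_init_attr srq_attr = {
		.attr = {
			.max_wr	 = 256,	/* depth of the shared receive queue */
			.max_sge = 1,
		},
		.srq_type = IB_SRQT_BASIC,
	};

	return ib_create_srq(pd, &srq_attr);
}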

int ib_modify_srq(struct ib_srq *srq,
		  struct ib_srq_attr *srq_attr,
		  enum ib_srq_attr_mask srq_attr_mask)
{
	return srq->device->modify_srq ?
		srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
		-ENOSYS;
}
EXPORT_SYMBOL(ib_modify_srq);

int ib_query_srq(struct ib_srq *srq,
		 struct ib_srq_attr *srq_attr)
{
	return srq->device->query_srq ?
		srq->device->query_srq(srq, srq_attr) : -ENOSYS;
}
EXPORT_SYMBOL(ib_query_srq);

int ib_destroy_srq(struct ib_srq *srq)
{
	struct ib_pd *pd;
	enum ib_srq_type srq_type;
	struct ib_xrcd *uninitialized_var(xrcd);
	struct ib_cq *uninitialized_var(cq);
	int ret;

	if (atomic_read(&srq->usecnt))
		return -EBUSY;

	pd = srq->pd;
	srq_type = srq->srq_type;
	if (srq_type == IB_SRQT_XRC) {
		xrcd = srq->ext.xrc.xrcd;
		cq = srq->ext.xrc.cq;
	}

	ret = srq->device->destroy_srq(srq);
	if (!ret) {
		atomic_dec(&pd->usecnt);
		if (srq_type == IB_SRQT_XRC) {
			atomic_dec(&xrcd->usecnt);
			atomic_dec(&cq->usecnt);
		}
	}

	return ret;
}
EXPORT_SYMBOL(ib_destroy_srq);

/* Queue pairs */

static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
	struct ib_qp *qp = context;
	unsigned long flags;

	spin_lock_irqsave(&qp->device->event_handler_lock, flags);
	list_for_each_entry(event->element.qp, &qp->open_list, open_list)
		if (event->element.qp->event_handler)
			event->element.qp->event_handler(event, event->element.qp->qp_context);
	spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
}

static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
{
	mutex_lock(&xrcd->tgt_qp_mutex);
	list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
	mutex_unlock(&xrcd->tgt_qp_mutex);
}

static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
				  void (*event_handler)(struct ib_event *, void *),
				  void *qp_context)
{
	struct ib_qp *qp;
	unsigned long flags;

	qp = kzalloc(sizeof *qp, GFP_KERNEL);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	qp->real_qp = real_qp;
	atomic_inc(&real_qp->usecnt);
	qp->device = real_qp->device;
	qp->event_handler = event_handler;
	qp->qp_context = qp_context;
	qp->qp_num = real_qp->qp_num;
	qp->qp_type = real_qp->qp_type;

	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
	list_add(&qp->open_list, &real_qp->open_list);
	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);

	return qp;
}

struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
			 struct ib_qp_open_attr *qp_open_attr)
{
	struct ib_qp *qp, *real_qp;

	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
		return ERR_PTR(-EINVAL);

	qp = ERR_PTR(-EINVAL);
	mutex_lock(&xrcd->tgt_qp_mutex);
	list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
		if (real_qp->qp_num == qp_open_attr->qp_num) {
			qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
					  qp_open_attr->qp_context);
			break;
		}
	}
	mutex_unlock(&xrcd->tgt_qp_mutex);
	return qp;
}
EXPORT_SYMBOL(ib_open_qp);

struct ib_qp *ib_create_qp(struct ib_pd *pd,
			   struct ib_qp_init_attr *qp_init_attr)
{
	struct ib_qp *qp, *real_qp;
	struct ib_device *device;

	device = pd ? pd->device : qp_init_attr->xrcd->device;
	qp = device->create_qp(pd, qp_init_attr, NULL);

	if (!IS_ERR(qp)) {
		qp->device  = device;
		qp->real_qp = qp;
		qp->uobject = NULL;
		qp->qp_type = qp_init_attr->qp_type;

		atomic_set(&qp->usecnt, 0);
		if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
			qp->event_handler = __ib_shared_qp_event_handler;
			qp->qp_context = qp;
			qp->pd = NULL;
			qp->send_cq = qp->recv_cq = NULL;
			qp->srq = NULL;
			qp->xrcd = qp_init_attr->xrcd;
			atomic_inc(&qp_init_attr->xrcd->usecnt);
			INIT_LIST_HEAD(&qp->open_list);

			real_qp = qp;
			qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
					  qp_init_attr->qp_context);
			if (!IS_ERR(qp))
				__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
			else
				real_qp->device->destroy_qp(real_qp);
		} else {
			qp->event_handler = qp_init_attr->event_handler;
			qp->qp_context = qp_init_attr->qp_context;
			if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
				qp->recv_cq = NULL;
				qp->srq = NULL;
			} else {
				qp->recv_cq = qp_init_attr->recv_cq;
				atomic_inc(&qp_init_attr->recv_cq->usecnt);
				qp->srq = qp_init_attr->srq;
				if (qp->srq)
					atomic_inc(&qp_init_attr->srq->usecnt);
			}

			qp->pd	    = pd;
			qp->send_cq = qp_init_attr->send_cq;
			qp->xrcd    = NULL;

			atomic_inc(&pd->usecnt);
			atomic_inc(&qp_init_attr->send_cq->usecnt);
		}
	}

	return qp;
}
EXPORT_SYMBOL(ib_create_qp);
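
/*
 * Illustrative sketch (hypothetical sizes): creating an RC QP on an existing
 * PD with separate send and receive CQs.  A real consumer would also size the
 * capabilities against the device's ib_device_attr limits.
 */
static struct ib_qp * __maybe_unused example_create_rc_qp(struct ib_pd *pd,
							   struct ib_cq *send_cq,
							   struct ib_cq *recv_cq)
{
	struct ib_qp_init_attr init_attr = {
		.send_cq = send_cq,
		.recv_cq = recv_cq,
		.cap = {
			.max_send_wr  = 64,
			.max_recv_wr  = 64,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type     = IB_QPT_RC,
	};

	return ib_create_qp(pd, &init_attr);
}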

static const struct {
	int			valid;
	enum ib_qp_attr_mask	req_param[IB_QPT_MAX];
	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
					       IB_QP_QKEY),
				[IB_QPT_RAW_PACKET] = IB_QP_PORT,
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
					       IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
					       IB_QP_ACCESS_FLAGS),
				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
						    IB_QP_ACCESS_FLAGS),
				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
						    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
	},
	[IB_QPS_INIT]  = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
					       IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
					       IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
					       IB_QP_ACCESS_FLAGS),
				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
						    IB_QP_ACCESS_FLAGS),
				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
						    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
		[IB_QPS_RTR]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU |
					       IB_QP_DEST_QPN | IB_QP_RQ_PSN),
				[IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU |
					       IB_QP_DEST_QPN | IB_QP_RQ_PSN |
					       IB_QP_MAX_DEST_RD_ATOMIC |
					       IB_QP_MIN_RNR_TIMER),
				[IB_QPT_XRC_INI] = (IB_QP_AV | IB_QP_PATH_MTU |
						    IB_QP_DEST_QPN | IB_QP_RQ_PSN),
				[IB_QPT_XRC_TGT] = (IB_QP_AV | IB_QP_PATH_MTU |
						    IB_QP_DEST_QPN | IB_QP_RQ_PSN |
						    IB_QP_MAX_DEST_RD_ATOMIC |
						    IB_QP_MIN_RNR_TIMER),
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
					       IB_QP_PKEY_INDEX),
				[IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
					       IB_QP_PKEY_INDEX),
				[IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
						    IB_QP_PKEY_INDEX),
				[IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
						    IB_QP_PKEY_INDEX),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			},
		},
	},
	[IB_QPS_RTR]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = IB_QP_SQ_PSN,
				[IB_QPT_UC] = IB_QP_SQ_PSN,
				[IB_QPT_RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
					       IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
					       IB_QP_MAX_QP_RD_ATOMIC),
				[IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
						    IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
						    IB_QP_MAX_QP_RD_ATOMIC),
				[IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | IB_QP_SQ_PSN),
				[IB_QPT_SMI] = IB_QP_SQ_PSN,
				[IB_QPT_GSI] = IB_QP_SQ_PSN,
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
					       IB_QP_ACCESS_FLAGS |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
					       IB_QP_ACCESS_FLAGS |
					       IB_QP_MIN_RNR_TIMER |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
						    IB_QP_ACCESS_FLAGS |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
						    IB_QP_ACCESS_FLAGS |
						    IB_QP_MIN_RNR_TIMER |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTS]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS |
					       IB_QP_ALT_PATH |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS |
					       IB_QP_ALT_PATH |
					       IB_QP_PATH_MIG_STATE |
					       IB_QP_MIN_RNR_TIMER),
				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS |
						    IB_QP_ALT_PATH |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS |
						    IB_QP_ALT_PATH |
						    IB_QP_PATH_MIG_STATE |
						    IB_QP_MIN_RNR_TIMER),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
			}
		},
	},
	[IB_QPS_SQD]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
					       IB_QP_ACCESS_FLAGS |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
					       IB_QP_ACCESS_FLAGS |
					       IB_QP_MIN_RNR_TIMER |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
						    IB_QP_ACCESS_FLAGS |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH |
						    IB_QP_ACCESS_FLAGS |
						    IB_QP_MIN_RNR_TIMER |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH |
					       IB_QP_ACCESS_FLAGS |
					       IB_QP_PKEY_INDEX |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT |
					       IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
					       IB_QP_MAX_QP_RD_ATOMIC |
					       IB_QP_MAX_DEST_RD_ATOMIC |
					       IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
					       IB_QP_PKEY_INDEX |
					       IB_QP_MIN_RNR_TIMER |
					       IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_INI] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT |
						    IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
						    IB_QP_MAX_QP_RD_ATOMIC |
						    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
						    IB_QP_PKEY_INDEX |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_XRC_TGT] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT |
						    IB_QP_MAX_DEST_RD_ATOMIC |
						    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
						    IB_QP_PKEY_INDEX |
						    IB_QP_MIN_RNR_TIMER |
						    IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_SQE]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 }
	}
};

int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
		       enum ib_qp_type type, enum ib_qp_attr_mask mask,
		       enum rdma_link_layer ll)
{
	enum ib_qp_attr_mask req_param, opt_param;

	if (cur_state  < 0 || cur_state  > IB_QPS_ERR ||
	    next_state < 0 || next_state > IB_QPS_ERR)
		return 0;

	if (mask & IB_QP_CUR_STATE  &&
	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
		return 0;

	if (!qp_state_table[cur_state][next_state].valid)
		return 0;

	req_param = qp_state_table[cur_state][next_state].req_param[type];
	opt_param = qp_state_table[cur_state][next_state].opt_param[type];

	if ((mask & req_param) != req_param)
		return 0;

	if (mask & ~(req_param | opt_param | IB_QP_STATE))
		return 0;

	return 1;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);

int ib_resolve_eth_dmac(struct ib_qp *qp,
			struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	int ret = 0;

	if (*qp_attr_mask & IB_QP_AV) {
		if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
		    qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
			return -EINVAL;

		if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
			return 0;

		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
					qp_attr->ah_attr.dmac);
		} else {
			union ib_gid		sgid;
			struct ib_gid_attr	sgid_attr;
			int			ifindex;
			int			hop_limit;

			ret = ib_query_gid(qp->device,
					   qp_attr->ah_attr.port_num,
					   qp_attr->ah_attr.grh.sgid_index,
					   &sgid, &sgid_attr);

			if (ret || !sgid_attr.ndev) {
				if (!ret)
					ret = -ENXIO;
				goto out;
			}

			ifindex = sgid_attr.ndev->ifindex;

			ret = rdma_addr_find_l2_eth_by_grh(&sgid,
							   &qp_attr->ah_attr.grh.dgid,
							   qp_attr->ah_attr.dmac,
							   NULL, &ifindex, &hop_limit);

			dev_put(sgid_attr.ndev);

			qp_attr->ah_attr.grh.hop_limit = hop_limit;
		}
	}
out:
	return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);

int ib_modify_qp(struct ib_qp *qp,
		 struct ib_qp_attr *qp_attr,
		 int qp_attr_mask)
{
	int ret;

	ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
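
/*
 * Illustrative sketch (hypothetical values): the minimal RESET -> INIT
 * transition for an RC QP.  The attribute mask must carry exactly the
 * required bits from qp_state_table above (plus any optional ones), or the
 * driver's ib_modify_qp_is_ok() check will reject the transition.
 */
static int __maybe_unused example_rc_qp_to_init(struct ib_qp *qp, u8 port_num)
{
	struct ib_qp_attr attr = {
		.qp_state	 = IB_QPS_INIT,
		.pkey_index	 = 0,
		.port_num	 = port_num,
		.qp_access_flags = IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE,
	};

	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT |
			    IB_QP_ACCESS_FLAGS);
}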

int ib_query_qp(struct ib_qp *qp,
		struct ib_qp_attr *qp_attr,
		int qp_attr_mask,
		struct ib_qp_init_attr *qp_init_attr)
{
	return qp->device->query_qp ?
		qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
		-ENOSYS;
}
EXPORT_SYMBOL(ib_query_qp);

int ib_close_qp(struct ib_qp *qp)
{
	struct ib_qp *real_qp;
	unsigned long flags;

	real_qp = qp->real_qp;
	if (real_qp == qp)
		return -EINVAL;

	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
	list_del(&qp->open_list);
	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);

	atomic_dec(&real_qp->usecnt);
	kfree(qp);

	return 0;
}
EXPORT_SYMBOL(ib_close_qp);

static int __ib_destroy_shared_qp(struct ib_qp *qp)
{
	struct ib_xrcd *xrcd;
	struct ib_qp *real_qp;
	int ret;

	real_qp = qp->real_qp;
	xrcd = real_qp->xrcd;

	mutex_lock(&xrcd->tgt_qp_mutex);
	ib_close_qp(qp);
	if (atomic_read(&real_qp->usecnt) == 0)
		list_del(&real_qp->xrcd_list);
	else
		real_qp = NULL;
	mutex_unlock(&xrcd->tgt_qp_mutex);

	if (real_qp) {
		ret = ib_destroy_qp(real_qp);
		if (!ret)
			atomic_dec(&xrcd->usecnt);
		else
			__ib_insert_xrcd_qp(xrcd, real_qp);
	}

	return 0;
}

int ib_destroy_qp(struct ib_qp *qp)
{
	struct ib_pd *pd;
	struct ib_cq *scq, *rcq;
	struct ib_srq *srq;
	int ret;

	if (atomic_read(&qp->usecnt))
		return -EBUSY;

	if (qp->real_qp != qp)
		return __ib_destroy_shared_qp(qp);

	pd  = qp->pd;
	scq = qp->send_cq;
	rcq = qp->recv_cq;
	srq = qp->srq;

	ret = qp->device->destroy_qp(qp);
	if (!ret) {
		if (pd)
			atomic_dec(&pd->usecnt);
		if (scq)
			atomic_dec(&scq->usecnt);
		if (rcq)
			atomic_dec(&rcq->usecnt);
		if (srq)
			atomic_dec(&srq->usecnt);
	}

	return ret;
}
EXPORT_SYMBOL(ib_destroy_qp);

/* Completion queues */

struct ib_cq *ib_create_cq(struct ib_device *device,
			   ib_comp_handler comp_handler,
			   void (*event_handler)(struct ib_event *, void *),
			   void *cq_context,
			   const struct ib_cq_init_attr *cq_attr)
{
	struct ib_cq *cq;

	cq = device->create_cq(device, cq_attr, NULL, NULL);

	if (!IS_ERR(cq)) {
		cq->device        = device;
		cq->uobject       = NULL;
		cq->comp_handler  = comp_handler;
		cq->event_handler = event_handler;
		cq->cq_context    = cq_context;
		atomic_set(&cq->usecnt, 0);
	}

	return cq;
}
EXPORT_SYMBOL(ib_create_cq);

int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	return cq->device->modify_cq ?
		cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_cq);

int ib_destroy_cq(struct ib_cq *cq)
{
	if (atomic_read(&cq->usecnt))
		return -EBUSY;

	return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);

int ib_resize_cq(struct ib_cq *cq, int cqe)
{
	return cq->device->resize_cq ?
		cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
}
EXPORT_SYMBOL(ib_resize_cq);
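
/*
 * Illustrative sketch (hypothetical depth and handler): creating a CQ and
 * arming it for the next completion.  "handler" is assumed to be the
 * consumer's ib_comp_handler, e.g. one like example_comp_handler() above.
 */
static struct ib_cq * __maybe_unused example_create_cq(struct ib_device *device,
							ib_comp_handler handler,
							void *context)
{
	struct ib_cq_init_attr cq_attr = { .cqe = 256, .comp_vector = 0 };
	struct ib_cq *cq;

	cq = ib_create_cq(device, handler, NULL, context, &cq_attr);
	if (IS_ERR(cq))
		return cq;

	/* Request an interrupt/callback for the next completion. */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	return cq;
}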

/* Memory regions */

struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
	struct ib_mr *mr;
	int err;

	err = ib_check_mr_access(mr_access_flags);
	if (err)
		return ERR_PTR(err);

	mr = pd->device->get_dma_mr(pd, mr_access_flags);

	if (!IS_ERR(mr)) {
		mr->device  = pd->device;
		mr->pd      = pd;
		mr->uobject = NULL;
		atomic_inc(&pd->usecnt);
	}

	return mr;
}
EXPORT_SYMBOL(ib_get_dma_mr);

int ib_dereg_mr(struct ib_mr *mr)
{
	struct ib_pd *pd = mr->pd;
	int ret;

	ret = mr->device->dereg_mr(mr);
	if (!ret)
		atomic_dec(&pd->usecnt);

	return ret;
}
EXPORT_SYMBOL(ib_dereg_mr);

/**
 * ib_alloc_mr() - Allocates a memory region
 * @pd:            protection domain associated with the region
 * @mr_type:       memory region type
 * @max_num_sg:    maximum sg entries available for registration.
 *
 * Notes:
 * Memory registration page/sg lists must not exceed max_num_sg.
 * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
 * max_num_sg * used_page_size.
 *
 */
struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
			  enum ib_mr_type mr_type,
			  u32 max_num_sg)
{
	struct ib_mr *mr;

	if (!pd->device->alloc_mr)
		return ERR_PTR(-ENOSYS);

	mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
	if (!IS_ERR(mr)) {
		mr->device  = pd->device;
		mr->pd      = pd;
		mr->uobject = NULL;
		atomic_inc(&pd->usecnt);
	}

	return mr;
}
EXPORT_SYMBOL(ib_alloc_mr);

/* "Fast" memory regions */

struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
			    int mr_access_flags,
			    struct ib_fmr_attr *fmr_attr)
{
	struct ib_fmr *fmr;

	if (!pd->device->alloc_fmr)
		return ERR_PTR(-ENOSYS);

	fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
	if (!IS_ERR(fmr)) {
		fmr->device = pd->device;
		fmr->pd     = pd;
		atomic_inc(&pd->usecnt);
	}

	return fmr;
}
EXPORT_SYMBOL(ib_alloc_fmr);

int ib_unmap_fmr(struct list_head *fmr_list)
{
	struct ib_fmr *fmr;

	if (list_empty(fmr_list))
		return 0;

	fmr = list_entry(fmr_list->next, struct ib_fmr, list);
	return fmr->device->unmap_fmr(fmr_list);
}
EXPORT_SYMBOL(ib_unmap_fmr);

int ib_dealloc_fmr(struct ib_fmr *fmr)
{
	struct ib_pd *pd;
	int ret;

	pd = fmr->pd;
	ret = fmr->device->dealloc_fmr(fmr);
	if (!ret)
		atomic_dec(&pd->usecnt);

	return ret;
}
EXPORT_SYMBOL(ib_dealloc_fmr);

/* Multicast groups */

int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
	int ret;

	if (!qp->device->attach_mcast)
		return -ENOSYS;
	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
		return -EINVAL;

	ret = qp->device->attach_mcast(qp, gid, lid);
	if (!ret)
		atomic_inc(&qp->usecnt);
	return ret;
}
EXPORT_SYMBOL(ib_attach_mcast);

int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
	int ret;

	if (!qp->device->detach_mcast)
		return -ENOSYS;
	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
		return -EINVAL;

	ret = qp->device->detach_mcast(qp, gid, lid);
	if (!ret)
		atomic_dec(&qp->usecnt);
	return ret;
}
EXPORT_SYMBOL(ib_detach_mcast);
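
/*
 * Illustrative sketch: attaching a UD QP to a multicast group and detaching
 * again on teardown.  The GID and LID would normally come from a multicast
 * join through the SA; the helper name and flow here are hypothetical.
 */
static int __maybe_unused example_join_mcast(struct ib_qp *qp,
					     union ib_gid *mgid, u16 mlid)
{
	int ret;

	/* Both helpers enforce a multicast GID (0xff prefix) and a UD QP. */
	ret = ib_attach_mcast(qp, mgid, mlid);
	if (ret)
		return ret;

	/* ... receive datagrams addressed to the group ... */

	return ib_detach_mcast(qp, mgid, mlid);
}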

struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
{
	struct ib_xrcd *xrcd;

	if (!device->alloc_xrcd)
		return ERR_PTR(-ENOSYS);

	xrcd = device->alloc_xrcd(device, NULL, NULL);
	if (!IS_ERR(xrcd)) {
		xrcd->device = device;
		xrcd->inode = NULL;
		atomic_set(&xrcd->usecnt, 0);
		mutex_init(&xrcd->tgt_qp_mutex);
		INIT_LIST_HEAD(&xrcd->tgt_qp_list);
	}

	return xrcd;
}
EXPORT_SYMBOL(ib_alloc_xrcd);

int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
	struct ib_qp *qp;
	int ret;

	if (atomic_read(&xrcd->usecnt))
		return -EBUSY;

	while (!list_empty(&xrcd->tgt_qp_list)) {
		qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
		ret = ib_destroy_qp(qp);
		if (ret)
			return ret;
	}

	return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);

struct ib_flow *ib_create_flow(struct ib_qp *qp,
			       struct ib_flow_attr *flow_attr,
			       int domain)
{
	struct ib_flow *flow_id;

	if (!qp->device->create_flow)
		return ERR_PTR(-ENOSYS);

	flow_id = qp->device->create_flow(qp, flow_attr, domain);
	if (!IS_ERR(flow_id))
		atomic_inc(&qp->usecnt);
	return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);

int ib_destroy_flow(struct ib_flow *flow_id)
{
	int err;
	struct ib_qp *qp = flow_id->qp;

	err = qp->device->destroy_flow(flow_id);
	if (!err)
		atomic_dec(&qp->usecnt);
	return err;
}
EXPORT_SYMBOL(ib_destroy_flow);

int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
		       struct ib_mr_status *mr_status)
{
	return mr->device->check_mr_status ?
		mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);

/**
 * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
 *     and set it as the memory region's page vector.
 * @mr:            memory region
 * @sg:            dma mapped scatterlist
 * @sg_nents:      number of entries in sg
 * @page_size:     page vector desired page size
 *
 * Constraints:
 * - The first sg element is allowed to have an offset.
 * - Each sg element must be aligned to page_size (or physically
 *   contiguous to the previous element). In case an sg element has a
 *   non contiguous offset, the mapping prefix will not include it.
 * - The last sg element is allowed to have length less than page_size.
 * - If sg_nents total byte length exceeds the mr max_num_sg * page_size
 *   then only max_num_sg entries will be mapped.
 *
 * Returns the number of sg elements that were mapped to the memory region.
 *
 * After this completes successfully, the memory region
 * is ready for registration.
 */
int ib_map_mr_sg(struct ib_mr *mr,
		 struct scatterlist *sg,
		 int sg_nents,
		 unsigned int page_size)
{
	if (unlikely(!mr->device->map_mr_sg))
		return -ENOSYS;

	mr->page_size = page_size;

	return mr->device->map_mr_sg(mr, sg, sg_nents);
}
EXPORT_SYMBOL(ib_map_mr_sg);

/**
 * ib_sg_to_pages() - Convert the largest prefix of a sg list
 *     to a page vector
 * @mr:            memory region
 * @sgl:           dma mapped scatterlist
 * @sg_nents:      number of entries in sg
 * @set_page:      driver page assignment function pointer
 *
 * Core service helper for drivers to convert the largest
 * prefix of given sg list to a page vector. The sg list
 * prefix converted is the prefix that meets the requirements
 * of ib_map_mr_sg.
 *
 * Returns the number of sg elements that were assigned to
 * a page vector.
 */
int ib_sg_to_pages(struct ib_mr *mr,
		   struct scatterlist *sgl,
		   int sg_nents,
		   int (*set_page)(struct ib_mr *, u64))
{
	struct scatterlist *sg;
	u64 last_end_dma_addr = 0;
	unsigned int last_page_off = 0;
	u64 page_mask = ~((u64)mr->page_size - 1);
	int i, ret;

	mr->iova = sg_dma_address(&sgl[0]);
	mr->length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		u64 dma_addr = sg_dma_address(sg);
		unsigned int dma_len = sg_dma_len(sg);
		u64 end_dma_addr = dma_addr + dma_len;
		u64 page_addr = dma_addr & page_mask;

		/*
		 * For the second and later elements, check whether either the
		 * end of element i-1 or the start of element i is not aligned
		 * on a page boundary.
		 */
		if (i && (last_page_off != 0 || page_addr != dma_addr)) {
			/* Stop mapping if there is a gap. */
			if (last_end_dma_addr != dma_addr)
				break;

			/*
			 * Coalesce this element with the last. If it is small
			 * enough just update mr->length. Otherwise start
			 * mapping from the next page.
			 */
			goto next_page;
		}

		do {
			ret = set_page(mr, page_addr);
			if (unlikely(ret < 0))
				return i ? : ret;
next_page:
			page_addr += mr->page_size;
		} while (page_addr < end_dma_addr);

		mr->length += dma_len;
		last_end_dma_addr = end_dma_addr;
		last_page_off = end_dma_addr & ~page_mask;
	}

	return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
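
/*
 * Illustrative sketch (assumed consumer code): the usual fast-registration
 * flow built on ib_map_mr_sg().  "sgl"/"sg_nents" are already DMA mapped,
 * "mr" comes from ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ...), and the posted
 * registration work request still has to complete before mr->rkey may be
 * handed to the peer.  Names and error policy here are assumptions.
 */
static int __maybe_unused example_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
					   struct scatterlist *sgl, int sg_nents)
{
	struct ib_reg_wr reg_wr = {};
	struct ib_send_wr *bad_wr;
	int n;

	/* Build the MR's page vector from the DMA-mapped scatterlist. */
	n = ib_map_mr_sg(mr, sgl, sg_nents, PAGE_SIZE);
	if (n < sg_nents)
		return n < 0 ? n : -EINVAL;

	reg_wr.wr.opcode     = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr	     = mr;
	reg_wr.key	     = mr->rkey;
	reg_wr.access	     = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}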