/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/inet.h>
#include <linux/io.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/vmw_pvrdma-abi.h>

#include "pvrdma.h"

/**
 * pvrdma_query_device - query device
 * @ibdev: the device to query
 * @props: the device properties
 * @uhw: user data
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_device(struct ib_device *ibdev,
                        struct ib_device_attr *props,
                        struct ib_udata *uhw)
{
        struct pvrdma_dev *dev = to_vdev(ibdev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        memset(props, 0, sizeof(*props));

        props->fw_ver = dev->dsr->caps.fw_ver;
        props->sys_image_guid = dev->dsr->caps.sys_image_guid;
        props->max_mr_size = dev->dsr->caps.max_mr_size;
        props->page_size_cap = dev->dsr->caps.page_size_cap;
        props->vendor_id = dev->dsr->caps.vendor_id;
        props->vendor_part_id = dev->pdev->device;
        props->hw_ver = dev->dsr->caps.hw_ver;
        props->max_qp = dev->dsr->caps.max_qp;
        props->max_qp_wr = dev->dsr->caps.max_qp_wr;
        props->device_cap_flags = dev->dsr->caps.device_cap_flags;
        props->max_sge = dev->dsr->caps.max_sge;
        props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
                                           dev->dsr->caps.max_sge_rd);
        props->max_cq = dev->dsr->caps.max_cq;
        props->max_cqe = dev->dsr->caps.max_cqe;
        props->max_mr = dev->dsr->caps.max_mr;
        props->max_pd = dev->dsr->caps.max_pd;
        props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
        props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
        props->atomic_cap =
                dev->dsr->caps.atomic_ops &
                (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
                IB_ATOMIC_HCA : IB_ATOMIC_NONE;
        props->masked_atomic_cap = props->atomic_cap;
        props->max_ah = dev->dsr->caps.max_ah;
        props->max_pkeys = dev->dsr->caps.max_pkeys;
        props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
        if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
            (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
            (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
                props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
                props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev,
                                PVRDMA_MAX_FAST_REG_PAGES,
                                dev->dsr->caps.max_fast_reg_page_list_len);
        }

        props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT |
                                   IB_DEVICE_RC_RNR_NAK_GEN;

        return 0;
}
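
/*
 * For illustration only: this entry point is normally reached through
 * the uverbs layer when a libibverbs application queries the device.
 * A minimal userspace sketch (standard libibverbs API assumed):
 *
 *	struct ibv_device_attr attr;
 *
 *	if (!ibv_query_device(ctx, &attr))
 *		printf("max_qp=%d max_cqe=%d\n", attr.max_qp, attr.max_cqe);
 *
 * All values reported above are read from the device capabilities in
 * the shared region (dev->dsr->caps) populated at probe time.
 */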

/**
 * pvrdma_query_port - query device port attributes
 * @ibdev: the device to query
 * @port: the port number
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_port(struct ib_device *ibdev, u8 port,
                      struct ib_port_attr *props)
{
        struct pvrdma_dev *dev = to_vdev(ibdev);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_query_port *cmd = &req.query_port;
        struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
        int err;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
        cmd->port_num = port;

        err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
        if (err < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not query port, error: %d\n", err);
                return err;
        }

        /* props being zeroed by the caller, avoid zeroing it here */

        props->state = pvrdma_port_state_to_ib(resp->attrs.state);
        props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
        props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
        props->gid_tbl_len = resp->attrs.gid_tbl_len;
        props->port_cap_flags =
                pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
        props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
        props->max_msg_sz = resp->attrs.max_msg_sz;
        props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
        props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
        props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
        props->lid = resp->attrs.lid;
        props->sm_lid = resp->attrs.sm_lid;
        props->lmc = resp->attrs.lmc;
        props->max_vl_num = resp->attrs.max_vl_num;
        props->sm_sl = resp->attrs.sm_sl;
        props->subnet_timeout = resp->attrs.subnet_timeout;
        props->init_type_reply = resp->attrs.init_type_reply;
        props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
        props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
        props->phys_state = resp->attrs.phys_state;

        return 0;
}

/**
 * pvrdma_query_gid - query device gid
 * @ibdev: the device to query
 * @port: the port number
 * @index: the index
 * @gid: the device gid value
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index,
                     union ib_gid *gid)
{
        struct pvrdma_dev *dev = to_vdev(ibdev);

        if (index >= dev->dsr->caps.gid_tbl_len)
                return -EINVAL;

        memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));

        return 0;
}

/**
 * pvrdma_query_pkey - query device port's P_Key table
 * @ibdev: the device to query
 * @port: the port number
 * @index: the index
 * @pkey: the device P_Key value
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                      u16 *pkey)
{
        int err = 0;
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
        cmd->port_num = port;
        cmd->index = index;

        err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
                              PVRDMA_CMD_QUERY_PKEY_RESP);
        if (err < 0) {
                dev_warn(&to_vdev(ibdev)->pdev->dev,
                         "could not query pkey, error: %d\n", err);
                return err;
        }

        *pkey = rsp.query_pkey_resp.pkey;

        return 0;
}

enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
                                            u8 port)
{
        return IB_LINK_LAYER_ETHERNET;
}

int pvrdma_modify_device(struct ib_device *ibdev, int mask,
                         struct ib_device_modify *props)
{
        unsigned long flags;

        if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
                     IB_DEVICE_MODIFY_NODE_DESC)) {
                dev_warn(&to_vdev(ibdev)->pdev->dev,
                         "unsupported device modify mask %#x\n", mask);
                return -EOPNOTSUPP;
        }

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
                memcpy(ibdev->node_desc, props->node_desc, 64);
                spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
        }

        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
                mutex_lock(&to_vdev(ibdev)->port_mutex);
                to_vdev(ibdev)->sys_image_guid =
                        cpu_to_be64(props->sys_image_guid);
                mutex_unlock(&to_vdev(ibdev)->port_mutex);
        }

        return 0;
}
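
/*
 * Note: per the mask check above, only IB_DEVICE_MODIFY_SYS_IMAGE_GUID
 * and IB_DEVICE_MODIFY_NODE_DESC are supported; any other bit yields
 * -EOPNOTSUPP.  The node description copy is bounded to 64 bytes, the
 * size of the node_desc field defined by the IB core.
 */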

/**
 * pvrdma_modify_port - modify device port attributes
 * @ibdev: the device to modify
 * @port: the port number
 * @mask: attributes to modify
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
                       struct ib_port_modify *props)
{
        struct ib_port_attr attr;
        struct pvrdma_dev *vdev = to_vdev(ibdev);
        int ret;

        if (mask & ~IB_PORT_SHUTDOWN) {
                dev_warn(&vdev->pdev->dev,
                         "unsupported port modify mask %#x\n", mask);
                return -EOPNOTSUPP;
        }

        mutex_lock(&vdev->port_mutex);
        ret = ib_query_port(ibdev, port, &attr);
        if (ret)
                goto out;

        vdev->port_cap_mask |= props->set_port_cap_mask;
        vdev->port_cap_mask &= ~props->clr_port_cap_mask;

        if (mask & IB_PORT_SHUTDOWN)
                vdev->ib_active = false;

out:
        mutex_unlock(&vdev->port_mutex);
        return ret;
}

/**
 * pvrdma_alloc_ucontext - allocate ucontext
 * @ibdev: the IB device
 * @udata: user data
 *
 * @return: the ib_ucontext pointer on success, otherwise errno.
 */
struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
                                          struct ib_udata *udata)
{
        struct pvrdma_dev *vdev = to_vdev(ibdev);
        struct pvrdma_ucontext *context;
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
        struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
        struct pvrdma_alloc_ucontext_resp uresp = {0};
        int ret;
        void *ptr;

        if (!vdev->ib_active)
                return ERR_PTR(-EAGAIN);

        context = kmalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        context->dev = vdev;
        ret = pvrdma_uar_alloc(vdev, &context->uar);
        if (ret) {
                kfree(context);
                return ERR_PTR(-ENOMEM);
        }

        /* get ctx_handle from host */
        memset(cmd, 0, sizeof(*cmd));
        cmd->pfn = context->uar.pfn;
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
        ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
        if (ret < 0) {
                dev_warn(&vdev->pdev->dev,
                         "could not create ucontext, error: %d\n", ret);
                ptr = ERR_PTR(ret);
                goto err;
        }

        context->ctx_handle = resp->ctx_handle;

        /* copy back to user */
        uresp.qp_tab_size = vdev->dsr->caps.max_qp;
        ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (ret) {
                pvrdma_uar_free(vdev, &context->uar);
                context->ibucontext.device = ibdev;
                pvrdma_dealloc_ucontext(&context->ibucontext);
                return ERR_PTR(-EFAULT);
        }

        return &context->ibucontext;

err:
        pvrdma_uar_free(vdev, &context->uar);
        kfree(context);
        return ptr;
}
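
/*
 * Informational: allocating a ucontext reserves a UAR page, passes its
 * PFN to the device with PVRDMA_CMD_CREATE_UC, and returns the new
 * ctx_handle along with qp_tab_size to userspace.  The user-space
 * provider library is then expected to mmap() that UAR page through
 * pvrdma_mmap() below before ringing doorbells.
 */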

/**
 * pvrdma_dealloc_ucontext - deallocate ucontext
 * @ibcontext: the ucontext
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
        struct pvrdma_ucontext *context = to_vucontext(ibcontext);
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
        cmd->ctx_handle = context->ctx_handle;

        ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
        if (ret < 0)
                dev_warn(&context->dev->pdev->dev,
                         "destroy ucontext failed, error: %d\n", ret);

        /* Free the UAR even if the device command failed */
        pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
        kfree(context);

        return ret;
}

/**
 * pvrdma_mmap - create mmap region
 * @ibcontext: the user context
 * @vma: the VMA
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
        struct pvrdma_ucontext *context = to_vucontext(ibcontext);
        unsigned long start = vma->vm_start;
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;

        dev_dbg(&context->dev->pdev->dev, "create mmap region\n");

        if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
                dev_warn(&context->dev->pdev->dev,
                         "invalid params for mmap region\n");
                return -EINVAL;
        }

        /* Map UAR to kernel space, VM_LOCKED? */
        vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
                               vma->vm_page_prot))
                return -EAGAIN;

        return 0;
}
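
/*
 * Note on the mapping above: exactly one page (the per-context UAR) is
 * exposed and mapped uncached; VM_DONTCOPY and VM_DONTEXPAND keep the
 * mapping from being inherited across fork() or grown with mremap().
 */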

/**
 * pvrdma_alloc_pd - allocate protection domain
 * @ibdev: the IB device
 * @context: user context
 * @udata: user data
 *
 * @return: the ib_pd protection domain pointer on success, otherwise errno.
 */
struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
                              struct ib_ucontext *context,
                              struct ib_udata *udata)
{
        struct pvrdma_pd *pd;
        struct pvrdma_dev *dev = to_vdev(ibdev);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
        struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
        int ret;
        void *ptr;

        /* Check allowed max pds */
        if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
                return ERR_PTR(-ENOMEM);

        pd = kmalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd) {
                ptr = ERR_PTR(-ENOMEM);
                goto err;
        }

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
        cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "failed to allocate protection domain, error: %d\n",
                         ret);
                ptr = ERR_PTR(ret);
                goto freepd;
        }

        pd->privileged = !context;
        pd->pd_handle = resp->pd_handle;
        pd->pdn = resp->pd_handle;

        if (context) {
                if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back protection domain\n");
                        pvrdma_dealloc_pd(&pd->ibpd);
                        return ERR_PTR(-EFAULT);
                }
        }

        /* u32 pd handle */
        return &pd->ibpd;

freepd:
        kfree(pd);
err:
        atomic_dec(&dev->num_pds);
        return ptr;
}

/**
 * pvrdma_dealloc_pd - deallocate protection domain
 * @pd: the protection domain to be released
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_dealloc_pd(struct ib_pd *pd)
{
        struct pvrdma_dev *dev = to_vdev(pd->device);
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
        cmd->pd_handle = to_vpd(pd)->pd_handle;

        ret = pvrdma_cmd_post(dev, &req, NULL, 0);
        if (ret)
                dev_warn(&dev->pdev->dev,
                         "could not dealloc protection domain, error: %d\n",
                         ret);

        kfree(to_vpd(pd));
        atomic_dec(&dev->num_pds);

        return 0;
}

/**
 * pvrdma_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 * @udata: user data blob
 *
 * @return: the ib_ah pointer on success, otherwise errno.
 */
struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
                               struct ib_udata *udata)
{
        struct pvrdma_dev *dev = to_vdev(pd->device);
        struct pvrdma_ah *ah;
        const struct ib_global_route *grh;
        u8 port_num = rdma_ah_get_port_num(ah_attr);

        if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
                return ERR_PTR(-EINVAL);

        grh = rdma_ah_read_grh(ah_attr);
        if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
            rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
                return ERR_PTR(-EINVAL);

        if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
                return ERR_PTR(-ENOMEM);

        ah = kzalloc(sizeof(*ah), GFP_KERNEL);
        if (!ah) {
                atomic_dec(&dev->num_ahs);
                return ERR_PTR(-ENOMEM);
        }

        ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24);
        ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
        ah->av.src_path_bits |= 0x80;
        ah->av.gid_index = grh->sgid_index;
        ah->av.hop_limit = grh->hop_limit;
        ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
                                     grh->flow_label;
        memcpy(ah->av.dgid, grh->dgid.raw, 16);
        memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);

        ah->ibah.device = pd->device;
        ah->ibah.pd = pd;
        ah->ibah.uobject = NULL;

        return &ah->ibah;
}

/**
 * pvrdma_destroy_ah - destroy an address handle
 * @ah: the address handle to be destroyed
 *
 * @return: 0 on success.
 */
int pvrdma_destroy_ah(struct ib_ah *ah)
{
        struct pvrdma_dev *dev = to_vdev(ah->device);

        kfree(to_vah(ah));
        atomic_dec(&dev->num_ahs);

        return 0;
}
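
/*
 * General note on address handles: unlike PDs and ucontexts, AHs are
 * pure software objects here - no command is posted to the device; the
 * driver only packs the PD handle and port number into av.port_pd and
 * accounts against caps.max_ah.  A userspace sketch of creating one
 * (libibverbs API assumed; only RoCE AHs carrying a GRH are accepted):
 *
 *	struct ibv_ah_attr attr = {
 *		.is_global      = 1,
 *		.port_num       = 1,
 *		.grh.dgid       = remote_gid,
 *		.grh.sgid_index = 0,
 *		.grh.hop_limit  = 64,
 *	};
 *	struct ibv_ah *ah = ibv_create_ah(pd, &attr);
 */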