1 /* 2 * Copyright(c) 2015 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 #include <linux/bitops.h> 49 #include <linux/lockdep.h> 50 #include <linux/vmalloc.h> 51 #include <linux/slab.h> 52 #include <rdma/ib_verbs.h> 53 #include "qp.h" 54 #include "vt.h" 55 56 /* 57 * Note that it is OK to post send work requests in the SQE and ERR 58 * states; rvt_do_send() will process them and generate error 59 * completions as per IB 1.2 C10-96. 60 */ 61 const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { 62 [IB_QPS_RESET] = 0, 63 [IB_QPS_INIT] = RVT_POST_RECV_OK, 64 [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK, 65 [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | 66 RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK | 67 RVT_PROCESS_NEXT_SEND_OK, 68 [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | 69 RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK, 70 [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | 71 RVT_POST_SEND_OK | RVT_FLUSH_SEND, 72 [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV | 73 RVT_POST_SEND_OK | RVT_FLUSH_SEND, 74 }; 75 EXPORT_SYMBOL(ib_rvt_state_ops); 76 77 static void get_map_page(struct rvt_qpn_table *qpt, 78 struct rvt_qpn_map *map, 79 gfp_t gfp) 80 { 81 unsigned long page = get_zeroed_page(gfp); 82 83 /* 84 * Free the page if someone raced with us installing it. 85 */ 86 87 spin_lock(&qpt->lock); 88 if (map->page) 89 free_page(page); 90 else 91 map->page = (void *)page; 92 spin_unlock(&qpt->lock); 93 } 94 95 /** 96 * init_qpn_table - initialize the QP number table for a device 97 * @qpt: the QPN table 98 */ 99 static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) 100 { 101 u32 offset, i; 102 struct rvt_qpn_map *map; 103 int ret = 0; 104 105 if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start)) 106 return -EINVAL; 107 108 spin_lock_init(&qpt->lock); 109 110 qpt->last = rdi->dparms.qpn_start; 111 qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift; 112 113 /* 114 * Drivers may want some QPs beyond what we need for verbs let them use 115 * our qpn table. No need for two. Lets go ahead and mark the bitmaps 116 * for those. The reserved range must be *after* the range which verbs 117 * will pick from. 118 */ 119 120 /* Figure out number of bit maps needed before reserved range */ 121 qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE; 122 123 /* This should always be zero */ 124 offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK; 125 126 /* Starting with the first reserved bit map */ 127 map = &qpt->map[qpt->nmaps]; 128 129 rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n", 130 rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); 131 for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { 132 if (!map->page) { 133 get_map_page(qpt, map, GFP_KERNEL); 134 if (!map->page) { 135 ret = -ENOMEM; 136 break; 137 } 138 } 139 set_bit(offset, map->page); 140 offset++; 141 if (offset == RVT_BITS_PER_PAGE) { 142 /* next page */ 143 qpt->nmaps++; 144 map++; 145 offset = 0; 146 } 147 } 148 return ret; 149 } 150 151 /** 152 * free_qpn_table - free the QP number table for a device 153 * @qpt: the QPN table 154 */ 155 static void free_qpn_table(struct rvt_qpn_table *qpt) 156 { 157 int i; 158 159 for (i = 0; i < ARRAY_SIZE(qpt->map); i++) 160 free_page((unsigned long)qpt->map[i].page); 161 } 162 163 int rvt_driver_qp_init(struct rvt_dev_info *rdi) 164 { 165 int i; 166 int ret = -ENOMEM; 167 168 if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { 169 rvt_pr_info(rdi, "Driver is doing QP init.\n"); 170 return 0; 171 } 172 173 if (!rdi->dparms.qp_table_size) 174 return -EINVAL; 175 176 /* 177 * If driver is not doing any QP allocation then make sure it is 178 * providing the necessary QP functions. 179 */ 180 if (!rdi->driver_f.free_all_qps || 181 !rdi->driver_f.qp_priv_alloc || 182 !rdi->driver_f.qp_priv_free || 183 !rdi->driver_f.notify_qp_reset) 184 return -EINVAL; 185 186 /* allocate parent object */ 187 rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); 188 if (!rdi->qp_dev) 189 return -ENOMEM; 190 191 /* allocate hash table */ 192 rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; 193 rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); 194 rdi->qp_dev->qp_table = 195 kmalloc(rdi->qp_dev->qp_table_size * 196 sizeof(*rdi->qp_dev->qp_table), 197 GFP_KERNEL); 198 if (!rdi->qp_dev->qp_table) 199 goto no_qp_table; 200 201 for (i = 0; i < rdi->qp_dev->qp_table_size; i++) 202 RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL); 203 204 spin_lock_init(&rdi->qp_dev->qpt_lock); 205 206 /* initialize qpn map */ 207 if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) 208 goto fail_table; 209 210 spin_lock_init(&rdi->n_qps_lock); 211 212 return 0; 213 214 fail_table: 215 kfree(rdi->qp_dev->qp_table); 216 free_qpn_table(&rdi->qp_dev->qpn_table); 217 218 no_qp_table: 219 kfree(rdi->qp_dev); 220 221 return ret; 222 } 223 224 /** 225 * free_all_qps - check for QPs still in use 226 * @qpt: the QP table to empty 227 * 228 * There should not be any QPs still in use. 229 * Free memory for table. 230 */ 231 static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) 232 { 233 unsigned long flags; 234 struct rvt_qp *qp; 235 unsigned n, qp_inuse = 0; 236 spinlock_t *ql; /* work around too long line below */ 237 238 if (rdi->driver_f.free_all_qps) 239 qp_inuse = rdi->driver_f.free_all_qps(rdi); 240 241 if (!rdi->qp_dev) 242 return qp_inuse; 243 244 ql = &rdi->qp_dev->qpt_lock; 245 spin_lock_irqsave(ql, flags); 246 for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { 247 qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], 248 lockdep_is_held(ql)); 249 RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); 250 251 for (; qp; qp = rcu_dereference_protected(qp->next, 252 lockdep_is_held(ql))) 253 qp_inuse++; 254 } 255 spin_unlock_irqrestore(ql, flags); 256 synchronize_rcu(); 257 return qp_inuse; 258 } 259 260 void rvt_qp_exit(struct rvt_dev_info *rdi) 261 { 262 u32 qps_inuse = rvt_free_all_qps(rdi); 263 264 if (qps_inuse) 265 rvt_pr_err(rdi, "QP memory leak! %u still in use\n", 266 qps_inuse); 267 if (!rdi->qp_dev) 268 return; 269 270 if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) 271 return; /* driver did the qp init so nothing else to do */ 272 273 kfree(rdi->qp_dev->qp_table); 274 free_qpn_table(&rdi->qp_dev->qpn_table); 275 kfree(rdi->qp_dev); 276 } 277 278 static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, 279 struct rvt_qpn_map *map, unsigned off) 280 { 281 return (map - qpt->map) * RVT_BITS_PER_PAGE + off; 282 } 283 284 /* 285 * Allocate the next available QPN or 286 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 287 */ 288 static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, 289 enum ib_qp_type type, u8 port, gfp_t gfp) 290 { 291 u32 i, offset, max_scan, qpn; 292 struct rvt_qpn_map *map; 293 u32 ret; 294 295 if (rdi->driver_f.alloc_qpn) 296 return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, 297 GFP_KERNEL); 298 299 if (type == IB_QPT_SMI || type == IB_QPT_GSI) { 300 unsigned n; 301 302 ret = type == IB_QPT_GSI; 303 n = 1 << (ret + 2 * (port - 1)); 304 spin_lock(&qpt->lock); 305 if (qpt->flags & n) 306 ret = -EINVAL; 307 else 308 qpt->flags |= n; 309 spin_unlock(&qpt->lock); 310 goto bail; 311 } 312 313 qpn = qpt->last + qpt->incr; 314 if (qpn >= RVT_QPN_MAX) 315 qpn = qpt->incr | ((qpt->last & 1) ^ 1); 316 /* offset carries bit 0 */ 317 offset = qpn & RVT_BITS_PER_PAGE_MASK; 318 map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; 319 max_scan = qpt->nmaps - !offset; 320 for (i = 0;;) { 321 if (unlikely(!map->page)) { 322 get_map_page(qpt, map, gfp); 323 if (unlikely(!map->page)) 324 break; 325 } 326 do { 327 if (!test_and_set_bit(offset, map->page)) { 328 qpt->last = qpn; 329 ret = qpn; 330 goto bail; 331 } 332 offset += qpt->incr; 333 /* 334 * This qpn might be bogus if offset >= BITS_PER_PAGE. 335 * That is OK. It gets re-assigned below 336 */ 337 qpn = mk_qpn(qpt, map, offset); 338 } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); 339 /* 340 * In order to keep the number of pages allocated to a 341 * minimum, we scan the all existing pages before increasing 342 * the size of the bitmap table. 343 */ 344 if (++i > max_scan) { 345 if (qpt->nmaps == RVT_QPNMAP_ENTRIES) 346 break; 347 map = &qpt->map[qpt->nmaps++]; 348 /* start at incr with current bit 0 */ 349 offset = qpt->incr | (offset & 1); 350 } else if (map < &qpt->map[qpt->nmaps]) { 351 ++map; 352 /* start at incr with current bit 0 */ 353 offset = qpt->incr | (offset & 1); 354 } else { 355 map = &qpt->map[0]; 356 /* wrap to first map page, invert bit 0 */ 357 offset = qpt->incr | ((offset & 1) ^ 1); 358 } 359 /* there can be no bits at shift and below */ 360 WARN_ON(offset & (rdi->dparms.qos_shift - 1)); 361 qpn = mk_qpn(qpt, map, offset); 362 } 363 364 ret = -ENOMEM; 365 366 bail: 367 return ret; 368 } 369 370 static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) 371 { 372 struct rvt_qpn_map *map; 373 374 map = qpt->map + qpn / RVT_BITS_PER_PAGE; 375 if (map->page) 376 clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); 377 } 378 379 /** 380 * reset_qp - initialize the QP state to the reset state 381 * @qp: the QP to reset 382 * @type: the QP type 383 */ 384 void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, 385 enum ib_qp_type type) 386 { 387 qp->remote_qpn = 0; 388 qp->qkey = 0; 389 qp->qp_access_flags = 0; 390 391 /* 392 * Let driver do anything it needs to for a new/reset qp 393 */ 394 rdi->driver_f.notify_qp_reset(qp); 395 396 qp->s_flags &= RVT_S_SIGNAL_REQ_WR; 397 qp->s_hdrwords = 0; 398 qp->s_wqe = NULL; 399 qp->s_draining = 0; 400 qp->s_next_psn = 0; 401 qp->s_last_psn = 0; 402 qp->s_sending_psn = 0; 403 qp->s_sending_hpsn = 0; 404 qp->s_psn = 0; 405 qp->r_psn = 0; 406 qp->r_msn = 0; 407 if (type == IB_QPT_RC) { 408 qp->s_state = IB_OPCODE_RC_SEND_LAST; 409 qp->r_state = IB_OPCODE_RC_SEND_LAST; 410 } else { 411 qp->s_state = IB_OPCODE_UC_SEND_LAST; 412 qp->r_state = IB_OPCODE_UC_SEND_LAST; 413 } 414 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 415 qp->r_nak_state = 0; 416 qp->r_aflags = 0; 417 qp->r_flags = 0; 418 qp->s_head = 0; 419 qp->s_tail = 0; 420 qp->s_cur = 0; 421 qp->s_acked = 0; 422 qp->s_last = 0; 423 qp->s_ssn = 1; 424 qp->s_lsn = 0; 425 qp->s_mig_state = IB_MIG_MIGRATED; 426 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); 427 qp->r_head_ack_queue = 0; 428 qp->s_tail_ack_queue = 0; 429 qp->s_num_rd_atomic = 0; 430 if (qp->r_rq.wq) { 431 qp->r_rq.wq->head = 0; 432 qp->r_rq.wq->tail = 0; 433 } 434 qp->r_sge.num_sge = 0; 435 } 436 EXPORT_SYMBOL(rvt_reset_qp); 437 438 /** 439 * rvt_create_qp - create a queue pair for a device 440 * @ibpd: the protection domain who's device we create the queue pair for 441 * @init_attr: the attributes of the queue pair 442 * @udata: user data for libibverbs.so 443 * 444 * Queue pair creation is mostly an rvt issue. However, drivers have their own 445 * unique idea of what queue pair numbers mean. For instance there is a reserved 446 * range for PSM. 447 * 448 * Returns the queue pair on success, otherwise returns an errno. 449 * 450 * Called by the ib_create_qp() core verbs function. 451 */ 452 struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, 453 struct ib_qp_init_attr *init_attr, 454 struct ib_udata *udata) 455 { 456 struct rvt_qp *qp; 457 int err; 458 struct rvt_swqe *swq = NULL; 459 size_t sz; 460 size_t sg_list_sz; 461 struct ib_qp *ret = ERR_PTR(-ENOMEM); 462 struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); 463 void *priv = NULL; 464 gfp_t gfp; 465 466 if (!rdi) 467 return ERR_PTR(-EINVAL); 468 469 if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || 470 init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || 471 init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) 472 return ERR_PTR(-EINVAL); 473 474 /* GFP_NOIO is applicable to RC QP's only */ 475 476 if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && 477 init_attr->qp_type != IB_QPT_RC) 478 return ERR_PTR(-EINVAL); 479 480 gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? 481 GFP_NOIO : GFP_KERNEL; 482 483 /* Check receive queue parameters if no SRQ is specified. */ 484 if (!init_attr->srq) { 485 if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || 486 init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) 487 return ERR_PTR(-EINVAL); 488 489 if (init_attr->cap.max_send_sge + 490 init_attr->cap.max_send_wr + 491 init_attr->cap.max_recv_sge + 492 init_attr->cap.max_recv_wr == 0) 493 return ERR_PTR(-EINVAL); 494 } 495 496 switch (init_attr->qp_type) { 497 case IB_QPT_SMI: 498 case IB_QPT_GSI: 499 if (init_attr->port_num == 0 || 500 init_attr->port_num > ibpd->device->phys_port_cnt) 501 return ERR_PTR(-EINVAL); 502 case IB_QPT_UC: 503 case IB_QPT_RC: 504 case IB_QPT_UD: 505 sz = sizeof(struct rvt_sge) * 506 init_attr->cap.max_send_sge + 507 sizeof(struct rvt_swqe); 508 if (gfp == GFP_NOIO) 509 swq = __vmalloc( 510 (init_attr->cap.max_send_wr + 1) * sz, 511 gfp, PAGE_KERNEL); 512 else 513 swq = vmalloc( 514 (init_attr->cap.max_send_wr + 1) * sz); 515 if (!swq) 516 return ERR_PTR(-ENOMEM); 517 518 sz = sizeof(*qp); 519 sg_list_sz = 0; 520 if (init_attr->srq) { 521 struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); 522 523 if (srq->rq.max_sge > 1) 524 sg_list_sz = sizeof(*qp->r_sg_list) * 525 (srq->rq.max_sge - 1); 526 } else if (init_attr->cap.max_recv_sge > 1) 527 sg_list_sz = sizeof(*qp->r_sg_list) * 528 (init_attr->cap.max_recv_sge - 1); 529 qp = kzalloc(sz + sg_list_sz, gfp); 530 if (!qp) 531 goto bail_swq; 532 533 RCU_INIT_POINTER(qp->next, NULL); 534 535 /* 536 * Driver needs to set up it's private QP structure and do any 537 * initialization that is needed. 538 */ 539 priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); 540 if (!priv) 541 goto bail_qp; 542 qp->priv = priv; 543 qp->timeout_jiffies = 544 usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 545 1000UL); 546 if (init_attr->srq) { 547 sz = 0; 548 } else { 549 qp->r_rq.size = init_attr->cap.max_recv_wr + 1; 550 qp->r_rq.max_sge = init_attr->cap.max_recv_sge; 551 sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + 552 sizeof(struct rvt_rwqe); 553 if (udata) 554 qp->r_rq.wq = vmalloc_user( 555 sizeof(struct rvt_rwq) + 556 qp->r_rq.size * sz); 557 else if (gfp == GFP_NOIO) 558 qp->r_rq.wq = __vmalloc( 559 sizeof(struct rvt_rwq) + 560 qp->r_rq.size * sz, 561 gfp, PAGE_KERNEL); 562 else 563 qp->r_rq.wq = vmalloc( 564 sizeof(struct rvt_rwq) + 565 qp->r_rq.size * sz); 566 if (!qp->r_rq.wq) 567 goto bail_driver_priv; 568 } 569 570 /* 571 * ib_create_qp() will initialize qp->ibqp 572 * except for qp->ibqp.qp_num. 573 */ 574 spin_lock_init(&qp->r_lock); 575 spin_lock_init(&qp->s_lock); 576 spin_lock_init(&qp->r_rq.lock); 577 atomic_set(&qp->refcount, 0); 578 init_waitqueue_head(&qp->wait); 579 init_timer(&qp->s_timer); 580 qp->s_timer.data = (unsigned long)qp; 581 INIT_LIST_HEAD(&qp->rspwait); 582 qp->state = IB_QPS_RESET; 583 qp->s_wq = swq; 584 qp->s_size = init_attr->cap.max_send_wr + 1; 585 qp->s_max_sge = init_attr->cap.max_send_sge; 586 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) 587 qp->s_flags = RVT_S_SIGNAL_REQ_WR; 588 589 err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, 590 init_attr->qp_type, 591 init_attr->port_num, gfp); 592 if (err < 0) { 593 ret = ERR_PTR(err); 594 goto bail_rq_wq; 595 } 596 qp->ibqp.qp_num = err; 597 qp->port_num = init_attr->port_num; 598 rvt_reset_qp(rdi, qp, init_attr->qp_type); 599 break; 600 601 default: 602 /* Don't support raw QPs */ 603 return ERR_PTR(-EINVAL); 604 } 605 606 init_attr->cap.max_inline_data = 0; 607 608 /* 609 * Return the address of the RWQ as the offset to mmap. 610 * See rvt_mmap() for details. 611 */ 612 if (udata && udata->outlen >= sizeof(__u64)) { 613 if (!qp->r_rq.wq) { 614 __u64 offset = 0; 615 616 err = ib_copy_to_udata(udata, &offset, 617 sizeof(offset)); 618 if (err) { 619 ret = ERR_PTR(err); 620 goto bail_qpn; 621 } 622 } else { 623 u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; 624 625 qp->ip = rvt_create_mmap_info(rdi, s, 626 ibpd->uobject->context, 627 qp->r_rq.wq); 628 if (!qp->ip) { 629 ret = ERR_PTR(-ENOMEM); 630 goto bail_qpn; 631 } 632 633 err = ib_copy_to_udata(udata, &qp->ip->offset, 634 sizeof(qp->ip->offset)); 635 if (err) { 636 ret = ERR_PTR(err); 637 goto bail_ip; 638 } 639 } 640 } 641 642 spin_lock(&rdi->n_qps_lock); 643 if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { 644 spin_unlock(&rdi->n_qps_lock); 645 ret = ERR_PTR(-ENOMEM); 646 goto bail_ip; 647 } 648 649 rdi->n_qps_allocated++; 650 spin_unlock(&rdi->n_qps_lock); 651 652 if (qp->ip) { 653 spin_lock_irq(&rdi->pending_lock); 654 list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); 655 spin_unlock_irq(&rdi->pending_lock); 656 } 657 658 ret = &qp->ibqp; 659 660 /* 661 * We have our QP and its good, now keep track of what types of opcodes 662 * can be processed on this QP. We do this by keeping track of what the 663 * 3 high order bits of the opcode are. 664 */ 665 switch (init_attr->qp_type) { 666 case IB_QPT_SMI: 667 case IB_QPT_GSI: 668 case IB_QPT_UD: 669 qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; 670 break; 671 case IB_QPT_RC: 672 qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; 673 break; 674 case IB_QPT_UC: 675 qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; 676 break; 677 default: 678 ret = ERR_PTR(-EINVAL); 679 goto bail_ip; 680 } 681 682 return ret; 683 684 bail_ip: 685 kref_put(&qp->ip->ref, rvt_release_mmap_info); 686 687 bail_qpn: 688 free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); 689 690 bail_rq_wq: 691 vfree(qp->r_rq.wq); 692 693 bail_driver_priv: 694 rdi->driver_f.qp_priv_free(rdi, qp); 695 696 bail_qp: 697 kfree(qp); 698 699 bail_swq: 700 vfree(swq); 701 702 return ret; 703 } 704 705 /** 706 * qib_modify_qp - modify the attributes of a queue pair 707 * @ibqp: the queue pair who's attributes we're modifying 708 * @attr: the new attributes 709 * @attr_mask: the mask of attributes to modify 710 * @udata: user data for libibverbs.so 711 * 712 * Returns 0 on success, otherwise returns an errno. 713 */ 714 int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 715 int attr_mask, struct ib_udata *udata) 716 { 717 /* 718 * VT-DRIVER-API: qp_mtu() 719 * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC 720 * to VL and the mapping table of MTUs per VL. This is not something 721 * that IB has and should not live in the rvt. 722 */ 723 return -EOPNOTSUPP; 724 } 725 726 /** 727 * rvt_destroy_qp - destroy a queue pair 728 * @ibqp: the queue pair to destroy 729 * 730 * Returns 0 on success. 731 * 732 * Note that this can be called while the QP is actively sending or 733 * receiving! 734 */ 735 int rvt_destroy_qp(struct ib_qp *ibqp) 736 { 737 /* 738 * VT-DRIVER-API: qp_flush() 739 * Driver provies a mechanism to flush and wait for that flush to 740 * finish. 741 */ 742 743 return -EOPNOTSUPP; 744 } 745 746 int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 747 int attr_mask, struct ib_qp_init_attr *init_attr) 748 { 749 return -EOPNOTSUPP; 750 } 751 752 /** 753 * rvt_post_receive - post a receive on a QP 754 * @ibqp: the QP to post the receive on 755 * @wr: the WR to post 756 * @bad_wr: the first bad WR is put here 757 * 758 * This may be called from interrupt context. 759 */ 760 int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 761 struct ib_recv_wr **bad_wr) 762 { 763 /* 764 * When a packet arrives the driver needs to call up to rvt to process 765 * the packet. The UD, RC, UC processing will be done in rvt, however 766 * the driver should be able to override this if it so choses. Perhaps a 767 * set of function pointers set up at registration time. 768 */ 769 770 return -EOPNOTSUPP; 771 } 772 773 /** 774 * rvt_post_one_wr - post one RC, UC, or UD send work request 775 * @qp: the QP to post on 776 * @wr: the work request to send 777 */ 778 static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) 779 { 780 struct rvt_swqe *wqe; 781 u32 next; 782 int i; 783 int j; 784 int acc; 785 struct rvt_lkey_table *rkt; 786 struct rvt_pd *pd; 787 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); 788 789 /* IB spec says that num_sge == 0 is OK. */ 790 if (unlikely(wr->num_sge > qp->s_max_sge)) 791 return -EINVAL; 792 793 /* 794 * Don't allow RDMA reads or atomic operations on UC or 795 * undefined operations. 796 * Make sure buffer is large enough to hold the result for atomics. 797 */ 798 if (qp->ibqp.qp_type == IB_QPT_UC) { 799 if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) 800 return -EINVAL; 801 } else if (qp->ibqp.qp_type != IB_QPT_RC) { 802 /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ 803 if (wr->opcode != IB_WR_SEND && 804 wr->opcode != IB_WR_SEND_WITH_IMM) 805 return -EINVAL; 806 /* Check UD destination address PD */ 807 if (qp->ibqp.pd != ud_wr(wr)->ah->pd) 808 return -EINVAL; 809 } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { 810 return -EINVAL; 811 } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && 812 (wr->num_sge == 0 || 813 wr->sg_list[0].length < sizeof(u64) || 814 wr->sg_list[0].addr & (sizeof(u64) - 1))) { 815 return -EINVAL; 816 } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { 817 return -EINVAL; 818 } 819 820 next = qp->s_head + 1; 821 if (next >= qp->s_size) 822 next = 0; 823 if (next == qp->s_last) 824 return -ENOMEM; 825 826 rkt = &rdi->lkey_table; 827 pd = ibpd_to_rvtpd(qp->ibqp.pd); 828 wqe = rvt_get_swqe_ptr(qp, qp->s_head); 829 830 if (qp->ibqp.qp_type != IB_QPT_UC && 831 qp->ibqp.qp_type != IB_QPT_RC) 832 memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); 833 else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || 834 wr->opcode == IB_WR_RDMA_WRITE || 835 wr->opcode == IB_WR_RDMA_READ) 836 memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); 837 else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || 838 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 839 memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); 840 else 841 memcpy(&wqe->wr, wr, sizeof(wqe->wr)); 842 843 wqe->length = 0; 844 j = 0; 845 if (wr->num_sge) { 846 acc = wr->opcode >= IB_WR_RDMA_READ ? 847 IB_ACCESS_LOCAL_WRITE : 0; 848 for (i = 0; i < wr->num_sge; i++) { 849 u32 length = wr->sg_list[i].length; 850 int ok; 851 852 if (length == 0) 853 continue; 854 ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], 855 &wr->sg_list[i], acc); 856 if (!ok) 857 goto bail_inval_free; 858 wqe->length += length; 859 j++; 860 } 861 wqe->wr.num_sge = j; 862 } 863 if (qp->ibqp.qp_type == IB_QPT_UC || 864 qp->ibqp.qp_type == IB_QPT_RC) { 865 if (wqe->length > 0x80000000U) 866 goto bail_inval_free; 867 } else { 868 atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); 869 } 870 wqe->ssn = qp->s_ssn++; 871 qp->s_head = next; 872 873 return 0; 874 875 bail_inval_free: 876 /* release mr holds */ 877 while (j) { 878 struct rvt_sge *sge = &wqe->sg_list[--j]; 879 880 rvt_put_mr(sge->mr); 881 } 882 return -EINVAL; 883 } 884 885 /** 886 * rvt_post_send - post a send on a QP 887 * @ibqp: the QP to post the send on 888 * @wr: the list of work requests to post 889 * @bad_wr: the first bad WR is put here 890 * 891 * This may be called from interrupt context. 892 */ 893 int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 894 struct ib_send_wr **bad_wr) 895 { 896 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); 897 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); 898 unsigned long flags = 0; 899 int call_send; 900 unsigned nreq = 0; 901 int err = 0; 902 903 spin_lock_irqsave(&qp->s_lock, flags); 904 905 /* 906 * Ensure QP state is such that we can send. If not bail out early, 907 * there is no need to do this every time we post a send. 908 */ 909 if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { 910 spin_unlock_irqrestore(&qp->s_lock, flags); 911 return -EINVAL; 912 } 913 914 /* 915 * If the send queue is empty, and we only have a single WR then just go 916 * ahead and kick the send engine into gear. Otherwise we will always 917 * just schedule the send to happen later. 918 */ 919 call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; 920 921 for (; wr; wr = wr->next) { 922 err = rvt_post_one_wr(qp, wr); 923 if (unlikely(err)) { 924 *bad_wr = wr; 925 goto bail; 926 } 927 nreq++; 928 } 929 bail: 930 if (nreq && !call_send) 931 rdi->driver_f.schedule_send(qp); 932 spin_unlock_irqrestore(&qp->s_lock, flags); 933 if (nreq && call_send) 934 rdi->driver_f.do_send(qp); 935 return err; 936 } 937 938 /** 939 * rvt_post_srq_receive - post a receive on a shared receive queue 940 * @ibsrq: the SRQ to post the receive on 941 * @wr: the list of work requests to post 942 * @bad_wr: A pointer to the first WR to cause a problem is put here 943 * 944 * This may be called from interrupt context. 945 */ 946 int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 947 struct ib_recv_wr **bad_wr) 948 { 949 return -EOPNOTSUPP; 950 } 951