/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>

#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"

enum {
	MLX4_IB_ACK_REQ_FREQ	= 8,
};

enum {
	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f
};

enum {
	/*
	 * Largest possible UD header: send with GRH and immediate data.
	 */
	MLX4_IB_UD_HEADER_SIZE	= 72
};

struct mlx4_ib_sqp {
	struct mlx4_ib_qp	qp;
	int			pkey_index;
	u32			qkey;
	u32			send_psn;
	struct ib_ud_header	ud_header;
	u8			header_buf[MLX4_IB_UD_HEADER_SIZE];
};

static const __be32 mlx4_ib_opcode[] = {
	[IB_WR_SEND]			= __constant_cpu_to_be32(MLX4_OPCODE_SEND),
	[IB_WR_SEND_WITH_IMM]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
	[IB_WR_RDMA_WRITE]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
	[IB_WR_RDMA_WRITE_WITH_IMM]	= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
	[IB_WR_RDMA_READ]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
	[IB_WR_ATOMIC_CMP_AND_SWP]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
	[IB_WR_ATOMIC_FETCH_AND_ADD]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
};

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
	return container_of(mqp, struct mlx4_ib_sqp, qp);
}

static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
		qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
}

static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
		qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
}

static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
	if (qp->buf.nbufs == 1)
		return qp->buf.u.direct.buf + offset;
	else
		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
			(offset & (PAGE_SIZE - 1));
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	u32 *wqe = get_send_wqe(qp, n);
	int i;
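
	/*
	 * Note that wqe points to 32-bit words, so i counts words:
	 * starting at i = 16 (byte offset 64) and stepping by 16 words
	 * (64 bytes) touches the first word of every chunk after the
	 * first, and 1 << (wqe_shift - 2) is the WQE size in words.
	 */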
	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
		wqe[i] = 0xffffffff;
}

static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
	struct ib_event event;
	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

	if (type == MLX4_EVENT_TYPE_PATH_MIG)
		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

	if (ibqp->event_handler) {
		event.device     = ibqp->device;
		event.element.qp = ibqp;
		switch (type) {
		case MLX4_EVENT_TYPE_PATH_MIG:
			event.event = IB_EVENT_PATH_MIG;
			break;
		case MLX4_EVENT_TYPE_COMM_EST:
			event.event = IB_EVENT_COMM_EST;
			break;
		case MLX4_EVENT_TYPE_SQ_DRAINED:
			event.event = IB_EVENT_SQ_DRAINED;
			break;
		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
			break;
		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_QP_FATAL;
			break;
		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
			event.event = IB_EVENT_PATH_MIG_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			event.event = IB_EVENT_QP_REQ_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
			event.event = IB_EVENT_QP_ACCESS_ERR;
			break;
		default:
			printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
			       "on QP %06x\n", type, qp->qpn);
			return;
		}

		ibqp->event_handler(&event, ibqp->qp_context);
	}
}

static int send_wqe_overhead(enum ib_qp_type type)
{
	/*
	 * UD WQEs must have a datagram segment.
	 * RC and UC WQEs might have a remote address segment.
	 * MLX WQEs need two extra inline data segments (for the UD
	 * header and space for the ICRC).
	 */
	switch (type) {
	case IB_QPT_UD:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg);
	case IB_QPT_UC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case IB_QPT_RC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_atomic_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			ALIGN(MLX4_IB_UD_HEADER_SIZE +
			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
					   MLX4_INLINE_ALIGN) *
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg)) +
			ALIGN(4 +
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg));
	default:
		return sizeof (struct mlx4_wqe_ctrl_seg);
	}
}

static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
		       int is_user, int has_srq, struct mlx4_ib_qp *qp)
{
	/* Sanity check RQ size before proceeding */
	if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
		return -EINVAL;

	if (has_srq) {
		/* QPs attached to an SRQ should have no RQ */
		if (cap->max_recv_wr)
			return -EINVAL;

		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
	} else {
		/* HW requires >= 1 RQ entry with >= 1 gather entry */
		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
			return -EINVAL;

		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
	}

	cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
	cap->max_recv_sge = qp->rq.max_gs;

	return 0;
}

static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
	/* Sanity check SQ size before proceeding */
	if (cap->max_send_wr > dev->dev->caps.max_wqes ||
	    cap->max_send_sge > dev->dev->caps.max_sq_sg ||
	    cap->max_inline_data + send_wqe_overhead(type) +
	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
		return -EINVAL;

	/*
	 * For MLX transport we need 2 extra S/G entries:
	 * one for the header and one for the checksum at the end
	 */
	if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
		return -EINVAL;

	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
							sizeof (struct mlx4_wqe_data_seg),
							cap->max_inline_data +
							sizeof (struct mlx4_wqe_inline_seg)) +
						    send_wqe_overhead(type)));
	qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
		sizeof (struct mlx4_wqe_data_seg);
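
	/*
	 * For illustration (taking the control, remote-address, atomic
	 * and data segments as 16 bytes each): an RC QP that asked for
	 * 8 send SGEs and no inline data needs 8 * 16 + 48 = 176 bytes
	 * per WQE, which rounds up to a 256-byte stride (wqe_shift = 8);
	 * max_gs is then recomputed as (256 - 48) / 16 = 13.
	 */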

	/*
	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
	 * allow HW to prefetch.
	 */
	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
	qp->sq.wqe_cnt	  = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
	cap->max_send_sge = qp->sq.max_gs;
	/* We don't support inline sends for kernel QPs (yet) */
	cap->max_inline_data = 0;

	return 0;
}

static int set_user_sq_size(struct mlx4_ib_qp *qp,
			    struct mlx4_ib_create_qp *ucmd)
{
	qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
	qp->sq.wqe_shift = ucmd->log_sq_stride;

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);

	return 0;
}

static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
{
	int err;

	mutex_init(&qp->mutex);
	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);

	qp->state	 = IB_QPS_RESET;
	qp->atomic_rd_en = 0;
	qp->resp_depth	 = 0;

	qp->rq.head = 0;
	qp->rq.tail = 0;
	qp->sq.head = 0;
	qp->sq.tail = 0;

	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
	if (err)
		goto err;

	if (pd->uobject) {
		struct mlx4_ib_create_qp ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err;
		}

		qp->sq_no_prefetch = ucmd.sq_no_prefetch;

		err = set_user_sq_size(qp, &ucmd);
		if (err)
			goto err;

		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
				       qp->buf_size, 0);
		if (IS_ERR(qp->umem)) {
			err = PTR_ERR(qp->umem);
			goto err;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
				    ilog2(qp->umem->page_size), &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
		if (err)
			goto err_mtt;

		if (!init_attr->srq) {
			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
						  ucmd.db_addr, &qp->db);
			if (err)
				goto err_mtt;
		}
	} else {
		qp->sq_no_prefetch = 0;

		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
		if (err)
			goto err;

		if (!init_attr->srq) {
			err = mlx4_ib_db_alloc(dev, &qp->db, 0);
			if (err)
				goto err;

			*qp->db.db = 0;
		}

		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
			err = -ENOMEM;
			goto err_db;
		}

		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
				    &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
		if (err)
			goto err_mtt;

		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);

		if (!qp->sq.wrid || !qp->rq.wrid) {
			err = -ENOMEM;
			goto err_wrid;
		}
	}

	err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
	if (err)
		goto err_wrid;

	/*
	 * Hardware wants QPN written in big-endian order (after
	 * shifting) for send doorbell.
	 * Precompute this value to save a little bit when posting sends.
	 */
	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);

	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
	else
		qp->sq_signal_bits = 0;

	qp->mqp.event = mlx4_ib_qp_event;

	return 0;

err_wrid:
	if (pd->uobject && !init_attr->srq)
		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
	else {
		kfree(qp->sq.wrid);
		kfree(qp->rq.wrid);
	}

err_mtt:
	mlx4_mtt_cleanup(dev->dev, &qp->mtt);

err_buf:
	if (pd->uobject)
		ib_umem_release(qp->umem);
	else
		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);

err_db:
	if (!pd->uobject && !init_attr->srq)
		mlx4_ib_db_free(dev, &qp->db);

err:
	return err;
}

static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
{
	switch (state) {
	case IB_QPS_RESET:	return MLX4_QP_STATE_RST;
	case IB_QPS_INIT:	return MLX4_QP_STATE_INIT;
	case IB_QPS_RTR:	return MLX4_QP_STATE_RTR;
	case IB_QPS_RTS:	return MLX4_QP_STATE_RTS;
	case IB_QPS_SQD:	return MLX4_QP_STATE_SQD;
	case IB_QPS_SQE:	return MLX4_QP_STATE_SQER;
	case IB_QPS_ERR:	return MLX4_QP_STATE_ERR;
	default:		return -1;
	}
}
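
/*
 * Take the locks of both CQs in a fixed order (lower CQ number first)
 * so that two QPs sharing the same pair of CQs can never deadlock;
 * spin_lock_nested() tells lockdep that taking a second lock of the
 * same class here is intentional.
 */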
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
	if (send_cq == recv_cq)
		spin_lock_irq(&send_cq->lock);
	else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
		spin_lock_irq(&send_cq->lock);
		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock_irq(&recv_cq->lock);
		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
	}
}

static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
	if (send_cq == recv_cq)
		spin_unlock_irq(&send_cq->lock);
	else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
		spin_unlock(&recv_cq->lock);
		spin_unlock_irq(&send_cq->lock);
	} else {
		spin_unlock(&send_cq->lock);
		spin_unlock_irq(&recv_cq->lock);
	}
}

static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
			      int is_user)
{
	struct mlx4_ib_cq *send_cq, *recv_cq;

	if (qp->state != IB_QPS_RESET)
		if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
				   MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
			printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
			       qp->mqp.qpn);

	send_cq = to_mcq(qp->ibqp.send_cq);
	recv_cq = to_mcq(qp->ibqp.recv_cq);

	mlx4_ib_lock_cqs(send_cq, recv_cq);

	if (!is_user) {
		__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
				   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
		if (send_cq != recv_cq)
			__mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
	}

	mlx4_qp_remove(dev->dev, &qp->mqp);

	mlx4_ib_unlock_cqs(send_cq, recv_cq);

	mlx4_qp_free(dev->dev, &qp->mqp);
	mlx4_mtt_cleanup(dev->dev, &qp->mtt);

	if (is_user) {
		if (!qp->ibqp.srq)
			mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
					      &qp->db);
		ib_umem_release(qp->umem);
	} else {
		kfree(qp->sq.wrid);
		kfree(qp->rq.wrid);
		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
		if (!qp->ibqp.srq)
			mlx4_ib_db_free(dev, &qp->db);
	}
}

struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
				struct ib_qp_init_attr *init_attr,
				struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_sqp *sqp;
	struct mlx4_ib_qp *qp;
	int err;

	switch (init_attr->qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
	case IB_QPT_UD:
	{
		qp = kmalloc(sizeof *qp, GFP_KERNEL);
		if (!qp)
			return ERR_PTR(-ENOMEM);

		err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
		if (err) {
			kfree(qp);
			return ERR_PTR(err);
		}

		qp->ibqp.qp_num = qp->mqp.qpn;

		break;
	}
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	{
		/* Userspace is not allowed to create special QPs: */
		if (pd->uobject)
			return ERR_PTR(-EINVAL);

		sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
		if (!sqp)
			return ERR_PTR(-ENOMEM);

		qp = &sqp->qp;

		err = create_qp_common(dev, pd, init_attr, udata,
				       dev->dev->caps.sqp_start +
				       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
				       init_attr->port_num - 1,
				       qp);
		if (err) {
			kfree(sqp);
			return ERR_PTR(err);
		}

		qp->port	= init_attr->port_num;
		qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

		break;
	}
	default:
		/* Don't support raw QPs */
		return ERR_PTR(-EINVAL);
	}

	return &qp->ibqp;
}

int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
	struct mlx4_ib_dev *dev = to_mdev(qp->device);
	struct mlx4_ib_qp *mqp = to_mqp(qp);

	if (is_qp0(dev, mqp))
		mlx4_CLOSE_PORT(dev->dev, mqp->port);

	destroy_qp_common(dev, mqp, !!qp->pd->uobject);

	if (is_sqp(dev, mqp))
		kfree(to_msqp(mqp));
	else
		kfree(mqp);

	return 0;
}

static int to_mlx4_st(enum ib_qp_type type)
{
	switch (type) {
	case IB_QPT_RC:		return MLX4_QP_ST_RC;
	case IB_QPT_UC:		return MLX4_QP_ST_UC;
	case IB_QPT_UD:		return MLX4_QP_ST_UD;
	case IB_QPT_SMI:
	case IB_QPT_GSI:	return MLX4_QP_ST_MLX;
	default:		return -1;
	}
}

static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
				   int attr_mask)
{
	u8 dest_rd_atomic;
	u32 access_flags;
	u32 hw_access_flags = 0;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		dest_rd_atomic = attr->max_dest_rd_atomic;
	else
		dest_rd_atomic = qp->resp_depth;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		access_flags = attr->qp_access_flags;
	else
		access_flags = qp->atomic_rd_en;

	if (!dest_rd_atomic)
		access_flags &= IB_ACCESS_REMOTE_WRITE;

	if (access_flags & IB_ACCESS_REMOTE_READ)
		hw_access_flags |= MLX4_QP_BIT_RRE;
	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
		hw_access_flags |= MLX4_QP_BIT_RAE;
	if (access_flags & IB_ACCESS_REMOTE_WRITE)
		hw_access_flags |= MLX4_QP_BIT_RWE;

	return cpu_to_be32(hw_access_flags);
}

static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
			    int attr_mask)
{
	if (attr_mask & IB_QP_PKEY_INDEX)
		sqp->pkey_index = attr->pkey_index;
	if (attr_mask & IB_QP_QKEY)
		sqp->qkey = attr->qkey;
	if (attr_mask & IB_QP_SQ_PSN)
		sqp->send_psn = attr->sq_psn;
}

static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
{
	path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}

static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
			 struct mlx4_qp_path *path, u8 port)
{
	path->grh_mylmc = ah->src_path_bits & 0x7f;
	path->rlid	= cpu_to_be16(ah->dlid);
	if (ah->static_rate) {
		path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
		while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
		       !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
			--path->static_rate;
	} else
		path->static_rate = 0;
	path->counter_index = 0xff;

	if (ah->ah_flags & IB_AH_GRH) {
		if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
			printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
			       ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
			return -1;
		}

		path->grh_mylmc |= 1 << 7;
		path->mgid_index = ah->grh.sgid_index;
		path->hop_limit  = ah->grh.hop_limit;
		path->tclass_flowlabel =
			cpu_to_be32((ah->grh.traffic_class << 20) |
				    (ah->grh.flow_label));
		memcpy(path->rgid, ah->grh.dgid.raw, 16);
	}

	path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
		((port - 1) << 6) | ((ah->sl & 0xf) << 2);

	return 0;
}

static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
			       const struct ib_qp_attr *attr, int attr_mask,
			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx4_ib_qp *qp = to_mqp(ibqp);
	struct mlx4_qp_context *context;
	enum mlx4_qp_optpar optpar = 0;
	int sqd_event;
	int err = -EINVAL;

	context = kzalloc(sizeof *context, GFP_KERNEL);
	if (!context)
		return -ENOMEM;

	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
				     (to_mlx4_st(ibqp->qp_type) << 16));
	context->flags |= cpu_to_be32(1 << 8); /* DE? */

	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
		context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
	else {
		optpar |= MLX4_QP_OPTPAR_PM_STATE;
		switch (attr->path_mig_state) {
		case IB_MIG_MIGRATED:
			context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
			break;
		case IB_MIG_REARM:
			context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
			break;
		case IB_MIG_ARMED:
			context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
			break;
		}
	}

	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
	    ibqp->qp_type == IB_QPT_UD)
		context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
	else if (attr_mask & IB_QP_PATH_MTU) {
		if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
			printk(KERN_ERR "path MTU (%u) is invalid\n",
			       attr->path_mtu);
			goto out;	/* don't leak the QP context */
		}
		context->mtu_msgmax = (attr->path_mtu << 5) | 31;
	}

	if (qp->rq.wqe_cnt)
		context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
	context->rq_size_stride |= qp->rq.wqe_shift - 4;

	if (qp->sq.wqe_cnt)
		context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
	context->sq_size_stride |= qp->sq.wqe_shift - 4;

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		context->sq_size_stride |= !!qp->sq_no_prefetch << 7;

	if (qp->ibqp.uobject)
		context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
	else
		context->usr_page = cpu_to_be32(dev->priv_uar.index);

	if (attr_mask & IB_QP_DEST_QPN)
		context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

	if (attr_mask & IB_QP_PORT) {
		if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
		    !(attr_mask & IB_QP_AV)) {
			mlx4_set_sched(&context->pri_path, attr->port_num);
			optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
		}
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		context->pri_path.pkey_index = attr->pkey_index;
		optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
	}

	if (attr_mask & IB_QP_AV) {
		if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
				  attr_mask & IB_QP_PORT ?
				  attr->port_num : qp->port)) {
			err = -EINVAL;
			goto out;
		}

		optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
			   MLX4_QP_OPTPAR_SCHED_QUEUE);
	}

	if (attr_mask & IB_QP_TIMEOUT) {
		context->pri_path.ackto = attr->timeout << 3;
		optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		/* On validation failure, goto out so the context is freed */
		if (attr->alt_port_num == 0 ||
		    attr->alt_port_num > dev->dev->caps.num_ports)
			goto out;

		if (attr->alt_pkey_index >=
		    dev->dev->caps.pkey_table_len[attr->alt_port_num])
			goto out;

		if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
				  attr->alt_port_num))
			goto out;

		context->alt_path.pkey_index = attr->alt_pkey_index;
		context->alt_path.ackto = attr->alt_timeout << 3;
		optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
	}

	context->pd	 = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
	context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);

	if (attr_mask & IB_QP_RNR_RETRY) {
		context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
		optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
		optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		if (attr->max_rd_atomic)
			context->params1 |=
				cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
		optpar |= MLX4_QP_OPTPAR_SRA_MAX;
	}

	if (attr_mask & IB_QP_SQ_PSN)
		context->next_send_psn = cpu_to_be32(attr->sq_psn);

	context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		if (attr->max_dest_rd_atomic)
			context->params2 |=
				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
		optpar |= MLX4_QP_OPTPAR_RRA_MAX;
	}

	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
		context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
		optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
	}

	if (ibqp->srq)
		context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
		optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
	}
	if (attr_mask & IB_QP_RQ_PSN)
		context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

	context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);

	if (attr_mask & IB_QP_QKEY) {
		context->qkey = cpu_to_be32(attr->qkey);
		optpar |= MLX4_QP_OPTPAR_Q_KEY;
	}

	if (ibqp->srq)
		context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);

	if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		context->db_rec_addr = cpu_to_be64(qp->db.dma);

	if (cur_state == IB_QPS_INIT &&
	    new_state == IB_QPS_RTR &&
	    (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
	     ibqp->qp_type == IB_QPT_UD)) {
		context->pri_path.sched_queue = (qp->port - 1) << 6;
		if (is_qp0(dev, qp))
			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
		else
			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
	}

	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
		sqd_event = 1;
	else
		sqd_event = 0;

	/*
	 * Before passing a kernel QP to the HW, make sure that the
	 * ownership bits of the send queue are set and the SQ
	 * headroom is stamped so that the hardware doesn't start
	 * processing stale work requests.
	 */
	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		struct mlx4_wqe_ctrl_seg *ctrl;
		int i;

		for (i = 0; i < qp->sq.wqe_cnt; ++i) {
			ctrl = get_send_wqe(qp, i);
			ctrl->owner_opcode = cpu_to_be32(1 << 31);

			stamp_send_wqe(qp, i);
		}
	}

	err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
			     to_mlx4_state(new_state), context, optpar,
			     sqd_event, &qp->mqp);
	if (err)
		goto out;

	qp->state = new_state;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->atomic_rd_en = attr->qp_access_flags;
	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->resp_depth = attr->max_dest_rd_atomic;
	if (attr_mask & IB_QP_PORT)
		qp->port = attr->port_num;
	if (attr_mask & IB_QP_ALT_PATH)
		qp->alt_port = attr->alt_port_num;

	if (is_sqp(dev, qp))
		store_sqp_attrs(to_msqp(qp), attr, attr_mask);

	/*
	 * If we moved QP0 to RTR, bring the IB link up; if we moved
	 * QP0 to RESET or ERROR, bring the link back down.
	 */
	if (is_qp0(dev, qp)) {
		if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
			if (mlx4_INIT_PORT(dev->dev, qp->port))
				printk(KERN_WARNING "INIT_PORT failed for port %d\n",
				       qp->port);

		if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
		    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
			mlx4_CLOSE_PORT(dev->dev, qp->port);
	}

	/*
	 * If we moved a kernel QP to RESET, clean up all old CQ
	 * entries and reinitialize the QP.
	 */
	if (new_state == IB_QPS_RESET && !ibqp->uobject) {
		mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
				 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
		if (ibqp->send_cq != ibqp->recv_cq)
			mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);

		qp->rq.head = 0;
		qp->rq.tail = 0;
		qp->sq.head = 0;
		qp->sq.tail = 0;
		if (!ibqp->srq)
			*qp->db.db = 0;
	}

out:
	kfree(context);
	return err;
}

static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
	[IB_QPT_UD]  = (IB_QP_PKEY_INDEX	|
			IB_QP_PORT		|
			IB_QP_QKEY),
	[IB_QPT_UC]  = (IB_QP_PKEY_INDEX	|
			IB_QP_PORT		|
			IB_QP_ACCESS_FLAGS),
	[IB_QPT_RC]  = (IB_QP_PKEY_INDEX	|
			IB_QP_PORT		|
			IB_QP_ACCESS_FLAGS),
	[IB_QPT_SMI] = (IB_QP_PKEY_INDEX	|
			IB_QP_QKEY),
	[IB_QPT_GSI] = (IB_QP_PKEY_INDEX	|
			IB_QP_QKEY),
};

int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		      int attr_mask, struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx4_ib_qp *qp = to_mqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	int err = -EINVAL;

	mutex_lock(&qp->mutex);

	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
		goto out;

	if ((attr_mask & IB_QP_PORT) &&
	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
		goto out;
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
		if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
			goto out;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
	    attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
		goto out;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
	    attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
		goto out;
	}

	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
		err = 0;
		goto out;
	}

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
		err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
					  mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
					  IB_QPS_RESET, IB_QPS_INIT);
		if (err)
			goto out;
		cur_state = IB_QPS_INIT;
	}

	err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);

out:
	mutex_unlock(&qp->mutex);
	return err;
}

static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
			    void *wqe)
{
	struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
	struct mlx4_wqe_mlx_seg *mlx = wqe;
	struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
	struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
	u16 pkey;
	int send_size;
	int header_size;
	int spc;
	int i;

	send_size = 0;
	for (i = 0; i < wr->num_sge; ++i)
		send_size += wr->sg_list[i].length;

	ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);

	sqp->ud_header.lrh.service_level =
		be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
	sqp->ud_header.lrh.destination_lid = ah->av.dlid;
	sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
	if (mlx4_ib_ah_grh_present(ah)) {
		sqp->ud_header.grh.traffic_class =
			(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
		sqp->ud_header.grh.flow_label =
			ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
		sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
		ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
				  ah->av.gid_index, &sqp->ud_header.grh.source_gid);
		memcpy(sqp->ud_header.grh.destination_gid.raw,
		       ah->av.dgid, 16);
	}

	mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
	mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
				  (sqp->ud_header.lrh.destination_lid ==
				   IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
				  (sqp->ud_header.lrh.service_level << 8));
	mlx->rlid = sqp->ud_header.lrh.destination_lid;

	switch (wr->opcode) {
	case IB_WR_SEND:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
		sqp->ud_header.immediate_present = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		sqp->ud_header.immediate_present = 1;
		sqp->ud_header.immediate_data = wr->imm_data;
		break;
	default:
		return -EINVAL;
	}

	sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
	if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
		sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
	if (!sqp->qp.ibqp.qp_num)
		ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
	else
		ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
	sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
					       sqp->qkey : wr->wr.ud.remote_qkey);
	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

	header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);

	if (0) {
		printk(KERN_ERR "built UD header of size %d:\n", header_size);
		for (i = 0; i < header_size / 4; ++i) {
			if (i % 8 == 0)
				printk(" [%02x] ", i * 4);
			printk(" %08x",
			       be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
			if ((i + 1) % 8 == 0)
				printk("\n");
		}
		printk("\n");
	}

	/*
	 * Inline data segments may not cross a 64 byte boundary.  If
	 * our UD header is bigger than the space available up to the
	 * next 64 byte boundary in the WQE, use two inline data
	 * segments to hold the UD header.
	 */
	spc = MLX4_INLINE_ALIGN -
		((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
	if (header_size <= spc) {
		inl->byte_count = cpu_to_be32(1 << 31 | header_size);
		memcpy(inl + 1, sqp->header_buf, header_size);
		i = 1;
	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		memcpy(inl + 1, sqp->header_buf, spc);

		inl = (void *) (inl + 1) + spc;
		memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
		/*
		 * Need a barrier here to make sure all the data is
		 * visible before the byte_count field is set.
		 * Otherwise the HCA prefetcher could grab the 64-byte
		 * chunk with this inline segment and get a valid (!=
		 * 0xffffffff) byte count but stale data, and end up
		 * generating a packet with bad headers.
		 *
		 * The first inline segment's byte_count field doesn't
		 * need a barrier, because it comes after a
		 * control/MLX segment and therefore is at an offset
		 * of 16 mod 64.
		 */
		wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
		i = 2;
	}

	return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
}

static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
	unsigned cur;
	struct mlx4_ib_cq *cq;

	cur = wq->head - wq->tail;
	if (likely(cur + nreq < wq->max_post))
		return 0;

	cq = to_mcq(ib_cq);
	spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	spin_unlock(&cq->lock);

	return cur + nreq >= wq->max_post;
}

int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		      struct ib_send_wr **bad_wr)
{
	struct mlx4_ib_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl;
	unsigned long flags;
	int nreq;
	int err = 0;
	int ind;
	int size;
	int i;

	/* Take the SQ lock (not the RQ lock) when posting sends */
	spin_lock_irqsave(&qp->sq.lock, flags);

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->srcrb_flags =
			(wr->send_flags & IB_SEND_SIGNALED ?
			 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IB_SEND_SOLICITED ?
			 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
			qp->sq_signal_bits;

		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IB_QPT_RC:
		case IB_QPT_UC:
			switch (wr->opcode) {
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.atomic.remote_addr);
				((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.atomic.rkey);
				((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mlx4_wqe_raddr_seg);

				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
					((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
						cpu_to_be64(wr->wr.atomic.swap);
					((struct mlx4_wqe_atomic_seg *) wqe)->compare =
						cpu_to_be64(wr->wr.atomic.compare_add);
				} else {
					((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
						cpu_to_be64(wr->wr.atomic.compare_add);
					((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
				}

				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.rdma.remote_addr);
				((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.rdma.rkey);
				((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;

				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;

				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IB_QPT_UD:
			memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
			       &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
			((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
				cpu_to_be32(wr->wr.ud.remote_qpn);
			((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
				cpu_to_be32(wr->wr.ud.remote_qkey);

			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			break;

		case IB_QPT_SMI:
		case IB_QPT_GSI:
			err = build_mlx_header(to_msqp(qp), wr, ctrl);
			if (err < 0) {
				*bad_wr = wr;
				goto out;
			}
			wqe  += err;
			size += err / 16;

			err = 0;
			break;

		default:
			break;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mlx4_wqe_data_seg *) wqe)->byte_count =
				cpu_to_be32(wr->sg_list[i].length);
			((struct mlx4_wqe_data_seg *) wqe)->lkey =
				cpu_to_be32(wr->sg_list[i].lkey);
			((struct mlx4_wqe_data_seg *) wqe)->addr =
				cpu_to_be64(wr->sg_list[i].addr);

			wqe  += sizeof (struct mlx4_wqe_data_seg);
			size += sizeof (struct mlx4_wqe_data_seg) / 16;
		}

		/* Add one more inline data segment for ICRC for MLX sends */
		if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
			((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
				cpu_to_be32((1 << 31) | 4);
			((u32 *) wqe)[1] = 0;
			wqe  += sizeof (struct mlx4_wqe_data_seg);
			size += sizeof (struct mlx4_wqe_data_seg) / 16;
		}

		ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		wmb();

		if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		/*
		 * The ownership bit flips on each pass around the (power
		 * of two sized) send queue: "ind & qp->sq.wqe_cnt" picks
		 * out the bit just above the index bits, which toggles
		 * every time the producer index wraps.
		 */
		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	if (likely(nreq)) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		writel(qp->doorbell_qpn,
		       to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);

		/*
		 * Make sure doorbells don't leak out of SQ spinlock
		 * and reach the HCA out of order.
		 */
		mmiowb();

		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));
	}

	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}

int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr)
{
	struct mlx4_ib_qp *qp = to_mqp(ibqp);
	struct mlx4_wqe_data_seg *scat;
	unsigned long flags;
	int err = 0;
	int nreq;
	int ind;
	int i;

	spin_lock_irqsave(&qp->rq.lock, flags);

	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		/* RQ tail is advanced when polling the receive CQ */
		if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		scat = get_recv_wqe(qp, ind);

		for (i = 0; i < wr->num_sge; ++i) {
			scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
			scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
			scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
		}

		if (i < qp->rq.max_gs) {
			scat[i].byte_count = 0;
			scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);
			scat[i].addr       = 0;
		}

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);

	return err;
}