/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"

static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
{
	struct ib_event event;
	struct ib_cq *ibcq;

	if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
		printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
		       "on CQ %06x\n", type, cq->cqn);
		return;
	}

	ibcq = &to_mibcq(cq)->ibcq;
	if (ibcq->event_handler) {
		event.device     = ibcq->device;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
	int offset = n * sizeof (struct mlx4_cqe);

	if (buf->buf.nbufs == 1)
		return buf->buf.u.direct.buf + offset;
	else
		return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
			(offset & (PAGE_SIZE - 1));
}

static void *get_cqe(struct mlx4_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n);
}
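
/*
 * A CQE belongs to software when its ownership bit matches the parity
 * of the index used to look it up: since the CQ size is a power of
 * two, n & (cq->ibcq.cqe + 1) isolates the bit that toggles each time
 * the free-running index wraps around the ring.  get_sw_cqe() returns
 * the entry only when that test says software owns it, and NULL while
 * the entry is still owned by hardware.
 */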
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);

	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}

static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
				struct ib_ucontext *context,
				struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_cq *cq;
	struct mlx4_uar *uar;
	int buf_size;
	int err;

	if (entries < 1 || entries > dev->dev->caps.max_cqes)
		return ERR_PTR(-EINVAL);

	cq = kmalloc(sizeof *cq, GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	entries      = roundup_pow_of_two(entries + 1);
	cq->ibcq.cqe = entries - 1;
	buf_size     = entries * sizeof (struct mlx4_cqe);
	spin_lock_init(&cq->lock);

	if (context) {
		struct mlx4_ib_create_cq ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err_cq;
		}

		cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
				       IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(cq->umem)) {
			err = PTR_ERR(cq->umem);
			goto err_cq;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
				    ilog2(cq->umem->page_size), &cq->buf.mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
		if (err)
			goto err_mtt;

		err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
					  &cq->db);
		if (err)
			goto err_mtt;

		uar = &to_mucontext(context)->uar;
	} else {
		err = mlx4_ib_db_alloc(dev, &cq->db, 1);
		if (err)
			goto err_cq;

		cq->mcq.set_ci_db  = cq->db.db;
		cq->mcq.arm_db     = cq->db.db + 1;
		*cq->mcq.set_ci_db = 0;
		*cq->mcq.arm_db    = 0;

		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
			err = -ENOMEM;
			goto err_db;
		}

		err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
				    &cq->buf.mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
		if (err)
			goto err_mtt;

		uar = &dev->priv_uar;
	}

	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
			    cq->db.dma, &cq->mcq);
	if (err)
		goto err_dbmap;

	cq->mcq.comp  = mlx4_ib_cq_comp;
	cq->mcq.event = mlx4_ib_cq_event;

	if (context)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
			err = -EFAULT;
			goto err_dbmap;
		}

	return &cq->ibcq;

err_dbmap:
	if (context)
		mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_mtt:
	mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);

err_buf:
	if (context)
		ib_umem_release(cq->umem);
	else
		mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
			      &cq->buf.buf);

err_db:
	if (!context)
		mlx4_ib_db_free(dev, &cq->db);

err_cq:
	kfree(cq);

	return ERR_PTR(err);
}

int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx4_ib_dev *dev = to_mdev(cq->device);
	struct mlx4_ib_cq *mcq = to_mcq(cq);

	mlx4_cq_free(dev->dev, &mcq->mcq);
	mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);

	if (cq->uobject) {
		mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
		ib_umem_release(mcq->umem);
	} else {
		mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
			      &mcq->buf.buf);
		mlx4_ib_db_free(dev, &mcq->db);
	}

	kfree(mcq);

	return 0;
}
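
/*
 * Error completions carry a syndrome instead of a normal opcode.
 * dump_cqe() prints the raw CQE, and mlx4_ib_handle_error_cqe()
 * translates the hardware syndrome into the matching IB work
 * completion status, dumping the CQE for local QP operation errors
 * to ease debugging.
 */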
static void dump_cqe(void *cqe)
{
	__be32 *buf = cqe;

	printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
	       be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
	       be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
	       be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
}

static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
				     struct ib_wc *wc)
{
	if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
		printk(KERN_DEBUG "local QP operation err "
		       "(QPN %06x, WQE index %x, vendor syndrome %02x, "
		       "opcode = %02x)\n",
		       be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
		       cqe->vendor_err_syndrome,
		       cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		dump_cqe(cqe);
	}

	switch (cqe->syndrome) {
	case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX4_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		break;
	case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_syndrome;
}
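
/*
 * Consume one software-owned CQE: advance the consumer index, find the
 * QP the completion belongs to (cached in *cur_qp so the lookup is
 * skipped when consecutive CQEs come from the same QP), retire the
 * corresponding send, receive or SRQ work request, and fill in *wc.
 * Returns -EAGAIN when there is nothing left to poll.
 */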
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
			    struct mlx4_ib_qp **cur_qp,
			    struct ib_wc *wc)
{
	struct mlx4_cqe *cqe;
	struct mlx4_qp *mqp;
	struct mlx4_ib_wq *wq;
	struct mlx4_ib_srq *srq;
	int is_send;
	int is_error;
	u16 wqe_ctr;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	++cq->mcq.cons_index;

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if (!*cur_qp ||
	    (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
				       be32_to_cpu(cqe->my_qpn));
		if (unlikely(!mqp)) {
			printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
			       cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp = &(*cur_qp)->ibqp;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if ((*cur_qp)->ibqp.srq) {
		srq = to_msrq((*cur_qp)->ibqp.srq);
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_ctr];
		mlx4_ib_free_srq_wqe(srq, wqe_ctr);
	} else {
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	if (unlikely(is_error)) {
		mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
		return 0;
	}

	wc->status = IB_WC_SUCCESS;

	if (is_send) {
		wc->wc_flags = 0;
		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
			wc->wc_flags |= IB_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case MLX4_OPCODE_SEND_IMM:
			wc->wc_flags |= IB_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_SEND:
			wc->opcode = IB_WC_SEND;
			break;
		case MLX4_OPCODE_RDMA_READ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = be32_to_cpu(cqe->byte_cnt);
			break;
		case MLX4_OPCODE_ATOMIC_CS:
			wc->opcode = IB_WC_COMP_SWAP;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_ATOMIC_FA:
			wc->opcode = IB_WC_FETCH_ADD;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_BIND_MW:
			wc->opcode = IB_WC_BIND_MW;
			break;
		}
	} else {
		wc->byte_len = be32_to_cpu(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IB_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode = IB_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode = IB_WC_RECV;
			wc->wc_flags = IB_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid = be16_to_cpu(cqe->rlid);
		wc->sl = cqe->sl >> 4;
		wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
		wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
		wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
			IB_WC_GRH : 0;
		wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16;
	}

	return 0;
}

int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx4_ib_cq *cq = to_mcq(ibcq);
	struct mlx4_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; ++npolled) {
		err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
		if (err)
			break;
	}

	if (npolled)
		mlx4_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
		return npolled;
	else
		return err;
}

int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	mlx4_cq_arm(&to_mcq(ibcq)->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
		    to_mdev(ibcq->device)->uar_map,
		    MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));

	return 0;
}
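
/*
 * __mlx4_ib_cq_clean() removes any completions still queued for a QP
 * that is being torn down (returning their SRQ WQEs, if any), so a
 * later poll cannot hand back a work completion for a QP that no
 * longer exists.  Callers must hold cq->lock; mlx4_ib_cq_clean()
 * below is the locking wrapper.
 */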
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
	u32 prod_index;
	int nfreed = 0;
	struct mlx4_cqe *cqe, *dest;
	u8 owner_bit;

	/*
	 * First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/*
	 * Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
			memcpy(dest, cqe, sizeof *cqe);
			dest->owner_sr_opcode = owner_bit |
				(dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/*
		 * Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx4_cq_set_ci(&cq->mcq);
	}
}

void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
	spin_lock_irq(&cq->lock);
	__mlx4_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}
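
/*
 * Usage note: these routines are not called directly by consumers.
 * They are plugged into the struct ib_device (create_cq, destroy_cq,
 * poll_cq, req_notify_cq, ...) when the mlx4_ib device is registered,
 * and are reached through the ib_verbs entry points such as
 * ib_create_cq(), ib_poll_cq() and ib_req_notify_cq().
 */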