1 /* 2 * Copyright (c) 2016 HGST, a Western Digital Company. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 */ 13 #include <linux/moduleparam.h> 14 #include <linux/slab.h> 15 #include <linux/pci-p2pdma.h> 16 #include <rdma/mr_pool.h> 17 #include <rdma/rw.h> 18 19 enum { 20 RDMA_RW_SINGLE_WR, 21 RDMA_RW_MULTI_WR, 22 RDMA_RW_MR, 23 RDMA_RW_SIG_MR, 24 }; 25 26 static bool rdma_rw_force_mr; 27 module_param_named(force_mr, rdma_rw_force_mr, bool, 0); 28 MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations"); 29 30 /* 31 * Check if the device might use memory registration. This is currently only 32 * true for iWarp devices. In the future we can hopefully fine tune this based 33 * on HCA driver input. 34 */ 35 static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) 36 { 37 if (rdma_protocol_iwarp(dev, port_num)) 38 return true; 39 if (unlikely(rdma_rw_force_mr)) 40 return true; 41 return false; 42 } 43 44 /* 45 * Check if the device will use memory registration for this RW operation. 46 * We currently always use memory registrations for iWarp RDMA READs, and 47 * have a debug option to force usage of MRs. 48 * 49 * XXX: In the future we can hopefully fine tune this based on HCA driver 50 * input. 51 */ 52 static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, 53 enum dma_data_direction dir, int dma_nents) 54 { 55 if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE) 56 return true; 57 if (unlikely(rdma_rw_force_mr)) 58 return true; 59 return false; 60 } 61 62 static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) 63 { 64 /* arbitrary limit to avoid allocating gigantic resources */ 65 return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); 66 } 67 68 static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg) 69 { 70 int count = 0; 71 72 if (reg->mr->need_inval) { 73 reg->inv_wr.opcode = IB_WR_LOCAL_INV; 74 reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; 75 reg->inv_wr.next = ®->reg_wr.wr; 76 count++; 77 } else { 78 reg->inv_wr.next = NULL; 79 } 80 81 return count; 82 } 83 84 /* Caller must have zero-initialized *reg. */ 85 static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, 86 struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, 87 u32 sg_cnt, u32 offset) 88 { 89 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 90 u32 nents = min(sg_cnt, pages_per_mr); 91 int count = 0, ret; 92 93 reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs); 94 if (!reg->mr) 95 return -EAGAIN; 96 97 count += rdma_rw_inv_key(reg); 98 99 ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); 100 if (ret < 0 || ret < nents) { 101 ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); 102 return -EINVAL; 103 } 104 105 reg->reg_wr.wr.opcode = IB_WR_REG_MR; 106 reg->reg_wr.mr = reg->mr; 107 reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE; 108 if (rdma_protocol_iwarp(qp->device, port_num)) 109 reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE; 110 count++; 111 112 reg->sge.addr = reg->mr->iova; 113 reg->sge.length = reg->mr->length; 114 return count; 115 } 116 117 static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 118 u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset, 119 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 120 { 121 struct rdma_rw_reg_ctx *prev = NULL; 122 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 123 int i, j, ret = 0, count = 0; 124 125 ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr; 126 ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL); 127 if (!ctx->reg) { 128 ret = -ENOMEM; 129 goto out; 130 } 131 132 for (i = 0; i < ctx->nr_ops; i++) { 133 struct rdma_rw_reg_ctx *reg = &ctx->reg[i]; 134 u32 nents = min(sg_cnt, pages_per_mr); 135 136 ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt, 137 offset); 138 if (ret < 0) 139 goto out_free; 140 count += ret; 141 142 if (prev) { 143 if (reg->mr->need_inval) 144 prev->wr.wr.next = ®->inv_wr; 145 else 146 prev->wr.wr.next = ®->reg_wr.wr; 147 } 148 149 reg->reg_wr.wr.next = ®->wr.wr; 150 151 reg->wr.wr.sg_list = ®->sge; 152 reg->wr.wr.num_sge = 1; 153 reg->wr.remote_addr = remote_addr; 154 reg->wr.rkey = rkey; 155 if (dir == DMA_TO_DEVICE) { 156 reg->wr.wr.opcode = IB_WR_RDMA_WRITE; 157 } else if (!rdma_cap_read_inv(qp->device, port_num)) { 158 reg->wr.wr.opcode = IB_WR_RDMA_READ; 159 } else { 160 reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; 161 reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey; 162 } 163 count++; 164 165 remote_addr += reg->sge.length; 166 sg_cnt -= nents; 167 for (j = 0; j < nents; j++) 168 sg = sg_next(sg); 169 prev = reg; 170 offset = 0; 171 } 172 173 if (prev) 174 prev->wr.wr.next = NULL; 175 176 ctx->type = RDMA_RW_MR; 177 return count; 178 179 out_free: 180 while (--i >= 0) 181 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr); 182 kfree(ctx->reg); 183 out: 184 return ret; 185 } 186 187 static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 188 struct scatterlist *sg, u32 sg_cnt, u32 offset, 189 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 190 { 191 u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge : 192 qp->max_read_sge; 193 struct ib_sge *sge; 194 u32 total_len = 0, i, j; 195 196 ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge); 197 198 ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL); 199 if (!ctx->map.sges) 200 goto out; 201 202 ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL); 203 if (!ctx->map.wrs) 204 goto out_free_sges; 205 206 for (i = 0; i < ctx->nr_ops; i++) { 207 struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i]; 208 u32 nr_sge = min(sg_cnt, max_sge); 209 210 if (dir == DMA_TO_DEVICE) 211 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 212 else 213 rdma_wr->wr.opcode = IB_WR_RDMA_READ; 214 rdma_wr->remote_addr = remote_addr + total_len; 215 rdma_wr->rkey = rkey; 216 rdma_wr->wr.num_sge = nr_sge; 217 rdma_wr->wr.sg_list = sge; 218 219 for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { 220 sge->addr = sg_dma_address(sg) + offset; 221 sge->length = sg_dma_len(sg) - offset; 222 sge->lkey = qp->pd->local_dma_lkey; 223 224 total_len += sge->length; 225 sge++; 226 sg_cnt--; 227 offset = 0; 228 } 229 230 rdma_wr->wr.next = i + 1 < ctx->nr_ops ? 231 &ctx->map.wrs[i + 1].wr : NULL; 232 } 233 234 ctx->type = RDMA_RW_MULTI_WR; 235 return ctx->nr_ops; 236 237 out_free_sges: 238 kfree(ctx->map.sges); 239 out: 240 return -ENOMEM; 241 } 242 243 static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 244 struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey, 245 enum dma_data_direction dir) 246 { 247 struct ib_rdma_wr *rdma_wr = &ctx->single.wr; 248 249 ctx->nr_ops = 1; 250 251 ctx->single.sge.lkey = qp->pd->local_dma_lkey; 252 ctx->single.sge.addr = sg_dma_address(sg) + offset; 253 ctx->single.sge.length = sg_dma_len(sg) - offset; 254 255 memset(rdma_wr, 0, sizeof(*rdma_wr)); 256 if (dir == DMA_TO_DEVICE) 257 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 258 else 259 rdma_wr->wr.opcode = IB_WR_RDMA_READ; 260 rdma_wr->wr.sg_list = &ctx->single.sge; 261 rdma_wr->wr.num_sge = 1; 262 rdma_wr->remote_addr = remote_addr; 263 rdma_wr->rkey = rkey; 264 265 ctx->type = RDMA_RW_SINGLE_WR; 266 return 1; 267 } 268 269 /** 270 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context 271 * @ctx: context to initialize 272 * @qp: queue pair to operate on 273 * @port_num: port num to which the connection is bound 274 * @sg: scatterlist to READ/WRITE from/to 275 * @sg_cnt: number of entries in @sg 276 * @sg_offset: current byte offset into @sg 277 * @remote_addr:remote address to read/write (relative to @rkey) 278 * @rkey: remote key to operate on 279 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 280 * 281 * Returns the number of WQEs that will be needed on the workqueue if 282 * successful, or a negative error code. 283 */ 284 int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 285 struct scatterlist *sg, u32 sg_cnt, u32 sg_offset, 286 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 287 { 288 struct ib_device *dev = qp->pd->device; 289 int ret; 290 291 if (is_pci_p2pdma_page(sg_page(sg))) 292 ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); 293 else 294 ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); 295 296 if (!ret) 297 return -ENOMEM; 298 sg_cnt = ret; 299 300 /* 301 * Skip to the S/G entry that sg_offset falls into: 302 */ 303 for (;;) { 304 u32 len = sg_dma_len(sg); 305 306 if (sg_offset < len) 307 break; 308 309 sg = sg_next(sg); 310 sg_offset -= len; 311 sg_cnt--; 312 } 313 314 ret = -EIO; 315 if (WARN_ON_ONCE(sg_cnt == 0)) 316 goto out_unmap_sg; 317 318 if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) { 319 ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt, 320 sg_offset, remote_addr, rkey, dir); 321 } else if (sg_cnt > 1) { 322 ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, 323 remote_addr, rkey, dir); 324 } else { 325 ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, 326 remote_addr, rkey, dir); 327 } 328 329 if (ret < 0) 330 goto out_unmap_sg; 331 return ret; 332 333 out_unmap_sg: 334 ib_dma_unmap_sg(dev, sg, sg_cnt, dir); 335 return ret; 336 } 337 EXPORT_SYMBOL(rdma_rw_ctx_init); 338 339 /** 340 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload 341 * @ctx: context to initialize 342 * @qp: queue pair to operate on 343 * @port_num: port num to which the connection is bound 344 * @sg: scatterlist to READ/WRITE from/to 345 * @sg_cnt: number of entries in @sg 346 * @prot_sg: scatterlist to READ/WRITE protection information from/to 347 * @prot_sg_cnt: number of entries in @prot_sg 348 * @sig_attrs: signature offloading algorithms 349 * @remote_addr:remote address to read/write (relative to @rkey) 350 * @rkey: remote key to operate on 351 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 352 * 353 * Returns the number of WQEs that will be needed on the workqueue if 354 * successful, or a negative error code. 355 */ 356 int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 357 u8 port_num, struct scatterlist *sg, u32 sg_cnt, 358 struct scatterlist *prot_sg, u32 prot_sg_cnt, 359 struct ib_sig_attrs *sig_attrs, 360 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 361 { 362 struct ib_device *dev = qp->pd->device; 363 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 364 struct ib_rdma_wr *rdma_wr; 365 struct ib_send_wr *prev_wr = NULL; 366 int count = 0, ret; 367 368 if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) { 369 pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n", 370 sg_cnt, prot_sg_cnt, pages_per_mr); 371 return -EINVAL; 372 } 373 374 ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); 375 if (!ret) 376 return -ENOMEM; 377 sg_cnt = ret; 378 379 ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); 380 if (!ret) { 381 ret = -ENOMEM; 382 goto out_unmap_sg; 383 } 384 prot_sg_cnt = ret; 385 386 ctx->type = RDMA_RW_SIG_MR; 387 ctx->nr_ops = 1; 388 ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL); 389 if (!ctx->sig) { 390 ret = -ENOMEM; 391 goto out_unmap_prot_sg; 392 } 393 394 ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0); 395 if (ret < 0) 396 goto out_free_ctx; 397 count += ret; 398 prev_wr = &ctx->sig->data.reg_wr.wr; 399 400 ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, 401 prot_sg, prot_sg_cnt, 0); 402 if (ret < 0) 403 goto out_destroy_data_mr; 404 count += ret; 405 406 if (ctx->sig->prot.inv_wr.next) 407 prev_wr->next = &ctx->sig->prot.inv_wr; 408 else 409 prev_wr->next = &ctx->sig->prot.reg_wr.wr; 410 prev_wr = &ctx->sig->prot.reg_wr.wr; 411 412 ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs); 413 if (!ctx->sig->sig_mr) { 414 ret = -EAGAIN; 415 goto out_destroy_prot_mr; 416 } 417 418 if (ctx->sig->sig_mr->need_inval) { 419 memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr)); 420 421 ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV; 422 ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey; 423 424 prev_wr->next = &ctx->sig->sig_inv_wr; 425 prev_wr = &ctx->sig->sig_inv_wr; 426 } 427 428 ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR; 429 ctx->sig->sig_wr.wr.wr_cqe = NULL; 430 ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge; 431 ctx->sig->sig_wr.wr.num_sge = 1; 432 ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; 433 ctx->sig->sig_wr.sig_attrs = sig_attrs; 434 ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr; 435 if (prot_sg_cnt) 436 ctx->sig->sig_wr.prot = &ctx->sig->prot.sge; 437 prev_wr->next = &ctx->sig->sig_wr.wr; 438 prev_wr = &ctx->sig->sig_wr.wr; 439 count++; 440 441 ctx->sig->sig_sge.addr = 0; 442 ctx->sig->sig_sge.length = ctx->sig->data.sge.length; 443 if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE) 444 ctx->sig->sig_sge.length += ctx->sig->prot.sge.length; 445 446 rdma_wr = &ctx->sig->data.wr; 447 rdma_wr->wr.sg_list = &ctx->sig->sig_sge; 448 rdma_wr->wr.num_sge = 1; 449 rdma_wr->remote_addr = remote_addr; 450 rdma_wr->rkey = rkey; 451 if (dir == DMA_TO_DEVICE) 452 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 453 else 454 rdma_wr->wr.opcode = IB_WR_RDMA_READ; 455 prev_wr->next = &rdma_wr->wr; 456 prev_wr = &rdma_wr->wr; 457 count++; 458 459 return count; 460 461 out_destroy_prot_mr: 462 if (prot_sg_cnt) 463 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); 464 out_destroy_data_mr: 465 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); 466 out_free_ctx: 467 kfree(ctx->sig); 468 out_unmap_prot_sg: 469 ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); 470 out_unmap_sg: 471 ib_dma_unmap_sg(dev, sg, sg_cnt, dir); 472 return ret; 473 } 474 EXPORT_SYMBOL(rdma_rw_ctx_signature_init); 475 476 /* 477 * Now that we are going to post the WRs we can update the lkey and need_inval 478 * state on the MRs. If we were doing this at init time, we would get double 479 * or missing invalidations if a context was initialized but not actually 480 * posted. 481 */ 482 static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval) 483 { 484 reg->mr->need_inval = need_inval; 485 ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey)); 486 reg->reg_wr.key = reg->mr->lkey; 487 reg->sge.lkey = reg->mr->lkey; 488 } 489 490 /** 491 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation 492 * @ctx: context to operate on 493 * @qp: queue pair to operate on 494 * @port_num: port num to which the connection is bound 495 * @cqe: completion queue entry for the last WR 496 * @chain_wr: WR to append to the posted chain 497 * 498 * Return the WR chain for the set of RDMA READ/WRITE operations described by 499 * @ctx, as well as any memory registration operations needed. If @chain_wr 500 * is non-NULL the WR it points to will be appended to the chain of WRs posted. 501 * If @chain_wr is not set @cqe must be set so that the caller gets a 502 * completion notification. 503 */ 504 struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 505 u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr) 506 { 507 struct ib_send_wr *first_wr, *last_wr; 508 int i; 509 510 switch (ctx->type) { 511 case RDMA_RW_SIG_MR: 512 rdma_rw_update_lkey(&ctx->sig->data, true); 513 if (ctx->sig->prot.mr) 514 rdma_rw_update_lkey(&ctx->sig->prot, true); 515 516 ctx->sig->sig_mr->need_inval = true; 517 ib_update_fast_reg_key(ctx->sig->sig_mr, 518 ib_inc_rkey(ctx->sig->sig_mr->lkey)); 519 ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey; 520 521 if (ctx->sig->data.inv_wr.next) 522 first_wr = &ctx->sig->data.inv_wr; 523 else 524 first_wr = &ctx->sig->data.reg_wr.wr; 525 last_wr = &ctx->sig->data.wr.wr; 526 break; 527 case RDMA_RW_MR: 528 for (i = 0; i < ctx->nr_ops; i++) { 529 rdma_rw_update_lkey(&ctx->reg[i], 530 ctx->reg[i].wr.wr.opcode != 531 IB_WR_RDMA_READ_WITH_INV); 532 } 533 534 if (ctx->reg[0].inv_wr.next) 535 first_wr = &ctx->reg[0].inv_wr; 536 else 537 first_wr = &ctx->reg[0].reg_wr.wr; 538 last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr; 539 break; 540 case RDMA_RW_MULTI_WR: 541 first_wr = &ctx->map.wrs[0].wr; 542 last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr; 543 break; 544 case RDMA_RW_SINGLE_WR: 545 first_wr = &ctx->single.wr.wr; 546 last_wr = &ctx->single.wr.wr; 547 break; 548 default: 549 BUG(); 550 } 551 552 if (chain_wr) { 553 last_wr->next = chain_wr; 554 } else { 555 last_wr->wr_cqe = cqe; 556 last_wr->send_flags |= IB_SEND_SIGNALED; 557 } 558 559 return first_wr; 560 } 561 EXPORT_SYMBOL(rdma_rw_ctx_wrs); 562 563 /** 564 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation 565 * @ctx: context to operate on 566 * @qp: queue pair to operate on 567 * @port_num: port num to which the connection is bound 568 * @cqe: completion queue entry for the last WR 569 * @chain_wr: WR to append to the posted chain 570 * 571 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as 572 * any memory registration operations needed. If @chain_wr is non-NULL the 573 * WR it points to will be appended to the chain of WRs posted. If @chain_wr 574 * is not set @cqe must be set so that the caller gets a completion 575 * notification. 576 */ 577 int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 578 struct ib_cqe *cqe, struct ib_send_wr *chain_wr) 579 { 580 struct ib_send_wr *first_wr; 581 582 first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr); 583 return ib_post_send(qp, first_wr, NULL); 584 } 585 EXPORT_SYMBOL(rdma_rw_ctx_post); 586 587 /** 588 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init 589 * @ctx: context to release 590 * @qp: queue pair to operate on 591 * @port_num: port num to which the connection is bound 592 * @sg: scatterlist that was used for the READ/WRITE 593 * @sg_cnt: number of entries in @sg 594 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 595 */ 596 void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 597 struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) 598 { 599 int i; 600 601 switch (ctx->type) { 602 case RDMA_RW_MR: 603 for (i = 0; i < ctx->nr_ops; i++) 604 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr); 605 kfree(ctx->reg); 606 break; 607 case RDMA_RW_MULTI_WR: 608 kfree(ctx->map.wrs); 609 kfree(ctx->map.sges); 610 break; 611 case RDMA_RW_SINGLE_WR: 612 break; 613 default: 614 BUG(); 615 break; 616 } 617 618 /* P2PDMA contexts do not need to be unmapped */ 619 if (!is_pci_p2pdma_page(sg_page(sg))) 620 ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 621 } 622 EXPORT_SYMBOL(rdma_rw_ctx_destroy); 623 624 /** 625 * rdma_rw_ctx_destroy_signature - release all resources allocated by 626 * rdma_rw_ctx_signature_init 627 * @ctx: context to release 628 * @qp: queue pair to operate on 629 * @port_num: port num to which the connection is bound 630 * @sg: scatterlist that was used for the READ/WRITE 631 * @sg_cnt: number of entries in @sg 632 * @prot_sg: scatterlist that was used for the READ/WRITE of the PI 633 * @prot_sg_cnt: number of entries in @prot_sg 634 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 635 */ 636 void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 637 u8 port_num, struct scatterlist *sg, u32 sg_cnt, 638 struct scatterlist *prot_sg, u32 prot_sg_cnt, 639 enum dma_data_direction dir) 640 { 641 if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR)) 642 return; 643 644 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); 645 ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 646 647 if (ctx->sig->prot.mr) { 648 ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); 649 ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); 650 } 651 652 ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr); 653 kfree(ctx->sig); 654 } 655 EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); 656 657 /** 658 * rdma_rw_mr_factor - return number of MRs required for a payload 659 * @device: device handling the connection 660 * @port_num: port num to which the connection is bound 661 * @maxpages: maximum payload pages per rdma_rw_ctx 662 * 663 * Returns the number of MRs the device requires to move @maxpayload 664 * bytes. The returned value is used during transport creation to 665 * compute max_rdma_ctxts and the size of the transport's Send and 666 * Send Completion Queues. 667 */ 668 unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, 669 unsigned int maxpages) 670 { 671 unsigned int mr_pages; 672 673 if (rdma_rw_can_use_mr(device, port_num)) 674 mr_pages = rdma_rw_fr_page_list_len(device); 675 else 676 mr_pages = device->attrs.max_sge_rd; 677 return DIV_ROUND_UP(maxpages, mr_pages); 678 } 679 EXPORT_SYMBOL(rdma_rw_mr_factor); 680 681 void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) 682 { 683 u32 factor; 684 685 WARN_ON_ONCE(attr->port_num == 0); 686 687 /* 688 * Each context needs at least one RDMA READ or WRITE WR. 689 * 690 * For some hardware we might need more, eventually we should ask the 691 * HCA driver for a multiplier here. 692 */ 693 factor = 1; 694 695 /* 696 * If the devices needs MRs to perform RDMA READ or WRITE operations, 697 * we'll need two additional MRs for the registrations and the 698 * invalidation. 699 */ 700 if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) 701 factor += 6; /* (inv + reg) * (data + prot + sig) */ 702 else if (rdma_rw_can_use_mr(dev, attr->port_num)) 703 factor += 2; /* inv + reg */ 704 705 attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs; 706 707 /* 708 * But maybe we were just too high in the sky and the device doesn't 709 * even support all we need, and we'll have to live with what we get.. 710 */ 711 attr->cap.max_send_wr = 712 min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr); 713 } 714 715 int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) 716 { 717 struct ib_device *dev = qp->pd->device; 718 u32 nr_mrs = 0, nr_sig_mrs = 0; 719 int ret = 0; 720 721 if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) { 722 nr_sig_mrs = attr->cap.max_rdma_ctxs; 723 nr_mrs = attr->cap.max_rdma_ctxs * 2; 724 } else if (rdma_rw_can_use_mr(dev, attr->port_num)) { 725 nr_mrs = attr->cap.max_rdma_ctxs; 726 } 727 728 if (nr_mrs) { 729 ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs, 730 IB_MR_TYPE_MEM_REG, 731 rdma_rw_fr_page_list_len(dev), 0); 732 if (ret) { 733 pr_err("%s: failed to allocated %d MRs\n", 734 __func__, nr_mrs); 735 return ret; 736 } 737 } 738 739 if (nr_sig_mrs) { 740 ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs, 741 IB_MR_TYPE_SIGNATURE, 2, 0); 742 if (ret) { 743 pr_err("%s: failed to allocated %d SIG MRs\n", 744 __func__, nr_sig_mrs); 745 goto out_free_rdma_mrs; 746 } 747 } 748 749 return 0; 750 751 out_free_rdma_mrs: 752 ib_mr_pool_destroy(qp, &qp->rdma_mrs); 753 return ret; 754 } 755 756 void rdma_rw_cleanup_mrs(struct ib_qp *qp) 757 { 758 ib_mr_pool_destroy(qp, &qp->sig_mrs); 759 ib_mr_pool_destroy(qp, &qp->rdma_mrs); 760 } 761