// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 */
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>

enum {
	RDMA_RW_SINGLE_WR,
	RDMA_RW_MULTI_WR,
	RDMA_RW_MR,
	RDMA_RW_SIG_MR,
};

static bool rdma_rw_force_mr;
module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");

/*
 * Check if the device might use memory registration. This is currently only
 * true for iWarp devices. In the future we can hopefully fine tune this based
 * on HCA driver input.
 */
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
{
	if (rdma_protocol_iwarp(dev, port_num))
		return true;
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

/*
 * Check if the device will use memory registration for this RW operation.
 * We currently always use memory registrations for iWarp RDMA READs, and
 * have a debug option to force usage of MRs.
 *
 * XXX: In the future we can hopefully fine tune this based on HCA driver
 * input.
 */
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
		enum dma_data_direction dir, int dma_nents)
{
	if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
		return true;
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
{
	/* arbitrary limit to avoid allocating gigantic resources */
	return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
}
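
/*
 * Note (illustrative, not part of the original code): rw.c is linked into
 * ib_core, so the force_mr knob above can only be set at load time, e.g.
 * "modprobe ib_core force_mr=Y" or "ib_core.force_mr=Y" on the kernel
 * command line; with permissions of 0 it is not exposed in sysfs.
 *
 * With the 256-entry cap above, a mapped scatterlist of, say, 1000 entries
 * is split by rdma_rw_init_mr_wrs() below into
 * DIV_ROUND_UP(1000, 256) = 4 MR operations.
 */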

/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
		struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
		u32 sg_cnt, u32 offset)
{
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
	u32 nents = min(sg_cnt, pages_per_mr);
	int count = 0, ret;

	reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
	if (!reg->mr)
		return -EAGAIN;

	if (reg->mr->need_inval) {
		reg->inv_wr.opcode = IB_WR_LOCAL_INV;
		reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
		reg->inv_wr.next = &reg->reg_wr.wr;
		count++;
	} else {
		reg->inv_wr.next = NULL;
	}

	ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
	if (ret < 0 || ret < nents) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
		return -EINVAL;
	}

	reg->reg_wr.wr.opcode = IB_WR_REG_MR;
	reg->reg_wr.mr = reg->mr;
	reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	count++;

	reg->sge.addr = reg->mr->iova;
	reg->sge.length = reg->mr->length;
	return count;
}

static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct rdma_rw_reg_ctx *prev = NULL;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
	int i, j, ret = 0, count = 0;

	ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
	ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < ctx->nr_ops; i++) {
		struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
		u32 nents = min(sg_cnt, pages_per_mr);

		ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
				offset);
		if (ret < 0)
			goto out_free;
		count += ret;

		if (prev) {
			if (reg->mr->need_inval)
				prev->wr.wr.next = &reg->inv_wr;
			else
				prev->wr.wr.next = &reg->reg_wr.wr;
		}

		reg->reg_wr.wr.next = &reg->wr.wr;

		reg->wr.wr.sg_list = &reg->sge;
		reg->wr.wr.num_sge = 1;
		reg->wr.remote_addr = remote_addr;
		reg->wr.rkey = rkey;
		if (dir == DMA_TO_DEVICE) {
			reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
		} else if (!rdma_cap_read_inv(qp->device, port_num)) {
			reg->wr.wr.opcode = IB_WR_RDMA_READ;
		} else {
			reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
			reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
		}
		count++;

		remote_addr += reg->sge.length;
		sg_cnt -= nents;
		for (j = 0; j < nents; j++)
			sg = sg_next(sg);
		prev = reg;
		offset = 0;
	}

	if (prev)
		prev->wr.wr.next = NULL;

	ctx->type = RDMA_RW_MR;
	return count;

out_free:
	while (--i >= 0)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
	kfree(ctx->reg);
out:
	return ret;
}

static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
		      qp->max_read_sge;
	struct ib_sge *sge;
	u32 total_len = 0, i, j;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

	ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
	if (!ctx->map.sges)
		goto out;

	ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
	if (!ctx->map.wrs)
		goto out_free_sges;

	for (i = 0; i < ctx->nr_ops; i++) {
		struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
		u32 nr_sge = min(sg_cnt, max_sge);

		if (dir == DMA_TO_DEVICE)
			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
		else
			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
		rdma_wr->remote_addr = remote_addr + total_len;
		rdma_wr->rkey = rkey;
		rdma_wr->wr.num_sge = nr_sge;
		rdma_wr->wr.sg_list = sge;

		for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
			sge->addr = sg_dma_address(sg) + offset;
			sge->length = sg_dma_len(sg) - offset;
			sge->lkey = qp->pd->local_dma_lkey;

			total_len += sge->length;
			sge++;
			sg_cnt--;
			offset = 0;
		}

		rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
			&ctx->map.wrs[i + 1].wr : NULL;
	}

	ctx->type = RDMA_RW_MULTI_WR;
	return ctx->nr_ops;

out_free_sges:
	kfree(ctx->map.sges);
out:
	return -ENOMEM;
}

static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

	ctx->nr_ops = 1;

	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
	ctx->single.sge.addr = sg_dma_address(sg) + offset;
	ctx->single.sge.length = sg_dma_len(sg) - offset;

	memset(rdma_wr, 0, sizeof(*rdma_wr));
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	rdma_wr->wr.sg_list = &ctx->single.sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;

	ctx->type = RDMA_RW_SINGLE_WR;
	return 1;
}

/**
 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @sg_offset: current byte offset into @sg
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	int ret;

	if (is_pci_p2pdma_page(sg_page(sg)))
		ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
	else
		ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);

	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	/*
	 * Skip to the S/G entry that sg_offset falls into:
	 */
	for (;;) {
		u32 len = sg_dma_len(sg);

		if (sg_offset < len)
			break;

		sg = sg_next(sg);
		sg_offset -= len;
		sg_cnt--;
	}

	ret = -EIO;
	if (WARN_ON_ONCE(sg_cnt == 0))
		goto out_unmap_sg;

	if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
		ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
				sg_offset, remote_addr, rkey, dir);
	} else if (sg_cnt > 1) {
		ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
				remote_addr, rkey, dir);
	} else {
		ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
				remote_addr, rkey, dir);
	}

	if (ret < 0)
		goto out_unmap_sg;
	return ret;

out_unmap_sg:
	ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
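
/*
 * Example usage (illustrative sketch only): a target-side ULP that has
 * received a remote buffer description from its peer might set up an RDMA
 * READ of the data roughly as follows. "req" and its fields are
 * hypothetical caller state, not kernel API:
 *
 *	int nr_wqe;
 *
 *	nr_wqe = rdma_rw_ctx_init(&req->rw_ctx, qp, port_num, req->sgl,
 *			req->sg_cnt, 0, req->remote_addr, req->rkey,
 *			DMA_FROM_DEVICE);
 *	if (nr_wqe < 0)
 *		return nr_wqe;
 *
 * The positive return value is the number of send queue WQEs the operation
 * will consume, which must fit within the budget set up by
 * rdma_rw_init_qp() below.
 */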

/**
 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @prot_sg: scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs: signature offloading algorithms
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		struct ib_sig_attrs *sig_attrs,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
	struct ib_rdma_wr *rdma_wr;
	struct ib_send_wr *prev_wr = NULL;
	int count = 0, ret;

	if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
		pr_err("SG count too large\n");
		return -EINVAL;
	}

	ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
	if (!ret) {
		ret = -ENOMEM;
		goto out_unmap_sg;
	}
	prot_sg_cnt = ret;

	ctx->type = RDMA_RW_SIG_MR;
	ctx->nr_ops = 1;
	ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
	if (!ctx->sig) {
		ret = -ENOMEM;
		goto out_unmap_prot_sg;
	}

	ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
	if (ret < 0)
		goto out_free_ctx;
	count += ret;
	prev_wr = &ctx->sig->data.reg_wr.wr;

	ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
			prot_sg, prot_sg_cnt, 0);
	if (ret < 0)
		goto out_destroy_data_mr;
	count += ret;

	if (ctx->sig->prot.inv_wr.next)
		prev_wr->next = &ctx->sig->prot.inv_wr;
	else
		prev_wr->next = &ctx->sig->prot.reg_wr.wr;
	prev_wr = &ctx->sig->prot.reg_wr.wr;

	ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
	if (!ctx->sig->sig_mr) {
		ret = -EAGAIN;
		goto out_destroy_prot_mr;
	}

	if (ctx->sig->sig_mr->need_inval) {
		memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));

		ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
		ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;

		prev_wr->next = &ctx->sig->sig_inv_wr;
		prev_wr = &ctx->sig->sig_inv_wr;
	}

	ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
	ctx->sig->sig_wr.wr.wr_cqe = NULL;
	ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
	ctx->sig->sig_wr.wr.num_sge = 1;
	ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
	ctx->sig->sig_wr.sig_attrs = sig_attrs;
	ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
	if (prot_sg_cnt)
		ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
	prev_wr->next = &ctx->sig->sig_wr.wr;
	prev_wr = &ctx->sig->sig_wr.wr;
	count++;

	ctx->sig->sig_sge.addr = 0;
	ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
	if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
		ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;

	rdma_wr = &ctx->sig->data.wr;
	rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	prev_wr->next = &rdma_wr->wr;
	prev_wr = &rdma_wr->wr;
	count++;

	return count;

out_destroy_prot_mr:
	if (prot_sg_cnt)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
out_destroy_data_mr:
	ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
out_free_ctx:
	kfree(ctx->sig);
out_unmap_prot_sg:
	ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
out_unmap_sg:
	ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
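
/*
 * Example (illustrative sketch only): a ULP doing T10-DIF offload, in the
 * style of iSER, would fill in struct ib_sig_attrs before calling
 * rdma_rw_ctx_signature_init(). The field values below are made-up
 * placeholders rather than recommendations, and "lba", "sg", "prot_sg"
 * etc. are the caller's own state:
 *
 *	struct ib_sig_attrs sig_attrs = {
 *		.mem.sig_type			= IB_SIG_TYPE_NONE,
 *		.wire.sig_type			= IB_SIG_TYPE_T10_DIF,
 *		.wire.sig.dif.bg_type		= IB_T10DIF_CRC,
 *		.wire.sig.dif.pi_interval	= 512,
 *		.wire.sig.dif.ref_tag		= lba & 0xffffffff,
 *	};
 *
 *	ret = rdma_rw_ctx_signature_init(&ctx, qp, port_num, sg, sg_cnt,
 *			prot_sg, prot_sg_cnt, &sig_attrs, remote_addr,
 *			rkey, dir);
 */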

/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs. If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
	reg->mr->need_inval = need_inval;
	ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
	reg->reg_wr.key = reg->mr->lkey;
	reg->sge.lkey = reg->mr->lkey;
}

/**
 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
 * @ctx: context to operate on
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @cqe: completion queue entry for the last WR
 * @chain_wr: WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed. If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr, *last_wr;
	int i;

	switch (ctx->type) {
	case RDMA_RW_SIG_MR:
		rdma_rw_update_lkey(&ctx->sig->data, true);
		if (ctx->sig->prot.mr)
			rdma_rw_update_lkey(&ctx->sig->prot, true);

		ctx->sig->sig_mr->need_inval = true;
		ib_update_fast_reg_key(ctx->sig->sig_mr,
				ib_inc_rkey(ctx->sig->sig_mr->lkey));
		ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;

		if (ctx->sig->data.inv_wr.next)
			first_wr = &ctx->sig->data.inv_wr;
		else
			first_wr = &ctx->sig->data.reg_wr.wr;
		last_wr = &ctx->sig->data.wr.wr;
		break;
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++) {
			rdma_rw_update_lkey(&ctx->reg[i],
				ctx->reg[i].wr.wr.opcode !=
					IB_WR_RDMA_READ_WITH_INV);
		}

		if (ctx->reg[0].inv_wr.next)
			first_wr = &ctx->reg[0].inv_wr;
		else
			first_wr = &ctx->reg[0].reg_wr.wr;
		last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
		break;
	case RDMA_RW_MULTI_WR:
		first_wr = &ctx->map.wrs[0].wr;
		last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
		break;
	case RDMA_RW_SINGLE_WR:
		first_wr = &ctx->single.wr.wr;
		last_wr = &ctx->single.wr.wr;
		break;
	default:
		BUG();
	}

	if (chain_wr) {
		last_wr->next = chain_wr;
	} else {
		last_wr->wr_cqe = cqe;
		last_wr->send_flags |= IB_SEND_SIGNALED;
	}

	return first_wr;
}
EXPORT_SYMBOL(rdma_rw_ctx_wrs);
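
/*
 * Example (illustrative sketch only): a target-side ULP that wants to send
 * its response immediately after the RDMA WRITE of the data, in the style
 * of nvmet-rdma, can chain its own send WR instead of passing a cqe.
 * "rsp" and "rsp->send_wr" (a fully initialized IB_WR_SEND work request
 * owned by the ULP) are hypothetical:
 *
 *	struct ib_send_wr *first_wr;
 *
 *	first_wr = rdma_rw_ctx_wrs(&rsp->rw_ctx, qp, port_num, NULL,
 *			&rsp->send_wr);
 *	ret = ib_post_send(qp, first_wr, NULL);
 *
 * When no WR is chained, pass a cqe instead and the last RDMA WR itself is
 * signaled, as shown in rdma_rw_ctx_wrs() above.
 */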

/**
 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
 * @ctx: context to operate on
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @cqe: completion queue entry for the last WR
 * @chain_wr: WR to append to the posted chain
 *
 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 * any memory registration operations needed. If @chain_wr is non-NULL the
 * WR it points to will be appended to the chain of WRs posted. If @chain_wr
 * is not set @cqe must be set so that the caller gets a completion
 * notification.
 */
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr;

	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
	return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);

/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
{
	int i;

	switch (ctx->type) {
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++)
			ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
		kfree(ctx->reg);
		break;
	case RDMA_RW_MULTI_WR:
		kfree(ctx->map.wrs);
		kfree(ctx->map.sges);
		break;
	case RDMA_RW_SINGLE_WR:
		break;
	default:
		BUG();
		break;
	}

	/* P2PDMA contexts do not need to be unmapped */
	if (!is_pci_p2pdma_page(sg_page(sg)))
		ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);

/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *	rdma_rw_ctx_signature_init
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @prot_sg: scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt: number of entries in @prot_sg
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		enum dma_data_direction dir)
{
	if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
		return;

	ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
	ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);

	if (ctx->sig->prot.mr) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
		ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
	}

	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
	kfree(ctx->sig);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
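
/*
 * Example (illustrative sketch only): a typical lifecycle ties the calls
 * above together, posting from the submission path and tearing down from
 * the send completion handler. "struct my_io", "my_rdma_done" and the io
 * fields are hypothetical caller state:
 *
 *	io->cqe.done = my_rdma_done;
 *	ret = rdma_rw_ctx_post(&io->rw_ctx, qp, port_num, &io->cqe, NULL);
 *
 *	static void my_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 *	{
 *		struct my_io *io = container_of(wc->wr_cqe, struct my_io, cqe);
 *
 *		rdma_rw_ctx_destroy(&io->rw_ctx, wc->qp, io->port_num,
 *				io->sgl, io->sg_cnt, io->dir);
 *	}
 */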

/**
 * rdma_rw_mr_factor - return number of MRs required for a payload
 * @device: device handling the connection
 * @port_num: port num to which the connection is bound
 * @maxpages: maximum payload pages per rdma_rw_ctx
 *
 * Returns the number of MRs the device requires to move a payload of up to
 * @maxpages pages. The returned value is used during transport creation to
 * compute max_rdma_ctxs and the size of the transport's Send and
 * Send Completion Queues.
 */
unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
		unsigned int maxpages)
{
	unsigned int mr_pages;

	if (rdma_rw_can_use_mr(device, port_num))
		mr_pages = rdma_rw_fr_page_list_len(device);
	else
		mr_pages = device->attrs.max_sge_rd;
	return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);

void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
	u32 factor;

	WARN_ON_ONCE(attr->port_num == 0);

	/*
	 * Each context needs at least one RDMA READ or WRITE WR.
	 *
	 * For some hardware we might need more, eventually we should ask the
	 * HCA driver for a multiplier here.
	 */
	factor = 1;

	/*
	 * If the device needs MRs to perform RDMA READ or WRITE operations,
	 * we'll need two additional WRs per context for the registration and
	 * the invalidation.
	 */
	if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
		factor += 6;	/* (inv + reg) * (data + prot + sig) */
	else if (rdma_rw_can_use_mr(dev, attr->port_num))
		factor += 2;	/* inv + reg */

	attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

	/*
	 * But maybe we were just too high in the sky and the device doesn't
	 * even support all we need, and we'll have to live with what we get..
	 */
	attr->cap.max_send_wr =
		min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
}

int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
	struct ib_device *dev = qp->pd->device;
	u32 nr_mrs = 0, nr_sig_mrs = 0;
	int ret = 0;

	if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
		nr_sig_mrs = attr->cap.max_rdma_ctxs;
		nr_mrs = attr->cap.max_rdma_ctxs * 2;
	} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
		nr_mrs = attr->cap.max_rdma_ctxs;
	}

	if (nr_mrs) {
		ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
				IB_MR_TYPE_MEM_REG,
				rdma_rw_fr_page_list_len(dev));
		if (ret) {
			pr_err("%s: failed to allocate %d MRs\n",
				__func__, nr_mrs);
			return ret;
		}
	}

	if (nr_sig_mrs) {
		ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
				IB_MR_TYPE_SIGNATURE, 2);
		if (ret) {
			pr_err("%s: failed to allocate %d SIG MRs\n",
				__func__, nr_sig_mrs);
			goto out_free_rdma_mrs;
		}
	}

	return 0;

out_free_rdma_mrs:
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
	return ret;
}

void rdma_rw_cleanup_mrs(struct ib_qp *qp)
{
	ib_mr_pool_destroy(qp, &qp->sig_mrs);
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}
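
/*
 * Example (illustrative sketch only): a transport typically sizes its QP
 * with rdma_rw_mr_factor() before creating it, in the style of nvmet-rdma;
 * rdma_rw_init_qp() then adds the per-context registration/invalidation
 * WRs on top. "sq_depth" and "max_pages_per_io" are hypothetical caller
 * parameters:
 *
 *	struct ib_qp_init_attr attr = { };
 *	unsigned int factor;
 *	int ret;
 *
 *	factor = rdma_rw_mr_factor(cm_id->device, cm_id->port_num,
 *			max_pages_per_io);
 *	attr.cap.max_rdma_ctxs = sq_depth * factor;
 *	attr.cap.max_send_wr = sq_depth;
 *	attr.port_num = cm_id->port_num;
 *	attr.qp_type = IB_QPT_RC;
 *	attr.send_cq = send_cq;
 *	attr.recv_cq = recv_cq;
 *	ret = rdma_create_qp(cm_id, pd, &attr);
 *
 * rdma_rw_init_qp() and rdma_rw_init_mrs() are invoked from the core
 * ib_create_qp() path when attr.cap.max_rdma_ctxs is non-zero, so the ULP
 * does not call them directly.
 */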