1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ 3 4 #include <rdma/ib_umem_odp.h> 5 #include "mlx5_ib.h" 6 #include "umr.h" 7 #include "wr.h" 8 9 /* 10 * We can't use an array for xlt_emergency_page because dma_map_single doesn't 11 * work on kernel modules memory 12 */ 13 void *xlt_emergency_page; 14 static DEFINE_MUTEX(xlt_emergency_page_mutex); 15 16 static __be64 get_umr_enable_mr_mask(void) 17 { 18 u64 result; 19 20 result = MLX5_MKEY_MASK_KEY | 21 MLX5_MKEY_MASK_FREE; 22 23 return cpu_to_be64(result); 24 } 25 26 static __be64 get_umr_disable_mr_mask(void) 27 { 28 u64 result; 29 30 result = MLX5_MKEY_MASK_FREE; 31 32 return cpu_to_be64(result); 33 } 34 35 static __be64 get_umr_update_translation_mask(void) 36 { 37 u64 result; 38 39 result = MLX5_MKEY_MASK_LEN | 40 MLX5_MKEY_MASK_PAGE_SIZE | 41 MLX5_MKEY_MASK_START_ADDR; 42 43 return cpu_to_be64(result); 44 } 45 46 static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev) 47 { 48 u64 result; 49 50 result = MLX5_MKEY_MASK_LR | 51 MLX5_MKEY_MASK_LW | 52 MLX5_MKEY_MASK_RR | 53 MLX5_MKEY_MASK_RW; 54 55 if (MLX5_CAP_GEN(dev->mdev, atomic)) 56 result |= MLX5_MKEY_MASK_A; 57 58 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 59 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; 60 61 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 62 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; 63 64 return cpu_to_be64(result); 65 } 66 67 static __be64 get_umr_update_pd_mask(void) 68 { 69 u64 result; 70 71 result = MLX5_MKEY_MASK_PD; 72 73 return cpu_to_be64(result); 74 } 75 76 static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) 77 { 78 if (mask & MLX5_MKEY_MASK_PAGE_SIZE && 79 MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 80 return -EPERM; 81 82 if (mask & MLX5_MKEY_MASK_A && 83 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 84 return -EPERM; 85 86 if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && 87 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 88 return -EPERM; 89 90 if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && 91 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 92 return -EPERM; 93 94 return 0; 95 } 96 97 enum { 98 MAX_UMR_WR = 128, 99 }; 100 101 static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp) 102 { 103 struct ib_qp_attr attr = {}; 104 int ret; 105 106 attr.qp_state = IB_QPS_INIT; 107 attr.port_num = 1; 108 ret = ib_modify_qp(qp, &attr, 109 IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); 110 if (ret) { 111 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 112 return ret; 113 } 114 115 memset(&attr, 0, sizeof(attr)); 116 attr.qp_state = IB_QPS_RTR; 117 118 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 119 if (ret) { 120 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 121 return ret; 122 } 123 124 memset(&attr, 0, sizeof(attr)); 125 attr.qp_state = IB_QPS_RTS; 126 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 127 if (ret) { 128 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 129 return ret; 130 } 131 132 return 0; 133 } 134 135 int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) 136 { 137 struct ib_qp_init_attr init_attr = {}; 138 struct ib_pd *pd; 139 struct ib_cq *cq; 140 struct ib_qp *qp; 141 int ret; 142 143 pd = ib_alloc_pd(&dev->ib_dev, 0); 144 if (IS_ERR(pd)) { 145 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 146 return PTR_ERR(pd); 147 } 148 149 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 150 if (IS_ERR(cq)) { 151 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 152 ret = PTR_ERR(cq); 153 goto destroy_pd; 154 } 155 156 init_attr.send_cq = cq; 157 init_attr.recv_cq = cq; 158 init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; 159 init_attr.cap.max_send_wr = MAX_UMR_WR; 160 init_attr.cap.max_send_sge = 1; 161 init_attr.qp_type = MLX5_IB_QPT_REG_UMR; 162 init_attr.port_num = 1; 163 qp = ib_create_qp(pd, &init_attr); 164 if (IS_ERR(qp)) { 165 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 166 ret = PTR_ERR(qp); 167 goto destroy_cq; 168 } 169 170 ret = mlx5r_umr_qp_rst2rts(dev, qp); 171 if (ret) 172 goto destroy_qp; 173 174 dev->umrc.qp = qp; 175 dev->umrc.cq = cq; 176 dev->umrc.pd = pd; 177 178 sema_init(&dev->umrc.sem, MAX_UMR_WR); 179 mutex_init(&dev->umrc.lock); 180 181 return 0; 182 183 destroy_qp: 184 ib_destroy_qp(qp); 185 destroy_cq: 186 ib_free_cq(cq); 187 destroy_pd: 188 ib_dealloc_pd(pd); 189 return ret; 190 } 191 192 void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) 193 { 194 ib_destroy_qp(dev->umrc.qp); 195 ib_free_cq(dev->umrc.cq); 196 ib_dealloc_pd(dev->umrc.pd); 197 } 198 199 static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) 200 { 201 struct umr_common *umrc = &dev->umrc; 202 struct ib_qp_attr attr; 203 int err; 204 205 attr.qp_state = IB_QPS_RESET; 206 err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); 207 if (err) { 208 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 209 goto err; 210 } 211 212 err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); 213 if (err) 214 goto err; 215 216 umrc->state = MLX5_UMR_STATE_ACTIVE; 217 return 0; 218 219 err: 220 umrc->state = MLX5_UMR_STATE_ERR; 221 return err; 222 } 223 224 static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, 225 struct mlx5r_umr_wqe *wqe, bool with_data) 226 { 227 unsigned int wqe_size = 228 with_data ? sizeof(struct mlx5r_umr_wqe) : 229 sizeof(struct mlx5r_umr_wqe) - 230 sizeof(struct mlx5_wqe_data_seg); 231 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 232 struct mlx5_core_dev *mdev = dev->mdev; 233 struct mlx5_ib_qp *qp = to_mqp(ibqp); 234 struct mlx5_wqe_ctrl_seg *ctrl; 235 union { 236 struct ib_cqe *ib_cqe; 237 u64 wr_id; 238 } id; 239 void *cur_edge, *seg; 240 unsigned long flags; 241 unsigned int idx; 242 int size, err; 243 244 if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) 245 return -EIO; 246 247 spin_lock_irqsave(&qp->sq.lock, flags); 248 249 err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0, 250 cpu_to_be32(mkey), false, false); 251 if (WARN_ON(err)) 252 goto out; 253 254 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 255 256 mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size); 257 258 id.ib_cqe = cqe; 259 mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, 260 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR); 261 262 mlx5r_ring_db(qp, 1, ctrl); 263 264 out: 265 spin_unlock_irqrestore(&qp->sq.lock, flags); 266 267 return err; 268 } 269 270 static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) 271 { 272 struct mlx5_ib_umr_context *context = 273 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); 274 275 context->status = wc->status; 276 complete(&context->done); 277 } 278 279 static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context) 280 { 281 context->cqe.done = mlx5r_umr_done; 282 init_completion(&context->done); 283 } 284 285 static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, 286 struct mlx5r_umr_wqe *wqe, bool with_data) 287 { 288 struct umr_common *umrc = &dev->umrc; 289 struct mlx5r_umr_context umr_context; 290 int err; 291 292 err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask)); 293 if (WARN_ON(err)) 294 return err; 295 296 mlx5r_umr_init_context(&umr_context); 297 298 down(&umrc->sem); 299 while (true) { 300 mutex_lock(&umrc->lock); 301 if (umrc->state == MLX5_UMR_STATE_ERR) { 302 mutex_unlock(&umrc->lock); 303 err = -EFAULT; 304 break; 305 } 306 307 if (umrc->state == MLX5_UMR_STATE_RECOVER) { 308 mutex_unlock(&umrc->lock); 309 usleep_range(3000, 5000); 310 continue; 311 } 312 313 err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, 314 with_data); 315 mutex_unlock(&umrc->lock); 316 if (err) { 317 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", 318 err); 319 break; 320 } 321 322 wait_for_completion(&umr_context.done); 323 324 if (umr_context.status == IB_WC_SUCCESS) 325 break; 326 327 if (umr_context.status == IB_WC_WR_FLUSH_ERR) 328 continue; 329 330 WARN_ON_ONCE(1); 331 mlx5_ib_warn(dev, 332 "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n", 333 umr_context.status); 334 mutex_lock(&umrc->lock); 335 err = mlx5r_umr_recover(dev); 336 mutex_unlock(&umrc->lock); 337 if (err) 338 mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", 339 err); 340 err = -EFAULT; 341 break; 342 } 343 up(&umrc->sem); 344 return err; 345 } 346 347 /** 348 * mlx5r_umr_revoke_mr - Fence all DMA on the MR 349 * @mr: The MR to fence 350 * 351 * Upon return the NIC will not be doing any DMA to the pages under the MR, 352 * and any DMA in progress will be completed. Failure of this function 353 * indicates the HW has failed catastrophically. 354 */ 355 int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr) 356 { 357 struct mlx5_ib_dev *dev = mr_to_mdev(mr); 358 struct mlx5r_umr_wqe wqe = {}; 359 360 if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 361 return 0; 362 363 wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); 364 wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask(); 365 wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; 366 367 MLX5_SET(mkc, &wqe.mkey_seg, free, 1); 368 MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn); 369 MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); 370 MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, 371 mlx5_mkey_variant(mr->mmkey.key)); 372 373 return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); 374 } 375 376 static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, 377 struct mlx5_mkey_seg *seg, 378 unsigned int access_flags) 379 { 380 MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); 381 MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); 382 MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); 383 MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); 384 MLX5_SET(mkc, seg, lr, 1); 385 MLX5_SET(mkc, seg, relaxed_ordering_write, 386 !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); 387 MLX5_SET(mkc, seg, relaxed_ordering_read, 388 !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); 389 } 390 391 int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, 392 int access_flags) 393 { 394 struct mlx5_ib_dev *dev = mr_to_mdev(mr); 395 struct mlx5r_umr_wqe wqe = {}; 396 int err; 397 398 wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev); 399 wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); 400 wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE; 401 wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; 402 403 mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags); 404 MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn); 405 MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); 406 MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, 407 mlx5_mkey_variant(mr->mmkey.key)); 408 409 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); 410 if (err) 411 return err; 412 413 mr->access_flags = access_flags; 414 return 0; 415 } 416 417 #define MLX5_MAX_UMR_CHUNK \ 418 ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT) 419 #define MLX5_SPARE_UMR_CHUNK 0x10000 420 421 /* 422 * Allocate a temporary buffer to hold the per-page information to transfer to 423 * HW. For efficiency this should be as large as it can be, but buffer 424 * allocation failure is not allowed, so try smaller sizes. 425 */ 426 static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) 427 { 428 const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size; 429 size_t size; 430 void *res = NULL; 431 432 static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); 433 434 /* 435 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the 436 * allocation can't trigger any kind of reclaim. 437 */ 438 might_sleep(); 439 440 gfp_mask |= __GFP_ZERO | __GFP_NORETRY; 441 442 /* 443 * If the system already has a suitable high order page then just use 444 * that, but don't try hard to create one. This max is about 1M, so a 445 * free x86 huge page will satisfy it. 446 */ 447 size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), 448 MLX5_MAX_UMR_CHUNK); 449 *nents = size / ent_size; 450 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 451 get_order(size)); 452 if (res) 453 return res; 454 455 if (size > MLX5_SPARE_UMR_CHUNK) { 456 size = MLX5_SPARE_UMR_CHUNK; 457 *nents = size / ent_size; 458 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 459 get_order(size)); 460 if (res) 461 return res; 462 } 463 464 *nents = PAGE_SIZE / ent_size; 465 res = (void *)__get_free_page(gfp_mask); 466 if (res) 467 return res; 468 469 mutex_lock(&xlt_emergency_page_mutex); 470 memset(xlt_emergency_page, 0, PAGE_SIZE); 471 return xlt_emergency_page; 472 } 473 474 static void mlx5r_umr_free_xlt(void *xlt, size_t length) 475 { 476 if (xlt == xlt_emergency_page) { 477 mutex_unlock(&xlt_emergency_page_mutex); 478 return; 479 } 480 481 free_pages((unsigned long)xlt, get_order(length)); 482 } 483 484 static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, 485 struct ib_sge *sg) 486 { 487 struct device *ddev = &dev->mdev->pdev->dev; 488 489 dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); 490 mlx5r_umr_free_xlt(xlt, sg->length); 491 } 492 493 /* 494 * Create an XLT buffer ready for submission. 495 */ 496 static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg, 497 size_t nents, size_t ent_size, 498 unsigned int flags) 499 { 500 struct device *ddev = &dev->mdev->pdev->dev; 501 dma_addr_t dma; 502 void *xlt; 503 504 xlt = mlx5r_umr_alloc_xlt(&nents, ent_size, 505 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : 506 GFP_KERNEL); 507 sg->length = nents * ent_size; 508 dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); 509 if (dma_mapping_error(ddev, dma)) { 510 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); 511 mlx5r_umr_free_xlt(xlt, sg->length); 512 return NULL; 513 } 514 sg->addr = dma; 515 sg->lkey = dev->umrc.pd->local_dma_lkey; 516 517 return xlt; 518 } 519 520 static void 521 mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, 522 unsigned int flags, struct ib_sge *sg) 523 { 524 if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) 525 /* fail if free */ 526 ctrl_seg->flags = MLX5_UMR_CHECK_FREE; 527 else 528 /* fail if not free */ 529 ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE; 530 ctrl_seg->xlt_octowords = 531 cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); 532 } 533 534 static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev, 535 struct mlx5_mkey_seg *mkey_seg, 536 struct mlx5_ib_mr *mr, 537 unsigned int page_shift) 538 { 539 mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags); 540 MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn); 541 MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova); 542 MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length); 543 MLX5_SET(mkc, mkey_seg, log_page_size, page_shift); 544 MLX5_SET(mkc, mkey_seg, qpn, 0xffffff); 545 MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); 546 } 547 548 static void 549 mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg, 550 struct ib_sge *sg) 551 { 552 data_seg->byte_count = cpu_to_be32(sg->length); 553 data_seg->lkey = cpu_to_be32(sg->lkey); 554 data_seg->addr = cpu_to_be64(sg->addr); 555 } 556 557 static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, 558 u64 offset) 559 { 560 u64 octo_offset = mlx5r_umr_get_xlt_octo(offset); 561 562 ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff); 563 ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16); 564 ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; 565 } 566 567 static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, 568 struct mlx5r_umr_wqe *wqe, 569 struct mlx5_ib_mr *mr, struct ib_sge *sg, 570 unsigned int flags) 571 { 572 bool update_pd_access, update_translation; 573 574 if (flags & MLX5_IB_UPD_XLT_ENABLE) 575 wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask(); 576 577 update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE || 578 flags & MLX5_IB_UPD_XLT_PD || 579 flags & MLX5_IB_UPD_XLT_ACCESS; 580 581 if (update_pd_access) { 582 wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev); 583 wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); 584 } 585 586 update_translation = 587 flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; 588 589 if (update_translation) { 590 wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); 591 if (!mr->ibmr.length) 592 MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); 593 } 594 595 wqe->ctrl_seg.xlt_octowords = 596 cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); 597 wqe->data_seg.byte_count = cpu_to_be32(sg->length); 598 } 599 600 /* 601 * Send the DMA list to the HW for a normal MR using UMR. 602 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP 603 * flag may be used. 604 */ 605 int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) 606 { 607 struct mlx5_ib_dev *dev = mr_to_mdev(mr); 608 struct device *ddev = &dev->mdev->pdev->dev; 609 struct mlx5r_umr_wqe wqe = {}; 610 struct ib_block_iter biter; 611 struct mlx5_mtt *cur_mtt; 612 size_t orig_sg_length; 613 struct mlx5_mtt *mtt; 614 size_t final_size; 615 struct ib_sge sg; 616 u64 offset = 0; 617 int err = 0; 618 619 if (WARN_ON(mr->umem->is_odp)) 620 return -EINVAL; 621 622 mtt = mlx5r_umr_create_xlt( 623 dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift), 624 sizeof(*mtt), flags); 625 if (!mtt) 626 return -ENOMEM; 627 628 orig_sg_length = sg.length; 629 630 mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); 631 mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, 632 mr->page_shift); 633 mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); 634 635 cur_mtt = mtt; 636 rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter, 637 mr->umem->sgt_append.sgt.nents, 638 BIT(mr->page_shift)) { 639 if (cur_mtt == (void *)mtt + sg.length) { 640 dma_sync_single_for_device(ddev, sg.addr, sg.length, 641 DMA_TO_DEVICE); 642 643 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, 644 true); 645 if (err) 646 goto err; 647 dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 648 DMA_TO_DEVICE); 649 offset += sg.length; 650 mlx5r_umr_update_offset(&wqe.ctrl_seg, offset); 651 652 cur_mtt = mtt; 653 } 654 655 cur_mtt->ptag = 656 cpu_to_be64(rdma_block_iter_dma_address(&biter) | 657 MLX5_IB_MTT_PRESENT); 658 659 if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) 660 cur_mtt->ptag = 0; 661 662 cur_mtt++; 663 } 664 665 final_size = (void *)cur_mtt - (void *)mtt; 666 sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); 667 memset(cur_mtt, 0, sg.length - final_size); 668 mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); 669 670 dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); 671 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); 672 673 err: 674 sg.length = orig_sg_length; 675 mlx5r_umr_unmap_free_xlt(dev, mtt, &sg); 676 return err; 677 } 678 679 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) 680 { 681 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); 682 } 683 684 int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 685 int page_shift, int flags) 686 { 687 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) 688 ? sizeof(struct mlx5_klm) 689 : sizeof(struct mlx5_mtt); 690 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; 691 struct mlx5_ib_dev *dev = mr_to_mdev(mr); 692 struct device *ddev = &dev->mdev->pdev->dev; 693 const int page_mask = page_align - 1; 694 struct mlx5r_umr_wqe wqe = {}; 695 size_t pages_mapped = 0; 696 size_t pages_to_map = 0; 697 size_t size_to_map = 0; 698 size_t orig_sg_length; 699 size_t pages_iter; 700 struct ib_sge sg; 701 int err = 0; 702 void *xlt; 703 704 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && 705 !umr_can_use_indirect_mkey(dev)) 706 return -EPERM; 707 708 if (WARN_ON(!mr->umem->is_odp)) 709 return -EINVAL; 710 711 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, 712 * so we need to align the offset and length accordingly 713 */ 714 if (idx & page_mask) { 715 npages += idx & page_mask; 716 idx &= ~page_mask; 717 } 718 pages_to_map = ALIGN(npages, page_align); 719 720 xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags); 721 if (!xlt) 722 return -ENOMEM; 723 724 pages_iter = sg.length / desc_size; 725 orig_sg_length = sg.length; 726 727 if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { 728 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 729 size_t max_pages = ib_umem_odp_num_pages(odp) - idx; 730 731 pages_to_map = min_t(size_t, pages_to_map, max_pages); 732 } 733 734 mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); 735 mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift); 736 mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); 737 738 for (pages_mapped = 0; 739 pages_mapped < pages_to_map && !err; 740 pages_mapped += pages_iter, idx += pages_iter) { 741 npages = min_t(int, pages_iter, pages_to_map - pages_mapped); 742 size_to_map = npages * desc_size; 743 dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 744 DMA_TO_DEVICE); 745 mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); 746 dma_sync_single_for_device(ddev, sg.addr, sg.length, 747 DMA_TO_DEVICE); 748 sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); 749 750 if (pages_mapped + pages_iter >= pages_to_map) 751 mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); 752 mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size); 753 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); 754 } 755 sg.length = orig_sg_length; 756 mlx5r_umr_unmap_free_xlt(dev, xlt, &sg); 757 return err; 758 } 759