1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 5 */ 6 7 #include "rxe.h" 8 #include "rxe_loc.h" 9 10 /* Return a random 8 bit key value that is 11 * different than the last_key. Set last_key to -1 12 * if this is the first key for an MR or MW 13 */ 14 u8 rxe_get_next_key(u32 last_key) 15 { 16 u8 key; 17 18 do { 19 get_random_bytes(&key, 1); 20 } while (key == last_key); 21 22 return key; 23 } 24 25 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) 26 { 27 switch (mr->type) { 28 case RXE_MR_TYPE_DMA: 29 return 0; 30 31 case RXE_MR_TYPE_MR: 32 if (iova < mr->iova || length > mr->length || 33 iova > mr->iova + mr->length - length) 34 return -EFAULT; 35 return 0; 36 37 default: 38 return -EFAULT; 39 } 40 } 41 42 #define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ 43 | IB_ACCESS_REMOTE_WRITE \ 44 | IB_ACCESS_REMOTE_ATOMIC) 45 46 static void rxe_mr_init(int access, struct rxe_mr *mr) 47 { 48 u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1); 49 u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; 50 51 mr->ibmr.lkey = lkey; 52 mr->ibmr.rkey = rkey; 53 mr->state = RXE_MR_STATE_INVALID; 54 mr->type = RXE_MR_TYPE_NONE; 55 mr->map_shift = ilog2(RXE_BUF_PER_MAP); 56 } 57 58 static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) 59 { 60 int i; 61 int num_map; 62 struct rxe_map **map = mr->map; 63 64 num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 65 66 mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); 67 if (!mr->map) 68 goto err1; 69 70 for (i = 0; i < num_map; i++) { 71 mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); 72 if (!mr->map[i]) 73 goto err2; 74 } 75 76 BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 77 78 mr->map_shift = ilog2(RXE_BUF_PER_MAP); 79 mr->map_mask = RXE_BUF_PER_MAP - 1; 80 81 mr->num_buf = num_buf; 82 mr->num_map = num_map; 83 mr->max_buf = num_map * RXE_BUF_PER_MAP; 84 85 return 0; 86 87 err2: 88 for (i--; i >= 0; i--) 89 kfree(mr->map[i]); 90 91 kfree(mr->map); 92 err1: 93 return -ENOMEM; 94 } 95 96 void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) 97 { 98 rxe_mr_init(access, mr); 99 100 mr->ibmr.pd = &pd->ibpd; 101 mr->access = access; 102 mr->state = RXE_MR_STATE_VALID; 103 mr->type = RXE_MR_TYPE_DMA; 104 } 105 106 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, 107 int access, struct rxe_mr *mr) 108 { 109 struct rxe_map **map; 110 struct rxe_phys_buf *buf = NULL; 111 struct ib_umem *umem; 112 struct sg_page_iter sg_iter; 113 int num_buf; 114 void *vaddr; 115 int err; 116 int i; 117 118 umem = ib_umem_get(pd->ibpd.device, start, length, access); 119 if (IS_ERR(umem)) { 120 pr_warn("%s: Unable to pin memory region err = %d\n", 121 __func__, (int)PTR_ERR(umem)); 122 err = PTR_ERR(umem); 123 goto err_out; 124 } 125 126 num_buf = ib_umem_num_pages(umem); 127 128 rxe_mr_init(access, mr); 129 130 err = rxe_mr_alloc(mr, num_buf); 131 if (err) { 132 pr_warn("%s: Unable to allocate memory for map\n", 133 __func__); 134 goto err_release_umem; 135 } 136 137 mr->page_shift = PAGE_SHIFT; 138 mr->page_mask = PAGE_SIZE - 1; 139 140 num_buf = 0; 141 map = mr->map; 142 if (length > 0) { 143 buf = map[0]->buf; 144 145 for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { 146 if (num_buf >= RXE_BUF_PER_MAP) { 147 map++; 148 buf = map[0]->buf; 149 num_buf = 0; 150 } 151 152 vaddr = page_address(sg_page_iter_page(&sg_iter)); 153 if (!vaddr) { 154 pr_warn("%s: Unable to get virtual address\n", 155 __func__); 156 err = -ENOMEM; 157 goto err_cleanup_map; 158 } 159 160 buf->addr = (uintptr_t)vaddr; 161 buf->size = PAGE_SIZE; 162 num_buf++; 163 buf++; 164 165 } 166 } 167 168 mr->ibmr.pd = &pd->ibpd; 169 mr->umem = umem; 170 mr->access = access; 171 mr->length = length; 172 mr->iova = iova; 173 mr->va = start; 174 mr->offset = ib_umem_offset(umem); 175 mr->state = RXE_MR_STATE_VALID; 176 mr->type = RXE_MR_TYPE_MR; 177 178 return 0; 179 180 err_cleanup_map: 181 for (i = 0; i < mr->num_map; i++) 182 kfree(mr->map[i]); 183 kfree(mr->map); 184 err_release_umem: 185 ib_umem_release(umem); 186 err_out: 187 return err; 188 } 189 190 int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) 191 { 192 int err; 193 194 rxe_mr_init(0, mr); 195 196 /* In fastreg, we also set the rkey */ 197 mr->ibmr.rkey = mr->ibmr.lkey; 198 199 err = rxe_mr_alloc(mr, max_pages); 200 if (err) 201 goto err1; 202 203 mr->ibmr.pd = &pd->ibpd; 204 mr->max_buf = max_pages; 205 mr->state = RXE_MR_STATE_FREE; 206 mr->type = RXE_MR_TYPE_MR; 207 208 return 0; 209 210 err1: 211 return err; 212 } 213 214 static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, 215 size_t *offset_out) 216 { 217 size_t offset = iova - mr->iova + mr->offset; 218 int map_index; 219 int buf_index; 220 u64 length; 221 222 if (likely(mr->page_shift)) { 223 *offset_out = offset & mr->page_mask; 224 offset >>= mr->page_shift; 225 *n_out = offset & mr->map_mask; 226 *m_out = offset >> mr->map_shift; 227 } else { 228 map_index = 0; 229 buf_index = 0; 230 231 length = mr->map[map_index]->buf[buf_index].size; 232 233 while (offset >= length) { 234 offset -= length; 235 buf_index++; 236 237 if (buf_index == RXE_BUF_PER_MAP) { 238 map_index++; 239 buf_index = 0; 240 } 241 length = mr->map[map_index]->buf[buf_index].size; 242 } 243 244 *m_out = map_index; 245 *n_out = buf_index; 246 *offset_out = offset; 247 } 248 } 249 250 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) 251 { 252 size_t offset; 253 int m, n; 254 void *addr; 255 256 if (mr->state != RXE_MR_STATE_VALID) { 257 pr_warn("mr not in valid state\n"); 258 addr = NULL; 259 goto out; 260 } 261 262 if (!mr->map) { 263 addr = (void *)(uintptr_t)iova; 264 goto out; 265 } 266 267 if (mr_check_range(mr, iova, length)) { 268 pr_warn("range violation\n"); 269 addr = NULL; 270 goto out; 271 } 272 273 lookup_iova(mr, iova, &m, &n, &offset); 274 275 if (offset + length > mr->map[m]->buf[n].size) { 276 pr_warn("crosses page boundary\n"); 277 addr = NULL; 278 goto out; 279 } 280 281 addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset; 282 283 out: 284 return addr; 285 } 286 287 /* copy data from a range (vaddr, vaddr+length-1) to or from 288 * a mr object starting at iova. 289 */ 290 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, 291 enum rxe_mr_copy_dir dir) 292 { 293 int err; 294 int bytes; 295 u8 *va; 296 struct rxe_map **map; 297 struct rxe_phys_buf *buf; 298 int m; 299 int i; 300 size_t offset; 301 302 if (length == 0) 303 return 0; 304 305 if (mr->type == RXE_MR_TYPE_DMA) { 306 u8 *src, *dest; 307 308 src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova); 309 310 dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr; 311 312 memcpy(dest, src, length); 313 314 return 0; 315 } 316 317 WARN_ON_ONCE(!mr->map); 318 319 err = mr_check_range(mr, iova, length); 320 if (err) { 321 err = -EFAULT; 322 goto err1; 323 } 324 325 lookup_iova(mr, iova, &m, &i, &offset); 326 327 map = mr->map + m; 328 buf = map[0]->buf + i; 329 330 while (length > 0) { 331 u8 *src, *dest; 332 333 va = (u8 *)(uintptr_t)buf->addr + offset; 334 src = (dir == RXE_TO_MR_OBJ) ? addr : va; 335 dest = (dir == RXE_TO_MR_OBJ) ? va : addr; 336 337 bytes = buf->size - offset; 338 339 if (bytes > length) 340 bytes = length; 341 342 memcpy(dest, src, bytes); 343 344 length -= bytes; 345 addr += bytes; 346 347 offset = 0; 348 buf++; 349 i++; 350 351 if (i == RXE_BUF_PER_MAP) { 352 i = 0; 353 map++; 354 buf = map[0]->buf; 355 } 356 } 357 358 return 0; 359 360 err1: 361 return err; 362 } 363 364 /* copy data in or out of a wqe, i.e. sg list 365 * under the control of a dma descriptor 366 */ 367 int copy_data( 368 struct rxe_pd *pd, 369 int access, 370 struct rxe_dma_info *dma, 371 void *addr, 372 int length, 373 enum rxe_mr_copy_dir dir) 374 { 375 int bytes; 376 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 377 int offset = dma->sge_offset; 378 int resid = dma->resid; 379 struct rxe_mr *mr = NULL; 380 u64 iova; 381 int err; 382 383 if (length == 0) 384 return 0; 385 386 if (length > resid) { 387 err = -EINVAL; 388 goto err2; 389 } 390 391 if (sge->length && (offset < sge->length)) { 392 mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL); 393 if (!mr) { 394 err = -EINVAL; 395 goto err1; 396 } 397 } 398 399 while (length > 0) { 400 bytes = length; 401 402 if (offset >= sge->length) { 403 if (mr) { 404 rxe_drop_ref(mr); 405 mr = NULL; 406 } 407 sge++; 408 dma->cur_sge++; 409 offset = 0; 410 411 if (dma->cur_sge >= dma->num_sge) { 412 err = -ENOSPC; 413 goto err2; 414 } 415 416 if (sge->length) { 417 mr = lookup_mr(pd, access, sge->lkey, 418 RXE_LOOKUP_LOCAL); 419 if (!mr) { 420 err = -EINVAL; 421 goto err1; 422 } 423 } else { 424 continue; 425 } 426 } 427 428 if (bytes > sge->length - offset) 429 bytes = sge->length - offset; 430 431 if (bytes > 0) { 432 iova = sge->addr + offset; 433 434 err = rxe_mr_copy(mr, iova, addr, bytes, dir); 435 if (err) 436 goto err2; 437 438 offset += bytes; 439 resid -= bytes; 440 length -= bytes; 441 addr += bytes; 442 } 443 } 444 445 dma->sge_offset = offset; 446 dma->resid = resid; 447 448 if (mr) 449 rxe_drop_ref(mr); 450 451 return 0; 452 453 err2: 454 if (mr) 455 rxe_drop_ref(mr); 456 err1: 457 return err; 458 } 459 460 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) 461 { 462 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 463 int offset = dma->sge_offset; 464 int resid = dma->resid; 465 466 while (length) { 467 unsigned int bytes; 468 469 if (offset >= sge->length) { 470 sge++; 471 dma->cur_sge++; 472 offset = 0; 473 if (dma->cur_sge >= dma->num_sge) 474 return -ENOSPC; 475 } 476 477 bytes = length; 478 479 if (bytes > sge->length - offset) 480 bytes = sge->length - offset; 481 482 offset += bytes; 483 resid -= bytes; 484 length -= bytes; 485 } 486 487 dma->sge_offset = offset; 488 dma->resid = resid; 489 490 return 0; 491 } 492 493 /* (1) find the mr corresponding to lkey/rkey 494 * depending on lookup_type 495 * (2) verify that the (qp) pd matches the mr pd 496 * (3) verify that the mr can support the requested access 497 * (4) verify that mr state is valid 498 */ 499 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, 500 enum rxe_mr_lookup_type type) 501 { 502 struct rxe_mr *mr; 503 struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 504 int index = key >> 8; 505 506 mr = rxe_pool_get_index(&rxe->mr_pool, index); 507 if (!mr) 508 return NULL; 509 510 if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) || 511 (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) || 512 mr_pd(mr) != pd || (access && !(access & mr->access)) || 513 mr->state != RXE_MR_STATE_VALID)) { 514 rxe_drop_ref(mr); 515 mr = NULL; 516 } 517 518 return mr; 519 } 520 521 int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) 522 { 523 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 524 struct rxe_mr *mr; 525 int ret; 526 527 mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); 528 if (!mr) { 529 pr_err("%s: No MR for rkey %#x\n", __func__, rkey); 530 ret = -EINVAL; 531 goto err; 532 } 533 534 if (rkey != mr->ibmr.rkey) { 535 pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n", 536 __func__, rkey, mr->ibmr.rkey); 537 ret = -EINVAL; 538 goto err_drop_ref; 539 } 540 541 if (atomic_read(&mr->num_mw) > 0) { 542 pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n", 543 __func__); 544 ret = -EINVAL; 545 goto err_drop_ref; 546 } 547 548 mr->state = RXE_MR_STATE_FREE; 549 ret = 0; 550 551 err_drop_ref: 552 rxe_drop_ref(mr); 553 err: 554 return ret; 555 } 556 557 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 558 { 559 struct rxe_mr *mr = to_rmr(ibmr); 560 561 if (atomic_read(&mr->num_mw) > 0) { 562 pr_warn("%s: Attempt to deregister an MR while bound to MWs\n", 563 __func__); 564 return -EINVAL; 565 } 566 567 mr->state = RXE_MR_STATE_ZOMBIE; 568 rxe_drop_ref(mr_pd(mr)); 569 rxe_drop_index(mr); 570 rxe_drop_ref(mr); 571 572 return 0; 573 } 574 575 void rxe_mr_cleanup(struct rxe_pool_entry *arg) 576 { 577 struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem); 578 int i; 579 580 ib_umem_release(mr->umem); 581 582 if (mr->map) { 583 for (i = 0; i < mr->num_map; i++) 584 kfree(mr->map[i]); 585 586 kfree(mr->map); 587 } 588 } 589