// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different than the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	mr->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
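			/* page_address() returns NULL for pages that have no
			 * kernel mapping (e.g. unmapped highmem), so bail out
			 * rather than record a bogus buffer address.
			 */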
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mr object starting at iova. Compute incremental value of
 * crc32 if crcp is not zero. caller must hold a reference to mr
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;
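	/* A DMA MR has no page map; the iova is used directly as a kernel
	 * virtual address, so the transfer is a single memcpy plus an
	 * optional CRC update.
	 */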
	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir,
	u32			*crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}
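/* advance the sg list position in the dma descriptor (current sge, offset
 * and residual count) by length bytes without copying any data. Returns
 * -ENOSPC if length runs past the end of the sg list.
 */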
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
		       __func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}