// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different than the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \
			  | IB_ACCESS_REMOTE_WRITE \
			  | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}
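
/* Illustrative sketch, not part of the driver: the helper name below is
 * hypothetical. It shows how an lkey built by rxe_mr_init() above splits
 * back into the MR pool index (upper 24 bits) and the random 8-bit key
 * from rxe_get_next_key(), which is the relationship that lookup_mr() and
 * rxe_invalidate_mr() later rely on when they compute "key >> 8".
 */
static inline void rxe_mr_key_decompose_example(u32 lkey, u32 *index, u8 *var)
{
	*index = lkey >> 8;	/* pool index used by rxe_pool_get_index() */
	*var = lkey & 0xff;	/* variable 8-bit key portion */
}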

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err1;
	}

	mr->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mr_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}
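
/* Illustrative sketch, not part of the driver: the helper name below is
 * hypothetical. It restates the fast-path arithmetic of lookup_iova()
 * above for a page-backed MR (mr->page_shift != 0): given the byte offset
 * of an iova from the start of the MR, the in-page offset, buffer slot and
 * map index are recovered with shifts and masks alone.
 */
static inline void rxe_mr_offset_split_example(struct rxe_mr *mr, size_t off,
					       int *m, int *n, size_t *page_off)
{
	*page_off = off & mr->page_mask;	/* offset within one page */
	off >>= mr->page_shift;			/* page number within the MR */
	*n = off & mr->map_mask;		/* buffer slot within a map */
	*m = off >> mr->map_shift;		/* which map holds that slot */
}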

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mr object starting at iova. Compute incremental value of
 * crc32 if crcp is not zero. caller must hold a reference to mr
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ?
			((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir,
	u32			*crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}
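
/* Illustrative sketch, not part of the driver: the helper name and the
 * access flag choice below are hypothetical. It shows a minimal caller of
 * copy_data() above, copying "len" bytes of payload into the memory
 * described by a wqe's dma descriptor and skipping the incremental crc32
 * by passing a NULL crcp.
 */
static inline int rxe_copy_payload_example(struct rxe_pd *pd,
					   struct rxe_dma_info *dma,
					   void *payload, int len)
{
	return copy_data(pd, IB_ACCESS_LOCAL_WRITE, dma, payload, len,
			 RXE_TO_MR_OBJ, NULL);
}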

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
			__func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}
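
/* Illustrative sketch, not part of the driver: the helper name below is
 * hypothetical. It shows how lookup_mr() and mr_check_range() above might
 * be combined to vet a remote write against an rkey before any payload is
 * copied, dropping the reference that lookup_mr() took on success.
 */
static inline int rxe_check_remote_write_example(struct rxe_pd *pd, u32 rkey,
						 u64 iova, size_t length)
{
	struct rxe_mr *mr;
	int err;

	mr = lookup_mr(pd, IB_ACCESS_REMOTE_WRITE, rkey, RXE_LOOKUP_REMOTE);
	if (!mr)
		return -EINVAL;

	err = mr_check_range(mr, iova, length);

	rxe_drop_ref(mr);	/* drop the reference taken by lookup_mr() */
	return err;
}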