1 /* 2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include "rxe.h" 35 #include "rxe_loc.h" 36 37 /* 38 * lfsr (linear feedback shift register) with period 255 39 */ 40 static u8 rxe_get_key(void) 41 { 42 static u32 key = 1; 43 44 key = key << 1; 45 46 key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) 47 ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); 48 49 key &= 0xff; 50 51 return key; 52 } 53 54 int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) 55 { 56 switch (mem->type) { 57 case RXE_MEM_TYPE_DMA: 58 return 0; 59 60 case RXE_MEM_TYPE_MR: 61 case RXE_MEM_TYPE_FMR: 62 if (iova < mem->iova || 63 length > mem->length || 64 iova > mem->iova + mem->length - length) 65 return -EFAULT; 66 return 0; 67 68 default: 69 return -EFAULT; 70 } 71 } 72 73 #define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ 74 | IB_ACCESS_REMOTE_WRITE \ 75 | IB_ACCESS_REMOTE_ATOMIC) 76 77 static void rxe_mem_init(int access, struct rxe_mem *mem) 78 { 79 u32 lkey = mem->pelem.index << 8 | rxe_get_key(); 80 u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; 81 82 if (mem->pelem.pool->type == RXE_TYPE_MR) { 83 mem->ibmr.lkey = lkey; 84 mem->ibmr.rkey = rkey; 85 } 86 87 mem->lkey = lkey; 88 mem->rkey = rkey; 89 mem->state = RXE_MEM_STATE_INVALID; 90 mem->type = RXE_MEM_TYPE_NONE; 91 mem->map_shift = ilog2(RXE_BUF_PER_MAP); 92 } 93 94 void rxe_mem_cleanup(struct rxe_pool_entry *arg) 95 { 96 struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem); 97 int i; 98 99 if (mem->umem) 100 ib_umem_release(mem->umem); 101 102 if (mem->map) { 103 for (i = 0; i < mem->num_map; i++) 104 kfree(mem->map[i]); 105 106 kfree(mem->map); 107 } 108 } 109 110 static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf) 111 { 112 int i; 113 int num_map; 114 struct rxe_map **map = mem->map; 115 116 num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 117 118 mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); 119 if (!mem->map) 120 goto err1; 121 122 for (i = 0; i < num_map; i++) { 123 mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); 124 if (!mem->map[i]) 125 goto err2; 126 } 127 128 BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 129 130 mem->map_shift = ilog2(RXE_BUF_PER_MAP); 131 mem->map_mask = RXE_BUF_PER_MAP - 1; 132 133 mem->num_buf = num_buf; 134 mem->num_map = num_map; 135 mem->max_buf = num_map * RXE_BUF_PER_MAP; 136 137 return 0; 138 139 err2: 140 for (i--; i >= 0; i--) 141 kfree(mem->map[i]); 142 143 kfree(mem->map); 144 err1: 145 return -ENOMEM; 146 } 147 148 int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, 149 int access, struct rxe_mem *mem) 150 { 151 rxe_mem_init(access, mem); 152 153 mem->pd = pd; 154 mem->access = access; 155 mem->state = RXE_MEM_STATE_VALID; 156 mem->type = RXE_MEM_TYPE_DMA; 157 158 return 0; 159 } 160 161 int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, 162 u64 length, u64 iova, int access, struct ib_udata *udata, 163 struct rxe_mem *mem) 164 { 165 int entry; 166 struct rxe_map **map; 167 struct rxe_phys_buf *buf = NULL; 168 struct ib_umem *umem; 169 struct scatterlist *sg; 170 int num_buf; 171 void *vaddr; 172 int err; 173 174 umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0); 175 if (IS_ERR(umem)) { 176 pr_warn("err %d from rxe_umem_get\n", 177 (int)PTR_ERR(umem)); 178 err = -EINVAL; 179 goto err1; 180 } 181 182 mem->umem = umem; 183 num_buf = umem->nmap; 184 185 rxe_mem_init(access, mem); 186 187 err = rxe_mem_alloc(rxe, mem, num_buf); 188 if (err) { 189 pr_warn("err %d from rxe_mem_alloc\n", err); 190 ib_umem_release(umem); 191 goto err1; 192 } 193 194 mem->page_shift = umem->page_shift; 195 mem->page_mask = BIT(umem->page_shift) - 1; 196 197 num_buf = 0; 198 map = mem->map; 199 if (length > 0) { 200 buf = map[0]->buf; 201 202 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 203 vaddr = page_address(sg_page(sg)); 204 if (!vaddr) { 205 pr_warn("null vaddr\n"); 206 err = -ENOMEM; 207 goto err1; 208 } 209 210 buf->addr = (uintptr_t)vaddr; 211 buf->size = BIT(umem->page_shift); 212 num_buf++; 213 buf++; 214 215 if (num_buf >= RXE_BUF_PER_MAP) { 216 map++; 217 buf = map[0]->buf; 218 num_buf = 0; 219 } 220 } 221 } 222 223 mem->pd = pd; 224 mem->umem = umem; 225 mem->access = access; 226 mem->length = length; 227 mem->iova = iova; 228 mem->va = start; 229 mem->offset = ib_umem_offset(umem); 230 mem->state = RXE_MEM_STATE_VALID; 231 mem->type = RXE_MEM_TYPE_MR; 232 233 return 0; 234 235 err1: 236 return err; 237 } 238 239 int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, 240 int max_pages, struct rxe_mem *mem) 241 { 242 int err; 243 244 rxe_mem_init(0, mem); 245 246 /* In fastreg, we also set the rkey */ 247 mem->ibmr.rkey = mem->ibmr.lkey; 248 249 err = rxe_mem_alloc(rxe, mem, max_pages); 250 if (err) 251 goto err1; 252 253 mem->pd = pd; 254 mem->max_buf = max_pages; 255 mem->state = RXE_MEM_STATE_FREE; 256 mem->type = RXE_MEM_TYPE_MR; 257 258 return 0; 259 260 err1: 261 return err; 262 } 263 264 static void lookup_iova( 265 struct rxe_mem *mem, 266 u64 iova, 267 int *m_out, 268 int *n_out, 269 size_t *offset_out) 270 { 271 size_t offset = iova - mem->iova + mem->offset; 272 int map_index; 273 int buf_index; 274 u64 length; 275 276 if (likely(mem->page_shift)) { 277 *offset_out = offset & mem->page_mask; 278 offset >>= mem->page_shift; 279 *n_out = offset & mem->map_mask; 280 *m_out = offset >> mem->map_shift; 281 } else { 282 map_index = 0; 283 buf_index = 0; 284 285 length = mem->map[map_index]->buf[buf_index].size; 286 287 while (offset >= length) { 288 offset -= length; 289 buf_index++; 290 291 if (buf_index == RXE_BUF_PER_MAP) { 292 map_index++; 293 buf_index = 0; 294 } 295 length = mem->map[map_index]->buf[buf_index].size; 296 } 297 298 *m_out = map_index; 299 *n_out = buf_index; 300 *offset_out = offset; 301 } 302 } 303 304 void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) 305 { 306 size_t offset; 307 int m, n; 308 void *addr; 309 310 if (mem->state != RXE_MEM_STATE_VALID) { 311 pr_warn("mem not in valid state\n"); 312 addr = NULL; 313 goto out; 314 } 315 316 if (!mem->map) { 317 addr = (void *)(uintptr_t)iova; 318 goto out; 319 } 320 321 if (mem_check_range(mem, iova, length)) { 322 pr_warn("range violation\n"); 323 addr = NULL; 324 goto out; 325 } 326 327 lookup_iova(mem, iova, &m, &n, &offset); 328 329 if (offset + length > mem->map[m]->buf[n].size) { 330 pr_warn("crosses page boundary\n"); 331 addr = NULL; 332 goto out; 333 } 334 335 addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; 336 337 out: 338 return addr; 339 } 340 341 /* copy data from a range (vaddr, vaddr+length-1) to or from 342 * a mem object starting at iova. Compute incremental value of 343 * crc32 if crcp is not zero. caller must hold a reference to mem 344 */ 345 int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, 346 enum copy_direction dir, u32 *crcp) 347 { 348 int err; 349 int bytes; 350 u8 *va; 351 struct rxe_map **map; 352 struct rxe_phys_buf *buf; 353 int m; 354 int i; 355 size_t offset; 356 u32 crc = crcp ? (*crcp) : 0; 357 358 if (length == 0) 359 return 0; 360 361 if (mem->type == RXE_MEM_TYPE_DMA) { 362 u8 *src, *dest; 363 364 src = (dir == to_mem_obj) ? 365 addr : ((void *)(uintptr_t)iova); 366 367 dest = (dir == to_mem_obj) ? 368 ((void *)(uintptr_t)iova) : addr; 369 370 if (crcp) 371 *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device), 372 *crcp, src, length); 373 374 memcpy(dest, src, length); 375 376 return 0; 377 } 378 379 WARN_ON_ONCE(!mem->map); 380 381 err = mem_check_range(mem, iova, length); 382 if (err) { 383 err = -EFAULT; 384 goto err1; 385 } 386 387 lookup_iova(mem, iova, &m, &i, &offset); 388 389 map = mem->map + m; 390 buf = map[0]->buf + i; 391 392 while (length > 0) { 393 u8 *src, *dest; 394 395 va = (u8 *)(uintptr_t)buf->addr + offset; 396 src = (dir == to_mem_obj) ? addr : va; 397 dest = (dir == to_mem_obj) ? va : addr; 398 399 bytes = buf->size - offset; 400 401 if (bytes > length) 402 bytes = length; 403 404 if (crcp) 405 crc = rxe_crc32(to_rdev(mem->pd->ibpd.device), 406 crc, src, bytes); 407 408 memcpy(dest, src, bytes); 409 410 length -= bytes; 411 addr += bytes; 412 413 offset = 0; 414 buf++; 415 i++; 416 417 if (i == RXE_BUF_PER_MAP) { 418 i = 0; 419 map++; 420 buf = map[0]->buf; 421 } 422 } 423 424 if (crcp) 425 *crcp = crc; 426 427 return 0; 428 429 err1: 430 return err; 431 } 432 433 /* copy data in or out of a wqe, i.e. sg list 434 * under the control of a dma descriptor 435 */ 436 int copy_data( 437 struct rxe_dev *rxe, 438 struct rxe_pd *pd, 439 int access, 440 struct rxe_dma_info *dma, 441 void *addr, 442 int length, 443 enum copy_direction dir, 444 u32 *crcp) 445 { 446 int bytes; 447 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 448 int offset = dma->sge_offset; 449 int resid = dma->resid; 450 struct rxe_mem *mem = NULL; 451 u64 iova; 452 int err; 453 454 if (length == 0) 455 return 0; 456 457 if (length > resid) { 458 err = -EINVAL; 459 goto err2; 460 } 461 462 if (sge->length && (offset < sge->length)) { 463 mem = lookup_mem(pd, access, sge->lkey, lookup_local); 464 if (!mem) { 465 err = -EINVAL; 466 goto err1; 467 } 468 } 469 470 while (length > 0) { 471 bytes = length; 472 473 if (offset >= sge->length) { 474 if (mem) { 475 rxe_drop_ref(mem); 476 mem = NULL; 477 } 478 sge++; 479 dma->cur_sge++; 480 offset = 0; 481 482 if (dma->cur_sge >= dma->num_sge) { 483 err = -ENOSPC; 484 goto err2; 485 } 486 487 if (sge->length) { 488 mem = lookup_mem(pd, access, sge->lkey, 489 lookup_local); 490 if (!mem) { 491 err = -EINVAL; 492 goto err1; 493 } 494 } else { 495 continue; 496 } 497 } 498 499 if (bytes > sge->length - offset) 500 bytes = sge->length - offset; 501 502 if (bytes > 0) { 503 iova = sge->addr + offset; 504 505 err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); 506 if (err) 507 goto err2; 508 509 offset += bytes; 510 resid -= bytes; 511 length -= bytes; 512 addr += bytes; 513 } 514 } 515 516 dma->sge_offset = offset; 517 dma->resid = resid; 518 519 if (mem) 520 rxe_drop_ref(mem); 521 522 return 0; 523 524 err2: 525 if (mem) 526 rxe_drop_ref(mem); 527 err1: 528 return err; 529 } 530 531 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) 532 { 533 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 534 int offset = dma->sge_offset; 535 int resid = dma->resid; 536 537 while (length) { 538 unsigned int bytes; 539 540 if (offset >= sge->length) { 541 sge++; 542 dma->cur_sge++; 543 offset = 0; 544 if (dma->cur_sge >= dma->num_sge) 545 return -ENOSPC; 546 } 547 548 bytes = length; 549 550 if (bytes > sge->length - offset) 551 bytes = sge->length - offset; 552 553 offset += bytes; 554 resid -= bytes; 555 length -= bytes; 556 } 557 558 dma->sge_offset = offset; 559 dma->resid = resid; 560 561 return 0; 562 } 563 564 /* (1) find the mem (mr or mw) corresponding to lkey/rkey 565 * depending on lookup_type 566 * (2) verify that the (qp) pd matches the mem pd 567 * (3) verify that the mem can support the requested access 568 * (4) verify that mem state is valid 569 */ 570 struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, 571 enum lookup_type type) 572 { 573 struct rxe_mem *mem; 574 struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 575 int index = key >> 8; 576 577 if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { 578 mem = rxe_pool_get_index(&rxe->mr_pool, index); 579 if (!mem) 580 goto err1; 581 } else { 582 goto err1; 583 } 584 585 if ((type == lookup_local && mem->lkey != key) || 586 (type == lookup_remote && mem->rkey != key)) 587 goto err2; 588 589 if (mem->pd != pd) 590 goto err2; 591 592 if (access && !(access & mem->access)) 593 goto err2; 594 595 if (mem->state != RXE_MEM_STATE_VALID) 596 goto err2; 597 598 return mem; 599 600 err2: 601 rxe_drop_ref(mem); 602 err1: 603 return NULL; 604 } 605 606 int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 607 u64 *page, int num_pages, u64 iova) 608 { 609 int i; 610 int num_buf; 611 int err; 612 struct rxe_map **map; 613 struct rxe_phys_buf *buf; 614 int page_size; 615 616 if (num_pages > mem->max_buf) { 617 err = -EINVAL; 618 goto err1; 619 } 620 621 num_buf = 0; 622 page_size = 1 << mem->page_shift; 623 map = mem->map; 624 buf = map[0]->buf; 625 626 for (i = 0; i < num_pages; i++) { 627 buf->addr = *page++; 628 buf->size = page_size; 629 buf++; 630 num_buf++; 631 632 if (num_buf == RXE_BUF_PER_MAP) { 633 map++; 634 buf = map[0]->buf; 635 num_buf = 0; 636 } 637 } 638 639 mem->iova = iova; 640 mem->va = iova; 641 mem->length = num_pages << mem->page_shift; 642 mem->state = RXE_MEM_STATE_VALID; 643 644 return 0; 645 646 err1: 647 return err; 648 } 649