1 /* 2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include "rxe.h" 35 #include "rxe_loc.h" 36 37 /* 38 * lfsr (linear feedback shift register) with period 255 39 */ 40 static u8 rxe_get_key(void) 41 { 42 static u32 key = 1; 43 44 key = key << 1; 45 46 key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) 47 ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); 48 49 key &= 0xff; 50 51 return key; 52 } 53 54 int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) 55 { 56 switch (mem->type) { 57 case RXE_MEM_TYPE_DMA: 58 return 0; 59 60 case RXE_MEM_TYPE_MR: 61 case RXE_MEM_TYPE_FMR: 62 if (iova < mem->iova || 63 length > mem->length || 64 iova > mem->iova + mem->length - length) 65 return -EFAULT; 66 return 0; 67 68 default: 69 return -EFAULT; 70 } 71 } 72 73 #define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ 74 | IB_ACCESS_REMOTE_WRITE \ 75 | IB_ACCESS_REMOTE_ATOMIC) 76 77 static void rxe_mem_init(int access, struct rxe_mem *mem) 78 { 79 u32 lkey = mem->pelem.index << 8 | rxe_get_key(); 80 u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; 81 82 if (mem->pelem.pool->type == RXE_TYPE_MR) { 83 mem->ibmr.lkey = lkey; 84 mem->ibmr.rkey = rkey; 85 } 86 87 mem->lkey = lkey; 88 mem->rkey = rkey; 89 mem->state = RXE_MEM_STATE_INVALID; 90 mem->type = RXE_MEM_TYPE_NONE; 91 mem->map_shift = ilog2(RXE_BUF_PER_MAP); 92 } 93 94 void rxe_mem_cleanup(struct rxe_pool_entry *arg) 95 { 96 struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem); 97 int i; 98 99 if (mem->umem) 100 ib_umem_release(mem->umem); 101 102 if (mem->map) { 103 for (i = 0; i < mem->num_map; i++) 104 kfree(mem->map[i]); 105 106 kfree(mem->map); 107 } 108 } 109 110 static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf) 111 { 112 int i; 113 int num_map; 114 struct rxe_map **map = mem->map; 115 116 num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 117 118 mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); 119 if (!mem->map) 120 goto err1; 121 122 for (i = 0; i < num_map; i++) { 123 mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); 124 if (!mem->map[i]) 125 goto err2; 126 } 127 128 BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 129 130 mem->map_shift = ilog2(RXE_BUF_PER_MAP); 131 mem->map_mask = RXE_BUF_PER_MAP - 1; 132 133 mem->num_buf = num_buf; 134 mem->num_map = num_map; 135 mem->max_buf = num_map * RXE_BUF_PER_MAP; 136 137 return 0; 138 139 err2: 140 for (i--; i >= 0; i--) 141 kfree(mem->map[i]); 142 143 kfree(mem->map); 144 err1: 145 return -ENOMEM; 146 } 147 148 int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, 149 int access, struct rxe_mem *mem) 150 { 151 rxe_mem_init(access, mem); 152 153 mem->pd = pd; 154 mem->access = access; 155 mem->state = RXE_MEM_STATE_VALID; 156 mem->type = RXE_MEM_TYPE_DMA; 157 158 return 0; 159 } 160 161 int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, 162 u64 length, u64 iova, int access, struct ib_udata *udata, 163 struct rxe_mem *mem) 164 { 165 int entry; 166 struct rxe_map **map; 167 struct rxe_phys_buf *buf = NULL; 168 struct ib_umem *umem; 169 struct scatterlist *sg; 170 int num_buf; 171 void *vaddr; 172 int err; 173 174 umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0); 175 if (IS_ERR(umem)) { 176 pr_warn("err %d from rxe_umem_get\n", 177 (int)PTR_ERR(umem)); 178 err = -EINVAL; 179 goto err1; 180 } 181 182 mem->umem = umem; 183 num_buf = umem->nmap; 184 185 rxe_mem_init(access, mem); 186 187 err = rxe_mem_alloc(rxe, mem, num_buf); 188 if (err) { 189 pr_warn("err %d from rxe_mem_alloc\n", err); 190 ib_umem_release(umem); 191 goto err1; 192 } 193 194 WARN_ON_ONCE(!is_power_of_2(umem->page_size)); 195 196 mem->page_shift = ilog2(umem->page_size); 197 mem->page_mask = umem->page_size - 1; 198 199 num_buf = 0; 200 map = mem->map; 201 if (length > 0) { 202 buf = map[0]->buf; 203 204 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 205 vaddr = page_address(sg_page(sg)); 206 if (!vaddr) { 207 pr_warn("null vaddr\n"); 208 err = -ENOMEM; 209 goto err1; 210 } 211 212 buf->addr = (uintptr_t)vaddr; 213 buf->size = umem->page_size; 214 num_buf++; 215 buf++; 216 217 if (num_buf >= RXE_BUF_PER_MAP) { 218 map++; 219 buf = map[0]->buf; 220 num_buf = 0; 221 } 222 } 223 } 224 225 mem->pd = pd; 226 mem->umem = umem; 227 mem->access = access; 228 mem->length = length; 229 mem->iova = iova; 230 mem->va = start; 231 mem->offset = ib_umem_offset(umem); 232 mem->state = RXE_MEM_STATE_VALID; 233 mem->type = RXE_MEM_TYPE_MR; 234 235 return 0; 236 237 err1: 238 return err; 239 } 240 241 int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, 242 int max_pages, struct rxe_mem *mem) 243 { 244 int err; 245 246 rxe_mem_init(0, mem); 247 248 /* In fastreg, we also set the rkey */ 249 mem->ibmr.rkey = mem->ibmr.lkey; 250 251 err = rxe_mem_alloc(rxe, mem, max_pages); 252 if (err) 253 goto err1; 254 255 mem->pd = pd; 256 mem->max_buf = max_pages; 257 mem->state = RXE_MEM_STATE_FREE; 258 mem->type = RXE_MEM_TYPE_MR; 259 260 return 0; 261 262 err1: 263 return err; 264 } 265 266 static void lookup_iova( 267 struct rxe_mem *mem, 268 u64 iova, 269 int *m_out, 270 int *n_out, 271 size_t *offset_out) 272 { 273 size_t offset = iova - mem->iova + mem->offset; 274 int map_index; 275 int buf_index; 276 u64 length; 277 278 if (likely(mem->page_shift)) { 279 *offset_out = offset & mem->page_mask; 280 offset >>= mem->page_shift; 281 *n_out = offset & mem->map_mask; 282 *m_out = offset >> mem->map_shift; 283 } else { 284 map_index = 0; 285 buf_index = 0; 286 287 length = mem->map[map_index]->buf[buf_index].size; 288 289 while (offset >= length) { 290 offset -= length; 291 buf_index++; 292 293 if (buf_index == RXE_BUF_PER_MAP) { 294 map_index++; 295 buf_index = 0; 296 } 297 length = mem->map[map_index]->buf[buf_index].size; 298 } 299 300 *m_out = map_index; 301 *n_out = buf_index; 302 *offset_out = offset; 303 } 304 } 305 306 void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) 307 { 308 size_t offset; 309 int m, n; 310 void *addr; 311 312 if (mem->state != RXE_MEM_STATE_VALID) { 313 pr_warn("mem not in valid state\n"); 314 addr = NULL; 315 goto out; 316 } 317 318 if (!mem->map) { 319 addr = (void *)(uintptr_t)iova; 320 goto out; 321 } 322 323 if (mem_check_range(mem, iova, length)) { 324 pr_warn("range violation\n"); 325 addr = NULL; 326 goto out; 327 } 328 329 lookup_iova(mem, iova, &m, &n, &offset); 330 331 if (offset + length > mem->map[m]->buf[n].size) { 332 pr_warn("crosses page boundary\n"); 333 addr = NULL; 334 goto out; 335 } 336 337 addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; 338 339 out: 340 return addr; 341 } 342 343 /* copy data from a range (vaddr, vaddr+length-1) to or from 344 * a mem object starting at iova. Compute incremental value of 345 * crc32 if crcp is not zero. caller must hold a reference to mem 346 */ 347 int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, 348 enum copy_direction dir, u32 *crcp) 349 { 350 int err; 351 int bytes; 352 u8 *va; 353 struct rxe_map **map; 354 struct rxe_phys_buf *buf; 355 int m; 356 int i; 357 size_t offset; 358 u32 crc = crcp ? (*crcp) : 0; 359 360 if (length == 0) 361 return 0; 362 363 if (mem->type == RXE_MEM_TYPE_DMA) { 364 u8 *src, *dest; 365 366 src = (dir == to_mem_obj) ? 367 addr : ((void *)(uintptr_t)iova); 368 369 dest = (dir == to_mem_obj) ? 370 ((void *)(uintptr_t)iova) : addr; 371 372 if (crcp) 373 *crcp = crc32_le(*crcp, src, length); 374 375 memcpy(dest, src, length); 376 377 return 0; 378 } 379 380 WARN_ON_ONCE(!mem->map); 381 382 err = mem_check_range(mem, iova, length); 383 if (err) { 384 err = -EFAULT; 385 goto err1; 386 } 387 388 lookup_iova(mem, iova, &m, &i, &offset); 389 390 map = mem->map + m; 391 buf = map[0]->buf + i; 392 393 while (length > 0) { 394 u8 *src, *dest; 395 396 va = (u8 *)(uintptr_t)buf->addr + offset; 397 src = (dir == to_mem_obj) ? addr : va; 398 dest = (dir == to_mem_obj) ? va : addr; 399 400 bytes = buf->size - offset; 401 402 if (bytes > length) 403 bytes = length; 404 405 if (crcp) 406 crc = crc32_le(crc, src, bytes); 407 408 memcpy(dest, src, bytes); 409 410 length -= bytes; 411 addr += bytes; 412 413 offset = 0; 414 buf++; 415 i++; 416 417 if (i == RXE_BUF_PER_MAP) { 418 i = 0; 419 map++; 420 buf = map[0]->buf; 421 } 422 } 423 424 if (crcp) 425 *crcp = crc; 426 427 return 0; 428 429 err1: 430 return err; 431 } 432 433 /* copy data in or out of a wqe, i.e. sg list 434 * under the control of a dma descriptor 435 */ 436 int copy_data( 437 struct rxe_dev *rxe, 438 struct rxe_pd *pd, 439 int access, 440 struct rxe_dma_info *dma, 441 void *addr, 442 int length, 443 enum copy_direction dir, 444 u32 *crcp) 445 { 446 int bytes; 447 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 448 int offset = dma->sge_offset; 449 int resid = dma->resid; 450 struct rxe_mem *mem = NULL; 451 u64 iova; 452 int err; 453 454 if (length == 0) 455 return 0; 456 457 if (length > resid) { 458 err = -EINVAL; 459 goto err2; 460 } 461 462 if (sge->length && (offset < sge->length)) { 463 mem = lookup_mem(pd, access, sge->lkey, lookup_local); 464 if (!mem) { 465 err = -EINVAL; 466 goto err1; 467 } 468 } 469 470 while (length > 0) { 471 bytes = length; 472 473 if (offset >= sge->length) { 474 if (mem) { 475 rxe_drop_ref(mem); 476 mem = NULL; 477 } 478 sge++; 479 dma->cur_sge++; 480 offset = 0; 481 482 if (dma->cur_sge >= dma->num_sge) { 483 err = -ENOSPC; 484 goto err2; 485 } 486 487 if (sge->length) { 488 mem = lookup_mem(pd, access, sge->lkey, 489 lookup_local); 490 if (!mem) { 491 err = -EINVAL; 492 goto err1; 493 } 494 } else { 495 continue; 496 } 497 } 498 499 if (bytes > sge->length - offset) 500 bytes = sge->length - offset; 501 502 if (bytes > 0) { 503 iova = sge->addr + offset; 504 505 err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); 506 if (err) 507 goto err2; 508 509 offset += bytes; 510 resid -= bytes; 511 length -= bytes; 512 addr += bytes; 513 } 514 } 515 516 dma->sge_offset = offset; 517 dma->resid = resid; 518 519 if (mem) 520 rxe_drop_ref(mem); 521 522 return 0; 523 524 err2: 525 if (mem) 526 rxe_drop_ref(mem); 527 err1: 528 return err; 529 } 530 531 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) 532 { 533 struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 534 int offset = dma->sge_offset; 535 int resid = dma->resid; 536 537 while (length) { 538 unsigned int bytes; 539 540 if (offset >= sge->length) { 541 sge++; 542 dma->cur_sge++; 543 offset = 0; 544 if (dma->cur_sge >= dma->num_sge) 545 return -ENOSPC; 546 } 547 548 bytes = length; 549 550 if (bytes > sge->length - offset) 551 bytes = sge->length - offset; 552 553 offset += bytes; 554 resid -= bytes; 555 length -= bytes; 556 } 557 558 dma->sge_offset = offset; 559 dma->resid = resid; 560 561 return 0; 562 } 563 564 /* (1) find the mem (mr or mw) corresponding to lkey/rkey 565 * depending on lookup_type 566 * (2) verify that the (qp) pd matches the mem pd 567 * (3) verify that the mem can support the requested access 568 * (4) verify that mem state is valid 569 */ 570 struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, 571 enum lookup_type type) 572 { 573 struct rxe_mem *mem; 574 struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 575 int index = key >> 8; 576 577 if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { 578 mem = rxe_pool_get_index(&rxe->mr_pool, index); 579 if (!mem) 580 goto err1; 581 } else { 582 goto err1; 583 } 584 585 if ((type == lookup_local && mem->lkey != key) || 586 (type == lookup_remote && mem->rkey != key)) 587 goto err2; 588 589 if (mem->pd != pd) 590 goto err2; 591 592 if (access && !(access & mem->access)) 593 goto err2; 594 595 if (mem->state != RXE_MEM_STATE_VALID) 596 goto err2; 597 598 return mem; 599 600 err2: 601 rxe_drop_ref(mem); 602 err1: 603 return NULL; 604 } 605 606 int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 607 u64 *page, int num_pages, u64 iova) 608 { 609 int i; 610 int num_buf; 611 int err; 612 struct rxe_map **map; 613 struct rxe_phys_buf *buf; 614 int page_size; 615 616 if (num_pages > mem->max_buf) { 617 err = -EINVAL; 618 goto err1; 619 } 620 621 num_buf = 0; 622 page_size = 1 << mem->page_shift; 623 map = mem->map; 624 buf = map[0]->buf; 625 626 for (i = 0; i < num_pages; i++) { 627 buf->addr = *page++; 628 buf->size = page_size; 629 buf++; 630 num_buf++; 631 632 if (num_buf == RXE_BUF_PER_MAP) { 633 map++; 634 buf = map[0]->buf; 635 num_buf = 0; 636 } 637 } 638 639 mem->iova = iova; 640 mem->va = iova; 641 mem->length = num_pages << mem->page_shift; 642 mem->state = RXE_MEM_STATE_VALID; 643 644 return 0; 645 646 err1: 647 return err; 648 } 649