/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *	Redistribution and use in source and binary forms, with or
 *	without modification, are permitted provided that the following
 *	conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static u32 key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
	case RXE_MEM_TYPE_FMR:
		/* written so that iova + length cannot wrap around */
		if (iova < mem->iova || length > mem->length ||
		    iova > mem->iova + mem->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	u32 lkey = mem->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	if (mem->pelem.pool->type == RXE_TYPE_MR) {
		mem->ibmr.lkey = lkey;
		mem->ibmr.rkey = rkey;
	}

	mem->lkey = lkey;
	mem->rkey = rkey;
	mem->state = RXE_MEM_STATE_INVALID;
	mem->type = RXE_MEM_TYPE_NONE;
	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
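
/*
 * Illustrative sketch only, not driver code: how rxe_mem_init() packs a
 * consumer key. Assuming a pool index of 0x12 and an lfsr output of 0xa5:
 *
 *	lkey  = 0x12 << 8 | 0xa5;	// 0x12a5
 *	index = lkey >> 8;		// 0x12, recovers the pool index
 *	key   = lkey & 0xff;		// 0xa5, varies per registration
 *
 * lookup_mem() below inverts this layout: the upper bits index the mr
 * pool, and the full key is then compared against mem->lkey or mem->rkey.
 */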

void rxe_mem_cleanup(void *arg)
{
	struct rxe_mem *mem = arg;
	int i;

	if (mem->umem)
		ib_umem_release(mem->umem);

	if (mem->map) {
		for (i = 0; i < mem->num_map; i++)
			kfree(mem->map[i]);

		kfree(mem->map);
	}
}

static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mem->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mem->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mem->map[i])
			goto err2;
	}

	WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
	mem->map_mask = RXE_BUF_PER_MAP - 1;

	mem->num_buf = num_buf;
	mem->num_map = num_map;
	mem->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mem->map[i]);

	kfree(mem->map);
err1:
	return -ENOMEM;
}
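
/*
 * Illustrative sketch only: the geometry rxe_mem_alloc() builds, assuming
 * RXE_BUF_PER_MAP is 256 (PAGE_SIZE / sizeof(struct rxe_phys_buf) with
 * 4 KiB pages). For num_buf = 300:
 *
 *	num_map = (300 + 256 - 1) / 256;	// 2 map pages
 *	max_buf = 2 * 256;			// 512 buffer slots
 *
 * mem->map is then an array of two pointers, each to a rxe_map holding
 * 256 rxe_phys_buf entries; buffer k lives at map[k / 256]->buf[k % 256].
 */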

int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
		     int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	mem->pd = pd;
	mem->access = access;
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_DMA;

	return 0;
}

int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	int entry;
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = umem->nmap;

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(rxe, mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		/* clear the pointer so rxe_mem_cleanup() does not
		 * release the umem a second time
		 */
		mem->umem = NULL;
		ib_umem_release(umem);
		goto err1;
	}

	WARN_ON(!is_power_of_2(umem->page_size));

	mem->page_shift = ilog2(umem->page_size);
	mem->page_mask = umem->page_size - 1;

	num_buf = 0;
	map = mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
			vaddr = page_address(sg_page(sg));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = umem->page_size;
			num_buf++;
			buf++;

			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}
		}
	}

	mem->pd = pd;
	mem->umem = umem;
	mem->access = access;
	mem->length = length;
	mem->iova = iova;
	mem->va = start;
	mem->offset = ib_umem_offset(umem);
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int err;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	err = rxe_mem_alloc(rxe, mem, max_pages);
	if (err)
		goto err1;

	mem->pd = pd;
	mem->max_buf = max_pages;
	mem->state = RXE_MEM_STATE_FREE;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

static void lookup_iova(
	struct rxe_mem	*mem,
	u64		iova,
	int		*m_out,
	int		*n_out,
	size_t		*offset_out)
{
	size_t offset = iova - mem->iova + mem->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mem->page_shift)) {
		/* uniform buffer size: compute the indices directly */
		*offset_out = offset & mem->page_mask;
		offset >>= mem->page_shift;
		*n_out = offset & mem->map_mask;
		*m_out = offset >> mem->map_shift;
	} else {
		/* mixed buffer sizes: walk the buffers linearly */
		map_index = 0;
		buf_index = 0;

		length = mem->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mem->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}
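
/*
 * Illustrative sketch only: the fast path of lookup_iova() above, assuming
 * page_shift = 12 (4 KiB pages) and map_shift = 8 (RXE_BUF_PER_MAP = 256).
 * For offset = iova - mem->iova + mem->offset = 0x513abc:
 *
 *	*offset_out = 0x513abc & 0xfff;	// 0xabc, byte within the page
 *	offset >>= 12;			// 0x513, page number
 *	*n_out = 0x513 & 0xff;		// 0x13, buf index within the map
 *	*m_out = 0x513 >> 8;		// 0x5, map index
 *
 * i.e. the data starts at mem->map[5]->buf[0x13] plus 0xabc bytes.
 */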

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mem->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute the incremental crc32
 * if crcp is not NULL. The caller must hold a reference to mem.
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
		 enum copy_direction dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mem->type == RXE_MEM_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == to_mem_obj) ?
			addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mem_obj) ?
			((void *)(uintptr_t)iova) : addr;

		if (crcp)
			*crcp = crc32_le(*crcp, src, length);

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON(!mem->map);

	err = mem_check_range(mem, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mem, iova, &m, &i, &offset);

	map = mem->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == to_mem_obj) ? addr : va;
		dest = (dir == to_mem_obj) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		if (crcp)
			crc = crc32_le(crc, src, bytes);

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_dev		*rxe,
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum copy_direction	dir,
	u32			*crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mem *mem = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mem = lookup_mem(pd, access, sge->lkey, lookup_local);
		if (!mem) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mem) {
				rxe_drop_ref(mem);
				mem = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mem = lookup_mem(pd, access, sge->lkey,
						 lookup_local);
				if (!mem) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mem)
		rxe_drop_ref(mem);

	return 0;

err2:
	if (mem)
		rxe_drop_ref(mem);
err1:
	return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}
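
/*
 * Illustrative sketch only: how copy_data() above walks the sg list.
 * Assuming two sges of 0x1000 bytes each, sge_offset 0 and a 0x1800 byte
 * copy, the loop issues one rxe_mem_copy() per fragment:
 *
 *	rxe_mem_copy(mem0, sge[0].addr, addr, 0x1000, dir, crcp);
 *	// offset reaches sge[0].length: drop mem0, advance to sge[1]
 *	rxe_mem_copy(mem1, sge[1].addr, addr + 0x1000, 0x800, dir, crcp);
 *
 * and finishes with dma->cur_sge = 1, dma->sge_offset = 0x800 and
 * dma->resid reduced by 0x1800.
 */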

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_mem *mem;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
		mem = rxe_pool_get_index(&rxe->mr_pool, index);
		if (!mem)
			goto err1;
	} else {
		goto err1;
	}

	if ((type == lookup_local && mem->lkey != key) ||
	    (type == lookup_remote && mem->rkey != key))
		goto err2;

	if (mem->pd != pd)
		goto err2;

	if (access && !(access & mem->access))
		goto err2;

	if (mem->state != RXE_MEM_STATE_VALID)
		goto err2;

	return mem;

err2:
	rxe_drop_ref(mem);
err1:
	return NULL;
}

int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
		      u64 *page, int num_pages, u64 iova)
{
	int i;
	int num_buf;
	int err;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int page_size;

	if (num_pages > mem->max_buf) {
		err = -EINVAL;
		goto err1;
	}

	num_buf = 0;
	page_size = 1 << mem->page_shift;
	map = mem->map;
	buf = map[0]->buf;

	for (i = 0; i < num_pages; i++) {
		buf->addr = *page++;
		buf->size = page_size;
		buf++;
		num_buf++;

		if (num_buf == RXE_BUF_PER_MAP) {
			map++;
			buf = map[0]->buf;
			num_buf = 0;
		}
	}

	mem->iova = iova;
	mem->va = iova;
	/* widen before shifting so a large num_pages cannot overflow int */
	mem->length = (u64)num_pages << mem->page_shift;
	mem->state = RXE_MEM_STATE_VALID;

	return 0;

err1:
	return err;
}
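
/*
 * Illustrative sketch only: a caller populating a fast-register mem via
 * rxe_mem_map_pages() above, with hypothetical page addresses and
 * page_shift = 12 (4 KiB pages):
 *
 *	u64 pages[3] = { addr0, addr1, addr2 };	// hypothetical kernel vaddrs
 *
 *	err = rxe_mem_map_pages(rxe, mem, pages, 3, iova);
 *
 * On success map[0]->buf[0..2] each describe one 4 KiB buffer,
 * mem->length = 3 << 12 = 0x3000, and the mem is valid for RDMA in
 * [iova, iova + 0x3000).
 */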