/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *	Redistribution and use in source and binary forms, with or
 *	without modification, are permitted provided that the following
 *	conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static unsigned key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
	case RXE_MEM_TYPE_FMR:
		return ((iova < mem->iova) ||
			((iova + length) > (mem->iova + mem->length))) ?
			-EFAULT : 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ	\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	u32 lkey = mem->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	if (mem->pelem.pool->type == RXE_TYPE_MR) {
		mem->ibmr.lkey = lkey;
		mem->ibmr.rkey = rkey;
	}

	mem->lkey = lkey;
	mem->rkey = rkey;
	mem->state = RXE_MEM_STATE_INVALID;
	mem->type = RXE_MEM_TYPE_NONE;
	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}

void rxe_mem_cleanup(void *arg)
{
	struct rxe_mem *mem = arg;
	int i;

	if (mem->umem)
		ib_umem_release(mem->umem);

	if (mem->map) {
		for (i = 0; i < mem->num_map; i++)
			kfree(mem->map[i]);

		kfree(mem->map);
	}
}
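
/* Allocate the two-level buffer map for num_buf physical buffers:
 * mem->map becomes an array of num_map pointers, each to a rxe_map
 * block holding RXE_BUF_PER_MAP buffer descriptors. On failure,
 * everything allocated so far is freed and -ENOMEM is returned.
 */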
static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mem->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mem->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mem->map[i])
			goto err2;
	}

	WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
	mem->map_mask = RXE_BUF_PER_MAP - 1;

	mem->num_buf = num_buf;
	mem->num_map = num_map;
	mem->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mem->map[i]);

	kfree(mem->map);
err1:
	return -ENOMEM;
}

int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
		     int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	mem->pd = pd;
	mem->access = access;
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_DMA;

	return 0;
}

int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	int entry;
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = umem->nmap;

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(rxe, mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		mem->umem = NULL;	/* avoid a second release in rxe_mem_cleanup() */
		ib_umem_release(umem);
		goto err1;
	}

	WARN_ON(!is_power_of_2(umem->page_size));

	mem->page_shift = ilog2(umem->page_size);
	mem->page_mask = umem->page_size - 1;

	num_buf = 0;
	map = mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
			vaddr = page_address(sg_page(sg));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = umem->page_size;
			num_buf++;
			buf++;

			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}
		}
	}

	mem->pd = pd;
	mem->umem = umem;
	mem->access = access;
	mem->length = length;
	mem->iova = iova;
	mem->va = start;
	mem->offset = ib_umem_offset(umem);
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int err;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	err = rxe_mem_alloc(rxe, mem, max_pages);
	if (err)
		goto err1;

	mem->pd = pd;
	mem->max_buf = max_pages;
	mem->state = RXE_MEM_STATE_FREE;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}
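
/* Translate an iova inside the registered range into a map index
 * (*m_out), a buffer index within that map (*n_out) and a byte offset
 * into that buffer (*offset_out). Uses shift/mask arithmetic when all
 * buffers share one page size (page_shift is set), otherwise walks the
 * buffer list summing buffer sizes.
 */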
static void lookup_iova(
	struct rxe_mem *mem,
	u64 iova,
	int *m_out,
	int *n_out,
	size_t *offset_out)
{
	size_t offset = iova - mem->iova + mem->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mem->page_shift)) {
		*offset_out = offset & mem->page_mask;
		offset >>= mem->page_shift;
		*n_out = offset & mem->map_mask;
		*m_out = offset >> mem->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mem->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mem->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mem->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute an incremental crc32
 * if crcp is not NULL. The caller must hold a reference to mem.
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
		 enum copy_direction dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (mem->type == RXE_MEM_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == to_mem_obj) ?
			addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mem_obj) ?
			((void *)(uintptr_t)iova) : addr;

		if (crcp)
			*crcp = crc32_le(*crcp, src, length);

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON(!mem->map);

	err = mem_check_range(mem, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mem, iova, &m, &i, &offset);

	map = mem->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == to_mem_obj) ? addr : va;
		dest = (dir == to_mem_obj) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		if (crcp)
			crc = crc32_le(crc, src, bytes);

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. its sg list,
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_dev *rxe,
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum copy_direction dir,
	u32 *crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mem *mem = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mem = lookup_mem(pd, access, sge->lkey, lookup_local);
		if (!mem) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mem) {
				rxe_drop_ref(mem);
				mem = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mem = lookup_mem(pd, access, sge->lkey,
						 lookup_local);
				if (!mem) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mem)
		rxe_drop_ref(mem);

	return 0;

err2:
	if (mem)
		rxe_drop_ref(mem);
err1:
	return err;
}
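
/* Advance the position in a dma descriptor's sg list by length bytes
 * without copying any data, stepping cur_sge/sge_offset across sge
 * boundaries. Returns -ENOSPC if length runs past the last sge.
 */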
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_mem *mem;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
		mem = rxe_pool_get_index(&rxe->mr_pool, index);
		if (!mem)
			goto err1;
	} else {
		goto err1;
	}

	if ((type == lookup_local && mem->lkey != key) ||
	    (type == lookup_remote && mem->rkey != key))
		goto err2;

	if (mem->pd != pd)
		goto err2;

	if (access && !(access & mem->access))
		goto err2;

	if (mem->state != RXE_MEM_STATE_VALID)
		goto err2;

	return mem;

err2:
	rxe_drop_ref(mem);
err1:
	return NULL;
}
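
/* Populate the buffer map from an array of num_pages page addresses
 * (each buffer is one page of 1 << page_shift bytes) and mark the
 * region valid over [iova, iova + (num_pages << page_shift)).
 */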
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
		      u64 *page, int num_pages, u64 iova)
{
	int i;
	int num_buf;
	int err;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int page_size;

	if (num_pages > mem->max_buf) {
		err = -EINVAL;
		goto err1;
	}

	num_buf = 0;
	page_size = 1 << mem->page_shift;
	map = mem->map;
	buf = map[0]->buf;

	for (i = 0; i < num_pages; i++) {
		buf->addr = *page++;
		buf->size = page_size;
		buf++;
		num_buf++;

		if (num_buf == RXE_BUF_PER_MAP) {
			map++;
			buf = map[0]->buf;
			num_buf = 0;
		}
	}

	mem->iova = iova;
	mem->va = iova;
	mem->length = num_pages << mem->page_shift;
	mem->state = RXE_MEM_STATE_VALID;

	return 0;

err1:
	return err;
}