/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *   - Redistributions of source code must retain the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
        static u32 key = 1;

        key = key << 1;

        key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
                ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

        key &= 0xff;

        return key;
}

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
        switch (mem->type) {
        case RXE_MEM_TYPE_DMA:
                return 0;

        case RXE_MEM_TYPE_MR:
        case RXE_MEM_TYPE_FMR:
                if (iova < mem->iova ||
                    length > mem->length ||
                    iova > mem->iova + mem->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ  \
                        | IB_ACCESS_REMOTE_WRITE \
                        | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        if (mem->pelem.pool->type == RXE_TYPE_MR) {
                mem->ibmr.lkey = lkey;
                mem->ibmr.rkey = rkey;
        }

        mem->lkey = lkey;
        mem->rkey = rkey;
        mem->state = RXE_MEM_STATE_INVALID;
        mem->type = RXE_MEM_TYPE_NONE;
        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}

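/* pool cleanup callback: release the umem (if any) and free the
 * per-map buffer descriptor arrays when the mem object is destroyed
 */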
void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
        int i;

        ib_umem_release(mem->umem);

        if (mem->map) {
                for (i = 0; i < mem->num_map; i++)
                        kfree(mem->map[i]);

                kfree(mem->map);
        }
}

static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mem->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mem->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mem->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
        mem->map_mask = RXE_BUF_PER_MAP - 1;

        mem->num_buf = num_buf;
        mem->num_map = num_map;
        mem->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mem->map[i]);

        kfree(mem->map);
err1:
        return -ENOMEM;
}

int rxe_mem_init_dma(struct rxe_pd *pd,
                     int access, struct rxe_mem *mem)
{
        rxe_mem_init(access, mem);

        mem->pd = pd;
        mem->access = access;
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_DMA;

        return 0;
}

int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
                      u64 length, u64 iova, int access, struct ib_udata *udata,
                      struct rxe_mem *mem)
{
        struct rxe_map **map;
        struct rxe_phys_buf *buf = NULL;
        struct ib_umem *umem;
        struct sg_page_iter sg_iter;
        int num_buf;
        void *vaddr;
        int err;

        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("err %d from ib_umem_get\n",
                        (int)PTR_ERR(umem));
                err = -EINVAL;
                goto err1;
        }

        mem->umem = umem;
        num_buf = ib_umem_num_pages(umem);

        rxe_mem_init(access, mem);

        err = rxe_mem_alloc(mem, num_buf);
        if (err) {
                pr_warn("err %d from rxe_mem_alloc\n", err);
                ib_umem_release(umem);
                goto err1;
        }

        mem->page_shift = PAGE_SHIFT;
        mem->page_mask = PAGE_SIZE - 1;

        num_buf = 0;
        map = mem->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("null vaddr\n");
                                err = -ENOMEM;
                                goto err1;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mem->pd = pd;
        mem->umem = umem;
        mem->access = access;
        mem->length = length;
        mem->iova = iova;
        mem->va = start;
        mem->offset = ib_umem_offset(umem);
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

int rxe_mem_init_fast(struct rxe_pd *pd,
                      int max_pages, struct rxe_mem *mem)
{
        int err;

        rxe_mem_init(0, mem);

        /* In fastreg, we also set the rkey */
        mem->ibmr.rkey = mem->ibmr.lkey;

        err = rxe_mem_alloc(mem, max_pages);
        if (err)
                goto err1;

        mem->pd = pd;
        mem->max_buf = max_pages;
        mem->state = RXE_MEM_STATE_FREE;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

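/* translate an iova within the mem region into the map index, buffer
 * index and byte offset that locate it; uses shift/mask arithmetic when
 * page_shift is set, otherwise walks the buffer list
 */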
static void lookup_iova(
        struct rxe_mem *mem,
        u64 iova,
        int *m_out,
        int *n_out,
        size_t *offset_out)
{
        size_t offset = iova - mem->iova + mem->offset;
        int map_index;
        int buf_index;
        u64 length;

        if (likely(mem->page_shift)) {
                *offset_out = offset & mem->page_mask;
                offset >>= mem->page_shift;
                *n_out = offset & mem->map_mask;
                *m_out = offset >> mem->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mem->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mem->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mem->state != RXE_MEM_STATE_VALID) {
                pr_warn("mem not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mem->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mem_check_range(mem, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mem, iova, &m, &n, &offset);

        if (offset + length > mem->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not zero. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                 enum copy_direction dir, u32 *crcp)
{
        int err;
        int bytes;
        u8 *va;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int m;
        int i;
        size_t offset;
        u32 crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mem->type == RXE_MEM_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == to_mem_obj) ?
                        addr : ((void *)(uintptr_t)iova);

                dest = (dir == to_mem_obj) ?
                        ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                          *crcp, dest, length);

                return 0;
        }

        WARN_ON_ONCE(!mem->map);

        err = mem_check_range(mem, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mem, iova, &m, &i, &offset);

        map = mem->map + m;
        buf = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == to_mem_obj) ? addr : va;
                dest = (dir == to_mem_obj) ? va : addr;

                bytes = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                        crc, dest, bytes);

                length -= bytes;
                addr += bytes;

                offset = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_pd *pd,
        int access,
        struct rxe_dma_info *dma,
        void *addr,
        int length,
        enum copy_direction dir,
        u32 *crcp)
{
        int bytes;
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;
        struct rxe_mem *mem = NULL;
        u64 iova;
        int err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mem = lookup_mem(pd, access, sge->lkey, lookup_local);
                if (!mem) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mem) {
                                rxe_drop_ref(mem);
                                mem = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mem = lookup_mem(pd, access, sge->lkey,
                                                 lookup_local);
                                if (!mem) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset += bytes;
                        resid -= bytes;
                        length -= bytes;
                        addr += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        if (mem)
                rxe_drop_ref(mem);

        return 0;

err2:
        if (mem)
                rxe_drop_ref(mem);
err1:
        return err;
}

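/* advance the sge position, offset and residual byte count of a dma
 * descriptor by length bytes without copying any data
 */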
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset += bytes;
                resid -= bytes;
                length -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
                           enum lookup_type type)
{
        struct rxe_mem *mem;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mem = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mem)
                return NULL;

        if (unlikely((type == lookup_local && mem->lkey != key) ||
                     (type == lookup_remote && mem->rkey != key) ||
                     mem->pd != pd ||
                     (access && !(access & mem->access)) ||
                     mem->state != RXE_MEM_STATE_VALID)) {
                rxe_drop_ref(mem);
                mem = NULL;
        }

        return mem;
}

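/* load an array of page addresses into the buffer list of a mem
 * region, set its iova, va and length, and mark the region valid
 */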
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
                      u64 *page, int num_pages, u64 iova)
{
        int i;
        int num_buf;
        int err;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int page_size;

        if (num_pages > mem->max_buf) {
                err = -EINVAL;
                goto err1;
        }

        num_buf = 0;
        page_size = 1 << mem->page_shift;
        map = mem->map;
        buf = map[0]->buf;

        for (i = 0; i < num_pages; i++) {
                buf->addr = *page++;
                buf->size = page_size;
                buf++;
                num_buf++;

                if (num_buf == RXE_BUF_PER_MAP) {
                        map++;
                        buf = map[0]->buf;
                        num_buf = 0;
                }
        }

        mem->iova = iova;
        mem->va = iova;
        mem->length = num_pages << mem->page_shift;
        mem->state = RXE_MEM_STATE_VALID;

        return 0;

err1:
        return err;
}