/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static u32 key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

/* check that [iova, iova + length) lies entirely within the region */
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
	case RXE_MEM_TYPE_FMR:
		if (iova < mem->iova ||
		    length > mem->length ||
		    iova > mem->iova + mem->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	u32 lkey = mem->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	if (mem->pelem.pool->type == RXE_TYPE_MR) {
		mem->ibmr.lkey = lkey;
		mem->ibmr.rkey = rkey;
	}

	mem->lkey = lkey;
	mem->rkey = rkey;
	mem->state = RXE_MEM_STATE_INVALID;
	mem->type = RXE_MEM_TYPE_NONE;
	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
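
/*
 * Key layout produced by rxe_mem_init() above (illustrative example,
 * not a wire-format definition): the pool index occupies bits 8..31
 * and the lfsr byte bits 0..7.  For a pool index of 0x000012 and an
 * lfsr value of 0xab:
 *
 *	lkey = (0x000012 << 8) | 0xab = 0x000012ab
 *	rkey = lkey when any IB_ACCESS_REMOTE_* bit is set, else 0
 *
 * lookup_mem() below recovers the pool index with (key >> 8); a stale
 * key whose low byte no longer matches the current lkey/rkey is
 * rejected by the comparisons there.
 */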
void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
	int i;

	ib_umem_release(mem->umem);

	if (mem->map) {
		for (i = 0; i < mem->num_map; i++)
			kfree(mem->map[i]);

		kfree(mem->map);
	}
}

/* allocate the two-level map[]->buf[] tables covering num_buf buffers */
static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mem->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mem->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mem->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
	mem->map_mask = RXE_BUF_PER_MAP - 1;

	mem->num_buf = num_buf;
	mem->num_map = num_map;
	mem->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mem->map[i]);

	kfree(mem->map);
err1:
	return -ENOMEM;
}

void rxe_mem_init_dma(struct rxe_pd *pd,
		      int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	mem->pd = pd;
	mem->access = access;
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_DMA;
}

int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mem->page_shift = PAGE_SHIFT;
	mem->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mem->pd = pd;
	mem->umem = umem;
	mem->access = access;
	mem->length = length;
	mem->iova = iova;
	mem->va = start;
	mem->offset = ib_umem_offset(umem);
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

int rxe_mem_init_fast(struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int err;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	err = rxe_mem_alloc(mem, max_pages);
	if (err)
		goto err1;

	mem->pd = pd;
	mem->max_buf = max_pages;
	mem->state = RXE_MEM_STATE_FREE;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}
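
/*
 * Worked example of the iova decomposition done by lookup_iova() below
 * for a user MR set up by rxe_mem_init_user() (illustrative numbers;
 * assumes 4 KiB pages and, purely for the example, RXE_BUF_PER_MAP == 256,
 * so map_shift == 8 and map_mask == 0xff):
 *
 *	offset     = iova - mem->iova + mem->offset	= 0x12345
 *	byte off   = offset & page_mask (0xfff)		= 0x345
 *	page index = offset >> page_shift (12)		= 0x12
 *	n (buf)    = page index & map_mask		= 0x12
 *	m (map)    = page index >> map_shift		= 0
 *
 * so the byte lives at mem->map[0]->buf[0x12].addr + 0x345.  When
 * page_shift is zero the slow path walks the buffers one by one instead.
 */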
static void lookup_iova(
	struct rxe_mem *mem,
	u64 iova,
	int *m_out,
	int *n_out,
	size_t *offset_out)
{
	size_t offset = iova - mem->iova + mem->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mem->page_shift)) {
		*offset_out = offset & mem->page_mask;
		offset >>= mem->page_shift;
		*n_out = offset & mem->map_mask;
		*m_out = offset >> mem->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mem->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mem->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mem->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
	return addr;
}
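
/*
 * Hypothetical caller sketch (names invented for illustration, not taken
 * from this file) showing how the copy helper below is typically driven:
 * copy "len" payload bytes at "payload" into a region at "iova" while
 * updating a running ICRC value.
 *
 *	u32 crc = crc_seed;
 *	int err;
 *
 *	err = rxe_mem_copy(mem, iova, payload, len, to_mem_obj, &crc);
 *	if (err)
 *		return err;
 *
 * A range violation yields -EFAULT; passing crcp == NULL skips the
 * crc32 update entirely.
 */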
/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not NULL. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
		 enum copy_direction dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mem->type == RXE_MEM_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == to_mem_obj) ?
			addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mem_obj) ?
			((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
					  *crcp, dest, length);

		return 0;
	}

	WARN_ON_ONCE(!mem->map);

	err = mem_check_range(mem, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mem, iova, &m, &i, &offset);

	map = mem->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == to_mem_obj) ? addr : va;
		dest = (dir == to_mem_obj) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
					crc, dest, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum copy_direction dir,
	u32 *crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mem *mem = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mem = lookup_mem(pd, access, sge->lkey, lookup_local);
		if (!mem) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mem) {
				rxe_drop_ref(mem);
				mem = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mem = lookup_mem(pd, access, sge->lkey,
						 lookup_local);
				if (!mem) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mem)
		rxe_drop_ref(mem);

	return 0;

err2:
	if (mem)
		rxe_drop_ref(mem);
err1:
	return err;
}

/* advance the dma descriptor past length bytes without copying any data */
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}
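
/*
 * Illustrative walk of the dma state used by copy_data() and
 * advance_dma_data() above (made-up numbers): with two SGEs of 0x1000
 * bytes each, cur_sge = 0, sge_offset = 0xf00 and resid = 0x1100,
 * consuming 0x200 bytes takes the last 0x100 bytes of sge[0], steps
 * cur_sge to 1, and leaves sge_offset = 0x100 and resid = 0xf00.
 * Running off the end of the SGE list returns -ENOSPC in both helpers.
 */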
/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_mem *mem;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mem = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mem)
		return NULL;

	if (unlikely((type == lookup_local && mem->lkey != key) ||
		     (type == lookup_remote && mem->rkey != key) ||
		     mem->pd != pd ||
		     (access && !(access & mem->access)) ||
		     mem->state != RXE_MEM_STATE_VALID)) {
		rxe_drop_ref(mem);
		mem = NULL;
	}

	return mem;
}
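
/*
 * Hypothetical caller sketch for lookup_mem() (names and error code
 * invented for illustration, not taken from this file): validating an
 * incoming rkey before a remote write.
 *
 *	mem = lookup_mem(qp->pd, IB_ACCESS_REMOTE_WRITE, rkey,
 *			 lookup_remote);
 *	if (!mem)
 *		return -EACCES;
 *
 * A NULL return covers a bad key, a pd mismatch, missing access rights
 * or an invalid state.  The returned mem carries a reference taken by
 * rxe_pool_get_index(), so the caller must rxe_drop_ref(mem) when the
 * operation completes, just as copy_data() above does.
 */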