/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
        static u32 key = 1;

        key = key << 1;

        key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
                ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

        key &= 0xff;

        return key;
}

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
        switch (mem->type) {
        case RXE_MEM_TYPE_DMA:
                return 0;

        case RXE_MEM_TYPE_MR:
        case RXE_MEM_TYPE_FMR:
                if (iova < mem->iova ||
                    length > mem->length ||
                    iova > mem->iova + mem->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                | IB_ACCESS_REMOTE_WRITE        \
                                | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        if (mem->pelem.pool->type == RXE_TYPE_MR) {
                mem->ibmr.lkey = lkey;
                mem->ibmr.rkey = rkey;
        }

        mem->lkey = lkey;
        mem->rkey = rkey;
        mem->state = RXE_MEM_STATE_INVALID;
        mem->type = RXE_MEM_TYPE_NONE;
        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}

void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
        int i;

        ib_umem_release(mem->umem);

        if (mem->map) {
                for (i = 0; i < mem->num_map; i++)
                        kfree(mem->map[i]);

                kfree(mem->map);
        }
}
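
/*
 * Allocate the two-level buffer table for a memory region: the num_buf
 * buffer descriptors are grouped into maps of RXE_BUF_PER_MAP entries
 * each, and mem->map is the array of pointers to those maps.
 */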
static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mem->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mem->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mem->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
        mem->map_mask = RXE_BUF_PER_MAP - 1;

        mem->num_buf = num_buf;
        mem->num_map = num_map;
        mem->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mem->map[i]);

        kfree(mem->map);
err1:
        return -ENOMEM;
}

void rxe_mem_init_dma(struct rxe_pd *pd,
                      int access, struct rxe_mem *mem)
{
        rxe_mem_init(access, mem);

        mem->pd = pd;
        mem->access = access;
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_DMA;
}

int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
                      u64 length, u64 iova, int access, struct ib_udata *udata,
                      struct rxe_mem *mem)
{
        struct rxe_map **map;
        struct rxe_phys_buf *buf = NULL;
        struct ib_umem *umem;
        struct sg_page_iter sg_iter;
        int num_buf;
        void *vaddr;
        int err;

        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("err %d from ib_umem_get\n",
                        (int)PTR_ERR(umem));
                err = -EINVAL;
                goto err1;
        }

        mem->umem = umem;
        num_buf = ib_umem_num_pages(umem);

        rxe_mem_init(access, mem);

        err = rxe_mem_alloc(mem, num_buf);
        if (err) {
                pr_warn("err %d from rxe_mem_alloc\n", err);
                ib_umem_release(umem);
                goto err1;
        }

        mem->page_shift = PAGE_SHIFT;
        mem->page_mask = PAGE_SIZE - 1;

        num_buf = 0;
        map = mem->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("null vaddr\n");
                                err = -ENOMEM;
                                goto err1;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mem->pd = pd;
        mem->umem = umem;
        mem->access = access;
        mem->length = length;
        mem->iova = iova;
        mem->va = start;
        mem->offset = ib_umem_offset(umem);
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}
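
/*
 * Initialize an MR for fast registration: the buffer table is sized for
 * max_pages up front but left unpopulated, and the MR is left in the
 * FREE state, to be filled in and made valid later (e.g. by a
 * fast-register work request).
 */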
int rxe_mem_init_fast(struct rxe_pd *pd,
                      int max_pages, struct rxe_mem *mem)
{
        int err;

        rxe_mem_init(0, mem);

        /* In fastreg, we also set the rkey */
        mem->ibmr.rkey = mem->ibmr.lkey;

        err = rxe_mem_alloc(mem, max_pages);
        if (err)
                goto err1;

        mem->pd = pd;
        mem->max_buf = max_pages;
        mem->state = RXE_MEM_STATE_FREE;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}
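
/*
 * Translate an iova within the region into a (map index, buffer index,
 * offset within buffer) triple.  When every buffer is one page
 * (page_shift is set) this is pure shifting and masking; for example,
 * assuming 4 KiB pages and RXE_BUF_PER_MAP == 256, a region offset of
 * 0x123456 gives *m_out = 1, *n_out = 0x23 and *offset_out = 0x456.
 * Otherwise the buffer list is walked, subtracting each buffer's size
 * from the offset until it fits in the current buffer.
 */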
static void lookup_iova(
        struct rxe_mem *mem,
        u64 iova,
        int *m_out,
        int *n_out,
        size_t *offset_out)
{
        size_t offset = iova - mem->iova + mem->offset;
        int map_index;
        int buf_index;
        u64 length;

        if (likely(mem->page_shift)) {
                *offset_out = offset & mem->page_mask;
                offset >>= mem->page_shift;
                *n_out = offset & mem->map_mask;
                *m_out = offset >> mem->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mem->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mem->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mem->state != RXE_MEM_STATE_VALID) {
                pr_warn("mem not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mem->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mem_check_range(mem, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mem, iova, &m, &n, &offset);

        if (offset + length > mem->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not zero. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                 enum copy_direction dir, u32 *crcp)
{
        int err;
        int bytes;
        u8 *va;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int m;
        int i;
        size_t offset;
        u32 crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mem->type == RXE_MEM_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == to_mem_obj) ?
                        addr : ((void *)(uintptr_t)iova);

                dest = (dir == to_mem_obj) ?
                        ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                          *crcp, dest, length);

                return 0;
        }

        WARN_ON_ONCE(!mem->map);

        err = mem_check_range(mem, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mem, iova, &m, &i, &offset);

        map = mem->map + m;
        buf = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == to_mem_obj) ? addr : va;
                dest = (dir == to_mem_obj) ? va : addr;

                bytes = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                        crc, dest, bytes);

                length -= bytes;
                addr += bytes;

                offset = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_pd *pd,
        int access,
        struct rxe_dma_info *dma,
        void *addr,
        int length,
        enum copy_direction dir,
        u32 *crcp)
{
        int bytes;
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;
        struct rxe_mem *mem = NULL;
        u64 iova;
        int err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mem = lookup_mem(pd, access, sge->lkey, lookup_local);
                if (!mem) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mem) {
                                rxe_drop_ref(mem);
                                mem = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mem = lookup_mem(pd, access, sge->lkey,
                                                 lookup_local);
                                if (!mem) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset += bytes;
                        resid -= bytes;
                        length -= bytes;
                        addr += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        if (mem)
                rxe_drop_ref(mem);

        return 0;

err2:
        if (mem)
                rxe_drop_ref(mem);
err1:
        return err;
}
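
/*
 * Skip over length bytes of the dma descriptor without copying anything:
 * cur_sge, sge_offset and resid are advanced so that a later copy_data()
 * call resumes past the skipped bytes.
 */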
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset += bytes;
                resid -= bytes;
                length -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
                           enum lookup_type type)
{
        struct rxe_mem *mem;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mem = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mem)
                return NULL;

        if (unlikely((type == lookup_local && mem->lkey != key) ||
                     (type == lookup_remote && mem->rkey != key) ||
                     mem->pd != pd ||
                     (access && !(access & mem->access)) ||
                     mem->state != RXE_MEM_STATE_VALID)) {
                rxe_drop_ref(mem);
                mem = NULL;
        }

        return mem;
}