// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static u32 key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
		if (iova < mem->iova ||
		    length > mem->length ||
		    iova > mem->iova + mem->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	u32 lkey = mem->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mem->ibmr.lkey = lkey;
	mem->ibmr.rkey = rkey;
	mem->state = RXE_MEM_STATE_INVALID;
	mem->type = RXE_MEM_TYPE_NONE;
	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
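
/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * the inverse of the key packing done in rxe_mem_init() above.  An
 * lkey/rkey carries the pool index of the rxe_mem in its upper 24 bits and
 * the 8-bit value from rxe_get_key() in its low byte; lookup_mem() at the
 * bottom of this file relies on the same layout (key >> 8) to find the MR.
 */
static inline void __maybe_unused rxe_key_unpack(u32 key, u32 *index, u8 *var)
{
	*index = key >> 8;	/* rxe_pool index of the rxe_mem */
	*var = key & 0xff;	/* lfsr-generated low byte */
}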

void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
	int i;

	ib_umem_release(mem->umem);

	if (mem->map) {
		for (i = 0; i < mem->num_map; i++)
			kfree(mem->map[i]);

		kfree(mem->map);
	}
}

/* allocate the rxe_map tables needed to describe num_buf buffers */
static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mem->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mem->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mem->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mem->map_shift = ilog2(RXE_BUF_PER_MAP);
	mem->map_mask = RXE_BUF_PER_MAP - 1;

	mem->num_buf = num_buf;
	mem->num_map = num_map;
	mem->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mem->map[i]);

	kfree(mem->map);
err1:
	return -ENOMEM;
}

void rxe_mem_init_dma(struct rxe_pd *pd,
		      int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	mem->ibmr.pd = &pd->ibpd;
	mem->access = access;
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_DMA;
}

int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mem->page_shift = PAGE_SHIFT;
	mem->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mem->ibmr.pd = &pd->ibpd;
	mem->umem = umem;
	mem->access = access;
	mem->length = length;
	mem->iova = iova;
	mem->va = start;
	mem->offset = ib_umem_offset(umem);
	mem->state = RXE_MEM_STATE_VALID;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

int rxe_mem_init_fast(struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int err;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	err = rxe_mem_alloc(mem, max_pages);
	if (err)
		goto err1;

	mem->ibmr.pd = &pd->ibpd;
	mem->max_buf = max_pages;
	mem->state = RXE_MEM_STATE_FREE;
	mem->type = RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

/* translate an iova within mem into a (map index, buf index, offset) triple */
static void lookup_iova(struct rxe_mem *mem, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mem->iova + mem->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mem->page_shift)) {
		*offset_out = offset & mem->page_mask;
		offset >>= mem->page_shift;
		*n_out = offset & mem->map_mask;
		*m_out = offset >> mem->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mem->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mem->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mem->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
	return addr;
}
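
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * one way a caller such as the atomic-operation path could use
 * iova_to_vaddr() for a bounds-checked 8-byte read.  The caller is assumed
 * to already hold a reference on mem, e.g. from lookup_mem(); the length
 * argument lets iova_to_vaddr() reject accesses that cross a buffer.
 */
static inline int __maybe_unused rxe_read_qword(struct rxe_mem *mem, u64 iova,
						u64 *val)
{
	u64 *va = iova_to_vaddr(mem, iova, sizeof(u64));

	if (!va)
		return -EFAULT;

	*val = *va;
	return 0;
}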

/* copy data from a range (addr, addr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not NULL. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
		 enum copy_direction dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mem->type == RXE_MEM_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == to_mem_obj) ?
			addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mem_obj) ?
			((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mem->ibmr.device),
					  *crcp, dest, length);

		return 0;
	}

	WARN_ON_ONCE(!mem->map);

	err = mem_check_range(mem, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mem, iova, &m, &i, &offset);

	map = mem->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == to_mem_obj) ? addr : va;
		dest = (dir == to_mem_obj) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mem->ibmr.device),
					crc, dest, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum copy_direction	dir,
	u32			*crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mem *mem = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mem = lookup_mem(pd, access, sge->lkey, lookup_local);
		if (!mem) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mem) {
				rxe_drop_ref(mem);
				mem = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mem = lookup_mem(pd, access, sge->lkey,
						 lookup_local);
				if (!mem) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mem)
		rxe_drop_ref(mem);

	return 0;

err2:
	if (mem)
		rxe_drop_ref(mem);
err1:
	return err;
}
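
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * scattering 'len' received payload bytes into the sg list described by a
 * wqe's dma state, the way a responder-side caller might use copy_data().
 * IB_ACCESS_LOCAL_WRITE is assumed to be the appropriate check for writing
 * into locally registered memory; a NULL crcp skips the incremental crc32.
 */
static inline int __maybe_unused rxe_scatter_payload(struct rxe_pd *pd,
						     struct rxe_dma_info *dma,
						     void *payload, int len)
{
	return copy_data(pd, IB_ACCESS_LOCAL_WRITE, dma, payload, len,
			 to_mem_obj, NULL);
}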

/* skip over length bytes of a wqe's sg list without copying any data */
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_mem *mem;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mem = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mem)
		return NULL;

	if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
		     (type == lookup_remote && mr_rkey(mem) != key) ||
		     mr_pd(mem) != pd ||
		     (access && !(access & mem->access)) ||
		     mem->state != RXE_MEM_STATE_VALID)) {
		rxe_drop_ref(mem);
		mem = NULL;
	}

	return mem;
}
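
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * how a responder-style caller might resolve an incoming rkey before a
 * remote write, bound-check the request, and then drop the reference that
 * lookup_mem() took on success.
 */
static inline int __maybe_unused rxe_check_rkey(struct rxe_pd *pd, u32 rkey,
						u64 iova, size_t len)
{
	struct rxe_mem *mem;
	int err;

	mem = lookup_mem(pd, IB_ACCESS_REMOTE_WRITE, rkey, lookup_remote);
	if (!mem)
		return -EINVAL;

	err = mem_check_range(mem, iova, len);
	rxe_drop_ref(mem);

	return err;
}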