// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static u32 key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct ib_udata *udata, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mr->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mr_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
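	/*
	 * Walk the pages pinned by ib_umem_get() and record each page's
	 * kernel virtual address and size in the two-level map[]->buf[]
	 * table, moving to the next map entry whenever the current one
	 * holds RXE_BUF_PER_MAP buffers.
	 */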
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data between a range (addr, addr+length-1) and a mr object
 * starting at iova. Compute the incremental crc32 if crcp is not NULL.
 * caller must hold a reference to mr
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum copy_direction dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == to_mr_obj) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mr_obj) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}
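	/*
	 * For a registered MR, validate the range, locate the starting
	 * physical buffer with lookup_iova() and then copy one buffer at
	 * a time, folding each chunk into the running crc if requested.
	 */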
	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == to_mr_obj) ? addr : va;
		dest = (dir == to_mr_obj) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum copy_direction dir,
	u32 *crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, lookup_local);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       lookup_local);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}
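/* advance the dma cursor (cur_sge, sge_offset, resid) by length bytes
 * without copying any data, stepping across sge boundaries as needed
 */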
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == lookup_local && mr_lkey(mr) != key) ||
		     (type == lookup_remote && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}