/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"

static u32 hw_index_to_key(unsigned long ind)
{
	return (u32)(ind >> 24) | (ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
				  struct hns_roce_cmd_mailbox *mailbox,
				  unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
				 HNS_ROCE_CMD_CREATE_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

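/*
 * Destroy an MPT entry via the mailbox command interface. The mailbox is
 * optional here: when it is NULL, a zero output address is passed and the
 * !mailbox flag marks the command as having no output mailbox.
 */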
int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
			    struct hns_roce_cmd_mailbox *mailbox,
			    unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
				 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned long obj = 0;
	int err;

	/* Allocate a key for mr from mr_table */
	err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj);
	if (err) {
		ibdev_err(ibdev,
			  "failed to alloc bitmap for MR key, ret = %d.\n",
			  err);
		return -ENOMEM;
	}

	mr->key = hw_index_to_key(obj);	/* MR key */

	err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
	if (err) {
		ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
		goto err_free_bitmap;
	}

	return 0;
err_free_bitmap:
	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
	return err;
}

static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	unsigned long obj = key_to_hw_index(mr->key);

	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
}

static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
			struct ib_udata *udata, u64 start)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	bool is_fast = mr->type == MR_TYPE_FRMR;
	struct hns_roce_buf_attr buf_attr = {};
	int err;

	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
	buf_attr.region[0].size = mr->size;
	buf_attr.region[0].hopnum = mr->pbl_hop_num;
	buf_attr.region_count = 1;
	buf_attr.user_access = mr->access;
	/* a fast MR's buffer is allocated before mapping, not at creation */
	buf_attr.mtt_only = is_fast;

	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
				  hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT,
				  udata, start);
	if (err)
		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
	else
		mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;

	return err;
}

static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}

static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mr *mr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int ret;

	if (mr->enabled) {
		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
					      key_to_hw_index(mr->key) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
				   ret);
	}

	free_mr_pbl(hr_dev, mr);
	free_mr_key(hr_dev, mr);
}

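/*
 * Write the MPT context for this MR into a command mailbox and issue
 * CREATE_MPT so that the entry becomes valid to hardware.
 */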
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mr *mr)
{
	unsigned long mtpt_idx = key_to_hw_index(mr->key);
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	int ret;

	/* Allocate mailbox memory */
	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		return ret;
	}

	if (mr->type != MR_TYPE_FRMR)
		ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr,
					     mtpt_idx);
	else
		ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
	if (ret) {
		dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
		goto err_page;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
		goto err_page;
	}

	mr->enabled = 1;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ret;
}

int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	int ret;

	ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
				   hr_dev->caps.num_mtpts,
				   hr_dev->caps.num_mtpts - 1,
				   hr_dev->caps.reserved_mrws, 0);
	return ret;
}

void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}

struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (mr == NULL)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_DMA;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->access = acc;

	/* Allocate memory region key */
	hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_free;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;
err_mr:
	free_mr_key(hr_dev, mr);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				   u64 virt_addr, int access_flags,
				   struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->iova = virt_addr;
	mr->size = length;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->access = access_flags;
	mr->type = MR_TYPE_MR;

	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_alloc_mr;

	ret = alloc_mr_pbl(hr_dev, mr, udata, start);
	if (ret)
		goto err_alloc_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_alloc_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->ibmr.length = length;

	return &mr->ibmr;

err_alloc_pbl:
	free_mr_pbl(hr_dev, mr);
err_alloc_key:
	free_mr_key(hr_dev, mr);
err_alloc_mr:
	kfree(mr);
	return ERR_PTR(ret);
}

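/*
 * Re-register an existing user MR: query and destroy the current MPT entry,
 * update the MR fields selected by @flags (PD, access rights, translation),
 * then rewrite the MPT context and create it again.
 */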
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
				     u64 length, u64 virt_addr,
				     int mr_access_flags, struct ib_pd *pd,
				     struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ib_dev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_cmd_mailbox *mailbox;
	unsigned long mtpt_idx;
	int ret;

	if (!mr->enabled)
		return ERR_PTR(-EINVAL);

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return ERR_CAST(mailbox);

	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
				HNS_ROCE_CMD_QUERY_MPT,
				HNS_ROCE_CMD_TIMEOUT_MSECS);
	if (ret)
		goto free_cmd_mbox;

	ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
	if (ret)
		ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);

	mr->enabled = 0;
	mr->iova = virt_addr;
	mr->size = length;

	if (flags & IB_MR_REREG_PD)
		mr->pd = to_hr_pd(pd)->pdn;

	if (flags & IB_MR_REREG_ACCESS)
		mr->access = mr_access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		free_mr_pbl(hr_dev, mr);
		ret = alloc_mr_pbl(hr_dev, mr, udata, start);
		if (ret) {
			ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
				  ret);
			goto free_cmd_mbox;
		}
	}

	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
	if (ret) {
		ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
	if (ret) {
		ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	mr->enabled = 1;

free_cmd_mbox:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ERR_PTR(ret);
}

int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	int ret = 0;

	if (hr_dev->hw->dereg_mr) {
		ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
	} else {
		hns_roce_mr_free(hr_dev, mr);
		kfree(mr);
	}

	return ret;
}

struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct device *dev = hr_dev->dev;
	struct hns_roce_mr *mr;
	int ret;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
		dev_err(dev, "max_num_sg larger than %d\n",
			HNS_ROCE_FRMR_MAX_PA);
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_FRMR;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->size = max_num_sg * (1 << PAGE_SHIFT);

	/* Allocate memory region key */
	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_free;

	ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
	if (ret)
		goto err_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->ibmr.length = mr->size;

	return &mr->ibmr;

err_pbl:
	free_mr_pbl(hr_dev, mr);
err_key:
	free_mr_key(hr_dev, mr);
err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
		mr->page_list[mr->npages++] = addr;
		return 0;
	}

	return -ENOBUFS;
}

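/*
 * Collect the DMA addresses of the SG list into a temporary page array via
 * ib_sg_to_pages() and write them into the MR's PBL MTR. Returns the number
 * of pages mapped on success.
 */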
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		       unsigned int *sg_offset)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_mtr *mtr = &mr->pbl_mtr;
	int ret = 0;

	mr->npages = 0;
	mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
				 sizeof(dma_addr_t), GFP_KERNEL);
	if (!mr->page_list)
		return ret;

	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
	if (ret < 1) {
		ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
			  mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
		goto err_page_list;
	}

	mtr->hem_cfg.region[0].offset = 0;
	mtr->hem_cfg.region[0].count = mr->npages;
	mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
	mtr->hem_cfg.region_count = 1;
	ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
	if (ret) {
		ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
		ret = 0;
	} else {
		mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
		ret = mr->npages;
	}

err_page_list:
	kvfree(mr->page_list);
	mr->page_list = NULL;

	return ret;
}

static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mw *mw)
{
	struct device *dev = hr_dev->dev;
	int ret;

	if (mw->enabled) {
		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
					      key_to_hw_index(mw->rkey) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);

		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
				   key_to_hw_index(mw->rkey));
	}

	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
			     key_to_hw_index(mw->rkey), BITMAP_NO_RR);
}

static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mw *mw)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
	int ret;

	/* prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
	if (ret) {
		dev_err(dev, "failed to write mtpt for MW, ret = %d.\n", ret);
		goto err_page;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
		goto err_page;
	}

	mw->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

	return ret;
}

int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);
	unsigned long index = 0;
	int ret;

	/* Allocate a key for mw from bitmap */
	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
	if (ret)
		return ret;

	mw->rkey = hw_index_to_key(index);

	ibmw->rkey = mw->rkey;
	mw->pdn = to_hr_pd(ibmw->pd)->pdn;
	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	ret = hns_roce_mw_enable(hr_dev, mw);
	if (ret)
		goto err_mw;

	return 0;

err_mw:
	hns_roce_mw_free(hr_dev, mw);
	return ret;
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);

	hns_roce_mw_free(hr_dev, mw);
	return 0;
}

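/*
 * Write up to @max_count page addresses of one buffer region into the MTT
 * entries found for that region. Returns the number of pages written, or a
 * negative error code.
 */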
static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_region *region, dma_addr_t *pages,
			  int max_count)
{
	int count, npage;
	int offset, end;
	__le64 *mtts;
	u64 addr;
	int i;

	offset = region->offset;
	end = offset + region->count;
	npage = 0;
	while (offset < end && npage < max_count) {
		count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  offset, &count, NULL);
		if (!mtts)
			return -ENOBUFS;

		for (i = 0; i < count && npage < max_count; i++) {
			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
				addr = to_hr_hw_page_addr(pages[npage]);
			else
				addr = pages[npage];

			mtts[i] = cpu_to_le64(addr);
			npage++;
		}
		offset += count;
	}

	return npage;
}

static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
	int i;

	for (i = 0; i < attr->region_count; i++)
		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
		    attr->region[i].hopnum > 0)
			return true;

	/* Because the mtr has only one root base address, a hopnum of 0 means
	 * that the root base address equals the first buffer address, so all
	 * allocated memory must lie in one continuous space accessed in
	 * direct mode.
	 */
	return false;
}

static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
{
	size_t size = 0;
	int i;

	for (i = 0; i < attr->region_count; i++)
		size += attr->region[i].size;

	return size;
}

/*
 * Check that the given pages are contiguous in address space.
 * Returns 0 on success, or the index of the first non-contiguous page.
 */
static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
					 unsigned int page_shift)
{
	size_t page_size = 1 << page_shift;
	int i;

	for (i = 1; i < page_count; i++)
		if (pages[i] - pages[i - 1] != page_size)
			return i;

	return 0;
}

static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release user buffers */
	if (mtr->umem) {
		ib_umem_release(mtr->umem);
		mtr->umem = NULL;
	}

	/* release kernel buffers */
	if (mtr->kmem) {
		hns_roce_buf_free(hr_dev, mtr->kmem);
		mtr->kmem = NULL;
	}
}

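/*
 * Back the MTR with memory: pin the user buffer when a udata context is
 * present, otherwise allocate kernel buffers of the requested total size.
 */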
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_attr *buf_attr,
			  struct ib_udata *udata, unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	size_t total_size;

	total_size = mtr_bufs_size(buf_attr);

	if (udata) {
		mtr->kmem = NULL;
		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
					buf_attr->user_access);
		if (IS_ERR_OR_NULL(mtr->umem)) {
			ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
				  PTR_ERR(mtr->umem));
			return -ENOMEM;
		}
	} else {
		mtr->umem = NULL;
		mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
					       buf_attr->page_shift,
					       mtr->hem_cfg.is_direct ?
					       HNS_ROCE_BUF_DIRECT : 0);
		if (IS_ERR(mtr->kmem)) {
			ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
				  PTR_ERR(mtr->kmem));
			return PTR_ERR(mtr->kmem);
		}
	}

	return 0;
}

static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			int page_count, unsigned int page_shift)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	dma_addr_t *pages;
	int npage;
	int ret;

	/* alloc a tmp array to store buffer's dma address */
	pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	if (mtr->umem)
		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0,
					       mtr->umem, page_shift);
	else
		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0,
					       mtr->kmem);

	if (npage != page_count) {
		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
			  page_count);
		ret = -ENOBUFS;
		goto err_alloc_list;
	}

	if (mtr->hem_cfg.is_direct && npage > 1) {
		ret = mtr_check_direct_pages(pages, npage, page_shift);
		if (ret) {
			ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n",
				  mtr->umem ? "user" : "kernel", ret);
			ret = -ENOBUFS;
			goto err_alloc_list;
		}
	}

	ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
	if (ret)
		ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);

err_alloc_list:
	kvfree(pages);

	return ret;
}

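/*
 * Write an array of page DMA addresses into the MTR. In direct mode only the
 * root base address is recorded; otherwise the pages of each region that uses
 * multi-hop addressing are written into that region's MTT entries.
 */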
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		     dma_addr_t *pages, unsigned int page_cnt)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_region *r;
	unsigned int i, mapped_cnt;
	int ret;

	/*
	 * Only use the first page address as root ba when hopnum is 0, this
	 * is because the addresses of all pages are consecutive in this case.
	 */
	if (mtr->hem_cfg.is_direct) {
		mtr->hem_cfg.root_ba = pages[0];
		return 0;
	}

	for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
	     mapped_cnt < page_cnt; i++) {
		r = &mtr->hem_cfg.region[i];
		/* if hopnum is 0, no need to map pages in this region */
		if (!r->hopnum) {
			mapped_cnt += r->count;
			continue;
		}

		if (r->offset + r->count > page_cnt) {
			ret = -EINVAL;
			ibdev_err(ibdev,
				  "failed to check mtr%u end %u + %u, max %u.\n",
				  i, r->offset, r->count, page_cnt);
			return ret;
		}

		ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
				     page_cnt - mapped_cnt);
		if (ret < 0) {
			ibdev_err(ibdev,
				  "failed to map mtr%u offset %u, ret = %d.\n",
				  i, r->offset, ret);
			return ret;
		}
		mapped_cnt += ret;
		ret = 0;
	}

	if (mapped_cnt < page_cnt) {
		ret = -ENOBUFS;
		ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
			  mapped_cnt, page_cnt);
	}

	return ret;
}

int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	int mtt_count, left;
	int start_index;
	int total = 0;
	__le64 *mtts;
	u32 npage;
	u64 addr;

	if (!mtt_buf || mtt_max < 1)
		goto done;

	/* no mtt memory in direct mode, so just return the buffer address */
	if (cfg->is_direct) {
		start_index = offset >> HNS_HW_PAGE_SHIFT;
		for (mtt_count = 0; mtt_count < cfg->region_count &&
		     total < mtt_max; mtt_count++) {
			npage = cfg->region[mtt_count].offset;
			if (npage < start_index)
				continue;

			addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
				mtt_buf[total] = to_hr_hw_page_addr(addr);
			else
				mtt_buf[total] = addr;

			total++;
		}

		goto done;
	}

	start_index = offset >> cfg->buf_pg_shift;
	left = mtt_max;
	while (left > 0) {
		mtt_count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  start_index + total,
						  &mtt_count, NULL);
		if (!mtts || !mtt_count)
			goto done;

		npage = min(mtt_count, left);
		left -= npage;
		for (mtt_count = 0; mtt_count < npage; mtt_count++)
			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
	}

done:
	if (base_addr)
		*base_addr = cfg->root_ba;

	return total;
}

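/*
 * Convert the buffer attributes into the HEM page configuration: choose the
 * buffer page shift, fold any unaligned start offset into the first region,
 * and compute the page offset and count of each region. Returns the total
 * number of buffer pages.
 */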
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
			    struct hns_roce_buf_attr *attr,
			    struct hns_roce_hem_cfg *cfg,
			    unsigned int *buf_page_shift, int unaligned_size)
{
	struct hns_roce_buf_region *r;
	int first_region_padding;
	int page_cnt, region_cnt;
	unsigned int page_shift;
	size_t buf_size;

	/* If mtt is disabled, all pages must be within a continuous range */
	cfg->is_direct = !mtr_has_mtt(attr);
	buf_size = mtr_bufs_size(attr);
	if (cfg->is_direct) {
		/* When HEM buffer uses 0-level addressing, the page size is
		 * equal to the whole buffer size, and we split the buffer into
		 * small pages which is used to check whether the adjacent
		 * units are in the continuous space and its size is fixed to
		 * 4K based on hns ROCEE's requirement.
		 */
		page_shift = HNS_HW_PAGE_SHIFT;

		/* The ROCEE requires the page size to be 4K * 2 ^ N. */
		cfg->buf_pg_count = 1;
		cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
			order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
		first_region_padding = 0;
	} else {
		page_shift = attr->page_shift;
		cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unaligned_size,
						 1 << page_shift);
		cfg->buf_pg_shift = page_shift;
		first_region_padding = unaligned_size;
	}

	/* Convert buffer size to page index and page count for each region and
	 * the buffer's offset needs to be appended to the first region.
	 */
	for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
	     region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
		r = &cfg->region[region_cnt];
		r->offset = page_cnt;
		buf_size = hr_hw_page_align(attr->region[region_cnt].size +
					    first_region_padding);
		r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
		first_region_padding = 0;
		page_cnt += r->count;
		r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
					     r->count);
	}

	cfg->region_count = region_cnt;
	*buf_page_shift = page_shift;

	return page_cnt;
}

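/*
 * Allocate the multi-hop base address table for the MTR. In direct mode no
 * BA table is needed and only the page shift is recorded.
 */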
static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			 unsigned int ba_page_shift)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	int ret;

	hns_roce_hem_list_init(&mtr->hem_list);
	if (!cfg->is_direct) {
		ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
						cfg->region, cfg->region_count,
						ba_page_shift);
		if (ret)
			return ret;
		cfg->root_ba = mtr->hem_list.root_ba;
		cfg->ba_pg_shift = ba_page_shift;
	} else {
		cfg->ba_pg_shift = cfg->buf_pg_shift;
	}

	return 0;
}

static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
}

/**
 * hns_roce_mtr_create - Create hns memory translate region.
 *
 * @hr_dev: RoCE device struct pointer
 * @mtr: memory translate region
 * @buf_attr: buffer attribute for creating mtr
 * @ba_page_shift: page shift for multi-hop base address table
 * @udata: user space context, if it's NULL, means kernel space
 * @user_addr: userspace virtual address to start at
 */
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			struct hns_roce_buf_attr *buf_attr,
			unsigned int ba_page_shift, struct ib_udata *udata,
			unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned int buf_page_shift = 0;
	int buf_page_cnt;
	int ret;

	buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
					&buf_page_shift,
					udata ? user_addr & ~PAGE_MASK : 0);
	if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
		ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n",
			  buf_page_cnt, buf_page_shift);
		return -EINVAL;
	}

	ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
		return ret;
	}

	/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
	 * to finish the MTT configuration.
	 */
	if (buf_attr->mtt_only) {
		mtr->umem = NULL;
		mtr->kmem = NULL;
		return 0;
	}

	ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
		goto err_alloc_mtt;
	}

	/* Write buffer's dma address to MTT */
	ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
	if (ret)
		ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
	else
		return 0;

	mtr_free_bufs(hr_dev, mtr);
err_alloc_mtt:
	mtr_free_mtt(hr_dev, mtr);
	return ret;
}

void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release multi-hop addressing resource */
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);

	/* free buffers */
	mtr_free_bufs(hr_dev, mtr);
}