1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */ 3 4 #include <linux/vhost_types.h> 5 #include <linux/vdpa.h> 6 #include <linux/gcd.h> 7 #include <linux/string.h> 8 #include <linux/mlx5/qp.h> 9 #include "mlx5_vdpa.h" 10 11 /* DIV_ROUND_UP where the divider is a power of 2 give by its log base 2 value */ 12 #define MLX5_DIV_ROUND_UP_POW2(_n, _s) \ 13 ({ \ 14 u64 __s = _s; \ 15 u64 _res; \ 16 _res = (((_n) + (1 << (__s)) - 1) >> (__s)); \ 17 _res; \ 18 }) 19 20 static int get_octo_len(u64 len, int page_shift) 21 { 22 u64 page_size = 1ULL << page_shift; 23 int npages; 24 25 npages = ALIGN(len, page_size) >> page_shift; 26 return (npages + 1) / 2; 27 } 28 29 static void mlx5_set_access_mode(void *mkc, int mode) 30 { 31 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 32 MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2); 33 } 34 35 static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) 36 { 37 struct scatterlist *sg; 38 int nsg = mr->nsg; 39 u64 dma_addr; 40 u64 dma_len; 41 int j = 0; 42 int i; 43 44 for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) { 45 for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg); 46 nsg && dma_len; 47 nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size)) 48 mtt[j++] = cpu_to_be64(dma_addr); 49 } 50 } 51 52 static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) 53 { 54 int inlen; 55 void *mkc; 56 void *in; 57 int err; 58 59 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); 60 in = kvzalloc(inlen, GFP_KERNEL); 61 if (!in) 62 return -ENOMEM; 63 64 MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); 65 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 66 MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO)); 67 MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO)); 68 mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT); 69 MLX5_SET(mkc, mkc, qpn, 0xffffff); 70 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); 71 MLX5_SET64(mkc, mkc, start_addr, mr->offset); 72 MLX5_SET64(mkc, mkc, len, mr->end - mr->start); 73 MLX5_SET(mkc, mkc, log_page_size, mr->log_size); 74 MLX5_SET(mkc, mkc, translations_octword_size, 75 get_octo_len(mr->end - mr->start, mr->log_size)); 76 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 77 get_octo_len(mr->end - mr->start, mr->log_size)); 78 populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); 79 err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); 80 kvfree(in); 81 if (err) { 82 mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); 83 return err; 84 } 85 86 return 0; 87 } 88 89 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) 90 { 91 mlx5_vdpa_destroy_mkey(mvdev, mr->mr); 92 } 93 94 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) 95 { 96 return max_t(u64, map->start, mr->start); 97 } 98 99 static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) 100 { 101 return min_t(u64, map->last + 1, mr->end); 102 } 103 104 static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) 105 { 106 return map_end(map, mr) - map_start(map, mr); 107 } 108 109 #define MLX5_VDPA_INVALID_START_ADDR ((u64)-1) 110 #define MLX5_VDPA_INVALID_LEN ((u64)-1) 111 112 static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey) 113 { 114 struct mlx5_vdpa_direct_mr *s; 115 116 s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); 117 if (!s) 118 return MLX5_VDPA_INVALID_START_ADDR; 119 120 return s->start; 121 } 122 123 static u64 indir_len(struct mlx5_vdpa_mr *mkey) 124 { 125 struct mlx5_vdpa_direct_mr *s; 126 struct mlx5_vdpa_direct_mr *e; 127 128 s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); 129 if (!s) 130 return MLX5_VDPA_INVALID_LEN; 131 132 e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list); 133 134 return e->end - s->start; 135 } 136 137 #define LOG_MAX_KLM_SIZE 30 138 #define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE) 139 140 static u32 klm_bcount(u64 size) 141 { 142 return (u32)size; 143 } 144 145 static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in) 146 { 147 struct mlx5_vdpa_direct_mr *dmr; 148 struct mlx5_klm *klmarr; 149 struct mlx5_klm *klm; 150 bool first = true; 151 u64 preve; 152 int i; 153 154 klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 155 i = 0; 156 list_for_each_entry(dmr, &mkey->head, list) { 157 again: 158 klm = &klmarr[i++]; 159 if (first) { 160 preve = dmr->start; 161 first = false; 162 } 163 164 if (preve == dmr->start) { 165 klm->key = cpu_to_be32(dmr->mr); 166 klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); 167 preve = dmr->end; 168 } else { 169 klm->key = cpu_to_be32(mvdev->res.null_mkey); 170 klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve)); 171 preve = dmr->start; 172 goto again; 173 } 174 } 175 } 176 177 static int klm_byte_size(int nklms) 178 { 179 return 16 * ALIGN(nklms, 4); 180 } 181 182 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) 183 { 184 int inlen; 185 void *mkc; 186 void *in; 187 int err; 188 u64 start; 189 u64 len; 190 191 start = indir_start_addr(mr); 192 len = indir_len(mr); 193 if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN) 194 return -EINVAL; 195 196 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms); 197 in = kzalloc(inlen, GFP_KERNEL); 198 if (!in) 199 return -ENOMEM; 200 201 MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); 202 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 203 MLX5_SET(mkc, mkc, lw, 1); 204 MLX5_SET(mkc, mkc, lr, 1); 205 mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS); 206 MLX5_SET(mkc, mkc, qpn, 0xffffff); 207 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); 208 MLX5_SET64(mkc, mkc, start_addr, start); 209 MLX5_SET64(mkc, mkc, len, len); 210 MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16); 211 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms); 212 fill_indir(mvdev, mr, in); 213 err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen); 214 kfree(in); 215 return err; 216 } 217 218 static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey) 219 { 220 mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey); 221 } 222 223 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr, 224 struct vhost_iotlb *iotlb) 225 { 226 struct vhost_iotlb_map *map; 227 unsigned long lgcd = 0; 228 int log_entity_size; 229 unsigned long size; 230 u64 start = 0; 231 int err; 232 struct page *pg; 233 unsigned int nsg; 234 int sglen; 235 u64 pa, offset; 236 u64 paend; 237 struct scatterlist *sg; 238 struct device *dma = mvdev->vdev.dma_dev; 239 240 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); 241 map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { 242 size = maplen(map, mr); 243 lgcd = gcd(lgcd, size); 244 start += size; 245 } 246 log_entity_size = ilog2(lgcd); 247 248 sglen = 1 << log_entity_size; 249 nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size); 250 251 err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL); 252 if (err) 253 return err; 254 255 sg = mr->sg_head.sgl; 256 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); 257 map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { 258 offset = mr->start > map->start ? mr->start - map->start : 0; 259 pa = map->addr + offset; 260 paend = map->addr + offset + maplen(map, mr); 261 for (; pa < paend; pa += sglen) { 262 pg = pfn_to_page(__phys_to_pfn(pa)); 263 if (!sg) { 264 mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n", 265 map->start, map->last + 1); 266 err = -ENOMEM; 267 goto err_map; 268 } 269 sg_set_page(sg, pg, sglen, 0); 270 sg = sg_next(sg); 271 if (!sg) 272 goto done; 273 } 274 } 275 done: 276 mr->log_size = log_entity_size; 277 mr->nsg = nsg; 278 mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); 279 if (!mr->nent) { 280 err = -ENOMEM; 281 goto err_map; 282 } 283 284 err = create_direct_mr(mvdev, mr); 285 if (err) 286 goto err_direct; 287 288 return 0; 289 290 err_direct: 291 dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); 292 err_map: 293 sg_free_table(&mr->sg_head); 294 return err; 295 } 296 297 static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) 298 { 299 struct device *dma = mvdev->vdev.dma_dev; 300 301 destroy_direct_mr(mvdev, mr); 302 dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); 303 sg_free_table(&mr->sg_head); 304 } 305 306 static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, 307 struct vhost_iotlb *iotlb) 308 { 309 struct mlx5_vdpa_mr *mr = &mvdev->mr; 310 struct mlx5_vdpa_direct_mr *dmr; 311 struct mlx5_vdpa_direct_mr *n; 312 LIST_HEAD(tmp); 313 u64 st; 314 u64 sz; 315 int err; 316 317 st = start; 318 while (size) { 319 sz = (u32)min_t(u64, MAX_KLM_SIZE, size); 320 dmr = kzalloc(sizeof(*dmr), GFP_KERNEL); 321 if (!dmr) { 322 err = -ENOMEM; 323 goto err_alloc; 324 } 325 326 dmr->start = st; 327 dmr->end = st + sz; 328 dmr->perm = perm; 329 err = map_direct_mr(mvdev, dmr, iotlb); 330 if (err) { 331 kfree(dmr); 332 goto err_alloc; 333 } 334 335 list_add_tail(&dmr->list, &tmp); 336 size -= sz; 337 mr->num_directs++; 338 mr->num_klms++; 339 st += sz; 340 } 341 list_splice_tail(&tmp, &mr->head); 342 return 0; 343 344 err_alloc: 345 list_for_each_entry_safe(dmr, n, &mr->head, list) { 346 list_del_init(&dmr->list); 347 unmap_direct_mr(mvdev, dmr); 348 kfree(dmr); 349 } 350 return err; 351 } 352 353 /* The iotlb pointer contains a list of maps. Go over the maps, possibly 354 * merging mergeable maps, and create direct memory keys that provide the 355 * device access to memory. The direct mkeys are then referred to by the 356 * indirect memory key that provides access to the enitre address space given 357 * by iotlb. 358 */ 359 static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) 360 { 361 struct mlx5_vdpa_mr *mr = &mvdev->mr; 362 struct mlx5_vdpa_direct_mr *dmr; 363 struct mlx5_vdpa_direct_mr *n; 364 struct vhost_iotlb_map *map; 365 u32 pperm = U16_MAX; 366 u64 last = U64_MAX; 367 u64 ps = U64_MAX; 368 u64 pe = U64_MAX; 369 u64 start = 0; 370 int err = 0; 371 int nnuls; 372 373 INIT_LIST_HEAD(&mr->head); 374 for (map = vhost_iotlb_itree_first(iotlb, start, last); map; 375 map = vhost_iotlb_itree_next(map, start, last)) { 376 start = map->start; 377 if (pe == map->start && pperm == map->perm) { 378 pe = map->last + 1; 379 } else { 380 if (ps != U64_MAX) { 381 if (pe < map->start) { 382 /* We have a hole in the map. Check how 383 * many null keys are required to fill it. 384 */ 385 nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe, 386 LOG_MAX_KLM_SIZE); 387 mr->num_klms += nnuls; 388 } 389 err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); 390 if (err) 391 goto err_chain; 392 } 393 ps = map->start; 394 pe = map->last + 1; 395 pperm = map->perm; 396 } 397 } 398 err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); 399 if (err) 400 goto err_chain; 401 402 /* Create the memory key that defines the guests's address space. This 403 * memory key refers to the direct keys that contain the MTT 404 * translations 405 */ 406 err = create_indirect_key(mvdev, mr); 407 if (err) 408 goto err_chain; 409 410 mr->user_mr = true; 411 return 0; 412 413 err_chain: 414 list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { 415 list_del_init(&dmr->list); 416 unmap_direct_mr(mvdev, dmr); 417 kfree(dmr); 418 } 419 return err; 420 } 421 422 static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) 423 { 424 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 425 void *mkc; 426 u32 *in; 427 int err; 428 429 in = kzalloc(inlen, GFP_KERNEL); 430 if (!in) 431 return -ENOMEM; 432 433 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 434 435 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 436 MLX5_SET(mkc, mkc, length64, 1); 437 MLX5_SET(mkc, mkc, lw, 1); 438 MLX5_SET(mkc, mkc, lr, 1); 439 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); 440 MLX5_SET(mkc, mkc, qpn, 0xffffff); 441 442 err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen); 443 if (!err) 444 mr->user_mr = false; 445 446 kfree(in); 447 return err; 448 } 449 450 static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) 451 { 452 mlx5_vdpa_destroy_mkey(mvdev, mr->mkey); 453 } 454 455 static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) 456 { 457 struct vhost_iotlb_map *map; 458 u64 start = 0, last = ULLONG_MAX; 459 int err; 460 461 if (!src) { 462 err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW); 463 return err; 464 } 465 466 for (map = vhost_iotlb_itree_first(src, start, last); map; 467 map = vhost_iotlb_itree_next(map, start, last)) { 468 err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last, 469 map->addr, map->perm); 470 if (err) 471 return err; 472 } 473 return 0; 474 } 475 476 static void prune_iotlb(struct mlx5_vdpa_dev *mvdev) 477 { 478 vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX); 479 } 480 481 static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) 482 { 483 struct mlx5_vdpa_direct_mr *dmr; 484 struct mlx5_vdpa_direct_mr *n; 485 486 destroy_indirect_key(mvdev, mr); 487 list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { 488 list_del_init(&dmr->list); 489 unmap_direct_mr(mvdev, dmr); 490 kfree(dmr); 491 } 492 } 493 494 static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) 495 { 496 if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) 497 return; 498 499 prune_iotlb(mvdev); 500 } 501 502 static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) 503 { 504 struct mlx5_vdpa_mr *mr = &mvdev->mr; 505 506 if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) 507 return; 508 509 if (!mr->initialized) 510 return; 511 512 if (mr->user_mr) 513 destroy_user_mr(mvdev, mr); 514 else 515 destroy_dma_mr(mvdev, mr); 516 517 mr->initialized = false; 518 } 519 520 void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) 521 { 522 struct mlx5_vdpa_mr *mr = &mvdev->mr; 523 524 mutex_lock(&mr->mkey_mtx); 525 526 _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); 527 _mlx5_vdpa_destroy_cvq_mr(mvdev, asid); 528 529 mutex_unlock(&mr->mkey_mtx); 530 } 531 532 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) 533 { 534 mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]); 535 mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]); 536 } 537 538 static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, 539 struct vhost_iotlb *iotlb, 540 unsigned int asid) 541 { 542 if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) 543 return 0; 544 545 return dup_iotlb(mvdev, iotlb); 546 } 547 548 static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, 549 struct vhost_iotlb *iotlb, 550 unsigned int asid) 551 { 552 struct mlx5_vdpa_mr *mr = &mvdev->mr; 553 int err; 554 555 if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) 556 return 0; 557 558 if (mr->initialized) 559 return 0; 560 561 if (iotlb) 562 err = create_user_mr(mvdev, iotlb); 563 else 564 err = create_dma_mr(mvdev, mr); 565 566 if (err) 567 return err; 568 569 mr->initialized = true; 570 571 return 0; 572 } 573 574 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, 575 struct vhost_iotlb *iotlb, unsigned int asid) 576 { 577 int err; 578 579 err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); 580 if (err) 581 return err; 582 583 err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid); 584 if (err) 585 goto out_err; 586 587 return 0; 588 589 out_err: 590 _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); 591 592 return err; 593 } 594 595 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 596 unsigned int asid) 597 { 598 int err; 599 600 mutex_lock(&mvdev->mr.mkey_mtx); 601 err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); 602 mutex_unlock(&mvdev->mr.mkey_mtx); 603 return err; 604 } 605 606 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 607 bool *change_map, unsigned int asid) 608 { 609 struct mlx5_vdpa_mr *mr = &mvdev->mr; 610 int err = 0; 611 612 *change_map = false; 613 mutex_lock(&mr->mkey_mtx); 614 if (mr->initialized) { 615 mlx5_vdpa_info(mvdev, "memory map update\n"); 616 *change_map = true; 617 } 618 if (!*change_map) 619 err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); 620 mutex_unlock(&mr->mkey_mtx); 621 622 return err; 623 } 624