// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2 given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})

static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}

static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
	kvfree(in);
	if (err) {
		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
		return err;
	}

	return 0;
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

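	/* Direct MRs are kept on the list in ascending address order (see
	 * add_direct_chain() and create_user_mr()), so the first entry gives
	 * the lowest address covered by the indirect key.
	 */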
	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr.key);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}

static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
}

static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	u64 start = 0;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
		start += size;
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
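	/* One scatterlist entry covers 2^log_entity_size bytes;
	 * log_entity_size is derived above from the GCD of the lengths of all
	 * maps intersecting this direct MR, and nsg spans the whole
	 * [mr->start, mr->end) range at that granularity.
	 */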
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		paend = map->addr + maplen(map, mr);
		for (pa = map->addr; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	err = create_direct_mr(mvdev, mr);
	if (err)
		goto err_direct;

	return 0;

err_direct:
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;
	int i = 0;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
		i++;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	list_for_each_entry_safe(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
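 *
 * Runs of maps that are contiguous and share the same permissions are
 * coalesced into a single chain of direct keys; holes between runs are
 * accounted for in num_klms and later covered by the null mkey when the
 * indirect key is populated in fill_indir().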
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
}

static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err;

	if (mr->initialized)
		return 0;

	if (iotlb)
		err = create_user_mr(mvdev, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (!err)
		mr->initialized = true;

	return err;
}

int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	int err;

	mutex_lock(&mvdev->mr.mkey_mtx);
	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mvdev->mr.mkey_mtx);
	return err;
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}

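/* Tear down the currently installed memory key, if any. Takes the mkey mutex,
 * so it is safe to call even when no key has been created.
 */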
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;

	mutex_lock(&mr->mkey_mtx);
	if (!mr->initialized)
		goto out;

	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	memset(mr, 0, sizeof(*mr));
	mr->initialized = false;
out:
	mutex_unlock(&mr->mkey_mtx);
}

static bool map_empty(struct vhost_iotlb *iotlb)
{
	return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX);
}

int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			     bool *change_map)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err = 0;

	*change_map = false;
	if (map_empty(iotlb)) {
		mlx5_vdpa_destroy_mr(mvdev);
		return 0;
	}
	mutex_lock(&mr->mkey_mtx);
	if (mr->initialized) {
		mlx5_vdpa_info(mvdev, "memory map update\n");
		*change_map = true;
	}
	if (!*change_map)
		err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mr->mkey_mtx);

	return err;
}