// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2, given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})

static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static void fill_sg(struct mlx5_vdpa_direct_mr *mr, void *in)
{
	struct scatterlist *sg;
	__be64 *pas;
	int i;

	pas = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i)
		pas[i] = cpu_to_be64(sg_dma_address(sg));
}

static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i)
		mtt[i] = cpu_to_be64(sg_dma_address(sg));
}

static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	fill_sg(mr, in);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
	kvfree(in);
	if (err) {
		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
		return err;
	}

	return 0;
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

/* Fill the KLM array referenced by the indirect mkey. Holes between
 * consecutive direct mkeys are covered by entries pointing at the null mkey.
 */
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr.key);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}

static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
}

/* Build a scatterlist covering [mr->start, mr->end) and create a direct mkey
 * for it. The scatterlist entry size is derived from the GCD of the map
 * lengths within the range.
 */
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->mdev->device;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		paend = map->addr + maplen(map, mr);
		for (pa = map->addr; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	/* dma_map_sg_attrs() returns the number of mapped entries, 0 on failure */
	err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!err) {
		err = -ENOMEM;
		goto err_map;
	}

	err = create_direct_mr(mvdev, mr);
	if (err)
		goto err_direct;

	return 0;

err_direct:
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->mdev->device;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

/* Split the range [start, start + size) into chunks of at most MAX_KLM_SIZE
 * bytes and create a direct mkey for each chunk.
 */
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;
	int i = 0;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
		i++;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	/* Undo only the chunks added by this call; the caller cleans up mr->head */
	list_for_each_entry_safe(dmr, n, &tmp, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	if (mr->initialized)
		return 0;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->initialized = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err;

	mutex_lock(&mr->mkey_mtx);
	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mr->mkey_mtx);
	return err;
}

void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	mutex_lock(&mr->mkey_mtx);
	if (!mr->initialized)
		goto out;

	destroy_indirect_key(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	memset(mr, 0, sizeof(*mr));
	mr->initialized = false;
out:
	mutex_unlock(&mr->mkey_mtx);
}

static bool map_empty(struct vhost_iotlb *iotlb)
{
	return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX);
}

int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			     bool *change_map)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err = 0;

	*change_map = false;
	if (map_empty(iotlb)) {
		mlx5_vdpa_destroy_mr(mvdev);
		return 0;
	}
	mutex_lock(&mr->mkey_mtx);
	if (mr->initialized) {
		mlx5_vdpa_info(mvdev, "memory map update\n");
		*change_map = true;
	}
	if (!*change_map)
		err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mr->mkey_mtx);

	return err;
}