// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include "cmd.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);

int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}

int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
	MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
					  size_t *state_size)
{
	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(query_vhca_migration_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);

	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
				  out);
	if (ret)
		return ret;

	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
			       required_umem_size);
	return 0;
}

static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
	/* Mark the tracker under an error and wake it up if it's running */
	mvdev->tracker.is_err = true;
	complete(&mvdev->tracker_comp);
}

static int mlx5fv_vf_event(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(nb, struct mlx5vf_pci_core_device, nb);

	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = false;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		mlx5vf_cmd_close_migratable(mvdev);
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = true;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	default:
		break;
	}

	return 0;
}

void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	/* Must be done outside the lock to let it progress */
	set_tracker_error(mvdev);
	mutex_lock(&mvdev->state_mutex);
	mlx5vf_disable_fds(mvdev);
	_mlx5vf_free_page_tracker_resources(mvdev);
	mlx5vf_state_mutex_unlock(mvdev);
}

void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
						&mvdev->nb);
	destroy_workqueue(mvdev->cb_wq);
}

void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
			       const struct vfio_migration_ops *mig_ops,
			       const struct vfio_log_ops *log_ops)
{
	struct pci_dev *pdev = mvdev->core_device.pdev;
	int ret;

	if (!pdev->is_virtfn)
		return;

	mvdev->mdev = mlx5_vf_get_core_dev(pdev);
	if (!mvdev->mdev)
		return;

	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
		goto end;

	mvdev->vf_id = pci_iov_vf_id(pdev);
	if (mvdev->vf_id < 0)
		goto end;

	if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
				   &mvdev->vhca_id))
		goto end;

	mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
	if (!mvdev->cb_wq)
		goto end;

	mutex_init(&mvdev->state_mutex);
	spin_lock_init(&mvdev->reset_lock);
	mvdev->nb.notifier_call = mlx5fv_vf_event;
	ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
						    &mvdev->nb);
	if (ret) {
		destroy_workqueue(mvdev->cb_wq);
		goto end;
	}

	mvdev->migrate_cap = 1;
	mvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P;
	mvdev->core_device.vdev.mig_ops = mig_ops;
	init_completion(&mvdev->tracker_comp);
	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
		mvdev->core_device.vdev.log_ops = log_ops;

end:
	mlx5_vf_put_core_dev(mvdev->mdev);
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	int out_size;
	void *out;
	int ret;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, other_function, 1);
	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
	MLX5_SET(query_hca_cap_in, in, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
		 HCA_CAP_OPMOD_GET_CUR);

	ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (ret)
		goto err_exec;

	*vhca_id = MLX5_GET(query_hca_cap_out, out,
			    capability.cmd_hca_cap.vhca_id);

err_exec:
	kfree(out);
	return ret;
}

static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
			struct mlx5_vf_migration_file *migf,
			struct mlx5_vhca_recv_buf *recv_buf,
			u32 *mkey)
{
	size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) :
				recv_buf->npages;
	int err = 0, inlen;
	__be64 *mtt;
	void *mkc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*mtt) * round_up(npages, 2);

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 DIV_ROUND_UP(npages, 2));
	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	if (migf) {
		struct sg_dma_page_iter dma_iter;

		for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
	} else {
		int i;

		for (i = 0; i < npages; i++)
			*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
	MLX5_SET64(mkc, mkc, len,
		   migf ? migf->total_length : (npages * PAGE_SIZE));
	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	kvfree(in);
	return err;
}

void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
	struct mlx5vf_async_data *async_data = container_of(_work,
		struct mlx5vf_async_data, work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);
	struct mlx5_core_dev *mdev = migf->mvdev->mdev;

	mutex_lock(&migf->lock);
	if (async_data->status) {
		migf->is_err = true;
		wake_up_interruptible(&migf->poll_wait);
	}
	mutex_unlock(&migf->lock);

	mlx5_core_destroy_mkey(mdev, async_data->mkey);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	mlx5_core_dealloc_pd(mdev, async_data->pdn);
	kvfree(async_data->out);
	fput(migf->filp);
}

static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5vf_async_data *async_data = container_of(context,
		struct mlx5vf_async_data, cb_work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	if (!status) {
		WRITE_ONCE(migf->total_length,
			   MLX5_GET(save_vhca_state_out, async_data->out,
				    actual_image_size));
		wake_up_interruptible(&migf->poll_wait);
	}

	/*
	 * The error and the cleanup flows can't run from an
	 * interrupt context
	 */
	async_data->status = status;
	queue_work(migf->mvdev->cb_wq, &async_data->work);
}

int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf)
{
	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5vf_async_data *async_data;
	struct mlx5_core_dev *mdev;
	u32 pdn, mkey;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	mdev = mvdev->mdev;
	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		return err;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
			      0);
	if (err)
		goto err_dma_map;

	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	if (err)
		goto err_create_mkey;

	MLX5_SET(save_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_SAVE_VHCA_STATE);
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, mkey);
	MLX5_SET(save_vhca_state_in, in, size, migf->total_length);

	async_data = &migf->async_data;
	async_data->out = kvzalloc(out_size, GFP_KERNEL);
	if (!async_data->out) {
		err = -ENOMEM;
		goto err_out;
	}

	/* no data exists till the callback comes back */
	migf->total_length = 0;
	get_file(migf->filp);
	async_data->mkey = mkey;
	async_data->pdn = pdn;
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
			       async_data->out,
			       out_size, mlx5vf_save_callback,
			       &async_data->cb_work);
	if (err)
		goto err_exec;

	return 0;

err_exec:
	fput(migf->filp);
	kvfree(async_data->out);
err_out:
	mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
	mlx5_core_dealloc_pd(mdev, pdn);
	return err;
}

int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf)
{
	struct mlx5_core_dev *mdev;
	u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	u32 pdn, mkey;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	mutex_lock(&migf->lock);
	if (!migf->total_length) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		goto end;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
	if (err)
		goto err_reg;

	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	if (err)
		goto err_mkey;

	MLX5_SET(load_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_LOAD_VHCA_STATE);
	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
	MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(load_vhca_state_in, in, mkey, mkey);
	MLX5_SET(load_vhca_state_in, in, size, migf->total_length);

	err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);

	mlx5_core_destroy_mkey(mdev, mkey);
err_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
err_reg:
	mlx5_core_dealloc_pd(mdev, pdn);
end:
	mutex_unlock(&migf->lock);
	return err;
}

static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
			   u32 req_nodes)
{
	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
	unsigned long min_gap;
	unsigned long curr_gap;

	/* Special shortcut when a single range is required */
	if (req_nodes == 1) {
		unsigned long last;

		curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			last = curr->last;
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
			if (prev != comb_start)
				interval_tree_remove(prev, root);
		}
		comb_start->last = last;
		return;
	}

	/* Combine ranges which have the smallest gap */
	while (cur_nodes > req_nodes) {
		prev = NULL;
		min_gap = ULONG_MAX;
		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			if (prev) {
				curr_gap = curr->start - prev->last;
				if (curr_gap < min_gap) {
					min_gap = curr_gap;
					comb_start = prev;
					comb_end = curr;
				}
			}
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
		}
		comb_start->last = comb_end->last;
		interval_tree_remove(comb_end, root);
		cur_nodes--;
	}
}

static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
				 struct mlx5vf_pci_core_device *mvdev,
				 struct rb_root_cached *ranges, u32 nnodes)
{
	int max_num_range =
		MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	int record_size = MLX5_ST_SZ_BYTES(page_track_range);
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	struct interval_tree_node *node = NULL;
	u64 total_ranges_len = 0;
	u32 num_ranges = nnodes;
	u8 log_addr_space_size;
	void *range_list_ptr;
	void *obj_context;
	void *cmd_hdr;
	int inlen;
	void *in;
	int err;
	int i;

	if (num_ranges > max_num_range) {
		combine_ranges(ranges, nnodes, max_num_range);
		num_ranges = max_num_range;
	}

	inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
		record_size * num_ranges;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
			       general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_OBJ_TYPE_PAGE_TRACK);
	obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
	MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
	MLX5_SET(page_track, obj_context, track_type, 1);
	MLX5_SET(page_track, obj_context, log_page_size,
		 ilog2(tracker->host_qp->tracked_page_size));
	MLX5_SET(page_track, obj_context, log_msg_size,
		 ilog2(tracker->host_qp->max_msg_size));
	MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
	MLX5_SET(page_track, obj_context, num_ranges, num_ranges);

	range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	for (i = 0; i < num_ranges; i++) {
		void *addr_range_i_base = range_list_ptr + record_size * i;
		unsigned long length = node->last - node->start;

		MLX5_SET64(page_track_range, addr_range_i_base, start_address,
			   node->start);
		MLX5_SET64(page_track_range, addr_range_i_base, length, length);
		total_ranges_len += length;
		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	WARN_ON(node);
	log_addr_space_size = ilog2(total_ranges_len);
	if (log_addr_space_size <
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
	    log_addr_space_size >
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET(page_track, obj_context, log_addr_space_size,
		 log_addr_space_size);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (err)
		goto out;

	tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
out:
	kfree(in);
	return err;
}

static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
				      u32 tracker_id)
{
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};

	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
				     u32 tracker_id, unsigned long iova,
				     unsigned long length, u32 tracker_state)
{
	u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);

	obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
	MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
	MLX5_SET64(page_track, obj_context, range_start_address, iova);
	MLX5_SET64(page_track, obj_context, length, length);
	MLX5_SET(page_track, obj_context, state, tracker_state);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
			     struct mlx5_vhca_cq_buf *buf, int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
				       mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
	buf->cqe_size = cqe_size;
	buf->nent = nent;
	return 0;
}

static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
	mlx5_db_free(mdev, &cq->db);
}

static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	if (type != MLX5_EVENT_TYPE_CQ_ERROR)
		return;

	set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device,
				       tracker.cq.mcq));
}

static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
				 void *data)
{
	struct mlx5_vhca_page_tracker *tracker =
		mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
	struct mlx5vf_pci_core_device *mvdev = container_of(
		tracker, struct mlx5vf_pci_core_device, tracker);
	struct mlx5_eqe *eqe = data;
	u8 event_type = (u8)type;
	u8 queue_type;
	int qp_num;

	switch (event_type) {
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		queue_type = eqe->data.qp_srq.type;
		if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP)
			break;
		qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
		if (qp_num != tracker->host_qp->qpn &&
		    qp_num != tracker->fw_qp->qpn)
			break;
		set_tracker_error(mvdev);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
			       struct mlx5_eqe *eqe)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(mcq, struct mlx5vf_pci_core_device,
			     tracker.cq.mcq);

	complete(&mvdev->tracker_comp);
}

static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
			    struct mlx5_vhca_page_tracker *tracker,
			    size_t ncqe)
{
	int cqe_size = cache_line_size() == 128 ? 128 : 64;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vhca_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;

	cq = &tracker->cq;
	ncqe = roundup_pow_of_two(ncqe);
	err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
	if (err)
		return err;

	cq->ncqe = ncqe;
	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;
	err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
	if (err)
		goto err_db_free;

	init_cq_frag_buf(&cq->buf);
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		cq->buf.frag_buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buff;
	}

	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
	cq->mcq.comp = mlx5vf_cq_complete;
	cq->mcq.event = mlx5vf_cq_event;
	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
		    cq->mcq.cons_index);
	kvfree(in);
	return 0;

err_vec:
	kvfree(in);
err_buff:
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
err_db_free:
	mlx5_db_free(mdev, &cq->db);
	return err;
}

static struct mlx5_vhca_qp *
mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
		    struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	struct mlx5_vhca_qp *qp;
	u8 log_rq_stride;
	u8 log_rq_sz;
	void *qpc;
	int inlen;
	void *in;
	int err;

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
	log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
	log_rq_sz = ilog2(qp->rq.wqe_cnt);
	err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
	if (err)
		goto err_free;

	if (max_recv_wr) {
		err = mlx5_frag_buf_alloc_node(mdev,
			wq_get_byte_sz(log_rq_sz, log_rq_stride),
			&qp->buf, mdev->priv.numa_node);
		if (err)
			goto err_db_free;
		mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
	}

	qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		qp->buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, tracker->pdn);
	MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	MLX5_SET(qpc, qpc, no_sq, 1);
	if (max_recv_wr) {
		MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
		MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
		MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
		MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
		mlx5_fill_page_frag_array(&qp->buf,
					  (__be64 *)MLX5_ADDR_OF(create_qp_in,
								 in, pas));
	} else {
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
	}

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kvfree(in);
	if (err)
		goto err_in;

	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	return qp;

err_in:
	if (max_recv_wr)
		mlx5_frag_buf_free(mdev, &qp->buf);
err_db_free:
	mlx5_db_free(mdev, &qp->db);
err_free:
	kfree(qp);
	return ERR_PTR(err);
}

static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
{
	struct mlx5_wqe_data_seg *data;
	unsigned int ix;

	WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
	ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
	data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
	data->byte_count = cpu_to_be32(qp->max_msg_size);
	data->lkey = cpu_to_be32(qp->recv_buf.mkey);
	data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
	qp->rq.pc++;
	/* Make sure that descriptors are written before doorbell record. */
	dma_wmb();
	*qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
}

static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp, u32 remote_qpn,
			      bool host_qp)
{
	u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;
	int ret;

	/* Init */
	qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);
	MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
	if (ret)
		return ret;

	if (host_qp) {
		struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
		int i;

		for (i = 0; i < qp->rq.wqe_cnt; i++) {
			mlx5vf_post_recv(qp);
			recv_buf->next_rq_offset += qp->max_msg_size;
		}
	}

	/* RTR */
	qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
	MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
	MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
	MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
	if (ret || host_qp)
		return ret;

	/* RTS */
	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, retry_count, 7);
	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
	MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
}

static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	mlx5_frag_buf_free(mdev, &qp->buf);
	mlx5_db_free(mdev, &qp->db);
	kfree(qp);
}

static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	/* Undo alloc_pages_bulk_array() */
	for (i = 0; i < recv_buf->npages; i++)
		__free_page(recv_buf->page_list[i]);

	kvfree(recv_buf->page_list);
}

static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
			    unsigned int npages)
{
	unsigned int filled = 0, done = 0;
	int i;

	recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
				       GFP_KERNEL);
	if (!recv_buf->page_list)
		return -ENOMEM;

	for (;;) {
		filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
						recv_buf->page_list + done);
		if (!filled)
			goto err;

		done += filled;
		if (done == npages)
			break;
	}

	recv_buf->npages = npages;
	return 0;

err:
	for (i = 0; i < npages; i++) {
		if (recv_buf->page_list[i])
			__free_page(recv_buf->page_list[i]);
	}

	kvfree(recv_buf->page_list);
	return -ENOMEM;
}

static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
				   struct mlx5_vhca_recv_buf *recv_buf)
{
	int i, j;

	recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
				       sizeof(*recv_buf->dma_addrs),
				       GFP_KERNEL);
	if (!recv_buf->dma_addrs)
		return -ENOMEM;

	for (i = 0; i < recv_buf->npages; i++) {
		recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
						      recv_buf->page_list[i],
						      0, PAGE_SIZE,
						      DMA_FROM_DEVICE);
		if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
			goto error;
	}
	return 0;

error:
	for (j = 0; j < i; j++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
	return -ENOMEM;
}

static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
				      struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	for (i = 0; i < recv_buf->npages; i++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
}

static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp)
{
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;

	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
	unregister_dma_recv_pages(mdev, recv_buf);
	free_recv_pages(&qp->recv_buf);
}

static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp, u32 pdn,
					  u64 rq_size)
{
	unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
	int err;

	err = alloc_recv_pages(recv_buf, npages);
	if (err < 0)
		return err;

	err = register_dma_recv_pages(mdev, recv_buf);
	if (err)
		goto end;

	err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
	if (err)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_recv_pages(mdev, recv_buf);
end:
	free_recv_pages(recv_buf);
	return err;
}

static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&mvdev->state_mutex);

	if (!mvdev->log_active)
		return;

	WARN_ON(mvdev->mdev_detach);

	mlx5_eq_notifier_unregister(mdev, &tracker->nb);
	mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
	mlx5vf_destroy_qp(mdev, tracker->fw_qp);
	mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
	mlx5vf_destroy_qp(mdev, tracker->host_qp);
	mlx5vf_destroy_cq(mdev, &tracker->cq);
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
	mlx5_put_uars_page(mdev, tracker->uar);
	mvdev->log_active = false;
}

int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active)
		goto end;

	_mlx5vf_free_page_tracker_resources(mvdev);
	mvdev->log_active = false;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;
}

int mlx5vf_start_page_tracker(struct vfio_device *vdev,
			      struct rb_root_cached *ranges, u32 nnodes,
			      u64 *page_size)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	u8 log_tracked_page = ilog2(*page_size);
	struct mlx5_vhca_qp *host_qp;
	struct mlx5_vhca_qp *fw_qp;
	struct mlx5_core_dev *mdev;
	u32 max_msg_size = PAGE_SIZE;
	u64 rq_size = SZ_2M;
	u32 max_recv_wr;
	int err;

	mutex_lock(&mvdev->state_mutex);
	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	if (mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	memset(tracker, 0, sizeof(*tracker));
	tracker->uar = mlx5_get_uars_page(mdev);
	if (IS_ERR(tracker->uar)) {
		err = PTR_ERR(tracker->uar);
		goto end;
	}

	err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
	if (err)
		goto err_uar;

	max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
	err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
	if (err)
		goto err_dealloc_pd;

	host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
	if (IS_ERR(host_qp)) {
		err = PTR_ERR(host_qp);
		goto err_cq;
	}

	host_qp->max_msg_size = max_msg_size;
	if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size);
	} else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size);
	}

	host_qp->tracked_page_size = (1ULL << log_tracked_page);
	err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
					     rq_size);
	if (err)
		goto err_host_qp;

	fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
	if (IS_ERR(fw_qp)) {
		err = PTR_ERR(fw_qp);
		goto err_recv_resources;
	}

	err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
	if (err)
		goto err_activate;

	err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
	if (err)
		goto err_activate;

	tracker->host_qp = host_qp;
	tracker->fw_qp = fw_qp;
	err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
	if (err)
		goto err_activate;

	MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY);
	mlx5_eq_notifier_register(mdev, &tracker->nb);
	*page_size = host_qp->tracked_page_size;
	mvdev->log_active = true;
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;

err_activate:
	mlx5vf_destroy_qp(mdev, fw_qp);
err_recv_resources:
	mlx5vf_free_qp_recv_resources(mdev, host_qp);
err_host_qp:
	mlx5vf_destroy_qp(mdev, host_qp);
err_cq:
	mlx5vf_destroy_cq(mdev, &tracker->cq);
err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
err_uar:
	mlx5_put_uars_page(mdev, tracker->uar);
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}

static void
set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
		  struct iova_bitmap *dirty)
{
	u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
	u32 nent = size / entry_size;
	struct page *page;
	u64 addr;
	u64 *buf;
	int i;

	if (WARN_ON(index >= qp->recv_buf.npages ||
		    (nent > qp->max_msg_size / entry_size)))
		return;

	page = qp->recv_buf.page_list[index];
	buf = kmap_local_page(page);
	for (i = 0; i < nent; i++) {
		addr = MLX5_GET(page_track_report_entry, buf + i,
				dirty_address_low);
		addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
				      dirty_address_high) << 32;
		iova_bitmap_set(dirty, addr, qp->tracked_page_size);
	}
	kunmap_local(buf);
}

static void
mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
	      struct iova_bitmap *dirty, int *tracker_status)
{
	u32 size;
	int ix;

	qp->rq.cc++;
	*tracker_status = be32_to_cpu(cqe->immediate) >> 28;
	size = be32_to_cpu(cqe->byte_cnt);
	ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);

	/* zero length CQE, no data */
	WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
	if (size)
		set_report_output(size, ix, qp, dirty);

	qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
	mlx5vf_post_recv(qp);
}

static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
		return cqe64;
	} else {
		return NULL;
	}
}

static int
mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
		   struct iova_bitmap *dirty, int *tracker_status)
{
	struct mlx5_cqe64 *cqe;
	u8 opcode;

	cqe = get_sw_cqe(cq, cq->mcq.cons_index);
	if (!cqe)
		return CQ_EMPTY;

	++cq->mcq.cons_index;
	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();
	opcode = get_cqe_opcode(cqe);
	switch (opcode) {
	case MLX5_CQE_RESP_SEND_IMM:
		mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
		return CQ_OK;
	default:
		return CQ_POLL_ERR;
	}
}

int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
				  unsigned long length,
				  struct iova_bitmap *dirty)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_vhca_cq *cq = &tracker->cq;
	struct mlx5_core_dev *mdev;
	int poll_err, err;

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	mdev = mvdev->mdev;
	err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
					MLX5_PAGE_TRACK_STATE_REPORTING);
	if (err)
		goto end;

	tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
	while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING &&
	       !tracker->is_err) {
		poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
					      &tracker->status);
		if (poll_err == CQ_EMPTY) {
			mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
				    cq->mcq.cons_index);
			poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
						      dirty, &tracker->status);
			if (poll_err == CQ_EMPTY) {
				wait_for_completion(&mvdev->tracker_comp);
				continue;
			}
		}
		if (poll_err == CQ_POLL_ERR) {
			err = -EIO;
			goto end;
		}
		mlx5_cq_set_ci(&cq->mcq);
	}

	if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
		tracker->is_err = true;

	if (tracker->is_err)
		err = -EIO;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}