// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include "cmd.h"

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
                                  u16 *vhca_id);

int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
        u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
        u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};

        lockdep_assert_held(&mvdev->state_mutex);
        if (mvdev->mdev_detach)
                return -ENOTCONN;

        MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
        MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
        MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

        return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}

int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
        u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
        u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};

        lockdep_assert_held(&mvdev->state_mutex);
        if (mvdev->mdev_detach)
                return -ENOTCONN;

        MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
        MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
        MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

        return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
                                          size_t *state_size)
{
        u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
        u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
        int ret;

        lockdep_assert_held(&mvdev->state_mutex);
        if (mvdev->mdev_detach)
                return -ENOTCONN;

        MLX5_SET(query_vhca_migration_state_in, in, opcode,
                 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
        MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
        MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);

        ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
                                  out);
        if (ret)
                return ret;

        *state_size = MLX5_GET(query_vhca_migration_state_out, out,
                               required_umem_size);
        return 0;
}

static int mlx5fv_vf_event(struct notifier_block *nb,
                           unsigned long event, void *data)
{
        struct mlx5vf_pci_core_device *mvdev =
                container_of(nb, struct mlx5vf_pci_core_device, nb);

        mutex_lock(&mvdev->state_mutex);
        switch (event) {
        case MLX5_PF_NOTIFY_ENABLE_VF:
                mvdev->mdev_detach = false;
                break;
        case MLX5_PF_NOTIFY_DISABLE_VF:
                mlx5vf_disable_fds(mvdev);
                mvdev->mdev_detach = true;
                break;
        default:
                break;
        }
        mlx5vf_state_mutex_unlock(mvdev);
        return 0;
}

void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
        if (!mvdev->migrate_cap)
                return;

        mutex_lock(&mvdev->state_mutex);
        mlx5vf_disable_fds(mvdev);
        mlx5vf_state_mutex_unlock(mvdev);
}

void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
        if (!mvdev->migrate_cap)
                return;

        mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
                                                &mvdev->nb);
        destroy_workqueue(mvdev->cb_wq);
}

void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
                               const struct vfio_migration_ops *mig_ops)
{
        struct pci_dev *pdev = mvdev->core_device.pdev;
        int ret;

        if (!pdev->is_virtfn)
                return;

        mvdev->mdev = mlx5_vf_get_core_dev(pdev);
        if (!mvdev->mdev)
                return;

        if (!MLX5_CAP_GEN(mvdev->mdev, migration))
                goto end;

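        /*
         * Resolve the zero-based VF index and from it the VF's VHCA id; the
         * QUERY_HCA_CAP query below takes vf_id + 1 as the function id.
         */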
        mvdev->vf_id = pci_iov_vf_id(pdev);
        if (mvdev->vf_id < 0)
                goto end;

        if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
                                   &mvdev->vhca_id))
                goto end;

        mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
        if (!mvdev->cb_wq)
                goto end;

        mutex_init(&mvdev->state_mutex);
        spin_lock_init(&mvdev->reset_lock);
        mvdev->nb.notifier_call = mlx5fv_vf_event;
        ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
                                                    &mvdev->nb);
        if (ret) {
                destroy_workqueue(mvdev->cb_wq);
                goto end;
        }

        mvdev->migrate_cap = 1;
        mvdev->core_device.vdev.migration_flags =
                VFIO_MIGRATION_STOP_COPY |
                VFIO_MIGRATION_P2P;
        mvdev->core_device.vdev.mig_ops = mig_ops;

end:
        mlx5_vf_put_core_dev(mvdev->mdev);
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
                                  u16 *vhca_id)
{
        u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
        int out_size;
        void *out;
        int ret;

        out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
        out = kzalloc(out_size, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
        MLX5_SET(query_hca_cap_in, in, other_function, 1);
        MLX5_SET(query_hca_cap_in, in, function_id, function_id);
        MLX5_SET(query_hca_cap_in, in, op_mod,
                 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
                 HCA_CAP_OPMOD_GET_CUR);

        ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
        if (ret)
                goto err_exec;

        *vhca_id = MLX5_GET(query_hca_cap_out, out,
                            capability.cmd_hca_cap.vhca_id);

err_exec:
        kfree(out);
        return ret;
}

static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
                              struct mlx5_vf_migration_file *migf, u32 *mkey)
{
        size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
        struct sg_dma_page_iter dma_iter;
        int err = 0, inlen;
        __be64 *mtt;
        void *mkc;
        u32 *in;

        inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
                sizeof(*mtt) * round_up(npages, 2);

        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
                 DIV_ROUND_UP(npages, 2));
        mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

        for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
                *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
        MLX5_SET(mkc, mkc, lr, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, rr, 1);
        MLX5_SET(mkc, mkc, rw, 1);
        MLX5_SET(mkc, mkc, pd, pdn);
        MLX5_SET(mkc, mkc, bsf_octword_size, 0);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
        MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
        MLX5_SET64(mkc, mkc, len, migf->total_length);
        err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
        kvfree(in);
        return err;
}

void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
        struct mlx5vf_async_data *async_data = container_of(_work,
                struct mlx5vf_async_data, work);
        struct mlx5_vf_migration_file *migf = container_of(async_data,
                struct mlx5_vf_migration_file, async_data);
        struct mlx5_core_dev *mdev = migf->mvdev->mdev;

        mutex_lock(&migf->lock);
        if (async_data->status) {
                migf->is_err = true;
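                /*
                 * Wake any reader blocked on the migration file so it can
                 * observe migf->is_err and fail the read.
                 */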
                wake_up_interruptible(&migf->poll_wait);
        }
        mutex_unlock(&migf->lock);

        mlx5_core_destroy_mkey(mdev, async_data->mkey);
        dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
        mlx5_core_dealloc_pd(mdev, async_data->pdn);
        kvfree(async_data->out);
        fput(migf->filp);
}

static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
        struct mlx5vf_async_data *async_data = container_of(context,
                struct mlx5vf_async_data, cb_work);
        struct mlx5_vf_migration_file *migf = container_of(async_data,
                struct mlx5_vf_migration_file, async_data);

        if (!status) {
                WRITE_ONCE(migf->total_length,
                           MLX5_GET(save_vhca_state_out, async_data->out,
                                    actual_image_size));
                wake_up_interruptible(&migf->poll_wait);
        }

        /*
         * The error and the cleanup flows can't run from an
         * interrupt context
         */
        async_data->status = status;
        queue_work(migf->mvdev->cb_wq, &async_data->work);
}

int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
                               struct mlx5_vf_migration_file *migf)
{
        u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
        u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
        struct mlx5vf_async_data *async_data;
        struct mlx5_core_dev *mdev;
        u32 pdn, mkey;
        int err;

        lockdep_assert_held(&mvdev->state_mutex);
        if (mvdev->mdev_detach)
                return -ENOTCONN;

        mdev = mvdev->mdev;
        err = mlx5_core_alloc_pd(mdev, &pdn);
        if (err)
                return err;

        err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
                              0);
        if (err)
                goto err_dma_map;

        err = _create_state_mkey(mdev, pdn, migf, &mkey);
        if (err)
                goto err_create_mkey;

        MLX5_SET(save_vhca_state_in, in, opcode,
                 MLX5_CMD_OP_SAVE_VHCA_STATE);
        MLX5_SET(save_vhca_state_in, in, op_mod, 0);
        MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
        MLX5_SET(save_vhca_state_in, in, mkey, mkey);
        MLX5_SET(save_vhca_state_in, in, size, migf->total_length);

        async_data = &migf->async_data;
        async_data->out = kvzalloc(out_size, GFP_KERNEL);
        if (!async_data->out) {
                err = -ENOMEM;
                goto err_out;
        }

        /* no data exists till the callback comes back */
        migf->total_length = 0;
        get_file(migf->filp);
        async_data->mkey = mkey;
        async_data->pdn = pdn;
        err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
                               async_data->out,
                               out_size, mlx5vf_save_callback,
                               &async_data->cb_work);
        if (err)
                goto err_exec;

        return 0;

err_exec:
        fput(migf->filp);
        kvfree(async_data->out);
err_out:
        mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
        dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
        mlx5_core_dealloc_pd(mdev, pdn);
        return err;
}

int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
                               struct mlx5_vf_migration_file *migf)
{
        struct mlx5_core_dev *mdev;
        u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
        u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
        u32 pdn, mkey;
        int err;

        lockdep_assert_held(&mvdev->state_mutex);
        if (mvdev->mdev_detach)
                return -ENOTCONN;

        mutex_lock(&migf->lock);
        if (!migf->total_length) {
                err = -EINVAL;
                goto end;
        }

        mdev = mvdev->mdev;
        err = mlx5_core_alloc_pd(mdev, &pdn);
        if (err)
                goto end;

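        /*
         * The saved image is read by the device while executing
         * LOAD_VHCA_STATE, so the scatter table is mapped DMA_TO_DEVICE here,
         * unlike the DMA_FROM_DEVICE mapping used on the save path.
         */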
        err = dma_map_sgtable(mdev->device, &migf->table.sgt,
                              DMA_TO_DEVICE, 0);
        if (err)
                goto err_reg;

        err = _create_state_mkey(mdev, pdn, migf, &mkey);
        if (err)
                goto err_mkey;

        MLX5_SET(load_vhca_state_in, in, opcode,
                 MLX5_CMD_OP_LOAD_VHCA_STATE);
        MLX5_SET(load_vhca_state_in, in, op_mod, 0);
        MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
        MLX5_SET(load_vhca_state_in, in, mkey, mkey);
        MLX5_SET(load_vhca_state_in, in, size, migf->total_length);

        err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);

        mlx5_core_destroy_mkey(mdev, mkey);
err_mkey:
        dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
err_reg:
        mlx5_core_dealloc_pd(mdev, pdn);
end:
        mutex_unlock(&migf->lock);
        return err;
}