// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/sched/mm.h>
#include <linux/anon_inodes.h>

#include "cmd.h"

/* Arbitrary to prevent userspace from consuming endless memory */
#define MAX_MIGRATION_SIZE (512*1024*1024)

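/* Recover the mlx5 wrapper from the vfio-pci core device stored as drvdata */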
static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
{
	struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);

	return container_of(core_device, struct mlx5vf_pci_core_device,
			    core_device);
}

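/*
 * Map a byte offset within the migration data to its backing page. Accesses
 * are expected to be sequential, so the last scatterlist position is cached
 * and the walk only restarts from the head when the offset moves backwards.
 */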
static struct page *
mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
			  unsigned long offset)
{
	unsigned long cur_offset = 0;
	struct scatterlist *sg;
	unsigned int i;

	/* All accesses are sequential */
	if (offset < migf->last_offset || !migf->last_offset_sg) {
		migf->last_offset = 0;
		migf->last_offset_sg = migf->table.sgt.sgl;
		migf->sg_last_entry = 0;
	}

	cur_offset = migf->last_offset;

	for_each_sg(migf->last_offset_sg, sg,
			migf->table.sgt.orig_nents - migf->sg_last_entry, i) {
		if (offset < sg->length + cur_offset) {
			migf->last_offset_sg = sg;
			migf->sg_last_entry += i;
			migf->last_offset = cur_offset;
			return nth_page(sg_page(sg),
					(offset - cur_offset) / PAGE_SIZE);
		}
		cur_offset += sg->length;
	}
	return NULL;
}

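/*
 * Grow the migration data buffer by @npages: bulk-allocate pages and append
 * them to the file's scatter-gather table, one PAGE_SIZE worth of page
 * pointers at a time.
 */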
static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
				      unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list);
		if (!filled) {
			ret = -ENOMEM;
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(
			&migf->table, page_list, filled, 0,
			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
			GFP_KERNEL);

		if (ret)
			goto err;
		migf->allocated_length += filled * PAGE_SIZE;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);

	kvfree(page_list);
	return 0;

err:
	kvfree(page_list);
	return ret;
}

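/*
 * Free all pages backing the migration data and mark the file as disabled so
 * any further read/write attempt fails with -ENODEV.
 */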
static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
{
	struct sg_page_iter sg_iter;

	mutex_lock(&migf->lock);
	/* Undo alloc_pages_bulk_array() */
	for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0)
		__free_page(sg_page_iter_page(&sg_iter));
	sg_free_append_table(&migf->table);
	migf->disabled = true;
	migf->total_length = 0;
	migf->allocated_length = 0;
	migf->filp->f_pos = 0;
	mutex_unlock(&migf->lock);
}

static int mlx5vf_release_file(struct inode *inode, struct file *filp)
{
	struct mlx5_vf_migration_file *migf = filp->private_data;

	mlx5vf_disable_fd(migf);
	mutex_destroy(&migf->lock);
	kfree(migf);
	return 0;
}

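/*
 * read() handler for the saving migration file: stream the saved device
 * state to userspace, blocking until data is available unless O_NONBLOCK
 * was requested.
 */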
static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
			       loff_t *pos)
{
	struct mlx5_vf_migration_file *migf = filp->private_data;
	ssize_t done = 0;

	if (pos)
		return -ESPIPE;
	pos = &filp->f_pos;

	if (!(filp->f_flags & O_NONBLOCK)) {
		if (wait_event_interruptible(migf->poll_wait,
			     READ_ONCE(migf->total_length) || migf->is_err))
			return -ERESTARTSYS;
	}

	mutex_lock(&migf->lock);
	if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) {
		done = -EAGAIN;
		goto out_unlock;
	}
	if (*pos > migf->total_length) {
		done = -EINVAL;
		goto out_unlock;
	}
	if (migf->disabled || migf->is_err) {
		done = -ENODEV;
		goto out_unlock;
	}

	len = min_t(size_t, migf->total_length - *pos, len);
	while (len) {
		size_t page_offset;
		struct page *page;
		size_t page_len;
		u8 *from_buff;
		int ret;

		page_offset = (*pos) % PAGE_SIZE;
		page = mlx5vf_get_migration_page(migf, *pos - page_offset);
		if (!page) {
			if (done == 0)
				done = -EINVAL;
			goto out_unlock;
		}

		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
		from_buff = kmap_local_page(page);
		ret = copy_to_user(buf, from_buff + page_offset, page_len);
		kunmap_local(from_buff);
		if (ret) {
			done = -EFAULT;
			goto out_unlock;
		}
		*pos += page_len;
		len -= page_len;
		done += page_len;
		buf += page_len;
	}

out_unlock:
	mutex_unlock(&migf->lock);
	return done;
}

static __poll_t mlx5vf_save_poll(struct file *filp,
				 struct poll_table_struct *wait)
{
	struct mlx5_vf_migration_file *migf = filp->private_data;
	__poll_t pollflags = 0;

	poll_wait(filp, &migf->poll_wait, wait);

	mutex_lock(&migf->lock);
	if (migf->disabled || migf->is_err)
		pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
	else if (READ_ONCE(migf->total_length))
		pollflags = EPOLLIN | EPOLLRDNORM;
	mutex_unlock(&migf->lock);

	return pollflags;
}

static const struct file_operations mlx5vf_save_fops = {
	.owner = THIS_MODULE,
	.read = mlx5vf_save_read,
	.poll = mlx5vf_save_poll,
	.release = mlx5vf_release_file,
	.llseek = no_llseek,
};

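/*
 * Create the saving migration file: query the required state size from the
 * device, pre-allocate the data pages and trigger saving of the VHCA state;
 * readers of the returned file wait on poll_wait until data is available.
 */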
static struct mlx5_vf_migration_file *
mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vf_migration_file *migf;
	int ret;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
					O_RDONLY);
	if (IS_ERR(migf->filp)) {
		int err = PTR_ERR(migf->filp);

		kfree(migf);
		return ERR_PTR(err);
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	init_waitqueue_head(&migf->poll_wait);
	mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
	INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
						    &migf->total_length);
	if (ret)
		goto out_free;

	ret = mlx5vf_add_migration_pages(
		migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
	if (ret)
		goto out_free;

	migf->mvdev = mvdev;
	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
	if (ret)
		goto out_free;
	return migf;
out_free:
	fput(migf->filp);
	return ERR_PTR(ret);
}

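/*
 * write() handler for the resuming migration file: copy device state from
 * userspace into the migration data pages, growing the allocation on demand
 * up to MAX_MIGRATION_SIZE.
 */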
static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
				   size_t len, loff_t *pos)
{
	struct mlx5_vf_migration_file *migf = filp->private_data;
	loff_t requested_length;
	ssize_t done = 0;

	if (pos)
		return -ESPIPE;
	pos = &filp->f_pos;

	if (*pos < 0 ||
	    check_add_overflow((loff_t)len, *pos, &requested_length))
		return -EINVAL;

	if (requested_length > MAX_MIGRATION_SIZE)
		return -ENOMEM;

	mutex_lock(&migf->lock);
	if (migf->disabled) {
		done = -ENODEV;
		goto out_unlock;
	}

	if (migf->allocated_length < requested_length) {
		done = mlx5vf_add_migration_pages(
			migf,
			DIV_ROUND_UP(requested_length - migf->allocated_length,
				     PAGE_SIZE));
		if (done)
			goto out_unlock;
	}

	while (len) {
		size_t page_offset;
		struct page *page;
		size_t page_len;
		u8 *to_buff;
		int ret;

		page_offset = (*pos) % PAGE_SIZE;
		page = mlx5vf_get_migration_page(migf, *pos - page_offset);
		if (!page) {
			if (done == 0)
				done = -EINVAL;
			goto out_unlock;
		}

		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
		to_buff = kmap_local_page(page);
		ret = copy_from_user(to_buff + page_offset, buf, page_len);
		kunmap_local(to_buff);
		if (ret) {
			done = -EFAULT;
			goto out_unlock;
		}
		*pos += page_len;
		len -= page_len;
		done += page_len;
		buf += page_len;
		migf->total_length += page_len;
	}
out_unlock:
	mutex_unlock(&migf->lock);
	return done;
}

static const struct file_operations mlx5vf_resume_fops = {
	.owner = THIS_MODULE,
	.write = mlx5vf_resume_write,
	.release = mlx5vf_release_file,
	.llseek = no_llseek,
};

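/* Create the resuming migration file that userspace writes the state into */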
static struct mlx5_vf_migration_file *
mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vf_migration_file *migf;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
					O_WRONLY);
	if (IS_ERR(migf->filp)) {
		int err = PTR_ERR(migf->filp);

		kfree(migf);
		return ERR_PTR(err);
	}
	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	return migf;
}

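/* Tear down any open saving/resuming migration files */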
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
{
	if (mvdev->resuming_migf) {
		mlx5vf_disable_fd(mvdev->resuming_migf);
		fput(mvdev->resuming_migf->filp);
		mvdev->resuming_migf = NULL;
	}
	if (mvdev->saving_migf) {
		mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
		cancel_work_sync(&mvdev->saving_migf->async_data.work);
		mlx5vf_disable_fd(mvdev->saving_migf);
		fput(mvdev->saving_migf->filp);
		mvdev->saving_migf = NULL;
	}
}

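/*
 * Execute a single arc of the VFIO device migration FSM on the device.
 * Returns a migration file for the arcs that expose one (STOP_COPY and
 * RESUMING), NULL for the others, or an ERR_PTR on failure.
 */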
static struct file *
mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
				    u32 new)
{
	u32 cur = mvdev->mig_state;
	int ret;

	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
		ret = mlx5vf_cmd_suspend_vhca(mvdev,
			MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
		ret = mlx5vf_cmd_resume_vhca(mvdev,
			MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
		ret = mlx5vf_cmd_suspend_vhca(mvdev,
			MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
		ret = mlx5vf_cmd_resume_vhca(mvdev,
			MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
		struct mlx5_vf_migration_file *migf;

		migf = mlx5vf_pci_save_device_data(mvdev);
		if (IS_ERR(migf))
			return ERR_CAST(migf);
		get_file(migf->filp);
		mvdev->saving_migf = migf;
		return migf->filp;
	}

	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
		mlx5vf_disable_fds(mvdev);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
		struct mlx5_vf_migration_file *migf;

		migf = mlx5vf_pci_resume_device_data(mvdev);
		if (IS_ERR(migf))
			return ERR_CAST(migf);
		get_file(migf->filp);
		mvdev->resuming_migf = migf;
		return migf->filp;
	}

	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
		ret = mlx5vf_cmd_load_vhca_state(mvdev,
						 mvdev->resuming_migf);
		if (ret)
			return ERR_PTR(ret);
		mlx5vf_disable_fds(mvdev);
		return NULL;
	}

	/*
	 * vfio_mig_get_next_state() does not use arcs other than the above
	 */
	WARN_ON(true);
	return ERR_PTR(-EINVAL);
}

/*
 * This function is called in all state_mutex unlock cases to
 * handle a 'deferred_reset' if exists.
 */
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
{
again:
	spin_lock(&mvdev->reset_lock);
	if (mvdev->deferred_reset) {
		mvdev->deferred_reset = false;
		spin_unlock(&mvdev->reset_lock);
		mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
		mlx5vf_disable_fds(mvdev);
		goto again;
	}
	mutex_unlock(&mvdev->state_mutex);
	spin_unlock(&mvdev->reset_lock);
}

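/*
 * Step the device through the intermediate states computed by
 * vfio_mig_get_next_state() until new_state is reached.
 */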
static struct file *
mlx5vf_pci_set_device_state(struct vfio_device *vdev,
			    enum vfio_device_mig_state new_state)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	enum vfio_device_mig_state next_state;
	struct file *res = NULL;
	int ret;

	mutex_lock(&mvdev->state_mutex);
	while (new_state != mvdev->mig_state) {
		ret = vfio_mig_get_next_state(vdev, mvdev->mig_state,
					      new_state, &next_state);
		if (ret) {
			res = ERR_PTR(ret);
			break;
		}
		res = mlx5vf_pci_step_device_state_locked(mvdev, next_state);
		if (IS_ERR(res))
			break;
		mvdev->mig_state = next_state;
		if (WARN_ON(res && new_state != mvdev->mig_state)) {
			fput(res);
			res = ERR_PTR(-EINVAL);
			break;
		}
	}
	mlx5vf_state_mutex_unlock(mvdev);
	return res;
}

static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
				       enum vfio_device_mig_state *curr_state)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);

	mutex_lock(&mvdev->state_mutex);
	*curr_state = mvdev->mig_state;
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;
}

static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
{
	struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);

	if (!mvdev->migrate_cap)
		return;

	/*
	 * As the higher VFIO layers are holding locks across reset and using
	 * those same locks with the mm_lock we need to prevent ABBA deadlock
	 * with the state_mutex and mm_lock.
	 * In case the state_mutex was taken already we defer the cleanup work
	 * to the unlock flow of the other running context.
	 */
	spin_lock(&mvdev->reset_lock);
	mvdev->deferred_reset = true;
	if (!mutex_trylock(&mvdev->state_mutex)) {
		spin_unlock(&mvdev->reset_lock);
		return;
	}
	spin_unlock(&mvdev->reset_lock);
	mlx5vf_state_mutex_unlock(mvdev);
}

static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct vfio_pci_core_device *vdev = &mvdev->core_device;
	int ret;

	ret = vfio_pci_core_enable(vdev);
	if (ret)
		return ret;

	if (mvdev->migrate_cap)
		mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
	vfio_pci_core_finish_enable(vdev);
	return 0;
}

static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);

	mlx5vf_cmd_close_migratable(mvdev);
	vfio_pci_core_close_device(core_vdev);
}

static const struct vfio_migration_ops mlx5vf_pci_mig_ops = {
	.migration_set_state = mlx5vf_pci_set_device_state,
	.migration_get_state = mlx5vf_pci_get_device_state,
};

static const struct vfio_log_ops mlx5vf_pci_log_ops = {
	.log_start = mlx5vf_start_page_tracker,
	.log_stop = mlx5vf_stop_page_tracker,
	.log_read_and_clear = mlx5vf_tracker_read_and_clear,
};

static int mlx5vf_pci_init_dev(struct vfio_device *core_vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(core_vdev,
			struct mlx5vf_pci_core_device, core_device.vdev);
	int ret;

	ret = vfio_pci_core_init_dev(core_vdev);
	if (ret)
		return ret;

	mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops,
				  &mlx5vf_pci_log_ops);

	return 0;
}

static void mlx5vf_pci_release_dev(struct vfio_device *core_vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(core_vdev,
			struct mlx5vf_pci_core_device, core_device.vdev);

	mlx5vf_cmd_remove_migratable(mvdev);
	vfio_pci_core_release_dev(core_vdev);
}

static const struct vfio_device_ops mlx5vf_pci_ops = {
	.name = "mlx5-vfio-pci",
	.init = mlx5vf_pci_init_dev,
	.release = mlx5vf_pci_release_dev,
	.open_device = mlx5vf_pci_open_device,
	.close_device = mlx5vf_pci_close_device,
	.ioctl = vfio_pci_core_ioctl,
	.device_feature = vfio_pci_core_ioctl_feature,
	.read = vfio_pci_core_read,
	.write = vfio_pci_core_write,
	.mmap = vfio_pci_core_mmap,
	.request = vfio_pci_core_request,
	.match = vfio_pci_core_match,
};

static int mlx5vf_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct mlx5vf_pci_core_device *mvdev;
	int ret;

	mvdev = vfio_alloc_device(mlx5vf_pci_core_device, core_device.vdev,
				  &pdev->dev, &mlx5vf_pci_ops);
	if (IS_ERR(mvdev))
		return PTR_ERR(mvdev);

	dev_set_drvdata(&pdev->dev, &mvdev->core_device);
	ret = vfio_pci_core_register_device(&mvdev->core_device);
	if (ret)
		goto out_put_vdev;
	return 0;

out_put_vdev:
	vfio_put_device(&mvdev->core_device.vdev);
	return ret;
}

static void mlx5vf_pci_remove(struct pci_dev *pdev)
{
	struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);

	vfio_pci_core_unregister_device(&mvdev->core_device);
	vfio_put_device(&mvdev->core_device.vdev);
}

static const struct pci_device_id mlx5vf_pci_table[] = {
	{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_MELLANOX, 0x101e) }, /* ConnectX Family mlx5Gen Virtual Function */
	{}
};

MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);

static const struct pci_error_handlers mlx5vf_err_handlers = {
	.reset_done = mlx5vf_pci_aer_reset_done,
	.error_detected = vfio_pci_core_aer_err_detected,
};

static struct pci_driver mlx5vf_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = mlx5vf_pci_table,
	.probe = mlx5vf_pci_probe,
	.remove = mlx5vf_pci_remove,
	.err_handler = &mlx5vf_err_handlers,
	.driver_managed_dma = true,
};

static void __exit mlx5vf_pci_cleanup(void)
{
	pci_unregister_driver(&mlx5vf_pci_driver);
}

static int __init mlx5vf_pci_init(void)
{
	return pci_register_driver(&mlx5vf_pci_driver);
}

module_init(mlx5vf_pci_init);
module_exit(mlx5vf_pci_cleanup);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Max Gurtovoy <mgurtovoy@nvidia.com>");
MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
MODULE_DESCRIPTION(
	"MLX5 VFIO PCI - User Level meta-driver for MLX5 device family");