xref: /openbmc/linux/drivers/pci/iov.c (revision 0c5c62ddf88c34bc83b66e4ac9beb2bb0e1887d4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * PCI Express I/O Virtualization (IOV) support
4  *   Single Root IOV 1.0
5  *   Address Translation Service 1.0
6  *
7  * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
8  */
9 
10 #include <linux/pci.h>
11 #include <linux/slab.h>
12 #include <linux/export.h>
13 #include <linux/string.h>
14 #include <linux/delay.h>
15 #include "pci.h"
16 
17 #define VIRTFN_ID_LEN	16
18 
19 int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
20 {
21 	if (!dev->is_physfn)
22 		return -EINVAL;
23 	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
24 				    dev->sriov->stride * vf_id) >> 8);
25 }
26 
27 int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
28 {
29 	if (!dev->is_physfn)
30 		return -EINVAL;
31 	return (dev->devfn + dev->sriov->offset +
32 		dev->sriov->stride * vf_id) & 0xff;
33 }
34 EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
35 
36 /*
37  * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
38  * change when NumVFs changes.
39  *
40  * Update iov->offset and iov->stride when NumVFs is written.
41  */
42 static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
43 {
44 	struct pci_sriov *iov = dev->sriov;
45 
46 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
47 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
48 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
49 }
50 
51 /*
52  * The PF consumes one bus number.  NumVFs, First VF Offset, and VF Stride
53  * determine how many additional bus numbers will be consumed by VFs.
54  *
55  * Iterate over all valid NumVFs, validate offset and stride, and calculate
56  * the maximum number of bus numbers that could ever be required.
57  */
58 static int compute_max_vf_buses(struct pci_dev *dev)
59 {
60 	struct pci_sriov *iov = dev->sriov;
61 	int nr_virtfn, busnr, rc = 0;
62 
63 	for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
64 		pci_iov_set_numvfs(dev, nr_virtfn);
65 		if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
66 			rc = -EIO;
67 			goto out;
68 		}
69 
70 		busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
71 		if (busnr > iov->max_VF_buses)
72 			iov->max_VF_buses = busnr;
73 	}
74 
75 out:
76 	pci_iov_set_numvfs(dev, 0);
77 	return rc;
78 }
79 
80 static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
81 {
82 	struct pci_bus *child;
83 
84 	if (bus->number == busnr)
85 		return bus;
86 
87 	child = pci_find_bus(pci_domain_nr(bus), busnr);
88 	if (child)
89 		return child;
90 
91 	child = pci_add_new_bus(bus, NULL, busnr);
92 	if (!child)
93 		return NULL;
94 
95 	pci_bus_insert_busn_res(child, busnr, busnr);
96 
97 	return child;
98 }
99 
100 static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
101 {
102 	if (physbus != virtbus && list_empty(&virtbus->devices))
103 		pci_remove_bus(virtbus);
104 }
105 
106 resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
107 {
108 	if (!dev->is_physfn)
109 		return 0;
110 
111 	return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
112 }
113 
114 static void pci_read_vf_config_common(struct pci_dev *virtfn)
115 {
116 	struct pci_dev *physfn = virtfn->physfn;
117 
118 	/*
119 	 * Some config registers are the same across all associated VFs.
120 	 * Read them once from VF0 so we can skip reading them from the
121 	 * other VFs.
122 	 *
123 	 * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
124 	 * have the same Revision ID and Subsystem ID, but we assume they
125 	 * do.
126 	 */
127 	pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
128 			      &physfn->sriov->class);
129 	pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
130 			     &physfn->sriov->hdr_type);
131 	pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
132 			     &physfn->sriov->subsystem_vendor);
133 	pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
134 			     &physfn->sriov->subsystem_device);
135 }
136 
137 int pci_iov_sysfs_link(struct pci_dev *dev,
138 		struct pci_dev *virtfn, int id)
139 {
140 	char buf[VIRTFN_ID_LEN];
141 	int rc;
142 
143 	sprintf(buf, "virtfn%u", id);
144 	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
145 	if (rc)
146 		goto failed;
147 	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
148 	if (rc)
149 		goto failed1;
150 
151 	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
152 
153 	return 0;
154 
155 failed1:
156 	sysfs_remove_link(&dev->dev.kobj, buf);
157 failed:
158 	return rc;
159 }
160 
161 #ifdef CONFIG_PCI_MSI
162 static ssize_t sriov_vf_total_msix_show(struct device *dev,
163 					struct device_attribute *attr,
164 					char *buf)
165 {
166 	struct pci_dev *pdev = to_pci_dev(dev);
167 	struct pci_driver *pdrv;
168 	u32 vf_total_msix = 0;
169 
170 	device_lock(dev);
171 	pdrv = to_pci_driver(dev->driver);
172 	if (!pdrv || !pdrv->sriov_get_vf_total_msix)
173 		goto unlock;
174 
175 	vf_total_msix = pdrv->sriov_get_vf_total_msix(pdev);
176 unlock:
177 	device_unlock(dev);
178 	return sysfs_emit(buf, "%u\n", vf_total_msix);
179 }
180 static DEVICE_ATTR_RO(sriov_vf_total_msix);
181 
182 static ssize_t sriov_vf_msix_count_store(struct device *dev,
183 					 struct device_attribute *attr,
184 					 const char *buf, size_t count)
185 {
186 	struct pci_dev *vf_dev = to_pci_dev(dev);
187 	struct pci_dev *pdev = pci_physfn(vf_dev);
188 	struct pci_driver *pdrv;
189 	int val, ret = 0;
190 
191 	if (kstrtoint(buf, 0, &val) < 0)
192 		return -EINVAL;
193 
194 	if (val < 0)
195 		return -EINVAL;
196 
197 	device_lock(&pdev->dev);
198 	pdrv = to_pci_driver(dev->driver);
199 	if (!pdrv || !pdrv->sriov_set_msix_vec_count) {
200 		ret = -EOPNOTSUPP;
201 		goto err_pdev;
202 	}
203 
204 	device_lock(&vf_dev->dev);
205 	if (to_pci_driver(vf_dev->dev.driver)) {
206 		/*
207 		 * A driver is already attached to this VF and has configured
208 		 * itself based on the current MSI-X vector count. Changing
209 		 * the vector size could mess up the driver, so block it.
210 		 */
211 		ret = -EBUSY;
212 		goto err_dev;
213 	}
214 
215 	ret = pdrv->sriov_set_msix_vec_count(vf_dev, val);
216 
217 err_dev:
218 	device_unlock(&vf_dev->dev);
219 err_pdev:
220 	device_unlock(&pdev->dev);
221 	return ret ? : count;
222 }
223 static DEVICE_ATTR_WO(sriov_vf_msix_count);
224 #endif
225 
226 static struct attribute *sriov_vf_dev_attrs[] = {
227 #ifdef CONFIG_PCI_MSI
228 	&dev_attr_sriov_vf_msix_count.attr,
229 #endif
230 	NULL,
231 };
232 
233 static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
234 					  struct attribute *a, int n)
235 {
236 	struct device *dev = kobj_to_dev(kobj);
237 	struct pci_dev *pdev = to_pci_dev(dev);
238 
239 	if (!pdev->is_virtfn)
240 		return 0;
241 
242 	return a->mode;
243 }
244 
245 const struct attribute_group sriov_vf_dev_attr_group = {
246 	.attrs = sriov_vf_dev_attrs,
247 	.is_visible = sriov_vf_attrs_are_visible,
248 };
249 
250 int pci_iov_add_virtfn(struct pci_dev *dev, int id)
251 {
252 	int i;
253 	int rc = -ENOMEM;
254 	u64 size;
255 	struct pci_dev *virtfn;
256 	struct resource *res;
257 	struct pci_sriov *iov = dev->sriov;
258 	struct pci_bus *bus;
259 
260 	bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
261 	if (!bus)
262 		goto failed;
263 
264 	virtfn = pci_alloc_dev(bus);
265 	if (!virtfn)
266 		goto failed0;
267 
268 	virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
269 	virtfn->vendor = dev->vendor;
270 	virtfn->device = iov->vf_device;
271 	virtfn->is_virtfn = 1;
272 	virtfn->physfn = pci_dev_get(dev);
273 	virtfn->no_command_memory = 1;
274 
275 	if (id == 0)
276 		pci_read_vf_config_common(virtfn);
277 
278 	rc = pci_setup_device(virtfn);
279 	if (rc)
280 		goto failed1;
281 
282 	virtfn->dev.parent = dev->dev.parent;
283 	virtfn->multifunction = 0;
284 
285 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
286 		res = &dev->resource[i + PCI_IOV_RESOURCES];
287 		if (!res->parent)
288 			continue;
289 		virtfn->resource[i].name = pci_name(virtfn);
290 		virtfn->resource[i].flags = res->flags;
291 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
292 		virtfn->resource[i].start = res->start + size * id;
293 		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
294 		rc = request_resource(res, &virtfn->resource[i]);
295 		BUG_ON(rc);
296 	}
297 
298 	pci_device_add(virtfn, virtfn->bus);
299 	rc = pci_iov_sysfs_link(dev, virtfn, id);
300 	if (rc)
301 		goto failed1;
302 
303 	pci_bus_add_device(virtfn);
304 
305 	return 0;
306 
307 failed1:
308 	pci_stop_and_remove_bus_device(virtfn);
309 	pci_dev_put(dev);
310 failed0:
311 	virtfn_remove_bus(dev->bus, bus);
312 failed:
313 
314 	return rc;
315 }
316 
317 void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
318 {
319 	char buf[VIRTFN_ID_LEN];
320 	struct pci_dev *virtfn;
321 
322 	virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
323 					     pci_iov_virtfn_bus(dev, id),
324 					     pci_iov_virtfn_devfn(dev, id));
325 	if (!virtfn)
326 		return;
327 
328 	sprintf(buf, "virtfn%u", id);
329 	sysfs_remove_link(&dev->dev.kobj, buf);
330 	/*
331 	 * pci_stop_dev() could have been called for this virtfn already,
332 	 * so the directory for the virtfn may have been removed before.
333 	 * Double check to avoid spurious sysfs warnings.
334 	 */
335 	if (virtfn->dev.kobj.sd)
336 		sysfs_remove_link(&virtfn->dev.kobj, "physfn");
337 
338 	pci_stop_and_remove_bus_device(virtfn);
339 	virtfn_remove_bus(dev->bus, virtfn->bus);
340 
341 	/* balance pci_get_domain_bus_and_slot() */
342 	pci_dev_put(virtfn);
343 	pci_dev_put(dev);
344 }
345 
346 static ssize_t sriov_totalvfs_show(struct device *dev,
347 				   struct device_attribute *attr,
348 				   char *buf)
349 {
350 	struct pci_dev *pdev = to_pci_dev(dev);
351 
352 	return sysfs_emit(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
353 }
354 
355 static ssize_t sriov_numvfs_show(struct device *dev,
356 				 struct device_attribute *attr,
357 				 char *buf)
358 {
359 	struct pci_dev *pdev = to_pci_dev(dev);
360 	u16 num_vfs;
361 
362 	/* Serialize vs sriov_numvfs_store() so readers see valid num_VFs */
363 	device_lock(&pdev->dev);
364 	num_vfs = pdev->sriov->num_VFs;
365 	device_unlock(&pdev->dev);
366 
367 	return sysfs_emit(buf, "%u\n", num_vfs);
368 }
369 
370 /*
371  * num_vfs > 0; number of VFs to enable
372  * num_vfs = 0; disable all VFs
373  *
374  * Note: SRIOV spec does not allow partial VF
375  *	 disable, so it's all or none.
376  */
377 static ssize_t sriov_numvfs_store(struct device *dev,
378 				  struct device_attribute *attr,
379 				  const char *buf, size_t count)
380 {
381 	struct pci_dev *pdev = to_pci_dev(dev);
382 	struct pci_driver *pdrv;
383 	int ret = 0;
384 	u16 num_vfs;
385 
386 	if (kstrtou16(buf, 0, &num_vfs) < 0)
387 		return -EINVAL;
388 
389 	if (num_vfs > pci_sriov_get_totalvfs(pdev))
390 		return -ERANGE;
391 
392 	device_lock(&pdev->dev);
393 
394 	if (num_vfs == pdev->sriov->num_VFs)
395 		goto exit;
396 
397 	/* is PF driver loaded */
398 	pdrv = to_pci_driver(dev->driver);
399 	if (!pdrv) {
400 		pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n");
401 		ret = -ENOENT;
402 		goto exit;
403 	}
404 
405 	/* is PF driver loaded w/callback */
406 	if (!pdrv->sriov_configure) {
407 		pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n");
408 		ret = -ENOENT;
409 		goto exit;
410 	}
411 
412 	if (num_vfs == 0) {
413 		/* disable VFs */
414 		ret = pdrv->sriov_configure(pdev, 0);
415 		goto exit;
416 	}
417 
418 	/* enable VFs */
419 	if (pdev->sriov->num_VFs) {
420 		pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
421 			 pdev->sriov->num_VFs, num_vfs);
422 		ret = -EBUSY;
423 		goto exit;
424 	}
425 
426 	ret = pdrv->sriov_configure(pdev, num_vfs);
427 	if (ret < 0)
428 		goto exit;
429 
430 	if (ret != num_vfs)
431 		pci_warn(pdev, "%d VFs requested; only %d enabled\n",
432 			 num_vfs, ret);
433 
434 exit:
435 	device_unlock(&pdev->dev);
436 
437 	if (ret < 0)
438 		return ret;
439 
440 	return count;
441 }
442 
443 static ssize_t sriov_offset_show(struct device *dev,
444 				 struct device_attribute *attr,
445 				 char *buf)
446 {
447 	struct pci_dev *pdev = to_pci_dev(dev);
448 
449 	return sysfs_emit(buf, "%u\n", pdev->sriov->offset);
450 }
451 
452 static ssize_t sriov_stride_show(struct device *dev,
453 				 struct device_attribute *attr,
454 				 char *buf)
455 {
456 	struct pci_dev *pdev = to_pci_dev(dev);
457 
458 	return sysfs_emit(buf, "%u\n", pdev->sriov->stride);
459 }
460 
461 static ssize_t sriov_vf_device_show(struct device *dev,
462 				    struct device_attribute *attr,
463 				    char *buf)
464 {
465 	struct pci_dev *pdev = to_pci_dev(dev);
466 
467 	return sysfs_emit(buf, "%x\n", pdev->sriov->vf_device);
468 }
469 
470 static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
471 					    struct device_attribute *attr,
472 					    char *buf)
473 {
474 	struct pci_dev *pdev = to_pci_dev(dev);
475 
476 	return sysfs_emit(buf, "%u\n", pdev->sriov->drivers_autoprobe);
477 }
478 
479 static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
480 					     struct device_attribute *attr,
481 					     const char *buf, size_t count)
482 {
483 	struct pci_dev *pdev = to_pci_dev(dev);
484 	bool drivers_autoprobe;
485 
486 	if (kstrtobool(buf, &drivers_autoprobe) < 0)
487 		return -EINVAL;
488 
489 	pdev->sriov->drivers_autoprobe = drivers_autoprobe;
490 
491 	return count;
492 }
493 
494 static DEVICE_ATTR_RO(sriov_totalvfs);
495 static DEVICE_ATTR_RW(sriov_numvfs);
496 static DEVICE_ATTR_RO(sriov_offset);
497 static DEVICE_ATTR_RO(sriov_stride);
498 static DEVICE_ATTR_RO(sriov_vf_device);
499 static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
500 
501 static struct attribute *sriov_pf_dev_attrs[] = {
502 	&dev_attr_sriov_totalvfs.attr,
503 	&dev_attr_sriov_numvfs.attr,
504 	&dev_attr_sriov_offset.attr,
505 	&dev_attr_sriov_stride.attr,
506 	&dev_attr_sriov_vf_device.attr,
507 	&dev_attr_sriov_drivers_autoprobe.attr,
508 #ifdef CONFIG_PCI_MSI
509 	&dev_attr_sriov_vf_total_msix.attr,
510 #endif
511 	NULL,
512 };
513 
514 static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
515 					  struct attribute *a, int n)
516 {
517 	struct device *dev = kobj_to_dev(kobj);
518 
519 	if (!dev_is_pf(dev))
520 		return 0;
521 
522 	return a->mode;
523 }
524 
525 const struct attribute_group sriov_pf_dev_attr_group = {
526 	.attrs = sriov_pf_dev_attrs,
527 	.is_visible = sriov_pf_attrs_are_visible,
528 };
529 
530 int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
531 {
532 	return 0;
533 }
534 
535 int __weak pcibios_sriov_disable(struct pci_dev *pdev)
536 {
537 	return 0;
538 }
539 
540 static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
541 {
542 	unsigned int i;
543 	int rc;
544 
545 	if (dev->no_vf_scan)
546 		return 0;
547 
548 	for (i = 0; i < num_vfs; i++) {
549 		rc = pci_iov_add_virtfn(dev, i);
550 		if (rc)
551 			goto failed;
552 	}
553 	return 0;
554 failed:
555 	while (i--)
556 		pci_iov_remove_virtfn(dev, i);
557 
558 	return rc;
559 }
560 
561 static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
562 {
563 	int rc;
564 	int i;
565 	int nres;
566 	u16 initial;
567 	struct resource *res;
568 	struct pci_dev *pdev;
569 	struct pci_sriov *iov = dev->sriov;
570 	int bars = 0;
571 	int bus;
572 
573 	if (!nr_virtfn)
574 		return 0;
575 
576 	if (iov->num_VFs)
577 		return -EINVAL;
578 
579 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
580 	if (initial > iov->total_VFs ||
581 	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
582 		return -EIO;
583 
584 	if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
585 	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
586 		return -EINVAL;
587 
588 	nres = 0;
589 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
590 		bars |= (1 << (i + PCI_IOV_RESOURCES));
591 		res = &dev->resource[i + PCI_IOV_RESOURCES];
592 		if (res->parent)
593 			nres++;
594 	}
595 	if (nres != iov->nres) {
596 		pci_err(dev, "not enough MMIO resources for SR-IOV\n");
597 		return -ENOMEM;
598 	}
599 
600 	bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
601 	if (bus > dev->bus->busn_res.end) {
602 		pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
603 			nr_virtfn, bus, &dev->bus->busn_res);
604 		return -ENOMEM;
605 	}
606 
607 	if (pci_enable_resources(dev, bars)) {
608 		pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
609 		return -ENOMEM;
610 	}
611 
612 	if (iov->link != dev->devfn) {
613 		pdev = pci_get_slot(dev->bus, iov->link);
614 		if (!pdev)
615 			return -ENODEV;
616 
617 		if (!pdev->is_physfn) {
618 			pci_dev_put(pdev);
619 			return -ENOSYS;
620 		}
621 
622 		rc = sysfs_create_link(&dev->dev.kobj,
623 					&pdev->dev.kobj, "dep_link");
624 		pci_dev_put(pdev);
625 		if (rc)
626 			return rc;
627 	}
628 
629 	iov->initial_VFs = initial;
630 	if (nr_virtfn < initial)
631 		initial = nr_virtfn;
632 
633 	rc = pcibios_sriov_enable(dev, initial);
634 	if (rc) {
635 		pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
636 		goto err_pcibios;
637 	}
638 
639 	pci_iov_set_numvfs(dev, nr_virtfn);
640 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
641 	pci_cfg_access_lock(dev);
642 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
643 	msleep(100);
644 	pci_cfg_access_unlock(dev);
645 
646 	rc = sriov_add_vfs(dev, initial);
647 	if (rc)
648 		goto err_pcibios;
649 
650 	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
651 	iov->num_VFs = nr_virtfn;
652 
653 	return 0;
654 
655 err_pcibios:
656 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
657 	pci_cfg_access_lock(dev);
658 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
659 	ssleep(1);
660 	pci_cfg_access_unlock(dev);
661 
662 	pcibios_sriov_disable(dev);
663 
664 	if (iov->link != dev->devfn)
665 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
666 
667 	pci_iov_set_numvfs(dev, 0);
668 	return rc;
669 }
670 
671 static void sriov_del_vfs(struct pci_dev *dev)
672 {
673 	struct pci_sriov *iov = dev->sriov;
674 	int i;
675 
676 	for (i = 0; i < iov->num_VFs; i++)
677 		pci_iov_remove_virtfn(dev, i);
678 }
679 
680 static void sriov_disable(struct pci_dev *dev)
681 {
682 	struct pci_sriov *iov = dev->sriov;
683 
684 	if (!iov->num_VFs)
685 		return;
686 
687 	sriov_del_vfs(dev);
688 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
689 	pci_cfg_access_lock(dev);
690 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
691 	ssleep(1);
692 	pci_cfg_access_unlock(dev);
693 
694 	pcibios_sriov_disable(dev);
695 
696 	if (iov->link != dev->devfn)
697 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
698 
699 	iov->num_VFs = 0;
700 	pci_iov_set_numvfs(dev, 0);
701 }
702 
703 static int sriov_init(struct pci_dev *dev, int pos)
704 {
705 	int i, bar64;
706 	int rc;
707 	int nres;
708 	u32 pgsz;
709 	u16 ctrl, total;
710 	struct pci_sriov *iov;
711 	struct resource *res;
712 	struct pci_dev *pdev;
713 
714 	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
715 	if (ctrl & PCI_SRIOV_CTRL_VFE) {
716 		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
717 		ssleep(1);
718 	}
719 
720 	ctrl = 0;
721 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
722 		if (pdev->is_physfn)
723 			goto found;
724 
725 	pdev = NULL;
726 	if (pci_ari_enabled(dev->bus))
727 		ctrl |= PCI_SRIOV_CTRL_ARI;
728 
729 found:
730 	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
731 
732 	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
733 	if (!total)
734 		return 0;
735 
736 	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
737 	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
738 	pgsz &= ~((1 << i) - 1);
739 	if (!pgsz)
740 		return -EIO;
741 
742 	pgsz &= ~(pgsz - 1);
743 	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
744 
745 	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
746 	if (!iov)
747 		return -ENOMEM;
748 
749 	nres = 0;
750 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
751 		res = &dev->resource[i + PCI_IOV_RESOURCES];
752 		/*
753 		 * If it is already FIXED, don't change it, something
754 		 * (perhaps EA or header fixups) wants it this way.
755 		 */
756 		if (res->flags & IORESOURCE_PCI_FIXED)
757 			bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
758 		else
759 			bar64 = __pci_read_base(dev, pci_bar_unknown, res,
760 						pos + PCI_SRIOV_BAR + i * 4);
761 		if (!res->flags)
762 			continue;
763 		if (resource_size(res) & (PAGE_SIZE - 1)) {
764 			rc = -EIO;
765 			goto failed;
766 		}
767 		iov->barsz[i] = resource_size(res);
768 		res->end = res->start + resource_size(res) * total - 1;
769 		pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
770 			 i, res, i, total);
771 		i += bar64;
772 		nres++;
773 	}
774 
775 	iov->pos = pos;
776 	iov->nres = nres;
777 	iov->ctrl = ctrl;
778 	iov->total_VFs = total;
779 	iov->driver_max_VFs = total;
780 	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
781 	iov->pgsz = pgsz;
782 	iov->self = dev;
783 	iov->drivers_autoprobe = true;
784 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
785 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
786 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
787 		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
788 
789 	if (pdev)
790 		iov->dev = pci_dev_get(pdev);
791 	else
792 		iov->dev = dev;
793 
794 	dev->sriov = iov;
795 	dev->is_physfn = 1;
796 	rc = compute_max_vf_buses(dev);
797 	if (rc)
798 		goto fail_max_buses;
799 
800 	return 0;
801 
802 fail_max_buses:
803 	dev->sriov = NULL;
804 	dev->is_physfn = 0;
805 failed:
806 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
807 		res = &dev->resource[i + PCI_IOV_RESOURCES];
808 		res->flags = 0;
809 	}
810 
811 	kfree(iov);
812 	return rc;
813 }
814 
815 static void sriov_release(struct pci_dev *dev)
816 {
817 	BUG_ON(dev->sriov->num_VFs);
818 
819 	if (dev != dev->sriov->dev)
820 		pci_dev_put(dev->sriov->dev);
821 
822 	kfree(dev->sriov);
823 	dev->sriov = NULL;
824 }
825 
826 static void sriov_restore_state(struct pci_dev *dev)
827 {
828 	int i;
829 	u16 ctrl;
830 	struct pci_sriov *iov = dev->sriov;
831 
832 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
833 	if (ctrl & PCI_SRIOV_CTRL_VFE)
834 		return;
835 
836 	/*
837 	 * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
838 	 * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
839 	 */
840 	ctrl &= ~PCI_SRIOV_CTRL_ARI;
841 	ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
842 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
843 
844 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
845 		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
846 
847 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
848 	pci_iov_set_numvfs(dev, iov->num_VFs);
849 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
850 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
851 		msleep(100);
852 }
853 
854 /**
855  * pci_iov_init - initialize the IOV capability
856  * @dev: the PCI device
857  *
858  * Returns 0 on success, or negative on failure.
859  */
860 int pci_iov_init(struct pci_dev *dev)
861 {
862 	int pos;
863 
864 	if (!pci_is_pcie(dev))
865 		return -ENODEV;
866 
867 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
868 	if (pos)
869 		return sriov_init(dev, pos);
870 
871 	return -ENODEV;
872 }
873 
874 /**
875  * pci_iov_release - release resources used by the IOV capability
876  * @dev: the PCI device
877  */
878 void pci_iov_release(struct pci_dev *dev)
879 {
880 	if (dev->is_physfn)
881 		sriov_release(dev);
882 }
883 
884 /**
885  * pci_iov_remove - clean up SR-IOV state after PF driver is detached
886  * @dev: the PCI device
887  */
888 void pci_iov_remove(struct pci_dev *dev)
889 {
890 	struct pci_sriov *iov = dev->sriov;
891 
892 	if (!dev->is_physfn)
893 		return;
894 
895 	iov->driver_max_VFs = iov->total_VFs;
896 	if (iov->num_VFs)
897 		pci_warn(dev, "driver left SR-IOV enabled after remove\n");
898 }
899 
900 /**
901  * pci_iov_update_resource - update a VF BAR
902  * @dev: the PCI device
903  * @resno: the resource number
904  *
905  * Update a VF BAR in the SR-IOV capability of a PF.
906  */
907 void pci_iov_update_resource(struct pci_dev *dev, int resno)
908 {
909 	struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
910 	struct resource *res = dev->resource + resno;
911 	int vf_bar = resno - PCI_IOV_RESOURCES;
912 	struct pci_bus_region region;
913 	u16 cmd;
914 	u32 new;
915 	int reg;
916 
917 	/*
918 	 * The generic pci_restore_bars() path calls this for all devices,
919 	 * including VFs and non-SR-IOV devices.  If this is not a PF, we
920 	 * have nothing to do.
921 	 */
922 	if (!iov)
923 		return;
924 
925 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
926 	if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
927 		dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
928 			 vf_bar, res);
929 		return;
930 	}
931 
932 	/*
933 	 * Ignore unimplemented BARs, unused resource slots for 64-bit
934 	 * BARs, and non-movable resources, e.g., those described via
935 	 * Enhanced Allocation.
936 	 */
937 	if (!res->flags)
938 		return;
939 
940 	if (res->flags & IORESOURCE_UNSET)
941 		return;
942 
943 	if (res->flags & IORESOURCE_PCI_FIXED)
944 		return;
945 
946 	pcibios_resource_to_bus(dev->bus, &region, res);
947 	new = region.start;
948 	new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
949 
950 	reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
951 	pci_write_config_dword(dev, reg, new);
952 	if (res->flags & IORESOURCE_MEM_64) {
953 		new = region.start >> 16 >> 16;
954 		pci_write_config_dword(dev, reg + 4, new);
955 	}
956 }
957 
958 resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,
959 						      int resno)
960 {
961 	return pci_iov_resource_size(dev, resno);
962 }
963 
964 /**
965  * pci_sriov_resource_alignment - get resource alignment for VF BAR
966  * @dev: the PCI device
967  * @resno: the resource number
968  *
969  * Returns the alignment of the VF BAR found in the SR-IOV capability.
970  * This is not the same as the resource size which is defined as
971  * the VF BAR size multiplied by the number of VFs.  The alignment
972  * is just the VF BAR size.
973  */
974 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
975 {
976 	return pcibios_iov_resource_alignment(dev, resno);
977 }
978 
979 /**
980  * pci_restore_iov_state - restore the state of the IOV capability
981  * @dev: the PCI device
982  */
983 void pci_restore_iov_state(struct pci_dev *dev)
984 {
985 	if (dev->is_physfn)
986 		sriov_restore_state(dev);
987 }
988 
989 /**
990  * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
991  * @dev: the PCI device
992  * @auto_probe: set VF drivers auto probe flag
993  */
994 void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
995 {
996 	if (dev->is_physfn)
997 		dev->sriov->drivers_autoprobe = auto_probe;
998 }
999 
1000 /**
1001  * pci_iov_bus_range - find bus range used by Virtual Function
1002  * @bus: the PCI bus
1003  *
1004  * Returns max number of buses (exclude current one) used by Virtual
1005  * Functions.
1006  */
1007 int pci_iov_bus_range(struct pci_bus *bus)
1008 {
1009 	int max = 0;
1010 	struct pci_dev *dev;
1011 
1012 	list_for_each_entry(dev, &bus->devices, bus_list) {
1013 		if (!dev->is_physfn)
1014 			continue;
1015 		if (dev->sriov->max_VF_buses > max)
1016 			max = dev->sriov->max_VF_buses;
1017 	}
1018 
1019 	return max ? max - bus->number : 0;
1020 }
1021 
1022 /**
1023  * pci_enable_sriov - enable the SR-IOV capability
1024  * @dev: the PCI device
1025  * @nr_virtfn: number of virtual functions to enable
1026  *
1027  * Returns 0 on success, or negative on failure.
1028  */
1029 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
1030 {
1031 	might_sleep();
1032 
1033 	if (!dev->is_physfn)
1034 		return -ENOSYS;
1035 
1036 	return sriov_enable(dev, nr_virtfn);
1037 }
1038 EXPORT_SYMBOL_GPL(pci_enable_sriov);
1039 
1040 /**
1041  * pci_disable_sriov - disable the SR-IOV capability
1042  * @dev: the PCI device
1043  */
1044 void pci_disable_sriov(struct pci_dev *dev)
1045 {
1046 	might_sleep();
1047 
1048 	if (!dev->is_physfn)
1049 		return;
1050 
1051 	sriov_disable(dev);
1052 }
1053 EXPORT_SYMBOL_GPL(pci_disable_sriov);
1054 
1055 /**
1056  * pci_num_vf - return number of VFs associated with a PF device_release_driver
1057  * @dev: the PCI device
1058  *
1059  * Returns number of VFs, or 0 if SR-IOV is not enabled.
1060  */
1061 int pci_num_vf(struct pci_dev *dev)
1062 {
1063 	if (!dev->is_physfn)
1064 		return 0;
1065 
1066 	return dev->sriov->num_VFs;
1067 }
1068 EXPORT_SYMBOL_GPL(pci_num_vf);
1069 
1070 /**
1071  * pci_vfs_assigned - returns number of VFs are assigned to a guest
1072  * @dev: the PCI device
1073  *
1074  * Returns number of VFs belonging to this device that are assigned to a guest.
1075  * If device is not a physical function returns 0.
1076  */
1077 int pci_vfs_assigned(struct pci_dev *dev)
1078 {
1079 	struct pci_dev *vfdev;
1080 	unsigned int vfs_assigned = 0;
1081 	unsigned short dev_id;
1082 
1083 	/* only search if we are a PF */
1084 	if (!dev->is_physfn)
1085 		return 0;
1086 
1087 	/*
1088 	 * determine the device ID for the VFs, the vendor ID will be the
1089 	 * same as the PF so there is no need to check for that one
1090 	 */
1091 	dev_id = dev->sriov->vf_device;
1092 
1093 	/* loop through all the VFs to see if we own any that are assigned */
1094 	vfdev = pci_get_device(dev->vendor, dev_id, NULL);
1095 	while (vfdev) {
1096 		/*
1097 		 * It is considered assigned if it is a virtual function with
1098 		 * our dev as the physical function and the assigned bit is set
1099 		 */
1100 		if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
1101 			pci_is_dev_assigned(vfdev))
1102 			vfs_assigned++;
1103 
1104 		vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
1105 	}
1106 
1107 	return vfs_assigned;
1108 }
1109 EXPORT_SYMBOL_GPL(pci_vfs_assigned);
1110 
1111 /**
1112  * pci_sriov_set_totalvfs -- reduce the TotalVFs available
1113  * @dev: the PCI PF device
1114  * @numvfs: number that should be used for TotalVFs supported
1115  *
1116  * Should be called from PF driver's probe routine with
1117  * device's mutex held.
1118  *
1119  * Returns 0 if PF is an SRIOV-capable device and
1120  * value of numvfs valid. If not a PF return -ENOSYS;
1121  * if numvfs is invalid return -EINVAL;
1122  * if VFs already enabled, return -EBUSY.
1123  */
1124 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
1125 {
1126 	if (!dev->is_physfn)
1127 		return -ENOSYS;
1128 
1129 	if (numvfs > dev->sriov->total_VFs)
1130 		return -EINVAL;
1131 
1132 	/* Shouldn't change if VFs already enabled */
1133 	if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
1134 		return -EBUSY;
1135 
1136 	dev->sriov->driver_max_VFs = numvfs;
1137 	return 0;
1138 }
1139 EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
1140 
1141 /**
1142  * pci_sriov_get_totalvfs -- get total VFs supported on this device
1143  * @dev: the PCI PF device
1144  *
1145  * For a PCIe device with SRIOV support, return the PCIe
1146  * SRIOV capability value of TotalVFs or the value of driver_max_VFs
1147  * if the driver reduced it.  Otherwise 0.
1148  */
1149 int pci_sriov_get_totalvfs(struct pci_dev *dev)
1150 {
1151 	if (!dev->is_physfn)
1152 		return 0;
1153 
1154 	return dev->sriov->driver_max_VFs;
1155 }
1156 EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
1157 
1158 /**
1159  * pci_sriov_configure_simple - helper to configure SR-IOV
1160  * @dev: the PCI device
1161  * @nr_virtfn: number of virtual functions to enable, 0 to disable
1162  *
1163  * Enable or disable SR-IOV for devices that don't require any PF setup
1164  * before enabling SR-IOV.  Return value is negative on error, or number of
1165  * VFs allocated on success.
1166  */
1167 int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
1168 {
1169 	int rc;
1170 
1171 	might_sleep();
1172 
1173 	if (!dev->is_physfn)
1174 		return -ENODEV;
1175 
1176 	if (pci_vfs_assigned(dev)) {
1177 		pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
1178 		return -EPERM;
1179 	}
1180 
1181 	if (nr_virtfn == 0) {
1182 		sriov_disable(dev);
1183 		return 0;
1184 	}
1185 
1186 	rc = sriov_enable(dev, nr_virtfn);
1187 	if (rc < 0)
1188 		return rc;
1189 
1190 	return nr_virtfn;
1191 }
1192 EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);
1193