xref: /openbmc/linux/drivers/pci/iov.c (revision 81de3bf3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * PCI Express I/O Virtualization (IOV) support
4  *   Single Root IOV 1.0
5  *   Address Translation Service 1.0
6  *
7  * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
8  */
9 
10 #include <linux/pci.h>
11 #include <linux/slab.h>
12 #include <linux/export.h>
13 #include <linux/string.h>
14 #include <linux/delay.h>
15 #include "pci.h"
16 
/* Size of the on-stack buffer used to format "virtfn%u" sysfs link names */
#define VIRTFN_ID_LEN	16
18 
19 int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
20 {
21 	if (!dev->is_physfn)
22 		return -EINVAL;
23 	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
24 				    dev->sriov->stride * vf_id) >> 8);
25 }
26 
27 int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
28 {
29 	if (!dev->is_physfn)
30 		return -EINVAL;
31 	return (dev->devfn + dev->sriov->offset +
32 		dev->sriov->stride * vf_id) & 0xff;
33 }
34 
35 /*
36  * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
37  * change when NumVFs changes.
38  *
39  * Update iov->offset and iov->stride when NumVFs is written.
40  */
41 static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
42 {
43 	struct pci_sriov *iov = dev->sriov;
44 
45 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
46 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
47 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
48 }
49 
50 /*
51  * The PF consumes one bus number.  NumVFs, First VF Offset, and VF Stride
52  * determine how many additional bus numbers will be consumed by VFs.
53  *
54  * Iterate over all valid NumVFs, validate offset and stride, and calculate
55  * the maximum number of bus numbers that could ever be required.
56  */
57 static int compute_max_vf_buses(struct pci_dev *dev)
58 {
59 	struct pci_sriov *iov = dev->sriov;
60 	int nr_virtfn, busnr, rc = 0;
61 
62 	for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
63 		pci_iov_set_numvfs(dev, nr_virtfn);
64 		if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
65 			rc = -EIO;
66 			goto out;
67 		}
68 
69 		busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
70 		if (busnr > iov->max_VF_buses)
71 			iov->max_VF_buses = busnr;
72 	}
73 
74 out:
75 	pci_iov_set_numvfs(dev, 0);
76 	return rc;
77 }
78 
79 static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
80 {
81 	struct pci_bus *child;
82 
83 	if (bus->number == busnr)
84 		return bus;
85 
86 	child = pci_find_bus(pci_domain_nr(bus), busnr);
87 	if (child)
88 		return child;
89 
90 	child = pci_add_new_bus(bus, NULL, busnr);
91 	if (!child)
92 		return NULL;
93 
94 	pci_bus_insert_busn_res(child, busnr, busnr);
95 
96 	return child;
97 }
98 
99 static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
100 {
101 	if (physbus != virtbus && list_empty(&virtbus->devices))
102 		pci_remove_bus(virtbus);
103 }
104 
105 resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
106 {
107 	if (!dev->is_physfn)
108 		return 0;
109 
110 	return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
111 }
112 
113 static void pci_read_vf_config_common(struct pci_dev *virtfn)
114 {
115 	struct pci_dev *physfn = virtfn->physfn;
116 
117 	/*
118 	 * Some config registers are the same across all associated VFs.
119 	 * Read them once from VF0 so we can skip reading them from the
120 	 * other VFs.
121 	 *
122 	 * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
123 	 * have the same Revision ID and Subsystem ID, but we assume they
124 	 * do.
125 	 */
126 	pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
127 			      &physfn->sriov->class);
128 	pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
129 			     &physfn->sriov->hdr_type);
130 	pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
131 			     &physfn->sriov->subsystem_vendor);
132 	pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
133 			     &physfn->sriov->subsystem_device);
134 }
135 
136 int pci_iov_add_virtfn(struct pci_dev *dev, int id)
137 {
138 	int i;
139 	int rc = -ENOMEM;
140 	u64 size;
141 	char buf[VIRTFN_ID_LEN];
142 	struct pci_dev *virtfn;
143 	struct resource *res;
144 	struct pci_sriov *iov = dev->sriov;
145 	struct pci_bus *bus;
146 
147 	bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
148 	if (!bus)
149 		goto failed;
150 
151 	virtfn = pci_alloc_dev(bus);
152 	if (!virtfn)
153 		goto failed0;
154 
155 	virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
156 	virtfn->vendor = dev->vendor;
157 	virtfn->device = iov->vf_device;
158 	virtfn->is_virtfn = 1;
159 	virtfn->physfn = pci_dev_get(dev);
160 
161 	if (id == 0)
162 		pci_read_vf_config_common(virtfn);
163 
164 	rc = pci_setup_device(virtfn);
165 	if (rc)
166 		goto failed1;
167 
168 	virtfn->dev.parent = dev->dev.parent;
169 	virtfn->multifunction = 0;
170 
171 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
172 		res = &dev->resource[i + PCI_IOV_RESOURCES];
173 		if (!res->parent)
174 			continue;
175 		virtfn->resource[i].name = pci_name(virtfn);
176 		virtfn->resource[i].flags = res->flags;
177 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
178 		virtfn->resource[i].start = res->start + size * id;
179 		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
180 		rc = request_resource(res, &virtfn->resource[i]);
181 		BUG_ON(rc);
182 	}
183 
184 	pci_device_add(virtfn, virtfn->bus);
185 
186 	sprintf(buf, "virtfn%u", id);
187 	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
188 	if (rc)
189 		goto failed2;
190 	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
191 	if (rc)
192 		goto failed3;
193 
194 	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
195 
196 	pci_bus_add_device(virtfn);
197 
198 	return 0;
199 
200 failed3:
201 	sysfs_remove_link(&dev->dev.kobj, buf);
202 failed2:
203 	pci_stop_and_remove_bus_device(virtfn);
204 failed1:
205 	pci_dev_put(dev);
206 failed0:
207 	virtfn_remove_bus(dev->bus, bus);
208 failed:
209 
210 	return rc;
211 }
212 
213 void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
214 {
215 	char buf[VIRTFN_ID_LEN];
216 	struct pci_dev *virtfn;
217 
218 	virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
219 					     pci_iov_virtfn_bus(dev, id),
220 					     pci_iov_virtfn_devfn(dev, id));
221 	if (!virtfn)
222 		return;
223 
224 	sprintf(buf, "virtfn%u", id);
225 	sysfs_remove_link(&dev->dev.kobj, buf);
226 	/*
227 	 * pci_stop_dev() could have been called for this virtfn already,
228 	 * so the directory for the virtfn may have been removed before.
229 	 * Double check to avoid spurious sysfs warnings.
230 	 */
231 	if (virtfn->dev.kobj.sd)
232 		sysfs_remove_link(&virtfn->dev.kobj, "physfn");
233 
234 	pci_stop_and_remove_bus_device(virtfn);
235 	virtfn_remove_bus(dev->bus, virtfn->bus);
236 
237 	/* balance pci_get_domain_bus_and_slot() */
238 	pci_dev_put(virtfn);
239 	pci_dev_put(dev);
240 }
241 
242 static ssize_t sriov_totalvfs_show(struct device *dev,
243 				   struct device_attribute *attr,
244 				   char *buf)
245 {
246 	struct pci_dev *pdev = to_pci_dev(dev);
247 
248 	return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
249 }
250 
251 static ssize_t sriov_numvfs_show(struct device *dev,
252 				 struct device_attribute *attr,
253 				 char *buf)
254 {
255 	struct pci_dev *pdev = to_pci_dev(dev);
256 	u16 num_vfs;
257 
258 	/* Serialize vs sriov_numvfs_store() so readers see valid num_VFs */
259 	device_lock(&pdev->dev);
260 	num_vfs = pdev->sriov->num_VFs;
261 	device_unlock(&pdev->dev);
262 
263 	return sprintf(buf, "%u\n", num_vfs);
264 }
265 
266 /*
267  * num_vfs > 0; number of VFs to enable
268  * num_vfs = 0; disable all VFs
269  *
270  * Note: SRIOV spec does not allow partial VF
271  *	 disable, so it's all or none.
272  */
273 static ssize_t sriov_numvfs_store(struct device *dev,
274 				  struct device_attribute *attr,
275 				  const char *buf, size_t count)
276 {
277 	struct pci_dev *pdev = to_pci_dev(dev);
278 	int ret;
279 	u16 num_vfs;
280 
281 	ret = kstrtou16(buf, 0, &num_vfs);
282 	if (ret < 0)
283 		return ret;
284 
285 	if (num_vfs > pci_sriov_get_totalvfs(pdev))
286 		return -ERANGE;
287 
288 	device_lock(&pdev->dev);
289 
290 	if (num_vfs == pdev->sriov->num_VFs)
291 		goto exit;
292 
293 	/* is PF driver loaded w/callback */
294 	if (!pdev->driver || !pdev->driver->sriov_configure) {
295 		pci_info(pdev, "Driver does not support SRIOV configuration via sysfs\n");
296 		ret = -ENOENT;
297 		goto exit;
298 	}
299 
300 	if (num_vfs == 0) {
301 		/* disable VFs */
302 		ret = pdev->driver->sriov_configure(pdev, 0);
303 		goto exit;
304 	}
305 
306 	/* enable VFs */
307 	if (pdev->sriov->num_VFs) {
308 		pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
309 			 pdev->sriov->num_VFs, num_vfs);
310 		ret = -EBUSY;
311 		goto exit;
312 	}
313 
314 	ret = pdev->driver->sriov_configure(pdev, num_vfs);
315 	if (ret < 0)
316 		goto exit;
317 
318 	if (ret != num_vfs)
319 		pci_warn(pdev, "%d VFs requested; only %d enabled\n",
320 			 num_vfs, ret);
321 
322 exit:
323 	device_unlock(&pdev->dev);
324 
325 	if (ret < 0)
326 		return ret;
327 
328 	return count;
329 }
330 
331 static ssize_t sriov_offset_show(struct device *dev,
332 				 struct device_attribute *attr,
333 				 char *buf)
334 {
335 	struct pci_dev *pdev = to_pci_dev(dev);
336 
337 	return sprintf(buf, "%u\n", pdev->sriov->offset);
338 }
339 
340 static ssize_t sriov_stride_show(struct device *dev,
341 				 struct device_attribute *attr,
342 				 char *buf)
343 {
344 	struct pci_dev *pdev = to_pci_dev(dev);
345 
346 	return sprintf(buf, "%u\n", pdev->sriov->stride);
347 }
348 
349 static ssize_t sriov_vf_device_show(struct device *dev,
350 				    struct device_attribute *attr,
351 				    char *buf)
352 {
353 	struct pci_dev *pdev = to_pci_dev(dev);
354 
355 	return sprintf(buf, "%x\n", pdev->sriov->vf_device);
356 }
357 
358 static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
359 					    struct device_attribute *attr,
360 					    char *buf)
361 {
362 	struct pci_dev *pdev = to_pci_dev(dev);
363 
364 	return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
365 }
366 
367 static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
368 					     struct device_attribute *attr,
369 					     const char *buf, size_t count)
370 {
371 	struct pci_dev *pdev = to_pci_dev(dev);
372 	bool drivers_autoprobe;
373 
374 	if (kstrtobool(buf, &drivers_autoprobe) < 0)
375 		return -EINVAL;
376 
377 	pdev->sriov->drivers_autoprobe = drivers_autoprobe;
378 
379 	return count;
380 }
381 
/*
 * Device attributes for the sriov_* handlers defined above.  The _RO
 * entries have only a show handler; the _RW entries (sriov_numvfs and
 * sriov_drivers_autoprobe) have both show and store handlers.
 */
static DEVICE_ATTR_RO(sriov_totalvfs);
static DEVICE_ATTR_RW(sriov_numvfs);
static DEVICE_ATTR_RO(sriov_offset);
static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);

/* NULL-terminated list of the SR-IOV attributes, used by sriov_dev_attr_group */
static struct attribute *sriov_dev_attrs[] = {
	&dev_attr_sriov_totalvfs.attr,
	&dev_attr_sriov_numvfs.attr,
	&dev_attr_sriov_offset.attr,
	&dev_attr_sriov_stride.attr,
	&dev_attr_sriov_vf_device.attr,
	&dev_attr_sriov_drivers_autoprobe.attr,
	NULL,
};
398 
399 static umode_t sriov_attrs_are_visible(struct kobject *kobj,
400 				       struct attribute *a, int n)
401 {
402 	struct device *dev = kobj_to_dev(kobj);
403 
404 	if (!dev_is_pf(dev))
405 		return 0;
406 
407 	return a->mode;
408 }
409 
/*
 * Attribute group bundling sriov_dev_attrs.  sriov_attrs_are_visible()
 * returns mode 0 for every attribute on devices that are not PFs.
 */
const struct attribute_group sriov_dev_attr_group = {
	.attrs = sriov_dev_attrs,
	.is_visible = sriov_attrs_are_visible,
};
414 
/*
 * Weak default for the arch hook that sriov_enable() calls before it sets
 * NumVFs and VF Enable.  This default does nothing and reports success; a
 * non-zero return makes sriov_enable() fail.
 */
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	return 0;
}
419 
/*
 * Weak default for the arch hook called after VF Enable has been cleared
 * (from sriov_disable() and the sriov_enable() error path).  This default
 * does nothing and returns 0; the callers in this file ignore the result.
 */
int __weak pcibios_sriov_disable(struct pci_dev *pdev)
{
	return 0;
}
424 
425 static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
426 {
427 	unsigned int i;
428 	int rc;
429 
430 	if (dev->no_vf_scan)
431 		return 0;
432 
433 	for (i = 0; i < num_vfs; i++) {
434 		rc = pci_iov_add_virtfn(dev, i);
435 		if (rc)
436 			goto failed;
437 	}
438 	return 0;
439 failed:
440 	while (i--)
441 		pci_iov_remove_virtfn(dev, i);
442 
443 	return rc;
444 }
445 
/*
 * Enable SR-IOV on PF @dev with @nr_virtfn VFs.
 *
 * Validates the request against InitialVFs/TotalVFs, checks that the VF
 * BAR resources and the required bus numbers are available, creates the
 * "dep_link" sysfs link when the function dependency link names another
 * function, calls the pcibios hook, programs NumVFs and the VF Enable /
 * VF MSE control bits, and finally adds the VF devices.
 *
 * Returns 0 on success (also when @nr_virtfn is 0, in which case nothing
 * is done) or a negative errno.  On failure after the pcibios hook has
 * been called, VF Enable/MSE are cleared and NumVFs is reset to 0.
 */
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
	int rc;
	int i;
	int nres;
	u16 initial;
	struct resource *res;
	struct pci_dev *pdev;
	struct pci_sriov *iov = dev->sriov;
	int bars = 0;
	int bus;

	if (!nr_virtfn)
		return 0;

	/* VFs are already enabled */
	if (iov->num_VFs)
		return -EINVAL;

	/*
	 * InitialVFs may not exceed TotalVFs, and must equal it unless the
	 * VF Migration capability bit is set.
	 */
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
	if (initial > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
		return -EIO;

	if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
		return -EINVAL;

	/*
	 * Build the mask of all VF BAR resources and count those that have
	 * a parent; the count must match what sriov_init() recorded.
	 */
	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		bars |= (1 << (i + PCI_IOV_RESOURCES));
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (res->parent)
			nres++;
	}
	if (nres != iov->nres) {
		pci_err(dev, "not enough MMIO resources for SR-IOV\n");
		return -ENOMEM;
	}

	/* The last VF's bus number must fit in the PF bus's bus number range */
	bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
	if (bus > dev->bus->busn_res.end) {
		pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
			nr_virtfn, bus, &dev->bus->busn_res);
		return -ENOMEM;
	}

	if (pci_enable_resources(dev, bars)) {
		pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
		return -ENOMEM;
	}

	/*
	 * If the dependency link names a different function, that function
	 * must exist and be a PF; expose it through a "dep_link" sysfs link.
	 */
	if (iov->link != dev->devfn) {
		pdev = pci_get_slot(dev->bus, iov->link);
		if (!pdev)
			return -ENODEV;

		if (!pdev->is_physfn) {
			pci_dev_put(pdev);
			return -ENOSYS;
		}

		rc = sysfs_create_link(&dev->dev.kobj,
					&pdev->dev.kobj, "dep_link");
		pci_dev_put(pdev);
		if (rc)
			return rc;
	}

	/* From here on, "initial" is the number of VF devices to add */
	iov->initial_VFs = initial;
	if (nr_virtfn < initial)
		initial = nr_virtfn;

	rc = pcibios_sriov_enable(dev, initial);
	if (rc) {
		pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
		goto err_pcibios;
	}

	/*
	 * Program NumVFs, then set VF Enable and VF MSE.  Config access is
	 * blocked while the control register is written and for the 100 ms
	 * sleep that follows.
	 */
	pci_iov_set_numvfs(dev, nr_virtfn);
	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	msleep(100);
	pci_cfg_access_unlock(dev);

	rc = sriov_add_vfs(dev, initial);
	if (rc)
		goto err_pcibios;

	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
	iov->num_VFs = nr_virtfn;

	return 0;

err_pcibios:
	/* Clear VF Enable / VF MSE, with config access blocked for 1 second */
	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_cfg_access_unlock(dev);

	pcibios_sriov_disable(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	pci_iov_set_numvfs(dev, 0);
	return rc;
}
555 
556 static void sriov_del_vfs(struct pci_dev *dev)
557 {
558 	struct pci_sriov *iov = dev->sriov;
559 	int i;
560 
561 	if (dev->no_vf_scan)
562 		return;
563 
564 	for (i = 0; i < iov->num_VFs; i++)
565 		pci_iov_remove_virtfn(dev, i);
566 }
567 
568 static void sriov_disable(struct pci_dev *dev)
569 {
570 	struct pci_sriov *iov = dev->sriov;
571 
572 	if (!iov->num_VFs)
573 		return;
574 
575 	sriov_del_vfs(dev);
576 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
577 	pci_cfg_access_lock(dev);
578 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
579 	ssleep(1);
580 	pci_cfg_access_unlock(dev);
581 
582 	pcibios_sriov_disable(dev);
583 
584 	if (iov->link != dev->devfn)
585 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
586 
587 	iov->num_VFs = 0;
588 	pci_iov_set_numvfs(dev, 0);
589 }
590 
591 static int sriov_init(struct pci_dev *dev, int pos)
592 {
593 	int i, bar64;
594 	int rc;
595 	int nres;
596 	u32 pgsz;
597 	u16 ctrl, total;
598 	struct pci_sriov *iov;
599 	struct resource *res;
600 	struct pci_dev *pdev;
601 
602 	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
603 	if (ctrl & PCI_SRIOV_CTRL_VFE) {
604 		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
605 		ssleep(1);
606 	}
607 
608 	ctrl = 0;
609 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
610 		if (pdev->is_physfn)
611 			goto found;
612 
613 	pdev = NULL;
614 	if (pci_ari_enabled(dev->bus))
615 		ctrl |= PCI_SRIOV_CTRL_ARI;
616 
617 found:
618 	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
619 
620 	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
621 	if (!total)
622 		return 0;
623 
624 	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
625 	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
626 	pgsz &= ~((1 << i) - 1);
627 	if (!pgsz)
628 		return -EIO;
629 
630 	pgsz &= ~(pgsz - 1);
631 	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
632 
633 	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
634 	if (!iov)
635 		return -ENOMEM;
636 
637 	nres = 0;
638 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
639 		res = &dev->resource[i + PCI_IOV_RESOURCES];
640 		/*
641 		 * If it is already FIXED, don't change it, something
642 		 * (perhaps EA or header fixups) wants it this way.
643 		 */
644 		if (res->flags & IORESOURCE_PCI_FIXED)
645 			bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
646 		else
647 			bar64 = __pci_read_base(dev, pci_bar_unknown, res,
648 						pos + PCI_SRIOV_BAR + i * 4);
649 		if (!res->flags)
650 			continue;
651 		if (resource_size(res) & (PAGE_SIZE - 1)) {
652 			rc = -EIO;
653 			goto failed;
654 		}
655 		iov->barsz[i] = resource_size(res);
656 		res->end = res->start + resource_size(res) * total - 1;
657 		pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
658 			 i, res, i, total);
659 		i += bar64;
660 		nres++;
661 	}
662 
663 	iov->pos = pos;
664 	iov->nres = nres;
665 	iov->ctrl = ctrl;
666 	iov->total_VFs = total;
667 	iov->driver_max_VFs = total;
668 	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
669 	iov->pgsz = pgsz;
670 	iov->self = dev;
671 	iov->drivers_autoprobe = true;
672 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
673 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
674 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
675 		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
676 
677 	if (pdev)
678 		iov->dev = pci_dev_get(pdev);
679 	else
680 		iov->dev = dev;
681 
682 	dev->sriov = iov;
683 	dev->is_physfn = 1;
684 	rc = compute_max_vf_buses(dev);
685 	if (rc)
686 		goto fail_max_buses;
687 
688 	return 0;
689 
690 fail_max_buses:
691 	dev->sriov = NULL;
692 	dev->is_physfn = 0;
693 failed:
694 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
695 		res = &dev->resource[i + PCI_IOV_RESOURCES];
696 		res->flags = 0;
697 	}
698 
699 	kfree(iov);
700 	return rc;
701 }
702 
703 static void sriov_release(struct pci_dev *dev)
704 {
705 	BUG_ON(dev->sriov->num_VFs);
706 
707 	if (dev != dev->sriov->dev)
708 		pci_dev_put(dev->sriov->dev);
709 
710 	kfree(dev->sriov);
711 	dev->sriov = NULL;
712 }
713 
/*
 * Re-program the SR-IOV capability of PF @dev from the state cached in
 * dev->sriov: the ARI bit, the VF BARs, the system page size, NumVFs and
 * finally the full control word.  Does nothing if VF Enable is already
 * set in the hardware control register.
 */
static void sriov_restore_state(struct pci_dev *dev)
{
	int i;
	u16 ctrl;
	struct pci_sriov *iov = dev->sriov;

	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE)
		return;

	/*
	 * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
	 * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
	 */
	ctrl &= ~PCI_SRIOV_CTRL_ARI;
	ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);

	/* Write the VF BARs back from the PF's IOV resources */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		pci_update_resource(dev, i + PCI_IOV_RESOURCES);

	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
	pci_iov_set_numvfs(dev, iov->num_VFs);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	/* Same 100 ms wait as sriov_enable() uses after setting VF Enable */
	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
		msleep(100);
}
741 
742 /**
743  * pci_iov_init - initialize the IOV capability
744  * @dev: the PCI device
745  *
746  * Returns 0 on success, or negative on failure.
747  */
748 int pci_iov_init(struct pci_dev *dev)
749 {
750 	int pos;
751 
752 	if (!pci_is_pcie(dev))
753 		return -ENODEV;
754 
755 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
756 	if (pos)
757 		return sriov_init(dev, pos);
758 
759 	return -ENODEV;
760 }
761 
762 /**
763  * pci_iov_release - release resources used by the IOV capability
764  * @dev: the PCI device
765  */
766 void pci_iov_release(struct pci_dev *dev)
767 {
768 	if (dev->is_physfn)
769 		sriov_release(dev);
770 }
771 
772 /**
773  * pci_iov_remove - clean up SR-IOV state after PF driver is detached
774  * @dev: the PCI device
775  */
776 void pci_iov_remove(struct pci_dev *dev)
777 {
778 	struct pci_sriov *iov = dev->sriov;
779 
780 	if (!dev->is_physfn)
781 		return;
782 
783 	iov->driver_max_VFs = iov->total_VFs;
784 	if (iov->num_VFs)
785 		pci_warn(dev, "driver left SR-IOV enabled after remove\n");
786 }
787 
788 /**
789  * pci_iov_update_resource - update a VF BAR
790  * @dev: the PCI device
791  * @resno: the resource number
792  *
793  * Update a VF BAR in the SR-IOV capability of a PF.
794  */
795 void pci_iov_update_resource(struct pci_dev *dev, int resno)
796 {
797 	struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
798 	struct resource *res = dev->resource + resno;
799 	int vf_bar = resno - PCI_IOV_RESOURCES;
800 	struct pci_bus_region region;
801 	u16 cmd;
802 	u32 new;
803 	int reg;
804 
805 	/*
806 	 * The generic pci_restore_bars() path calls this for all devices,
807 	 * including VFs and non-SR-IOV devices.  If this is not a PF, we
808 	 * have nothing to do.
809 	 */
810 	if (!iov)
811 		return;
812 
813 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
814 	if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
815 		dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
816 			 vf_bar, res);
817 		return;
818 	}
819 
820 	/*
821 	 * Ignore unimplemented BARs, unused resource slots for 64-bit
822 	 * BARs, and non-movable resources, e.g., those described via
823 	 * Enhanced Allocation.
824 	 */
825 	if (!res->flags)
826 		return;
827 
828 	if (res->flags & IORESOURCE_UNSET)
829 		return;
830 
831 	if (res->flags & IORESOURCE_PCI_FIXED)
832 		return;
833 
834 	pcibios_resource_to_bus(dev->bus, &region, res);
835 	new = region.start;
836 	new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
837 
838 	reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
839 	pci_write_config_dword(dev, reg, new);
840 	if (res->flags & IORESOURCE_MEM_64) {
841 		new = region.start >> 16 >> 16;
842 		pci_write_config_dword(dev, reg + 4, new);
843 	}
844 }
845 
/*
 * Weak default for the alignment of IOV resource @resno of @dev: the size
 * of a single VF's BAR, as returned by pci_iov_resource_size() (0 if @dev
 * is not a PF).
 */
resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,
						      int resno)
{
	return pci_iov_resource_size(dev, resno);
}
851 
/**
 * pci_sriov_resource_alignment - get resource alignment for VF BAR
 * @dev: the PCI device
 * @resno: the resource number
 *
 * Returns the alignment of the VF BAR found in the SR-IOV capability.
 * This is not the same as the resource size which is defined as
 * the VF BAR size multiplied by the number of VFs.  The alignment
 * is just the VF BAR size.
 *
 * The value comes from pcibios_iov_resource_alignment(), whose weak
 * default in this file returns the single-VF BAR size.
 */
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
{
	return pcibios_iov_resource_alignment(dev, resno);
}
866 
867 /**
868  * pci_restore_iov_state - restore the state of the IOV capability
869  * @dev: the PCI device
870  */
871 void pci_restore_iov_state(struct pci_dev *dev)
872 {
873 	if (dev->is_physfn)
874 		sriov_restore_state(dev);
875 }
876 
877 /**
878  * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
879  * @dev: the PCI device
880  * @auto_probe: set VF drivers auto probe flag
881  */
882 void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
883 {
884 	if (dev->is_physfn)
885 		dev->sriov->drivers_autoprobe = auto_probe;
886 }
887 
888 /**
889  * pci_iov_bus_range - find bus range used by Virtual Function
890  * @bus: the PCI bus
891  *
892  * Returns max number of buses (exclude current one) used by Virtual
893  * Functions.
894  */
895 int pci_iov_bus_range(struct pci_bus *bus)
896 {
897 	int max = 0;
898 	struct pci_dev *dev;
899 
900 	list_for_each_entry(dev, &bus->devices, bus_list) {
901 		if (!dev->is_physfn)
902 			continue;
903 		if (dev->sriov->max_VF_buses > max)
904 			max = dev->sriov->max_VF_buses;
905 	}
906 
907 	return max ? max - bus->number : 0;
908 }
909 
910 /**
911  * pci_enable_sriov - enable the SR-IOV capability
912  * @dev: the PCI device
913  * @nr_virtfn: number of virtual functions to enable
914  *
915  * Returns 0 on success, or negative on failure.
916  */
917 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
918 {
919 	might_sleep();
920 
921 	if (!dev->is_physfn)
922 		return -ENOSYS;
923 
924 	return sriov_enable(dev, nr_virtfn);
925 }
926 EXPORT_SYMBOL_GPL(pci_enable_sriov);
927 
928 /**
929  * pci_disable_sriov - disable the SR-IOV capability
930  * @dev: the PCI device
931  */
932 void pci_disable_sriov(struct pci_dev *dev)
933 {
934 	might_sleep();
935 
936 	if (!dev->is_physfn)
937 		return;
938 
939 	sriov_disable(dev);
940 }
941 EXPORT_SYMBOL_GPL(pci_disable_sriov);
942 
943 /**
944  * pci_num_vf - return number of VFs associated with a PF device_release_driver
945  * @dev: the PCI device
946  *
947  * Returns number of VFs, or 0 if SR-IOV is not enabled.
948  */
949 int pci_num_vf(struct pci_dev *dev)
950 {
951 	if (!dev->is_physfn)
952 		return 0;
953 
954 	return dev->sriov->num_VFs;
955 }
956 EXPORT_SYMBOL_GPL(pci_num_vf);
957 
958 /**
959  * pci_vfs_assigned - returns number of VFs are assigned to a guest
960  * @dev: the PCI device
961  *
962  * Returns number of VFs belonging to this device that are assigned to a guest.
963  * If device is not a physical function returns 0.
964  */
965 int pci_vfs_assigned(struct pci_dev *dev)
966 {
967 	struct pci_dev *vfdev;
968 	unsigned int vfs_assigned = 0;
969 	unsigned short dev_id;
970 
971 	/* only search if we are a PF */
972 	if (!dev->is_physfn)
973 		return 0;
974 
975 	/*
976 	 * determine the device ID for the VFs, the vendor ID will be the
977 	 * same as the PF so there is no need to check for that one
978 	 */
979 	dev_id = dev->sriov->vf_device;
980 
981 	/* loop through all the VFs to see if we own any that are assigned */
982 	vfdev = pci_get_device(dev->vendor, dev_id, NULL);
983 	while (vfdev) {
984 		/*
985 		 * It is considered assigned if it is a virtual function with
986 		 * our dev as the physical function and the assigned bit is set
987 		 */
988 		if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
989 			pci_is_dev_assigned(vfdev))
990 			vfs_assigned++;
991 
992 		vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
993 	}
994 
995 	return vfs_assigned;
996 }
997 EXPORT_SYMBOL_GPL(pci_vfs_assigned);
998 
999 /**
1000  * pci_sriov_set_totalvfs -- reduce the TotalVFs available
1001  * @dev: the PCI PF device
1002  * @numvfs: number that should be used for TotalVFs supported
1003  *
1004  * Should be called from PF driver's probe routine with
1005  * device's mutex held.
1006  *
1007  * Returns 0 if PF is an SRIOV-capable device and
1008  * value of numvfs valid. If not a PF return -ENOSYS;
1009  * if numvfs is invalid return -EINVAL;
1010  * if VFs already enabled, return -EBUSY.
1011  */
1012 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
1013 {
1014 	if (!dev->is_physfn)
1015 		return -ENOSYS;
1016 
1017 	if (numvfs > dev->sriov->total_VFs)
1018 		return -EINVAL;
1019 
1020 	/* Shouldn't change if VFs already enabled */
1021 	if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
1022 		return -EBUSY;
1023 
1024 	dev->sriov->driver_max_VFs = numvfs;
1025 	return 0;
1026 }
1027 EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
1028 
1029 /**
1030  * pci_sriov_get_totalvfs -- get total VFs supported on this device
1031  * @dev: the PCI PF device
1032  *
1033  * For a PCIe device with SRIOV support, return the PCIe
1034  * SRIOV capability value of TotalVFs or the value of driver_max_VFs
1035  * if the driver reduced it.  Otherwise 0.
1036  */
1037 int pci_sriov_get_totalvfs(struct pci_dev *dev)
1038 {
1039 	if (!dev->is_physfn)
1040 		return 0;
1041 
1042 	return dev->sriov->driver_max_VFs;
1043 }
1044 EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
1045 
1046 /**
1047  * pci_sriov_configure_simple - helper to configure SR-IOV
1048  * @dev: the PCI device
1049  * @nr_virtfn: number of virtual functions to enable, 0 to disable
1050  *
1051  * Enable or disable SR-IOV for devices that don't require any PF setup
1052  * before enabling SR-IOV.  Return value is negative on error, or number of
1053  * VFs allocated on success.
1054  */
1055 int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
1056 {
1057 	int rc;
1058 
1059 	might_sleep();
1060 
1061 	if (!dev->is_physfn)
1062 		return -ENODEV;
1063 
1064 	if (pci_vfs_assigned(dev)) {
1065 		pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
1066 		return -EPERM;
1067 	}
1068 
1069 	if (nr_virtfn == 0) {
1070 		sriov_disable(dev);
1071 		return 0;
1072 	}
1073 
1074 	rc = sriov_enable(dev, nr_virtfn);
1075 	if (rc < 0)
1076 		return rc;
1077 
1078 	return nr_virtfn;
1079 }
1080 EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);
1081