xref: /openbmc/linux/drivers/pci/iov.c (revision 81d67439)
1 /*
2  * drivers/pci/iov.c
3  *
4  * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
5  *
6  * PCI Express I/O Virtualization (IOV) support.
7  *   Single Root IOV 1.0
8  *   Address Translation Service 1.0
9  */
10 
11 #include <linux/pci.h>
12 #include <linux/slab.h>
13 #include <linux/mutex.h>
14 #include <linux/string.h>
15 #include <linux/delay.h>
16 #include <linux/pci-ats.h>
17 #include "pci.h"
18 
/* Size of the buffer that holds a "virtfn%u" sysfs link name. */
#define VIRTFN_ID_LEN	16
20 
21 static inline u8 virtfn_bus(struct pci_dev *dev, int id)
22 {
23 	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
24 				    dev->sriov->stride * id) >> 8);
25 }
26 
27 static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
28 {
29 	return (dev->devfn + dev->sriov->offset +
30 		dev->sriov->stride * id) & 0xff;
31 }
32 
33 static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
34 {
35 	int rc;
36 	struct pci_bus *child;
37 
38 	if (bus->number == busnr)
39 		return bus;
40 
41 	child = pci_find_bus(pci_domain_nr(bus), busnr);
42 	if (child)
43 		return child;
44 
45 	child = pci_add_new_bus(bus, NULL, busnr);
46 	if (!child)
47 		return NULL;
48 
49 	child->subordinate = busnr;
50 	child->dev.parent = bus->bridge;
51 	rc = pci_bus_add_child(child);
52 	if (rc) {
53 		pci_remove_bus(child);
54 		return NULL;
55 	}
56 
57 	return child;
58 }
59 
60 static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
61 {
62 	struct pci_bus *child;
63 
64 	if (bus->number == busnr)
65 		return;
66 
67 	child = pci_find_bus(pci_domain_nr(bus), busnr);
68 	BUG_ON(!child);
69 
70 	if (list_empty(&child->devices))
71 		pci_remove_bus(child);
72 }
73 
74 static int virtfn_add(struct pci_dev *dev, int id, int reset)
75 {
76 	int i;
77 	int rc;
78 	u64 size;
79 	char buf[VIRTFN_ID_LEN];
80 	struct pci_dev *virtfn;
81 	struct resource *res;
82 	struct pci_sriov *iov = dev->sriov;
83 
84 	virtfn = alloc_pci_dev();
85 	if (!virtfn)
86 		return -ENOMEM;
87 
88 	mutex_lock(&iov->dev->sriov->lock);
89 	virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
90 	if (!virtfn->bus) {
91 		kfree(virtfn);
92 		mutex_unlock(&iov->dev->sriov->lock);
93 		return -ENOMEM;
94 	}
95 	virtfn->devfn = virtfn_devfn(dev, id);
96 	virtfn->vendor = dev->vendor;
97 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
98 	pci_setup_device(virtfn);
99 	virtfn->dev.parent = dev->dev.parent;
100 
101 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
102 		res = dev->resource + PCI_IOV_RESOURCES + i;
103 		if (!res->parent)
104 			continue;
105 		virtfn->resource[i].name = pci_name(virtfn);
106 		virtfn->resource[i].flags = res->flags;
107 		size = resource_size(res);
108 		do_div(size, iov->total);
109 		virtfn->resource[i].start = res->start + size * id;
110 		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
111 		rc = request_resource(res, &virtfn->resource[i]);
112 		BUG_ON(rc);
113 	}
114 
115 	if (reset)
116 		__pci_reset_function(virtfn);
117 
118 	pci_device_add(virtfn, virtfn->bus);
119 	mutex_unlock(&iov->dev->sriov->lock);
120 
121 	virtfn->physfn = pci_dev_get(dev);
122 	virtfn->is_virtfn = 1;
123 
124 	rc = pci_bus_add_device(virtfn);
125 	if (rc)
126 		goto failed1;
127 	sprintf(buf, "virtfn%u", id);
128 	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
129 	if (rc)
130 		goto failed1;
131 	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
132 	if (rc)
133 		goto failed2;
134 
135 	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
136 
137 	return 0;
138 
139 failed2:
140 	sysfs_remove_link(&dev->dev.kobj, buf);
141 failed1:
142 	pci_dev_put(dev);
143 	mutex_lock(&iov->dev->sriov->lock);
144 	pci_remove_bus_device(virtfn);
145 	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
146 	mutex_unlock(&iov->dev->sriov->lock);
147 
148 	return rc;
149 }
150 
151 static void virtfn_remove(struct pci_dev *dev, int id, int reset)
152 {
153 	char buf[VIRTFN_ID_LEN];
154 	struct pci_bus *bus;
155 	struct pci_dev *virtfn;
156 	struct pci_sriov *iov = dev->sriov;
157 
158 	bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
159 	if (!bus)
160 		return;
161 
162 	virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
163 	if (!virtfn)
164 		return;
165 
166 	pci_dev_put(virtfn);
167 
168 	if (reset) {
169 		device_release_driver(&virtfn->dev);
170 		__pci_reset_function(virtfn);
171 	}
172 
173 	sprintf(buf, "virtfn%u", id);
174 	sysfs_remove_link(&dev->dev.kobj, buf);
175 	sysfs_remove_link(&virtfn->dev.kobj, "physfn");
176 
177 	mutex_lock(&iov->dev->sriov->lock);
178 	pci_remove_bus_device(virtfn);
179 	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
180 	mutex_unlock(&iov->dev->sriov->lock);
181 
182 	pci_dev_put(dev);
183 }
184 
185 static int sriov_migration(struct pci_dev *dev)
186 {
187 	u16 status;
188 	struct pci_sriov *iov = dev->sriov;
189 
190 	if (!iov->nr_virtfn)
191 		return 0;
192 
193 	if (!(iov->cap & PCI_SRIOV_CAP_VFM))
194 		return 0;
195 
196 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
197 	if (!(status & PCI_SRIOV_STATUS_VFM))
198 		return 0;
199 
200 	schedule_work(&iov->mtask);
201 
202 	return 1;
203 }
204 
205 static void sriov_migration_task(struct work_struct *work)
206 {
207 	int i;
208 	u8 state;
209 	u16 status;
210 	struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);
211 
212 	for (i = iov->initial; i < iov->nr_virtfn; i++) {
213 		state = readb(iov->mstate + i);
214 		if (state == PCI_SRIOV_VFM_MI) {
215 			writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
216 			state = readb(iov->mstate + i);
217 			if (state == PCI_SRIOV_VFM_AV)
218 				virtfn_add(iov->self, i, 1);
219 		} else if (state == PCI_SRIOV_VFM_MO) {
220 			virtfn_remove(iov->self, i, 1);
221 			writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
222 			state = readb(iov->mstate + i);
223 			if (state == PCI_SRIOV_VFM_AV)
224 				virtfn_add(iov->self, i, 0);
225 		}
226 	}
227 
228 	pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
229 	status &= ~PCI_SRIOV_STATUS_VFM;
230 	pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
231 }
232 
233 static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
234 {
235 	int bir;
236 	u32 table;
237 	resource_size_t pa;
238 	struct pci_sriov *iov = dev->sriov;
239 
240 	if (nr_virtfn <= iov->initial)
241 		return 0;
242 
243 	pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
244 	bir = PCI_SRIOV_VFM_BIR(table);
245 	if (bir > PCI_STD_RESOURCE_END)
246 		return -EIO;
247 
248 	table = PCI_SRIOV_VFM_OFFSET(table);
249 	if (table + nr_virtfn > pci_resource_len(dev, bir))
250 		return -EIO;
251 
252 	pa = pci_resource_start(dev, bir) + table;
253 	iov->mstate = ioremap(pa, nr_virtfn);
254 	if (!iov->mstate)
255 		return -ENOMEM;
256 
257 	INIT_WORK(&iov->mtask, sriov_migration_task);
258 
259 	iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
260 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
261 
262 	return 0;
263 }
264 
265 static void sriov_disable_migration(struct pci_dev *dev)
266 {
267 	struct pci_sriov *iov = dev->sriov;
268 
269 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
270 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
271 
272 	cancel_work_sync(&iov->mtask);
273 	iounmap(iov->mstate);
274 }
275 
276 static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
277 {
278 	int rc;
279 	int i, j;
280 	int nres;
281 	u16 offset, stride, initial;
282 	struct resource *res;
283 	struct pci_dev *pdev;
284 	struct pci_sriov *iov = dev->sriov;
285 
286 	if (!nr_virtfn)
287 		return 0;
288 
289 	if (iov->nr_virtfn)
290 		return -EINVAL;
291 
292 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
293 	if (initial > iov->total ||
294 	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
295 		return -EIO;
296 
297 	if (nr_virtfn < 0 || nr_virtfn > iov->total ||
298 	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
299 		return -EINVAL;
300 
301 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
302 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
303 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
304 	if (!offset || (nr_virtfn > 1 && !stride))
305 		return -EIO;
306 
307 	nres = 0;
308 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
309 		res = dev->resource + PCI_IOV_RESOURCES + i;
310 		if (res->parent)
311 			nres++;
312 	}
313 	if (nres != iov->nres) {
314 		dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
315 		return -ENOMEM;
316 	}
317 
318 	iov->offset = offset;
319 	iov->stride = stride;
320 
321 	if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
322 		dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
323 		return -ENOMEM;
324 	}
325 
326 	if (iov->link != dev->devfn) {
327 		pdev = pci_get_slot(dev->bus, iov->link);
328 		if (!pdev)
329 			return -ENODEV;
330 
331 		pci_dev_put(pdev);
332 
333 		if (!pdev->is_physfn)
334 			return -ENODEV;
335 
336 		rc = sysfs_create_link(&dev->dev.kobj,
337 					&pdev->dev.kobj, "dep_link");
338 		if (rc)
339 			return rc;
340 	}
341 
342 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
343 	pci_block_user_cfg_access(dev);
344 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
345 	msleep(100);
346 	pci_unblock_user_cfg_access(dev);
347 
348 	iov->initial = initial;
349 	if (nr_virtfn < initial)
350 		initial = nr_virtfn;
351 
352 	for (i = 0; i < initial; i++) {
353 		rc = virtfn_add(dev, i, 0);
354 		if (rc)
355 			goto failed;
356 	}
357 
358 	if (iov->cap & PCI_SRIOV_CAP_VFM) {
359 		rc = sriov_enable_migration(dev, nr_virtfn);
360 		if (rc)
361 			goto failed;
362 	}
363 
364 	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
365 	iov->nr_virtfn = nr_virtfn;
366 
367 	return 0;
368 
369 failed:
370 	for (j = 0; j < i; j++)
371 		virtfn_remove(dev, j, 0);
372 
373 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
374 	pci_block_user_cfg_access(dev);
375 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
376 	ssleep(1);
377 	pci_unblock_user_cfg_access(dev);
378 
379 	if (iov->link != dev->devfn)
380 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
381 
382 	return rc;
383 }
384 
385 static void sriov_disable(struct pci_dev *dev)
386 {
387 	int i;
388 	struct pci_sriov *iov = dev->sriov;
389 
390 	if (!iov->nr_virtfn)
391 		return;
392 
393 	if (iov->cap & PCI_SRIOV_CAP_VFM)
394 		sriov_disable_migration(dev);
395 
396 	for (i = 0; i < iov->nr_virtfn; i++)
397 		virtfn_remove(dev, i, 0);
398 
399 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
400 	pci_block_user_cfg_access(dev);
401 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
402 	ssleep(1);
403 	pci_unblock_user_cfg_access(dev);
404 
405 	if (iov->link != dev->devfn)
406 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
407 
408 	iov->nr_virtfn = 0;
409 }
410 
411 static int sriov_init(struct pci_dev *dev, int pos)
412 {
413 	int i;
414 	int rc;
415 	int nres;
416 	u32 pgsz;
417 	u16 ctrl, total, offset, stride;
418 	struct pci_sriov *iov;
419 	struct resource *res;
420 	struct pci_dev *pdev;
421 
422 	if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
423 	    dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
424 		return -ENODEV;
425 
426 	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
427 	if (ctrl & PCI_SRIOV_CTRL_VFE) {
428 		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
429 		ssleep(1);
430 	}
431 
432 	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
433 	if (!total)
434 		return 0;
435 
436 	ctrl = 0;
437 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
438 		if (pdev->is_physfn)
439 			goto found;
440 
441 	pdev = NULL;
442 	if (pci_ari_enabled(dev->bus))
443 		ctrl |= PCI_SRIOV_CTRL_ARI;
444 
445 found:
446 	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
447 	pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
448 	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
449 	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
450 	if (!offset || (total > 1 && !stride))
451 		return -EIO;
452 
453 	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
454 	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
455 	pgsz &= ~((1 << i) - 1);
456 	if (!pgsz)
457 		return -EIO;
458 
459 	pgsz &= ~(pgsz - 1);
460 	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
461 
462 	nres = 0;
463 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
464 		res = dev->resource + PCI_IOV_RESOURCES + i;
465 		i += __pci_read_base(dev, pci_bar_unknown, res,
466 				     pos + PCI_SRIOV_BAR + i * 4);
467 		if (!res->flags)
468 			continue;
469 		if (resource_size(res) & (PAGE_SIZE - 1)) {
470 			rc = -EIO;
471 			goto failed;
472 		}
473 		res->end = res->start + resource_size(res) * total - 1;
474 		nres++;
475 	}
476 
477 	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
478 	if (!iov) {
479 		rc = -ENOMEM;
480 		goto failed;
481 	}
482 
483 	iov->pos = pos;
484 	iov->nres = nres;
485 	iov->ctrl = ctrl;
486 	iov->total = total;
487 	iov->offset = offset;
488 	iov->stride = stride;
489 	iov->pgsz = pgsz;
490 	iov->self = dev;
491 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
492 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
493 	if (dev->pcie_type == PCI_EXP_TYPE_RC_END)
494 		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
495 
496 	if (pdev)
497 		iov->dev = pci_dev_get(pdev);
498 	else
499 		iov->dev = dev;
500 
501 	mutex_init(&iov->lock);
502 
503 	dev->sriov = iov;
504 	dev->is_physfn = 1;
505 
506 	return 0;
507 
508 failed:
509 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
510 		res = dev->resource + PCI_IOV_RESOURCES + i;
511 		res->flags = 0;
512 	}
513 
514 	return rc;
515 }
516 
517 static void sriov_release(struct pci_dev *dev)
518 {
519 	BUG_ON(dev->sriov->nr_virtfn);
520 
521 	if (dev != dev->sriov->dev)
522 		pci_dev_put(dev->sriov->dev);
523 
524 	mutex_destroy(&dev->sriov->lock);
525 
526 	kfree(dev->sriov);
527 	dev->sriov = NULL;
528 }
529 
530 static void sriov_restore_state(struct pci_dev *dev)
531 {
532 	int i;
533 	u16 ctrl;
534 	struct pci_sriov *iov = dev->sriov;
535 
536 	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
537 	if (ctrl & PCI_SRIOV_CTRL_VFE)
538 		return;
539 
540 	for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
541 		pci_update_resource(dev, i);
542 
543 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
544 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
545 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
546 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
547 		msleep(100);
548 }
549 
550 /**
551  * pci_iov_init - initialize the IOV capability
552  * @dev: the PCI device
553  *
554  * Returns 0 on success, or negative on failure.
555  */
556 int pci_iov_init(struct pci_dev *dev)
557 {
558 	int pos;
559 
560 	if (!pci_is_pcie(dev))
561 		return -ENODEV;
562 
563 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
564 	if (pos)
565 		return sriov_init(dev, pos);
566 
567 	return -ENODEV;
568 }
569 
570 /**
571  * pci_iov_release - release resources used by the IOV capability
572  * @dev: the PCI device
573  */
574 void pci_iov_release(struct pci_dev *dev)
575 {
576 	if (dev->is_physfn)
577 		sriov_release(dev);
578 }
579 
580 /**
581  * pci_iov_resource_bar - get position of the SR-IOV BAR
582  * @dev: the PCI device
583  * @resno: the resource number
584  * @type: the BAR type to be filled in
585  *
586  * Returns position of the BAR encapsulated in the SR-IOV capability.
587  */
588 int pci_iov_resource_bar(struct pci_dev *dev, int resno,
589 			 enum pci_bar_type *type)
590 {
591 	if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
592 		return 0;
593 
594 	BUG_ON(!dev->is_physfn);
595 
596 	*type = pci_bar_unknown;
597 
598 	return dev->sriov->pos + PCI_SRIOV_BAR +
599 		4 * (resno - PCI_IOV_RESOURCES);
600 }
601 
602 /**
603  * pci_sriov_resource_alignment - get resource alignment for VF BAR
604  * @dev: the PCI device
605  * @resno: the resource number
606  *
607  * Returns the alignment of the VF BAR found in the SR-IOV capability.
608  * This is not the same as the resource size which is defined as
609  * the VF BAR size multiplied by the number of VFs.  The alignment
610  * is just the VF BAR size.
611  */
612 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
613 {
614 	struct resource tmp;
615 	enum pci_bar_type type;
616 	int reg = pci_iov_resource_bar(dev, resno, &type);
617 
618 	if (!reg)
619 		return 0;
620 
621 	 __pci_read_base(dev, type, &tmp, reg);
622 	return resource_alignment(&tmp);
623 }
624 
625 /**
626  * pci_restore_iov_state - restore the state of the IOV capability
627  * @dev: the PCI device
628  */
629 void pci_restore_iov_state(struct pci_dev *dev)
630 {
631 	if (dev->is_physfn)
632 		sriov_restore_state(dev);
633 }
634 
635 /**
636  * pci_iov_bus_range - find bus range used by Virtual Function
637  * @bus: the PCI bus
638  *
639  * Returns max number of buses (exclude current one) used by Virtual
640  * Functions.
641  */
642 int pci_iov_bus_range(struct pci_bus *bus)
643 {
644 	int max = 0;
645 	u8 busnr;
646 	struct pci_dev *dev;
647 
648 	list_for_each_entry(dev, &bus->devices, bus_list) {
649 		if (!dev->is_physfn)
650 			continue;
651 		busnr = virtfn_bus(dev, dev->sriov->total - 1);
652 		if (busnr > max)
653 			max = busnr;
654 	}
655 
656 	return max ? max - bus->number : 0;
657 }
658 
659 /**
660  * pci_enable_sriov - enable the SR-IOV capability
661  * @dev: the PCI device
662  * @nr_virtfn: number of virtual functions to enable
663  *
664  * Returns 0 on success, or negative on failure.
665  */
666 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
667 {
668 	might_sleep();
669 
670 	if (!dev->is_physfn)
671 		return -ENODEV;
672 
673 	return sriov_enable(dev, nr_virtfn);
674 }
675 EXPORT_SYMBOL_GPL(pci_enable_sriov);
676 
677 /**
678  * pci_disable_sriov - disable the SR-IOV capability
679  * @dev: the PCI device
680  */
681 void pci_disable_sriov(struct pci_dev *dev)
682 {
683 	might_sleep();
684 
685 	if (!dev->is_physfn)
686 		return;
687 
688 	sriov_disable(dev);
689 }
690 EXPORT_SYMBOL_GPL(pci_disable_sriov);
691 
692 /**
693  * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration
694  * @dev: the PCI device
695  *
696  * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
697  *
698  * Physical Function driver is responsible to register IRQ handler using
699  * VF Migration Interrupt Message Number, and call this function when the
700  * interrupt is generated by the hardware.
701  */
702 irqreturn_t pci_sriov_migration(struct pci_dev *dev)
703 {
704 	if (!dev->is_physfn)
705 		return IRQ_NONE;
706 
707 	return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
708 }
709 EXPORT_SYMBOL_GPL(pci_sriov_migration);
710 
711 /**
712  * pci_num_vf - return number of VFs associated with a PF device_release_driver
713  * @dev: the PCI device
714  *
715  * Returns number of VFs, or 0 if SR-IOV is not enabled.
716  */
717 int pci_num_vf(struct pci_dev *dev)
718 {
719 	if (!dev || !dev->is_physfn)
720 		return 0;
721 	else
722 		return dev->sriov->nr_virtfn;
723 }
724 EXPORT_SYMBOL_GPL(pci_num_vf);
725 
726 static int ats_alloc_one(struct pci_dev *dev, int ps)
727 {
728 	int pos;
729 	u16 cap;
730 	struct pci_ats *ats;
731 
732 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
733 	if (!pos)
734 		return -ENODEV;
735 
736 	ats = kzalloc(sizeof(*ats), GFP_KERNEL);
737 	if (!ats)
738 		return -ENOMEM;
739 
740 	ats->pos = pos;
741 	ats->stu = ps;
742 	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
743 	ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
744 					    PCI_ATS_MAX_QDEP;
745 	dev->ats = ats;
746 
747 	return 0;
748 }
749 
750 static void ats_free_one(struct pci_dev *dev)
751 {
752 	kfree(dev->ats);
753 	dev->ats = NULL;
754 }
755 
756 /**
757  * pci_enable_ats - enable the ATS capability
758  * @dev: the PCI device
759  * @ps: the IOMMU page shift
760  *
761  * Returns 0 on success, or negative on failure.
762  */
763 int pci_enable_ats(struct pci_dev *dev, int ps)
764 {
765 	int rc;
766 	u16 ctrl;
767 
768 	BUG_ON(dev->ats && dev->ats->is_enabled);
769 
770 	if (ps < PCI_ATS_MIN_STU)
771 		return -EINVAL;
772 
773 	if (dev->is_physfn || dev->is_virtfn) {
774 		struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;
775 
776 		mutex_lock(&pdev->sriov->lock);
777 		if (pdev->ats)
778 			rc = pdev->ats->stu == ps ? 0 : -EINVAL;
779 		else
780 			rc = ats_alloc_one(pdev, ps);
781 
782 		if (!rc)
783 			pdev->ats->ref_cnt++;
784 		mutex_unlock(&pdev->sriov->lock);
785 		if (rc)
786 			return rc;
787 	}
788 
789 	if (!dev->is_physfn) {
790 		rc = ats_alloc_one(dev, ps);
791 		if (rc)
792 			return rc;
793 	}
794 
795 	ctrl = PCI_ATS_CTRL_ENABLE;
796 	if (!dev->is_virtfn)
797 		ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU);
798 	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
799 
800 	dev->ats->is_enabled = 1;
801 
802 	return 0;
803 }
804 
805 /**
806  * pci_disable_ats - disable the ATS capability
807  * @dev: the PCI device
808  */
809 void pci_disable_ats(struct pci_dev *dev)
810 {
811 	u16 ctrl;
812 
813 	BUG_ON(!dev->ats || !dev->ats->is_enabled);
814 
815 	pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl);
816 	ctrl &= ~PCI_ATS_CTRL_ENABLE;
817 	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
818 
819 	dev->ats->is_enabled = 0;
820 
821 	if (dev->is_physfn || dev->is_virtfn) {
822 		struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;
823 
824 		mutex_lock(&pdev->sriov->lock);
825 		pdev->ats->ref_cnt--;
826 		if (!pdev->ats->ref_cnt)
827 			ats_free_one(pdev);
828 		mutex_unlock(&pdev->sriov->lock);
829 	}
830 
831 	if (!dev->is_physfn)
832 		ats_free_one(dev);
833 }
834 
835 /**
836  * pci_ats_queue_depth - query the ATS Invalidate Queue Depth
837  * @dev: the PCI device
838  *
839  * Returns the queue depth on success, or negative on failure.
840  *
841  * The ATS spec uses 0 in the Invalidate Queue Depth field to
842  * indicate that the function can accept 32 Invalidate Request.
843  * But here we use the `real' values (i.e. 1~32) for the Queue
844  * Depth; and 0 indicates the function shares the Queue with
845  * other functions (doesn't exclusively own a Queue).
846  */
847 int pci_ats_queue_depth(struct pci_dev *dev)
848 {
849 	int pos;
850 	u16 cap;
851 
852 	if (dev->is_virtfn)
853 		return 0;
854 
855 	if (dev->ats)
856 		return dev->ats->qdep;
857 
858 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
859 	if (!pos)
860 		return -ENODEV;
861 
862 	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
863 
864 	return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
865 				       PCI_ATS_MAX_QDEP;
866 }
867