xref: /openbmc/qemu/hw/pci/msix.c (revision b8f4fee6b495b8ee77148f690e7c599d640a8821)
1 /*
2  * MSI-X device support
3  *
4  * This module includes support for MSI-X in pci devices.
5  *
6  * Author: Michael S. Tsirkin <mst@redhat.com>
7  *
8  *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  *
13  * Contributions after 2012-01-13 are licensed under the terms of the
14  * GNU GPL, version 2 or (at your option) any later version.
15  */
16 
17 #include "qemu/osdep.h"
18 #include "qemu/log.h"
19 #include "hw/pci/msi.h"
20 #include "hw/pci/msix.h"
21 #include "hw/pci/pci.h"
22 #include "hw/xen/xen.h"
23 #include "system/xen.h"
24 #include "migration/qemu-file-types.h"
25 #include "migration/vmstate.h"
26 #include "qemu/range.h"
27 #include "qapi/error.h"
28 #include "trace.h"
29 
30 #include "hw/i386/kvm/xen_evtchn.h"
31 
/*
 * The MSI-X enable bit and the mask-all bit both live in byte 1 of the
 * 16-bit FLAGS register, so the control byte is addressed at
 * PCI_MSIX_FLAGS + 1 and the 16-bit flag masks are shifted down by 8
 * to obtain byte-sized masks.
 */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
36 
msix_prepare_message(PCIDevice * dev,unsigned vector)37 static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
38 {
39     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
40     MSIMessage msg;
41 
42     msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
43     msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
44     return msg;
45 }
46 
msix_get_message(PCIDevice * dev,unsigned vector)47 MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
48 {
49     return dev->msix_prepare_message(dev, vector);
50 }
51 
52 /*
53  * Special API for POWER to configure the vectors through
54  * a side channel. Should never be used by devices.
55  */
msix_set_message(PCIDevice * dev,int vector,struct MSIMessage msg)56 void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
57 {
58     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
59 
60     pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
61     pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
62     table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
63 }
64 
/* Bit mask selecting @vector's bit within its byte of the pending array. */
static uint8_t msix_pending_mask(int vector)
{
    const int bit = vector % 8;

    return 1 << bit;
}
69 
/* Address of the pending-array byte that holds @vector's bit. */
static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    return &dev->msix_pba[vector / 8];
}
74 
/* Non-zero when @vector's bit is set in the pending array. */
int msix_is_pending(PCIDevice *dev, unsigned int vector)
{
    const uint8_t pba_byte = *msix_pending_byte(dev, vector);

    return pba_byte & msix_pending_mask(vector);
}
79 
/* Set @vector's bit in the pending array. */
void msix_set_pending(PCIDevice *dev, unsigned int vector)
{
    uint8_t *pba_byte = msix_pending_byte(dev, vector);

    *pba_byte |= msix_pending_mask(vector);
}
84 
/* Clear @vector's bit in the pending array. */
void msix_clr_pending(PCIDevice *dev, int vector)
{
    uint8_t *pba_byte = msix_pending_byte(dev, vector);

    *pba_byte &= ~msix_pending_mask(vector);
}
89 
/*
 * Tell whether @vector counts as masked, given function-mask state @fmask.
 *
 * A vector is masked when @fmask is set or when the mask bit in its
 * table entry is set.  The exception is Xen: MSIs there can be remapped
 * into pirqs, and masking/unmasking of those goes through the PV evtchn
 * path, so such vectors are always reported as unmasked.
 */
static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
{
    uint8_t *entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;

    if (xen_enabled() &&
        xen_is_pirq_msi(pci_get_long(entry + PCI_MSIX_ENTRY_DATA))) {
        return false;
    }

    if (fmask) {
        return true;
    }

    return entry[PCI_MSIX_ENTRY_VECTOR_CTRL] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
}
102 
/* Effective mask state of @vector under the device's current function mask. */
bool msix_is_masked(PCIDevice *dev, unsigned int vector)
{
    const bool function_masked = dev->msix_function_masked;

    return msix_vector_masked(dev, vector, function_masked);
}
107 
/*
 * Inform the registered vector notifiers of @vector's new mask state.
 *
 * Does nothing when no use-notifier is installed.  A vector that became
 * masked triggers the release notifier; one that became unmasked
 * triggers the use notifier with the vector's current message, and that
 * notifier is asserted not to fail.
 */
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    if (!dev->msix_vector_use_notifier) {
        return;
    }

    if (!is_masked) {
        MSIMessage msg = msix_get_message(dev, vector);
        int ret = dev->msix_vector_use_notifier(dev, vector, msg);

        assert(ret >= 0);
        return;
    }

    dev->msix_vector_release_notifier(dev, vector);
}
125 
/*
 * Propagate a possible change of @vector's effective mask state.
 *
 * @was_masked is the state before the change; the new state is read
 * from the device.  Under Xen emulation the event-channel code always
 * gets to snoop the entry, even if the mask state did not change.  On a
 * real transition the vector notifiers fire, and an interrupt that was
 * left pending while masked is delivered once the vector is unmasked.
 */
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
    const bool now_masked = msix_is_masked(dev, vector);

    if (xen_mode == XEN_EMULATE) {
        MSIMessage msg = msix_prepare_message(dev, vector);

        xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data,
                             now_masked);
    }

    if (now_masked != was_masked) {
        msix_fire_vector_notifier(dev, vector, now_masked);

        if (!now_masked && msix_is_pending(dev, vector)) {
            msix_clr_pending(dev, vector);
            msix_notify(dev, vector);
        }
    }
}
148 
/*
 * Set or clear the per-vector mask bit of @vector and propagate any
 * resulting change of the vector's effective mask state.
 *
 * @dev: device owning the MSI-X table.
 * @vector: table entry index; must lie in [0, dev->msix_entries_nr).
 * @mask: true to set the mask bit, false to clear it.
 */
void msix_set_mask(PCIDevice *dev, int vector, bool mask)
{
    unsigned offset;
    bool was_masked;

    /*
     * @vector is a signed int, so an upper-bound check alone may let a
     * negative index slip through and address memory in front of the
     * table.  Check both bounds.
     */
    assert(vector >= 0 && vector < dev->msix_entries_nr);

    offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;

    /* Sample the old state before touching the table entry. */
    was_masked = msix_is_masked(dev, vector);

    if (mask) {
        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
    } else {
        dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
    }

    msix_handle_mask_update(dev, vector, was_masked);
}
168 
/* True when the mask-all bit is set in the MSI-X control byte. */
static bool msix_masked(PCIDevice *dev)
{
    const uint8_t control = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];

    return control & MSIX_MASKALL_MASK;
}
173 
/*
 * Recompute the cached function-mask flag: the function is masked when
 * MSI-X is disabled or when the mask-all bit is set.
 */
static void msix_update_function_masked(PCIDevice *dev)
{
    if (msix_enabled(dev)) {
        dev->msix_function_masked = msix_masked(dev);
    } else {
        dev->msix_function_masked = true;
    }
}
178 
179 /* Handle MSI-X capability config write. */
msix_write_config(PCIDevice * dev,uint32_t addr,uint32_t val,int len)180 void msix_write_config(PCIDevice *dev, uint32_t addr,
181                        uint32_t val, int len)
182 {
183     unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
184     int vector;
185     bool was_masked;
186 
187     if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
188         return;
189     }
190 
191     trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));
192 
193     was_masked = dev->msix_function_masked;
194     msix_update_function_masked(dev);
195 
196     if (!msix_enabled(dev)) {
197         return;
198     }
199 
200     pci_device_deassert_intx(dev);
201 
202     if (dev->msix_function_masked == was_masked) {
203         return;
204     }
205 
206     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
207         msix_handle_mask_update(dev, vector,
208                                 msix_vector_masked(dev, vector, was_masked));
209     }
210 }
211 
/*
 * MMIO read handler for the MSI-X table: return the 32-bit value stored
 * at byte offset @addr.  The access is asserted to lie within the table.
 */
static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
                                     unsigned size)
{
    PCIDevice *pdev = opaque;

    assert(addr + size <= pdev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);

    return pci_get_long(&pdev->msix_table[addr]);
}
220 
/*
 * MMIO write handler for the MSI-X table: store the 32-bit @val at byte
 * offset @addr, then propagate any mask-state change of the entry that
 * was written.  The access is asserted to lie within the table.
 */
static void msix_table_mmio_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned size)
{
    PCIDevice *pdev = opaque;
    int entry = addr / PCI_MSIX_ENTRY_SIZE;
    bool masked_before;

    assert(addr + size <= pdev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);

    /* The old state must be sampled before the table is modified. */
    masked_before = msix_is_masked(pdev, entry);
    pci_set_long(&pdev->msix_table[addr], val);
    msix_handle_mask_update(pdev, entry, masked_before);
}
234 
235 static const MemoryRegionOps msix_table_mmio_ops = {
236     .read = msix_table_mmio_read,
237     .write = msix_table_mmio_write,
238     .endianness = DEVICE_LITTLE_ENDIAN,
239     .valid = {
240         .min_access_size = 4,
241         .max_access_size = 8,
242     },
243     .impl = {
244         .max_access_size = 4,
245     },
246 };
247 
/*
 * MMIO read handler for the Pending Bit Array.
 *
 * If a poll notifier is registered, it is first asked to refresh the
 * pending bits of the vectors covered by this access; then the 32-bit
 * value stored at byte offset @addr is returned.
 */
static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
                                   unsigned size)
{
    PCIDevice *dev = opaque;

    if (dev->msix_vector_poll_notifier) {
        unsigned vector_start = addr * 8;
        unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr);

        /*
         * The PBA region is padded to a multiple of 64 bits, so a read
         * can land entirely beyond the last vector.  vector_start then
         * exceeds vector_end; do not pass that inverted range on to the
         * notifier, as there is nothing to poll.
         */
        if (vector_start < vector_end) {
            dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
        }
    }

    return pci_get_long(dev->msix_pba + addr);
}
260 
/*
 * MMIO write handler for the Pending Bit Array: the write is ignored
 * and reported as a guest error.
 */
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
{
    PCIDevice *pdev = opaque;

    qemu_log_mask(LOG_GUEST_ERROR,
                  "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X "
                  "PBA at 0x%" FMT_PCIBUS ", ignoring.\n",
                  pci_root_bus_path(pdev),
                  pci_dev_bus_num(pdev),
                  PCI_SLOT(pdev->devfn),
                  PCI_FUNC(pdev->devfn),
                  addr);
}
273 
274 static const MemoryRegionOps msix_pba_mmio_ops = {
275     .read = msix_pba_mmio_read,
276     .write = msix_pba_mmio_write,
277     .endianness = DEVICE_LITTLE_ENDIAN,
278     .valid = {
279         .min_access_size = 4,
280         .max_access_size = 8,
281     },
282     .impl = {
283         .max_access_size = 4,
284     },
285 };
286 
msix_mask_all(struct PCIDevice * dev,unsigned nentries)287 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
288 {
289     int vector;
290 
291     for (vector = 0; vector < nentries; ++vector) {
292         unsigned offset =
293             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
294         bool was_masked = msix_is_masked(dev, vector);
295 
296         dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
297         msix_handle_mask_update(dev, vector, was_masked);
298     }
299 }
300 
301 /*
302  * Make PCI device @dev MSI-X capable
303  * @nentries is the max number of MSI-X vectors that the device support.
304  * @table_bar is the MemoryRegion that MSI-X table structure resides.
305  * @table_bar_nr is number of base address register corresponding to @table_bar.
306  * @table_offset indicates the offset that the MSI-X table structure starts with
307  * in @table_bar.
308  * @pba_bar is the MemoryRegion that the Pending Bit Array structure resides.
309  * @pba_bar_nr is number of base address register corresponding to @pba_bar.
310  * @pba_offset indicates the offset that the Pending Bit Array structure
311  * starts with in @pba_bar.
312  * Non-zero @cap_pos puts capability MSI-X at that offset in PCI config space.
313  * @errp is for returning errors.
314  *
315  * Return 0 on success; set @errp and return -errno on error:
316  * -ENOTSUP means lacking msi support for a msi-capable platform.
317  * -EINVAL means capability overlap, happens when @cap_pos is non-zero,
318  * also means a programming error, except device assignment, which can check
319  * if a real HW is broken.
320  */
msix_init(struct PCIDevice * dev,uint32_t nentries,MemoryRegion * table_bar,uint8_t table_bar_nr,unsigned table_offset,MemoryRegion * pba_bar,uint8_t pba_bar_nr,unsigned pba_offset,uint8_t cap_pos,Error ** errp)321 int msix_init(struct PCIDevice *dev, uint32_t nentries,
322               MemoryRegion *table_bar, uint8_t table_bar_nr,
323               unsigned table_offset, MemoryRegion *pba_bar,
324               uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
325               Error **errp)
326 {
327     int cap;
328     unsigned table_size, pba_size;
329     uint8_t *config;
330 
331     /* Nothing to do if MSI is not supported by interrupt controller */
332     if (!msi_nonbroken) {
333         error_setg(errp, "MSI-X is not supported by interrupt controller");
334         return -ENOTSUP;
335     }
336 
337     if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
338         error_setg(errp, "The number of MSI-X vectors is invalid");
339         return -EINVAL;
340     }
341 
342     table_size = nentries * PCI_MSIX_ENTRY_SIZE;
343     pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
344 
345     /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
346     if ((table_bar_nr == pba_bar_nr &&
347          ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
348         table_offset + table_size > memory_region_size(table_bar) ||
349         pba_offset + pba_size > memory_region_size(pba_bar) ||
350         (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
351         error_setg(errp, "table & pba overlap, or they don't fit in BARs,"
352                    " or don't align");
353         return -EINVAL;
354     }
355 
356     cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
357                               cap_pos, MSIX_CAP_LENGTH, errp);
358     if (cap < 0) {
359         return cap;
360     }
361 
362     dev->msix_cap = cap;
363     dev->cap_present |= QEMU_PCI_CAP_MSIX;
364     config = dev->config + cap;
365 
366     pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
367     dev->msix_entries_nr = nentries;
368     dev->msix_function_masked = true;
369 
370     pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
371     pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
372 
373     /* Make flags bit writable. */
374     dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
375                                              MSIX_MASKALL_MASK;
376 
377     dev->msix_table = g_malloc0(table_size);
378     dev->msix_pba = g_malloc0(pba_size);
379     dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
380 
381     msix_mask_all(dev, nentries);
382 
383     memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev,
384                           "msix-table", table_size);
385     memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
386     memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev,
387                           "msix-pba", pba_size);
388     memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
389 
390     dev->msix_prepare_message = msix_prepare_message;
391 
392     return 0;
393 }
394 
/*
 * Make @dev MSI-X capable using a BAR dedicated to the MSI-X structures.
 *
 * The vector table is placed at offset 0 of BAR @bar_nr and the PBA
 * after it.  Returns 0 on success; on failure returns a negative errno
 * value, with @errp set.
 */
int msix_init_exclusive_bar(PCIDevice *dev, uint32_t nentries,
                            uint8_t bar_nr, Error **errp)
{
    uint32_t bar_size = 4096;
    uint32_t bar_pba_offset, bar_pba_size, table_bytes;
    char *name;
    int ret;

    /* Sanity-check nentries before it is used in BAR size calculations */
    if (!(nentries >= 1 && nentries <= PCI_MSIX_FLAGS_QSIZE + 1)) {
        error_setg(errp, "The number of MSI-X vectors is invalid");
        return -EINVAL;
    }

    table_bytes = nentries * PCI_MSIX_ENTRY_SIZE;
    bar_pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;

    /*
     * Migration compatibility dictates that this remains a 4k BAR with
     * the vector table in the lower half and the PBA in the upper half
     * as long as the table fits in the lower half.  A larger table
     * pushes the PBA up, directly behind the table.
     */
    bar_pba_offset = table_bytes > bar_size / 2 ? table_bytes : bar_size / 2;

    /* Grow the BAR if table plus PBA no longer fit in 4k */
    if (bar_pba_offset + bar_pba_size > 4096) {
        bar_size = bar_pba_offset + bar_pba_size;
    }
    bar_size = pow2ceil(bar_size);

    name = g_strdup_printf("%s-msix", dev->name);
    memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size);
    g_free(name);

    ret = msix_init(dev, nentries,
                    &dev->msix_exclusive_bar, bar_nr, 0,
                    &dev->msix_exclusive_bar, bar_nr, bar_pba_offset,
                    0, errp);
    if (ret == 0) {
        pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
                         &dev->msix_exclusive_bar);
        return 0;
    }

    return ret;
}
444 
/* Drop the use count and the pending bit of every vector of @dev. */
static void msix_free_irq_entries(PCIDevice *dev)
{
    int i = 0;

    while (i < dev->msix_entries_nr) {
        dev->msix_entry_used[i] = 0;
        msix_clr_pending(dev, i);
        i++;
    }
}
454 
/* Clear the pending bit of every vector of @dev. */
static void msix_clear_all_vectors(PCIDevice *dev)
{
    int i = 0;

    while (i < dev->msix_entries_nr) {
        msix_clr_pending(dev, i);
        i++;
    }
}
463 
464 /* Clean up resources for the device. */
msix_uninit(PCIDevice * dev,MemoryRegion * table_bar,MemoryRegion * pba_bar)465 void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
466 {
467     if (!msix_present(dev)) {
468         return;
469     }
470     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
471     dev->msix_cap = 0;
472     msix_free_irq_entries(dev);
473     dev->msix_entries_nr = 0;
474     memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
475     g_free(dev->msix_pba);
476     dev->msix_pba = NULL;
477     memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
478     g_free(dev->msix_table);
479     dev->msix_table = NULL;
480     g_free(dev->msix_entry_used);
481     dev->msix_entry_used = NULL;
482     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
483     dev->msix_prepare_message = NULL;
484 }
485 
/*
 * Tear down MSI-X for a device whose table and PBA both live in its
 * exclusive MSI-X BAR.  Does nothing for a device without MSI-X.
 */
void msix_uninit_exclusive_bar(PCIDevice *dev)
{
    MemoryRegion *bar = &dev->msix_exclusive_bar;

    if (!msix_present(dev)) {
        return;
    }

    msix_uninit(dev, bar, bar);
}
492 
/*
 * Write the MSI-X table and the pending bits of @dev to migration
 * stream @f.  Nothing is written for a device without MSI-X.
 */
void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned nvectors = dev->msix_entries_nr;

    if (msix_present(dev)) {
        qemu_put_buffer(f, dev->msix_table, nvectors * PCI_MSIX_ENTRY_SIZE);
        qemu_put_buffer(f, dev->msix_pba, DIV_ROUND_UP(nvectors, 8));
    }
}
504 
505 /* Should be called after restoring the config space. */
msix_load(PCIDevice * dev,QEMUFile * f)506 void msix_load(PCIDevice *dev, QEMUFile *f)
507 {
508     unsigned n = dev->msix_entries_nr;
509     unsigned int vector;
510 
511     if (!msix_present(dev)) {
512         return;
513     }
514 
515     msix_clear_all_vectors(dev);
516     qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
517     qemu_get_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
518     msix_update_function_masked(dev);
519 
520     for (vector = 0; vector < n; vector++) {
521         msix_handle_mask_update(dev, vector, true);
522     }
523 }
524 
525 /* Does device support MSI-X? */
msix_present(PCIDevice * dev)526 int msix_present(PCIDevice *dev)
527 {
528     return dev->cap_present & QEMU_PCI_CAP_MSIX;
529 }
530 
531 /* Is MSI-X enabled? */
msix_enabled(PCIDevice * dev)532 int msix_enabled(PCIDevice *dev)
533 {
534     return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
535         (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
536          MSIX_ENABLE_MASK);
537 }
538 
539 /* Send an MSI-X message */
msix_notify(PCIDevice * dev,unsigned vector)540 void msix_notify(PCIDevice *dev, unsigned vector)
541 {
542     MSIMessage msg;
543 
544     assert(vector < dev->msix_entries_nr);
545 
546     if (!dev->msix_entry_used[vector]) {
547         return;
548     }
549 
550     if (msix_is_masked(dev, vector)) {
551         msix_set_pending(dev, vector);
552         return;
553     }
554 
555     msg = msix_get_message(dev, vector);
556 
557     msi_send_message(dev, msg);
558 }
559 
/*
 * Reset the MSI-X state of @dev: clear all pending bits, clear the
 * guest-writable bits of the control byte, zero the table and the PBA,
 * and mask every vector.  Does nothing for a device without MSI-X.
 */
void msix_reset(PCIDevice *dev)
{
    unsigned control_pos;

    if (!msix_present(dev)) {
        return;
    }

    msix_clear_all_vectors(dev);

    control_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
    dev->config[control_pos] &= ~dev->wmask[control_pos];

    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
    memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);

    msix_mask_all(dev, dev->msix_entries_nr);
}
572 
/* PCI spec suggests that devices make it possible for software to configure
 * fewer vectors than supported by the device, but does not specify a standard
 * mechanism for devices to do so.
 *
 * We support this by asking devices to declare vectors software is going to
 * actually use, and checking this on the notification path. Devices that
 * don't want to follow the spec suggestion can declare all vectors as used. */
580 
581 /* Mark vector as used. */
msix_vector_use(PCIDevice * dev,unsigned vector)582 void msix_vector_use(PCIDevice *dev, unsigned vector)
583 {
584     assert(vector < dev->msix_entries_nr);
585     dev->msix_entry_used[vector]++;
586 }
587 
588 /* Mark vector as unused. */
msix_vector_unuse(PCIDevice * dev,unsigned vector)589 void msix_vector_unuse(PCIDevice *dev, unsigned vector)
590 {
591     assert(vector < dev->msix_entries_nr);
592     if (!dev->msix_entry_used[vector]) {
593         return;
594     }
595     if (--dev->msix_entry_used[vector]) {
596         return;
597     }
598     msix_clr_pending(dev, vector);
599 }
600 
/*
 * Mark every vector of @dev as unused and clear its pending bit.
 * Does nothing for a device without MSI-X.
 */
void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (msix_present(dev)) {
        msix_free_irq_entries(dev);
    }
}
608 
msix_nr_vectors_allocated(const PCIDevice * dev)609 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
610 {
611     return dev->msix_entries_nr;
612 }
613 
/*
 * Invoke the use notifier for @vector unless the vector is masked.
 * Returns the notifier's result, or 0 when the vector is masked.
 */
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        MSIMessage msg = msix_get_message(dev, vector);

        return dev->msix_vector_use_notifier(dev, vector, msg);
    }

    return 0;
}
624 
/* Invoke the release notifier for @vector unless the vector is masked. */
static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        dev->msix_vector_release_notifier(dev, vector);
    }
}
632 
/*
 * Install the vector notifiers of @dev.
 *
 * @use_notifier and @release_notifier are mandatory; @poll_notifier may
 * be NULL.  If MSI-X is enabled and not function-masked, the use
 * notifier is invoked for every unmasked vector.  When one of those
 * calls fails, the vectors handled so far are released again, all
 * notifiers are removed and the error is returned.  On success the poll
 * notifier, if any, is run once over all vectors and 0 is returned.
 */
int msix_set_vector_notifiers(PCIDevice *dev,
                              MSIVectorUseNotifier use_notifier,
                              MSIVectorReleaseNotifier release_notifier,
                              MSIVectorPollNotifier poll_notifier)
{
    uint8_t control;
    int vector, ret;

    assert(use_notifier && release_notifier);

    dev->msix_vector_use_notifier = use_notifier;
    dev->msix_vector_release_notifier = release_notifier;
    dev->msix_vector_poll_notifier = poll_notifier;

    control = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];
    if ((control & (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) ==
        MSIX_ENABLE_MASK) {
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
            ret = msix_set_notifier_for_vector(dev, vector);
            if (ret >= 0) {
                continue;
            }

            /* Roll back the vectors that were already set up */
            for (vector--; vector >= 0; vector--) {
                msix_unset_notifier_for_vector(dev, vector);
            }
            dev->msix_vector_use_notifier = NULL;
            dev->msix_vector_release_notifier = NULL;
            dev->msix_vector_poll_notifier = NULL;
            return ret;
        }
    }

    if (dev->msix_vector_poll_notifier) {
        dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
    }

    return 0;
}
669 
/*
 * Remove the vector notifiers of @dev.
 *
 * Use and release notifiers must currently be installed.  If MSI-X is
 * enabled and not function-masked, the release notifier is invoked for
 * every unmasked vector before the notifiers are cleared.
 */
void msix_unset_vector_notifiers(PCIDevice *dev)
{
    uint8_t control;
    int i;

    assert(dev->msix_vector_use_notifier &&
           dev->msix_vector_release_notifier);

    control = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];
    if ((control & (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) ==
        MSIX_ENABLE_MASK) {
        for (i = 0; i < dev->msix_entries_nr; i++) {
            msix_unset_notifier_for_vector(dev, i);
        }
    }

    dev->msix_vector_poll_notifier = NULL;
    dev->msix_vector_release_notifier = NULL;
    dev->msix_vector_use_notifier = NULL;
}
687 
/*
 * VMState "put" callback: save the MSI-X state of the device passed as
 * @pv to @f.  Always returns 0.
 */
static int put_msix_state(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, JSONWriter *vmdesc)
{
    PCIDevice *dev = pv;

    msix_save(dev, f);
    return 0;
}
695 
/*
 * VMState "get" callback: load the MSI-X state of the device passed as
 * @pv from @f.  Always returns 0.
 */
static int get_msix_state(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    PCIDevice *dev = pv;

    msix_load(dev, f);

    return 0;
}
702 
703 static const VMStateInfo vmstate_info_msix = {
704     .name = "msix state",
705     .get  = get_msix_state,
706     .put  = put_msix_state,
707 };
708 
709 const VMStateDescription vmstate_msix = {
710     .name = "msix",
711     .fields = (const VMStateField[]) {
712         {
713             .name         = "msix",
714             .version_id   = 0,
715             .field_exists = NULL,
716             .size         = 0,   /* ouch */
717             .info         = &vmstate_info_msix,
718             .flags        = VMS_SINGLE,
719             .offset       = 0,
720         },
721         VMSTATE_END_OF_LIST()
722     }
723 };
724