/*
 *  ioapic.c IOAPIC emulation logic
 *
 *  Copyright (c) 2004-2005 Fabrice Bellard
 *
 *  Split the ioapic logic from apic.c
 *  Xiantao Zhang <xiantao.zhang@intel.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "monitor/monitor.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "hw/i386/ioapic.h"
#include "hw/i386/ioapic_internal.h"
#include "hw/pci/msi.h"
#include "sysemu/kvm.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/x86-iommu.h"
#include "trace.h"

#define APIC_DELIVERY_MODE_SHIFT 8
#define APIC_POLARITY_SHIFT 14
#define APIC_TRIG_MODE_SHIFT 15

static IOAPICCommonState *ioapics[MAX_IOAPICS];

/* global variable from ioapic_common.c */
extern int ioapic_no;

struct ioapic_entry_info {
    /* fields parsed from IOAPIC entries */
    uint8_t masked;
    uint8_t trig_mode;
    uint16_t dest_idx;
    uint8_t dest_mode;
    uint8_t delivery_mode;
    uint8_t vector;

    /* MSI message generated from above parsed fields */
    uint32_t addr;
    uint32_t data;
};

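/*
 * Decode a single I/O redirection table entry into its fields and
 * pre-compute the equivalent MSI address/data pair used for delivery.
 */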
static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
{
    memset(info, 0, sizeof(*info));
    info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
    info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
    /*
     * By default, this field is dest_id[8] + reserved[8]. When IR
     * is enabled, it is interrupt_index[15] + interrupt_format[1].
     * The field is not interpreted here; it is only used to
     * generate the corresponding MSI.
     */
    info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
    info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
    info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
        & IOAPIC_DM_MASK;
    if (info->delivery_mode == IOAPIC_DM_EXTINT) {
        info->vector = pic_read_irq(isa_pic);
    } else {
        info->vector = entry & IOAPIC_VECTOR_MASK;
    }

    info->addr = APIC_DEFAULT_ADDRESS | \
        (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
        (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
    info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
        (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
        (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
}

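/*
 * Deliver all pending, unmasked interrupts.  For each pin whose IRR bit
 * is set: edge-triggered pins have IRR cleared, level-triggered pins set
 * Remote IRR (and are skipped if it was already set).  With a split
 * irqchip the interrupt is injected through KVM; otherwise the parsed
 * entry is written as an MSI into the IOAPIC address space, which also
 * applies interrupt remapping when it is enabled.
 */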
static void ioapic_service(IOAPICCommonState *s)
{
    AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as;
    struct ioapic_entry_info info;
    uint8_t i;
    uint32_t mask;
    uint64_t entry;

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        mask = 1 << i;
        if (s->irr & mask) {
            int coalesce = 0;

            entry = s->ioredtbl[i];
            ioapic_entry_parse(entry, &info);
            if (!info.masked) {
                if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
                    s->irr &= ~mask;
                } else {
                    coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
                    trace_ioapic_set_remote_irr(i);
                    s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
                }

                if (coalesce) {
                    /* This is a level-triggered interrupt and the
                     * guest is still handling the previous one, so
                     * skip it. */
                    continue;
                }

#ifdef CONFIG_KVM
                if (kvm_irqchip_is_split()) {
                    if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
                        kvm_set_irq(kvm_state, i, 1);
                        kvm_set_irq(kvm_state, i, 0);
                    } else {
                        kvm_set_irq(kvm_state, i, 1);
                    }
                    continue;
                }
#endif

                /* Whether or not IR is enabled, we translate the
                 * IOAPIC message into an MSI; the target address
                 * space then decides whether a translation is
                 * needed. */
                stl_le_phys(ioapic_as, info.addr, info.data);
            }
        }
    }
}

#define SUCCESSIVE_IRQ_MAX_COUNT 10000

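/*
 * Timer callback used to re-assert a deferred level-triggered interrupt
 * after an interrupt storm has been detected (see ioapic_eoi_broadcast()).
 */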
static void delayed_ioapic_service_cb(void *opaque)
{
    IOAPICCommonState *s = opaque;

    ioapic_service(s);
}

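/*
 * GSI input handler (qdev GPIO input).  Level-triggered pins latch the
 * line state in the IRR and are serviced unless Remote IRR is still set;
 * edge-triggered pins are serviced immediately unless they are masked.
 */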
static void ioapic_set_irq(void *opaque, int vector, int level)
{
    IOAPICCommonState *s = opaque;

    /* ISA IRQs map to GSIs 1:1, except for IRQ0, which maps
     * to GSI 2.  GSIs map to IOAPIC pins 1:1.  This is not
     * the cleanest way of doing it, but it should work. */

    trace_ioapic_set_irq(vector, level);
    ioapic_stat_update_irq(s, vector, level);
    if (vector == 0) {
        vector = 2;
    }
    if (vector < IOAPIC_NUM_PINS) {
        uint32_t mask = 1 << vector;
        uint64_t entry = s->ioredtbl[vector];

        if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
            IOAPIC_TRIGGER_LEVEL) {
            /* level triggered */
            if (level) {
                s->irr |= mask;
                if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
                    ioapic_service(s);
                }
            } else {
                s->irr &= ~mask;
            }
        } else {
            /* According to the 82093AA manual, we must ignore edge requests
             * if the input pin is masked. */
            if (level && !(entry & IOAPIC_LVT_MASKED)) {
                s->irr |= mask;
                ioapic_service(s);
            }
        }
    }
}

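/*
 * With a split irqchip, reprogram the in-kernel MSI routes for all
 * unmasked pins so that KVM delivers IOAPIC interrupts according to the
 * current redirection table contents.
 */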
static void ioapic_update_kvm_routes(IOAPICCommonState *s)
{
#ifdef CONFIG_KVM
    int i;

    if (kvm_irqchip_is_split()) {
        for (i = 0; i < IOAPIC_NUM_PINS; i++) {
            MSIMessage msg;
            struct ioapic_entry_info info;
            ioapic_entry_parse(s->ioredtbl[i], &info);
            if (!info.masked) {
                msg.address = info.addr;
                msg.data = info.data;
                kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
            }
        }
        kvm_irqchip_commit_routes(kvm_state);
    }
#endif
}

#ifdef CONFIG_KVM
static void ioapic_iec_notifier(void *private, bool global,
                                uint32_t index, uint32_t mask)
{
    IOAPICCommonState *s = (IOAPICCommonState *)private;
    /* For simplicity, we just update all the routes */
    ioapic_update_kvm_routes(s);
}
#endif

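/*
 * Handle an EOI broadcast from the local APIC: clear Remote IRR on every
 * level-triggered entry matching the vector and re-deliver the interrupt
 * if its pin is still asserted.  Delivery is deferred via a timer when an
 * interrupt storm is detected.
 */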
void ioapic_eoi_broadcast(int vector)
{
    IOAPICCommonState *s;
    uint64_t entry;
    int i, n;

    trace_ioapic_eoi_broadcast(vector);

    for (i = 0; i < MAX_IOAPICS; i++) {
        s = ioapics[i];
        if (!s) {
            continue;
        }
        for (n = 0; n < IOAPIC_NUM_PINS; n++) {
            entry = s->ioredtbl[n];

            if ((entry & IOAPIC_VECTOR_MASK) != vector ||
                ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
                continue;
            }

            if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
                continue;
            }

            trace_ioapic_clear_remote_irr(n, vector);
            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;

            if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
                ++s->irq_eoi[n];
                if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
                    /*
                     * Real hardware does not deliver the interrupt immediately
                     * during eoi broadcast, and this lets a buggy guest make
                     * slow progress even if it does not correctly handle a
                     * level-triggered interrupt. Emulate this behavior if we
                     * detect an interrupt storm.
                     */
                    s->irq_eoi[n] = 0;
                    timer_mod_anticipate(s->delayed_ioapic_service_timer,
                                         qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                                         NANOSECONDS_PER_SECOND / 100);
                    trace_ioapic_eoi_delayed_reassert(n);
                } else {
                    ioapic_service(s);
                }
            } else {
                s->irq_eoi[n] = 0;
            }
        }
    }
}

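/*
 * MMIO read handler.  Registers are accessed indirectly: the register
 * index is written to IOREGSEL and the selected register is then read
 * through IOWIN.
 */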
static uint64_t
ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
{
    IOAPICCommonState *s = opaque;
    int index;
    uint32_t val = 0;

    addr &= 0xff;

    switch (addr) {
    case IOAPIC_IOREGSEL:
        val = s->ioregsel;
        break;
    case IOAPIC_IOWIN:
        if (size != 4) {
            break;
        }
        switch (s->ioregsel) {
        case IOAPIC_REG_ID:
        case IOAPIC_REG_ARB:
            val = s->id << IOAPIC_ID_SHIFT;
            break;
        case IOAPIC_REG_VER:
            val = s->version |
                ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
            break;
        default:
            index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
            if (index >= 0 && index < IOAPIC_NUM_PINS) {
                if (s->ioregsel & 1) {
                    val = s->ioredtbl[index] >> 32;
                } else {
                    val = s->ioredtbl[index] & 0xffffffff;
                }
            }
        }
        break;
    }

    trace_ioapic_mem_read(addr, s->ioregsel, size, val);

    return val;
}

/*
 * This accommodates a hack in the Linux kernel, which simulates
 * clearing the Remote IRR bit of an IOAPIC entry as follows:
 *
 * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
 * Otherwise, we simulate the EOI message manually by changing the trigger
 * mode to edge and then back to level, with RTE being masked during
 * this."
 *
 * (See the Linux kernel __eoi_ioapic_pin() comment in commit c0205701)
 *
 * This relies on the assumption that the Remote IRR bit is cleared by
 * IOAPIC hardware when an entry is configured as edge-triggered.
 *
 * Without this, level-triggered interrupts in IR mode might fail to
 * work correctly.
 */
static inline void
ioapic_fix_edge_remote_irr(uint64_t *entry)
{
    if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
        /* Edge-triggered interrupts, make sure remote IRR is zero */
        *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
    }
}

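/*
 * MMIO write handler.  IOREGSEL selects a register and IOWIN writes it.
 * Redirection table writes preserve the read-only bits and are serviced
 * immediately; version 0x20 additionally accepts direct EOI writes.
 * The KVM MSI routes are refreshed after every write.
 */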
static void
ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned int size)
{
    IOAPICCommonState *s = opaque;
    int index;

    addr &= 0xff;
    trace_ioapic_mem_write(addr, s->ioregsel, size, val);

    switch (addr) {
    case IOAPIC_IOREGSEL:
        s->ioregsel = val;
        break;
    case IOAPIC_IOWIN:
        if (size != 4) {
            break;
        }
        switch (s->ioregsel) {
        case IOAPIC_REG_ID:
            s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
            break;
        case IOAPIC_REG_VER:
        case IOAPIC_REG_ARB:
            break;
        default:
            index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
            if (index >= 0 && index < IOAPIC_NUM_PINS) {
                uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
                if (s->ioregsel & 1) {
                    s->ioredtbl[index] &= 0xffffffff;
                    s->ioredtbl[index] |= (uint64_t)val << 32;
                } else {
                    s->ioredtbl[index] &= ~0xffffffffULL;
                    s->ioredtbl[index] |= val;
                }
                /* restore RO bits */
                s->ioredtbl[index] &= IOAPIC_RW_BITS;
                s->ioredtbl[index] |= ro_bits;
                s->irq_eoi[index] = 0;
                ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
                ioapic_service(s);
            }
        }
        break;
    case IOAPIC_EOI:
        /* Explicit EOI is only supported for IOAPIC version 0x20 */
        if (size != 4 || s->version != 0x20) {
            break;
        }
        ioapic_eoi_broadcast(val);
        break;
    }

    ioapic_update_kvm_routes(s);
}

static const MemoryRegionOps ioapic_io_ops = {
    .read = ioapic_mem_read,
    .write = ioapic_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

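/* Machine-init-done hook: with a split irqchip, hook this IOAPIC up to
 * the IOMMU's interrupt-remapping invalidation (IEC) notifier. */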
static void ioapic_machine_done_notify(Notifier *notifier, void *data)
{
#ifdef CONFIG_KVM
    IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
                                        machine_done);

    if (kvm_irqchip_is_split()) {
        X86IOMMUState *iommu = x86_iommu_get_default();
        if (iommu) {
            /* Register this IOAPIC with the IOMMU IEC notifier, so
             * that we are notified of IR invalidations and can
             * update the kernel IR cache. */
            x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
        }
    }
#endif
}

#define IOAPIC_VER_DEF 0x20

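/*
 * Realize: set up the MMIO region, the delayed-service timer and the GSI
 * input pins, register this instance in ioapics[], and arrange for the
 * machine-init-done notifier to run.
 */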
static void ioapic_realize(DeviceState *dev, Error **errp)
{
    IOAPICCommonState *s = IOAPIC_COMMON(dev);

    if (s->version != 0x11 && s->version != 0x20) {
        error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
                   "(default: 0x%x).", IOAPIC_VER_DEF);
        return;
    }

    memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
                          "ioapic", 0x1000);

    s->delayed_ioapic_service_timer =
        timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);

    qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);

    ioapics[ioapic_no] = s;
    s->machine_done.notify = ioapic_machine_done_notify;
    qemu_add_machine_init_done_notifier(&s->machine_done);
}

static void ioapic_unrealize(DeviceState *dev, Error **errp)
{
    IOAPICCommonState *s = IOAPIC_COMMON(dev);

    timer_del(s->delayed_ioapic_service_timer);
    timer_free(s->delayed_ioapic_service_timer);
}

static Property ioapic_properties[] = {
    DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
    DEFINE_PROP_END_OF_LIST(),
};

static void ioapic_class_init(ObjectClass *klass, void *data)
{
    IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    k->realize = ioapic_realize;
    k->unrealize = ioapic_unrealize;
    /*
     * If the APIC is in the kernel, we need to update the kernel
     * routing cache after migration; otherwise the first 24 GSI
     * routes would be invalid.
     */
    k->post_load = ioapic_update_kvm_routes;
    dc->reset = ioapic_reset_common;
    dc->props = ioapic_properties;
}

static const TypeInfo ioapic_info = {
    .name          = TYPE_IOAPIC,
    .parent        = TYPE_IOAPIC_COMMON,
    .instance_size = sizeof(IOAPICCommonState),
    .class_init    = ioapic_class_init,
};

static void ioapic_register_types(void)
{
    type_register_static(&ioapic_info);
}

type_init(ioapic_register_types)