xref: /openbmc/qemu/hw/i386/amd_iommu.c (revision 72b88908d12ee9347d13539c7dd9a252625158d1)
1 /*
2  * QEMU emulation of AMD IOMMU (AMD-Vi)
3  *
4  * Copyright (C) 2011 Eduard - Gabriel Munteanu
5  * Copyright (C) 2015, 2016 David Kiarie Kahurani
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11 
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16 
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  *
20  * Cache implementation inspired by hw/i386/intel_iommu.c
21  */
22 
23 #include "qemu/osdep.h"
24 #include "hw/i386/pc.h"
25 #include "hw/pci/msi.h"
26 #include "hw/pci/pci_bus.h"
27 #include "migration/vmstate.h"
28 #include "amd_iommu.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "hw/i386/apic_internal.h"
32 #include "trace.h"
33 #include "hw/i386/apic-msidef.h"
34 #include "hw/qdev-properties.h"
35 #include "kvm/kvm_i386.h"
36 
/*
 * Printable names of the AMD-Vi MMIO registers, used for tracing only.
 * amdvi_mmio_trace() indexes this table with (offset / 8) for offsets that
 * do not have bit 0x2000 set, clamping the index to AMDVI_MMIO_REGS_LOW;
 * presumably that constant selects the trailing "UNHANDLED" slot.
 */
const char *amdvi_mmio_low[] = {
    [0] = "AMDVI_MMIO_DEVTAB_BASE",
    [1] = "AMDVI_MMIO_CMDBUF_BASE",
    [2] = "AMDVI_MMIO_EVTLOG_BASE",
    [3] = "AMDVI_MMIO_CONTROL",
    [4] = "AMDVI_MMIO_EXCL_BASE",
    [5] = "AMDVI_MMIO_EXCL_LIMIT",
    [6] = "AMDVI_MMIO_EXT_FEATURES",
    [7] = "AMDVI_MMIO_PPR_BASE",
    [8] = "UNHANDLED"
};
/*
 * Printable names of the AMD-Vi MMIO registers in the 0x2000 bank, used for
 * tracing only.  amdvi_mmio_trace() indexes this table with
 * ((offset & ~0x2000) / 8), clamping the index to AMDVI_MMIO_REGS_HIGH;
 * presumably that constant selects the trailing "UNHANDLED" slot.
 */
const char *amdvi_mmio_high[] = {
    [0] = "AMDVI_MMIO_COMMAND_HEAD",
    [1] = "AMDVI_MMIO_COMMAND_TAIL",
    [2] = "AMDVI_MMIO_EVTLOG_HEAD",
    [3] = "AMDVI_MMIO_EVTLOG_TAIL",
    [4] = "AMDVI_MMIO_STATUS",
    [5] = "AMDVI_MMIO_PPR_HEAD",
    [6] = "AMDVI_MMIO_PPR_TAIL",
    [7] = "UNHANDLED"
};
59 
60 struct AMDVIAddressSpace {
61     uint8_t bus_num;            /* bus number                           */
62     uint8_t devfn;              /* device function                      */
63     AMDVIState *iommu_state;    /* AMDVI - one per machine              */
64     MemoryRegion root;          /* AMDVI Root memory map region         */
65     IOMMUMemoryRegion iommu;    /* Device's address translation region  */
66     MemoryRegion iommu_nodma;   /* Alias of shared nodma memory region  */
67     MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
68     AddressSpace as;            /* device's corresponding address space */
69 };
70 
/* One cached translation held in the IOTLB hash table. */
typedef struct AMDVIIOTLBEntry {
    /* domain id assigned to the translation */
    uint16_t domid;
    /* id of the device owning the entry */
    uint16_t devid;
    /* access permissions */
    uint64_t perms;
    /* translated address */
    uint64_t translated_addr;
    /* physical page size mask */
    uint64_t page_mask;
} AMDVIIOTLBEntry;
79 
/*
 * Compute the value of the extended feature register for @s.
 *
 * The result is AMDVI_DEFAULT_EXT_FEATURES, with AMDVI_FEATURE_XT added
 * when the xtsup flag of @s is set.
 */
uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
    uint64_t features = AMDVI_DEFAULT_EXT_FEATURES;

    features |= s->xtsup ? AMDVI_FEATURE_XT : 0;
    return features;
}
89 
90 /* configure MMIO registers at startup/reset */
amdvi_set_quad(AMDVIState * s,hwaddr addr,uint64_t val,uint64_t romask,uint64_t w1cmask)91 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
92                            uint64_t romask, uint64_t w1cmask)
93 {
94     stq_le_p(&s->mmior[addr], val);
95     stq_le_p(&s->romask[addr], romask);
96     stq_le_p(&s->w1cmask[addr], w1cmask);
97 }
98 
/* Return the little-endian 16-bit value stored at offset @addr of the MMIO
 * register array. */
static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
    uint16_t value = lduw_le_p(s->mmior + addr);

    return value;
}
103 
/* Return the little-endian 32-bit value stored at offset @addr of the MMIO
 * register array. */
static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
{
    uint32_t value = ldl_le_p(s->mmior + addr);

    return value;
}
108 
/* Return the little-endian 64-bit value stored at offset @addr of the MMIO
 * register array. */
static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
{
    uint64_t value = ldq_le_p(s->mmior + addr);

    return value;
}
113 
114 /* internal write */
amdvi_writeq_raw(AMDVIState * s,hwaddr addr,uint64_t val)115 static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
116 {
117     stq_le_p(&s->mmior[addr], val);
118 }
119 
120 /* external write */
amdvi_writew(AMDVIState * s,hwaddr addr,uint16_t val)121 static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
122 {
123     uint16_t romask = lduw_le_p(&s->romask[addr]);
124     uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
125     uint16_t oldval = lduw_le_p(&s->mmior[addr]);
126     stw_le_p(&s->mmior[addr],
127             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
128 }
129 
/*
 * External (guest-initiated) 32-bit register write.
 *
 * The per-register masks stored at @addr decide what happens to each bit:
 *   - read-only bits keep their old value;
 *   - write-1-to-clear bits are cleared when @val has a 1 there and keep
 *     their old value otherwise (previously a 0 write cleared them too);
 *   - all remaining bits take the value from @val.
 */
static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
{
    uint32_t romask = ldl_le_p(&s->romask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    uint32_t oldval = ldl_le_p(&s->mmior[addr]);

    uint32_t preserved = oldval & (romask | w1cmask);
    uint32_t written = val & ~romask;
    uint32_t cleared = val & w1cmask;

    stl_le_p(&s->mmior[addr], (preserved | written) & ~cleared);
}
138 
/*
 * External (guest-initiated) 64-bit register write.
 *
 * The per-register masks stored at @addr decide what happens to each bit:
 *   - read-only bits keep their old value;
 *   - write-1-to-clear bits are cleared when @val has a 1 there and keep
 *     their old value otherwise (previously a 0 write cleared them too);
 *   - all remaining bits take the value from @val.
 *
 * The old value is held in a 64-bit local; a 32-bit local would drop the
 * upper half of every read-only field.
 */
static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t romask = ldq_le_p(&s->romask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    uint64_t oldval = ldq_le_p(&s->mmior[addr]);

    uint64_t preserved = oldval & (romask | w1cmask);
    uint64_t written = val & ~romask;
    uint64_t cleared = val & w1cmask;

    stq_le_p(&s->mmior[addr], (preserved | written) & ~cleared);
}
147 
148 /* OR a 64-bit register with a 64-bit value */
amdvi_test_mask(AMDVIState * s,hwaddr addr,uint64_t val)149 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
150 {
151     return amdvi_readq(s, addr) | val;
152 }
153 
154 /* OR a 64-bit register with a 64-bit value storing result in the register */
amdvi_assign_orq(AMDVIState * s,hwaddr addr,uint64_t val)155 static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
156 {
157     amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
158 }
159 
160 /* AND a 64-bit register with a 64-bit value storing result in the register */
amdvi_assign_andq(AMDVIState * s,hwaddr addr,uint64_t val)161 static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
162 {
163    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
164 }
165 
/*
 * Deliver the IOMMU's own MSI, if the guest has enabled MSI on the IOMMU
 * PCI function.  The message for vector 0 is fetched and its data word is
 * written to the message address, tagged with the IOMMU's requester id.
 * Nothing happens when MSI is disabled.
 */
static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
    MSIMessage msg = {};
    MemTxAttrs attrs = {
        .requester_id = pci_requester_id(&s->pci.dev)
    };

    if (!msi_enabled(&s->pci.dev)) {
        return;
    }

    msg = msi_get_message(&s->pci.dev, 0);
    address_space_stl_le(&address_space_memory, msg.address, msg.data,
                         attrs, NULL);
}
179 
/*
 * Append one event record to the guest's event log.
 *
 * @evt: the AMDVI_EVENT_LEN-byte record to write
 *
 * Nothing is logged when event logging is disabled or the overflow status
 * bit is already set.  When the log is considered full, the overflow bit is
 * set and an interrupt is raised instead of writing the record.
 *
 * NOTE(review): evtlog_tail advances in bytes (AMDVI_EVENT_LEN per record)
 * but is compared against evtlog_len, which looks like an entry count -
 * confirm the intended unit.
 * NOTE(review): the status bit raised after a successful write is
 * AMDVI_MMIO_STATUS_COMP_INT - confirm that this is the intended bit for an
 * event log interrupt.
 */
static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
    hwaddr slot;

    /* event logging not enabled */
    if (!s->evtlog_enabled) {
        return;
    }
    /* a previous overflow has not been acknowledged yet */
    if (amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF)) {
        return;
    }

    /* event log buffer full: flag the overflow and tell the guest */
    if (s->evtlog_tail >= s->evtlog_len) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
        amdvi_generate_msi_interrupt(s);
        return;
    }

    slot = s->evtlog + s->evtlog_tail;
    if (dma_memory_write(&address_space_memory, slot, evt, AMDVI_EVENT_LEN,
                         MEMTXATTRS_UNSPECIFIED)) {
        /* a failed write is only traced; the tail still advances below */
        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
    }

    s->evtlog_tail += AMDVI_EVENT_LEN;
    amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
    amdvi_generate_msi_interrupt(s);
}
205 
/*
 * Store @value into a bit field of an event record.
 *
 * @buffer: event record, an array of 64-bit words
 * @value:  value to store; bits beyond @length are discarded
 * @start:  absolute bit position of the field within the record
 * @length: field width in bits, 1..64; the field must not cross a 64-bit
 *          word boundary
 *
 * The mask is built from the bit position inside the selected word, not
 * from @start: shifting by @start is undefined for fields that live in the
 * second word (start >= 64) and would corrupt them.
 */
static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
                                int length)
{
    int index = start / 64, bitpos = start % 64;
    /* @length low bits set, moved to the field's position in its word */
    uint64_t mask = (~0ULL >> (64 - length)) << bitpos;

    buffer[index] &= ~mask;
    buffer[index] |= (value << bitpos) & mask;
}
/*
 * Fill in a 128-bit AMD-Vi event record.
 *
 * Layout written by this function:
 *    bits   0:15  -> DeviceID (@devid)
 *    bits  48:63  -> event type + miscellaneous info (@info)
 *    bits 64:127  -> related address (@addr)
 * All other bits of the record are cleared.
 */
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
                               uint16_t info)
{
    /* start from an all-zero record so untouched fields read as 0 */
    evt[1] = 0;
    evt[0] = 0;

    amdvi_setevent_bits(evt, addr, 64, 64);
    amdvi_setevent_bits(evt, info, 48, 16);
    amdvi_setevent_bits(evt, devid, 0, 16);
}
230 /* log an error encountered during a page walk
231  *
232  * @addr: virtual address in translation request
233  */
amdvi_page_fault(AMDVIState * s,uint16_t devid,hwaddr addr,uint16_t info)234 static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
235                              hwaddr addr, uint16_t info)
236 {
237     uint64_t evt[2];
238 
239     info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
240     amdvi_encode_event(evt, devid, addr, info);
241     amdvi_log_event(s, evt);
242     pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
243             PCI_STATUS_SIG_TARGET_ABORT);
244 }
245 /*
246  * log a master abort accessing device table
247  *  @devtab : address of device table entry
248  *  @info : error flags
249  */
amdvi_log_devtab_error(AMDVIState * s,uint16_t devid,hwaddr devtab,uint16_t info)250 static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
251                                    hwaddr devtab, uint16_t info)
252 {
253     uint64_t evt[2];
254 
255     info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
256 
257     amdvi_encode_event(evt, devid, devtab, info);
258     amdvi_log_event(s, evt);
259     pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
260             PCI_STATUS_SIG_TARGET_ABORT);
261 }
262 /* log an event trying to access command buffer
263  *   @addr : address that couldn't be accessed
264  */
amdvi_log_command_error(AMDVIState * s,hwaddr addr)265 static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
266 {
267     uint64_t evt[2];
268     uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;
269 
270     amdvi_encode_event(evt, 0, addr, info);
271     amdvi_log_event(s, evt);
272     pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
273             PCI_STATUS_SIG_TARGET_ABORT);
274 }
275 /* log an illegal command event
276  *   @addr : address of illegal command
277  */
amdvi_log_illegalcom_error(AMDVIState * s,uint16_t info,hwaddr addr)278 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
279                                        hwaddr addr)
280 {
281     uint64_t evt[2];
282 
283     info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
284     amdvi_encode_event(evt, 0, addr, info);
285     amdvi_log_event(s, evt);
286 }
287 /* log an error accessing device table
288  *
289  *  @devid : device owning the table entry
290  *  @devtab : address of device table entry
291  *  @info : error flags
292  */
amdvi_log_illegaldevtab_error(AMDVIState * s,uint16_t devid,hwaddr addr,uint16_t info)293 static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
294                                           hwaddr addr, uint16_t info)
295 {
296     uint64_t evt[2];
297 
298     info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
299     amdvi_encode_event(evt, devid, addr, info);
300     amdvi_log_event(s, evt);
301 }
302 /* log an error accessing a PTE entry
303  * @addr : address that couldn't be accessed
304  */
amdvi_log_pagetab_error(AMDVIState * s,uint16_t devid,hwaddr addr,uint16_t info)305 static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
306                                     hwaddr addr, uint16_t info)
307 {
308     uint64_t evt[2];
309 
310     info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
311     amdvi_encode_event(evt, devid, addr, info);
312     amdvi_log_event(s, evt);
313     pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
314              PCI_STATUS_SIG_TARGET_ABORT);
315 }
316 
amdvi_uint64_equal(gconstpointer v1,gconstpointer v2)317 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
318 {
319     return *((const uint64_t *)v1) == *((const uint64_t *)v2);
320 }
321 
amdvi_uint64_hash(gconstpointer v)322 static guint amdvi_uint64_hash(gconstpointer v)
323 {
324     return (guint)*(const uint64_t *)v;
325 }
326 
amdvi_iotlb_lookup(AMDVIState * s,hwaddr addr,uint64_t devid)327 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
328                                            uint64_t devid)
329 {
330     uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
331                    ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
332     return g_hash_table_lookup(s->iotlb, &key);
333 }
334 
/* Drop every cached translation.  The IOTLB hash table must already exist. */
static void amdvi_iotlb_reset(AMDVIState *s)
{
    GHashTable *cache = s->iotlb;

    assert(cache);
    trace_amdvi_iotlb_reset();
    g_hash_table_remove_all(cache);
}
341 
amdvi_iotlb_remove_by_devid(gpointer key,gpointer value,gpointer user_data)342 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
343                                             gpointer user_data)
344 {
345     AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
346     uint16_t devid = *(uint16_t *)user_data;
347     return entry->devid == devid;
348 }
349 
/*
 * Remove the cached translation of one page for one device.  The key is
 * built the same way as in amdvi_iotlb_lookup(): 4K page frame number of
 * @addr combined with @devid shifted by AMDVI_DEVID_SHIFT.
 */
static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                    uint64_t devid)
{
    uint64_t gfn = addr >> AMDVI_PAGE_SHIFT_4K;
    uint64_t key = gfn | ((uint64_t)devid << AMDVI_DEVID_SHIFT);

    g_hash_table_remove(s->iotlb, &key);
}
357 
/*
 * Cache a translation in the IOTLB.
 *
 * @devid:    id of the device the translation belongs to
 * @gpa:      address that was translated; its 4K frame number forms the key
 * @to_cache: translation result to remember
 * @domid:    domain id of the device
 *
 * Translations without any permission are treated as erroneous and are not
 * cached.  When the cache has reached AMDVI_IOTLB_MAX_SIZE it is emptied
 * before the new entry is inserted.
 *
 * The key and the entry are heap-allocated and handed over to the hash
 * table (presumably released by the table's destroy callbacks, which are
 * set up outside this function - confirm).
 */
static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
                               uint64_t gpa, IOMMUTLBEntry to_cache,
                               uint16_t domid)
{
    /* don't cache erroneous translations */
    if (to_cache.perm != IOMMU_NONE) {
        AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
        uint64_t *key = g_new(uint64_t, 1);
        uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;

        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                PCI_FUNC(devid), gpa, to_cache.translated_addr);

        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
            amdvi_iotlb_reset(s);
        }

        entry->domid = domid;
        /*
         * g_new() does not zero the allocation; without this store the
         * per-device invalidation (amdvi_iotlb_remove_by_devid) would
         * compare against an uninitialised field.
         */
        entry->devid = devid;
        entry->perms = to_cache.perm;
        entry->translated_addr = to_cache.translated_addr;
        entry->page_mask = to_cache.addr_mask;
        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
        g_hash_table_replace(s->iotlb, key, entry);
    }
}
383 
/*
 * Execute a COMPLETION_WAIT command.
 *
 * @cmd: the two 64-bit words of the command
 *
 * Bits 52..59 of the first word are treated as reserved; an illegal command
 * event is logged when any is set, but execution continues.  When bit 0 is
 * set, the second word is stored at the address given by bits 3..51 of the
 * first word (8-byte aligned).  When bit 1 is set, the completion interrupt
 * status bit is raised and an interrupt is generated.
 */
static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
{
    bool store_requested = extract64(cmd[0], 0, 1);
    bool intr_requested = extract64(cmd[0], 1, 1);
    /* the field omits the low 3 address bits; shift them back in as 0 */
    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
    uint64_t data = cpu_to_le64(cmd[1]);

    if (extract64(cmd[0], 52, 8)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    if (store_requested &&
        dma_memory_write(&address_space_memory, addr, &data,
                         AMDVI_COMPLETION_DATA_SIZE,
                         MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_completion_wait_fail(addr);
    }

    /* set completion interrupt */
    if (intr_requested) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
        amdvi_generate_msi_interrupt(s);
    }
    trace_amdvi_completion_wait(addr, data);
}
409 
410 /* log error without aborting since linux seems to be using reserved bits */
amdvi_inval_devtab_entry(AMDVIState * s,uint64_t * cmd)411 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
412 {
413     uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
414 
415     /* This command should invalidate internal caches of which there isn't */
416     if (extract64(cmd[0], 16, 44) || cmd[1]) {
417         amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
418                                    s->cmdbuf + s->cmdbuf_head);
419     }
420     trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
421                              PCI_FUNC(devid));
422 }
423 
/*
 * Execute a COMPLETE_PPR_REQUEST command.
 *
 * Only the bits this code treats as reserved are checked (an illegal
 * command event is logged if any is set); otherwise the command is just
 * traced.
 */
static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
{
    bool reserved_set = extract64(cmd[0], 16, 16) ||
                        extract64(cmd[0], 52, 8) ||
                        extract64(cmd[1], 0, 2) ||
                        extract64(cmd[1], 3, 29) ||
                        extract64(cmd[1], 48, 16);

    if (reserved_set) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_ppr_exec();
}
434 
/* Forward an interrupt remapping invalidation to x86_iommu_iec_notify_all()
 * for this IOMMU; @global, @index and @mask are passed through unchanged. */
static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global,
                               uint32_t index, uint32_t mask)
{
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    x86_iommu_iec_notify_all(x86_iommu, global, index, mask);
}
440 
/*
 * Execute an INVALIDATE_ALL command.
 *
 * Everything except the opcode must be zero; otherwise an illegal command
 * event is logged (the invalidation is still carried out).  A global
 * interrupt remapping invalidation is broadcast and the IOTLB is emptied.
 */
static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
    bool reserved_set = extract64(cmd[0], 0, 60) || cmd[1];

    if (reserved_set) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    /* notify global invalidation of interrupt remapping state */
    amdvi_intremap_inval_notify_all(s, true, 0, 0);

    amdvi_iotlb_reset(s);
    trace_amdvi_all_inval();
}
454 
amdvi_iotlb_remove_by_domid(gpointer key,gpointer value,gpointer user_data)455 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
456                                             gpointer user_data)
457 {
458     AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
459     uint16_t domid = *(uint16_t *)user_data;
460     return entry->domid == domid;
461 }
462 
463 /* we don't have devid - we can't remove pages by address */
amdvi_inval_pages(AMDVIState * s,uint64_t * cmd)464 static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
465 {
466     uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
467 
468     if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
469         extract64(cmd[1], 3, 9)) {
470         amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
471                                    s->cmdbuf + s->cmdbuf_head);
472     }
473 
474     g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
475                                 &domid);
476     trace_amdvi_pages_inval(domid);
477 }
478 
/*
 * Execute a PREFETCH_IOMMU_PAGES command.
 *
 * No prefetching is performed: the bits this code treats as reserved are
 * checked (an illegal command event is logged if any is set) and the
 * command is traced.
 */
static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    bool reserved_set = extract64(cmd[0], 16, 8) ||
                        extract64(cmd[0], 52, 8) ||
                        extract64(cmd[1], 1, 1) ||
                        extract64(cmd[1], 3, 1) ||
                        extract64(cmd[1], 5, 7);

    if (reserved_set) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}
490 
/*
 * Execute an INVALIDATE_INTERRUPT_TABLE command.
 *
 * When a reserved bit is set (bits 16..59 of the first word, or anything in
 * the second word) an illegal command event is logged and the command is
 * dropped.  Otherwise a global interrupt remapping invalidation is
 * broadcast.
 */
static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    bool reserved_set = extract64(cmd[0], 16, 44) || cmd[1];

    if (reserved_set) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    /* notify global invalidation of interrupt remapping state */
    amdvi_intremap_inval_notify_all(s, true, 0, 0);

    trace_amdvi_intr_inval();
}
504 
505 /* FIXME: Try to work with the specified size instead of all the pages
506  * when the S bit is on
507  */
iommu_inval_iotlb(AMDVIState * s,uint64_t * cmd)508 static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
509 {
510 
511     uint16_t devid = extract64(cmd[0], 0, 16);
512     if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
513         extract64(cmd[1], 6, 6)) {
514         amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
515                                    s->cmdbuf + s->cmdbuf_head);
516         return;
517     }
518 
519     if (extract64(cmd[1], 0, 1)) {
520         g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
521                                     &devid);
522     } else {
523         amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
524                                 cpu_to_le16(extract64(cmd[1], 0, 16)));
525     }
526     trace_amdvi_iotlb_inval();
527 }
528 
529 /* not honouring reserved bits is regarded as an illegal command */
amdvi_cmdbuf_exec(AMDVIState * s)530 static void amdvi_cmdbuf_exec(AMDVIState *s)
531 {
532     uint64_t cmd[2];
533 
534     if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
535                         cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
536         trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
537         amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
538         return;
539     }
540 
541     switch (extract64(cmd[0], 60, 4)) {
542     case AMDVI_CMD_COMPLETION_WAIT:
543         amdvi_completion_wait(s, cmd);
544         break;
545     case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
546         amdvi_inval_devtab_entry(s, cmd);
547         break;
548     case AMDVI_CMD_INVAL_AMDVI_PAGES:
549         amdvi_inval_pages(s, cmd);
550         break;
551     case AMDVI_CMD_INVAL_IOTLB_PAGES:
552         iommu_inval_iotlb(s, cmd);
553         break;
554     case AMDVI_CMD_INVAL_INTR_TABLE:
555         amdvi_inval_inttable(s, cmd);
556         break;
557     case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
558         amdvi_prefetch_pages(s, cmd);
559         break;
560     case AMDVI_CMD_COMPLETE_PPR_REQUEST:
561         amdvi_complete_ppr(s, cmd);
562         break;
563     case AMDVI_CMD_INVAL_AMDVI_ALL:
564         amdvi_inval_all(s, cmd);
565         break;
566     default:
567         trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4));
568         /* log illegal command */
569         amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4),
570                                    s->cmdbuf + s->cmdbuf_head);
571     }
572 }
573 
/*
 * Execute every pending command, i.e. until the head pointer catches up
 * with the tail pointer.  Nothing is executed while the command buffer is
 * disabled.
 *
 * After each command the head advances by AMDVI_COMMAND_SIZE and wraps to 0
 * at the end of the ring.  The head register is updated after the wrap, so
 * the guest never reads a head value that lies outside the ring.
 *
 * NOTE(review): if the guest programs a tail the head can never reach
 * (e.g. beyond the ring size) this loop does not terminate - confirm that
 * the tail mask rules this out.
 */
static void amdvi_cmdbuf_run(AMDVIState *s)
{
    if (!s->cmdbuf_enabled) {
        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
        return;
    }

    /* check if there is work to do. */
    while (s->cmdbuf_head != s->cmdbuf_tail) {
        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
        amdvi_cmdbuf_exec(s);
        s->cmdbuf_head += AMDVI_COMMAND_SIZE;

        /* wrap head pointer */
        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
            s->cmdbuf_head = 0;
        }

        /* publish the (already wrapped) head to the guest-visible register */
        amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);
    }
}
594 
/*
 * Emit a trace point naming the MMIO register that contains @addr.
 *
 * Offsets with bit 0x2000 set are looked up in amdvi_mmio_high, all others
 * in amdvi_mmio_low; the table index is the 8-byte register number within
 * its bank.  Indices past the named registers are clamped to
 * AMDVI_MMIO_REGS_HIGH / AMDVI_MMIO_REGS_LOW.
 *
 * The index is kept at full width: an 8-bit index would wrap for offsets
 * of 0x800 and above and report the name of an unrelated register.
 *
 * NOTE(review): the read trace point is emitted even when this is called
 * from the MMIO write path.
 */
static void amdvi_mmio_trace(hwaddr addr, unsigned size)
{
    hwaddr index = (addr & ~0x2000) / 8;
    const char *name;

    if ((addr & 0x2000)) {
        /* high table */
        index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
        name = amdvi_mmio_high[index];
    } else {
        index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
        name = amdvi_mmio_low[index];
    }

    trace_amdvi_mmio_read(name, addr, size, addr & ~0x07);
}
608 
/*
 * MMIO read handler.
 *
 * @opaque: the AMDVIState
 * @addr:   byte offset into the MMIO region
 * @size:   access size in bytes
 *
 * Returns the register contents for 2, 4 and 8 byte accesses.  Accesses
 * that extend past AMDVI_MMIO_SIZE, and other access sizes, read as all
 * ones.
 */
static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    AMDVIState *s = opaque;
    uint64_t val;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
        return (uint64_t)-1;
    }

    switch (size) {
    case 2:
        val = amdvi_readw(s, addr);
        break;
    case 4:
        val = amdvi_readl(s, addr);
        break;
    case 8:
        val = amdvi_readq(s, addr);
        break;
    default:
        val = (uint64_t)-1;
        break;
    }
    amdvi_mmio_trace(addr, size);

    return val;
}
630 
/*
 * React to a guest write to the control register.
 *
 * The cached enable flags are refreshed from the register; event logging
 * and the command buffer only count as enabled while the IOMMU itself is
 * enabled.  The "running" bits of the status register are updated to match,
 * and pending commands are then executed.
 *
 * The register is read into a uint64_t: unsigned long is only 32 bits wide
 * on some hosts and would truncate the 64-bit register value.
 */
static void amdvi_handle_control_write(AMDVIState *s)
{
    uint64_t control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);

    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
    s->evtlog_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_EVENTLOGEN);

    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
    s->cmdbuf_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
    s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);

    /* update the flags depending on the control register */
    if (s->cmdbuf_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
    }
    if (s->evtlog_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
    }

    trace_amdvi_control_status(control);
    amdvi_cmdbuf_run(s);
}
661 
/*
 * React to a guest write to the device table base register: cache the
 * table base address and the number of entries the table can hold.
 *
 * The size field encodes (number of size units - 1), so the entry count is
 * (field + 1) * entries-per-unit.  The parentheses matter: without them the
 * multiplication binds to the constant 1 only.
 */
static inline void amdvi_handle_devtab_write(AMDVIState *s)

{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);

    /* set device table length (number of entries) */
    s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
}
673 
/* React to a guest write to the command buffer head register: cache the
 * masked head pointer and execute any pending commands. */
static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
{
    uint64_t reg = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD);

    s->cmdbuf_head = reg & AMDVI_MMIO_CMDBUF_HEAD_MASK;
    amdvi_cmdbuf_run(s);
}
680 
/*
 * React to a guest write to the command buffer base register: cache the
 * buffer base address and its length (a power of two taken from the size
 * field), and reset the head and tail pointers to 0.
 */
static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
{
    uint64_t base_reg = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE);
    uint64_t size_log2 = amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
                         & AMDVI_MMIO_CMDBUF_SIZE_MASK;

    s->cmdbuf = base_reg & AMDVI_MMIO_CMDBUF_BASE_MASK;
    s->cmdbuf_len = 1UL << size_log2;
    s->cmdbuf_tail = 0;
    s->cmdbuf_head = 0;
}
689 
/* React to a guest write to the command buffer tail register: cache the
 * masked tail pointer and execute any pending commands. */
static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
{
    uint64_t reg = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL);

    s->cmdbuf_tail = reg & AMDVI_MMIO_CMDBUF_TAIL_MASK;
    amdvi_cmdbuf_run(s);
}
696 
/* React to a guest write to the exclusion limit register: cache the masked
 * limit with the AMDVI_MMIO_EXCL_LIMIT_LOW bits set. */
static inline void amdvi_handle_excllim_write(AMDVIState *s)
{
    uint64_t limit = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT)
                     & AMDVI_MMIO_EXCL_LIMIT_MASK;

    s->excl_limit = limit | AMDVI_MMIO_EXCL_LIMIT_LOW;
}
703 
/* React to a guest write to the event log base register: cache the log
 * base address and its length (a power of two taken from the size field). */
static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
    uint64_t base_reg = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
    uint64_t size_log2 = amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
                         & AMDVI_MMIO_EVTLOG_SIZE_MASK;

    s->evtlog = base_reg & AMDVI_MMIO_EVTLOG_BASE_MASK;
    s->evtlog_len = 1UL << size_log2;
}
711 
/* React to a guest write to the event log tail register: cache the masked
 * tail pointer. */
static inline void amdvi_handle_evttail_write(AMDVIState *s)
{
    uint64_t reg = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);

    s->evtlog_tail = reg & AMDVI_MMIO_EVTLOG_TAIL_MASK;
}
717 
/* React to a guest write to the event log head register: cache the masked
 * head pointer. */
static inline void amdvi_handle_evthead_write(AMDVIState *s)
{
    uint64_t reg = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);

    s->evtlog_head = reg & AMDVI_MMIO_EVTLOG_HEAD_MASK;
}
723 
/* React to a guest write to the PPR log base register: cache the log base
 * address and its length (a power of two taken from the size field). */
static inline void amdvi_handle_pprbase_write(AMDVIState *s)
{
    uint64_t base_reg = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
    uint64_t size_log2 = amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
                         & AMDVI_MMIO_PPRLOG_SIZE_MASK;

    s->ppr_log = base_reg & AMDVI_MMIO_PPRLOG_BASE_MASK;
    s->pprlog_len = 1UL << size_log2;
}
731 
/* React to a guest write to the PPR log head register: cache the masked
 * head pointer. */
static inline void amdvi_handle_pprhead_write(AMDVIState *s)
{
    uint64_t reg = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);

    s->pprlog_head = reg & AMDVI_MMIO_PPRLOG_HEAD_MASK;
}
737 
/* React to a guest write to the PPR log tail register: cache the masked
 * tail pointer. */
static inline void amdvi_handle_pprtail_write(AMDVIState *s)
{
    uint64_t reg = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);

    s->pprlog_tail = reg & AMDVI_MMIO_PPRLOG_TAIL_MASK;
}
743 
744 /* FIXME: something might go wrong if System Software writes in chunks
745  * of one byte but linux writes in chunks of 4 bytes so currently it
746  * works correctly with linux but will definitely be busted if software
747  * reads/writes 8 bytes
748  */
amdvi_mmio_reg_write(AMDVIState * s,unsigned size,uint64_t val,hwaddr addr)749 static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
750                                  hwaddr addr)
751 {
752     if (size == 2) {
753         amdvi_writew(s, addr, val);
754     } else if (size == 4) {
755         amdvi_writel(s, addr, val);
756     } else if (size == 8) {
757         amdvi_writeq(s, addr, val);
758     }
759 }
760 
/*
 * MMIO write handler.
 *
 * @opaque: the AMDVIState
 * @addr:   byte offset into the MMIO region
 * @val:    value written by the guest
 * @size:   access size in bytes
 *
 * Writes extending past AMDVI_MMIO_SIZE are traced and dropped.  For the
 * registers listed below the value is stored through the masked write
 * helpers and the register's handler then refreshes the cached state.
 * Writes to any other register are ignored.
 */
static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    AMDVIState *s = opaque;
    /* byte offset of the access within its 8-byte register */
    unsigned long offset = addr & 0x07;
    /* a 64-bit register counts as complete after an upper-half or full write */
    bool reg_complete = offset || (size == 8);

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_write("error: addr outside region: max ",
                (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
        return;
    }

    amdvi_mmio_trace(addr, size);
    switch (addr & ~0x07) {
    case AMDVI_MMIO_CONTROL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_control_write(s);
        break;

    case AMDVI_MMIO_DEVICE_TABLE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /*
         * Set the device table address.  This also suffers from the
         * inability to tell whether software is done writing.
         */
        if (reg_complete) {
            amdvi_handle_devtab_write(s);
        }
        break;

    case AMDVI_MMIO_COMMAND_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdhead_write(s);
        break;

    case AMDVI_MMIO_COMMAND_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /*
         * FIXME - make sure System Software has finished writing, in a
         * robust way, in case it writes in chunks of less than 8 bytes.
         * For now this hack works for the linux driver.
         */
        if (reg_complete) {
            amdvi_handle_cmdbase_write(s);
        }
        break;

    case AMDVI_MMIO_COMMAND_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdtail_write(s);
        break;

    case AMDVI_MMIO_EVENT_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evtbase_write(s);
        break;

    case AMDVI_MMIO_EVENT_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evthead_write(s);
        break;

    case AMDVI_MMIO_EVENT_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evttail_write(s);
        break;

    case AMDVI_MMIO_EXCL_LIMIT:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_excllim_write(s);
        break;

    /* PPR log base - unused for now */
    case AMDVI_MMIO_PPR_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprbase_write(s);
        break;

    /* PPR log head - also unused for now */
    case AMDVI_MMIO_PPR_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprhead_write(s);
        break;

    /* PPR log tail - unused for now */
    case AMDVI_MMIO_PPR_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprtail_write(s);
        break;

    default:
        /* no handler for this register: the write is dropped */
        break;
    }
}
840 
amdvi_get_perms(uint64_t entry)841 static inline uint64_t amdvi_get_perms(uint64_t entry)
842 {
843     return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
844            AMDVI_DEV_PERM_SHIFT;
845 }
846 
847 /* validate that reserved bits are honoured */
amdvi_validate_dte(AMDVIState * s,uint16_t devid,uint64_t * dte)848 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
849                                uint64_t *dte)
850 {
851     if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
852         || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
853         || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
854         amdvi_log_illegaldevtab_error(s, devid,
855                                       s->devtab +
856                                       devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
857         return false;
858     }
859 
860     return true;
861 }
862 
863 /* get a device table entry given the devid */
amdvi_get_dte(AMDVIState * s,int devid,uint64_t * entry)864 static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
865 {
866     uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
867 
868     if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
869                         AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
870         trace_amdvi_dte_get_fail(s->devtab, offset);
871         /* log error accessing dte */
872         amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
873         return false;
874     }
875 
876     *entry = le64_to_cpu(*entry);
877     if (!amdvi_validate_dte(s, devid, entry)) {
878         trace_amdvi_invalid_dte(entry[0]);
879         return false;
880     }
881 
882     return true;
883 }
884 
885 /* get pte translation mode */
get_pte_translation_mode(uint64_t pte)886 static inline uint8_t get_pte_translation_mode(uint64_t pte)
887 {
888     return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
889 }
890 
pte_override_page_mask(uint64_t pte)891 static inline uint64_t pte_override_page_mask(uint64_t pte)
892 {
893     uint8_t page_mask = 13;
894     uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
895     /* find the first zero bit */
896     while (addr & 1) {
897         page_mask++;
898         addr = addr >> 1;
899     }
900 
901     return ~((1ULL << page_mask) - 1);
902 }
903 
/*
 * Page mask for a mapping found at page table level @oldlevel.
 *
 * Each level contributes 9 address bits on top of a base of 3, so the mask
 * clears the low (oldlevel * 9 + 3) bits: level 1 yields a 4 KiB mask.
 * The shift is done on a 64-bit constant because it exceeds 31 for the
 * higher levels, which would overflow on hosts where long is 32 bits wide.
 */
static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
{
    return ~((1ULL << ((oldlevel * 9) + 3)) - 1);
}
908 
/*
 * Read the page table entry located at guest address @pte_addr.
 *
 * Returns the entry in host byte order. If the read fails, a page table
 * error event is logged for @devid and 0 is returned.
 */
static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
                                          uint16_t devid)
{
    uint64_t raw;

    if (!dma_memory_read(&address_space_memory, pte_addr, &raw, sizeof(raw),
                         MEMTXATTRS_UNSPECIFIED)) {
        return le64_to_cpu(raw);
    }

    trace_amdvi_get_pte_hwerror(pte_addr);
    amdvi_log_pagetab_error(s, devid, pte_addr, 0);
    return 0;
}
925 
/*
 * Walk the I/O page table described by @dte for @addr and fill in @ret.
 *
 * - If the DTE has TV = 0 or translation mode 0, @ret becomes a 4 KiB
 *   identity mapping carrying the permission bits of the DTE.
 * - A DTE mode of 7 or above is traced as invalid and @ret is left untouched.
 * - Otherwise the table is descended until an entry with mode 0 or mode 7 is
 *   reached. A non-present entry or one lacking @perms raises a page fault;
 *   a failed or all-zero entry read stops the walk. In both cases @ret is
 *   left untouched.
 */
static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                            IOMMUTLBEntry *ret, unsigned perms,
                            hwaddr addr)
{
    unsigned level, pte_perms, oldlevel;
    uint64_t pte = dte[0], pte_addr, page_mask;

    /* without TV = 1 there is nothing to walk */
    if (!(pte & AMDVI_DEV_TRANSLATION_VALID)) {
        goto no_remap;
    }

    level = get_pte_translation_mode(pte);
    if (level >= 7) {
        trace_amdvi_mode_invalid(level, addr);
        return;
    }
    if (level == 0) {
        goto no_remap;
    }

    /* descend until a leaf (mode 0) or an override entry (mode 7) is hit */
    do {
        pte_perms = amdvi_get_perms(pte);
        if (!(pte & 1) || (perms & pte_perms) != perms) {
            /*
             * NOTE(review): as->devfn is passed here and to
             * amdvi_get_pte_entry(), whose parameter is named devid;
             * confirm whether the full bus/devfn id is intended.
             */
            amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
            trace_amdvi_page_fault(addr);
            return;
        }

        /* table base plus the 9-bit index of this level, 8 bytes per entry */
        pte_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) +
                   (((addr >> (3 + 9 * level)) & 0x1FF) << 3);
        pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
        if (!pte) {
            return;
        }
        oldlevel = level;
        level = get_pte_translation_mode(pte);
    } while (level > 0 && level < 7);

    page_mask = (level == 0x7) ? pte_override_page_mask(pte)
                               : pte_get_page_mask(oldlevel);

    /* address and access permissions come from the final pte */
    ret->iova = addr & page_mask;
    ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
    ret->addr_mask = ~page_mask;
    ret->perm = amdvi_get_perms(pte);
    return;

no_remap:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = amdvi_get_perms(pte);
}
985 
/*
 * Translate @addr for the device behind @as and store the result in @ret.
 *
 * A hit in the IOTLB is returned directly. Otherwise the device table entry
 * is fetched; if that fails @ret is left as passed in. A DTE with V = 0
 * yields a 4 KiB read/write identity mapping. For a valid DTE the page table
 * is walked and the outcome is handed to amdvi_update_iotlb().
 */
static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
                               bool is_write, IOMMUTLBEntry *ret)
{
    AMDVIState *s = as->iommu_state;
    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
    AMDVIIOTLBEntry *cached = amdvi_iotlb_lookup(s, addr, devid);
    uint64_t dte[4];

    if (cached) {
        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                PCI_FUNC(devid), addr, cached->translated_addr);
        ret->iova = addr & ~cached->page_mask;
        ret->translated_addr = cached->translated_addr;
        ret->addr_mask = cached->page_mask;
        ret->perm = cached->perms;
        return;
    }

    if (!amdvi_get_dte(s, devid, dte)) {
        return;
    }

    if (dte[0] & AMDVI_DEV_VALID) {
        unsigned wanted = is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ;

        amdvi_page_walk(as, dte, ret, wanted, addr);
        amdvi_update_iotlb(s, devid, addr, *ret,
                           dte[1] & AMDVI_DEV_DOMID_ID_MASK);
        return;
    }

    /* devices with V = 0 are not translated */
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = IOMMU_RW;
}
1026 
/*
 * True if @addr lies within [AMDVI_INT_ADDR_FIRST, AMDVI_INT_ADDR_LAST],
 * both bounds included.
 */
static inline bool amdvi_is_interrupt_addr(hwaddr addr)
{
    return !(addr < AMDVI_INT_ADDR_FIRST || addr > AMDVI_INT_ADDR_LAST);
}
1031 
amdvi_translate(IOMMUMemoryRegion * iommu,hwaddr addr,IOMMUAccessFlags flag,int iommu_idx)1032 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
1033                                      IOMMUAccessFlags flag, int iommu_idx)
1034 {
1035     AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1036     AMDVIState *s = as->iommu_state;
1037     IOMMUTLBEntry ret = {
1038         .target_as = &address_space_memory,
1039         .iova = addr,
1040         .translated_addr = 0,
1041         .addr_mask = ~(hwaddr)0,
1042         .perm = IOMMU_NONE
1043     };
1044 
1045     if (!s->enabled) {
1046         /* AMDVI disabled - corresponds to iommu=off not
1047          * failure to provide any parameter
1048          */
1049         ret.iova = addr & AMDVI_PAGE_MASK_4K;
1050         ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1051         ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1052         ret.perm = IOMMU_RW;
1053         return ret;
1054     } else if (amdvi_is_interrupt_addr(addr)) {
1055         ret.iova = addr & AMDVI_PAGE_MASK_4K;
1056         ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1057         ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1058         ret.perm = IOMMU_WO;
1059         return ret;
1060     }
1061 
1062     amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
1063     trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
1064             PCI_FUNC(as->devfn), addr, ret.translated_addr);
1065     return ret;
1066 }
1067 
/*
 * Read the interrupt remapping table entry selected by @origin.
 *
 * The table base is taken from dte[2] and the entry offset from the MSI
 * data masked with AMDVI_IRTE_OFFSET, scaled by 4 bytes per entry.
 * Returns 0 on success or -AMDVI_IR_GET_IRTE if the guest memory read fails.
 */
static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                          union irte *irte, uint16_t devid)
{
    uint64_t table_base = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    uint64_t entry_off = (origin->data & AMDVI_IRTE_OFFSET) << 2;

    trace_amdvi_ir_irte(table_base, entry_off);

    if (dma_memory_read(&address_space_memory, table_base + entry_off,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_val(irte->val);

    return 0;
}
1088 
/*
 * Remap an interrupt using the legacy IRTE format.
 *
 * Fetches the IRTE for @origin and, if it is valid, not in guest mode and
 * has a non-reserved interrupt type, copies its fields into @irq.
 * Returns 0 on success or a negative AMDVI_IR_* code.
 */
static int amdvi_int_remap_legacy(AMDVIState *iommu,
                                  MSIMessage *origin,
                                  MSIMessage *translated,
                                  uint64_t *dte,
                                  X86IOMMUIrq *irq,
                                  uint16_t sid)
{
    union irte entry;
    int rc = amdvi_get_irte(iommu, origin, dte, &entry, sid);

    if (rc < 0) {
        return rc;
    }

    if (!entry.fields.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (entry.fields.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (entry.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type");
        return -AMDVI_IR_ERR;
    }

    irq->vector = entry.fields.vector;
    irq->delivery_mode = entry.fields.int_type;
    irq->dest = entry.fields.destination;
    irq->dest_mode = entry.fields.dm;
    irq->redir_hint = entry.fields.rq_eoi;

    return 0;
}
1128 
/*
 * Read the interrupt remapping table entry selected by @origin in the
 * guest-APIC (GA) format.
 *
 * Same lookup as the legacy variant, except that the offset is scaled by
 * 16 bytes per entry. Returns 0 on success or -AMDVI_IR_GET_IRTE if the
 * guest memory read fails.
 */
static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                             struct irte_ga *irte, uint16_t devid)
{
    uint64_t table_base = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    uint64_t entry_off = (origin->data & AMDVI_IRTE_OFFSET) << 4;

    trace_amdvi_ir_irte(table_base, entry_off);

    if (dma_memory_read(&address_space_memory, table_base + entry_off,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte_ga");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
    return 0;
}
1147 
/*
 * Remap an interrupt using the guest-APIC (GA) IRTE format.
 *
 * Fetches the IRTE for @origin and, if it is valid, not in guest mode and
 * has a non-reserved interrupt type, copies its fields into @irq. With
 * xtsup set the destination is extended by destination_hi in bits 31:24;
 * otherwise only the low 8 destination bits are used.
 * Returns 0 on success or a negative AMDVI_IR_* code.
 */
static int amdvi_int_remap_ga(AMDVIState *iommu,
                              MSIMessage *origin,
                              MSIMessage *translated,
                              uint64_t *dte,
                              X86IOMMUIrq *irq,
                              uint16_t sid)
{
    int ret;
    struct irte_ga irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.lo.fields_remap.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.lo.fields_remap.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type is set");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.lo.fields_remap.int_type;
    irq->vector = irte.hi.fields.vector;
    irq->dest_mode = irte.lo.fields_remap.dm;
    irq->redir_hint = irte.lo.fields_remap.rq_eoi;
    if (iommu->xtsup) {
        /*
         * Widen destination_hi before shifting: a narrow bit-field is
         * promoted to int, and moving a set top bit into bit 31 of an int
         * would be a signed overflow.
         */
        irq->dest = irte.lo.fields_remap.destination |
                    ((uint32_t)irte.hi.fields.destination_hi << 24);
    } else {
        irq->dest = irte.lo.fields_remap.destination & 0xff;
    }

    return 0;
}
1192 
/*
 * Apply the interrupt control (IntCtl) policy of the DTE to @origin.
 *
 * IntCtl is the 2-bit field of dte[2] at AMDVI_IR_INTCTL_SHIFT:
 *  - pass:   @origin is copied to @translated unchanged, returns 0
 *  - remap:  the IRTE is looked up (GA or legacy format) and @irq filled in
 *  - abort:  returns -AMDVI_IR_TARGET_ABORT
 *  - other:  returns -AMDVI_IR_ERR
 */
static int __amdvi_int_remap_msi(AMDVIState *iommu,
                                 MSIMessage *origin,
                                 MSIMessage *translated,
                                 uint64_t *dte,
                                 X86IOMMUIrq *irq,
                                 uint16_t sid)
{
    uint8_t int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;

    trace_amdvi_ir_intctl(int_ctl);

    switch (int_ctl) {
    case AMDVI_IR_INTCTL_PASS:
        memcpy(translated, origin, sizeof(*origin));
        return 0;
    case AMDVI_IR_INTCTL_REMAP:
        /* handled below */
        break;
    case AMDVI_IR_INTCTL_ABORT:
        trace_amdvi_ir_target_abort("int_ctl abort");
        return -AMDVI_IR_TARGET_ABORT;
    default:
        trace_amdvi_ir_err("int_ctl reserved");
        return -AMDVI_IR_ERR;
    }

    if (iommu->ga_enabled) {
        return amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
    }
    return amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
}
1228 
1229 /* Interrupt remapping for MSI/MSI-X entry */
amdvi_int_remap_msi(AMDVIState * iommu,MSIMessage * origin,MSIMessage * translated,uint16_t sid)1230 static int amdvi_int_remap_msi(AMDVIState *iommu,
1231                                MSIMessage *origin,
1232                                MSIMessage *translated,
1233                                uint16_t sid)
1234 {
1235     int ret = 0;
1236     uint64_t pass = 0;
1237     uint64_t dte[4] = { 0 };
1238     X86IOMMUIrq irq = { 0 };
1239     uint8_t dest_mode, delivery_mode;
1240 
1241     assert(origin && translated);
1242 
1243     /*
1244      * When IOMMU is enabled, interrupt remap request will come either from
1245      * IO-APIC or PCI device. If interrupt is from PCI device then it will
1246      * have a valid requester id but if the interrupt is from IO-APIC
1247      * then requester id will be invalid.
1248      */
1249     if (sid == X86_IOMMU_SID_INVALID) {
1250         sid = AMDVI_IOAPIC_SB_DEVID;
1251     }
1252 
1253     trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
1254 
1255     /* check if device table entry is set before we go further. */
1256     if (!iommu || !iommu->devtab_len) {
1257         memcpy(translated, origin, sizeof(*origin));
1258         goto out;
1259     }
1260 
1261     if (!amdvi_get_dte(iommu, sid, dte)) {
1262         return -AMDVI_IR_ERR;
1263     }
1264 
1265     /* Check if IR is enabled in DTE */
1266     if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
1267         memcpy(translated, origin, sizeof(*origin));
1268         goto out;
1269     }
1270 
1271     /* validate that we are configure with intremap=on */
1272     if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
1273         trace_amdvi_err("Interrupt remapping is enabled in the guest but "
1274                         "not in the host. Use intremap=on to enable interrupt "
1275                         "remapping in amd-iommu.");
1276         return -AMDVI_IR_ERR;
1277     }
1278 
1279     if (origin->address < AMDVI_INT_ADDR_FIRST ||
1280         origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
1281         trace_amdvi_err("MSI is not from IOAPIC.");
1282         return -AMDVI_IR_ERR;
1283     }
1284 
1285     /*
1286      * The MSI data register [10:8] are used to get the upstream interrupt type.
1287      *
1288      * See MSI/MSI-X format:
1289      * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
1290      * (page 5)
1291      */
1292     delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
1293 
1294     switch (delivery_mode) {
1295     case AMDVI_IOAPIC_INT_TYPE_FIXED:
1296     case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
1297         trace_amdvi_ir_delivery_mode("fixed/arbitrated");
1298         ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
1299         if (ret < 0) {
1300             goto remap_fail;
1301         } else {
1302             /* Translate IRQ to MSI messages */
1303             x86_iommu_irq_to_msi_message(&irq, translated);
1304             goto out;
1305         }
1306         break;
1307     case AMDVI_IOAPIC_INT_TYPE_SMI:
1308         error_report("SMI is not supported!");
1309         ret = -AMDVI_IR_ERR;
1310         break;
1311     case AMDVI_IOAPIC_INT_TYPE_NMI:
1312         pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
1313         trace_amdvi_ir_delivery_mode("nmi");
1314         break;
1315     case AMDVI_IOAPIC_INT_TYPE_INIT:
1316         pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
1317         trace_amdvi_ir_delivery_mode("init");
1318         break;
1319     case AMDVI_IOAPIC_INT_TYPE_EINT:
1320         pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
1321         trace_amdvi_ir_delivery_mode("eint");
1322         break;
1323     default:
1324         trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
1325         ret = -AMDVI_IR_ERR;
1326         break;
1327     }
1328 
1329     if (ret < 0) {
1330         goto remap_fail;
1331     }
1332 
1333     /*
1334      * The MSI address register bit[2] is used to get the destination
1335      * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
1336      * only.
1337      */
1338     dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
1339     if (dest_mode) {
1340         trace_amdvi_ir_err("invalid dest_mode");
1341         ret = -AMDVI_IR_ERR;
1342         goto remap_fail;
1343     }
1344 
1345     if (pass) {
1346         memcpy(translated, origin, sizeof(*origin));
1347     } else {
1348         trace_amdvi_ir_err("passthrough is not enabled");
1349         ret = -AMDVI_IR_ERR;
1350         goto remap_fail;
1351     }
1352 
1353 out:
1354     trace_amdvi_ir_remap_msi(origin->address, origin->data,
1355                              translated->address, translated->data);
1356     return 0;
1357 
1358 remap_fail:
1359     return ret;
1360 }
1361 
/*
 * Interrupt remapping entry point taking the generic X86IOMMUState: casts
 * @iommu to the AMD-Vi device and forwards to amdvi_int_remap_msi().
 */
static int amdvi_int_remap(X86IOMMUState *iommu,
                           MSIMessage *origin,
                           MSIMessage *translated,
                           uint16_t sid)
{
    AMDVIState *amdvi = AMD_IOMMU_DEVICE(iommu);

    return amdvi_int_remap_msi(amdvi, origin, translated, sid);
}
1370 
amdvi_mem_ir_write(void * opaque,hwaddr addr,uint64_t value,unsigned size,MemTxAttrs attrs)1371 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
1372                                       uint64_t value, unsigned size,
1373                                       MemTxAttrs attrs)
1374 {
1375     int ret;
1376     MSIMessage from = { 0, 0 }, to = { 0, 0 };
1377     uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
1378 
1379     from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
1380     from.data = (uint32_t) value;
1381 
1382     trace_amdvi_mem_ir_write_req(addr, value, size);
1383 
1384     if (!attrs.unspecified) {
1385         /* We have explicit Source ID */
1386         sid = attrs.requester_id;
1387     }
1388 
1389     ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
1390     if (ret < 0) {
1391         /* TODO: log the event using IOMMU log event interface */
1392         error_report_once("failed to remap interrupt from devid 0x%x", sid);
1393         return MEMTX_ERROR;
1394     }
1395 
1396     apic_get_class(NULL)->send_msi(&to);
1397 
1398     trace_amdvi_mem_ir_write(to.address, to.data);
1399     return MEMTX_OK;
1400 }
1401 
amdvi_mem_ir_read(void * opaque,hwaddr addr,uint64_t * data,unsigned size,MemTxAttrs attrs)1402 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
1403                                      uint64_t *data, unsigned size,
1404                                      MemTxAttrs attrs)
1405 {
1406     return MEMTX_OK;
1407 }
1408 
1409 static const MemoryRegionOps amdvi_ir_ops = {
1410     .read_with_attrs = amdvi_mem_ir_read,
1411     .write_with_attrs = amdvi_mem_ir_write,
1412     .endianness = DEVICE_LITTLE_ENDIAN,
1413     .impl = {
1414         .min_access_size = 4,
1415         .max_access_size = 4,
1416     },
1417     .valid = {
1418         .min_access_size = 4,
1419         .max_access_size = 4,
1420     }
1421 };
1422 
amdvi_host_dma_iommu(PCIBus * bus,void * opaque,int devfn)1423 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1424 {
1425     char name[128];
1426     AMDVIState *s = opaque;
1427     AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
1428     int bus_num = pci_bus_num(bus);
1429     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
1430 
1431     iommu_as = s->address_spaces[bus_num];
1432 
1433     /* allocate memory during the first run */
1434     if (!iommu_as) {
1435         iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
1436         s->address_spaces[bus_num] = iommu_as;
1437     }
1438 
1439     /* set up AMD-Vi region */
1440     if (!iommu_as[devfn]) {
1441         snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
1442 
1443         iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
1444         iommu_as[devfn]->bus_num = (uint8_t)bus_num;
1445         iommu_as[devfn]->devfn = (uint8_t)devfn;
1446         iommu_as[devfn]->iommu_state = s;
1447 
1448         amdvi_dev_as = iommu_as[devfn];
1449 
1450         /*
1451          * Memory region relationships looks like (Address range shows
1452          * only lower 32 bits to make it short in length...):
1453          *
1454          * |--------------------+-------------------+----------|
1455          * | Name               | Address range     | Priority |
1456          * |--------------------+-------------------+----------+
1457          * | amdvi-root         | 00000000-ffffffff |        0 |
1458          * |  amdvi-iommu_nodma  | 00000000-ffffffff |       0 |
1459          * |  amdvi-iommu_ir     | fee00000-feefffff |       1 |
1460          * |--------------------+-------------------+----------|
1461          */
1462         memory_region_init_iommu(&amdvi_dev_as->iommu,
1463                                  sizeof(amdvi_dev_as->iommu),
1464                                  TYPE_AMD_IOMMU_MEMORY_REGION,
1465                                  OBJECT(s),
1466                                  "amd_iommu", UINT64_MAX);
1467         memory_region_init(&amdvi_dev_as->root, OBJECT(s),
1468                            "amdvi_root", UINT64_MAX);
1469         address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
1470         memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1471                                             MEMORY_REGION(&amdvi_dev_as->iommu),
1472                                             0);
1473 
1474         /* Build the DMA Disabled alias to shared memory */
1475         memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s),
1476                                  "amdvi-sys", &s->mr_sys, 0,
1477                                  memory_region_size(&s->mr_sys));
1478         memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1479                                             &amdvi_dev_as->iommu_nodma,
1480                                             0);
1481         /* Build the Interrupt Remapping alias to shared memory */
1482         memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s),
1483                                  "amdvi-ir", &s->mr_ir, 0,
1484                                  memory_region_size(&s->mr_ir));
1485         memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu),
1486                                             AMDVI_INT_ADDR_FIRST,
1487                                             &amdvi_dev_as->iommu_ir, 1);
1488 
1489         if (!x86_iommu->pt_supported) {
1490             memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
1491             memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
1492                                       true);
1493         } else {
1494             memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
1495                                       false);
1496             memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true);
1497         }
1498     }
1499     return &iommu_as[devfn]->as;
1500 }
1501 
1502 static const PCIIOMMUOps amdvi_iommu_ops = {
1503     .get_address_space = amdvi_host_dma_iommu,
1504 };
1505 
1506 static const MemoryRegionOps mmio_mem_ops = {
1507     .read = amdvi_mmio_read,
1508     .write = amdvi_mmio_write,
1509     .endianness = DEVICE_LITTLE_ENDIAN,
1510     .impl = {
1511         .min_access_size = 1,
1512         .max_access_size = 8,
1513         .unaligned = false,
1514     },
1515     .valid = {
1516         .min_access_size = 1,
1517         .max_access_size = 8,
1518     }
1519 };
1520 
/*
 * Called when the set of notifiers registered on the IOMMU region changes.
 * MAP notifiers are not supported: requesting one sets @errp and returns
 * -EINVAL. Any other combination is accepted.
 */
static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                           IOMMUNotifierFlag old,
                                           IOMMUNotifierFlag new,
                                           Error **errp)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);

    if (!(new & IOMMU_NOTIFIER_MAP)) {
        return 0;
    }

    error_setg(errp,
               "device %02x.%02x.%x requires iommu notifier which is not "
               "currently supported", as->bus_num, PCI_SLOT(as->devfn),
               PCI_FUNC(as->devfn));
    return -EINVAL;
}
1537 
/*
 * Bring the IOMMU state back to its power-on values: flush the IOTLB,
 * clear the cached table lengths, ring pointers and enable flags, zero the
 * MMIO register file and re-seed the extended feature and status registers.
 */
static void amdvi_init(AMDVIState *s)
{
    amdvi_iotlb_reset(s);

    /* everything starts out disabled */
    s->enabled = false;
    s->mmio_enabled = false;
    s->ats_enabled = false;
    s->cmdbuf_enabled = false;
    s->excl_enabled = false;
    s->excl_allow = false;

    /* no tables configured, rings empty */
    s->devtab_len = 0;
    s->cmdbuf_len = 0;
    s->cmdbuf_head = 0;
    s->cmdbuf_tail = 0;
    s->evtlog_head = 0;
    s->evtlog_tail = 0;

    /* reset MMIO */
    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
                   amdvi_extended_feature_register(s),
                   0xffffffffffffffef, 0);
    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
}
1562 
amdvi_pci_realize(PCIDevice * pdev,Error ** errp)1563 static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
1564 {
1565     AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
1566     int ret;
1567 
1568     ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
1569                              AMDVI_CAPAB_SIZE, errp);
1570     if (ret < 0) {
1571         return;
1572     }
1573     s->capab_offset = ret;
1574 
1575     ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
1576                              AMDVI_CAPAB_REG_SIZE, errp);
1577     if (ret < 0) {
1578         return;
1579     }
1580     ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
1581                              AMDVI_CAPAB_REG_SIZE, errp);
1582     if (ret < 0) {
1583         return;
1584     }
1585 
1586     if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
1587         return;
1588     }
1589 
1590     /* reset device ident */
1591     pci_config_set_prog_interface(pdev->config, 0);
1592 
1593     /* reset AMDVI specific capabilities, all r/o */
1594     pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
1595     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
1596                  AMDVI_BASE_ADDR & ~(0xffff0000));
1597     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
1598                 (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
1599     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
1600                  0xff000000);
1601     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
1602     pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
1603             AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
1604 }
1605 
/*
 * Device reset handler: resets the MSI state of the embedded PCI function,
 * then re-initializes the IOMMU state via amdvi_init().
 */
static void amdvi_sysbus_reset(DeviceState *dev)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
    PCIDevice *pci_fn = &s->pci.dev;

    msi_reset(pci_fn);
    amdvi_init(s);
}
1613 
amdvi_sysbus_realize(DeviceState * dev,Error ** errp)1614 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
1615 {
1616     AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1617     MachineState *ms = MACHINE(qdev_get_machine());
1618     PCMachineState *pcms = PC_MACHINE(ms);
1619     X86MachineState *x86ms = X86_MACHINE(ms);
1620     PCIBus *bus = pcms->pcibus;
1621 
1622     s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
1623                                      amdvi_uint64_equal, g_free, g_free);
1624 
1625     /* This device should take care of IOMMU PCI properties */
1626     if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
1627         return;
1628     }
1629 
1630     /* Pseudo address space under root PCI bus. */
1631     x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
1632 
1633     /* set up MMIO */
1634     memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
1635                           "amdvi-mmio", AMDVI_MMIO_SIZE);
1636     memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
1637                                 &s->mr_mmio);
1638 
1639     /* Create the share memory regions by all devices */
1640     memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX);
1641 
1642     /* set up the DMA disabled memory region */
1643     memory_region_init_alias(&s->mr_nodma, OBJECT(s),
1644                              "amdvi-nodma", get_system_memory(), 0,
1645                              memory_region_size(get_system_memory()));
1646     memory_region_add_subregion_overlap(&s->mr_sys, 0,
1647                                         &s->mr_nodma, 0);
1648 
1649     /* set up the Interrupt Remapping memory region */
1650     memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops,
1651                           s, "amdvi-ir", AMDVI_INT_ADDR_SIZE);
1652     memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
1653                                         &s->mr_ir, 1);
1654 
1655     /* AMD IOMMU with x2APIC mode requires xtsup=on */
1656     if (x86ms->apic_id_limit > 255 && !s->xtsup) {
1657         error_report("AMD IOMMU with x2APIC confguration requires xtsup=on");
1658         exit(EXIT_FAILURE);
1659     }
1660     if (s->xtsup) {
1661         if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
1662             error_report("AMD IOMMU xtsup=on requires support on the KVM side");
1663             exit(EXIT_FAILURE);
1664         }
1665     }
1666 
1667     pci_setup_iommu(bus, &amdvi_iommu_ops, s);
1668     amdvi_init(s);
1669 }
1670 
1671 static Property amdvi_properties[] = {
1672     DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
1673     DEFINE_PROP_END_OF_LIST(),
1674 };
1675 
1676 static const VMStateDescription vmstate_amdvi_sysbus = {
1677     .name = "amd-iommu",
1678     .unmigratable = 1
1679 };
1680 
/*
 * Instance initialiser for the AMD IOMMU device.
 *
 * @klass: despite its name this is the object instance (it is cast to
 *         AMDVIState); its embedded PCI device is initialised in place as
 *         a TYPE_AMD_IOMMU_PCI object
 */
static void amdvi_sysbus_instance_init(Object *klass)
{
    AMDVIState *iommu = AMD_IOMMU_DEVICE(klass);

    object_initialize(&iommu->pci, sizeof(iommu->pci), TYPE_AMD_IOMMU_PCI);
}
1687 
/*
 * Class initialiser for the AMD IOMMU device: installs the reset handler,
 * migration description, properties and the x86-IOMMU realize/int_remap
 * hooks.
 *
 * @klass: class being initialised
 * @data:  unused
 */
static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
{
    X86IOMMUClass *iommu_class = X86_IOMMU_DEVICE_CLASS(klass);
    DeviceClass *dev_class = DEVICE_CLASS(klass);

    /* hooks invoked through the x86 IOMMU base class */
    iommu_class->realize = amdvi_sysbus_realize;
    iommu_class->int_remap = amdvi_int_remap;

    /* generic device behaviour */
    device_class_set_legacy_reset(dev_class, amdvi_sysbus_reset);
    dev_class->vmsd = &vmstate_amdvi_sysbus;
    dev_class->hotpluggable = false;
    /* Supported by the pc-q35-* machine types */
    dev_class->user_creatable = true;

    /* presentation and properties */
    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
    dev_class->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
    device_class_set_props(dev_class, amdvi_properties);
}
1704 
1705 static const TypeInfo amdvi_sysbus = {
1706     .name = TYPE_AMD_IOMMU_DEVICE,
1707     .parent = TYPE_X86_IOMMU_DEVICE,
1708     .instance_size = sizeof(AMDVIState),
1709     .instance_init = amdvi_sysbus_instance_init,
1710     .class_init = amdvi_sysbus_class_init
1711 };
1712 
/*
 * Class initialiser for the PCI function of the AMD IOMMU: sets the PCI
 * vendor and class IDs, the realize callback, and the device category and
 * description.
 *
 * @klass: class being initialised
 * @data:  unused
 */
static void amdvi_pci_class_init(ObjectClass *klass, void *data)
{
    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
    DeviceClass *dev_class = DEVICE_CLASS(klass);

    /* PCI identification and realize hook */
    pci_class->vendor_id = PCI_VENDOR_ID_AMD;
    pci_class->class_id = 0x0806;
    pci_class->realize = amdvi_pci_realize;

    /* generic device presentation */
    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
    dev_class->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}
1725 
1726 static const TypeInfo amdvi_pci = {
1727     .name = TYPE_AMD_IOMMU_PCI,
1728     .parent = TYPE_PCI_DEVICE,
1729     .instance_size = sizeof(AMDVIPCIState),
1730     .class_init = amdvi_pci_class_init,
1731     .interfaces = (InterfaceInfo[]) {
1732         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1733         { },
1734     },
1735 };
1736 
/*
 * Class initialiser for the AMD IOMMU memory region type: installs the
 * translate and notify_flag_changed callbacks.
 *
 * @klass: class being initialised
 * @data:  unused
 */
static void amdvi_iommu_memory_region_class_init(ObjectClass *klass,
                                                 void *data)
{
    IOMMUMemoryRegionClass *mr_class = IOMMU_MEMORY_REGION_CLASS(klass);

    mr_class->translate = amdvi_translate;
    mr_class->notify_flag_changed = amdvi_iommu_notify_flag_changed;
}
1744 
1745 static const TypeInfo amdvi_iommu_memory_region_info = {
1746     .parent = TYPE_IOMMU_MEMORY_REGION,
1747     .name = TYPE_AMD_IOMMU_MEMORY_REGION,
1748     .class_init = amdvi_iommu_memory_region_class_init,
1749 };
1750 
amdvi_register_types(void)1751 static void amdvi_register_types(void)
1752 {
1753     type_register_static(&amdvi_pci);
1754     type_register_static(&amdvi_sysbus);
1755     type_register_static(&amdvi_iommu_memory_region_info);
1756 }
1757 
1758 type_init(amdvi_register_types);
1759