xref: /openbmc/qemu/hw/i386/amd_iommu.c (revision e5859141b9b6aec9e0a14dacedc9f02fe2f15844)
/*
 * QEMU emulation of AMD IOMMU (AMD-Vi)
 *
 * Copyright (C) 2011 Eduard - Gabriel Munteanu
 * Copyright (C) 2015, 2016 David Kiarie Kahurani
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Cache implementation inspired by hw/i386/intel_iommu.c
 */

#include "qemu/osdep.h"
#include "hw/i386/pc.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "migration/vmstate.h"
#include "amd_iommu.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/i386/apic_internal.h"
#include "trace.h"
#include "hw/i386/apic-msidef.h"
#include "hw/qdev-properties.h"
#include "kvm/kvm_i386.h"

/* AMD-Vi MMIO register names, used when tracing register accesses */
const char *amdvi_mmio_low[] = {
    "AMDVI_MMIO_DEVTAB_BASE",
    "AMDVI_MMIO_CMDBUF_BASE",
    "AMDVI_MMIO_EVTLOG_BASE",
    "AMDVI_MMIO_CONTROL",
    "AMDVI_MMIO_EXCL_BASE",
    "AMDVI_MMIO_EXCL_LIMIT",
    "AMDVI_MMIO_EXT_FEATURES",
    "AMDVI_MMIO_PPR_BASE",
    "UNHANDLED"
};
const char *amdvi_mmio_high[] = {
    "AMDVI_MMIO_COMMAND_HEAD",
    "AMDVI_MMIO_COMMAND_TAIL",
    "AMDVI_MMIO_EVTLOG_HEAD",
    "AMDVI_MMIO_EVTLOG_TAIL",
    "AMDVI_MMIO_STATUS",
    "AMDVI_MMIO_PPR_HEAD",
    "AMDVI_MMIO_PPR_TAIL",
    "UNHANDLED"
};

struct AMDVIAddressSpace {
    uint8_t bus_num;            /* bus number                           */
    uint8_t devfn;              /* device function                      */
    AMDVIState *iommu_state;    /* AMDVI - one per machine              */
    MemoryRegion root;          /* AMDVI Root memory map region         */
    IOMMUMemoryRegion iommu;    /* Device's address translation region  */
    MemoryRegion iommu_nodma;   /* Alias of shared nodma memory region  */
    MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
    AddressSpace as;            /* device's corresponding address space */
};

/* AMDVI cache entry */
typedef struct AMDVIIOTLBEntry {
    uint16_t domid;             /* assigned domain id  */
    uint16_t devid;             /* device owning entry */
    uint64_t perms;             /* access permissions  */
    uint64_t translated_addr;   /* translated address  */
    uint64_t page_mask;         /* physical page size  */
} AMDVIIOTLBEntry;

uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
    uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
    if (s->xtsup) {
        feature |= AMDVI_FEATURE_XT;
    }

    return feature;
}

90 /* configure MMIO registers at startup/reset */
amdvi_set_quad(AMDVIState * s,hwaddr addr,uint64_t val,uint64_t romask,uint64_t w1cmask)91 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
92                            uint64_t romask, uint64_t w1cmask)
93 {
94     stq_le_p(&s->mmior[addr], val);
95     stq_le_p(&s->romask[addr], romask);
96     stq_le_p(&s->w1cmask[addr], w1cmask);
97 }
98 
static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
    return lduw_le_p(&s->mmior[addr]);
}

static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
{
    return ldl_le_p(&s->mmior[addr]);
}

static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
{
    return ldq_le_p(&s->mmior[addr]);
}

/* internal write */
static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->mmior[addr], val);
}

/* external write */
static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
{
    uint16_t romask = lduw_le_p(&s->romask[addr]);
    uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
    uint16_t oldval = lduw_le_p(&s->mmior[addr]);

    uint16_t oldval_preserved = oldval & (romask | w1cmask);
    uint16_t newval_write = val & ~romask;
    uint16_t newval_w1c_set = val & w1cmask;

    stw_le_p(&s->mmior[addr],
             (oldval_preserved | newval_write) & ~newval_w1c_set);
}

static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
{
    uint32_t romask = ldl_le_p(&s->romask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    uint32_t oldval = ldl_le_p(&s->mmior[addr]);

    uint32_t oldval_preserved = oldval & (romask | w1cmask);
    uint32_t newval_write = val & ~romask;
    uint32_t newval_w1c_set = val & w1cmask;

    stl_le_p(&s->mmior[addr],
             (oldval_preserved | newval_write) & ~newval_w1c_set);
}

static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t romask = ldq_le_p(&s->romask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    uint64_t oldval = ldq_le_p(&s->mmior[addr]);

    uint64_t oldval_preserved = oldval & (romask | w1cmask);
    uint64_t newval_write = val & ~romask;
    uint64_t newval_w1c_set = val & w1cmask;

    stq_le_p(&s->mmior[addr],
             (oldval_preserved | newval_write) & ~newval_w1c_set);
}

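/*
 * Illustration of the masking rules above (not from the original source):
 * with oldval = 0b1010, romask = 0b1000, w1cmask = 0b0010 and a guest
 * write of val = 0b0111, the read-only bit 3 keeps its old value, the
 * write-1-to-clear bit 1 is cleared because the guest wrote 1 to it, and
 * the remaining bits take the written value:
 *
 *   oldval_preserved = 0b1010 & (0b1000 | 0b0010)  = 0b1010
 *   newval_write     = 0b0111 & ~0b1000            = 0b0111
 *   newval_w1c_set   = 0b0111 & 0b0010             = 0b0010
 *   result           = (0b1010 | 0b0111) & ~0b0010 = 0b1101
 */
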
/* test whether any of the given bits are set in a 64-bit register */
static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
{
    return amdvi_readq(s, addr) & val;
}

/* OR a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
}

/* AND a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
}

static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
    MSIMessage msg = {};
    MemTxAttrs attrs = {
        .requester_id = pci_requester_id(&s->pci->dev)
    };

    if (msi_enabled(&s->pci->dev)) {
        msg = msi_get_message(&s->pci->dev, 0);
        address_space_stl_le(&address_space_memory, msg.address, msg.data,
                             attrs, NULL);
    }
}

static uint32_t get_next_eventlog_entry(AMDVIState *s)
{
    uint32_t evtlog_size = s->evtlog_len * AMDVI_EVENT_LEN;
    return (s->evtlog_tail + AMDVI_EVENT_LEN) % evtlog_size;
}

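/*
 * Worked example (illustrative, not from the original source): the event
 * log is a ring of s->evtlog_len entries of AMDVI_EVENT_LEN bytes each.
 * With 256 entries of 16 bytes the ring is 4 KiB, so a tail offset of
 * 4080 advances to (4080 + 16) % 4096 = 0, i.e. it wraps back to the
 * start of the buffer. The log is considered full when the advanced tail
 * would collide with the head (see amdvi_log_event() below).
 */
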
static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
    uint32_t evtlog_tail_next;

    /* event logging not enabled */
    if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
        AMDVI_MMIO_STATUS_EVT_OVF)) {
        return;
    }

    evtlog_tail_next = get_next_eventlog_entry(s);

    /* event log buffer full */
    if (evtlog_tail_next == s->evtlog_head) {
        /* generate overflow interrupt */
        if (s->evtlog_intr) {
            amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
            amdvi_generate_msi_interrupt(s);
        }
        return;
    }

    if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
                         evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
    }

    s->evtlog_tail = evtlog_tail_next;
    amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail);

    if (s->evtlog_intr) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVENT_INT);
        amdvi_generate_msi_interrupt(s);
    }
}

/* set @length bits of @buffer at bit offset @start to @value */
static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
                                int length)
{
    int index = start / 64, bitpos = start % 64;
    uint64_t mask = MAKE_64BIT_MASK(bitpos, length);

    buffer[index] &= ~mask;
    buffer[index] |= (value << bitpos) & mask;
}

/*
 * AMDVi event structure
 *    0:15   -> DeviceID
 *    48:63  -> event type + miscellaneous info
 *    64:127 -> related address
 */
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
                               uint16_t info)
{
    evt[0] = 0;
    evt[1] = 0;

    amdvi_setevent_bits(evt, devid, 0, 16);
    amdvi_setevent_bits(evt, info, 48, 16);
    amdvi_setevent_bits(evt, addr, 64, 64);
}

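/*
 * Worked example (illustrative, not from the original source): encoding an
 * event for devid 0x0010 at address 0x1000 with info 0x0400 produces the
 * 128-bit event
 *
 *   evt[0] = (info << 48) | devid = 0x0400000000000010
 *   evt[1] = addr                 = 0x0000000000001000
 *
 * which amdvi_log_event() then copies verbatim into the guest event log.
 */
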
/* log an error encountered during a page walk
 *
 * @addr: virtual address in translation request
 */
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
                             hwaddr addr, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

/*
 * log a master abort accessing device table
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
                                   hwaddr devtab, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;

    amdvi_encode_event(evt, devid, devtab, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

/* log an event trying to access command buffer
 *   @addr : address that couldn't be accessed
 */
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
    uint64_t evt[2];
    uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;

    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

/* log an illegal command event
 *   @addr : address of illegal command
 */
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
                                       hwaddr addr)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
}

/* log an error accessing device table
 *
 *  @devid : device owning the table entry
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
                                          hwaddr addr, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
}

/* log an error accessing a PTE entry
 * @addr : address that couldn't be accessed
 */
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
                                    hwaddr addr, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
{
    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
}

static guint amdvi_uint64_hash(gconstpointer v)
{
    return (guint)*(const uint64_t *)v;
}

static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
                                           uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    return g_hash_table_lookup(s->iotlb, &key);
}

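/*
 * Illustration of the IOTLB key layout (not from the original source):
 * the low bits hold the guest frame number and the BDF is folded into the
 * bits above AMDVI_DEVID_SHIFT, so a single hash table can cache
 * translations for every device at once:
 *
 *   key = (addr >> AMDVI_PAGE_SHIFT_4K) | (devid << AMDVI_DEVID_SHIFT)
 *
 * e.g. devid 00:03.0 (0x0018) translating iova 0x7f0000 yields gfn 0x7f0
 * combined with 0x18 in the device bits.
 */
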
static void amdvi_iotlb_reset(AMDVIState *s)
{
    assert(s->iotlb);
    trace_amdvi_iotlb_reset();
    g_hash_table_remove_all(s->iotlb);
}

static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t devid = *(uint16_t *)user_data;
    return entry->devid == devid;
}

static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                    uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    g_hash_table_remove(s->iotlb, &key);
}

static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
                               uint64_t gpa, IOMMUTLBEntry to_cache,
                               uint16_t domid)
{
    /* don't cache erroneous translations */
    if (to_cache.perm != IOMMU_NONE) {
        AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
        uint64_t *key = g_new(uint64_t, 1);
        uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;

        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                PCI_FUNC(devid), gpa, to_cache.translated_addr);

        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
            amdvi_iotlb_reset(s);
        }

        entry->domid = domid;
        entry->perms = to_cache.perm;
        entry->translated_addr = to_cache.translated_addr;
        entry->page_mask = to_cache.addr_mask;
        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
        g_hash_table_replace(s->iotlb, key, entry);
    }
}

static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
{
    /* the store address field is 8-byte aligned; bits [2:0] read as zero */
    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
    uint64_t data = cpu_to_le64(cmd[1]);

    if (extract64(cmd[0], 52, 8)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    if (extract64(cmd[0], 0, 1)) {
        if (dma_memory_write(&address_space_memory, addr, &data,
                             AMDVI_COMPLETION_DATA_SIZE,
                             MEMTXATTRS_UNSPECIFIED)) {
            trace_amdvi_completion_wait_fail(addr);
        }
    }
    /* set completion interrupt */
    if (extract64(cmd[0], 1, 1)) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
    }
    trace_amdvi_completion_wait(addr, data);
}

/* log error without aborting since Linux seems to be using reserved bits */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));

    /* this command should invalidate internal caches, of which there are none */
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid));
}

static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29) ||
        extract64(cmd[1], 48, 16)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_ppr_exec();
}

static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global,
                                            uint32_t index, uint32_t mask)
{
    x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
}

static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 0, 60) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    /* Notify global invalidation */
    amdvi_intremap_inval_notify_all(s, true, 0, 0);

    amdvi_iotlb_reset(s);
    trace_amdvi_all_inval();
}

static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t domid = *(uint16_t *)user_data;
    return entry->domid == domid;
}

/* we don't have devid - we can't remove pages by address */
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));

    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
        extract64(cmd[1], 3, 9)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                &domid);
    trace_amdvi_pages_inval(domid);
}

static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 5, 7)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}

static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    /* Notify global invalidation */
    amdvi_intremap_inval_notify_all(s, true, 0, 0);

    trace_amdvi_intr_inval();
}

/*
 * FIXME: when the S bit is set, try to honour the specified invalidation
 * size instead of dropping all cached pages
 */
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16));

    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 6, 6)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    if (extract64(cmd[1], 0, 1)) {
        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
                                    &devid);
    } else {
        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
                                devid);
    }
    trace_amdvi_iotlb_inval();
}

/* not honouring reserved bits is regarded as an illegal command */
static void amdvi_cmdbuf_exec(AMDVIState *s)
{
    uint64_t cmd[2];

    if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
                        cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
        amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
        return;
    }

    switch (extract64(cmd[0], 60, 4)) {
    case AMDVI_CMD_COMPLETION_WAIT:
        amdvi_completion_wait(s, cmd);
        break;
    case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
        amdvi_inval_devtab_entry(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_PAGES:
        amdvi_inval_pages(s, cmd);
        break;
    case AMDVI_CMD_INVAL_IOTLB_PAGES:
        iommu_inval_iotlb(s, cmd);
        break;
    case AMDVI_CMD_INVAL_INTR_TABLE:
        amdvi_inval_inttable(s, cmd);
        break;
    case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
        amdvi_prefetch_pages(s, cmd);
        break;
    case AMDVI_CMD_COMPLETE_PPR_REQUEST:
        amdvi_complete_ppr(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_ALL:
        amdvi_inval_all(s, cmd);
        break;
    default:
        trace_amdvi_unhandled_command(extract64(cmd[0], 60, 4));
        /* log illegal command */
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
}

static void amdvi_cmdbuf_run(AMDVIState *s)
{
    if (!s->cmdbuf_enabled) {
        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
        return;
    }

    /* check if there is work to do. */
    while (s->cmdbuf_head != s->cmdbuf_tail) {
        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
        amdvi_cmdbuf_exec(s);
        s->cmdbuf_head += AMDVI_COMMAND_SIZE;
        amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);

        /* wrap head pointer */
        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
            s->cmdbuf_head = 0;
        }
    }
}

static inline uint8_t amdvi_mmio_get_index(hwaddr addr)
{
    uint8_t index = (addr & ~0x2000) / 8;

    if ((addr & 0x2000)) {
        /* high table */
        index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
    } else {
        index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
    }

    return index;
}

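/*
 * Example of the index mapping above (illustrative, not from the original
 * source): bit 13 of the offset selects the table and each register is 8
 * bytes wide, so
 *
 *   addr 0x0018 -> low table,  index 3 -> "AMDVI_MMIO_CONTROL"
 *   addr 0x2008 -> high table, index 1 -> "AMDVI_MMIO_COMMAND_TAIL"
 *
 * Out-of-range offsets clamp to the trailing "UNHANDLED" entry.
 */
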
static void amdvi_mmio_trace_read(hwaddr addr, unsigned size)
{
    uint8_t index = amdvi_mmio_get_index(addr);
    const char **names = (addr & 0x2000) ? amdvi_mmio_high : amdvi_mmio_low;

    trace_amdvi_mmio_read(names[index], addr, size, addr & ~0x07);
}

static void amdvi_mmio_trace_write(hwaddr addr, unsigned size, uint64_t val)
{
    uint8_t index = amdvi_mmio_get_index(addr);
    const char **names = (addr & 0x2000) ? amdvi_mmio_high : amdvi_mmio_low;

    trace_amdvi_mmio_write(names[index], addr, size, val,
                           addr & ~0x07);
}

static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    AMDVIState *s = opaque;
    uint64_t val = -1;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
        return (uint64_t)-1;
    }

    if (size == 2) {
        val = amdvi_readw(s, addr);
    } else if (size == 4) {
        val = amdvi_readl(s, addr);
    } else if (size == 8) {
        val = amdvi_readq(s, addr);
    }
    amdvi_mmio_trace_read(addr, size);

    return val;
}

static void amdvi_handle_control_write(AMDVIState *s)
{
    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);

    s->evtlog_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_EVENTLOGEN);

    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
    s->cmdbuf_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
    s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);

    /* update the flags depending on the control register */
    if (s->cmdbuf_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
    }
    if (s->evtlog_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
    }

    trace_amdvi_control_status(control);
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_devtab_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);

    /* set device table length (i.e. number of entries table can hold) */
    s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
}

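/*
 * Worked example (illustrative, not from the original source): the size
 * field encodes the table length in AMDVI_MMIO_DEVTAB_SIZE_UNIT blocks,
 * minus one. Assuming the usual 4 KiB unit and 32-byte device table
 * entries, a size field of 1 gives (1 + 1) * (4096 / 32) = 256 entries,
 * i.e. an 8 KiB table.
 */
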
static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
{
    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
{
    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
                & AMDVI_MMIO_CMDBUF_BASE_MASK;
    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
    s->cmdbuf_head = s->cmdbuf_tail = 0;
}

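/*
 * Worked example (illustrative, not from the original source): the command
 * buffer length is a power of two taken from the size byte, so a size
 * field of 8 yields 1 << 8 = 256 commands; at 16 bytes per command
 * (AMDVI_COMMAND_SIZE) that is a 4 KiB ring. Rewriting the base register
 * deliberately resets both head and tail to offset 0.
 */
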
static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
{
    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_excllim_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
                    AMDVI_MMIO_EXCL_LIMIT_LOW;
}

static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);

    /* do not reset if the eventlog interrupt bit is set */
    if (amdvi_readq(s, AMDVI_MMIO_STATUS) & AMDVI_MMIO_STATUS_EVENT_INT) {
        return;
    }

    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);

    /* clear tail and head pointer to 0 when event base is updated */
    s->evtlog_tail = s->evtlog_head = 0;
    amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_HEAD, s->evtlog_head);
    amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail);
}

static inline void amdvi_handle_evttail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
}

static inline void amdvi_handle_evthead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
}

static inline void amdvi_handle_pprhead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprtail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
}

/*
 * FIXME: system software may write these registers in chunks as small as
 * one byte. Linux writes in 4-byte chunks, which this handles correctly,
 * but 8-byte accesses would currently be mishandled.
 */
static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
                                 hwaddr addr)
{
    if (size == 2) {
        amdvi_writew(s, addr, val);
    } else if (size == 4) {
        amdvi_writel(s, addr, val);
    } else if (size == 8) {
        amdvi_writeq(s, addr, val);
    }
}

static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    AMDVIState *s = opaque;
    unsigned long offset = addr & 0x07;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_write("error: addr outside region: max ",
                (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
        return;
    }

    amdvi_mmio_trace_write(addr, size, val);
    switch (addr & ~0x07) {
    case AMDVI_MMIO_CONTROL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_control_write(s);
        break;
    case AMDVI_MMIO_DEVICE_TABLE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /*
         * set device table address; this also suffers from the inability
         * to tell whether software is done writing
         */
        if (offset || (size == 8)) {
            amdvi_handle_devtab_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdhead_write(s);
        break;
    case AMDVI_MMIO_COMMAND_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /*
         * FIXME: robustly make sure system software has finished writing,
         * in case it writes in chunks of less than 8 bytes. For now this
         * hack works for the Linux driver.
         */
        if (offset || (size == 8)) {
            amdvi_handle_cmdbase_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdtail_write(s);
        break;
    case AMDVI_MMIO_EVENT_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evtbase_write(s);
        break;
    case AMDVI_MMIO_EVENT_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evthead_write(s);
        break;
    case AMDVI_MMIO_EVENT_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evttail_write(s);
        break;
    case AMDVI_MMIO_EXCL_LIMIT:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_excllim_write(s);
        break;
    /* PPR log base - unused for now */
    case AMDVI_MMIO_PPR_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprbase_write(s);
        break;
    /* PPR log head - also unused for now */
    case AMDVI_MMIO_PPR_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprhead_write(s);
        break;
    /* PPR log tail - unused for now */
    case AMDVI_MMIO_PPR_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprtail_write(s);
        break;
    case AMDVI_MMIO_STATUS:
        amdvi_mmio_reg_write(s, size, val, addr);
        break;
    }
}

static inline uint64_t amdvi_get_perms(uint64_t entry)
{
    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
           AMDVI_DEV_PERM_SHIFT;
}

/* validate that reserved bits are honoured */
static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
                               uint64_t *dte)
{
    if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) ||
        (dte[1] & AMDVI_DTE_QUAD1_RESERVED) ||
        (dte[2] & AMDVI_DTE_QUAD2_RESERVED) ||
        (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) {
        amdvi_log_illegaldevtab_error(s, devid,
                                      s->devtab +
                                      devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
        return false;
    }

    return true;
}

/* get a device table entry given the devid */
static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
{
    uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;

    if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
                        AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_dte_get_fail(s->devtab, offset);
        /* log error accessing dte */
        amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
        return false;
    }

    *entry = le64_to_cpu(*entry);
    if (!amdvi_validate_dte(s, devid, entry)) {
        trace_amdvi_invalid_dte(entry[0]);
        return false;
    }

    return true;
}

/* get pte translation mode */
static inline uint8_t get_pte_translation_mode(uint64_t pte)
{
    return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
}

static inline uint64_t pte_override_page_mask(uint64_t pte)
{
    uint8_t page_mask = 13;
    uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;

    /* find the first zero bit */
    while (addr & 1) {
        page_mask++;
        addr = addr >> 1;
    }

    return ~((1ULL << page_mask) - 1);
}

static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
{
    return ~((1UL << ((oldlevel * 9) + 3)) - 1);
}

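/*
 * Worked examples (illustrative, not from the original source):
 *
 *   pte_get_page_mask(1) = ~((1UL << 12) - 1)   -> 4 KiB pages
 *   pte_get_page_mask(2) = ~((1UL << 21) - 1)   -> 2 MiB pages
 *   pte_get_page_mask(3) = ~((1UL << 30) - 1)   -> 1 GiB pages
 *
 * For a level-7 "override" PTE, consecutive low-order 1 bits in the
 * address field encode the page size: three trailing ones above bit 12
 * make pte_override_page_mask() count up to 16, i.e. a 64 KiB page.
 */
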
static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
                                           uint16_t devid)
{
    uint64_t pte;

    if (dma_memory_read(&address_space_memory, pte_addr,
                        &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_get_pte_hwerror(pte_addr);
        amdvi_log_pagetab_error(s, devid, pte_addr, 0);
        pte = 0;
        return pte;
    }

    pte = le64_to_cpu(pte);
    return pte;
}

static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                            IOMMUTLBEntry *ret, unsigned perms,
                            hwaddr addr)
{
    unsigned level, present, pte_perms, oldlevel;
    uint64_t pte = dte[0], pte_addr, page_mask;

    /* make sure the DTE has TV = 1 */
    if (pte & AMDVI_DEV_TRANSLATION_VALID) {
        level = get_pte_translation_mode(pte);
        if (level >= 7) {
            trace_amdvi_mode_invalid(level, addr);
            return;
        }
        if (level == 0) {
            goto no_remap;
        }

        /* we are at the leaf page table or page table encodes a huge page */
        do {
            pte_perms = amdvi_get_perms(pte);
            present = pte & 1;
            if (!present || perms != (perms & pte_perms)) {
                amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
                trace_amdvi_page_fault(addr);
                return;
            }

            /* go to the next lower level */
            pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
            /* add offset and load pte */
            pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
            pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
            if (!pte) {
                return;
            }
            oldlevel = level;
            level = get_pte_translation_mode(pte);
        } while (level > 0 && level < 7);

        if (level == 0x7) {
            page_mask = pte_override_page_mask(pte);
        } else {
            page_mask = pte_get_page_mask(oldlevel);
        }

        /* get access permissions from pte */
        ret->iova = addr & page_mask;
        ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
        ret->addr_mask = ~page_mask;
        ret->perm = amdvi_get_perms(pte);
        return;
    }
no_remap:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = amdvi_get_perms(pte);
}

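/*
 * Illustration of the table-walk indexing above (not from the original
 * source): each level consumes 9 bits of the IOVA, with the low 12 bits
 * left for the page offset, so the index expression
 *
 *   ((addr >> (3 + 9 * level)) & 0x1FF) << 3
 *
 * picks one of 512 8-byte PTEs per table. At level 1 it uses addr bits
 * 20:12, at level 2 bits 29:21, and so on up to the level programmed in
 * the DTE's translation mode field.
 */
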
static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
                               bool is_write, IOMMUTLBEntry *ret)
{
    AMDVIState *s = as->iommu_state;
    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
    AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
    uint64_t entry[4];

    if (iotlb_entry) {
        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
        ret->iova = addr & ~iotlb_entry->page_mask;
        ret->translated_addr = iotlb_entry->translated_addr;
        ret->addr_mask = iotlb_entry->page_mask;
        ret->perm = iotlb_entry->perms;
        return;
    }

    if (!amdvi_get_dte(s, devid, entry)) {
        return;
    }

    /* devices with V = 0 are not translated */
    if (!(entry[0] & AMDVI_DEV_VALID)) {
        goto out;
    }

    amdvi_page_walk(as, entry, ret,
                    is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);

    amdvi_update_iotlb(s, devid, addr, *ret,
                       entry[1] & AMDVI_DEV_DOMID_ID_MASK);
    return;

out:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = IOMMU_RW;
}

static inline bool amdvi_is_interrupt_addr(hwaddr addr)
{
    return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
}

static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                                     IOMMUAccessFlags flag, int iommu_idx)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
    AMDVIState *s = as->iommu_state;
    IOMMUTLBEntry ret = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = 0,
        .addr_mask = ~(hwaddr)0,
        .perm = IOMMU_NONE
    };

    if (!s->enabled) {
        /*
         * AMDVI disabled - corresponds to iommu=off, not to a failure to
         * provide any parameter
         */
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_RW;
        return ret;
    } else if (amdvi_is_interrupt_addr(addr)) {
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_WO;
        return ret;
    }

    amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
    trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
            PCI_FUNC(as->devfn), addr, ret.translated_addr);
    return ret;
}

static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                          union irte *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;

    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_val(irte->val);

    return 0;
}

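/*
 * Note on the offset arithmetic (illustrative, not from the original
 * source): the MSI data register carries the index of the interrupt's
 * remapping table entry. Legacy remapping uses 4-byte entries (union
 * irte), hence the << 2 above, while guest APIC mode uses 16-byte entries
 * (struct irte_ga), hence the << 4 in amdvi_get_irte_ga() below.
 */
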
static int amdvi_int_remap_legacy(AMDVIState *iommu,
                                  MSIMessage *origin,
                                  MSIMessage *translated,
                                  uint64_t *dte,
                                  X86IOMMUIrq *irq,
                                  uint16_t sid)
{
    int ret;
    union irte irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.fields.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.fields.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.fields.int_type;
    irq->vector = irte.fields.vector;
    irq->dest_mode = irte.fields.dm;
    irq->redir_hint = irte.fields.rq_eoi;
    irq->dest = irte.fields.destination;

    return 0;
}

static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                             struct irte_ga *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte_ga");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
    return 0;
}

static int amdvi_int_remap_ga(AMDVIState *iommu,
                              MSIMessage *origin,
                              MSIMessage *translated,
                              uint64_t *dte,
                              X86IOMMUIrq *irq,
                              uint16_t sid)
{
    int ret;
    struct irte_ga irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.lo.fields_remap.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.lo.fields_remap.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type is set");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.lo.fields_remap.int_type;
    irq->vector = irte.hi.fields.vector;
    irq->dest_mode = irte.lo.fields_remap.dm;
    irq->redir_hint = irte.lo.fields_remap.rq_eoi;
    if (iommu->xtsup) {
        irq->dest = irte.lo.fields_remap.destination |
                    (irte.hi.fields.destination_hi << 24);
    } else {
        irq->dest = irte.lo.fields_remap.destination & 0xff;
    }

    return 0;
}

static int __amdvi_int_remap_msi(AMDVIState *iommu,
                                 MSIMessage *origin,
                                 MSIMessage *translated,
                                 uint64_t *dte,
                                 X86IOMMUIrq *irq,
                                 uint16_t sid)
{
    int ret;
    uint8_t int_ctl;

    int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
    trace_amdvi_ir_intctl(int_ctl);

    switch (int_ctl) {
    case AMDVI_IR_INTCTL_PASS:
        memcpy(translated, origin, sizeof(*origin));
        return 0;
    case AMDVI_IR_INTCTL_REMAP:
        break;
    case AMDVI_IR_INTCTL_ABORT:
        trace_amdvi_ir_target_abort("int_ctl abort");
        return -AMDVI_IR_TARGET_ABORT;
    default:
        trace_amdvi_ir_err("int_ctl reserved");
        return -AMDVI_IR_ERR;
    }

    if (iommu->ga_enabled) {
        ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
    } else {
        ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
    }

    return ret;
}

/* Interrupt remapping for MSI/MSI-X entry */
static int amdvi_int_remap_msi(AMDVIState *iommu,
                               MSIMessage *origin,
                               MSIMessage *translated,
                               uint16_t sid)
{
    int ret = 0;
    uint64_t pass = 0;
    uint64_t dte[4] = { 0 };
    X86IOMMUIrq irq = { 0 };
    uint8_t dest_mode, delivery_mode;

    assert(origin && translated);

    /*
     * When the IOMMU is enabled, an interrupt remap request will come
     * either from an IO-APIC or a PCI device. If the interrupt is from a
     * PCI device it will have a valid requester id; if it is from the
     * IO-APIC the requester id will be invalid.
     */
    if (sid == X86_IOMMU_SID_INVALID) {
        sid = AMDVI_IOAPIC_SB_DEVID;
    }

    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);

    /* check if device table entry is set before we go further. */
    if (!iommu || !iommu->devtab_len) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (!amdvi_get_dte(iommu, sid, dte)) {
        return -AMDVI_IR_ERR;
    }

    /* Check if IR is enabled in DTE */
    if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    /* validate that we are configured with intremap=on */
    if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
        trace_amdvi_err("Interrupt remapping is enabled in the guest but "
                        "not in the host. Use intremap=on to enable interrupt "
                        "remapping in amd-iommu.");
        return -AMDVI_IR_ERR;
    }

    if (origin->address < AMDVI_INT_ADDR_FIRST ||
        origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
        trace_amdvi_err("MSI is not from IOAPIC.");
        return -AMDVI_IR_ERR;
    }

    /*
     * The MSI data register bits [10:8] carry the upstream interrupt type.
     *
     * See MSI/MSI-X format:
     * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
     * (page 5)
     */
    delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;

    switch (delivery_mode) {
    case AMDVI_IOAPIC_INT_TYPE_FIXED:
    case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
        trace_amdvi_ir_delivery_mode("fixed/arbitrated");
        ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
        if (ret < 0) {
            goto remap_fail;
        } else {
            /* Translate IRQ to MSI messages */
            x86_iommu_irq_to_msi_message(&irq, translated);
            goto out;
        }
        break;
    case AMDVI_IOAPIC_INT_TYPE_SMI:
        error_report("SMI is not supported!");
        ret = -AMDVI_IR_ERR;
        break;
    case AMDVI_IOAPIC_INT_TYPE_NMI:
        pass = dte[2] & AMDVI_DEV_NMI_PASS_MASK;
        trace_amdvi_ir_delivery_mode("nmi");
        break;
    case AMDVI_IOAPIC_INT_TYPE_INIT:
        pass = dte[2] & AMDVI_DEV_INT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("init");
        break;
    case AMDVI_IOAPIC_INT_TYPE_EINT:
        pass = dte[2] & AMDVI_DEV_EINT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("eint");
        break;
    default:
        trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
        ret = -AMDVI_IR_ERR;
        break;
    }

    if (ret < 0) {
        goto remap_fail;
    }

    /*
     * The MSI address register bit[2] selects the destination mode.
     * dest_mode 1 is valid for fixed and arbitrated interrupts only.
     */
    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
    if (dest_mode) {
        trace_amdvi_ir_err("invalid dest_mode");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

    if (pass) {
        memcpy(translated, origin, sizeof(*origin));
    } else {
        trace_amdvi_ir_err("passthrough is not enabled");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

out:
    trace_amdvi_ir_remap_msi(origin->address, origin->data,
                             translated->address, translated->data);
    return 0;

remap_fail:
    return ret;
}

static int amdvi_int_remap(X86IOMMUState *iommu,
                           MSIMessage *origin,
                           MSIMessage *translated,
                           uint16_t sid)
{
    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
                               translated, sid);
}

static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
                                      uint64_t value, unsigned size,
                                      MemTxAttrs attrs)
{
    int ret;
    MSIMessage from = { 0, 0 }, to = { 0, 0 };
    uint16_t sid = AMDVI_IOAPIC_SB_DEVID;

    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
    from.data = (uint32_t) value;

    trace_amdvi_mem_ir_write_req(addr, value, size);

    if (!attrs.unspecified) {
        /* We have explicit Source ID */
        sid = attrs.requester_id;
    }

    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
    if (ret < 0) {
        /* TODO: log the event using IOMMU log event interface */
        error_report_once("failed to remap interrupt from devid 0x%x", sid);
        return MEMTX_ERROR;
    }

    apic_get_class(NULL)->send_msi(&to);

    trace_amdvi_mem_ir_write(to.address, to.data);
    return MEMTX_OK;
}

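/*
 * Reads from the interrupt remapping window carry no information; they
 * complete successfully without filling in any data.
 */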
static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
                                     uint64_t *data, unsigned size,
                                     MemTxAttrs attrs)
{
    return MEMTX_OK;
}

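/*
 * Only aligned 32-bit accesses are accepted in the IR window, matching
 * the width of an MSI data write.
 */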
static const MemoryRegionOps amdvi_ir_ops = {
    .read_with_attrs = amdvi_mem_ir_read,
    .write_with_attrs = amdvi_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    }
};

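/*
 * PCIIOMMUOps::get_address_space callback: lazily allocates one
 * AMDVIAddressSpace per (bus, devfn) and builds the overlapping region
 * hierarchy that implements DMA translation and interrupt remapping for
 * that device.
 */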
static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    char name[128];
    AMDVIState *s = opaque;
    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
    int bus_num = pci_bus_num(bus);

    iommu_as = s->address_spaces[bus_num];

    /* allocate memory during the first run */
    if (!iommu_as) {
        iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
        s->address_spaces[bus_num] = iommu_as;
    }

    /* set up AMD-Vi region */
    if (!iommu_as[devfn]) {
        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);

        iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
        iommu_as[devfn]->devfn = (uint8_t)devfn;
        iommu_as[devfn]->iommu_state = s;

        amdvi_dev_as = iommu_as[devfn];

        /*
         * The memory region relationships look like this (address ranges
         * show only the lower 32 bits for brevity):
         *
         * |--------------------+-------------------+----------|
         * | Name               | Address range     | Priority |
         * |--------------------+-------------------+----------|
         * | amdvi-root         | 00000000-ffffffff |        0 |
         * |  amdvi-iommu_nodma | 00000000-ffffffff |        0 |
         * |  amdvi-iommu_ir    | fee00000-feefffff |        1 |
         * |--------------------+-------------------+----------|
         */
        memory_region_init_iommu(&amdvi_dev_as->iommu,
                                 sizeof(amdvi_dev_as->iommu),
                                 TYPE_AMD_IOMMU_MEMORY_REGION,
                                 OBJECT(s),
                                 "amd_iommu", UINT64_MAX);
        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
                           "amdvi_root", UINT64_MAX);
        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            MEMORY_REGION(&amdvi_dev_as->iommu),
                                            0);

        /* Build the DMA Disabled alias to shared memory */
        memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s),
                                 "amdvi-sys", &s->mr_sys, 0,
                                 memory_region_size(&s->mr_sys));
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            &amdvi_dev_as->iommu_nodma,
                                            0);
        /* Build the Interrupt Remapping alias to shared memory */
        memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s),
                                 "amdvi-ir", &s->mr_ir, 0,
                                 memory_region_size(&s->mr_ir));
        memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu),
                                            AMDVI_INT_ADDR_FIRST,
                                            &amdvi_dev_as->iommu_ir, 1);

        memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
        memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true);
    }
    return &iommu_as[devfn]->as;
}

static const PCIIOMMUOps amdvi_iommu_ops = {
    .get_address_space = amdvi_host_dma_iommu,
};

static const MemoryRegionOps mmio_mem_ops = {
    .read = amdvi_mmio_read,
    .write = amdvi_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    }
};

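/*
 * IOMMU notifier registration hook: MAP notifiers are not currently
 * supported, so registration requests that include them are rejected;
 * UNMAP-only notifiers are accepted.
 */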
static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                           IOMMUNotifierFlag old,
                                           IOMMUNotifierFlag new,
                                           Error **errp)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);

    if (new & IOMMU_NOTIFIER_MAP) {
        error_setg(errp,
                   "device %02x.%02x.%x requires iommu notifier which is not "
                   "currently supported", as->bus_num, PCI_SLOT(as->devfn),
                   PCI_FUNC(as->devfn));
        return -EINVAL;
    }
    return 0;
}

static void amdvi_init(AMDVIState *s)
{
    amdvi_iotlb_reset(s);

    s->devtab_len = 0;
    s->cmdbuf_len = 0;
    s->cmdbuf_head = 0;
    s->cmdbuf_tail = 0;
    s->evtlog_head = 0;
    s->evtlog_tail = 0;
    s->excl_enabled = false;
    s->excl_allow = false;
    s->mmio_enabled = false;
    s->enabled = false;
    s->cmdbuf_enabled = false;

    /* reset MMIO */
    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
                   amdvi_extended_feature_register(s),
                   0xffffffffffffffef, 0);
    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
}

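/*
 * Realize the AMDVI-PCI companion device: install the AMD-Vi capability
 * (AMDVI_CAPAB_ID_SEC), MSI and HyperTransport capabilities, then program
 * the read-only capability registers with the fixed MMIO base address,
 * range and feature bits.
 */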
static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
{
    AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
    int ret;

    ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
                             AMDVI_CAPAB_SIZE, errp);
    if (ret < 0) {
        return;
    }
    s->capab_offset = ret;

    ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
                             AMDVI_CAPAB_REG_SIZE, errp);
    if (ret < 0) {
        return;
    }
    ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
                             AMDVI_CAPAB_REG_SIZE, errp);
    if (ret < 0) {
        return;
    }

    if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
        return;
    }

    /* reset device ident */
    pci_config_set_prog_interface(pdev->config, 0);

    /* reset AMDVI specific capabilities, all r/o */
    pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
                 AMDVI_BASE_ADDR & MAKE_64BIT_MASK(14, 18));
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
                 AMDVI_BASE_ADDR >> 32);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
                 0xff000000);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
                 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
}

static void amdvi_sysbus_reset(DeviceState *dev)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);

    msi_reset(&s->pci->dev);
    amdvi_init(s);
}

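/*
 * Migration stream layout, used only when the IOMMU is bound to a
 * user-created AMDVI-PCI device via the pci-id property; each field
 * mirrors guest-visible register, buffer or log state so the destination
 * resumes with identical MMIO contents.
 */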
static const VMStateDescription vmstate_amdvi_sysbus_migratable = {
    .name = "amd-iommu",
    .version_id = 1,
    .minimum_version_id = 1,
    .priority = MIG_PRI_IOMMU,
    .fields = (VMStateField[]) {
      /* Updated in amdvi_handle_control_write() */
      VMSTATE_BOOL(enabled, AMDVIState),
      VMSTATE_BOOL(ga_enabled, AMDVIState),
      /* bool ats_enabled is obsolete */
      VMSTATE_UNUSED(1), /* was ats_enabled */
      VMSTATE_BOOL(cmdbuf_enabled, AMDVIState),
      VMSTATE_BOOL(completion_wait_intr, AMDVIState),
      VMSTATE_BOOL(evtlog_enabled, AMDVIState),
      VMSTATE_BOOL(evtlog_intr, AMDVIState),
      /* Updated in amdvi_handle_devtab_write() */
      VMSTATE_UINT64(devtab, AMDVIState),
      VMSTATE_UINT64(devtab_len, AMDVIState),
      /* Updated in amdvi_handle_cmdbase_write() */
      VMSTATE_UINT64(cmdbuf, AMDVIState),
      VMSTATE_UINT64(cmdbuf_len, AMDVIState),
      /* Updated in amdvi_handle_cmdhead_write() */
      VMSTATE_UINT32(cmdbuf_head, AMDVIState),
      /* Updated in amdvi_handle_cmdtail_write() */
      VMSTATE_UINT32(cmdbuf_tail, AMDVIState),
      /* Updated in amdvi_handle_evtbase_write() */
      VMSTATE_UINT64(evtlog, AMDVIState),
      VMSTATE_UINT32(evtlog_len, AMDVIState),
      /* Updated in amdvi_handle_evthead_write() */
      VMSTATE_UINT32(evtlog_head, AMDVIState),
      /* Updated in amdvi_handle_evttail_write() */
      VMSTATE_UINT32(evtlog_tail, AMDVIState),
      /* Updated in amdvi_handle_pprbase_write() */
      VMSTATE_UINT64(ppr_log, AMDVIState),
      VMSTATE_UINT32(pprlog_len, AMDVIState),
      /* Updated in amdvi_handle_pprhead_write() */
      VMSTATE_UINT32(pprlog_head, AMDVIState),
      /* Updated in amdvi_handle_pprtail_write() */
      VMSTATE_UINT32(pprlog_tail, AMDVIState),
      /* MMIO registers */
      VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE),
      VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE),
      VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE),
      VMSTATE_END_OF_LIST()
    }
};

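/*
 * Realize the sysbus IOMMU: bind (or create) the AMDVI-PCI companion,
 * build the shared MMIO, no-DMA and interrupt-remapping regions, validate
 * the xtsup/x2APIC configuration and register the IOMMU with the root
 * PCI bus.
 */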
static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
{
    DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev));
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
    MachineState *ms = MACHINE(qdev_get_machine());
    PCMachineState *pcms = PC_MACHINE(ms);
    X86MachineState *x86ms = X86_MACHINE(ms);
    PCIBus *bus = pcms->pcibus;

    if (s->pci_id) {
        PCIDevice *pdev = NULL;
        int ret = pci_qdev_find_device(s->pci_id, &pdev);

        if (ret) {
            error_report("Cannot find PCI device '%s'", s->pci_id);
            return;
        }

        if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) {
            error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id);
            return;
        }

        s->pci = AMD_IOMMU_PCI(pdev);
        dc->vmsd = &vmstate_amdvi_sysbus_migratable;
    } else {
        s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI));
        /* This device should take care of IOMMU PCI properties */
        if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) {
            return;
        }
    }

    s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
                                     amdvi_uint64_equal, g_free, g_free);

    /* set up MMIO */
    memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
                          "amdvi-mmio", AMDVI_MMIO_SIZE);
    memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
                                &s->mr_mmio);

    /* Create the shared memory regions used by all devices */
    memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX);

    /* set up the DMA disabled memory region */
    memory_region_init_alias(&s->mr_nodma, OBJECT(s),
                             "amdvi-nodma", get_system_memory(), 0,
                             memory_region_size(get_system_memory()));
    memory_region_add_subregion_overlap(&s->mr_sys, 0,
                                        &s->mr_nodma, 0);

    /* set up the Interrupt Remapping memory region */
    memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops,
                          s, "amdvi-ir", AMDVI_INT_ADDR_SIZE);
    memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
                                        &s->mr_ir, 1);

    /* Pseudo address space under root PCI bus. */
    x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);

    if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) {
        error_report("AMD IOMMU with x2APIC configuration requires xtsup=on");
        exit(EXIT_FAILURE);
    }

    if (s->xtsup) {
        if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
            error_report("AMD IOMMU xtsup=on requires x2APIC support on "
                         "the KVM side");
            exit(EXIT_FAILURE);
        }
    }

    pci_setup_iommu(bus, &amdvi_iommu_ops, s);
    amdvi_init(s);
}

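/*
 * User-configurable properties: "xtsup" advertises x2APIC (XT) interrupt
 * support in the extended feature register; "pci-id" binds the IOMMU to a
 * user-created AMDVI-PCI device, which also makes the device migratable.
 */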
static const Property amdvi_properties[] = {
    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
    DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
};

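/*
 * Default migration description: the device is unmigratable unless
 * amdvi_sysbus_realize() swaps in vmstate_amdvi_sysbus_migratable when a
 * pci-id is supplied.
 */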
static const VMStateDescription vmstate_amdvi_sysbus = {
    .name = "amd-iommu",
    .unmigratable = 1
};

static void amdvi_sysbus_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);

    device_class_set_legacy_reset(dc, amdvi_sysbus_reset);
    dc->vmsd = &vmstate_amdvi_sysbus;
    dc->hotpluggable = false;
    dc_class->realize = amdvi_sysbus_realize;
    dc_class->int_remap = amdvi_int_remap;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
    device_class_set_props(dc, amdvi_properties);
}

static const TypeInfo amdvi_sysbus = {
    .name = TYPE_AMD_IOMMU_DEVICE,
    .parent = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(AMDVIState),
    .class_init = amdvi_sysbus_class_init
};

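/*
 * Identification for the companion PCI function; class code 0x0806 is the
 * PCI base-system-peripheral/IOMMU class.
 */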
static void amdvi_pci_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->vendor_id = PCI_VENDOR_ID_AMD;
    k->device_id = 0x1419;
    k->class_id = 0x0806;
    k->realize = amdvi_pci_realize;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}

static const TypeInfo amdvi_pci = {
    .name = TYPE_AMD_IOMMU_PCI,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(AMDVIPCIState),
    .class_init = amdvi_pci_class_init,
    .interfaces = (const InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static void amdvi_iommu_memory_region_class_init(ObjectClass *klass,
                                                 const void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = amdvi_translate;
    imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
}

static const TypeInfo amdvi_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_AMD_IOMMU_MEMORY_REGION,
    .class_init = amdvi_iommu_memory_region_class_init,
};

static void amdvi_register_types(void)
{
    type_register_static(&amdvi_pci);
    type_register_static(&amdvi_sysbus);
    type_register_static(&amdvi_iommu_memory_region_info);
}

type_init(amdvi_register_types);