1 /*
2 * QEMU emulation of AMD IOMMU (AMD-Vi)
3 *
4 * Copyright (C) 2011 Eduard - Gabriel Munteanu
5 * Copyright (C) 2015, 2016 David Kiarie Kahurani
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, see <http://www.gnu.org/licenses/>.
19 *
20 * Cache implementation inspired by hw/i386/intel_iommu.c
21 */
22
23 #include "qemu/osdep.h"
24 #include "hw/i386/pc.h"
25 #include "hw/pci/msi.h"
26 #include "hw/pci/pci_bus.h"
27 #include "migration/vmstate.h"
28 #include "amd_iommu.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "hw/i386/apic_internal.h"
32 #include "trace.h"
33 #include "hw/i386/apic-msidef.h"
34 #include "hw/qdev-properties.h"
35 #include "kvm/kvm_i386.h"
36
37 /* used AMD-Vi MMIO registers */
38 const char *amdvi_mmio_low[] = {
39 "AMDVI_MMIO_DEVTAB_BASE",
40 "AMDVI_MMIO_CMDBUF_BASE",
41 "AMDVI_MMIO_EVTLOG_BASE",
42 "AMDVI_MMIO_CONTROL",
43 "AMDVI_MMIO_EXCL_BASE",
44 "AMDVI_MMIO_EXCL_LIMIT",
45 "AMDVI_MMIO_EXT_FEATURES",
46 "AMDVI_MMIO_PPR_BASE",
47 "UNHANDLED"
48 };
49 const char *amdvi_mmio_high[] = {
50 "AMDVI_MMIO_COMMAND_HEAD",
51 "AMDVI_MMIO_COMMAND_TAIL",
52 "AMDVI_MMIO_EVTLOG_HEAD",
53 "AMDVI_MMIO_EVTLOG_TAIL",
54 "AMDVI_MMIO_STATUS",
55 "AMDVI_MMIO_PPR_HEAD",
56 "AMDVI_MMIO_PPR_TAIL",
57 "UNHANDLED"
58 };
59
60 struct AMDVIAddressSpace {
61 uint8_t bus_num; /* bus number */
62 uint8_t devfn; /* device function */
63 AMDVIState *iommu_state; /* AMDVI - one per machine */
64 MemoryRegion root; /* AMDVI Root memory map region */
65 IOMMUMemoryRegion iommu; /* Device's address translation region */
66 MemoryRegion iommu_nodma; /* Alias of shared nodma memory region */
67 MemoryRegion iommu_ir; /* Device's interrupt remapping region */
68 AddressSpace as; /* device's corresponding address space */
69 };
70
71 /* AMDVI cache entry */
72 typedef struct AMDVIIOTLBEntry {
73 uint16_t domid; /* assigned domain id */
74 uint16_t devid; /* device owning entry */
75 uint64_t perms; /* access permissions */
76 uint64_t translated_addr; /* translated address */
77 uint64_t page_mask; /* physical page size */
78 } AMDVIIOTLBEntry;
79
80 uint64_t amdvi_extended_feature_register(AMDVIState *s)
81 {
82 uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
83 if (s->xtsup) {
84 feature |= AMDVI_FEATURE_XT;
85 }
86
87 return feature;
88 }
89
90 /* configure MMIO registers at startup/reset */
91 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
92 uint64_t romask, uint64_t w1cmask)
93 {
94 stq_le_p(&s->mmior[addr], val);
95 stq_le_p(&s->romask[addr], romask);
96 stq_le_p(&s->w1cmask[addr], w1cmask);
97 }
98
99 static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
100 {
101 return lduw_le_p(&s->mmior[addr]);
102 }
103
104 static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
105 {
106 return ldl_le_p(&s->mmior[addr]);
107 }
108
109 static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
110 {
111 return ldq_le_p(&s->mmior[addr]);
112 }
113
114 /* internal write */
115 static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
116 {
117 stq_le_p(&s->mmior[addr], val);
118 }
119
120 /* external write */
121 static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
122 {
123 uint16_t romask = lduw_le_p(&s->romask[addr]);
124 uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
125 uint16_t oldval = lduw_le_p(&s->mmior[addr]);
126 stw_le_p(&s->mmior[addr],
127 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
128 }
129
130 static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
131 {
132 uint32_t romask = ldl_le_p(&s->romask[addr]);
133 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
134 uint32_t oldval = ldl_le_p(&s->mmior[addr]);
135 stl_le_p(&s->mmior[addr],
136 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
137 }
138
139 static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
140 {
141 uint64_t romask = ldq_le_p(&s->romask[addr]);
142 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
143 uint64_t oldval = ldq_le_p(&s->mmior[addr]);
144 stq_le_p(&s->mmior[addr],
145 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
146 }
147
148 /* OR a 64-bit register with a 64-bit value */
149 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
150 {
151 return amdvi_readq(s, addr) | val;
152 }
153
154 /* OR a 64-bit register with a 64-bit value storing result in the register */
155 static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
156 {
157 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
158 }
159
160 /* AND a 64-bit register with a 64-bit value storing result in the register */
161 static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
162 {
163 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
164 }
165
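/* deliver the IOMMU's own MSI to the guest, if MSI is enabled on its PCI function */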
166 static void amdvi_generate_msi_interrupt(AMDVIState *s)
167 {
168 MSIMessage msg = {};
169 MemTxAttrs attrs = {
170 .requester_id = pci_requester_id(&s->pci.dev)
171 };
172
173 if (msi_enabled(&s->pci.dev)) {
174 msg = msi_get_message(&s->pci.dev, 0);
175 address_space_stl_le(&address_space_memory, msg.address, msg.data,
176 attrs, NULL);
177 }
178 }
179
180 static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
181 {
182 /* event logging not enabled */
183 if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
184 AMDVI_MMIO_STATUS_EVT_OVF)) {
185 return;
186 }
187
188 /* event log buffer full */
189 if (s->evtlog_tail >= s->evtlog_len) {
190 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
191 /* generate interrupt */
192 amdvi_generate_msi_interrupt(s);
193 return;
194 }
195
196 if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
197 evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
198 trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
199 }
200
201 s->evtlog_tail += AMDVI_EVENT_LEN;
202 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
203 amdvi_generate_msi_interrupt(s);
204 }
205
206 static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
207 int length)
208 {
209 int index = start / 64, bitpos = start % 64;
210 uint64_t mask = MAKE_64BIT_MASK(start, length);
211 buffer[index] &= ~mask;
212 buffer[index] |= (value << bitpos) & mask;
213 }
214 /*
215 * AMDVi event structure
216 * 0:15 -> DeviceID
217 * 48:63 -> event type + miscellaneous info
218 * 64:127 -> related address
219 */
220 static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
221 uint16_t info)
222 {
223 evt[0] = 0;
224 evt[1] = 0;
225
226 amdvi_setevent_bits(evt, devid, 0, 16);
227 amdvi_setevent_bits(evt, info, 48, 16);
228 amdvi_setevent_bits(evt, addr, 64, 64);
229 }
230 /* log an error encountered during a page walk
231 *
232 * @addr: virtual address in translation request
233 */
234 static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
235 hwaddr addr, uint16_t info)
236 {
237 uint64_t evt[2];
238
239 info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
240 amdvi_encode_event(evt, devid, addr, info);
241 amdvi_log_event(s, evt);
242 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
243 PCI_STATUS_SIG_TARGET_ABORT);
244 }
245 /*
246 * log a master abort accessing device table
247 * @devtab : address of device table entry
248 * @info : error flags
249 */
250 static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
251 hwaddr devtab, uint16_t info)
252 {
253 uint64_t evt[2];
254
255 info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
256
257 amdvi_encode_event(evt, devid, devtab, info);
258 amdvi_log_event(s, evt);
259 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
260 PCI_STATUS_SIG_TARGET_ABORT);
261 }
262 /* log an event trying to access command buffer
263 * @addr : address that couldn't be accessed
264 */
265 static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
266 {
267 uint64_t evt[2];
268 uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;
269
270 amdvi_encode_event(evt, 0, addr, info);
271 amdvi_log_event(s, evt);
272 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
273 PCI_STATUS_SIG_TARGET_ABORT);
274 }
275 /* log an illegal command event
276 * @addr : address of illegal command
277 */
278 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
279 hwaddr addr)
280 {
281 uint64_t evt[2];
282
283 info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
284 amdvi_encode_event(evt, 0, addr, info);
285 amdvi_log_event(s, evt);
286 }
287 /* log an error accessing device table
288 *
289 * @devid : device owning the table entry
290 * @devtab : address of device table entry
291 * @info : error flags
292 */
293 static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
294 hwaddr addr, uint16_t info)
295 {
296 uint64_t evt[2];
297
298 info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
299 amdvi_encode_event(evt, devid, addr, info);
300 amdvi_log_event(s, evt);
301 }
302 /* log an error accessing a PTE entry
303 * @addr : address that couldn't be accessed
304 */
305 static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
306 hwaddr addr, uint16_t info)
307 {
308 uint64_t evt[2];
309
310 info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
311 amdvi_encode_event(evt, devid, addr, info);
312 amdvi_log_event(s, evt);
313 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
314 PCI_STATUS_SIG_TARGET_ABORT);
315 }
316
317 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
318 {
319 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
320 }
321
322 static guint amdvi_uint64_hash(gconstpointer v)
323 {
324 return (guint)*(const uint64_t *)v;
325 }
326
327 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
328 uint64_t devid)
329 {
330 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
331 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
332 return g_hash_table_lookup(s->iotlb, &key);
333 }
334
335 static void amdvi_iotlb_reset(AMDVIState *s)
336 {
337 assert(s->iotlb);
338 trace_amdvi_iotlb_reset();
339 g_hash_table_remove_all(s->iotlb);
340 }
341
342 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
343 gpointer user_data)
344 {
345 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
346 uint16_t devid = *(uint16_t *)user_data;
347 return entry->devid == devid;
348 }
349
350 static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
351 uint64_t devid)
352 {
353 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
354 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
355 g_hash_table_remove(s->iotlb, &key);
356 }
357
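/* cache a successful translation; the whole IOTLB is flushed once it reaches AMDVI_IOTLB_MAX_SIZE entries */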
358 static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
359 uint64_t gpa, IOMMUTLBEntry to_cache,
360 uint16_t domid)
361 {
362 /* don't cache erroneous translations */
363 if (to_cache.perm != IOMMU_NONE) {
364 AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
365 uint64_t *key = g_new(uint64_t, 1);
366 uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
367
368 trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
369 PCI_FUNC(devid), gpa, to_cache.translated_addr);
370
371 if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
372 amdvi_iotlb_reset(s);
373 }
374
375 entry->domid = domid;
376 entry->perms = to_cache.perm;
377 entry->translated_addr = to_cache.translated_addr;
378 entry->page_mask = to_cache.addr_mask;
379 *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
380 g_hash_table_replace(s->iotlb, key, entry);
381 }
382 }
383
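/* COMPLETION_WAIT: optionally store the 64-bit completion data at the given address and/or raise a completion interrupt */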
384 static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
385 {
386 /* pad the last 3 bits */
387 hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
388 uint64_t data = cpu_to_le64(cmd[1]);
389
390 if (extract64(cmd[0], 52, 8)) {
391 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
392 s->cmdbuf + s->cmdbuf_head);
393 }
394 if (extract64(cmd[0], 0, 1)) {
395 if (dma_memory_write(&address_space_memory, addr, &data,
396 AMDVI_COMPLETION_DATA_SIZE,
397 MEMTXATTRS_UNSPECIFIED)) {
398 trace_amdvi_completion_wait_fail(addr);
399 }
400 }
401 /* set completion interrupt */
402 if (extract64(cmd[0], 1, 1)) {
403 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
404 /* generate interrupt */
405 amdvi_generate_msi_interrupt(s);
406 }
407 trace_amdvi_completion_wait(addr, data);
408 }
409
410 /* log error without aborting since linux seems to be using reserved bits */
411 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
412 {
413 uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
414
415 /* This command should invalidate internal caches, of which there aren't any */
416 if (extract64(cmd[0], 16, 44) || cmd[1]) {
417 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
418 s->cmdbuf + s->cmdbuf_head);
419 }
420 trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
421 PCI_FUNC(devid));
422 }
423
424 static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
425 {
426 if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) ||
427 extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
428 || extract64(cmd[1], 48, 16)) {
429 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
430 s->cmdbuf + s->cmdbuf_head);
431 }
432 trace_amdvi_ppr_exec();
433 }
434
435 static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global,
436 uint32_t index, uint32_t mask)
437 {
438 x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
439 }
440
441 static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
442 {
443 if (extract64(cmd[0], 0, 60) || cmd[1]) {
444 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
445 s->cmdbuf + s->cmdbuf_head);
446 }
447
448 /* Notify global invalidation */
449 amdvi_intremap_inval_notify_all(s, true, 0, 0);
450
451 amdvi_iotlb_reset(s);
452 trace_amdvi_all_inval();
453 }
454
455 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
456 gpointer user_data)
457 {
458 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
459 uint16_t domid = *(uint16_t *)user_data;
460 return entry->domid == domid;
461 }
462
463 /* we don't have devid - we can't remove pages by address */
464 static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
465 {
466 uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
467
468 if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
469 extract64(cmd[1], 3, 9)) {
470 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
471 s->cmdbuf + s->cmdbuf_head);
472 }
473
474 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
475 &domid);
476 trace_amdvi_pages_inval(domid);
477 }
478
479 static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
480 {
481 if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
482 extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
483 extract64(cmd[1], 5, 7)) {
484 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
485 s->cmdbuf + s->cmdbuf_head);
486 }
487
488 trace_amdvi_prefetch_pages();
489 }
490
491 static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
492 {
493 if (extract64(cmd[0], 16, 44) || cmd[1]) {
494 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
495 s->cmdbuf + s->cmdbuf_head);
496 return;
497 }
498
499 /* Notify global invalidation */
500 amdvi_intremap_inval_notify_all(s, true, 0, 0);
501
502 trace_amdvi_intr_inval();
503 }
504
505 /* FIXME: Try to work with the specified size instead of all the pages
506 * when the S bit is on
507 */
508 static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
509 {
510
511 uint16_t devid = extract64(cmd[0], 0, 16);
512 if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
513 extract64(cmd[1], 6, 6)) {
514 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
515 s->cmdbuf + s->cmdbuf_head);
516 return;
517 }
518
519 if (extract64(cmd[1], 0, 1)) {
520 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
521 &devid);
522 } else {
523 amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
524 cpu_to_le16(extract64(cmd[1], 0, 16)));
525 }
526 trace_amdvi_iotlb_inval();
527 }
528
529 /* not honouring reserved bits is regarded as an illegal command */
530 static void amdvi_cmdbuf_exec(AMDVIState *s)
531 {
532 uint64_t cmd[2];
533
534 if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
535 cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
536 trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
537 amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
538 return;
539 }
540
541 switch (extract64(cmd[0], 60, 4)) {
542 case AMDVI_CMD_COMPLETION_WAIT:
543 amdvi_completion_wait(s, cmd);
544 break;
545 case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
546 amdvi_inval_devtab_entry(s, cmd);
547 break;
548 case AMDVI_CMD_INVAL_AMDVI_PAGES:
549 amdvi_inval_pages(s, cmd);
550 break;
551 case AMDVI_CMD_INVAL_IOTLB_PAGES:
552 iommu_inval_iotlb(s, cmd);
553 break;
554 case AMDVI_CMD_INVAL_INTR_TABLE:
555 amdvi_inval_inttable(s, cmd);
556 break;
557 case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
558 amdvi_prefetch_pages(s, cmd);
559 break;
560 case AMDVI_CMD_COMPLETE_PPR_REQUEST:
561 amdvi_complete_ppr(s, cmd);
562 break;
563 case AMDVI_CMD_INVAL_AMDVI_ALL:
564 amdvi_inval_all(s, cmd);
565 break;
566 default:
567 trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4));
568 /* log illegal command */
569 amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4),
570 s->cmdbuf + s->cmdbuf_head);
571 }
572 }
573
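/* fetch and execute commands from the guest command buffer until the head pointer catches up with the tail */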
574 static void amdvi_cmdbuf_run(AMDVIState *s)
575 {
576 if (!s->cmdbuf_enabled) {
577 trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
578 return;
579 }
580
581 /* check if there is work to do. */
582 while (s->cmdbuf_head != s->cmdbuf_tail) {
583 trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
584 amdvi_cmdbuf_exec(s);
585 s->cmdbuf_head += AMDVI_COMMAND_SIZE;
586 amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);
587
588 /* wrap head pointer */
589 if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
590 s->cmdbuf_head = 0;
591 }
592 }
593 }
594
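/* trace an MMIO access, resolving the register name from the tables above */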
595 static void amdvi_mmio_trace(hwaddr addr, unsigned size)
596 {
597 uint8_t index = (addr & ~0x2000) / 8;
598
599 if ((addr & 0x2000)) {
600 /* high table */
601 index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
602 trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
603 } else {
604 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
605 trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
606 }
607 }
608
609 static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
610 {
611 AMDVIState *s = opaque;
612
613 uint64_t val = -1;
614 if (addr + size > AMDVI_MMIO_SIZE) {
615 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
616 return (uint64_t)-1;
617 }
618
619 if (size == 2) {
620 val = amdvi_readw(s, addr);
621 } else if (size == 4) {
622 val = amdvi_readl(s, addr);
623 } else if (size == 8) {
624 val = amdvi_readq(s, addr);
625 }
626 amdvi_mmio_trace(addr, size);
627
628 return val;
629 }
630
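/* refresh the cached enable flags from the control register, mirror them in the status register and kick the command buffer */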
631 static void amdvi_handle_control_write(AMDVIState *s)
632 {
633 unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
634 s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
635
636 s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
637 s->evtlog_enabled = s->enabled && !!(control &
638 AMDVI_MMIO_CONTROL_EVENTLOGEN);
639
640 s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
641 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
642 s->cmdbuf_enabled = s->enabled && !!(control &
643 AMDVI_MMIO_CONTROL_CMDBUFLEN);
644 s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
645
646 /* update the flags depending on the control register */
647 if (s->cmdbuf_enabled) {
648 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
649 } else {
650 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
651 }
652 if (s->evtlog_enabled) {
653 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
654 } else {
655 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
656 }
657
658 trace_amdvi_control_status(control);
659 amdvi_cmdbuf_run(s);
660 }
661
662 static inline void amdvi_handle_devtab_write(AMDVIState *s)
663
664 {
665 uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
666 s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
667
668 /* set device table length */
669 s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
670 (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
671 AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
672 }
673
674 static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
675 {
676 s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
677 & AMDVI_MMIO_CMDBUF_HEAD_MASK;
678 amdvi_cmdbuf_run(s);
679 }
680
681 static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
682 {
683 s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
684 & AMDVI_MMIO_CMDBUF_BASE_MASK;
685 s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
686 & AMDVI_MMIO_CMDBUF_SIZE_MASK);
687 s->cmdbuf_head = s->cmdbuf_tail = 0;
688 }
689
690 static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
691 {
692 s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
693 & AMDVI_MMIO_CMDBUF_TAIL_MASK;
694 amdvi_cmdbuf_run(s);
695 }
696
697 static inline void amdvi_handle_excllim_write(AMDVIState *s)
698 {
699 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
700 s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
701 AMDVI_MMIO_EXCL_LIMIT_LOW;
702 }
703
704 static inline void amdvi_handle_evtbase_write(AMDVIState *s)
705 {
706 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
707 s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
708 s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
709 & AMDVI_MMIO_EVTLOG_SIZE_MASK);
710 }
711
712 static inline void amdvi_handle_evttail_write(AMDVIState *s)
713 {
714 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
715 s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
716 }
717
718 static inline void amdvi_handle_evthead_write(AMDVIState *s)
719 {
720 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
721 s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
722 }
723
724 static inline void amdvi_handle_pprbase_write(AMDVIState *s)
725 {
726 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
727 s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
728 s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
729 & AMDVI_MMIO_PPRLOG_SIZE_MASK);
730 }
731
732 static inline void amdvi_handle_pprhead_write(AMDVIState *s)
733 {
734 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
735 s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
736 }
737
738 static inline void amdvi_handle_pprtail_write(AMDVIState *s)
739 {
740 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
741 s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
742 }
743
744 /* FIXME: something might go wrong if System Software writes in chunks
745 * of one byte; Linux writes in chunks of 4 bytes, so this currently
746 * works correctly with Linux but will definitely break if software
747 * reads/writes 8 bytes
748 */
749 static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
750 hwaddr addr)
751 {
752 if (size == 2) {
753 amdvi_writew(s, addr, val);
754 } else if (size == 4) {
755 amdvi_writel(s, addr, val);
756 } else if (size == 8) {
757 amdvi_writeq(s, addr, val);
758 }
759 }
760
761 static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
762 unsigned size)
763 {
764 AMDVIState *s = opaque;
765 unsigned long offset = addr & 0x07;
766
767 if (addr + size > AMDVI_MMIO_SIZE) {
768 trace_amdvi_mmio_write("error: addr outside region: max ",
769 (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
770 return;
771 }
772
773 amdvi_mmio_trace(addr, size);
774 switch (addr & ~0x07) {
775 case AMDVI_MMIO_CONTROL:
776 amdvi_mmio_reg_write(s, size, val, addr);
777 amdvi_handle_control_write(s);
778 break;
779 case AMDVI_MMIO_DEVICE_TABLE:
780 amdvi_mmio_reg_write(s, size, val, addr);
781 /* set device table address
782 * This also suffers from inability to tell whether software
783 * is done writing
784 */
785 if (offset || (size == 8)) {
786 amdvi_handle_devtab_write(s);
787 }
788 break;
789 case AMDVI_MMIO_COMMAND_HEAD:
790 amdvi_mmio_reg_write(s, size, val, addr);
791 amdvi_handle_cmdhead_write(s);
792 break;
793 case AMDVI_MMIO_COMMAND_BASE:
794 amdvi_mmio_reg_write(s, size, val, addr);
795 /* FIXME - make sure System Software has finished writing in case
796 * it writes in chunks of less than 8 bytes in a robust way. As for
797 * now, this hack works for the linux driver
798 */
799 if (offset || (size == 8)) {
800 amdvi_handle_cmdbase_write(s);
801 }
802 break;
803 case AMDVI_MMIO_COMMAND_TAIL:
804 amdvi_mmio_reg_write(s, size, val, addr);
805 amdvi_handle_cmdtail_write(s);
806 break;
807 case AMDVI_MMIO_EVENT_BASE:
808 amdvi_mmio_reg_write(s, size, val, addr);
809 amdvi_handle_evtbase_write(s);
810 break;
811 case AMDVI_MMIO_EVENT_HEAD:
812 amdvi_mmio_reg_write(s, size, val, addr);
813 amdvi_handle_evthead_write(s);
814 break;
815 case AMDVI_MMIO_EVENT_TAIL:
816 amdvi_mmio_reg_write(s, size, val, addr);
817 amdvi_handle_evttail_write(s);
818 break;
819 case AMDVI_MMIO_EXCL_LIMIT:
820 amdvi_mmio_reg_write(s, size, val, addr);
821 amdvi_handle_excllim_write(s);
822 break;
823 /* PPR log base - unused for now */
824 case AMDVI_MMIO_PPR_BASE:
825 amdvi_mmio_reg_write(s, size, val, addr);
826 amdvi_handle_pprbase_write(s);
827 break;
828 /* PPR log head - also unused for now */
829 case AMDVI_MMIO_PPR_HEAD:
830 amdvi_mmio_reg_write(s, size, val, addr);
831 amdvi_handle_pprhead_write(s);
832 break;
833 /* PPR log tail - unused for now */
834 case AMDVI_MMIO_PPR_TAIL:
835 amdvi_mmio_reg_write(s, size, val, addr);
836 amdvi_handle_pprtail_write(s);
837 break;
838 }
839 }
840
841 static inline uint64_t amdvi_get_perms(uint64_t entry)
842 {
843 return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
844 AMDVI_DEV_PERM_SHIFT;
845 }
846
847 /* validate that reserved bits are honoured */
848 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
849 uint64_t *dte)
850 {
851 if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
852 || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
853 || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
854 amdvi_log_illegaldevtab_error(s, devid,
855 s->devtab +
856 devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
857 return false;
858 }
859
860 return true;
861 }
862
863 /* get a device table entry given the devid */
864 static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
865 {
866 uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
867
868 if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
869 AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
870 trace_amdvi_dte_get_fail(s->devtab, offset);
871 /* log error accessing dte */
872 amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
873 return false;
874 }
875
876 *entry = le64_to_cpu(*entry);
877 if (!amdvi_validate_dte(s, devid, entry)) {
878 trace_amdvi_invalid_dte(entry[0]);
879 return false;
880 }
881
882 return true;
883 }
884
885 /* get pte translation mode */
886 static inline uint8_t get_pte_translation_mode(uint64_t pte)
887 {
888 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
889 }
890
891 static inline uint64_t pte_override_page_mask(uint64_t pte)
892 {
893 uint8_t page_mask = 13;
894 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
895 /* find the first zero bit */
896 while (addr & 1) {
897 page_mask++;
898 addr = addr >> 1;
899 }
900
901 return ~((1ULL << page_mask) - 1);
902 }
903
904 static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
905 {
906 return ~((1UL << ((oldlevel * 9) + 3)) - 1);
907 }
908
909 static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
910 uint16_t devid)
911 {
912 uint64_t pte;
913
914 if (dma_memory_read(&address_space_memory, pte_addr,
915 &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
916 trace_amdvi_get_pte_hwerror(pte_addr);
917 amdvi_log_pagetab_error(s, devid, pte_addr, 0);
918 pte = 0;
919 return pte;
920 }
921
922 pte = le64_to_cpu(pte);
923 return pte;
924 }
925
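/* walk the I/O page table referenced by the DTE and fill @ret with the resulting translation */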
926 static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
927 IOMMUTLBEntry *ret, unsigned perms,
928 hwaddr addr)
929 {
930 unsigned level, present, pte_perms, oldlevel;
931 uint64_t pte = dte[0], pte_addr, page_mask;
932
933 /* make sure the DTE has TV = 1 */
934 if (pte & AMDVI_DEV_TRANSLATION_VALID) {
935 level = get_pte_translation_mode(pte);
936 if (level >= 7) {
937 trace_amdvi_mode_invalid(level, addr);
938 return;
939 }
940 if (level == 0) {
941 goto no_remap;
942 }
943
944 /* we are at the leaf page table or page table encodes a huge page */
945 do {
946 pte_perms = amdvi_get_perms(pte);
947 present = pte & 1;
948 if (!present || perms != (perms & pte_perms)) {
949 amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
950 trace_amdvi_page_fault(addr);
951 return;
952 }
953
954 /* go to the next lower level */
955 pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
956 /* add offset and load pte */
957 pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
958 pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
959 if (!pte) {
960 return;
961 }
962 oldlevel = level;
963 level = get_pte_translation_mode(pte);
964 } while (level > 0 && level < 7);
965
966 if (level == 0x7) {
967 page_mask = pte_override_page_mask(pte);
968 } else {
969 page_mask = pte_get_page_mask(oldlevel);
970 }
971
972 /* get access permissions from pte */
973 ret->iova = addr & page_mask;
974 ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
975 ret->addr_mask = ~page_mask;
976 ret->perm = amdvi_get_perms(pte);
977 return;
978 }
979 no_remap:
980 ret->iova = addr & AMDVI_PAGE_MASK_4K;
981 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
982 ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
983 ret->perm = amdvi_get_perms(pte);
984 }
985
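/* translate @addr for this device: look up the IOTLB first, then fall back to the device table and a page walk */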
986 static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
987 bool is_write, IOMMUTLBEntry *ret)
988 {
989 AMDVIState *s = as->iommu_state;
990 uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
991 AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
992 uint64_t entry[4];
993
994 if (iotlb_entry) {
995 trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
996 PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
997 ret->iova = addr & ~iotlb_entry->page_mask;
998 ret->translated_addr = iotlb_entry->translated_addr;
999 ret->addr_mask = iotlb_entry->page_mask;
1000 ret->perm = iotlb_entry->perms;
1001 return;
1002 }
1003
1004 if (!amdvi_get_dte(s, devid, entry)) {
1005 return;
1006 }
1007
1008 /* devices with V = 0 are not translated */
1009 if (!(entry[0] & AMDVI_DEV_VALID)) {
1010 goto out;
1011 }
1012
1013 amdvi_page_walk(as, entry, ret,
1014 is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);
1015
1016 amdvi_update_iotlb(s, devid, addr, *ret,
1017 entry[1] & AMDVI_DEV_DOMID_ID_MASK);
1018 return;
1019
1020 out:
1021 ret->iova = addr & AMDVI_PAGE_MASK_4K;
1022 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
1023 ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
1024 ret->perm = IOMMU_RW;
1025 }
1026
1027 static inline bool amdvi_is_interrupt_addr(hwaddr addr)
1028 {
1029 return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
1030 }
1031
1032 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
1033 IOMMUAccessFlags flag, int iommu_idx)
1034 {
1035 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1036 AMDVIState *s = as->iommu_state;
1037 IOMMUTLBEntry ret = {
1038 .target_as = &address_space_memory,
1039 .iova = addr,
1040 .translated_addr = 0,
1041 .addr_mask = ~(hwaddr)0,
1042 .perm = IOMMU_NONE
1043 };
1044
1045 if (!s->enabled) {
1046 /* AMDVI disabled - corresponds to iommu=off not
1047 * failure to provide any parameter
1048 */
1049 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1050 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1051 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1052 ret.perm = IOMMU_RW;
1053 return ret;
1054 } else if (amdvi_is_interrupt_addr(addr)) {
1055 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1056 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1057 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1058 ret.perm = IOMMU_WO;
1059 return ret;
1060 }
1061
1062 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
1063 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
1064 PCI_FUNC(as->devfn), addr, ret.translated_addr);
1065 return ret;
1066 }
1067
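/* read the legacy 32-bit IRTE selected by the MSI data from the interrupt table referenced by the DTE */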
1068 static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
1069 union irte *irte, uint16_t devid)
1070 {
1071 uint64_t irte_root, offset;
1072
1073 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
1074 offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;
1075
1076 trace_amdvi_ir_irte(irte_root, offset);
1077
1078 if (dma_memory_read(&address_space_memory, irte_root + offset,
1079 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
1080 trace_amdvi_ir_err("failed to get irte");
1081 return -AMDVI_IR_GET_IRTE;
1082 }
1083
1084 trace_amdvi_ir_irte_val(irte->val);
1085
1086 return 0;
1087 }
1088
1089 static int amdvi_int_remap_legacy(AMDVIState *iommu,
1090 MSIMessage *origin,
1091 MSIMessage *translated,
1092 uint64_t *dte,
1093 X86IOMMUIrq *irq,
1094 uint16_t sid)
1095 {
1096 int ret;
1097 union irte irte;
1098
1099 /* get interrupt remapping table */
1100 ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
1101 if (ret < 0) {
1102 return ret;
1103 }
1104
1105 if (!irte.fields.valid) {
1106 trace_amdvi_ir_target_abort("RemapEn is disabled");
1107 return -AMDVI_IR_TARGET_ABORT;
1108 }
1109
1110 if (irte.fields.guest_mode) {
1111 error_report_once("guest mode is not zero");
1112 return -AMDVI_IR_ERR;
1113 }
1114
1115 if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
1116 error_report_once("reserved int_type");
1117 return -AMDVI_IR_ERR;
1118 }
1119
1120 irq->delivery_mode = irte.fields.int_type;
1121 irq->vector = irte.fields.vector;
1122 irq->dest_mode = irte.fields.dm;
1123 irq->redir_hint = irte.fields.rq_eoi;
1124 irq->dest = irte.fields.destination;
1125
1126 return 0;
1127 }
1128
1129 static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
1130 struct irte_ga *irte, uint16_t devid)
1131 {
1132 uint64_t irte_root, offset;
1133
1134 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
1135 offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
1136 trace_amdvi_ir_irte(irte_root, offset);
1137
1138 if (dma_memory_read(&address_space_memory, irte_root + offset,
1139 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
1140 trace_amdvi_ir_err("failed to get irte_ga");
1141 return -AMDVI_IR_GET_IRTE;
1142 }
1143
1144 trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
1145 return 0;
1146 }
1147
1148 static int amdvi_int_remap_ga(AMDVIState *iommu,
1149 MSIMessage *origin,
1150 MSIMessage *translated,
1151 uint64_t *dte,
1152 X86IOMMUIrq *irq,
1153 uint16_t sid)
1154 {
1155 int ret;
1156 struct irte_ga irte;
1157
1158 /* get interrupt remapping table */
1159 ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
1160 if (ret < 0) {
1161 return ret;
1162 }
1163
1164 if (!irte.lo.fields_remap.valid) {
1165 trace_amdvi_ir_target_abort("RemapEn is disabled");
1166 return -AMDVI_IR_TARGET_ABORT;
1167 }
1168
1169 if (irte.lo.fields_remap.guest_mode) {
1170 error_report_once("guest mode is not zero");
1171 return -AMDVI_IR_ERR;
1172 }
1173
1174 if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
1175 error_report_once("reserved int_type is set");
1176 return -AMDVI_IR_ERR;
1177 }
1178
1179 irq->delivery_mode = irte.lo.fields_remap.int_type;
1180 irq->vector = irte.hi.fields.vector;
1181 irq->dest_mode = irte.lo.fields_remap.dm;
1182 irq->redir_hint = irte.lo.fields_remap.rq_eoi;
1183 if (iommu->xtsup) {
1184 irq->dest = irte.lo.fields_remap.destination |
1185 (irte.hi.fields.destination_hi << 24);
1186 } else {
1187 irq->dest = irte.lo.fields_remap.destination & 0xff;
1188 }
1189
1190 return 0;
1191 }
1192
1193 static int __amdvi_int_remap_msi(AMDVIState *iommu,
1194 MSIMessage *origin,
1195 MSIMessage *translated,
1196 uint64_t *dte,
1197 X86IOMMUIrq *irq,
1198 uint16_t sid)
1199 {
1200 int ret;
1201 uint8_t int_ctl;
1202
1203 int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
1204 trace_amdvi_ir_intctl(int_ctl);
1205
1206 switch (int_ctl) {
1207 case AMDVI_IR_INTCTL_PASS:
1208 memcpy(translated, origin, sizeof(*origin));
1209 return 0;
1210 case AMDVI_IR_INTCTL_REMAP:
1211 break;
1212 case AMDVI_IR_INTCTL_ABORT:
1213 trace_amdvi_ir_target_abort("int_ctl abort");
1214 return -AMDVI_IR_TARGET_ABORT;
1215 default:
1216 trace_amdvi_ir_err("int_ctl reserved");
1217 return -AMDVI_IR_ERR;
1218 }
1219
1220 if (iommu->ga_enabled) {
1221 ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
1222 } else {
1223 ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
1224 }
1225
1226 return ret;
1227 }
1228
1229 /* Interrupt remapping for MSI/MSI-X entry */
1230 static int amdvi_int_remap_msi(AMDVIState *iommu,
1231 MSIMessage *origin,
1232 MSIMessage *translated,
1233 uint16_t sid)
1234 {
1235 int ret = 0;
1236 uint64_t pass = 0;
1237 uint64_t dte[4] = { 0 };
1238 X86IOMMUIrq irq = { 0 };
1239 uint8_t dest_mode, delivery_mode;
1240
1241 assert(origin && translated);
1242
1243 /*
1244 * When IOMMU is enabled, interrupt remap request will come either from
1245 * IO-APIC or PCI device. If interrupt is from PCI device then it will
1246 * have a valid requester id but if the interrupt is from IO-APIC
1247 * then requester id will be invalid.
1248 */
1249 if (sid == X86_IOMMU_SID_INVALID) {
1250 sid = AMDVI_IOAPIC_SB_DEVID;
1251 }
1252
1253 trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
1254
1255 /* check if device table entry is set before we go further. */
1256 if (!iommu || !iommu->devtab_len) {
1257 memcpy(translated, origin, sizeof(*origin));
1258 goto out;
1259 }
1260
1261 if (!amdvi_get_dte(iommu, sid, dte)) {
1262 return -AMDVI_IR_ERR;
1263 }
1264
1265 /* Check if IR is enabled in DTE */
1266 if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
1267 memcpy(translated, origin, sizeof(*origin));
1268 goto out;
1269 }
1270
1271 /* validate that we are configured with intremap=on */
1272 if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
1273 trace_amdvi_err("Interrupt remapping is enabled in the guest but "
1274 "not in the host. Use intremap=on to enable interrupt "
1275 "remapping in amd-iommu.");
1276 return -AMDVI_IR_ERR;
1277 }
1278
1279 if (origin->address < AMDVI_INT_ADDR_FIRST ||
1280 origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
1281 trace_amdvi_err("MSI is not from IOAPIC.");
1282 return -AMDVI_IR_ERR;
1283 }
1284
1285 /*
1286 * The MSI data register [10:8] are used to get the upstream interrupt type.
1287 *
1288 * See MSI/MSI-X format:
1289 * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
1290 * (page 5)
1291 */
1292 delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
1293
1294 switch (delivery_mode) {
1295 case AMDVI_IOAPIC_INT_TYPE_FIXED:
1296 case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
1297 trace_amdvi_ir_delivery_mode("fixed/arbitrated");
1298 ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
1299 if (ret < 0) {
1300 goto remap_fail;
1301 } else {
1302 /* Translate IRQ to MSI messages */
1303 x86_iommu_irq_to_msi_message(&irq, translated);
1304 goto out;
1305 }
1306 break;
1307 case AMDVI_IOAPIC_INT_TYPE_SMI:
1308 error_report("SMI is not supported!");
1309 ret = -AMDVI_IR_ERR;
1310 break;
1311 case AMDVI_IOAPIC_INT_TYPE_NMI:
1312 pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
1313 trace_amdvi_ir_delivery_mode("nmi");
1314 break;
1315 case AMDVI_IOAPIC_INT_TYPE_INIT:
1316 pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
1317 trace_amdvi_ir_delivery_mode("init");
1318 break;
1319 case AMDVI_IOAPIC_INT_TYPE_EINT:
1320 pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
1321 trace_amdvi_ir_delivery_mode("eint");
1322 break;
1323 default:
1324 trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
1325 ret = -AMDVI_IR_ERR;
1326 break;
1327 }
1328
1329 if (ret < 0) {
1330 goto remap_fail;
1331 }
1332
1333 /*
1334 * The MSI address register bit[2] is used to get the destination
1335 * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
1336 * only.
1337 */
1338 dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
1339 if (dest_mode) {
1340 trace_amdvi_ir_err("invalid dest_mode");
1341 ret = -AMDVI_IR_ERR;
1342 goto remap_fail;
1343 }
1344
1345 if (pass) {
1346 memcpy(translated, origin, sizeof(*origin));
1347 } else {
1348 trace_amdvi_ir_err("passthrough is not enabled");
1349 ret = -AMDVI_IR_ERR;
1350 goto remap_fail;
1351 }
1352
1353 out:
1354 trace_amdvi_ir_remap_msi(origin->address, origin->data,
1355 translated->address, translated->data);
1356 return 0;
1357
1358 remap_fail:
1359 return ret;
1360 }
1361
1362 static int amdvi_int_remap(X86IOMMUState *iommu,
1363 MSIMessage *origin,
1364 MSIMessage *translated,
1365 uint16_t sid)
1366 {
1367 return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
1368 translated, sid);
1369 }
1370
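/* write to the interrupt-remapping MMIO window: remap the MSI and hand the result to the APIC */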
1371 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
1372 uint64_t value, unsigned size,
1373 MemTxAttrs attrs)
1374 {
1375 int ret;
1376 MSIMessage from = { 0, 0 }, to = { 0, 0 };
1377 uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
1378
1379 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
1380 from.data = (uint32_t) value;
1381
1382 trace_amdvi_mem_ir_write_req(addr, value, size);
1383
1384 if (!attrs.unspecified) {
1385 /* We have explicit Source ID */
1386 sid = attrs.requester_id;
1387 }
1388
1389 ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
1390 if (ret < 0) {
1391 /* TODO: log the event using IOMMU log event interface */
1392 error_report_once("failed to remap interrupt from devid 0x%x", sid);
1393 return MEMTX_ERROR;
1394 }
1395
1396 apic_get_class(NULL)->send_msi(&to);
1397
1398 trace_amdvi_mem_ir_write(to.address, to.data);
1399 return MEMTX_OK;
1400 }
1401
1402 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
1403 uint64_t *data, unsigned size,
1404 MemTxAttrs attrs)
1405 {
1406 return MEMTX_OK;
1407 }
1408
1409 static const MemoryRegionOps amdvi_ir_ops = {
1410 .read_with_attrs = amdvi_mem_ir_read,
1411 .write_with_attrs = amdvi_mem_ir_write,
1412 .endianness = DEVICE_LITTLE_ENDIAN,
1413 .impl = {
1414 .min_access_size = 4,
1415 .max_access_size = 4,
1416 },
1417 .valid = {
1418 .min_access_size = 4,
1419 .max_access_size = 4,
1420 }
1421 };
1422
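/* return the address space for the device at @devfn on @bus, creating the per-device regions on first use */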
1423 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1424 {
1425 char name[128];
1426 AMDVIState *s = opaque;
1427 AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
1428 int bus_num = pci_bus_num(bus);
1429 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
1430
1431 iommu_as = s->address_spaces[bus_num];
1432
1433 /* allocate memory during the first run */
1434 if (!iommu_as) {
1435 iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
1436 s->address_spaces[bus_num] = iommu_as;
1437 }
1438
1439 /* set up AMD-Vi region */
1440 if (!iommu_as[devfn]) {
1441 snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
1442
1443 iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
1444 iommu_as[devfn]->bus_num = (uint8_t)bus_num;
1445 iommu_as[devfn]->devfn = (uint8_t)devfn;
1446 iommu_as[devfn]->iommu_state = s;
1447
1448 amdvi_dev_as = iommu_as[devfn];
1449
1450 /*
1451 * Memory region relationships look like this (address ranges show
1452 * only the lower 32 bits for brevity):
1453 *
1454 * |--------------------+-------------------+----------|
1455 * | Name | Address range | Priority |
1456 * |--------------------+-------------------+----------+
1457 * | amdvi-root | 00000000-ffffffff | 0 |
1458 * | amdvi-iommu_nodma | 00000000-ffffffff | 0 |
1459 * | amdvi-iommu_ir | fee00000-feefffff | 1 |
1460 * |--------------------+-------------------+----------|
1461 */
1462 memory_region_init_iommu(&amdvi_dev_as->iommu,
1463 sizeof(amdvi_dev_as->iommu),
1464 TYPE_AMD_IOMMU_MEMORY_REGION,
1465 OBJECT(s),
1466 "amd_iommu", UINT64_MAX);
1467 memory_region_init(&amdvi_dev_as->root, OBJECT(s),
1468 "amdvi_root", UINT64_MAX);
1469 address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
1470 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1471 MEMORY_REGION(&amdvi_dev_as->iommu),
1472 0);
1473
1474 /* Build the DMA Disabled alias to shared memory */
1475 memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s),
1476 "amdvi-sys", &s->mr_sys, 0,
1477 memory_region_size(&s->mr_sys));
1478 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1479 &amdvi_dev_as->iommu_nodma,
1480 0);
1481 /* Build the Interrupt Remapping alias to shared memory */
1482 memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s),
1483 "amdvi-ir", &s->mr_ir, 0,
1484 memory_region_size(&s->mr_ir));
1485 memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu),
1486 AMDVI_INT_ADDR_FIRST,
1487 &amdvi_dev_as->iommu_ir, 1);
1488
1489 if (!x86_iommu->pt_supported) {
1490 memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
1491 memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
1492 true);
1493 } else {
1494 memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
1495 false);
1496 memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true);
1497 }
1498 }
1499 return &iommu_as[devfn]->as;
1500 }
1501
1502 static const PCIIOMMUOps amdvi_iommu_ops = {
1503 .get_address_space = amdvi_host_dma_iommu,
1504 };
1505
1506 static const MemoryRegionOps mmio_mem_ops = {
1507 .read = amdvi_mmio_read,
1508 .write = amdvi_mmio_write,
1509 .endianness = DEVICE_LITTLE_ENDIAN,
1510 .impl = {
1511 .min_access_size = 1,
1512 .max_access_size = 8,
1513 .unaligned = false,
1514 },
1515 .valid = {
1516 .min_access_size = 1,
1517 .max_access_size = 8,
1518 }
1519 };
1520
1521 static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
1522 IOMMUNotifierFlag old,
1523 IOMMUNotifierFlag new,
1524 Error **errp)
1525 {
1526 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1527
1528 if (new & IOMMU_NOTIFIER_MAP) {
1529 error_setg(errp,
1530 "device %02x.%02x.%x requires iommu notifier which is not "
1531 "currently supported", as->bus_num, PCI_SLOT(as->devfn),
1532 PCI_FUNC(as->devfn));
1533 return -EINVAL;
1534 }
1535 return 0;
1536 }
1537
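/* reset the IOTLB and internal state, then reprogram the MMIO registers to their power-on values */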
1538 static void amdvi_init(AMDVIState *s)
1539 {
1540 amdvi_iotlb_reset(s);
1541
1542 s->devtab_len = 0;
1543 s->cmdbuf_len = 0;
1544 s->cmdbuf_head = 0;
1545 s->cmdbuf_tail = 0;
1546 s->evtlog_head = 0;
1547 s->evtlog_tail = 0;
1548 s->excl_enabled = false;
1549 s->excl_allow = false;
1550 s->mmio_enabled = false;
1551 s->enabled = false;
1552 s->ats_enabled = false;
1553 s->cmdbuf_enabled = false;
1554
1555 /* reset MMIO */
1556 memset(s->mmior, 0, AMDVI_MMIO_SIZE);
1557 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
1558 amdvi_extended_feature_register(s),
1559 0xffffffffffffffef, 0);
1560 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
1561 }
1562
1563 static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
1564 {
1565 AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
1566 int ret;
1567
1568 ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
1569 AMDVI_CAPAB_SIZE, errp);
1570 if (ret < 0) {
1571 return;
1572 }
1573 s->capab_offset = ret;
1574
1575 ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
1576 AMDVI_CAPAB_REG_SIZE, errp);
1577 if (ret < 0) {
1578 return;
1579 }
1580 ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
1581 AMDVI_CAPAB_REG_SIZE, errp);
1582 if (ret < 0) {
1583 return;
1584 }
1585
1586 if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
1587 return;
1588 }
1589
1590 /* reset device ident */
1591 pci_config_set_prog_interface(pdev->config, 0);
1592
1593 /* reset AMDVI specific capabilities, all r/o */
1594 pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
1595 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
1596 AMDVI_BASE_ADDR & ~(0xffff0000));
1597 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
1598 (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
1599 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
1600 0xff000000);
1601 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
1602 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
1603 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
1604 }
1605
1606 static void amdvi_sysbus_reset(DeviceState *dev)
1607 {
1608 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1609
1610 msi_reset(&s->pci.dev);
1611 amdvi_init(s);
1612 }
1613
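/* realize the emulated IOMMU: create the IOTLB, MMIO region and shared no-DMA/IR regions, then attach to the PCI bus */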
1614 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
1615 {
1616 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1617 MachineState *ms = MACHINE(qdev_get_machine());
1618 PCMachineState *pcms = PC_MACHINE(ms);
1619 X86MachineState *x86ms = X86_MACHINE(ms);
1620 PCIBus *bus = pcms->pcibus;
1621
1622 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
1623 amdvi_uint64_equal, g_free, g_free);
1624
1625 /* This device should take care of IOMMU PCI properties */
1626 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
1627 return;
1628 }
1629
1630 /* Pseudo address space under root PCI bus. */
1631 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
1632
1633 /* set up MMIO */
1634 memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
1635 "amdvi-mmio", AMDVI_MMIO_SIZE);
1636 memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
1637 &s->mr_mmio);
1638
1639 /* Create the shared memory regions used by all devices */
1640 memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX);
1641
1642 /* set up the DMA disabled memory region */
1643 memory_region_init_alias(&s->mr_nodma, OBJECT(s),
1644 "amdvi-nodma", get_system_memory(), 0,
1645 memory_region_size(get_system_memory()));
1646 memory_region_add_subregion_overlap(&s->mr_sys, 0,
1647 &s->mr_nodma, 0);
1648
1649 /* set up the Interrupt Remapping memory region */
1650 memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops,
1651 s, "amdvi-ir", AMDVI_INT_ADDR_SIZE);
1652 memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
1653 &s->mr_ir, 1);
1654
1655 /* AMD IOMMU with x2APIC mode requires xtsup=on */
1656 if (x86ms->apic_id_limit > 255 && !s->xtsup) {
1657 error_report("AMD IOMMU with x2APIC configuration requires xtsup=on");
1658 exit(EXIT_FAILURE);
1659 }
1660 if (s->xtsup) {
1661 if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
1662 error_report("AMD IOMMU xtsup=on requires support on the KVM side");
1663 exit(EXIT_FAILURE);
1664 }
1665 }
1666
1667 pci_setup_iommu(bus, &amdvi_iommu_ops, s);
1668 amdvi_init(s);
1669 }
1670
1671 static Property amdvi_properties[] = {
1672 DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
1673 DEFINE_PROP_END_OF_LIST(),
1674 };
1675
1676 static const VMStateDescription vmstate_amdvi_sysbus = {
1677 .name = "amd-iommu",
1678 .unmigratable = 1
1679 };
1680
1681 static void amdvi_sysbus_instance_init(Object *klass)
1682 {
1683 AMDVIState *s = AMD_IOMMU_DEVICE(klass);
1684
1685 object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
1686 }
1687
1688 static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
1689 {
1690 DeviceClass *dc = DEVICE_CLASS(klass);
1691 X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);
1692
1693 device_class_set_legacy_reset(dc, amdvi_sysbus_reset);
1694 dc->vmsd = &vmstate_amdvi_sysbus;
1695 dc->hotpluggable = false;
1696 dc_class->realize = amdvi_sysbus_realize;
1697 dc_class->int_remap = amdvi_int_remap;
1698 /* Supported by the pc-q35-* machine types */
1699 dc->user_creatable = true;
1700 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1701 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
1702 device_class_set_props(dc, amdvi_properties);
1703 }
1704
1705 static const TypeInfo amdvi_sysbus = {
1706 .name = TYPE_AMD_IOMMU_DEVICE,
1707 .parent = TYPE_X86_IOMMU_DEVICE,
1708 .instance_size = sizeof(AMDVIState),
1709 .instance_init = amdvi_sysbus_instance_init,
1710 .class_init = amdvi_sysbus_class_init
1711 };
1712
1713 static void amdvi_pci_class_init(ObjectClass *klass, void *data)
1714 {
1715 DeviceClass *dc = DEVICE_CLASS(klass);
1716 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1717
1718 k->vendor_id = PCI_VENDOR_ID_AMD;
1719 k->class_id = 0x0806;
1720 k->realize = amdvi_pci_realize;
1721
1722 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1723 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
1724 }
1725
1726 static const TypeInfo amdvi_pci = {
1727 .name = TYPE_AMD_IOMMU_PCI,
1728 .parent = TYPE_PCI_DEVICE,
1729 .instance_size = sizeof(AMDVIPCIState),
1730 .class_init = amdvi_pci_class_init,
1731 .interfaces = (InterfaceInfo[]) {
1732 { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1733 { },
1734 },
1735 };
1736
1737 static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
1738 {
1739 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1740
1741 imrc->translate = amdvi_translate;
1742 imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
1743 }
1744
1745 static const TypeInfo amdvi_iommu_memory_region_info = {
1746 .parent = TYPE_IOMMU_MEMORY_REGION,
1747 .name = TYPE_AMD_IOMMU_MEMORY_REGION,
1748 .class_init = amdvi_iommu_memory_region_class_init,
1749 };
1750
1751 static void amdvi_register_types(void)
1752 {
1753 type_register_static(&amdvi_pci);
1754 type_register_static(&amdvi_sysbus);
1755 type_register_static(&amdvi_iommu_memory_region_info);
1756 }
1757
1758 type_init(amdvi_register_types);
1759