1 /*
2 * QEMU emulation of AMD IOMMU (AMD-Vi)
3 *
4 * Copyright (C) 2011 Eduard - Gabriel Munteanu
5 * Copyright (C) 2015, 2016 David Kiarie Kahurani
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, see <http://www.gnu.org/licenses/>.
19 *
20 * Cache implementation inspired by hw/i386/intel_iommu.c
21 */
22
23 #include "qemu/osdep.h"
24 #include "hw/i386/pc.h"
25 #include "hw/pci/msi.h"
26 #include "hw/pci/pci_bus.h"
27 #include "migration/vmstate.h"
28 #include "amd_iommu.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "hw/i386/apic_internal.h"
32 #include "trace.h"
33 #include "hw/i386/apic-msidef.h"
34 #include "hw/qdev-properties.h"
35 #include "kvm/kvm_i386.h"
36
37 /* used AMD-Vi MMIO registers */
38 const char *amdvi_mmio_low[] = {
39 "AMDVI_MMIO_DEVTAB_BASE",
40 "AMDVI_MMIO_CMDBUF_BASE",
41 "AMDVI_MMIO_EVTLOG_BASE",
42 "AMDVI_MMIO_CONTROL",
43 "AMDVI_MMIO_EXCL_BASE",
44 "AMDVI_MMIO_EXCL_LIMIT",
45 "AMDVI_MMIO_EXT_FEATURES",
46 "AMDVI_MMIO_PPR_BASE",
47 "UNHANDLED"
48 };
49 const char *amdvi_mmio_high[] = {
50 "AMDVI_MMIO_COMMAND_HEAD",
51 "AMDVI_MMIO_COMMAND_TAIL",
52 "AMDVI_MMIO_EVTLOG_HEAD",
53 "AMDVI_MMIO_EVTLOG_TAIL",
54 "AMDVI_MMIO_STATUS",
55 "AMDVI_MMIO_PPR_HEAD",
56 "AMDVI_MMIO_PPR_TAIL",
57 "UNHANDLED"
58 };
59
60 struct AMDVIAddressSpace {
61 uint8_t bus_num; /* bus number */
62 uint8_t devfn; /* device function */
63 AMDVIState *iommu_state; /* AMDVI - one per machine */
64 MemoryRegion root; /* AMDVI Root memory map region */
65 IOMMUMemoryRegion iommu; /* Device's address translation region */
66 MemoryRegion iommu_nodma; /* Alias of shared nodma memory region */
67 MemoryRegion iommu_ir; /* Device's interrupt remapping region */
68 AddressSpace as; /* device's corresponding address space */
69 };
70
71 /* AMDVI cache entry */
72 typedef struct AMDVIIOTLBEntry {
73 uint16_t domid; /* assigned domain id */
74 uint16_t devid; /* device owning entry */
75 uint64_t perms; /* access permissions */
76 uint64_t translated_addr; /* translated address */
77 uint64_t page_mask; /* physical page size */
78 } AMDVIIOTLBEntry;
79
80 uint64_t amdvi_extended_feature_register(AMDVIState *s)
81 {
82 uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
83 if (s->xtsup) {
84 feature |= AMDVI_FEATURE_XT;
85 }
86
87 return feature;
88 }
89
90 /* configure MMIO registers at startup/reset */
91 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
92 uint64_t romask, uint64_t w1cmask)
93 {
94 stq_le_p(&s->mmior[addr], val);
95 stq_le_p(&s->romask[addr], romask);
96 stq_le_p(&s->w1cmask[addr], w1cmask);
97 }
98
99 static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
100 {
101 return lduw_le_p(&s->mmior[addr]);
102 }
103
104 static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
105 {
106 return ldl_le_p(&s->mmior[addr]);
107 }
108
109 static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
110 {
111 return ldq_le_p(&s->mmior[addr]);
112 }
113
114 /* internal write */
115 static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
116 {
117 stq_le_p(&s->mmior[addr], val);
118 }
119
120 /* external write */
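/*
 * Guest-visible register writes honour two per-register masks set up by
 * amdvi_set_quad(): bits set in romask are read-only and keep their previous
 * value, while bits set in w1cmask are write-1-to-clear, so writing 1 to
 * them clears the stored bit.
 */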
121 static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
122 {
123 uint16_t romask = lduw_le_p(&s->romask[addr]);
124 uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
125 uint16_t oldval = lduw_le_p(&s->mmior[addr]);
126 stw_le_p(&s->mmior[addr],
127 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
128 }
129
130 static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
131 {
132 uint32_t romask = ldl_le_p(&s->romask[addr]);
133 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
134 uint32_t oldval = ldl_le_p(&s->mmior[addr]);
135 stl_le_p(&s->mmior[addr],
136 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
137 }
138
139 static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
140 {
141 uint64_t romask = ldq_le_p(&s->romask[addr]);
142 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
143 uint64_t oldval = ldq_le_p(&s->mmior[addr]);
144 stq_le_p(&s->mmior[addr],
145 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
146 }
147
148 /* OR a 64-bit register with a 64-bit value */
149 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
150 {
151 return amdvi_readq(s, addr) | val;
152 }
153
154 /* OR a 64-bit register with a 64-bit value storing result in the register */
155 static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
156 {
157 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
158 }
159
160 /* AND a 64-bit register with a 64-bit value storing result in the register */
161 static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
162 {
163 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
164 }
165
166 static void amdvi_generate_msi_interrupt(AMDVIState *s)
167 {
168 MSIMessage msg = {};
169 MemTxAttrs attrs = {
170 .requester_id = pci_requester_id(&s->pci.dev)
171 };
172
173 if (msi_enabled(&s->pci.dev)) {
174 msg = msi_get_message(&s->pci.dev, 0);
175 address_space_stl_le(&address_space_memory, msg.address, msg.data,
176 attrs, NULL);
177 }
178 }
179
180 static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
181 {
182 /* event logging not enabled */
183 if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
184 AMDVI_MMIO_STATUS_EVT_OVF)) {
185 return;
186 }
187
188 /* event log buffer full */
189 if (s->evtlog_tail >= s->evtlog_len) {
190 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
191 /* generate interrupt */
192 amdvi_generate_msi_interrupt(s);
193 return;
194 }
195
196 if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
197 evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
198 trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
199 }
200
201 s->evtlog_tail += AMDVI_EVENT_LEN;
202 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
203 amdvi_generate_msi_interrupt(s);
204 }
205
206 static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
207 int length)
208 {
209 int index = start / 64, bitpos = start % 64;
210 uint64_t mask = MAKE_64BIT_MASK(start, length);
211 buffer[index] &= ~mask;
212 buffer[index] |= (value << bitpos) & mask;
213 }
214 /*
215 * AMDVi event structure
216 * 0:15 -> DeviceID
217 * 48:63 -> event type + miscellaneous info
218 * 64:127 -> related address
219 */
220 static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
221 uint16_t info)
222 {
223 evt[0] = 0;
224 evt[1] = 0;
225
226 amdvi_setevent_bits(evt, devid, 0, 16);
227 amdvi_setevent_bits(evt, info, 48, 16);
228 amdvi_setevent_bits(evt, addr, 64, 64);
229 }
230 /* log an error encountered during a page walk
231 *
232 * @addr: virtual address in translation request
233 */
234 static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
235 hwaddr addr, uint16_t info)
236 {
237 uint64_t evt[2];
238
239 info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
240 amdvi_encode_event(evt, devid, addr, info);
241 amdvi_log_event(s, evt);
242 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
243 PCI_STATUS_SIG_TARGET_ABORT);
244 }
245 /*
246 * log a master abort accessing device table
247 * @devtab : address of device table entry
248 * @info : error flags
249 */
250 static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
251 hwaddr devtab, uint16_t info)
252 {
253 uint64_t evt[2];
254
255 info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
256
257 amdvi_encode_event(evt, devid, devtab, info);
258 amdvi_log_event(s, evt);
259 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
260 PCI_STATUS_SIG_TARGET_ABORT);
261 }
262 /* log an event trying to access command buffer
263 * @addr : address that couldn't be accessed
264 */
265 static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
266 {
267 uint64_t evt[2];
268 uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;
269
270 amdvi_encode_event(evt, 0, addr, info);
271 amdvi_log_event(s, evt);
272 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
273 PCI_STATUS_SIG_TARGET_ABORT);
274 }
275 /* log an illegal command event
276 * @addr : address of illegal command
277 */
278 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
279 hwaddr addr)
280 {
281 uint64_t evt[2];
282
283 info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
284 amdvi_encode_event(evt, 0, addr, info);
285 amdvi_log_event(s, evt);
286 }
287 /* log an error accessing device table
288 *
289 * @devid : device owning the table entry
290 * @devtab : address of device table entry
291 * @info : error flags
292 */
293 static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
294 hwaddr addr, uint16_t info)
295 {
296 uint64_t evt[2];
297
298 info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
299 amdvi_encode_event(evt, devid, addr, info);
300 amdvi_log_event(s, evt);
301 }
302 /* log an error accessing a PTE entry
303 * @addr : address that couldn't be accessed
304 */
305 static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
306 hwaddr addr, uint16_t info)
307 {
308 uint64_t evt[2];
309
310 info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
311 amdvi_encode_event(evt, devid, addr, info);
312 amdvi_log_event(s, evt);
313 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
314 PCI_STATUS_SIG_TARGET_ABORT);
315 }
316
317 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
318 {
319 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
320 }
321
322 static guint amdvi_uint64_hash(gconstpointer v)
323 {
324 return (guint)*(const uint64_t *)v;
325 }
326
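/*
 * IOTLB entries are keyed on a single 64-bit value: the guest frame number
 * (address >> AMDVI_PAGE_SHIFT_4K) in the low bits, combined with the source
 * device ID shifted up by AMDVI_DEVID_SHIFT.
 */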
327 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
328 uint64_t devid)
329 {
330 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
331 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
332 return g_hash_table_lookup(s->iotlb, &key);
333 }
334
335 static void amdvi_iotlb_reset(AMDVIState *s)
336 {
337 assert(s->iotlb);
338 trace_amdvi_iotlb_reset();
339 g_hash_table_remove_all(s->iotlb);
340 }
341
342 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
343 gpointer user_data)
344 {
345 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
346 uint16_t devid = *(uint16_t *)user_data;
347 return entry->devid == devid;
348 }
349
350 static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
351 uint64_t devid)
352 {
353 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
354 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
355 g_hash_table_remove(s->iotlb, &key);
356 }
357
358 static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
359 uint64_t gpa, IOMMUTLBEntry to_cache,
360 uint16_t domid)
361 {
362 /* don't cache erroneous translations */
363 if (to_cache.perm != IOMMU_NONE) {
364 AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
365 uint64_t *key = g_new(uint64_t, 1);
366 uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
367
368 trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
369 PCI_FUNC(devid), gpa, to_cache.translated_addr);
370
371 if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
372 amdvi_iotlb_reset(s);
373 }
374
375 entry->domid = domid;
376 entry->perms = to_cache.perm;
377 entry->translated_addr = to_cache.translated_addr;
378 entry->page_mask = to_cache.addr_mask;
379 *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
380 g_hash_table_replace(s->iotlb, key, entry);
381 }
382 }
383
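/*
 * COMPLETION_WAIT: if cmd[0] bit 0 (store) is set, the 64-bit datum in
 * cmd[1] is written to the 8-byte aligned address taken from cmd[0] bits
 * 3..51; if bit 1 is set a completion interrupt is raised. Non-zero
 * reserved bits are reported as an illegal command.
 */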
384 static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
385 {
386 /* pad the last 3 bits */
387 hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
388 uint64_t data = cpu_to_le64(cmd[1]);
389
390 if (extract64(cmd[0], 52, 8)) {
391 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
392 s->cmdbuf + s->cmdbuf_head);
393 }
394 if (extract64(cmd[0], 0, 1)) {
395 if (dma_memory_write(&address_space_memory, addr, &data,
396 AMDVI_COMPLETION_DATA_SIZE,
397 MEMTXATTRS_UNSPECIFIED)) {
398 trace_amdvi_completion_wait_fail(addr);
399 }
400 }
401 /* set completion interrupt */
402 if (extract64(cmd[0], 1, 1)) {
403 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
404 /* generate interrupt */
405 amdvi_generate_msi_interrupt(s);
406 }
407 trace_amdvi_completion_wait(addr, data);
408 }
409
410 /* log error without aborting since linux seems to be using reserved bits */
411 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
412 {
413 uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
414
415 /* This command should invalidate internal caches, of which there are none */
416 if (extract64(cmd[0], 16, 44) || cmd[1]) {
417 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
418 s->cmdbuf + s->cmdbuf_head);
419 }
420 trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
421 PCI_FUNC(devid));
422 }
423
424 static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
425 {
426 if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) ||
427 extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
428 || extract64(cmd[1], 48, 16)) {
429 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
430 s->cmdbuf + s->cmdbuf_head);
431 }
432 trace_amdvi_ppr_exec();
433 }
434
435 static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global,
436 uint32_t index, uint32_t mask)
437 {
438 x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
439 }
440
441 static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
442 {
443 if (extract64(cmd[0], 0, 60) || cmd[1]) {
444 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
445 s->cmdbuf + s->cmdbuf_head);
446 }
447
448 /* Notify global invalidation */
449 amdvi_intremap_inval_notify_all(s, true, 0, 0);
450
451 amdvi_iotlb_reset(s);
452 trace_amdvi_all_inval();
453 }
454
455 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
456 gpointer user_data)
457 {
458 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
459 uint16_t domid = *(uint16_t *)user_data;
460 return entry->domid == domid;
461 }
462
463 /* we don't have devid - we can't remove pages by address */
464 static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
465 {
466 uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
467
468 if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
469 extract64(cmd[1], 3, 9)) {
470 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
471 s->cmdbuf + s->cmdbuf_head);
472 }
473
474 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
475 &domid);
476 trace_amdvi_pages_inval(domid);
477 }
478
479 static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
480 {
481 if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
482 extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
483 extract64(cmd[1], 5, 7)) {
484 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
485 s->cmdbuf + s->cmdbuf_head);
486 }
487
488 trace_amdvi_prefetch_pages();
489 }
490
491 static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
492 {
493 if (extract64(cmd[0], 16, 44) || cmd[1]) {
494 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
495 s->cmdbuf + s->cmdbuf_head);
496 return;
497 }
498
499 /* Notify global invalidation */
500 amdvi_intremap_inval_notify_all(s, true, 0, 0);
501
502 trace_amdvi_intr_inval();
503 }
504
505 /* FIXME: Try to work with the specified size instead of all the pages
506 * when the S bit is on
507 */
508 static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
509 {
510
511 uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16));
512 if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
513 extract64(cmd[1], 6, 6)) {
514 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
515 s->cmdbuf + s->cmdbuf_head);
516 return;
517 }
518
519 if (extract64(cmd[1], 0, 1)) {
520 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
521 &devid);
522 } else {
523 amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
524 devid);
525 }
526 trace_amdvi_iotlb_inval();
527 }
528
529 /* not honouring reserved bits is regarded as an illegal command */
530 static void amdvi_cmdbuf_exec(AMDVIState *s)
531 {
532 uint64_t cmd[2];
533
534 if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
535 cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
536 trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
537 amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
538 return;
539 }
540
541 switch (extract64(cmd[0], 60, 4)) {
542 case AMDVI_CMD_COMPLETION_WAIT:
543 amdvi_completion_wait(s, cmd);
544 break;
545 case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
546 amdvi_inval_devtab_entry(s, cmd);
547 break;
548 case AMDVI_CMD_INVAL_AMDVI_PAGES:
549 amdvi_inval_pages(s, cmd);
550 break;
551 case AMDVI_CMD_INVAL_IOTLB_PAGES:
552 iommu_inval_iotlb(s, cmd);
553 break;
554 case AMDVI_CMD_INVAL_INTR_TABLE:
555 amdvi_inval_inttable(s, cmd);
556 break;
557 case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
558 amdvi_prefetch_pages(s, cmd);
559 break;
560 case AMDVI_CMD_COMPLETE_PPR_REQUEST:
561 amdvi_complete_ppr(s, cmd);
562 break;
563 case AMDVI_CMD_INVAL_AMDVI_ALL:
564 amdvi_inval_all(s, cmd);
565 break;
566 default:
567 trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4));
568 /* log illegal command */
569 amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4),
570 s->cmdbuf + s->cmdbuf_head);
571 }
572 }
573
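/*
 * Drain the guest command buffer: commands are consumed from head towards
 * tail in AMDVI_COMMAND_SIZE steps, with the head pointer written back to
 * MMIO and wrapped at the end of the ring.
 */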
574 static void amdvi_cmdbuf_run(AMDVIState *s)
575 {
576 if (!s->cmdbuf_enabled) {
577 trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
578 return;
579 }
580
581 /* check if there is work to do. */
582 while (s->cmdbuf_head != s->cmdbuf_tail) {
583 trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
584 amdvi_cmdbuf_exec(s);
585 s->cmdbuf_head += AMDVI_COMMAND_SIZE;
586 amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);
587
588 /* wrap head pointer */
589 if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
590 s->cmdbuf_head = 0;
591 }
592 }
593 }
594
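/*
 * Trace an MMIO access by register name: offsets with bit 0x2000 set index
 * the "high" register name table, the rest the "low" one; the name index is
 * the register offset within that table divided by 8.
 */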
595 static void amdvi_mmio_trace(hwaddr addr, unsigned size)
596 {
597 uint8_t index = (addr & ~0x2000) / 8;
598
599 if ((addr & 0x2000)) {
600 /* high table */
601 index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
602 trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
603 } else {
604 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
605 trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
606 }
607 }
608
609 static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
610 {
611 AMDVIState *s = opaque;
612
613 uint64_t val = -1;
614 if (addr + size > AMDVI_MMIO_SIZE) {
615 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
616 return (uint64_t)-1;
617 }
618
619 if (size == 2) {
620 val = amdvi_readw(s, addr);
621 } else if (size == 4) {
622 val = amdvi_readl(s, addr);
623 } else if (size == 8) {
624 val = amdvi_readq(s, addr);
625 }
626 amdvi_mmio_trace(addr, size);
627
628 return val;
629 }
630
631 static void amdvi_handle_control_write(AMDVIState *s)
632 {
633 unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
634 s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
635
636 s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
637 s->evtlog_enabled = s->enabled && !!(control &
638 AMDVI_MMIO_CONTROL_EVENTLOGEN);
639
640 s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
641 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
642 s->cmdbuf_enabled = s->enabled && !!(control &
643 AMDVI_MMIO_CONTROL_CMDBUFLEN);
644 s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
645
646 /* update the flags depending on the control register */
647 if (s->cmdbuf_enabled) {
648 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
649 } else {
650 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
651 }
652 if (s->evtlog_enabled) {
653 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
654 } else {
655 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
656 }
657
658 trace_amdvi_control_status(control);
659 amdvi_cmdbuf_run(s);
660 }
661
662 static inline void amdvi_handle_devtab_write(AMDVIState *s)
663
664 {
665 uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
666 s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
667
668 /* set device table length (i.e. number of entries the table can hold) */
669 s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
670 (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
671 AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
672 }
673
674 static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
675 {
676 s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
677 & AMDVI_MMIO_CMDBUF_HEAD_MASK;
678 amdvi_cmdbuf_run(s);
679 }
680
681 static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
682 {
683 s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
684 & AMDVI_MMIO_CMDBUF_BASE_MASK;
685 s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
686 & AMDVI_MMIO_CMDBUF_SIZE_MASK);
687 s->cmdbuf_head = s->cmdbuf_tail = 0;
688 }
689
690 static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
691 {
692 s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
693 & AMDVI_MMIO_CMDBUF_TAIL_MASK;
694 amdvi_cmdbuf_run(s);
695 }
696
697 static inline void amdvi_handle_excllim_write(AMDVIState *s)
698 {
699 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
700 s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
701 AMDVI_MMIO_EXCL_LIMIT_LOW;
702 }
703
704 static inline void amdvi_handle_evtbase_write(AMDVIState *s)
705 {
706 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
707 s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
708 s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
709 & AMDVI_MMIO_EVTLOG_SIZE_MASK);
710 }
711
712 static inline void amdvi_handle_evttail_write(AMDVIState *s)
713 {
714 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
715 s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
716 }
717
718 static inline void amdvi_handle_evthead_write(AMDVIState *s)
719 {
720 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
721 s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
722 }
723
724 static inline void amdvi_handle_pprbase_write(AMDVIState *s)
725 {
726 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
727 s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
728 s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
729 & AMDVI_MMIO_PPRLOG_SIZE_MASK);
730 }
731
732 static inline void amdvi_handle_pprhead_write(AMDVIState *s)
733 {
734 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
735 s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
736 }
737
738 static inline void amdvi_handle_pprtail_write(AMDVIState *s)
739 {
740 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
741 s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
742 }
743
744 /* FIXME: something might go wrong if System Software writes in chunks
745 * of one byte, but Linux writes in chunks of 4 bytes, so currently it
746 * works correctly with Linux but will definitely be broken if software
747 * reads/writes 8 bytes
748 */
749 static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
750 hwaddr addr)
751 {
752 if (size == 2) {
753 amdvi_writew(s, addr, val);
754 } else if (size == 4) {
755 amdvi_writel(s, addr, val);
756 } else if (size == 8) {
757 amdvi_writeq(s, addr, val);
758 }
759 }
760
761 static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
762 unsigned size)
763 {
764 AMDVIState *s = opaque;
765 unsigned long offset = addr & 0x07;
766
767 if (addr + size > AMDVI_MMIO_SIZE) {
768 trace_amdvi_mmio_write("error: addr outside region: max ",
769 (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
770 return;
771 }
772
773 amdvi_mmio_trace(addr, size);
774 switch (addr & ~0x07) {
775 case AMDVI_MMIO_CONTROL:
776 amdvi_mmio_reg_write(s, size, val, addr);
777 amdvi_handle_control_write(s);
778 break;
779 case AMDVI_MMIO_DEVICE_TABLE:
780 amdvi_mmio_reg_write(s, size, val, addr);
781 /* set device table address
782 * This also suffers from the inability to tell whether software
783 * is done writing
784 */
785 if (offset || (size == 8)) {
786 amdvi_handle_devtab_write(s);
787 }
788 break;
789 case AMDVI_MMIO_COMMAND_HEAD:
790 amdvi_mmio_reg_write(s, size, val, addr);
791 amdvi_handle_cmdhead_write(s);
792 break;
793 case AMDVI_MMIO_COMMAND_BASE:
794 amdvi_mmio_reg_write(s, size, val, addr);
795 /* FIXME - make sure System Software has finished writing, in case
796 * it writes in chunks of less than 8 bytes, in a robust way. For
797 * now, this hack works for the Linux driver
798 */
799 if (offset || (size == 8)) {
800 amdvi_handle_cmdbase_write(s);
801 }
802 break;
803 case AMDVI_MMIO_COMMAND_TAIL:
804 amdvi_mmio_reg_write(s, size, val, addr);
805 amdvi_handle_cmdtail_write(s);
806 break;
807 case AMDVI_MMIO_EVENT_BASE:
808 amdvi_mmio_reg_write(s, size, val, addr);
809 amdvi_handle_evtbase_write(s);
810 break;
811 case AMDVI_MMIO_EVENT_HEAD:
812 amdvi_mmio_reg_write(s, size, val, addr);
813 amdvi_handle_evthead_write(s);
814 break;
815 case AMDVI_MMIO_EVENT_TAIL:
816 amdvi_mmio_reg_write(s, size, val, addr);
817 amdvi_handle_evttail_write(s);
818 break;
819 case AMDVI_MMIO_EXCL_LIMIT:
820 amdvi_mmio_reg_write(s, size, val, addr);
821 amdvi_handle_excllim_write(s);
822 break;
823 /* PPR log base - unused for now */
824 case AMDVI_MMIO_PPR_BASE:
825 amdvi_mmio_reg_write(s, size, val, addr);
826 amdvi_handle_pprbase_write(s);
827 break;
828 /* PPR log head - also unused for now */
829 case AMDVI_MMIO_PPR_HEAD:
830 amdvi_mmio_reg_write(s, size, val, addr);
831 amdvi_handle_pprhead_write(s);
832 break;
833 /* PPR log tail - unused for now */
834 case AMDVI_MMIO_PPR_TAIL:
835 amdvi_mmio_reg_write(s, size, val, addr);
836 amdvi_handle_pprtail_write(s);
837 break;
838 }
839 }
840
841 static inline uint64_t amdvi_get_perms(uint64_t entry)
842 {
843 return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
844 AMDVI_DEV_PERM_SHIFT;
845 }
846
847 /* validate that reserved bits are honoured */
848 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
849 uint64_t *dte)
850 {
851 if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) ||
852 (dte[1] & AMDVI_DTE_QUAD1_RESERVED) ||
853 (dte[2] & AMDVI_DTE_QUAD2_RESERVED) ||
854 (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) {
855 amdvi_log_illegaldevtab_error(s, devid,
856 s->devtab +
857 devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
858 return false;
859 }
860
861 return true;
862 }
863
864 /* get a device table entry given the devid */
865 static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
866 {
867 uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
868
869 if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
870 AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
871 trace_amdvi_dte_get_fail(s->devtab, offset);
872 /* log error accessing dte */
873 amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
874 return false;
875 }
876
877 *entry = le64_to_cpu(*entry);
878 if (!amdvi_validate_dte(s, devid, entry)) {
879 trace_amdvi_invalid_dte(entry[0]);
880 return false;
881 }
882
883 return true;
884 }
885
886 /* get pte translation mode */
887 static inline uint8_t get_pte_translation_mode(uint64_t pte)
888 {
889 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
890 }
891
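/*
 * For a page-size-override PTE the page size is encoded in the address field
 * itself: each consecutive 1-bit starting at bit 12 widens the page mask, so
 * the first zero bit marks where the page offset ends.
 */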
892 static inline uint64_t pte_override_page_mask(uint64_t pte)
893 {
894 uint8_t page_mask = 13;
895 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
896 /* find the first zero bit */
897 while (addr & 1) {
898 page_mask++;
899 addr = addr >> 1;
900 }
901
902 return ~((1ULL << page_mask) - 1);
903 }
904
905 static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
906 {
907 return ~((1UL << ((oldlevel * 9) + 3)) - 1);
908 }
909
910 static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
911 uint16_t devid)
912 {
913 uint64_t pte;
914
915 if (dma_memory_read(&address_space_memory, pte_addr,
916 &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
917 trace_amdvi_get_pte_hwerror(pte_addr);
918 amdvi_log_pagetab_error(s, devid, pte_addr, 0);
919 pte = 0;
920 return pte;
921 }
922
923 pte = le64_to_cpu(pte);
924 return pte;
925 }
926
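/*
 * Walk the guest I/O page table for this device. The DTE mode field gives
 * the starting level (0 means no remapping); each level consumes 9 bits of
 * the address, and a PTE next-level value of 7 marks a page-size override
 * handled by pte_override_page_mask(). On success *ret describes the
 * translated page and its permissions; on a fault a page-fault event is
 * logged and *ret is left untouched.
 */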
927 static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
928 IOMMUTLBEntry *ret, unsigned perms,
929 hwaddr addr)
930 {
931 unsigned level, present, pte_perms, oldlevel;
932 uint64_t pte = dte[0], pte_addr, page_mask;
933
934 /* make sure the DTE has TV = 1 */
935 if (pte & AMDVI_DEV_TRANSLATION_VALID) {
936 level = get_pte_translation_mode(pte);
937 if (level >= 7) {
938 trace_amdvi_mode_invalid(level, addr);
939 return;
940 }
941 if (level == 0) {
942 goto no_remap;
943 }
944
945 /* we are at the leaf page table or page table encodes a huge page */
946 do {
947 pte_perms = amdvi_get_perms(pte);
948 present = pte & 1;
949 if (!present || perms != (perms & pte_perms)) {
950 amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
951 trace_amdvi_page_fault(addr);
952 return;
953 }
954
955 /* go to the next lower level */
956 pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
957 /* add offset and load pte */
958 pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
959 pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
960 if (!pte) {
961 return;
962 }
963 oldlevel = level;
964 level = get_pte_translation_mode(pte);
965 } while (level > 0 && level < 7);
966
967 if (level == 0x7) {
968 page_mask = pte_override_page_mask(pte);
969 } else {
970 page_mask = pte_get_page_mask(oldlevel);
971 }
972
973 /* get access permissions from pte */
974 ret->iova = addr & page_mask;
975 ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
976 ret->addr_mask = ~page_mask;
977 ret->perm = amdvi_get_perms(pte);
978 return;
979 }
980 no_remap:
981 ret->iova = addr & AMDVI_PAGE_MASK_4K;
982 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
983 ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
984 ret->perm = amdvi_get_perms(pte);
985 }
986
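/*
 * Translate an IOVA for the device behind this address space: check the
 * IOTLB first, otherwise fetch the device table entry, walk the page table
 * and cache the result. Devices whose DTE has V=0 are passed through
 * untranslated with RW permissions.
 */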
987 static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
988 bool is_write, IOMMUTLBEntry *ret)
989 {
990 AMDVIState *s = as->iommu_state;
991 uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
992 AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
993 uint64_t entry[4];
994
995 if (iotlb_entry) {
996 trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
997 PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
998 ret->iova = addr & ~iotlb_entry->page_mask;
999 ret->translated_addr = iotlb_entry->translated_addr;
1000 ret->addr_mask = iotlb_entry->page_mask;
1001 ret->perm = iotlb_entry->perms;
1002 return;
1003 }
1004
1005 if (!amdvi_get_dte(s, devid, entry)) {
1006 return;
1007 }
1008
1009 /* devices with V = 0 are not translated */
1010 if (!(entry[0] & AMDVI_DEV_VALID)) {
1011 goto out;
1012 }
1013
1014 amdvi_page_walk(as, entry, ret,
1015 is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);
1016
1017 amdvi_update_iotlb(s, devid, addr, *ret,
1018 entry[1] & AMDVI_DEV_DOMID_ID_MASK);
1019 return;
1020
1021 out:
1022 ret->iova = addr & AMDVI_PAGE_MASK_4K;
1023 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
1024 ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
1025 ret->perm = IOMMU_RW;
1026 }
1027
1028 static inline bool amdvi_is_interrupt_addr(hwaddr addr)
1029 {
1030 return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
1031 }
1032
1033 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
1034 IOMMUAccessFlags flag, int iommu_idx)
1035 {
1036 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1037 AMDVIState *s = as->iommu_state;
1038 IOMMUTLBEntry ret = {
1039 .target_as = &address_space_memory,
1040 .iova = addr,
1041 .translated_addr = 0,
1042 .addr_mask = ~(hwaddr)0,
1043 .perm = IOMMU_NONE
1044 };
1045
1046 if (!s->enabled) {
1047 /* AMDVI disabled - corresponds to iommu=off, not to a
1048 * failure to provide any parameter
1049 */
1050 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1051 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1052 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1053 ret.perm = IOMMU_RW;
1054 return ret;
1055 } else if (amdvi_is_interrupt_addr(addr)) {
1056 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1057 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1058 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1059 ret.perm = IOMMU_WO;
1060 return ret;
1061 }
1062
1063 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
1064 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
1065 PCI_FUNC(as->devfn), addr, ret.translated_addr);
1066 return ret;
1067 }
1068
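/*
 * Fetch a legacy-format IRTE: the remapping table base comes from dte[2] and
 * the entry index from the low bits of the MSI data; the index is shifted
 * left by 2, i.e. a 4-byte entry stride.
 */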
1069 static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
1070 union irte *irte, uint16_t devid)
1071 {
1072 uint64_t irte_root, offset;
1073
1074 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
1075 offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;
1076
1077 trace_amdvi_ir_irte(irte_root, offset);
1078
1079 if (dma_memory_read(&address_space_memory, irte_root + offset,
1080 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
1081 trace_amdvi_ir_err("failed to get irte");
1082 return -AMDVI_IR_GET_IRTE;
1083 }
1084
1085 trace_amdvi_ir_irte_val(irte->val);
1086
1087 return 0;
1088 }
1089
1090 static int amdvi_int_remap_legacy(AMDVIState *iommu,
1091 MSIMessage *origin,
1092 MSIMessage *translated,
1093 uint64_t *dte,
1094 X86IOMMUIrq *irq,
1095 uint16_t sid)
1096 {
1097 int ret;
1098 union irte irte;
1099
1100 /* get interrupt remapping table */
1101 ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
1102 if (ret < 0) {
1103 return ret;
1104 }
1105
1106 if (!irte.fields.valid) {
1107 trace_amdvi_ir_target_abort("RemapEn is disabled");
1108 return -AMDVI_IR_TARGET_ABORT;
1109 }
1110
1111 if (irte.fields.guest_mode) {
1112 error_report_once("guest mode is not zero");
1113 return -AMDVI_IR_ERR;
1114 }
1115
1116 if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
1117 error_report_once("reserved int_type");
1118 return -AMDVI_IR_ERR;
1119 }
1120
1121 irq->delivery_mode = irte.fields.int_type;
1122 irq->vector = irte.fields.vector;
1123 irq->dest_mode = irte.fields.dm;
1124 irq->redir_hint = irte.fields.rq_eoi;
1125 irq->dest = irte.fields.destination;
1126
1127 return 0;
1128 }
1129
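/*
 * Fetch a GA-format IRTE (struct irte_ga): same table base as the legacy
 * format, but the index is shifted left by 4, i.e. a 16-byte entry stride.
 */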
1130 static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
1131 struct irte_ga *irte, uint16_t devid)
1132 {
1133 uint64_t irte_root, offset;
1134
1135 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
1136 offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
1137 trace_amdvi_ir_irte(irte_root, offset);
1138
1139 if (dma_memory_read(&address_space_memory, irte_root + offset,
1140 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
1141 trace_amdvi_ir_err("failed to get irte_ga");
1142 return -AMDVI_IR_GET_IRTE;
1143 }
1144
1145 trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
1146 return 0;
1147 }
1148
1149 static int amdvi_int_remap_ga(AMDVIState *iommu,
1150 MSIMessage *origin,
1151 MSIMessage *translated,
1152 uint64_t *dte,
1153 X86IOMMUIrq *irq,
1154 uint16_t sid)
1155 {
1156 int ret;
1157 struct irte_ga irte;
1158
1159 /* get interrupt remapping table */
1160 ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
1161 if (ret < 0) {
1162 return ret;
1163 }
1164
1165 if (!irte.lo.fields_remap.valid) {
1166 trace_amdvi_ir_target_abort("RemapEn is disabled");
1167 return -AMDVI_IR_TARGET_ABORT;
1168 }
1169
1170 if (irte.lo.fields_remap.guest_mode) {
1171 error_report_once("guest mode is not zero");
1172 return -AMDVI_IR_ERR;
1173 }
1174
1175 if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
1176 error_report_once("reserved int_type is set");
1177 return -AMDVI_IR_ERR;
1178 }
1179
1180 irq->delivery_mode = irte.lo.fields_remap.int_type;
1181 irq->vector = irte.hi.fields.vector;
1182 irq->dest_mode = irte.lo.fields_remap.dm;
1183 irq->redir_hint = irte.lo.fields_remap.rq_eoi;
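/*
 * With xtsup the destination APIC ID is assembled from the low destination
 * field plus destination_hi shifted into bits 24 and up; without it only
 * the low 8 bits are used.
 */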
1184 if (iommu->xtsup) {
1185 irq->dest = irte.lo.fields_remap.destination |
1186 (irte.hi.fields.destination_hi << 24);
1187 } else {
1188 irq->dest = irte.lo.fields_remap.destination & 0xff;
1189 }
1190
1191 return 0;
1192 }
1193
1194 static int __amdvi_int_remap_msi(AMDVIState *iommu,
1195 MSIMessage *origin,
1196 MSIMessage *translated,
1197 uint64_t *dte,
1198 X86IOMMUIrq *irq,
1199 uint16_t sid)
1200 {
1201 int ret;
1202 uint8_t int_ctl;
1203
1204 int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
1205 trace_amdvi_ir_intctl(int_ctl);
1206
1207 switch (int_ctl) {
1208 case AMDVI_IR_INTCTL_PASS:
1209 memcpy(translated, origin, sizeof(*origin));
1210 return 0;
1211 case AMDVI_IR_INTCTL_REMAP:
1212 break;
1213 case AMDVI_IR_INTCTL_ABORT:
1214 trace_amdvi_ir_target_abort("int_ctl abort");
1215 return -AMDVI_IR_TARGET_ABORT;
1216 default:
1217 trace_amdvi_ir_err("int_ctl reserved");
1218 return -AMDVI_IR_ERR;
1219 }
1220
1221 if (iommu->ga_enabled) {
1222 ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
1223 } else {
1224 ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
1225 }
1226
1227 return ret;
1228 }
1229
1230 /* Interrupt remapping for MSI/MSI-X entry */
1231 static int amdvi_int_remap_msi(AMDVIState *iommu,
1232 MSIMessage *origin,
1233 MSIMessage *translated,
1234 uint16_t sid)
1235 {
1236 int ret = 0;
1237 uint64_t pass = 0;
1238 uint64_t dte[4] = { 0 };
1239 X86IOMMUIrq irq = { 0 };
1240 uint8_t dest_mode, delivery_mode;
1241
1242 assert(origin && translated);
1243
1244 /*
1245 * When IOMMU is enabled, interrupt remap request will come either from
1246 * IO-APIC or PCI device. If interrupt is from PCI device then it will
1247 * have a valid requester id but if the interrupt is from IO-APIC
1248 * then requester id will be invalid.
1249 */
1250 if (sid == X86_IOMMU_SID_INVALID) {
1251 sid = AMDVI_IOAPIC_SB_DEVID;
1252 }
1253
1254 trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
1255
1256 /* check if device table entry is set before we go further. */
1257 if (!iommu || !iommu->devtab_len) {
1258 memcpy(translated, origin, sizeof(*origin));
1259 goto out;
1260 }
1261
1262 if (!amdvi_get_dte(iommu, sid, dte)) {
1263 return -AMDVI_IR_ERR;
1264 }
1265
1266 /* Check if IR is enabled in DTE */
1267 if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
1268 memcpy(translated, origin, sizeof(*origin));
1269 goto out;
1270 }
1271
1272 /* validate that we are configured with intremap=on */
1273 if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
1274 trace_amdvi_err("Interrupt remapping is enabled in the guest but "
1275 "not in the host. Use intremap=on to enable interrupt "
1276 "remapping in amd-iommu.");
1277 return -AMDVI_IR_ERR;
1278 }
1279
1280 if (origin->address < AMDVI_INT_ADDR_FIRST ||
1281 origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
1282 trace_amdvi_err("MSI is not from IOAPIC.");
1283 return -AMDVI_IR_ERR;
1284 }
1285
1286 /*
1287 * The MSI data register bits [10:8] are used to get the upstream interrupt type.
1288 *
1289 * See MSI/MSI-X format:
1290 * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
1291 * (page 5)
1292 */
1293 delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
1294
1295 switch (delivery_mode) {
1296 case AMDVI_IOAPIC_INT_TYPE_FIXED:
1297 case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
1298 trace_amdvi_ir_delivery_mode("fixed/arbitrated");
1299 ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
1300 if (ret < 0) {
1301 goto remap_fail;
1302 } else {
1303 /* Translate IRQ to MSI messages */
1304 x86_iommu_irq_to_msi_message(&irq, translated);
1305 goto out;
1306 }
1307 break;
1308 case AMDVI_IOAPIC_INT_TYPE_SMI:
1309 error_report("SMI is not supported!");
1310 ret = -AMDVI_IR_ERR;
1311 break;
1312 case AMDVI_IOAPIC_INT_TYPE_NMI:
1313 pass = dte[2] & AMDVI_DEV_NMI_PASS_MASK;
1314 trace_amdvi_ir_delivery_mode("nmi");
1315 break;
1316 case AMDVI_IOAPIC_INT_TYPE_INIT:
1317 pass = dte[2] & AMDVI_DEV_INT_PASS_MASK;
1318 trace_amdvi_ir_delivery_mode("init");
1319 break;
1320 case AMDVI_IOAPIC_INT_TYPE_EINT:
1321 pass = dte[2] & AMDVI_DEV_EINT_PASS_MASK;
1322 trace_amdvi_ir_delivery_mode("eint");
1323 break;
1324 default:
1325 trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
1326 ret = -AMDVI_IR_ERR;
1327 break;
1328 }
1329
1330 if (ret < 0) {
1331 goto remap_fail;
1332 }
1333
1334 /*
1335 * The MSI address register bit[2] is used to get the destination
1336 * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
1337 * only.
1338 */
1339 dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
1340 if (dest_mode) {
1341 trace_amdvi_ir_err("invalid dest_mode");
1342 ret = -AMDVI_IR_ERR;
1343 goto remap_fail;
1344 }
1345
1346 if (pass) {
1347 memcpy(translated, origin, sizeof(*origin));
1348 } else {
1349 trace_amdvi_ir_err("passthrough is not enabled");
1350 ret = -AMDVI_IR_ERR;
1351 goto remap_fail;
1352 }
1353
1354 out:
1355 trace_amdvi_ir_remap_msi(origin->address, origin->data,
1356 translated->address, translated->data);
1357 return 0;
1358
1359 remap_fail:
1360 return ret;
1361 }
1362
1363 static int amdvi_int_remap(X86IOMMUState *iommu,
1364 MSIMessage *origin,
1365 MSIMessage *translated,
1366 uint16_t sid)
1367 {
1368 return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
1369 translated, sid);
1370 }
1371
1372 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
1373 uint64_t value, unsigned size,
1374 MemTxAttrs attrs)
1375 {
1376 int ret;
1377 MSIMessage from = { 0, 0 }, to = { 0, 0 };
1378 uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
1379
1380 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
1381 from.data = (uint32_t) value;
1382
1383 trace_amdvi_mem_ir_write_req(addr, value, size);
1384
1385 if (!attrs.unspecified) {
1386 /* We have explicit Source ID */
1387 sid = attrs.requester_id;
1388 }
1389
1390 ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
1391 if (ret < 0) {
1392 /* TODO: log the event using IOMMU log event interface */
1393 error_report_once("failed to remap interrupt from devid 0x%x", sid);
1394 return MEMTX_ERROR;
1395 }
1396
1397 apic_get_class(NULL)->send_msi(&to);
1398
1399 trace_amdvi_mem_ir_write(to.address, to.data);
1400 return MEMTX_OK;
1401 }
1402
1403 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
1404 uint64_t *data, unsigned size,
1405 MemTxAttrs attrs)
1406 {
1407 return MEMTX_OK;
1408 }
1409
1410 static const MemoryRegionOps amdvi_ir_ops = {
1411 .read_with_attrs = amdvi_mem_ir_read,
1412 .write_with_attrs = amdvi_mem_ir_write,
1413 .endianness = DEVICE_LITTLE_ENDIAN,
1414 .impl = {
1415 .min_access_size = 4,
1416 .max_access_size = 4,
1417 },
1418 .valid = {
1419 .min_access_size = 4,
1420 .max_access_size = 4,
1421 }
1422 };
1423
1424 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1425 {
1426 char name[128];
1427 AMDVIState *s = opaque;
1428 AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
1429 int bus_num = pci_bus_num(bus);
1430
1431 iommu_as = s->address_spaces[bus_num];
1432
1433 /* allocate memory during the first run */
1434 if (!iommu_as) {
1435 iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
1436 s->address_spaces[bus_num] = iommu_as;
1437 }
1438
1439 /* set up AMD-Vi region */
1440 if (!iommu_as[devfn]) {
1441 snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
1442
1443 iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
1444 iommu_as[devfn]->bus_num = (uint8_t)bus_num;
1445 iommu_as[devfn]->devfn = (uint8_t)devfn;
1446 iommu_as[devfn]->iommu_state = s;
1447
1448 amdvi_dev_as = iommu_as[devfn];
1449
1450 /*
1451 * Memory region relationships look like (Address range shows
1452 * only lower 32 bits to make it short in length...):
1453 *
1454 * |--------------------+-------------------+----------|
1455 * | Name | Address range | Priority |
1456 * |--------------------+-------------------+----------+
1457 * | amdvi-root | 00000000-ffffffff | 0 |
1458 * | amdvi-iommu_nodma | 00000000-ffffffff | 0 |
1459 * | amdvi-iommu_ir | fee00000-feefffff | 1 |
1460 * |--------------------+-------------------+----------|
1461 */
1462 memory_region_init_iommu(&amdvi_dev_as->iommu,
1463 sizeof(amdvi_dev_as->iommu),
1464 TYPE_AMD_IOMMU_MEMORY_REGION,
1465 OBJECT(s),
1466 "amd_iommu", UINT64_MAX);
1467 memory_region_init(&amdvi_dev_as->root, OBJECT(s),
1468 "amdvi_root", UINT64_MAX);
1469 address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
1470 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1471 MEMORY_REGION(&amdvi_dev_as->iommu),
1472 0);
1473
1474 /* Build the DMA Disabled alias to shared memory */
1475 memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s),
1476 "amdvi-sys", &s->mr_sys, 0,
1477 memory_region_size(&s->mr_sys));
1478 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1479 &amdvi_dev_as->iommu_nodma,
1480 0);
1481 /* Build the Interrupt Remapping alias to shared memory */
1482 memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s),
1483 "amdvi-ir", &s->mr_ir, 0,
1484 memory_region_size(&s->mr_ir));
1485 memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu),
1486 AMDVI_INT_ADDR_FIRST,
1487 &amdvi_dev_as->iommu_ir, 1);
1488
1489 memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
1490 memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true);
1491 }
1492 return &iommu_as[devfn]->as;
1493 }
1494
1495 static const PCIIOMMUOps amdvi_iommu_ops = {
1496 .get_address_space = amdvi_host_dma_iommu,
1497 };
1498
1499 static const MemoryRegionOps mmio_mem_ops = {
1500 .read = amdvi_mmio_read,
1501 .write = amdvi_mmio_write,
1502 .endianness = DEVICE_LITTLE_ENDIAN,
1503 .impl = {
1504 .min_access_size = 1,
1505 .max_access_size = 8,
1506 .unaligned = false,
1507 },
1508 .valid = {
1509 .min_access_size = 1,
1510 .max_access_size = 8,
1511 }
1512 };
1513
1514 static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
1515 IOMMUNotifierFlag old,
1516 IOMMUNotifierFlag new,
1517 Error **errp)
1518 {
1519 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1520
1521 if (new & IOMMU_NOTIFIER_MAP) {
1522 error_setg(errp,
1523 "device %02x.%02x.%x requires iommu notifier which is not "
1524 "currently supported", as->bus_num, PCI_SLOT(as->devfn),
1525 PCI_FUNC(as->devfn));
1526 return -EINVAL;
1527 }
1528 return 0;
1529 }
1530
1531 static void amdvi_init(AMDVIState *s)
1532 {
1533 amdvi_iotlb_reset(s);
1534
1535 s->devtab_len = 0;
1536 s->cmdbuf_len = 0;
1537 s->cmdbuf_head = 0;
1538 s->cmdbuf_tail = 0;
1539 s->evtlog_head = 0;
1540 s->evtlog_tail = 0;
1541 s->excl_enabled = false;
1542 s->excl_allow = false;
1543 s->mmio_enabled = false;
1544 s->enabled = false;
1545 s->ats_enabled = false;
1546 s->cmdbuf_enabled = false;
1547
1548 /* reset MMIO */
1549 memset(s->mmior, 0, AMDVI_MMIO_SIZE);
1550 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
1551 amdvi_extended_feature_register(s),
1552 0xffffffffffffffef, 0);
1553 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
1554 }
1555
1556 static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
1557 {
1558 AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
1559 int ret;
1560
1561 ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
1562 AMDVI_CAPAB_SIZE, errp);
1563 if (ret < 0) {
1564 return;
1565 }
1566 s->capab_offset = ret;
1567
1568 ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
1569 AMDVI_CAPAB_REG_SIZE, errp);
1570 if (ret < 0) {
1571 return;
1572 }
1573 ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
1574 AMDVI_CAPAB_REG_SIZE, errp);
1575 if (ret < 0) {
1576 return;
1577 }
1578
1579 if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
1580 return;
1581 }
1582
1583 /* reset device ident */
1584 pci_config_set_prog_interface(pdev->config, 0);
1585
1586 /* reset AMDVI specific capabilities, all r/o */
1587 pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
1588 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
1589 AMDVI_BASE_ADDR & MAKE_64BIT_MASK(14, 18));
1590 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
1591 AMDVI_BASE_ADDR >> 32);
1592 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
1593 0xff000000);
1594 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
1595 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
1596 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
1597 }
1598
1599 static void amdvi_sysbus_reset(DeviceState *dev)
1600 {
1601 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1602
1603 msi_reset(&s->pci.dev);
1604 amdvi_init(s);
1605 }
1606
1607 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
1608 {
1609 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1610 MachineState *ms = MACHINE(qdev_get_machine());
1611 PCMachineState *pcms = PC_MACHINE(ms);
1612 X86MachineState *x86ms = X86_MACHINE(ms);
1613 PCIBus *bus = pcms->pcibus;
1614
1615 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
1616 amdvi_uint64_equal, g_free, g_free);
1617
1618 /* This device should take care of IOMMU PCI properties */
1619 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
1620 return;
1621 }
1622
1623 /* Pseudo address space under root PCI bus. */
1624 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
1625
1626 /* set up MMIO */
1627 memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
1628 "amdvi-mmio", AMDVI_MMIO_SIZE);
1629 memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
1630 &s->mr_mmio);
1631
1632 /* Create the shared memory regions used by all devices */
1633 memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX);
1634
1635 /* set up the DMA disabled memory region */
1636 memory_region_init_alias(&s->mr_nodma, OBJECT(s),
1637 "amdvi-nodma", get_system_memory(), 0,
1638 memory_region_size(get_system_memory()));
1639 memory_region_add_subregion_overlap(&s->mr_sys, 0,
1640 &s->mr_nodma, 0);
1641
1642 /* set up the Interrupt Remapping memory region */
1643 memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops,
1644 s, "amdvi-ir", AMDVI_INT_ADDR_SIZE);
1645 memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
1646 &s->mr_ir, 1);
1647
1648 if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) {
1649 error_report("AMD IOMMU with x2APIC configuration requires xtsup=on");
1650 exit(EXIT_FAILURE);
1651 }
1652
1653 if (s->xtsup) {
1654 if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
1655 error_report("AMD IOMMU xtsup=on requires x2APIC support on "
1656 "the KVM side");
1657 exit(EXIT_FAILURE);
1658 }
1659 }
1660
1661 pci_setup_iommu(bus, &amdvi_iommu_ops, s);
1662 amdvi_init(s);
1663 }
1664
1665 static const Property amdvi_properties[] = {
1666 DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
1667 };
1668
1669 static const VMStateDescription vmstate_amdvi_sysbus = {
1670 .name = "amd-iommu",
1671 .unmigratable = 1
1672 };
1673
1674 static void amdvi_sysbus_instance_init(Object *klass)
1675 {
1676 AMDVIState *s = AMD_IOMMU_DEVICE(klass);
1677
1678 object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
1679 }
1680
1681 static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
1682 {
1683 DeviceClass *dc = DEVICE_CLASS(klass);
1684 X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);
1685
1686 device_class_set_legacy_reset(dc, amdvi_sysbus_reset);
1687 dc->vmsd = &vmstate_amdvi_sysbus;
1688 dc->hotpluggable = false;
1689 dc_class->realize = amdvi_sysbus_realize;
1690 dc_class->int_remap = amdvi_int_remap;
1691 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1692 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
1693 device_class_set_props(dc, amdvi_properties);
1694 }
1695
1696 static const TypeInfo amdvi_sysbus = {
1697 .name = TYPE_AMD_IOMMU_DEVICE,
1698 .parent = TYPE_X86_IOMMU_DEVICE,
1699 .instance_size = sizeof(AMDVIState),
1700 .instance_init = amdvi_sysbus_instance_init,
1701 .class_init = amdvi_sysbus_class_init
1702 };
1703
1704 static void amdvi_pci_class_init(ObjectClass *klass, void *data)
1705 {
1706 DeviceClass *dc = DEVICE_CLASS(klass);
1707 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1708
1709 k->vendor_id = PCI_VENDOR_ID_AMD;
1710 k->device_id = 0x1419;
1711 k->class_id = 0x0806;
1712 k->realize = amdvi_pci_realize;
1713
1714 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1715 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
1716 }
1717
1718 static const TypeInfo amdvi_pci = {
1719 .name = TYPE_AMD_IOMMU_PCI,
1720 .parent = TYPE_PCI_DEVICE,
1721 .instance_size = sizeof(AMDVIPCIState),
1722 .class_init = amdvi_pci_class_init,
1723 .interfaces = (InterfaceInfo[]) {
1724 { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1725 { },
1726 },
1727 };
1728
1729 static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
1730 {
1731 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1732
1733 imrc->translate = amdvi_translate;
1734 imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
1735 }
1736
1737 static const TypeInfo amdvi_iommu_memory_region_info = {
1738 .parent = TYPE_IOMMU_MEMORY_REGION,
1739 .name = TYPE_AMD_IOMMU_MEMORY_REGION,
1740 .class_init = amdvi_iommu_memory_region_class_init,
1741 };
1742
1743 static void amdvi_register_types(void)
1744 {
1745 type_register_static(&amdvi_pci);
1746 type_register_static(&amdvi_sysbus);
1747 type_register_static(&amdvi_iommu_memory_region_info);
1748 }
1749
1750 type_init(amdvi_register_types);
1751