1 /*
2 * QEMU emulation of AMD IOMMU (AMD-Vi)
3 *
4 * Copyright (C) 2011 Eduard - Gabriel Munteanu
5 * Copyright (C) 2015, 2016 David Kiarie Kahurani
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, see <http://www.gnu.org/licenses/>.
19 *
20 * Cache implementation inspired by hw/i386/intel_iommu.c
21 */
22
23 #include "qemu/osdep.h"
24 #include "hw/i386/pc.h"
25 #include "hw/pci/msi.h"
26 #include "hw/pci/pci_bus.h"
27 #include "migration/vmstate.h"
28 #include "amd_iommu.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "hw/i386/apic_internal.h"
32 #include "trace.h"
33 #include "hw/i386/apic-msidef.h"
34 #include "hw/qdev-properties.h"
35 #include "kvm/kvm_i386.h"
36
37 /* used AMD-Vi MMIO registers */
/*
 * Register names for the low MMIO bank (offset bit 13 clear), indexed by
 * amdvi_mmio_get_index(); the trailing "UNHANDLED" entry is the clamp
 * target for out-of-range offsets.
 */
const char *amdvi_mmio_low[] = {
    "AMDVI_MMIO_DEVTAB_BASE",
    "AMDVI_MMIO_CMDBUF_BASE",
    "AMDVI_MMIO_EVTLOG_BASE",
    "AMDVI_MMIO_CONTROL",
    "AMDVI_MMIO_EXCL_BASE",
    "AMDVI_MMIO_EXCL_LIMIT",
    "AMDVI_MMIO_EXT_FEATURES",
    "AMDVI_MMIO_PPR_BASE",
    "UNHANDLED"
};
/*
 * Register names for the high MMIO bank (offset bit 13 set); same
 * indexing and "UNHANDLED" sentinel convention as amdvi_mmio_low.
 */
const char *amdvi_mmio_high[] = {
    "AMDVI_MMIO_COMMAND_HEAD",
    "AMDVI_MMIO_COMMAND_TAIL",
    "AMDVI_MMIO_EVTLOG_HEAD",
    "AMDVI_MMIO_EVTLOG_TAIL",
    "AMDVI_MMIO_STATUS",
    "AMDVI_MMIO_PPR_HEAD",
    "AMDVI_MMIO_PPR_TAIL",
    "UNHANDLED"
};
59
/*
 * Per-device DMA view behind the IOMMU: one instance per (bus, devfn).
 * The root region contains the translated (iommu) region, a no-DMA alias
 * used while translation is disabled, and the interrupt-remapping window.
 */
struct AMDVIAddressSpace {
    uint8_t bus_num;            /* bus number                           */
    uint8_t devfn;              /* device function                      */
    AMDVIState *iommu_state;    /* AMDVI - one per machine              */
    MemoryRegion root;          /* AMDVI Root memory map region         */
    IOMMUMemoryRegion iommu;    /* Device's address translation region  */
    MemoryRegion iommu_nodma;   /* Alias of shared nodma memory region  */
    MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
    AddressSpace as;            /* device's corresponding address space */
};
70
71 /* AMDVI cache entry */
/*
 * AMDVI cache entry: one translated mapping, keyed in s->iotlb by
 * (devid << AMDVI_DEVID_SHIFT) | gfn.
 */
typedef struct AMDVIIOTLBEntry {
    uint16_t domid;             /* assigned domain id   */
    uint16_t devid;             /* device owning entry  */
    uint64_t perms;             /* access permissions   */
    uint64_t translated_addr;   /* translated address   */
    uint64_t page_mask;         /* physical page size   */
} AMDVIIOTLBEntry;
79
amdvi_extended_feature_register(AMDVIState * s)80 uint64_t amdvi_extended_feature_register(AMDVIState *s)
81 {
82 uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
83 if (s->xtsup) {
84 feature |= AMDVI_FEATURE_XT;
85 }
86
87 return feature;
88 }
89
90 /* configure MMIO registers at startup/reset */
/*
 * Program one 64-bit MMIO register along with its read-only and
 * write-1-to-clear masks; used when (re)initializing registers at reset.
 */
static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
                           uint64_t romask, uint64_t w1cmask)
{
    stq_le_p(&s->mmior[addr], val);
    stq_le_p(&s->romask[addr], romask);
    stq_le_p(&s->w1cmask[addr], w1cmask);
}
98
/* Read a 16-bit little-endian value from the MMIO register file. */
static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
    return lduw_le_p(&s->mmior[addr]);
}
103
/* Read a 32-bit little-endian value from the MMIO register file. */
static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
{
    return ldl_le_p(&s->mmior[addr]);
}
108
/* Read a 64-bit little-endian value from the MMIO register file. */
static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
{
    return ldq_le_p(&s->mmior[addr]);
}
113
114 /* internal write */
/* Internal write: bypasses the romask/w1cmask handling of amdvi_writeq(). */
static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->mmior[addr], val);
}
119
120 /* external write */
/*
 * Guest-visible 16-bit register write: read-only bits keep their old
 * value, writable bits take the new value, and write-1-to-clear bits are
 * cleared where the guest wrote a 1.
 */
static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
{
    uint16_t ro = lduw_le_p(&s->romask[addr]);
    uint16_t w1c = lduw_le_p(&s->w1cmask[addr]);
    uint16_t cur = lduw_le_p(&s->mmior[addr]);
    uint16_t next;

    next = (cur & (ro | w1c)) | (val & ~ro);
    next &= ~(val & w1c);
    stw_le_p(&s->mmior[addr], next);
}
134
/* 32-bit variant of amdvi_writew(): honour romask and w1cmask bits. */
static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
{
    uint32_t ro = ldl_le_p(&s->romask[addr]);
    uint32_t w1c = ldl_le_p(&s->w1cmask[addr]);
    uint32_t cur = ldl_le_p(&s->mmior[addr]);
    uint32_t next;

    next = (cur & (ro | w1c)) | (val & ~ro);
    next &= ~(val & w1c);
    stl_le_p(&s->mmior[addr], next);
}
148
/* 64-bit variant of amdvi_writew(): honour romask and w1cmask bits. */
static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t ro = ldq_le_p(&s->romask[addr]);
    uint64_t w1c = ldq_le_p(&s->w1cmask[addr]);
    uint64_t cur = ldq_le_p(&s->mmior[addr]);
    uint64_t next;

    next = (cur & (ro | w1c)) | (val & ~ro);
    next &= ~(val & w1c);
    stq_le_p(&s->mmior[addr], next);
}
162
163 /* AND a 64-bit register with a 64-bit value */
amdvi_test_mask(AMDVIState * s,hwaddr addr,uint64_t val)164 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
165 {
166 return amdvi_readq(s, addr) & val;
167 }
168
169 /* OR a 64-bit register with a 64-bit value storing result in the register */
/* OR @val into a 64-bit register, storing the result back. */
static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t cur = amdvi_readq(s, addr);

    amdvi_writeq_raw(s, addr, cur | val);
}
174
175 /* AND a 64-bit register with a 64-bit value storing result in the register */
/* AND @val into a 64-bit register, storing the result back. */
static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t cur = amdvi_readq(s, addr);

    amdvi_writeq_raw(s, addr, cur & val);
}
180
/*
 * Deliver the IOMMU's MSI by writing the message payload to the message
 * address; no-op when the guest has not enabled MSI on the IOMMU's PCI
 * function.
 */
static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
    MSIMessage msg = {};
    MemTxAttrs attrs = {
        /* the interrupt write must carry the IOMMU's own requester id */
        .requester_id = pci_requester_id(&s->pci->dev)
    };

    if (msi_enabled(&s->pci->dev)) {
        msg = msi_get_message(&s->pci->dev, 0);
        address_space_stl_le(&address_space_memory, msg.address, msg.data,
                             attrs, NULL);
    }
}
194
/*
 * Return the event-log tail offset following the current one, wrapping at
 * the end of the ring. Assumes evtlog_len != 0 (guaranteed by the
 * "1UL << size" computation in amdvi_handle_evtbase_write()).
 */
static uint32_t get_next_eventlog_entry(AMDVIState *s)
{
    uint32_t evtlog_size = s->evtlog_len * AMDVI_EVENT_LEN;
    return (s->evtlog_tail + AMDVI_EVENT_LEN) % evtlog_size;
}
200
/*
 * Append a 128-bit event to the guest event log and raise the event
 * interrupt if enabled. When the ring is full, set the overflow status
 * bit (and interrupt) instead of writing; further logging is suppressed
 * until the guest clears the overflow bit.
 */
static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
    uint32_t evtlog_tail_next;

    /* event logging not enabled, or overflow already signalled */
    if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
        AMDVI_MMIO_STATUS_EVT_OVF)) {
        return;
    }

    evtlog_tail_next = get_next_eventlog_entry(s);

    /* event log buffer full */
    if (evtlog_tail_next == s->evtlog_head) {
        /* generate overflow interrupt */
        if (s->evtlog_intr) {
            amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
            amdvi_generate_msi_interrupt(s);
        }
        return;
    }

    /* write failure is traced but the tail still advances, as before */
    if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
                         evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
    }

    /* keep the guest-visible tail register in sync */
    s->evtlog_tail = evtlog_tail_next;
    amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail);

    if (s->evtlog_intr) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVENT_INT);
        amdvi_generate_msi_interrupt(s);
    }
}
236
/*
 * Deposit @length bits of @value into @buffer at absolute bit offset
 * @start (fields never straddle a 64-bit word).
 *
 * The mask must be built at the in-word position (bitpos), not at the
 * absolute offset: MAKE_64BIT_MASK(start, ...) with start >= 64 (e.g. the
 * 64-bit address field deposited at start == 64) shifts by the operand
 * width, which is undefined behavior in C and yields a wrong mask.
 */
static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
                                int length)
{
    int index = start / 64, bitpos = start % 64;
    uint64_t mask = MAKE_64BIT_MASK(bitpos, length);

    buffer[index] &= ~mask;
    buffer[index] |= (value << bitpos) & mask;
}
245 /*
246 * AMDVi event structure
247 * 0:15 -> DeviceID
248 * 48:63 -> event type + miscellaneous info
249 * 64:127 -> related address
250 */
/*
 * Pack an event into the 128-bit (two uint64_t) log-entry format:
 * bits 0:15 device id, bits 48:63 event type + info, bits 64:127 address.
 */
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
                               uint16_t info)
{
    evt[0] = 0;
    evt[1] = 0;

    amdvi_setevent_bits(evt, devid, 0, 16);
    amdvi_setevent_bits(evt, info, 48, 16);
    amdvi_setevent_bits(evt, addr, 64, 64);
}
261 /* log an error encountered during a page walk
262 *
263 * @addr: virtual address in translation request
264 */
/*
 * Log an IO page fault for @devid at @addr and signal a target abort on
 * the IOMMU PCI function.
 */
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
                             hwaddr addr, uint16_t info)
{
    uint64_t event[2];

    amdvi_encode_event(event, devid, addr,
                       info | AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF);
    amdvi_log_event(s, event);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
276 /*
277 * log a master abort accessing device table
278 * @devtab : address of device table entry
279 * @info : error flags
280 */
/*
 * Log a hardware error while accessing the device table entry at @devtab
 * for @devid, then signal a target abort.
 */
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
                                   hwaddr devtab, uint16_t info)
{
    uint64_t event[2];

    amdvi_encode_event(event, devid, devtab,
                       info | AMDVI_EVENT_DEV_TAB_HW_ERROR);
    amdvi_log_event(s, event);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
293 /* log an event trying to access command buffer
294 * @addr : address that couldn't be accessed
295 */
/*
 * Log a hardware error while fetching from the command buffer at @addr,
 * then signal a target abort.
 */
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
    uint64_t event[2];

    amdvi_encode_event(event, 0, addr, AMDVI_EVENT_COMMAND_HW_ERROR);
    amdvi_log_event(s, event);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
306 /* log an illegal command event
307 * @addr : address of illegal command
308 */
/*
 * Log an illegal-command event for the command at @addr; @info carries
 * the offending command type. No target abort for this event class.
 */
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
                                       hwaddr addr)
{
    uint64_t event[2];

    amdvi_encode_event(event, 0, addr,
                       info | AMDVI_EVENT_ILLEGAL_COMMAND_ERROR);
    amdvi_log_event(s, event);
}
318 /* log an error accessing device table
319 *
320 * @devid : device owning the table entry
321 * @devtab : address of device table entry
322 * @info : error flags
323 */
/*
 * Log an illegal device-table-entry event for @devid; @addr is the entry
 * address and @info carries the error flags.
 */
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
                                          hwaddr addr, uint16_t info)
{
    uint64_t event[2];

    amdvi_encode_event(event, devid, addr,
                       info | AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY);
    amdvi_log_event(s, event);
}
333 /* log an error accessing a PTE entry
334 * @addr : address that couldn't be accessed
335 */
/*
 * Log a hardware error while walking the page table for @devid at @addr,
 * then signal a target abort.
 */
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
                                    hwaddr addr, uint16_t info)
{
    uint64_t event[2];

    amdvi_encode_event(event, devid, addr,
                       info | AMDVI_EVENT_PAGE_TAB_HW_ERROR);
    amdvi_log_event(s, event);
    pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
347
amdvi_uint64_equal(gconstpointer v1,gconstpointer v2)348 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
349 {
350 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
351 }
352
amdvi_uint64_hash(gconstpointer v)353 static guint amdvi_uint64_hash(gconstpointer v)
354 {
355 return (guint)*(const uint64_t *)v;
356 }
357
amdvi_iotlb_lookup(AMDVIState * s,hwaddr addr,uint64_t devid)358 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
359 uint64_t devid)
360 {
361 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
362 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
363 return g_hash_table_lookup(s->iotlb, &key);
364 }
365
/* Drop every cached translation (keys/values freed by the table's
 * destroy notifiers). */
static void amdvi_iotlb_reset(AMDVIState *s)
{
    assert(s->iotlb);
    trace_amdvi_iotlb_reset();
    g_hash_table_remove_all(s->iotlb);
}
372
amdvi_iotlb_remove_by_devid(gpointer key,gpointer value,gpointer user_data)373 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
374 gpointer user_data)
375 {
376 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
377 uint16_t devid = *(uint16_t *)user_data;
378 return entry->devid == devid;
379 }
380
/* Evict the single cached translation for (@devid, @addr), if present. */
static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                    uint64_t devid)
{
    uint64_t key = (devid << AMDVI_DEVID_SHIFT) |
                   (addr >> AMDVI_PAGE_SHIFT_4K);

    g_hash_table_remove(s->iotlb, &key);
}
388
/*
 * Insert a successful translation for (@devid, @gpa) into the IOTLB.
 * Erroneous (IOMMU_NONE) translations are never cached; a full cache is
 * flushed wholesale before inserting.
 */
static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
                               uint64_t gpa, IOMMUTLBEntry to_cache,
                               uint16_t domid)
{
    AMDVIIOTLBEntry *entry;
    uint64_t *key;

    /* don't cache erroneous translations */
    if (to_cache.perm == IOMMU_NONE) {
        return;
    }

    trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid), gpa, to_cache.translated_addr);

    /* simple eviction policy: clear everything once the cache is full */
    if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
        amdvi_iotlb_reset(s);
    }

    entry = g_new(AMDVIIOTLBEntry, 1);
    entry->domid = domid;
    entry->perms = to_cache.perm;
    entry->translated_addr = to_cache.translated_addr;
    entry->page_mask = to_cache.addr_mask;

    /* table owns both key and value (freed by its destroy notifiers) */
    key = g_new(uint64_t, 1);
    *key = (gpa >> AMDVI_PAGE_SHIFT_4K) |
           ((uint64_t)devid << AMDVI_DEVID_SHIFT);
    g_hash_table_replace(s->iotlb, key, entry);
}
414
/*
 * COMPLETION_WAIT command: optionally store a 64-bit datum (S bit,
 * cmd[0] bit 0) and/or raise the completion interrupt (I bit, cmd[0]
 * bit 1). NOTE(review): the cpu_to_le64() on host-side values looks
 * suspect for big-endian hosts — it is a no-op on LE; verify intent.
 */
static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
{
    /* pad the last 3 bits */
    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
    uint64_t data = cpu_to_le64(cmd[1]);

    /* bits 52:59 are reserved and must be zero */
    if (extract64(cmd[0], 52, 8)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    if (extract64(cmd[0], 0, 1)) {
        if (dma_memory_write(&address_space_memory, addr, &data,
                             AMDVI_COMPLETION_DATA_SIZE,
                             MEMTXATTRS_UNSPECIFIED)) {
            trace_amdvi_completion_wait_fail(addr);
        }
    }
    /* set completion interrupt */
    if (extract64(cmd[0], 1, 1)) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
    }
    trace_amdvi_completion_wait(addr, data);
}
440
441 /* log error without aborting since linux seems to be using reserved bits */
/*
 * INVALIDATE_DEVTAB_ENTRY: we keep no per-device cached DTE state, so
 * only validate reserved bits and trace. Errors are logged without
 * aborting since Linux appears to set reserved bits.
 * NOTE(review): cpu_to_le16() on a value used for host-side PCI_*
 * decoding looks wrong on big-endian hosts — it is a no-op on LE; verify.
 */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));

    /* This command should invalidate internal caches of which there isn't */
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid));
}
454
/*
 * COMPLETE_PPR_REQUEST: PPR is not implemented, so only validate the
 * reserved bit fields and trace the command.
 */
static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 16) ||  extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
        || extract64(cmd[1], 48, 16)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_ppr_exec();
}
465
/* Notify all registered IEC (interrupt remapping cache) listeners. */
static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global,
                                            uint32_t index, uint32_t mask)
{
    x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
}
471
/*
 * INVALIDATE_IOMMU_ALL: flush the interrupt-remapping notifiers and the
 * whole IOTLB after validating reserved bits.
 */
static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 0, 60) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    /* Notify global invalidation */
    amdvi_intremap_inval_notify_all(s, true, 0, 0);

    amdvi_iotlb_reset(s);
    trace_amdvi_all_inval();
}
485
amdvi_iotlb_remove_by_domid(gpointer key,gpointer value,gpointer user_data)486 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
487 gpointer user_data)
488 {
489 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
490 uint16_t domid = *(uint16_t *)user_data;
491 return entry->domid == domid;
492 }
493
494 /* we don't have devid - we can't remove pages by address */
/*
 * INVALIDATE_IOMMU_PAGES: the command carries a domain id but no device
 * id, so we evict by domain rather than by address.
 * NOTE(review): cpu_to_le16() on a host-side compare value is a no-op on
 * LE hosts only; verify for big-endian.
 */
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));

    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
        extract64(cmd[1], 3, 9)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                &domid);
    trace_amdvi_pages_inval(domid);
}
509
/*
 * PREFETCH_IOMMU_PAGES: treated as a hint and ignored apart from
 * reserved-bit validation and tracing.
 */
static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 5, 7)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}
521
/*
 * INVALIDATE_INTERRUPT_TABLE: validate reserved bits, then notify the
 * interrupt-remapping listeners of a global invalidation.
 */
static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    /* Notify global invalidation */
    amdvi_intremap_inval_notify_all(s, true, 0, 0);

    trace_amdvi_intr_inval();
}
535
536 /* FIXME: Try to work with the specified size instead of all the pages
537 * when the S bit is on
538 */
/*
 * INVALIDATE_IOTLB_PAGES: evict either everything owned by the device
 * (S bit, cmd[1] bit 0) or the single page named in cmd[1] bits 12:63.
 * FIXME (pre-existing): with the S bit set we drop all the device's
 * pages instead of just the sized range.
 */
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{

    uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16));
    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 6, 6)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    if (extract64(cmd[1], 0, 1)) {
        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
                                    &devid);
    } else {
        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
                                devid);
    }
    trace_amdvi_iotlb_inval();
}
559
560 /* not honouring reserved bits is regarded as an illegal command */
/*
 * Fetch and execute the command at the current command-buffer head.
 * The opcode lives in cmd[0] bits 63:60; commands that do not honour
 * reserved bits are logged as illegal commands.
 */
static void amdvi_cmdbuf_exec(AMDVIState *s)
{
    uint64_t cmd[2];

    if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
                        cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
        amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
        return;
    }

    switch (extract64(cmd[0], 60, 4)) {
    case AMDVI_CMD_COMPLETION_WAIT:
        amdvi_completion_wait(s, cmd);
        break;
    case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
        amdvi_inval_devtab_entry(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_PAGES:
        amdvi_inval_pages(s, cmd);
        break;
    case AMDVI_CMD_INVAL_IOTLB_PAGES:
        iommu_inval_iotlb(s, cmd);
        break;
    case AMDVI_CMD_INVAL_INTR_TABLE:
        amdvi_inval_inttable(s, cmd);
        break;
    case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
        amdvi_prefetch_pages(s, cmd);
        break;
    case AMDVI_CMD_COMPLETE_PPR_REQUEST:
        amdvi_complete_ppr(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_ALL:
        amdvi_inval_all(s, cmd);
        break;
    default:
        /*
         * The opcode is in cmd[0], as used by the switch above; the old
         * code read cmd[1] here and traced/logged a bogus command type.
         */
        trace_amdvi_unhandled_command(extract64(cmd[0], 60, 4));
        /* log illegal command */
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
}
604
/* Drain the command buffer: execute commands until head catches tail. */
static void amdvi_cmdbuf_run(AMDVIState *s)
{
    if (!s->cmdbuf_enabled) {
        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
        return;
    }

    /* check if there is work to do. */
    while (s->cmdbuf_head != s->cmdbuf_tail) {
        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
        amdvi_cmdbuf_exec(s);
        s->cmdbuf_head += AMDVI_COMMAND_SIZE;
        /* mirror the head pointer into the guest-visible register */
        amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);

        /* wrap head pointer */
        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
            s->cmdbuf_head = 0;
        }
    }
}
625
amdvi_mmio_get_index(hwaddr addr)626 static inline uint8_t amdvi_mmio_get_index(hwaddr addr)
627 {
628 uint8_t index = (addr & ~0x2000) / 8;
629
630 if ((addr & 0x2000)) {
631 /* high table */
632 index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
633 } else {
634 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
635 }
636
637 return index;
638 }
639
/* Trace an MMIO read with the register's symbolic name. */
static void amdvi_mmio_trace_read(hwaddr addr, unsigned size)
{
    uint8_t index = amdvi_mmio_get_index(addr);
    /*
     * Select the name table matching the bank (bit 13): the old code
     * always indexed amdvi_mmio_low, mislabelling high-bank registers
     * (e.g. COMMAND_HEAD traced as DEVTAB_BASE).
     */
    const char *reg = (addr & 0x2000) ? amdvi_mmio_high[index]
                                      : amdvi_mmio_low[index];

    trace_amdvi_mmio_read(reg, addr, size, addr & ~0x07);
}
645
/* Trace an MMIO write with the register's symbolic name. */
static void amdvi_mmio_trace_write(hwaddr addr, unsigned size, uint64_t val)
{
    uint8_t index = amdvi_mmio_get_index(addr);
    /*
     * Select the name table matching the bank (bit 13): the old code
     * always indexed amdvi_mmio_low, mislabelling high-bank registers.
     */
    const char *reg = (addr & 0x2000) ? amdvi_mmio_high[index]
                                      : amdvi_mmio_low[index];

    trace_amdvi_mmio_write(reg, addr, size, val, addr & ~0x07);
}
652
amdvi_mmio_read(void * opaque,hwaddr addr,unsigned size)653 static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
654 {
655 AMDVIState *s = opaque;
656
657 uint64_t val = -1;
658 if (addr + size > AMDVI_MMIO_SIZE) {
659 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
660 return (uint64_t)-1;
661 }
662
663 if (size == 2) {
664 val = amdvi_readw(s, addr);
665 } else if (size == 4) {
666 val = amdvi_readl(s, addr);
667 } else if (size == 8) {
668 val = amdvi_readq(s, addr);
669 }
670 amdvi_mmio_trace_read(addr, size);
671
672 return val;
673 }
674
/*
 * React to a guest write of the control register: latch the enable bits
 * into cached state, mirror the run bits into the status register, and
 * kick the command buffer.
 */
static void amdvi_handle_control_write(AMDVIState *s)
{
    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);

    /* event logging and command processing require the IOMMU enabled */
    s->evtlog_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_EVENTLOGEN);

    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
    /*
     * NOTE(review): the macro is named ..._CMDBUFLEN but is used here as
     * the command-buffer enable bit — confirm against amd_iommu.h.
     */
    s->cmdbuf_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
    s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);

    /* update the flags depending on the control register */
    if (s->cmdbuf_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
    }
    if (s->evtlog_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
    }

    trace_amdvi_control_status(control);
    amdvi_cmdbuf_run(s);
}
704
/* Latch device-table base address and length from the MMIO register. */
static inline void amdvi_handle_devtab_write(AMDVIState *s)

{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);

    /* set device table length (i.e. number of entries table can hold) */
    s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
}
716
/* Latch the guest-written command head and resume command processing. */
static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
{
    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
    amdvi_cmdbuf_run(s);
}
723
/*
 * Latch command-buffer base and length (length is encoded as a power of
 * two); reprogramming the base resets both ring pointers.
 */
static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
{
    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
                & AMDVI_MMIO_CMDBUF_BASE_MASK;
    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
    s->cmdbuf_head = s->cmdbuf_tail = 0;
}
732
/* Latch the guest-written command tail and process any new commands. */
static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
{
    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
    amdvi_cmdbuf_run(s);
}
739
/* Latch the exclusion-range limit (low bits forced to all-ones). */
static inline void amdvi_handle_excllim_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
                    AMDVI_MMIO_EXCL_LIMIT_LOW;
}
746
/*
 * Latch event-log base and length and reset the ring pointers. Ignored
 * while the event-log interrupt status bit is still pending, so an
 * in-flight log position is not clobbered.
 */
static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);

    /*
     * Do not reset if the eventlog interrupt bit is set. Braces added:
     * the previous braceless `if` with a comment between condition and
     * statement violates QEMU coding style and invites goto-fail bugs.
     */
    if (amdvi_readq(s, AMDVI_MMIO_STATUS) & AMDVI_MMIO_STATUS_EVENT_INT) {
        return;
    }

    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);

    /* clear tail and head pointer to 0 when event base is updated */
    s->evtlog_tail = s->evtlog_head = 0;
    amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_HEAD, s->evtlog_head);
    amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail);
}
764
/* Latch the guest-written event-log tail pointer. */
static inline void amdvi_handle_evttail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
}
770
/* Latch the guest-written event-log head pointer. */
static inline void amdvi_handle_evthead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
}
776
/* Latch PPR log base and length (PPR itself is not implemented). */
static inline void amdvi_handle_pprbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
}
784
/* Latch the guest-written PPR log head pointer. */
static inline void amdvi_handle_pprhead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
}
790
/* Latch the guest-written PPR log tail pointer. */
static inline void amdvi_handle_pprtail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
}
796
797 /* FIXME: something might go wrong if System Software writes in chunks
798 * of one byte but linux writes in chunks of 4 bytes so currently it
799 * works correctly with linux but will definitely be busted if software
800 * reads/writes 8 bytes
801 */
/*
 * Store a guest write into the register file through the masked write
 * helper matching the access size; other sizes are silently ignored.
 */
static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
                                 hwaddr addr)
{
    switch (size) {
    case 2:
        amdvi_writew(s, addr, val);
        break;
    case 4:
        amdvi_writel(s, addr, val);
        break;
    case 8:
        amdvi_writeq(s, addr, val);
        break;
    default:
        break;
    }
}
813
/*
 * MMIO write handler: store the value into the register file, then run
 * the side effect matching the register. For multi-part registers
 * (device table, command base) the side effect only runs once the upper
 * half arrives (offset != 0) or the access covers all 8 bytes.
 */
static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    AMDVIState *s = opaque;
    unsigned long offset = addr & 0x07;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_write("error: addr outside region: max ",
                (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
        return;
    }

    amdvi_mmio_trace_write(addr, size, val);
    switch (addr & ~0x07) {
    case AMDVI_MMIO_CONTROL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_control_write(s);
        break;
    case AMDVI_MMIO_DEVICE_TABLE:
        amdvi_mmio_reg_write(s, size, val, addr);
       /* set device table address
        * This also suffers from inability to tell whether software
        * is done writing
        */
        if (offset || (size == 8)) {
            amdvi_handle_devtab_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdhead_write(s);
        break;
    case AMDVI_MMIO_COMMAND_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /* FIXME - make sure System Software has finished writing in case
         * it writes in chucks less than 8 bytes in a robust way.As for
         * now, this hacks works for the linux driver
         */
        if (offset || (size == 8)) {
            amdvi_handle_cmdbase_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdtail_write(s);
        break;
    case AMDVI_MMIO_EVENT_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evtbase_write(s);
        break;
    case AMDVI_MMIO_EVENT_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evthead_write(s);
        break;
    case AMDVI_MMIO_EVENT_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evttail_write(s);
        break;
    case AMDVI_MMIO_EXCL_LIMIT:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_excllim_write(s);
        break;
        /* PPR log base - unused for now */
    case AMDVI_MMIO_PPR_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprbase_write(s);
        break;
        /* PPR log head - also unused for now */
    case AMDVI_MMIO_PPR_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprhead_write(s);
        break;
        /* PPR log tail - unused for now */
    case AMDVI_MMIO_PPR_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprtail_write(s);
        break;
    case AMDVI_MMIO_STATUS:
        /* status is mostly W1C; the masked write helper handles that */
        amdvi_mmio_reg_write(s, size, val, addr);
        break;
    }
}
896
/* Extract the IOMMU read/write permission bits from a DTE/PTE quad. */
static inline uint64_t amdvi_get_perms(uint64_t entry)
{
    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
           AMDVI_DEV_PERM_SHIFT;
}
902
903 /* validate that reserved bits are honoured */
/*
 * Validate that the four quads of a device table entry honour their
 * reserved bits; logs an illegal-DTE event and returns false otherwise.
 */
static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
                               uint64_t *dte)
{
    if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) ||
        (dte[1] & AMDVI_DTE_QUAD1_RESERVED) ||
        (dte[2] & AMDVI_DTE_QUAD2_RESERVED) ||
        (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) {
        amdvi_log_illegaldevtab_error(s, devid,
                                      s->devtab +
                                      devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
        return false;
    }

    return true;
}
919
920 /* get a device table entry given the devid */
/*
 * Read and validate the device table entry for @devid into @entry
 * (AMDVI_DEVTAB_ENTRY_SIZE bytes, i.e. four quads). Returns false and
 * logs on DMA failure or reserved-bit violation.
 * NOTE(review): le64_to_cpu() is applied to the first quad only, while
 * amdvi_validate_dte() inspects all four — harmless on LE hosts but
 * looks wrong for big-endian; verify.
 */
static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
{
    uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;

    if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
                        AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_dte_get_fail(s->devtab, offset);
        /* log error accessing dte */
        amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
        return false;
    }

    *entry = le64_to_cpu(*entry);
    if (!amdvi_validate_dte(s, devid, entry)) {
        trace_amdvi_invalid_dte(entry[0]);
        return false;
    }

    return true;
}
941
942 /* get pte translation mode */
get_pte_translation_mode(uint64_t pte)943 static inline uint8_t get_pte_translation_mode(uint64_t pte)
944 {
945 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
946 }
947
pte_override_page_mask(uint64_t pte)948 static inline uint64_t pte_override_page_mask(uint64_t pte)
949 {
950 uint8_t page_mask = 13;
951 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
952 /* find the first zero bit */
953 while (addr & 1) {
954 page_mask++;
955 addr = addr >> 1;
956 }
957
958 return ~((1ULL << page_mask) - 1);
959 }
960
/*
 * Page mask for a leaf PTE found at page-table level @oldlevel:
 * level 1 covers 12 bits (4K), each further level adds 9 bits.
 *
 * Use 1ULL, not 1UL: on ILP32 hosts `unsigned long` is 32 bits and the
 * shift amount reaches 57 for level 6, which would be undefined behaviour
 * (shift >= type width).  The sibling pte_override_page_mask() already
 * uses 1ULL.
 */
static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
{
    return ~((1ULL << ((oldlevel * 9) + 3)) - 1);
}
965
/*
 * Load one little-endian PTE from guest memory at @pte_addr.  On DMA
 * failure a PAGE_TAB_HARDWARE_ERROR is logged and 0 (non-present) is
 * returned so the caller aborts the walk.
 */
static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
                                           uint16_t devid)
{
    uint64_t raw;

    if (dma_memory_read(&address_space_memory, pte_addr,
                        &raw, sizeof(raw), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_get_pte_hwerror(pte_addr);
        amdvi_log_pagetab_error(s, devid, pte_addr, 0);
        return 0;
    }

    return le64_to_cpu(raw);
}
982
/*
 * Walk the I/O page table rooted in the DTE and fill @ret with the
 * translation for @addr, checking each level for presence and the
 * requested @perms.  On fault or invalid mode, @ret is left untouched
 * (the caller pre-initialised it to IOMMU_NONE); with TV=0 or Mode=0 the
 * address is identity-mapped at 4K granularity.
 */
static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                            IOMMUTLBEntry *ret, unsigned perms,
                            hwaddr addr)
{
    unsigned level, present, pte_perms, oldlevel;
    uint64_t pte = dte[0], pte_addr, page_mask;

    /* make sure the DTE has TV = 1 */
    if (pte & AMDVI_DEV_TRANSLATION_VALID) {
        level = get_pte_translation_mode(pte);
        /* mode 7 in the DTE itself is reserved */
        if (level >= 7) {
            trace_amdvi_mode_invalid(level, addr);
            return;
        }
        /* mode 0: no paging, fall through to the 4K identity map */
        if (level == 0) {
            goto no_remap;
        }

        /* we are at the leaf page table or page table encodes a huge page */
        do {
            pte_perms = amdvi_get_perms(pte);
            present = pte & 1;
            if (!present || perms != (perms & pte_perms)) {
                /*
                 * NOTE(review): as->devfn is passed where a device id is
                 * expected — 8-bit devfn, not the 16-bit BDF; confirm this
                 * is intentional (same below for amdvi_get_pte_entry).
                 */
                amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
                trace_amdvi_page_fault(addr);
                return;
            }

            /* go to the next lower level */
            pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
            /* add offset and load pte: 9 index bits per level, 8-byte PTEs */
            pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
            pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
            if (!pte) {
                /* DMA error already logged by amdvi_get_pte_entry() */
                return;
            }
            oldlevel = level;
            level = get_pte_translation_mode(pte);
        } while (level > 0 && level < 7);

        /* level 7 leaf encodes its page size in the address bits */
        if (level == 0x7) {
            page_mask = pte_override_page_mask(pte);
        } else {
            page_mask = pte_get_page_mask(oldlevel);
        }

        /* get access permissions from pte */
        ret->iova = addr & page_mask;
        ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
        ret->addr_mask = ~page_mask;
        ret->perm = amdvi_get_perms(pte);
        return;
    }
no_remap:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = amdvi_get_perms(pte);
}
1042
/*
 * Translate @addr for the device behind @as: consult the IOTLB cache
 * first, otherwise fetch the DTE, walk the page table and cache the
 * result.  Devices with an invalid (V=0) DTE are identity-mapped.
 */
static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
                               bool is_write, IOMMUTLBEntry *ret)
{
    AMDVIState *s = as->iommu_state;
    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
    AMDVIIOTLBEntry *cached = amdvi_iotlb_lookup(s, addr, devid);
    uint64_t dte[4];

    /* fast path: the translation is already cached */
    if (cached) {
        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                              PCI_FUNC(devid), addr, cached->translated_addr);
        ret->iova = addr & ~cached->page_mask;
        ret->translated_addr = cached->translated_addr;
        ret->addr_mask = cached->page_mask;
        ret->perm = cached->perms;
        return;
    }

    if (!amdvi_get_dte(s, devid, dte)) {
        /* error already logged; leave *ret as IOMMU_NONE */
        return;
    }

    /* devices with V = 0 are not translated */
    if (entry_valid: (dte[0] & AMDVI_DEV_VALID)) {
        amdvi_page_walk(as, dte, ret,
                        is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);

        amdvi_update_iotlb(s, devid, addr, *ret,
                           dte[1] & AMDVI_DEV_DOMID_ID_MASK);
        return;
    }

    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = IOMMU_RW;
}
1083
/* True when @addr falls inside the MSI interrupt window. */
static inline bool amdvi_is_interrupt_addr(hwaddr addr)
{
    return !(addr < AMDVI_INT_ADDR_FIRST || addr > AMDVI_INT_ADDR_LAST);
}
1088
amdvi_translate(IOMMUMemoryRegion * iommu,hwaddr addr,IOMMUAccessFlags flag,int iommu_idx)1089 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
1090 IOMMUAccessFlags flag, int iommu_idx)
1091 {
1092 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1093 AMDVIState *s = as->iommu_state;
1094 IOMMUTLBEntry ret = {
1095 .target_as = &address_space_memory,
1096 .iova = addr,
1097 .translated_addr = 0,
1098 .addr_mask = ~(hwaddr)0,
1099 .perm = IOMMU_NONE
1100 };
1101
1102 if (!s->enabled) {
1103 /* AMDVI disabled - corresponds to iommu=off not
1104 * failure to provide any parameter
1105 */
1106 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1107 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1108 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1109 ret.perm = IOMMU_RW;
1110 return ret;
1111 } else if (amdvi_is_interrupt_addr(addr)) {
1112 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1113 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1114 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1115 ret.perm = IOMMU_WO;
1116 return ret;
1117 }
1118
1119 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
1120 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
1121 PCI_FUNC(as->devfn), addr, ret.translated_addr);
1122 return ret;
1123 }
1124
/*
 * Read the legacy (32-bit) IRTE selected by the MSI data from the
 * interrupt-remapping table whose root lives in DTE quadword 2.
 * Returns 0 on success, -AMDVI_IR_GET_IRTE on DMA failure.
 */
static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                          union irte *irte, uint16_t devid)
{
    uint64_t table_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    /* legacy IRTEs are 4 bytes wide */
    uint64_t entry_off = (origin->data & AMDVI_IRTE_OFFSET) << 2;

    trace_amdvi_ir_irte(table_root, entry_off);

    if (dma_memory_read(&address_space_memory, table_root + entry_off,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_val(irte->val);
    return 0;
}
1145
/*
 * Remap an interrupt via the legacy 32-bit IRTE format: fetch the entry,
 * reject invalid/guest-mode/reserved-type entries, then translate its
 * fields into the generic X86IOMMUIrq description.
 */
static int amdvi_int_remap_legacy(AMDVIState *iommu,
                                  MSIMessage *origin,
                                  MSIMessage *translated,
                                  uint64_t *dte,
                                  X86IOMMUIrq *irq,
                                  uint16_t sid)
{
    union irte entry;
    int ret;

    /* get interrupt remapping table */
    ret = amdvi_get_irte(iommu, origin, dte, &entry, sid);
    if (ret < 0) {
        return ret;
    }

    if (!entry.fields.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (entry.fields.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (entry.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type");
        return -AMDVI_IR_ERR;
    }

    /* copy the remapped fields into the generic irq description */
    irq->vector = entry.fields.vector;
    irq->delivery_mode = entry.fields.int_type;
    irq->dest_mode = entry.fields.dm;
    irq->redir_hint = entry.fields.rq_eoi;
    irq->dest = entry.fields.destination;

    return 0;
}
1185
/*
 * Read the 128-bit guest-APIC-format IRTE selected by the MSI data from
 * the remapping table rooted in DTE quadword 2.
 * Returns 0 on success, -AMDVI_IR_GET_IRTE on DMA failure.
 */
static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                             struct irte_ga *irte, uint16_t devid)
{
    uint64_t table_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    /* GA-format IRTEs are 16 bytes wide */
    uint64_t entry_off = (origin->data & AMDVI_IRTE_OFFSET) << 4;

    trace_amdvi_ir_irte(table_root, entry_off);

    if (dma_memory_read(&address_space_memory, table_root + entry_off,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte_ga");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
    return 0;
}
1204
/*
 * Remap an interrupt via the 128-bit guest-APIC IRTE format.  Mirrors the
 * legacy path but the destination may be extended to 32 bits when the
 * xtsup (x2APIC) property is enabled.
 */
static int amdvi_int_remap_ga(AMDVIState *iommu,
                              MSIMessage *origin,
                              MSIMessage *translated,
                              uint64_t *dte,
                              X86IOMMUIrq *irq,
                              uint16_t sid)
{
    struct irte_ga entry;
    uint32_t dest_lo;
    int ret;

    /* get interrupt remapping table */
    ret = amdvi_get_irte_ga(iommu, origin, dte, &entry, sid);
    if (ret < 0) {
        return ret;
    }

    if (!entry.lo.fields_remap.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (entry.lo.fields_remap.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (entry.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type is set");
        return -AMDVI_IR_ERR;
    }

    irq->vector = entry.hi.fields.vector;
    irq->delivery_mode = entry.lo.fields_remap.int_type;
    irq->dest_mode = entry.lo.fields_remap.dm;
    irq->redir_hint = entry.lo.fields_remap.rq_eoi;

    dest_lo = entry.lo.fields_remap.destination;
    if (iommu->xtsup) {
        /* xtsup: combine low and high destination fields */
        irq->dest = dest_lo | (entry.hi.fields.destination_hi << 24);
    } else {
        irq->dest = dest_lo & 0xff;
    }

    return 0;
}
1249
/*
 * Dispatch on the DTE IntCtl field: pass the MSI through unchanged,
 * target-abort it, or remap it via the GA or legacy IRTE format
 * depending on whether guest-APIC mode is enabled.
 */
static int __amdvi_int_remap_msi(AMDVIState *iommu,
                                 MSIMessage *origin,
                                 MSIMessage *translated,
                                 uint64_t *dte,
                                 X86IOMMUIrq *irq,
                                 uint16_t sid)
{
    uint8_t int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;

    trace_amdvi_ir_intctl(int_ctl);

    switch (int_ctl) {
    case AMDVI_IR_INTCTL_PASS:
        /* forward unmodified */
        memcpy(translated, origin, sizeof(*origin));
        return 0;
    case AMDVI_IR_INTCTL_ABORT:
        trace_amdvi_ir_target_abort("int_ctl abort");
        return -AMDVI_IR_TARGET_ABORT;
    case AMDVI_IR_INTCTL_REMAP:
        if (iommu->ga_enabled) {
            return amdvi_int_remap_ga(iommu, origin, translated, dte, irq,
                                      sid);
        }
        return amdvi_int_remap_legacy(iommu, origin, translated, dte, irq,
                                      sid);
    default:
        trace_amdvi_ir_err("int_ctl reserved");
        return -AMDVI_IR_ERR;
    }
}
1285
/*
 * Interrupt remapping for an MSI/MSI-X message.
 *
 * Fills @translated from @origin according to the requester's DTE:
 * pass-through when the IOMMU/device table or per-DTE IR is not set up,
 * IRTE-based remapping for fixed/arbitrated interrupts, and DTE-controlled
 * pass-through for NMI/INIT/ExtInt.  Returns 0 on success or a negative
 * AMDVI_IR_* error code.
 */
static int amdvi_int_remap_msi(AMDVIState *iommu,
                               MSIMessage *origin,
                               MSIMessage *translated,
                               uint16_t sid)
{
    int ret = 0;
    uint64_t pass = 0;
    uint64_t dte[4] = { 0 };
    X86IOMMUIrq irq = { 0 };
    uint8_t dest_mode, delivery_mode;

    assert(origin && translated);

    /*
     * When IOMMU is enabled, interrupt remap request will come either from
     * IO-APIC or PCI device. If interrupt is from PCI device then it will
     * have a valid requester id but if the interrupt is from IO-APIC
     * then requester id will be invalid.
     */
    if (sid == X86_IOMMU_SID_INVALID) {
        sid = AMDVI_IOAPIC_SB_DEVID;
    }

    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);

    /* check if device table entry is set before we go further. */
    if (!iommu || !iommu->devtab_len) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (!amdvi_get_dte(iommu, sid, dte)) {
        return -AMDVI_IR_ERR;
    }

    /* Check if IR is enabled in DTE */
    if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    /* validate that we are configured with intremap=on */
    if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
        trace_amdvi_err("Interrupt remapping is enabled in the guest but "
                        "not in the host. Use intremap=on to enable interrupt "
                        "remapping in amd-iommu.");
        return -AMDVI_IR_ERR;
    }

    /* only messages targeting the interrupt window can be remapped */
    if (origin->address < AMDVI_INT_ADDR_FIRST ||
        origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
        trace_amdvi_err("MSI is not from IOAPIC.");
        return -AMDVI_IR_ERR;
    }

    /*
     * The MSI data register [10:8] are used to get the upstream interrupt type.
     *
     * See MSI/MSI-X format:
     * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
     * (page 5)
     */
    delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;

    switch (delivery_mode) {
    case AMDVI_IOAPIC_INT_TYPE_FIXED:
    case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
        trace_amdvi_ir_delivery_mode("fixed/arbitrated");
        ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
        if (ret < 0) {
            goto remap_fail;
        } else {
            /* Translate IRQ to MSI messages */
            x86_iommu_irq_to_msi_message(&irq, translated);
            goto out;
        }
        break;
    case AMDVI_IOAPIC_INT_TYPE_SMI:
        error_report("SMI is not supported!");
        ret = -AMDVI_IR_ERR;
        break;
    case AMDVI_IOAPIC_INT_TYPE_NMI:
        /* NMI/INIT/EINT bypass remapping; the DTE decides pass-through */
        pass = dte[2] & AMDVI_DEV_NMI_PASS_MASK;
        trace_amdvi_ir_delivery_mode("nmi");
        break;
    case AMDVI_IOAPIC_INT_TYPE_INIT:
        pass = dte[2] & AMDVI_DEV_INT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("init");
        break;
    case AMDVI_IOAPIC_INT_TYPE_EINT:
        pass = dte[2] & AMDVI_DEV_EINT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("eint");
        break;
    default:
        trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
        ret = -AMDVI_IR_ERR;
        break;
    }

    if (ret < 0) {
        goto remap_fail;
    }

    /*
     * The MSI address register bit[2] is used to get the destination
     * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
     * only.
     */
    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
    if (dest_mode) {
        trace_amdvi_ir_err("invalid dest_mode");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

    if (pass) {
        /* DTE permits this interrupt class to pass through untranslated */
        memcpy(translated, origin, sizeof(*origin));
    } else {
        trace_amdvi_ir_err("passthrough is not enabled");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

out:
    trace_amdvi_ir_remap_msi(origin->address, origin->data,
                             translated->address, translated->data);
    return 0;

remap_fail:
    return ret;
}
1418
/* X86IOMMUClass::int_remap hook: downcast and delegate to the MSI path. */
static int amdvi_int_remap(X86IOMMUState *iommu,
                           MSIMessage *origin,
                           MSIMessage *translated,
                           uint16_t sid)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(iommu);

    return amdvi_int_remap_msi(s, origin, translated, sid);
}
1427
amdvi_mem_ir_write(void * opaque,hwaddr addr,uint64_t value,unsigned size,MemTxAttrs attrs)1428 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
1429 uint64_t value, unsigned size,
1430 MemTxAttrs attrs)
1431 {
1432 int ret;
1433 MSIMessage from = { 0, 0 }, to = { 0, 0 };
1434 uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
1435
1436 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
1437 from.data = (uint32_t) value;
1438
1439 trace_amdvi_mem_ir_write_req(addr, value, size);
1440
1441 if (!attrs.unspecified) {
1442 /* We have explicit Source ID */
1443 sid = attrs.requester_id;
1444 }
1445
1446 ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
1447 if (ret < 0) {
1448 /* TODO: log the event using IOMMU log event interface */
1449 error_report_once("failed to remap interrupt from devid 0x%x", sid);
1450 return MEMTX_ERROR;
1451 }
1452
1453 apic_get_class(NULL)->send_msi(&to);
1454
1455 trace_amdvi_mem_ir_write(to.address, to.data);
1456 return MEMTX_OK;
1457 }
1458
amdvi_mem_ir_read(void * opaque,hwaddr addr,uint64_t * data,unsigned size,MemTxAttrs attrs)1459 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
1460 uint64_t *data, unsigned size,
1461 MemTxAttrs attrs)
1462 {
1463 return MEMTX_OK;
1464 }
1465
/*
 * Ops for the 0xfee00000 interrupt-remapping MMIO window: only aligned
 * 32-bit accesses are accepted, matching the MSI message format.
 */
static const MemoryRegionOps amdvi_ir_ops = {
    .read_with_attrs = amdvi_mem_ir_read,
    .write_with_attrs = amdvi_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    }
};
1479
/*
 * Return (lazily creating) the per-device AddressSpace for @devfn on @bus.
 * Each device gets a root container holding an IOMMU translation region
 * and a DMA-disabled alias at equal priority; which one is enabled is
 * toggled elsewhere when guest programming changes.
 *
 * NOTE(review): the cache is indexed by pci_bus_num(bus) captured here —
 * presumably assumes bus numbers are assigned before first DMA; confirm
 * behaviour if the guest renumbers buses.
 */
static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    char name[128];
    AMDVIState *s = opaque;
    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
    int bus_num = pci_bus_num(bus);

    iommu_as = s->address_spaces[bus_num];

    /* allocate memory during the first run */
    if (!iommu_as) {
        iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
        s->address_spaces[bus_num] = iommu_as;
    }

    /* set up AMD-Vi region */
    if (!iommu_as[devfn]) {
        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);

        iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
        iommu_as[devfn]->devfn = (uint8_t)devfn;
        iommu_as[devfn]->iommu_state = s;

        amdvi_dev_as = iommu_as[devfn];

        /*
         * Memory region relationships looks like (Address range shows
         * only lower 32 bits to make it short in length...):
         *
         * |--------------------+-------------------+----------|
         * | Name               | Address range     | Priority |
         * |--------------------+-------------------+----------+
         * | amdvi-root         | 00000000-ffffffff |        0 |
         * | amdvi-iommu_nodma  | 00000000-ffffffff |        0 |
         * | amdvi-iommu_ir     | fee00000-feefffff |        1 |
         * |--------------------+-------------------+----------|
         */
        memory_region_init_iommu(&amdvi_dev_as->iommu,
                                 sizeof(amdvi_dev_as->iommu),
                                 TYPE_AMD_IOMMU_MEMORY_REGION,
                                 OBJECT(s),
                                 "amd_iommu", UINT64_MAX);
        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
                           "amdvi_root", UINT64_MAX);
        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            MEMORY_REGION(&amdvi_dev_as->iommu),
                                            0);

        /* Build the DMA Disabled alias to shared memory */
        memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s),
                                 "amdvi-sys", &s->mr_sys, 0,
                                 memory_region_size(&s->mr_sys));
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            &amdvi_dev_as->iommu_nodma,
                                            0);
        /* Build the Interrupt Remapping alias to shared memory */
        memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s),
                                 "amdvi-ir", &s->mr_ir, 0,
                                 memory_region_size(&s->mr_ir));
        memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu),
                                            AMDVI_INT_ADDR_FIRST,
                                            &amdvi_dev_as->iommu_ir, 1);

        /* start in DMA-translation mode: nodma alias off, IOMMU region on */
        memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
        memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true);
    }
    return &iommu_as[devfn]->as;
}
1550
/* PCI-facing callbacks: hand each device its per-devfn address space. */
static const PCIIOMMUOps amdvi_iommu_ops = {
    .get_address_space = amdvi_host_dma_iommu,
};
1554
/*
 * Ops for the AMD-Vi MMIO register window: byte to quadword accesses are
 * accepted, but unaligned accesses are rejected.
 */
static const MemoryRegionOps mmio_mem_ops = {
    .read = amdvi_mmio_read,
    .write = amdvi_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    }
};
1569
amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion * iommu,IOMMUNotifierFlag old,IOMMUNotifierFlag new,Error ** errp)1570 static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
1571 IOMMUNotifierFlag old,
1572 IOMMUNotifierFlag new,
1573 Error **errp)
1574 {
1575 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1576
1577 if (new & IOMMU_NOTIFIER_MAP) {
1578 error_setg(errp,
1579 "device %02x.%02x.%x requires iommu notifier which is not "
1580 "currently supported", as->bus_num, PCI_SLOT(as->devfn),
1581 PCI_FUNC(as->devfn));
1582 return -EINVAL;
1583 }
1584 return 0;
1585 }
1586
/*
 * Bring the IOMMU back to its power-on state: flush the IOTLB, clear all
 * software state, then reinitialise the MMIO register file with its
 * read-only feature/status contents.
 */
static void amdvi_init(AMDVIState *s)
{
    amdvi_iotlb_reset(s);

    /* clear enables and ring state */
    s->enabled = false;
    s->cmdbuf_enabled = false;
    s->mmio_enabled = false;
    s->excl_enabled = false;
    s->excl_allow = false;
    s->devtab_len = 0;
    s->cmdbuf_len = 0;
    s->cmdbuf_head = 0;
    s->cmdbuf_tail = 0;
    s->evtlog_head = 0;
    s->evtlog_tail = 0;

    /* reset MMIO, then seed the read-only registers */
    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
                   amdvi_extended_feature_register(s),
                   0xffffffffffffffef, 0);
    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
}
1610
amdvi_pci_realize(PCIDevice * pdev,Error ** errp)1611 static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
1612 {
1613 AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
1614 int ret;
1615
1616 ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
1617 AMDVI_CAPAB_SIZE, errp);
1618 if (ret < 0) {
1619 return;
1620 }
1621 s->capab_offset = ret;
1622
1623 ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
1624 AMDVI_CAPAB_REG_SIZE, errp);
1625 if (ret < 0) {
1626 return;
1627 }
1628 ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
1629 AMDVI_CAPAB_REG_SIZE, errp);
1630 if (ret < 0) {
1631 return;
1632 }
1633
1634 if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
1635 return;
1636 }
1637
1638 /* reset device ident */
1639 pci_config_set_prog_interface(pdev->config, 0);
1640
1641 /* reset AMDVI specific capabilities, all r/o */
1642 pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
1643 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
1644 AMDVI_BASE_ADDR & MAKE_64BIT_MASK(14, 18));
1645 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
1646 AMDVI_BASE_ADDR >> 32);
1647 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
1648 0xff000000);
1649 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
1650 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
1651 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
1652 }
1653
/* Device reset: quiesce MSI on the companion PCI function, reinit state. */
static void amdvi_sysbus_reset(DeviceState *dev)
{
    AMDVIState *iommu = AMD_IOMMU_DEVICE(dev);

    msi_reset(&iommu->pci->dev);
    amdvi_init(iommu);
}
1661
/*
 * Migration description used when the user binds an explicit AMDVI-PCI
 * device via the "pci-id" property (see amdvi_sysbus_realize(), which
 * installs this in place of the default unmigratable vmstate).
 */
static const VMStateDescription vmstate_amdvi_sysbus_migratable = {
    .name = "amd-iommu",
    .version_id = 1,
    .minimum_version_id = 1,
    .priority = MIG_PRI_IOMMU,
    .fields = (VMStateField[]) {
        /* Updated in amdvi_handle_control_write() */
        VMSTATE_BOOL(enabled, AMDVIState),
        VMSTATE_BOOL(ga_enabled, AMDVIState),
        /* bool ats_enabled is obsolete */
        VMSTATE_UNUSED(1), /* was ats_enabled */
        VMSTATE_BOOL(cmdbuf_enabled, AMDVIState),
        VMSTATE_BOOL(completion_wait_intr, AMDVIState),
        VMSTATE_BOOL(evtlog_enabled, AMDVIState),
        VMSTATE_BOOL(evtlog_intr, AMDVIState),
        /* Updated in amdvi_handle_devtab_write() */
        VMSTATE_UINT64(devtab, AMDVIState),
        VMSTATE_UINT64(devtab_len, AMDVIState),
        /* Updated in amdvi_handle_cmdbase_write() */
        VMSTATE_UINT64(cmdbuf, AMDVIState),
        VMSTATE_UINT64(cmdbuf_len, AMDVIState),
        /* Updated in amdvi_handle_cmdhead_write() */
        VMSTATE_UINT32(cmdbuf_head, AMDVIState),
        /* Updated in amdvi_handle_cmdtail_write() */
        VMSTATE_UINT32(cmdbuf_tail, AMDVIState),
        /* Updated in amdvi_handle_evtbase_write() */
        VMSTATE_UINT64(evtlog, AMDVIState),
        VMSTATE_UINT32(evtlog_len, AMDVIState),
        /* Updated in amdvi_handle_evthead_write() */
        VMSTATE_UINT32(evtlog_head, AMDVIState),
        /* Updated in amdvi_handle_evttail_write() */
        VMSTATE_UINT32(evtlog_tail, AMDVIState),
        /* Updated in amdvi_handle_pprbase_write() */
        VMSTATE_UINT64(ppr_log, AMDVIState),
        VMSTATE_UINT32(pprlog_len, AMDVIState),
        /* Updated in amdvi_handle_pprhead_write() */
        VMSTATE_UINT32(pprlog_head, AMDVIState),
        /* Updated in amdvi_handle_tailhead_write() */
        VMSTATE_UINT32(pprlog_tail, AMDVIState),
        /* MMIO registers */
        VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE),
        VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE),
        VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE),
        VMSTATE_END_OF_LIST()
    }
};
1708
amdvi_sysbus_realize(DeviceState * dev,Error ** errp)1709 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
1710 {
1711 DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev));
1712 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1713 MachineState *ms = MACHINE(qdev_get_machine());
1714 PCMachineState *pcms = PC_MACHINE(ms);
1715 X86MachineState *x86ms = X86_MACHINE(ms);
1716 PCIBus *bus = pcms->pcibus;
1717
1718 if (s->pci_id) {
1719 PCIDevice *pdev = NULL;
1720 int ret = pci_qdev_find_device(s->pci_id, &pdev);
1721
1722 if (ret) {
1723 error_report("Cannot find PCI device '%s'", s->pci_id);
1724 return;
1725 }
1726
1727 if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) {
1728 error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id);
1729 return;
1730 }
1731
1732 s->pci = AMD_IOMMU_PCI(pdev);
1733 dc->vmsd = &vmstate_amdvi_sysbus_migratable;
1734 } else {
1735 s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI));
1736 /* This device should take care of IOMMU PCI properties */
1737 if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) {
1738 return;
1739 }
1740 }
1741
1742 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
1743 amdvi_uint64_equal, g_free, g_free);
1744
1745 /* set up MMIO */
1746 memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
1747 "amdvi-mmio", AMDVI_MMIO_SIZE);
1748 memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
1749 &s->mr_mmio);
1750
1751 /* Create the share memory regions by all devices */
1752 memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX);
1753
1754 /* set up the DMA disabled memory region */
1755 memory_region_init_alias(&s->mr_nodma, OBJECT(s),
1756 "amdvi-nodma", get_system_memory(), 0,
1757 memory_region_size(get_system_memory()));
1758 memory_region_add_subregion_overlap(&s->mr_sys, 0,
1759 &s->mr_nodma, 0);
1760
1761 /* set up the Interrupt Remapping memory region */
1762 memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops,
1763 s, "amdvi-ir", AMDVI_INT_ADDR_SIZE);
1764 memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
1765 &s->mr_ir, 1);
1766
1767 /* Pseudo address space under root PCI bus. */
1768 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
1769
1770 if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) {
1771 error_report("AMD IOMMU with x2APIC configuration requires xtsup=on");
1772 exit(EXIT_FAILURE);
1773 }
1774
1775 if (s->xtsup) {
1776 if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
1777 error_report("AMD IOMMU xtsup=on requires x2APIC support on "
1778 "the KVM side");
1779 exit(EXIT_FAILURE);
1780 }
1781 }
1782
1783 pci_setup_iommu(bus, &amdvi_iommu_ops, s);
1784 amdvi_init(s);
1785 }
1786
static const Property amdvi_properties[] = {
    /* advertise x2APIC (XT) support; required when apic_id_limit > 255 */
    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
    /* optional id of a user-created AMDVI-PCI device to bind to */
    DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
};
1791
/* Default vmstate: the device is unmigratable unless "pci-id" is set. */
static const VMStateDescription vmstate_amdvi_sysbus = {
    .name = "amd-iommu",
    .unmigratable = 1
};
1796
/* Class init for the sysbus AMD-Vi device type. */
static void amdvi_sysbus_class_init(ObjectClass *klass, const void *data)
{
    X86IOMMUClass *x86_class = X86_IOMMU_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    x86_class->realize = amdvi_sysbus_realize;
    x86_class->int_remap = amdvi_int_remap;

    device_class_set_legacy_reset(dc, amdvi_sysbus_reset);
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->vmsd = &vmstate_amdvi_sysbus;
    dc->hotpluggable = false;
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
    device_class_set_props(dc, amdvi_properties);
}
1811
/* QOM type registration for the sysbus AMD-Vi device. */
static const TypeInfo amdvi_sysbus = {
    .name = TYPE_AMD_IOMMU_DEVICE,
    .parent = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(AMDVIState),
    .class_init = amdvi_sysbus_class_init
};
1818
/* Class init for the AMDVI-PCI companion function. */
static void amdvi_pci_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->vendor_id = PCI_VENDOR_ID_AMD;
    /* NOTE(review): 0x1419 — presumably AMD's IOMMU function device id;
     * confirm against the AMD I/O virtualization spec */
    k->device_id = 0x1419;
    /* PCI base class 08 (system peripheral), subclass 06 (IOMMU) */
    k->class_id = 0x0806;
    k->realize = amdvi_pci_realize;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}
1832
/* QOM type registration for the AMDVI-PCI companion function. */
static const TypeInfo amdvi_pci = {
    .name = TYPE_AMD_IOMMU_PCI,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(AMDVIPCIState),
    .class_init = amdvi_pci_class_init,
    .interfaces = (const InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1843
/* Class init for the AMD-Vi IOMMU memory region: wire translation hooks. */
static void amdvi_iommu_memory_region_class_init(ObjectClass *klass,
                                                 const void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
    imrc->translate = amdvi_translate;
}
1852
/* QOM type registration for the per-device IOMMU memory region. */
static const TypeInfo amdvi_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_AMD_IOMMU_MEMORY_REGION,
    .class_init = amdvi_iommu_memory_region_class_init,
};
1858
amdvi_register_types(void)1859 static void amdvi_register_types(void)
1860 {
1861 type_register_static(&amdvi_pci);
1862 type_register_static(&amdvi_sysbus);
1863 type_register_static(&amdvi_iommu_memory_region_info);
1864 }
1865
1866 type_init(amdvi_register_types);
1867