1 /* 2 * QEMU emulation of an Intel IOMMU (VT-d) 3 * (DMA Remapping device) 4 * 5 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> 6 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 18 * You should have received a copy of the GNU General Public License along 19 * with this program; if not, see <http://www.gnu.org/licenses/>. 20 */ 21 22 #ifndef INTEL_IOMMU_H 23 #define INTEL_IOMMU_H 24 25 #include "hw/i386/x86-iommu.h" 26 #include "qemu/iova-tree.h" 27 #include "qom/object.h" 28 29 #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" 30 OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE) 31 32 #define TYPE_INTEL_IOMMU_MEMORY_REGION "intel-iommu-iommu-memory-region" 33 34 /* DMAR Hardware Unit Definition address (IOMMU unit) */ 35 #define Q35_HOST_BRIDGE_IOMMU_ADDR 0xfed90000ULL 36 37 #define VTD_PCI_BUS_MAX 256 38 #define VTD_PCI_SLOT_MAX 32 39 #define VTD_PCI_FUNC_MAX 8 40 #define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) 41 #define VTD_PCI_FUNC(devfn) ((devfn) & 0x07) 42 #define VTD_SID_TO_BUS(sid) (((sid) >> 8) & 0xff) 43 #define VTD_SID_TO_DEVFN(sid) ((sid) & 0xff) 44 45 #define DMAR_REG_SIZE 0x230 46 #define VTD_HOST_AW_39BIT 39 47 #define VTD_HOST_AW_48BIT 48 48 #define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT 49 #define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) 50 51 #define DMAR_REPORT_F_INTR (1) 52 53 #define VTD_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) 54 #define VTD_MSI_ADDR_HI_SHIFT (32) 55 #define VTD_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) 56 57 typedef struct VTDContextEntry VTDContextEntry; 58 typedef struct VTDContextCacheEntry VTDContextCacheEntry; 59 typedef struct VTDAddressSpace VTDAddressSpace; 60 typedef struct VTDIOTLBEntry VTDIOTLBEntry; 61 typedef union VTD_IR_TableEntry VTD_IR_TableEntry; 62 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; 63 typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; 64 typedef struct VTDPASIDEntry VTDPASIDEntry; 65 66 /* Context-Entry */ 67 struct VTDContextEntry { 68 union { 69 struct { 70 uint64_t lo; 71 uint64_t hi; 72 }; 73 struct { 74 uint64_t val[4]; 75 }; 76 }; 77 }; 78 79 struct VTDContextCacheEntry { 80 /* The cache entry is obsolete if 81 * context_cache_gen!=IntelIOMMUState.context_cache_gen 82 */ 83 uint32_t context_cache_gen; 84 struct VTDContextEntry context_entry; 85 }; 86 87 /* PASID Directory Entry */ 88 struct VTDPASIDDirEntry { 89 uint64_t val; 90 }; 91 92 /* PASID Table Entry */ 93 struct VTDPASIDEntry { 94 uint64_t val[8]; 95 }; 96 97 struct VTDAddressSpace { 98 PCIBus *bus; 99 uint8_t devfn; 100 uint32_t pasid; 101 AddressSpace as; 102 IOMMUMemoryRegion iommu; 103 MemoryRegion root; /* The root container of the device */ 104 MemoryRegion nodmar; /* The alias of shared nodmar MR */ 105 MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ 106 MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */ 107 IntelIOMMUState *iommu_state; 108 VTDContextCacheEntry context_cache_entry; 109 QLIST_ENTRY(VTDAddressSpace) next; 110 /* Superset of notifier flags that this address space has */ 111 IOMMUNotifierFlag notifier_flags; 112 /* 113 * @iova_tree traces mapped IOVA ranges. 114 * 115 * The tree is not needed if no MAP notifier is registered with current 116 * VTD address space, because all guest invalidate commands can be 117 * directly passed to the IOMMU UNMAP notifiers without any further 118 * reshuffling. 119 * 120 * The tree OTOH is required for MAP typed iommu notifiers for a few 121 * reasons. 122 * 123 * Firstly, there's no way to identify whether an PSI (Page Selective 124 * Invalidations) or DSI (Domain Selective Invalidations) event is an 125 * MAP or UNMAP event within the message itself. Without having prior 126 * knowledge of existing state vIOMMU doesn't know whether it should 127 * notify MAP or UNMAP for a PSI message it received when caching mode 128 * is enabled (for MAP notifiers). 129 * 130 * Secondly, PSI messages received from guest driver can be enlarged in 131 * range, covers but not limited to what the guest driver wanted to 132 * invalidate. When the range to invalidates gets bigger than the 133 * limit of a PSI message, it can even become a DSI which will 134 * invalidate the whole domain. If the vIOMMU directly notifies the 135 * registered device with the unmodified range, it may confuse the 136 * registered drivers (e.g. vfio-pci) on either: 137 * 138 * (1) Trying to map the same region more than once (for 139 * VFIO_IOMMU_MAP_DMA, -EEXIST will trigger), or, 140 * 141 * (2) Trying to UNMAP a range that is still partially mapped. 142 * 143 * That accuracy is not required for UNMAP-only notifiers, but it is a 144 * must-to-have for notifiers registered with MAP events, because the 145 * vIOMMU needs to make sure the shadow page table is always in sync 146 * with the guest IOMMU pgtables for a device. 147 */ 148 IOVATree *iova_tree; 149 }; 150 151 struct VTDIOTLBEntry { 152 uint64_t gfn; 153 uint16_t domain_id; 154 uint32_t pasid; 155 uint64_t slpte; 156 uint64_t mask; 157 uint8_t access_flags; 158 }; 159 160 /* VT-d Source-ID Qualifier types */ 161 enum { 162 VTD_SQ_FULL = 0x00, /* Full SID verification */ 163 VTD_SQ_IGN_3 = 0x01, /* Ignore bit 3 */ 164 VTD_SQ_IGN_2_3 = 0x02, /* Ignore bits 2 & 3 */ 165 VTD_SQ_IGN_1_3 = 0x03, /* Ignore bits 1-3 */ 166 VTD_SQ_MAX, 167 }; 168 169 /* VT-d Source Validation Types */ 170 enum { 171 VTD_SVT_NONE = 0x00, /* No validation */ 172 VTD_SVT_ALL = 0x01, /* Do full validation */ 173 VTD_SVT_BUS = 0x02, /* Validate bus range */ 174 VTD_SVT_MAX, 175 }; 176 177 /* Interrupt Remapping Table Entry Definition */ 178 union VTD_IR_TableEntry { 179 struct { 180 #if HOST_BIG_ENDIAN 181 uint64_t dest_id:32; /* Destination ID */ 182 uint64_t __reserved_1:8; /* Reserved 1 */ 183 uint64_t vector:8; /* Interrupt Vector */ 184 uint64_t irte_mode:1; /* IRTE Mode */ 185 uint64_t __reserved_0:3; /* Reserved 0 */ 186 uint64_t __avail:4; /* Available spaces for software */ 187 uint64_t delivery_mode:3; /* Delivery Mode */ 188 uint64_t trigger_mode:1; /* Trigger Mode */ 189 uint64_t redir_hint:1; /* Redirection Hint */ 190 uint64_t dest_mode:1; /* Destination Mode */ 191 uint64_t fault_disable:1; /* Fault Processing Disable */ 192 uint64_t present:1; /* Whether entry present/available */ 193 #else 194 uint64_t present:1; /* Whether entry present/available */ 195 uint64_t fault_disable:1; /* Fault Processing Disable */ 196 uint64_t dest_mode:1; /* Destination Mode */ 197 uint64_t redir_hint:1; /* Redirection Hint */ 198 uint64_t trigger_mode:1; /* Trigger Mode */ 199 uint64_t delivery_mode:3; /* Delivery Mode */ 200 uint64_t __avail:4; /* Available spaces for software */ 201 uint64_t __reserved_0:3; /* Reserved 0 */ 202 uint64_t irte_mode:1; /* IRTE Mode */ 203 uint64_t vector:8; /* Interrupt Vector */ 204 uint64_t __reserved_1:8; /* Reserved 1 */ 205 uint64_t dest_id:32; /* Destination ID */ 206 #endif 207 #if HOST_BIG_ENDIAN 208 uint64_t __reserved_2:44; /* Reserved 2 */ 209 uint64_t sid_vtype:2; /* Source-ID Validation Type */ 210 uint64_t sid_q:2; /* Source-ID Qualifier */ 211 uint64_t source_id:16; /* Source-ID */ 212 #else 213 uint64_t source_id:16; /* Source-ID */ 214 uint64_t sid_q:2; /* Source-ID Qualifier */ 215 uint64_t sid_vtype:2; /* Source-ID Validation Type */ 216 uint64_t __reserved_2:44; /* Reserved 2 */ 217 #endif 218 } QEMU_PACKED irte; 219 uint64_t data[2]; 220 }; 221 222 #define VTD_IR_INT_FORMAT_COMPAT (0) /* Compatible Interrupt */ 223 #define VTD_IR_INT_FORMAT_REMAP (1) /* Remappable Interrupt */ 224 225 /* Programming format for MSI/MSI-X addresses */ 226 union VTD_IR_MSIAddress { 227 struct { 228 #if HOST_BIG_ENDIAN 229 uint32_t __head:12; /* Should always be: 0x0fee */ 230 uint32_t index_l:15; /* Interrupt index bit 14-0 */ 231 uint32_t int_mode:1; /* Interrupt format */ 232 uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ 233 uint32_t index_h:1; /* Interrupt index bit 15 */ 234 uint32_t __not_care:2; 235 #else 236 uint32_t __not_care:2; 237 uint32_t index_h:1; /* Interrupt index bit 15 */ 238 uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ 239 uint32_t int_mode:1; /* Interrupt format */ 240 uint32_t index_l:15; /* Interrupt index bit 14-0 */ 241 uint32_t __head:12; /* Should always be: 0x0fee */ 242 #endif 243 } QEMU_PACKED addr; 244 uint32_t data; 245 }; 246 247 /* When IR is enabled, all MSI/MSI-X data bits should be zero */ 248 #define VTD_IR_MSI_DATA (0) 249 250 /* The iommu (DMAR) device state struct */ 251 struct IntelIOMMUState { 252 X86IOMMUState x86_iommu; 253 MemoryRegion csrmem; 254 MemoryRegion mr_nodmar; 255 MemoryRegion mr_ir; 256 MemoryRegion mr_sys_alias; 257 uint8_t csr[DMAR_REG_SIZE]; /* register values */ 258 uint8_t wmask[DMAR_REG_SIZE]; /* R/W bytes */ 259 uint8_t w1cmask[DMAR_REG_SIZE]; /* RW1C(Write 1 to Clear) bytes */ 260 uint8_t womask[DMAR_REG_SIZE]; /* WO (write only - read returns 0) */ 261 uint32_t version; 262 263 bool caching_mode; /* RO - is cap CM enabled? */ 264 bool scalable_mode; /* RO - is Scalable Mode supported? */ 265 bool snoop_control; /* RO - is SNP filed supported? */ 266 267 dma_addr_t root; /* Current root table pointer */ 268 bool root_scalable; /* Type of root table (scalable or not) */ 269 bool dmar_enabled; /* Set if DMA remapping is enabled */ 270 271 uint16_t iq_head; /* Current invalidation queue head */ 272 uint16_t iq_tail; /* Current invalidation queue tail */ 273 dma_addr_t iq; /* Current invalidation queue pointer */ 274 uint16_t iq_size; /* IQ Size in number of entries */ 275 bool iq_dw; /* IQ descriptor width 256bit or not */ 276 bool qi_enabled; /* Set if the QI is enabled */ 277 uint8_t iq_last_desc_type; /* The type of last completed descriptor */ 278 279 /* The index of the Fault Recording Register to be used next. 280 * Wraps around from N-1 to 0, where N is the number of FRCD_REG. 281 */ 282 uint16_t next_frcd_reg; 283 284 uint64_t cap; /* The value of capability reg */ 285 uint64_t ecap; /* The value of extended capability reg */ 286 287 uint32_t context_cache_gen; /* Should be in [1,MAX] */ 288 GHashTable *iotlb; /* IOTLB */ 289 290 GHashTable *vtd_address_spaces; /* VTD address spaces */ 291 VTDAddressSpace *vtd_as_cache[VTD_PCI_BUS_MAX]; /* VTD address space cache */ 292 /* list of registered notifiers */ 293 QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; 294 295 GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */ 296 297 /* interrupt remapping */ 298 bool intr_enabled; /* Whether guest enabled IR */ 299 dma_addr_t intr_root; /* Interrupt remapping table pointer */ 300 uint32_t intr_size; /* Number of IR table entries */ 301 bool intr_eime; /* Extended interrupt mode enabled */ 302 OnOffAuto intr_eim; /* Toggle for EIM cabability */ 303 bool buggy_eim; /* Force buggy EIM unless eim=off */ 304 uint8_t aw_bits; /* Host/IOVA address width (in bits) */ 305 bool dma_drain; /* Whether DMA r/w draining enabled */ 306 bool dma_translation; /* Whether DMA translation supported */ 307 bool pasid; /* Whether to support PASID */ 308 309 /* 310 * Protects IOMMU states in general. Currently it protects the 311 * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace. 312 */ 313 QemuMutex iommu_lock; 314 }; 315 316 /* Find the VTD Address space associated with the given bus pointer, 317 * create a new one if none exists 318 */ 319 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, 320 int devfn, unsigned int pasid); 321 322 #endif 323