1 /* 2 * VFIO: IOMMU DMA mapping support for TCE on POWER 3 * 4 * Copyright (C) 2013 IBM Corp. All rights reserved. 5 * Author: Alexey Kardashevskiy <aik@ozlabs.ru> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 * 11 * Derived from original vfio_iommu_type1.c: 12 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 13 * Author: Alex Williamson <alex.williamson@redhat.com> 14 */ 15 16 #include <linux/module.h> 17 #include <linux/pci.h> 18 #include <linux/slab.h> 19 #include <linux/uaccess.h> 20 #include <linux/err.h> 21 #include <linux/vfio.h> 22 #include <asm/iommu.h> 23 #include <asm/tce.h> 24 25 #define DRIVER_VERSION "0.1" 26 #define DRIVER_AUTHOR "aik@ozlabs.ru" 27 #define DRIVER_DESC "VFIO IOMMU SPAPR TCE" 28 29 static void tce_iommu_detach_group(void *iommu_data, 30 struct iommu_group *iommu_group); 31 32 /* 33 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation 34 * 35 * This code handles mapping and unmapping of user data buffers 36 * into DMA'ble space using the IOMMU 37 */ 38 39 /* 40 * The container descriptor supports only a single group per container. 41 * Required by the API as the container is not supplied with the IOMMU group 42 * at the moment of initialization. 43 */ 44 struct tce_container { 45 struct mutex lock; 46 struct iommu_table *tbl; 47 bool enabled; 48 }; 49 50 static int tce_iommu_enable(struct tce_container *container) 51 { 52 int ret = 0; 53 unsigned long locked, lock_limit, npages; 54 struct iommu_table *tbl = container->tbl; 55 56 if (!container->tbl) 57 return -ENXIO; 58 59 if (!current->mm) 60 return -ESRCH; /* process exited */ 61 62 if (container->enabled) 63 return -EBUSY; 64 65 /* 66 * When userspace pages are mapped into the IOMMU, they are effectively 67 * locked memory, so, theoretically, we need to update the accounting 68 * of locked pages on each map and unmap. For powerpc, the map unmap 69 * paths can be very hot, though, and the accounting would kill 70 * performance, especially since it would be difficult to impossible 71 * to handle the accounting in real mode only. 72 * 73 * To address that, rather than precisely accounting every page, we 74 * instead account for a worst case on locked memory when the iommu is 75 * enabled and disabled. The worst case upper bound on locked memory 76 * is the size of the whole iommu window, which is usually relatively 77 * small (compared to total memory sizes) on POWER hardware. 78 * 79 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, 80 * that would effectively kill the guest at random points, much better 81 * enforcing the limit based on the max that the guest can map. 82 */ 83 down_write(¤t->mm->mmap_sem); 84 npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; 85 locked = current->mm->locked_vm + npages; 86 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 87 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { 88 pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", 89 rlimit(RLIMIT_MEMLOCK)); 90 ret = -ENOMEM; 91 } else { 92 93 current->mm->locked_vm += npages; 94 container->enabled = true; 95 } 96 up_write(¤t->mm->mmap_sem); 97 98 return ret; 99 } 100 101 static void tce_iommu_disable(struct tce_container *container) 102 { 103 if (!container->enabled) 104 return; 105 106 container->enabled = false; 107 108 if (!container->tbl || !current->mm) 109 return; 110 111 down_write(¤t->mm->mmap_sem); 112 current->mm->locked_vm -= (container->tbl->it_size << 113 IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; 114 up_write(¤t->mm->mmap_sem); 115 } 116 117 static void *tce_iommu_open(unsigned long arg) 118 { 119 struct tce_container *container; 120 121 if (arg != VFIO_SPAPR_TCE_IOMMU) { 122 pr_err("tce_vfio: Wrong IOMMU type\n"); 123 return ERR_PTR(-EINVAL); 124 } 125 126 container = kzalloc(sizeof(*container), GFP_KERNEL); 127 if (!container) 128 return ERR_PTR(-ENOMEM); 129 130 mutex_init(&container->lock); 131 132 return container; 133 } 134 135 static void tce_iommu_release(void *iommu_data) 136 { 137 struct tce_container *container = iommu_data; 138 139 WARN_ON(container->tbl && !container->tbl->it_group); 140 tce_iommu_disable(container); 141 142 if (container->tbl && container->tbl->it_group) 143 tce_iommu_detach_group(iommu_data, container->tbl->it_group); 144 145 mutex_destroy(&container->lock); 146 147 kfree(container); 148 } 149 150 static long tce_iommu_ioctl(void *iommu_data, 151 unsigned int cmd, unsigned long arg) 152 { 153 struct tce_container *container = iommu_data; 154 unsigned long minsz; 155 long ret; 156 157 switch (cmd) { 158 case VFIO_CHECK_EXTENSION: 159 return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; 160 161 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { 162 struct vfio_iommu_spapr_tce_info info; 163 struct iommu_table *tbl = container->tbl; 164 165 if (WARN_ON(!tbl)) 166 return -ENXIO; 167 168 minsz = offsetofend(struct vfio_iommu_spapr_tce_info, 169 dma32_window_size); 170 171 if (copy_from_user(&info, (void __user *)arg, minsz)) 172 return -EFAULT; 173 174 if (info.argsz < minsz) 175 return -EINVAL; 176 177 info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; 178 info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; 179 info.flags = 0; 180 181 if (copy_to_user((void __user *)arg, &info, minsz)) 182 return -EFAULT; 183 184 return 0; 185 } 186 case VFIO_IOMMU_MAP_DMA: { 187 struct vfio_iommu_type1_dma_map param; 188 struct iommu_table *tbl = container->tbl; 189 unsigned long tce, i; 190 191 if (!tbl) 192 return -ENXIO; 193 194 BUG_ON(!tbl->it_group); 195 196 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); 197 198 if (copy_from_user(¶m, (void __user *)arg, minsz)) 199 return -EFAULT; 200 201 if (param.argsz < minsz) 202 return -EINVAL; 203 204 if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | 205 VFIO_DMA_MAP_FLAG_WRITE)) 206 return -EINVAL; 207 208 if ((param.size & ~IOMMU_PAGE_MASK) || 209 (param.vaddr & ~IOMMU_PAGE_MASK)) 210 return -EINVAL; 211 212 /* iova is checked by the IOMMU API */ 213 tce = param.vaddr; 214 if (param.flags & VFIO_DMA_MAP_FLAG_READ) 215 tce |= TCE_PCI_READ; 216 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) 217 tce |= TCE_PCI_WRITE; 218 219 ret = iommu_tce_put_param_check(tbl, param.iova, tce); 220 if (ret) 221 return ret; 222 223 for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { 224 ret = iommu_put_tce_user_mode(tbl, 225 (param.iova >> IOMMU_PAGE_SHIFT) + i, 226 tce); 227 if (ret) 228 break; 229 tce += IOMMU_PAGE_SIZE; 230 } 231 if (ret) 232 iommu_clear_tces_and_put_pages(tbl, 233 param.iova >> IOMMU_PAGE_SHIFT, i); 234 235 iommu_flush_tce(tbl); 236 237 return ret; 238 } 239 case VFIO_IOMMU_UNMAP_DMA: { 240 struct vfio_iommu_type1_dma_unmap param; 241 struct iommu_table *tbl = container->tbl; 242 243 if (WARN_ON(!tbl)) 244 return -ENXIO; 245 246 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, 247 size); 248 249 if (copy_from_user(¶m, (void __user *)arg, minsz)) 250 return -EFAULT; 251 252 if (param.argsz < minsz) 253 return -EINVAL; 254 255 /* No flag is supported now */ 256 if (param.flags) 257 return -EINVAL; 258 259 if (param.size & ~IOMMU_PAGE_MASK) 260 return -EINVAL; 261 262 ret = iommu_tce_clear_param_check(tbl, param.iova, 0, 263 param.size >> IOMMU_PAGE_SHIFT); 264 if (ret) 265 return ret; 266 267 ret = iommu_clear_tces_and_put_pages(tbl, 268 param.iova >> IOMMU_PAGE_SHIFT, 269 param.size >> IOMMU_PAGE_SHIFT); 270 iommu_flush_tce(tbl); 271 272 return ret; 273 } 274 case VFIO_IOMMU_ENABLE: 275 mutex_lock(&container->lock); 276 ret = tce_iommu_enable(container); 277 mutex_unlock(&container->lock); 278 return ret; 279 280 281 case VFIO_IOMMU_DISABLE: 282 mutex_lock(&container->lock); 283 tce_iommu_disable(container); 284 mutex_unlock(&container->lock); 285 return 0; 286 } 287 288 return -ENOTTY; 289 } 290 291 static int tce_iommu_attach_group(void *iommu_data, 292 struct iommu_group *iommu_group) 293 { 294 int ret; 295 struct tce_container *container = iommu_data; 296 struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); 297 298 BUG_ON(!tbl); 299 mutex_lock(&container->lock); 300 301 /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", 302 iommu_group_id(iommu_group), iommu_group); */ 303 if (container->tbl) { 304 pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", 305 iommu_group_id(container->tbl->it_group), 306 iommu_group_id(iommu_group)); 307 ret = -EBUSY; 308 } else if (container->enabled) { 309 pr_err("tce_vfio: attaching group #%u to enabled container\n", 310 iommu_group_id(iommu_group)); 311 ret = -EBUSY; 312 } else { 313 ret = iommu_take_ownership(tbl); 314 if (!ret) 315 container->tbl = tbl; 316 } 317 318 mutex_unlock(&container->lock); 319 320 return ret; 321 } 322 323 static void tce_iommu_detach_group(void *iommu_data, 324 struct iommu_group *iommu_group) 325 { 326 struct tce_container *container = iommu_data; 327 struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); 328 329 BUG_ON(!tbl); 330 mutex_lock(&container->lock); 331 if (tbl != container->tbl) { 332 pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", 333 iommu_group_id(iommu_group), 334 iommu_group_id(tbl->it_group)); 335 } else { 336 if (container->enabled) { 337 pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", 338 iommu_group_id(tbl->it_group)); 339 tce_iommu_disable(container); 340 } 341 342 /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", 343 iommu_group_id(iommu_group), iommu_group); */ 344 container->tbl = NULL; 345 iommu_release_ownership(tbl); 346 } 347 mutex_unlock(&container->lock); 348 } 349 350 const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { 351 .name = "iommu-vfio-powerpc", 352 .owner = THIS_MODULE, 353 .open = tce_iommu_open, 354 .release = tce_iommu_release, 355 .ioctl = tce_iommu_ioctl, 356 .attach_group = tce_iommu_attach_group, 357 .detach_group = tce_iommu_detach_group, 358 }; 359 360 static int __init tce_iommu_init(void) 361 { 362 return vfio_register_iommu_driver(&tce_iommu_driver_ops); 363 } 364 365 static void __exit tce_iommu_cleanup(void) 366 { 367 vfio_unregister_iommu_driver(&tce_iommu_driver_ops); 368 } 369 370 module_init(tce_iommu_init); 371 module_exit(tce_iommu_cleanup); 372 373 MODULE_VERSION(DRIVER_VERSION); 374 MODULE_LICENSE("GPL v2"); 375 MODULE_AUTHOR(DRIVER_AUTHOR); 376 MODULE_DESCRIPTION(DRIVER_DESC); 377 378