/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU.
 */

/*
 * The container descriptor supports only a single group per container.
 * This is required by the API, as the container is not supplied with the
 * IOMMU group at the time of initialization.
 */
struct tce_container {
	struct mutex lock;
	struct iommu_table *tbl;
	bool enabled;
};

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked, lock_limit, npages;
	struct iommu_table *tbl = container->tbl;

	if (!container->tbl)
		return -ENXIO;

	if (!current->mm)
		return -ESRCH; /* process exited */

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, in theory, we would need to update the accounting
	 * of locked pages on every map and unmap.  For powerpc, however, the
	 * map/unmap paths can be very hot, and the accounting would kill
	 * performance, especially since it would be difficult, if not
	 * impossible, to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting for every page, we
	 * instead account for the worst case of locked memory when the IOMMU
	 * is enabled and disabled.  The worst-case upper bound on locked
	 * memory is the size of the whole IOMMU window, which is usually
	 * relatively small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also, we do not have a nice way to fail an H_PUT_TCE due to ulimits:
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the maximum that the guest
	 * can ever map.
	 */
	down_write(&current->mm->mmap_sem);
	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
	locked = current->mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
				rlimit(RLIMIT_MEMLOCK));
		ret = -ENOMEM;
	} else {
		current->mm->locked_vm += npages;
		container->enabled = true;
	}
	up_write(&current->mm->mmap_sem);

	return ret;
}
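
/*
 * A worked example of the worst-case charge above (illustrative numbers only,
 * not taken from this driver): for a 1GB 32-bit DMA window of 4K TCEs,
 * tbl->it_size is 262144 entries, so on a kernel with a 64K PAGE_SIZE
 * tce_iommu_enable() charges (262144 << 12) >> 16 = 16384 pages (1GB) to
 * locked_vm up front, regardless of how much of the window ever gets mapped.
 */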

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	if (!container->tbl || !current->mm)
		return;

	down_write(&current->mm->mmap_sem);
	current->mm->locked_vm -= (container->tbl->it_size <<
			IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
	up_write(&current->mm->mmap_sem);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if (arg != VFIO_SPAPR_TCE_IOMMU) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);

	return container;
}

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;

	WARN_ON(container->tbl && !container->tbl->it_group);
	tce_iommu_disable(container);

	if (container->tbl && container->tbl->it_group)
		tce_iommu_detach_group(iommu_data, container->tbl->it_group);

	mutex_destroy(&container->lock);

	kfree(container);
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;

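	/*
	 * Report the single 32-bit DMA window backing this container.
	 * dma32_window_start/size are returned in bytes, derived from the
	 * 4K TCE geometry of the underlying iommu_table.
	 */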
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
		info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
		info.flags = 0;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = container->tbl;
		unsigned long tce, i;

		if (!tbl)
			return -ENXIO;

		BUG_ON(!tbl->it_group);

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
				(param.vaddr & ~IOMMU_PAGE_MASK_4K))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		tce = param.vaddr;
		if (param.flags & VFIO_DMA_MAP_FLAG_READ)
			tce |= TCE_PCI_READ;
		if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
			tce |= TCE_PCI_WRITE;

		ret = iommu_tce_put_param_check(tbl, param.iova, tce);
		if (ret)
			return ret;

		for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) {
			ret = iommu_put_tce_user_mode(tbl,
					(param.iova >> IOMMU_PAGE_SHIFT_4K) + i,
					tce);
			if (ret)
				break;
			tce += IOMMU_PAGE_SIZE_4K;
		}
		if (ret)
			iommu_clear_tces_and_put_pages(tbl,
					param.iova >> IOMMU_PAGE_SHIFT_4K, i);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flags are supported yet */
		if (param.flags)
			return -EINVAL;

		if (param.size & ~IOMMU_PAGE_MASK_4K)
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> IOMMU_PAGE_SHIFT_4K);
		if (ret)
			return ret;

		ret = iommu_clear_tces_and_put_pages(tbl,
				param.iova >> IOMMU_PAGE_SHIFT_4K,
				param.size >> IOMMU_PAGE_SHIFT_4K);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP:
		if (!container->tbl || !container->tbl->it_group)
			return -ENODEV;

		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
						  cmd, arg);
	}

	return -ENOTTY;
}
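
/*
 * Illustrative only (not part of this driver): a minimal userspace sequence
 * exercising the ioctls handled above.  The group number and buffer are made
 * up for the sketch, and error handling is omitted.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);	// hypothetical group id
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *
 *	struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
 *	ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *
 *	ioctl(container, VFIO_IOMMU_ENABLE);	// accounts the whole window
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(uintptr_t)buf,	// 4K-aligned user buffer
 *		.iova  = info.dma32_window_start,
 *		.size  = 0x10000,
 *	};
 *	ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
 */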

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	if (container->tbl) {
		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
				iommu_group_id(container->tbl->it_group),
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else if (container->enabled) {
		pr_err("tce_vfio: attaching group #%u to enabled container\n",
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else {
		ret = iommu_take_ownership(tbl);
		if (!ret)
			container->tbl = tbl;
	}

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);
	if (tbl != container->tbl) {
		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
				iommu_group_id(iommu_group),
				iommu_group_id(tbl->it_group));
	} else {
		if (container->enabled) {
			pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
					iommu_group_id(tbl->it_group));
			tce_iommu_disable(container);
		}

		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
				iommu_group_id(iommu_group), iommu_group); */
		container->tbl = NULL;
		iommu_release_ownership(tbl);
	}
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);