/*
 * IOMMU helpers in MMU context.
 *
 * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>

static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

struct mm_iommu_table_group_mem_t {
        struct list_head next;
        struct rcu_head rcu;
        unsigned long used;
        atomic64_t mapped;
        unsigned int pageshift;
        u64 ua;			/* userspace address */
        u64 entries;		/* number of entries in hpas/hpages[] */
        /*
         * In mm_iommu_do_alloc() we temporarily use this to store
         * the struct page addresses.
         *
         * We need to convert ua to hpa in real mode. Make it
         * simpler by storing the physical address.
         */
        union {
                struct page **hpages;	/* vmalloc'ed */
                phys_addr_t *hpas;
        };
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
        u64 dev_hpa;		/* Device memory base address */
};

static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
                unsigned long npages, bool incr)
{
        long ret = 0, locked, lock_limit;

        if (!npages)
                return 0;

        down_write(&mm->mmap_sem);

        if (incr) {
                locked = mm->locked_vm + npages;
                lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
                if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                        ret = -ENOMEM;
                else
                        mm->locked_vm += npages;
        } else {
                if (WARN_ON_ONCE(npages > mm->locked_vm))
                        npages = mm->locked_vm;
                mm->locked_vm -= npages;
        }

        pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
                        current ? current->pid : 0,
                        incr ? '+' : '-',
                        npages << PAGE_SHIFT,
                        mm->locked_vm << PAGE_SHIFT,
                        rlimit(RLIMIT_MEMLOCK));
        up_write(&mm->mmap_sem);

        return ret;
}

bool mm_iommu_preregistered(struct mm_struct *mm)
{
        return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
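
/*
 * mm_iommu_do_alloc() pins the userspace region and accounts it against
 * RLIMIT_MEMLOCK (or, when @dev_hpa refers to device memory, only records
 * the region), then adds it to the per-mm list of preregistered regions.
 *
 * The starting mem->pageshift is the natural alignment of the region,
 * __ffs(ua | (entries << PAGE_SHIFT)): for example, ua == 0x30000000 and a
 * 0x10000000 byte region yield __ffs(0x30000000) == 28, i.e. IOMMU pages of
 * up to 256MB, which the loop over the pinned pages then clamps down to the
 * actual backing page size.
 */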

static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                unsigned long entries, unsigned long dev_hpa,
                struct mm_iommu_table_group_mem_t **pmem)
{
        struct mm_iommu_table_group_mem_t *mem, *mem2;
        long i, ret, locked_entries = 0, pinned = 0;
        unsigned int pageshift;
        unsigned long entry, chunk;

        if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
                ret = mm_iommu_adjust_locked_vm(mm, entries, true);
                if (ret)
                        return ret;

                locked_entries = entries;
        }

        mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        if (!mem) {
                ret = -ENOMEM;
                goto unlock_exit;
        }

        if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
                mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
                mem->dev_hpa = dev_hpa;
                goto good_exit;
        }
        mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

        /*
         * As a starting point for the maximum page size calculation, use the
         * natural alignment of @ua and @entries; this allows IOMMU pages
         * smaller than huge pages but still bigger than PAGE_SIZE.
         */
        mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
        mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
        if (!mem->hpas) {
                kfree(mem);
                ret = -ENOMEM;
                goto unlock_exit;
        }

        down_read(&mm->mmap_sem);
        chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
                        sizeof(struct vm_area_struct *);
        chunk = min(chunk, entries);
        for (entry = 0; entry < entries; entry += chunk) {
                unsigned long n = min(entries - entry, chunk);

                ret = get_user_pages(ua + (entry << PAGE_SHIFT), n,
                                FOLL_WRITE | FOLL_LONGTERM,
                                mem->hpages + entry, NULL);
                if (ret == n) {
                        pinned += n;
                        continue;
                }
                if (ret > 0)
                        pinned += ret;
                break;
        }
        up_read(&mm->mmap_sem);
        if (pinned != entries) {
                if (!ret)
                        ret = -EFAULT;
                goto free_exit;
        }

        pageshift = PAGE_SHIFT;
        for (i = 0; i < entries; ++i) {
                struct page *page = mem->hpages[i];

                /*
                 * Allow using IOMMU pages larger than 64k, but only if we
                 * are backed by hugetlb.
                 */
                if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
                        struct page *head = compound_head(page);

                        pageshift = compound_order(head) + PAGE_SHIFT;
                }
                mem->pageshift = min(mem->pageshift, pageshift);
                /*
                 * We don't need the struct page reference any more, switch
                 * to the physical address.
                 */
                mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
        }

good_exit:
        atomic64_set(&mem->mapped, 1);
        mem->used = 1;
        mem->ua = ua;
        mem->entries = entries;

        mutex_lock(&mem_list_mutex);

        list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
                /* Overlap? */
                if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
                                (ua < (mem2->ua +
                                       (mem2->entries << PAGE_SHIFT)))) {
                        ret = -EINVAL;
                        mutex_unlock(&mem_list_mutex);
                        goto free_exit;
                }
        }

        list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

        mutex_unlock(&mem_list_mutex);

        *pmem = mem;

        return 0;

free_exit:
        /* free the page references taken above */
        for (i = 0; i < pinned; i++)
                put_page(mem->hpages[i]);

        vfree(mem->hpas);
        kfree(mem);

unlock_exit:
        mm_iommu_adjust_locked_vm(mm, locked_entries, false);

        return ret;
}

long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
                struct mm_iommu_table_group_mem_t **pmem)
{
        return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
                        pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
                unsigned long entries, unsigned long dev_hpa,
                struct mm_iommu_table_group_mem_t **pmem)
{
        return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
        long i;
        struct page *page = NULL;

        if (!mem->hpas)
                return;

        for (i = 0; i < mem->entries; ++i) {
                if (!mem->hpas[i])
                        continue;

                page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
                if (!page)
                        continue;

                if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
                        SetPageDirty(page);

                put_page(page);
                mem->hpas[i] = 0;
        }
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
        mm_iommu_unpin(mem);
        vfree(mem->hpas);
        kfree(mem);
}
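
/*
 * Region lifetime: mem->used counts mm_iommu_new()/mm_iommu_get() references
 * and is only changed under mem_list_mutex. mem->mapped starts at 1 and is
 * incremented by mm_iommu_mapped_inc() for every active mapping; being an
 * atomic64, it can be taken and dropped without holding mem_list_mutex.
 * The final mm_iommu_put() succeeds only if it can bring mapped from 1 back
 * to 0, after which the region is unlinked and the actual free is deferred
 * to an RCU callback so that concurrent list walkers (e.g.
 * mm_iommu_lookup_rm()) do not see the entry disappear under them.
 */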

static void mm_iommu_free(struct rcu_head *head)
{
        struct mm_iommu_table_group_mem_t *mem = container_of(head,
                        struct mm_iommu_table_group_mem_t, rcu);

        mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
        list_del_rcu(&mem->next);
        call_rcu(&mem->rcu, mm_iommu_free);
}

long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
        long ret = 0;
        unsigned long unlock_entries = 0;

        mutex_lock(&mem_list_mutex);

        if (mem->used == 0) {
                ret = -ENOENT;
                goto unlock_exit;
        }

        --mem->used;
        /* There are still users, exit */
        if (mem->used)
                goto unlock_exit;

        /* Are there still mappings? */
        if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
                ++mem->used;
                ret = -EBUSY;
                goto unlock_exit;
        }

        if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
                unlock_entries = mem->entries;

        /* @mapped became 0 so now mappings are disabled, release the region */
        mm_iommu_release(mem);

unlock_exit:
        mutex_unlock(&mem_list_mutex);

        mm_iommu_adjust_locked_vm(mm, unlock_entries, false);

        return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
                unsigned long ua, unsigned long size)
{
        struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

        list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
                if ((mem->ua <= ua) &&
                                (ua + size <= mem->ua +
                                 (mem->entries << PAGE_SHIFT))) {
                        ret = mem;
                        break;
                }
        }

        return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
                unsigned long ua, unsigned long size)
{
        struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

        list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
                        next) {
                if ((mem->ua <= ua) &&
                                (ua + size <= mem->ua +
                                 (mem->entries << PAGE_SHIFT))) {
                        ret = mem;
                        break;
                }
        }

        return ret;
}

struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
                unsigned long ua, unsigned long entries)
{
        struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

        mutex_lock(&mem_list_mutex);

        list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
                if ((mem->ua == ua) && (mem->entries == entries)) {
                        ret = mem;
                        ++mem->used;
                        break;
                }
        }

        mutex_unlock(&mem_list_mutex);

        return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
                unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
        const long entry = (ua - mem->ua) >> PAGE_SHIFT;
        u64 *va;

        if (entry >= mem->entries)
                return -EFAULT;

        if (pageshift > mem->pageshift)
                return -EFAULT;

        if (!mem->hpas) {
                *hpa = mem->dev_hpa + (ua - mem->ua);
                return 0;
        }

        va = &mem->hpas[entry];
        *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

        return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
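
/*
 * A typical (illustrative only) use of the lookup/translation helpers above,
 * assuming an iommu_table with an it_page_shift field:
 *
 *	mem = mm_iommu_lookup(mm, ua, 1UL << tbl->it_page_shift);
 *	if (mem && !mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))
 *		... hpa now holds the host physical address backing ua ...
 *
 * mm_iommu_ua_to_hpa_rm() below is the real mode variant: mem->hpas lives in
 * vmalloc space, so with the MMU off the entry is reached through
 * vmalloc_to_phys() rather than through its virtual address.
 */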

long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
                unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
        const long entry = (ua - mem->ua) >> PAGE_SHIFT;
        unsigned long *pa;

        if (entry >= mem->entries)
                return -EFAULT;

        if (pageshift > mem->pageshift)
                return -EFAULT;

        if (!mem->hpas) {
                *hpa = mem->dev_hpa + (ua - mem->ua);
                return 0;
        }

        pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
        if (!pa)
                return -EFAULT;

        *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

        return 0;
}

void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
        struct mm_iommu_table_group_mem_t *mem;
        long entry;
        void *va;
        unsigned long *pa;

        mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
        if (!mem)
                return;

        if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
                return;

        entry = (ua - mem->ua) >> PAGE_SHIFT;
        va = &mem->hpas[entry];

        pa = (void *) vmalloc_to_phys(va);
        if (!pa)
                return;

        *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
                unsigned int pageshift, unsigned long *size)
{
        struct mm_iommu_table_group_mem_t *mem;
        unsigned long end;

        list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
                if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
                        continue;

                end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
                if ((mem->dev_hpa <= hpa) && (hpa < end)) {
                        /*
                         * Since the IOMMU page size might be bigger than
                         * PAGE_SIZE, the amount of preregistered memory
                         * starting from @hpa might be smaller than 1<<pageshift
                         * and the caller needs to distinguish this situation.
                         */
                        *size = min(1UL << pageshift, end - hpa);
                        return true;
                }
        }

        return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
        if (atomic64_inc_not_zero(&mem->mapped))
                return 0;

        /* Last mm_iommu_put() has been called, no more mappings allowed */
        return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
        atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
        INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}