// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IOMMU helpers in MMU context.
 *
 * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/mm_inline.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>

static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * In mm_iommu_do_alloc() we temporarily use this to store
	 * struct page addresses.
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing the physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long ret = 0, locked, lock_limit;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);

	if (incr) {
		locked = mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			mm->locked_vm += npages;
	} else {
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return ret;
}

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
			      unsigned long entries, unsigned long dev_hpa,
			      struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * As a starting point for the maximum page size calculation, use
	 * the natural alignment of @ua and @entries; this allows IOMMU
	 * pages smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	down_read(&mm->mmap_sem);
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = get_user_pages(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE | FOLL_LONGTERM,
				mem->hpages + entry, NULL);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	up_read(&mm->mmap_sem);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

	pageshift = PAGE_SHIFT;
	for (i = 0; i < entries; ++i) {
		struct page *page = mem->hpages[i];

		/*
		 * Allow using IOMMU pages larger than 64k, but only if
		 * we are backed by hugetlb.
		 */
		if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
			struct page *head = compound_head(page);

			pageshift = compound_order(head) + PAGE_SHIFT;
		}
		mem->pageshift = min(mem->pageshift, pageshift);
		/*
		 * We don't need the struct page reference any more,
		 * switch to the physical address.
		 */
		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* Free the page references taken above */
	for (i = 0; i < pinned; i++)
		put_page(mem->hpages[i]);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	mm_iommu_adjust_locked_vm(mm, locked_entries, false);

	return ret;
}

long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);
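
/*
 * Illustrative sketch (not part of the original file): roughly how a
 * caller such as the VFIO SPAPR TCE driver is expected to use the API
 * above. Error handling is trimmed, and the program_tce()/tbl/ioba/
 * tbl_pageshift names are hypothetical, used only for the example:
 *
 *	struct mm_iommu_table_group_mem_t *mem;
 *	unsigned long hpa;
 *	long ret;
 *
 *	ret = mm_iommu_new(current->mm, ua, entries, &mem);
 *	if (ret)				// pinning or accounting failed
 *		return ret;
 *	...
 *	if (!mm_iommu_ua_to_hpa(mem, ua, tbl_pageshift, &hpa))
 *		program_tce(tbl, ioba, hpa);	// hardware sees a stable hpa
 *	...
 *	mm_iommu_put(current->mm, mem);		// unpins once the last user is gone
 */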

static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		put_page(page);
		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	mm_iommu_adjust_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}

struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
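
/*
 * The two helpers below are called in real mode (MMU off), e.g. from
 * KVM's real-mode TCE handlers, so they must not dereference the
 * vmalloc'ed hpas[] array directly: the entry address is translated
 * with vmalloc_to_phys() first, and the helpers give up if that fails.
 */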

long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}

extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than
			 * 1<<pageshift, and the caller needs to distinguish
			 * this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* The last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}
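
/*
 * Illustrative sketch (assumed caller-side protocol, not from this file):
 * code that programs an IOMMU entry backed by a preregistered region is
 * expected to hold a "mapped" reference for as long as the hardware may
 * access the memory, and to drop it when the entry is cleared. The
 * program_iommu_entry()/tbl/ioba names are hypothetical:
 *
 *	if (mm_iommu_mapped_inc(mem))
 *		return -ENXIO;			// region is being torn down
 *	ret = program_iommu_entry(tbl, ioba, hpa);
 *	if (ret)
 *		mm_iommu_mapped_dec(mem);
 *	...
 *	// later, when the entry is invalidated:
 *	mm_iommu_mapped_dec(mem);
 */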