/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif


unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm)
		update_mm(mm, current);
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm)
		__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		update_mm(mm, current);
}
#endif
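
/*
 * Overview of the address space layouts handled above (derived from the
 * constants used in crst_table_upgrade()/crst_table_downgrade()):
 *
 *	asce_limit	top-level table		ASCE type
 *	1UL << 31	segment table		_ASCE_TYPE_SEGMENT
 *	1UL << 42	region-third table	_ASCE_TYPE_REGION3
 *	1UL << 53	region-second table	_ASCE_TYPE_REGION2
 *
 * crst_table_upgrade() stacks additional top-level tables on the current
 * pgd until the requested limit is reached; crst_table_downgrade() strips
 * them off again and frees the now unused tables.
 */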

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INVALID)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
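
/*
 * Illustrative usage sketch, not upstream code: a KVM-like caller would tie
 * a gmap to its mm roughly as follows (error handling abbreviated; the
 * variables from, to and len are hypothetical):
 *
 *	struct gmap *gmap;
 *	int rc;
 *
 *	gmap = gmap_alloc(current->mm);
 *	if (!gmap)
 *		return -ENOMEM;
 *	rc = gmap_map_segment(gmap, from, to, len);
 *	if (rc) {
 *		gmap_free(gmap);
 *		return rc;
 *	}
 *	gmap_enable(gmap);	-- switch primary space to the guest space
 *	...
 *	gmap_disable(gmap);
 *	gmap_free(gmap);
 */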

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
	__releases(&gmap->mm->page_table_lock)
	__acquires(&gmap->mm->page_table_lock)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INVALID;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > TASK_MAX_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = (from + off) | (_SEGMENT_ENTRY_INVALID |
					 _SEGMENT_ENTRY_PROTECT);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
{
	unsigned long *table;

	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);
	return table;
}
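
/*
 * Note on the table walk above (summarizing the shifts used throughout this
 * file): a guest address is decomposed into four 11-bit table indices plus
 * a 20-bit byte offset within the 1 MB segment:
 *
 *	(address >> 53) & 0x7ff		region-first index (gmap->table)
 *	(address >> 42) & 0x7ff		region-second index
 *	(address >> 31) & 0x7ff		region-third index
 *	(address >> 20) & 0x7ff		segment index
 *
 * Each table therefore has 2048 eight-byte entries, i.e. 16 KB, which is
 * why the tables are allocated with ALLOC_ORDER = 2.
 */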

/**
 * __gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, vmaddr, segment;
	struct gmap_pgtable *mp;
	struct page *page;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return PTR_ERR(segment_ptr);
	/* Convert the gmap address to an mm address. */
	segment = *segment_ptr;
	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_PROTECT) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(address, gmap);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
				unsigned long *segment_ptr, struct gmap *gmap)
{
	unsigned long vmaddr;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct mm_struct *mm;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	mm = gmap->mm;
	vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
	vma = find_vma(mm, vmaddr);
	if (!vma || vma->vm_start > vmaddr)
		return -EFAULT;
	/* Walk the parent mm page table */
	pgd = pgd_offset(mm, vmaddr);
	pud = pud_alloc(mm, pgd, vmaddr);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc(mm, pud, vmaddr);
	if (!pmd)
		return -ENOMEM;
	if (!pmd_present(*pmd) &&
	    __pte_alloc(mm, vma, pmd, vmaddr))
		return -ENOMEM;
	/* pmd now points to a valid segment table entry. */
	rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
	if (!rmap)
		return -ENOMEM;
	/* Link gmap segment table entry location to page table. */
	page = pmd_page(*pmd);
	mp = (struct gmap_pgtable *) page->index;
	rmap->gmap = gmap;
	rmap->entry = segment_ptr;
	rmap->vmaddr = address & PMD_MASK;
	spin_lock(&mm->page_table_lock);
	if (*segment_ptr == segment) {
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
		rmap = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	kfree(rmap);
	return 0;
}
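
/*
 * Summary of the linkage established above: gmap_connect_pgtable() points a
 * gmap segment table entry at the host page table that backs the same range
 * and records the connection in a gmap_rmap entry on the mapper list of the
 * page table's gmap_pgtable.  gmap_disconnect_pgtable() and
 * gmap_unlink_segment() use that list to find and invalidate all guest
 * mappings when the host page table goes away.
 */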

static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
					     _SEGMENT_ENTRY_PROTECT);
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

/*
 * this function is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, segment;
	struct gmap_pgtable *mp;
	struct page *page;
	int rc;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return -EFAULT;
	/* Convert the gmap address to an mm address. */
	while (1) {
		segment = *segment_ptr;
		if (!(segment & _SEGMENT_ENTRY_INVALID)) {
			/* Page table is present */
			page = pfn_to_page(segment >> PAGE_SHIFT);
			mp = (struct gmap_pgtable *) page->index;
			return mp->vmaddr | (address & ~PMD_MASK);
		}
		if (!(segment & _SEGMENT_ENTRY_PROTECT))
			/* Nothing mapped in the gmap address space. */
			break;
		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
		if (rc)
			return rc;
	}
	return -EFAULT;
}

unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(address, gmap);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{
	unsigned long *table, address, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	address = from;
	while (address < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((address >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
			       size, NULL);
		address = (address + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @start: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_fault(start, gmap);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		if (unlikely(!ptep))
			continue;
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			start += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		spin_unlock(ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);
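
/*
 * Illustrative usage sketch, not upstream code: a consumer registers a
 * notifier block and then arms guest ranges with gmap_ipte_notify(); the
 * callback shown here is hypothetical:
 *
 *	static void my_pte_notifier(struct gmap *gmap, unsigned long gaddr)
 *	{
 *		handle the invalidation of guest address gaddr
 *	}
 *
 *	static struct gmap_notifier my_nb = {
 *		.notifier_call = my_pte_notifier,
 *	};
 *
 *	gmap_register_ipte_notifier(&my_nb);
 *	rc = gmap_ipte_notify(gmap, start, len);
 */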

/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
{
	unsigned long segment_offset;
	struct gmap_notifier *nb;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	segment_offset = segment_offset * (4096 / sizeof(pte_t));
	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(rmap, &mp->mapper, list) {
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(rmap->gmap,
					  rmap->vmaddr + segment_offset);
	}
	spin_unlock(&gmap_notifier_lock);
}

static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	if (!pgtable_page_ctor(page)) {
		kfree(mp);
		__free_page(page);
		return NULL;
	}
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
		    PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_HC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(*ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
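
/*
 * Note (summarizing the code above; the caller shown is hypothetical):
 * @key is given in storage-key format, its access-control and
 * fetch-protection bits are written to the real storage key and, together
 * with the guest referenced/changed bits, mirrored into the pgste; @nq
 * selects the non-quiescing variant of the key update.  A KVM-backed key
 * operation handler would roughly do:
 *
 *	rc = set_guest_storage_key(current->mm, useraddr, key, nq);
 */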

#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
					   unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
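
/*
 * Note on the fragment accounting used above and below: for 4K pages that
 * carry 1K/2K page table fragments, page->_mapcount is used as a small bit
 * field.  The low FRAG_MASK bits track which fragments are in use, the next
 * four bits mark fragments that have been handed to the RCU batch by
 * page_table_free_rcu() and are still pending in __tlb_remove_table().
 */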

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
				struct mm_struct *mm, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	unsigned long next, *table, *new;
	struct page *page;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
again:
		if (pmd_none_or_clear_bad(pmd))
			continue;
		table = (unsigned long *) pmd_deref(*pmd);
		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
		if (page_table_with_pgste(page))
			continue;
		/* Allocate new page table with pgstes */
		new = page_table_alloc_pgste(mm, addr);
		if (!new)
			return -ENOMEM;

		spin_lock(&mm->page_table_lock);
		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
			/* Nuke pmd entry pointing to the "short" page table */
			pmdp_flush_lazy(mm, addr, pmd);
			pmd_clear(pmd);
			/* Copy ptes from old table to new table */
			memcpy(new, table, PAGE_SIZE/2);
			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
			/* Establish new table */
			pmd_populate(mm, pmd, (pte_t *) new);
			/* Free old table with rcu, there might be a walker! */
			page_table_free_rcu(tlb, table);
			new = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		if (new) {
			page_table_free_pgste(new);
			goto again;
		}
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
				struct mm_struct *mm, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pud++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
					unsigned long addr, unsigned long end)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pgd++, addr = next, addr != end);

	return 0;
}

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct mmu_gather tlb;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	down_write(&mm->mmap_sem);
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	/* Reallocate the page tables with pgstes */
	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
		mm->context.has_pgste = 1;
	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
	up_write(&mm->mmap_sem);
	return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
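
/*
 * Illustrative note, not upstream code: s390_enable_sie() is meant to be
 * called once per process before the first SIE entry; a KVM-like VM
 * creation path would simply do:
 *
 *	rc = s390_enable_sie();
 *	if (rc)
 *		return rc;
 */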

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/* No need to flush TLB
	 * On s390 reference bits are in storage key and never in TLB */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(&mm->page_table_lock);

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(&mm->page_table_lock);

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */