/*
 * Copyright IBM Corp. 2007,2009
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif

unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;
	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);

unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

#ifdef CONFIG_PGSTE

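/*
 * Guest address space (gmap) bookkeeping, used by KVM:
 *
 * A struct gmap describes a guest address space rooted in its own
 * 4-level region-1 table.  All region and segment tables allocated for
 * it are kept on gmap->crst_list so that gmap_free() can release them.
 * Each page table backing a guest segment carries a struct gmap_pgtable
 * in page->index; its mapper list links the gmap_rmap entries for every
 * guest segment table entry that points to the page table, so the
 * connection can be torn down from either side.
 *
 * Typical use (sketch): create a gmap with gmap_alloc(), populate it
 * with gmap_map_segment(), activate it around guest execution with
 * gmap_enable()/gmap_disable(), resolve guest faults with gmap_fault()
 * and tear everything down with gmap_free().
 */
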
/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INV)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	/* Load primary space page table origin. */
	S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
				 _ASCE_USER_BITS | __pa(gmap->table);
	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce));
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	/* Load primary space page table origin. */
	S390_lowcore.user_asce =
		gmap->mm->context.asce_bits | __pa(gmap->mm->pgd);
	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce));
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

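/*
 * Allocate a lower level region or segment table for the guest address
 * space and link it into the invalid entry at @table.  If another
 * thread raced in and installed a table first, the freshly allocated
 * pages are released again.
 */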
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
{
	struct page *page;
	unsigned long *new;

	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	down_read(&gmap->mm->mmap_sem);
	if (*table & _REGION_ENTRY_INV) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	up_read(&gmap->mm->mmap_sem);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV;
	}
out:
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

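/*
 * A mapped segment is recorded by storing the parent address 'from' in
 * an otherwise invalid segment table entry, tagged with
 * _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO.  The entry is only turned
 * into a reference to a real page table when the guest touches the
 * segment and gmap_fault() resolves it against the parent mm.
 */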
/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > PGDIR_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
	}
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

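/**
 * gmap_fault - resolve a guest address to a parent address
 * @address: faulting address in the guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Remembers @address in current->thread.gmap_addr.  Returns the address
 * in the parent address space that corresponds to @address, or
 * -EFAULT / -ENOMEM (as an unsigned long) if it cannot be resolved.
 */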
unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, vmaddr, segment;
	struct mm_struct *mm;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct vm_area_struct *vma;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	current->thread.gmap_addr = address;
	mm = gmap->mm;
	/* Walk the gmap address space page table */
	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);

	/* Convert the gmap address to an mm address. */
	segment = *table;
	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_RO) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		vma = find_vma(mm, vmaddr);
		if (!vma || vma->vm_start > vmaddr)
			return -EFAULT;

		/* Walk the parent mm page table */
		pgd = pgd_offset(mm, vmaddr);
		pud = pud_alloc(mm, pgd, vmaddr);
		if (!pud)
			return -ENOMEM;
		pmd = pmd_alloc(mm, pud, vmaddr);
		if (!pmd)
			return -ENOMEM;
		if (!pmd_present(*pmd) &&
		    __pte_alloc(mm, vma, pmd, vmaddr))
			return -ENOMEM;
		/* pmd now points to a valid segment table entry. */
		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
		if (!rmap)
			return -ENOMEM;
		/* Link gmap segment table entry location to page table. */
		page = pmd_page(*pmd);
		mp = (struct gmap_pgtable *) page->index;
		rmap->entry = table;
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*table = pmd_val(*pmd) & PAGE_MASK;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(gmap_fault);

void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry =
			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	pgtable_page_ctor(page);
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 3);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

#else /* CONFIG_PGSTE */

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unmap_notifier(struct mm_struct *mm,
				       unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */

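/*
 * A 4K page is split into 1K (31 bit) or 2K (64 bit) page table
 * fragments.  The low bits of page->_mapcount track which fragments
 * are allocated, the same bits shifted left by four mark fragments
 * whose grace period in page_table_free_rcu() has not expired yet.
 * Pages that still have free fragments are kept on the per-mm
 * context.pgtable_list.
 */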
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

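/*
 * For the RCU based variant the fragment state is encoded in the low
 * bits of the table address handed to tlb_remove_table(): FRAG_MASK
 * marks a pgste page table, a fragment bit shifted left by four marks
 * a 1K/2K fragment, and zero low bits mean a full crst table.
 * __tlb_remove_table() decodes this once the table is no longer
 * reachable by any page table walker.
 */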
#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif

/*
 * Switch on pgstes for the current userspace process (used by KVM).
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have a switched amode? If not, we cannot do SIE. */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we have pgstes? If yes, we are done. */
	if (mm_has_pgste(tsk->mm))
		return 0;

	/* Let's check if we are allowed to replace the mm. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm and let dup_mm create the page tables with pgstes. */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* OK, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	atomic_inc(&mm->context.attach_count);
	atomic_dec(&old_mm->context.attach_count);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_DEBUG_PAGEALLOC && CONFIG_HIBERNATION */