/*
 * Copyright IBM Corp. 2007,2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif


unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
/*
 * Grow the user address space by adding region table levels until the
 * asce limit covers "limit" (at most 1UL << 53).
 */
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

/*
 * Shrink the user address space by removing region table levels until the
 * asce limit is no larger than "limit".
 */
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif
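
/*
 * Illustrative sketch, not part of the original file: how a hypothetical
 * 64-bit caller might grow the address-space limit before creating a
 * mapping that ends above the current asce limit. The names
 * example_grow_asce, want_addr and want_len are made up for this example;
 * the real callers of crst_table_upgrade() live in the s390 mmap code.
 */
#ifdef CONFIG_64BIT
static inline int example_grow_asce(struct mm_struct *mm,
				    unsigned long want_addr,
				    unsigned long want_len)
{
	unsigned long end = want_addr + want_len;

	/* Already reachable with the current number of region table levels. */
	if (end <= mm->context.asce_limit)
		return 0;
	/* crst_table_upgrade() cannot go beyond 1UL << 53. */
	if (end > (1UL << 53))
		return -ENOMEM;
	/* Add region table levels until the limit covers the mapping. */
	return crst_table_upgrade(mm, end);
}
#endif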

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure, or NULL if out of memory.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INV)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INV) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}
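
/*
 * Illustrative sketch, not part of the original code: the guest address
 * walks below index four table levels, each with 2048 eight-byte entries.
 * This made-up helper shows the index arithmetic in one place; "level"
 * 0..3 selects the region-1, region-2, region-3 or segment table.
 */
static inline unsigned long example_gmap_index(unsigned long gaddr, int level)
{
	static const unsigned int shift[] = { 53, 42, 31, 20 };

	return (gaddr >> shift[level]) & 0x7ff;
}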

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > PGDIR_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
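
/*
 * Illustrative sketch, not part of the original code: a hypothetical caller
 * backing guest real memory with a chunk of the parent address space.
 * gmap_map_segment() only accepts PMD_SIZE (1 MB segment) aligned addresses
 * and lengths; origin, gaddr and size are made-up parameter names.
 */
static inline int example_map_guest_memory(struct gmap *gmap,
					   unsigned long origin,
					   unsigned long gaddr,
					   unsigned long size)
{
	/* gmap_map_segment() rechecks this, shown here for clarity only. */
	if ((origin | gaddr | size) & (PMD_SIZE - 1))
		return -EINVAL;
	/* Entries are filled lazily; faults resolve them via gmap_fault(). */
	return gmap_map_segment(gmap, origin, gaddr, size);
}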

/*
 * __gmap_fault is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, vmaddr, segment;
	struct mm_struct *mm;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct vm_area_struct *vma;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	current->thread.gmap_addr = address;
	mm = gmap->mm;
	/* Walk the gmap address space page table */
	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);

	/* Convert the gmap address to an mm address. */
	segment = *table;
	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_RO) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		vma = find_vma(mm, vmaddr);
		if (!vma || vma->vm_start > vmaddr)
			return -EFAULT;

		/* Walk the parent mm page table */
		pgd = pgd_offset(mm, vmaddr);
		pud = pud_alloc(mm, pgd, vmaddr);
		if (!pud)
			return -ENOMEM;
		pmd = pmd_alloc(mm, pud, vmaddr);
		if (!pmd)
			return -ENOMEM;
		if (!pmd_present(*pmd) &&
		    __pte_alloc(mm, vma, pmd, vmaddr))
			return -ENOMEM;
		/* pmd now points to a valid segment table entry. */
		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
		if (!rmap)
			return -ENOMEM;
		/* Link gmap segment table entry location to page table. */
		page = pmd_page(*pmd);
		mp = (struct gmap_pgtable *) page->index;
		rmap->entry = table;
		spin_lock(&mm->page_table_lock);
		list_add(&rmap->list, &mp->mapper);
		spin_unlock(&mm->page_table_lock);
		/* Set gmap segment table entry to page table. */
		*table = pmd_val(*pmd) & PAGE_MASK;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @address: guest address that caused the fault
 * @gmap: pointer to the guest address space structure
 *
 * Returns the corresponding address in the parent address space, or a
 * negative error value (-EFAULT or -ENOMEM) cast to unsigned long.
 */
unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(address, gmap);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
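
/*
 * Illustrative sketch, not part of the original code: translating a guest
 * address into a parent (host) address with gmap_fault(). Error returns are
 * negative errno values cast to unsigned long; since user addresses stay
 * below 1UL << 53 on s390, a signed compare is enough to tell them apart.
 * The name example_guest_to_host is made up.
 */
static inline long example_guest_to_host(struct gmap *gmap,
					 unsigned long gaddr,
					 unsigned long *vmaddr)
{
	unsigned long addr;

	addr = gmap_fault(gaddr, gmap);
	if ((long) addr < 0)
		return (long) addr;	/* -EFAULT or -ENOMEM */
	*vmaddr = addr;
	return 0;
}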

/**
 * gmap_discard - discard the host pages backing a guest address range
 * @from: start of the guest address range
 * @to: end of the guest address range
 * @gmap: pointer to the guest address space structure
 */
void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{
	unsigned long *table, address, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	address = from;
	while (address < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((address >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
			       size, NULL);
		address = (address + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

/*
 * Called before a page table with pgstes is released; invalidates all gmap
 * segment table entries that still reference it.
 */
void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry =
			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

/*
 * Allocate a page table with pgstes: 256 pte entries followed by
 * 256 pgste entries in the second half of the page.
 */
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	pgtable_page_ctor(page);
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 3);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

#else /* CONFIG_PGSTE */

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unmap_notifier(struct mm_struct *mm,
				       unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
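
/*
 * Illustrative sketch, not part of the original code: the allocator above
 * tracks which 1K/2K fragments of a 4K page are in use in the low FRAG_MASK
 * bits of page->_mapcount, and fragments pending an RCU free in the same
 * bits shifted left by four. This made-up helper recomputes the fragment
 * bit used by page_table_free() and page_table_free_rcu() for a given
 * page table address.
 */
static inline unsigned int example_fragment_bit(unsigned long *table)
{
	unsigned long offset = __pa(table) & ~PAGE_MASK;

	return 1U << (offset / (PTRS_PER_PTE * sizeof(pte_t)));
}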

#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

/*
 * The bits encoded in the low byte of the table pointer tell
 * __tlb_remove_table what to free: a page table with pgstes (FRAG_MASK),
 * a 1K/2K page table fragment (bit << 4) or a crst table (no bits set).
 */
void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have a switched amode? If not, we cannot do sie */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	/* Let's check if we are allowed to replace the mm */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm and let dup_mm create the page tables with pgstes */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	atomic_inc(&mm->context.attach_count);
	atomic_dec(&old_mm->context.attach_count);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
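
/*
 * Illustrative sketch, not part of the original code: the typical life
 * cycle of a guest address space as a hypervisor such as KVM might use it.
 * All names are made up except the gmap_* functions defined above.
 */
#ifdef CONFIG_PGSTE
static inline int example_gmap_lifecycle(struct mm_struct *mm,
					 unsigned long origin,
					 unsigned long guest_size)
{
	struct gmap *gmap;
	int rc;

	gmap = gmap_alloc(mm);		/* create the guest address space */
	if (!gmap)
		return -ENOMEM;
	/* Back guest real memory starting at 0 with the parent range. */
	rc = gmap_map_segment(gmap, origin, 0UL, guest_size);
	if (rc) {
		gmap_free(gmap);
		return rc;
	}
	gmap_enable(gmap);		/* make it the guest space of this cpu */
	/* ... run the guest, resolving faults with gmap_fault() ... */
	gmap_disable(gmap);
	gmap_unmap_segment(gmap, 0UL, guest_size);
	gmap_free(gmap);
	return 0;
}
#endif /* CONFIG_PGSTE */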