xref: /openbmc/linux/arch/s390/mm/pgtable.c (revision e290ed81)
1 /*
2  *    Copyright IBM Corp. 2007,2009
3  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
4  */
5 
6 #include <linux/sched.h>
7 #include <linux/kernel.h>
8 #include <linux/errno.h>
9 #include <linux/gfp.h>
10 #include <linux/mm.h>
11 #include <linux/swap.h>
12 #include <linux/smp.h>
13 #include <linux/highmem.h>
14 #include <linux/pagemap.h>
15 #include <linux/spinlock.h>
16 #include <linux/module.h>
17 #include <linux/quicklist.h>
18 #include <linux/rcupdate.h>
19 #include <linux/slab.h>
20 
21 #include <asm/system.h>
22 #include <asm/pgtable.h>
23 #include <asm/pgalloc.h>
24 #include <asm/tlb.h>
25 #include <asm/tlbflush.h>
26 #include <asm/mmu_context.h>
27 
28 #ifndef CONFIG_64BIT
29 #define ALLOC_ORDER	1
30 #define FRAG_MASK	0x0f
31 #else
32 #define ALLOC_ORDER	2
33 #define FRAG_MASK	0x03
34 #endif
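/*
 * ALLOC_ORDER is the page order of a crst (combined region and segment)
 * table: four pages (16K) on 64-bit, two pages (8K) on 31-bit.  FRAG_MASK
 * covers the per-fragment allocation bits that page_table_alloc() and
 * page_table_free() keep in page->_mapcount: a 4K page is split into two
 * 2K page tables on 64-bit (mask 0x03) or four 1K page tables on 31-bit
 * (mask 0x0f).
 */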
35 
36 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
37 EXPORT_SYMBOL(VMALLOC_START);
38 
39 static int __init parse_vmalloc(char *arg)
40 {
41 	if (!arg)
42 		return -EINVAL;
43 	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
44 	return 0;
45 }
46 early_param("vmalloc", parse_vmalloc);
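/*
 * Example: booting with "vmalloc=512M" reserves a 512 MB vmalloc area just
 * below VMALLOC_END and moves VMALLOC_START down accordingly; memparse()
 * accepts the usual K, M and G suffixes.
 */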
47 
48 unsigned long *crst_table_alloc(struct mm_struct *mm)
49 {
50 	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
51 
52 	if (!page)
53 		return NULL;
54 	return (unsigned long *) page_to_phys(page);
55 }
56 
57 void crst_table_free(struct mm_struct *mm, unsigned long *table)
58 {
59 	free_pages((unsigned long) table, ALLOC_ORDER);
60 }
61 
62 #ifdef CONFIG_64BIT
63 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
64 {
65 	unsigned long *table, *pgd;
66 	unsigned long entry;
67 
68 	BUG_ON(limit > (1UL << 53));
69 repeat:
70 	table = crst_table_alloc(mm);
71 	if (!table)
72 		return -ENOMEM;
73 	spin_lock_bh(&mm->page_table_lock);
74 	if (mm->context.asce_limit < limit) {
75 		pgd = (unsigned long *) mm->pgd;
76 		if (mm->context.asce_limit <= (1UL << 31)) {
77 			entry = _REGION3_ENTRY_EMPTY;
78 			mm->context.asce_limit = 1UL << 42;
79 			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
80 						_ASCE_USER_BITS |
81 						_ASCE_TYPE_REGION3;
82 		} else {
83 			entry = _REGION2_ENTRY_EMPTY;
84 			mm->context.asce_limit = 1UL << 53;
85 			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
86 						_ASCE_USER_BITS |
87 						_ASCE_TYPE_REGION2;
88 		}
89 		crst_table_init(table, entry);
90 		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
91 		mm->pgd = (pgd_t *) table;
92 		mm->task_size = mm->context.asce_limit;
93 		table = NULL;
94 	}
95 	spin_unlock_bh(&mm->page_table_lock);
96 	if (table)
97 		crst_table_free(mm, table);
98 	if (mm->context.asce_limit < limit)
99 		goto repeat;
100 	update_mm(mm, current);
101 	return 0;
102 }
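/*
 * The address space limits used above: with a segment table the process
 * address space ends at 2 GB (1UL << 31), with a region-third table at
 * 4 TB (1UL << 42) and with a region-second table at 8 PB (1UL << 53).
 * crst_table_upgrade() adds one table level per iteration until the
 * requested limit fits.  A minimal, hypothetical caller sketch (the real
 * call sites live in the s390 mmap code):
 *
 *	if (addr + len > mm->context.asce_limit && addr + len <= TASK_SIZE)
 *		rc = crst_table_upgrade(mm, addr + len);
 */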
103 
104 void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
105 {
106 	pgd_t *pgd;
107 
108 	if (mm->context.asce_limit <= limit)
109 		return;
110 	__tlb_flush_mm(mm);
111 	while (mm->context.asce_limit > limit) {
112 		pgd = mm->pgd;
113 		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
114 		case _REGION_ENTRY_TYPE_R2:
115 			mm->context.asce_limit = 1UL << 42;
116 			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
117 						_ASCE_USER_BITS |
118 						_ASCE_TYPE_REGION3;
119 			break;
120 		case _REGION_ENTRY_TYPE_R3:
121 			mm->context.asce_limit = 1UL << 31;
122 			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
123 						_ASCE_USER_BITS |
124 						_ASCE_TYPE_SEGMENT;
125 			break;
126 		default:
127 			BUG();
128 		}
129 		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
130 		mm->task_size = mm->context.asce_limit;
131 		crst_table_free(mm, (unsigned long *) pgd);
132 	}
133 	update_mm(mm, current);
134 }
135 #endif
136 
137 #ifdef CONFIG_PGSTE
138 
139 /**
140  * gmap_alloc - allocate a guest address space
141  * @mm: pointer to the parent mm_struct
142  *
143  * Returns a guest address space structure or NULL if out of memory.
144  */
145 struct gmap *gmap_alloc(struct mm_struct *mm)
146 {
147 	struct gmap *gmap;
148 	struct page *page;
149 	unsigned long *table;
150 
151 	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
152 	if (!gmap)
153 		goto out;
154 	INIT_LIST_HEAD(&gmap->crst_list);
155 	gmap->mm = mm;
156 	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
157 	if (!page)
158 		goto out_free;
159 	list_add(&page->lru, &gmap->crst_list);
160 	table = (unsigned long *) page_to_phys(page);
161 	crst_table_init(table, _REGION1_ENTRY_EMPTY);
162 	gmap->table = table;
163 	list_add(&gmap->list, &mm->context.gmap_list);
164 	return gmap;
165 
166 out_free:
167 	kfree(gmap);
168 out:
169 	return NULL;
170 }
171 EXPORT_SYMBOL_GPL(gmap_alloc);
172 
173 static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
174 {
175 	struct gmap_pgtable *mp;
176 	struct gmap_rmap *rmap;
177 	struct page *page;
178 
179 	if (*table & _SEGMENT_ENTRY_INV)
180 		return 0;
181 	page = pfn_to_page(*table >> PAGE_SHIFT);
182 	mp = (struct gmap_pgtable *) page->index;
183 	list_for_each_entry(rmap, &mp->mapper, list) {
184 		if (rmap->entry != table)
185 			continue;
186 		list_del(&rmap->list);
187 		kfree(rmap);
188 		break;
189 	}
190 	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
191 	return 1;
192 }
193 
194 static void gmap_flush_tlb(struct gmap *gmap)
195 {
196 	if (MACHINE_HAS_IDTE)
197 		__tlb_flush_idte((unsigned long) gmap->table |
198 				 _ASCE_TYPE_REGION1);
199 	else
200 		__tlb_flush_global();
201 }
202 
203 /**
204  * gmap_free - free a guest address space
205  * @gmap: pointer to the guest address space structure
206  */
207 void gmap_free(struct gmap *gmap)
208 {
209 	struct page *page, *next;
210 	unsigned long *table;
211 	int i;
212 
214 	/* Flush tlb. */
215 	if (MACHINE_HAS_IDTE)
216 		__tlb_flush_idte((unsigned long) gmap->table |
217 				 _ASCE_TYPE_REGION1);
218 	else
219 		__tlb_flush_global();
220 
221 	/* Free all segment & region tables. */
222 	down_read(&gmap->mm->mmap_sem);
223 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
224 		table = (unsigned long *) page_to_phys(page);
225 		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
226 			/* Remove gmap rmap structures for segment table. */
227 			for (i = 0; i < PTRS_PER_PMD; i++, table++)
228 				gmap_unlink_segment(gmap, table);
229 		__free_pages(page, ALLOC_ORDER);
230 	}
231 	up_read(&gmap->mm->mmap_sem);
232 	list_del(&gmap->list);
233 	kfree(gmap);
234 }
235 EXPORT_SYMBOL_GPL(gmap_free);
236 
237 /**
238  * gmap_enable - switch primary space to the guest address space
239  * @gmap: pointer to the guest address space structure
240  */
241 void gmap_enable(struct gmap *gmap)
242 {
243 	/* Load primary space page table origin. */
244 	S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
245 				 _ASCE_USER_BITS | __pa(gmap->table);
246 	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) );
247 	S390_lowcore.gmap = (unsigned long) gmap;
248 }
249 EXPORT_SYMBOL_GPL(gmap_enable);
250 
251 /**
252  * gmap_disable - switch back to the standard primary address space
253  * @gmap: pointer to the guest address space structure
254  */
255 void gmap_disable(struct gmap *gmap)
256 {
257 	/* Load primary space page table origin. */
258 	S390_lowcore.user_asce =
259 		gmap->mm->context.asce_bits | __pa(gmap->mm->pgd);
260 	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) );
261 	S390_lowcore.gmap = 0UL;
262 }
263 EXPORT_SYMBOL_GPL(gmap_disable);
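/*
 * Putting the gmap interface together, a minimal and purely illustrative
 * usage sketch for a SIE host such as KVM; "uaddr", "gaddr" and "size" are
 * caller-chosen values and error handling is omitted:
 *
 *	struct gmap *gmap = gmap_alloc(current->mm);
 *
 *	gmap_map_segment(gmap, uaddr, gaddr, size);
 *	gmap_enable(gmap);	(primary ASCE now points to gmap->table)
 *	... run the guest, resolving faults with gmap_fault() ...
 *	gmap_disable(gmap);	(back to the normal user ASCE)
 *	gmap_unmap_segment(gmap, gaddr, size);
 *	gmap_free(gmap);
 */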
264 
265 static int gmap_alloc_table(struct gmap *gmap,
266 			       unsigned long *table, unsigned long init)
267 {
268 	struct page *page;
269 	unsigned long *new;
270 
271 	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
272 	if (!page)
273 		return -ENOMEM;
274 	new = (unsigned long *) page_to_phys(page);
275 	crst_table_init(new, init);
276 	down_read(&gmap->mm->mmap_sem);
277 	if (*table & _REGION_ENTRY_INV) {
278 		list_add(&page->lru, &gmap->crst_list);
279 		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
280 			(*table & _REGION_ENTRY_TYPE_MASK);
281 	} else
282 		__free_pages(page, ALLOC_ORDER);
283 	up_read(&gmap->mm->mmap_sem);
284 	return 0;
285 }
286 
287 /**
288  * gmap_unmap_segment - unmap segment from the guest address space
289  * @gmap: pointer to the guest address space structure
290  * @to: address in the guest address space
291  * @len: length of the memory area to unmap
292  *
293  * Returns 0 if the unmap succeeded, -EINVAL if not.
294  */
295 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
296 {
297 	unsigned long *table;
298 	unsigned long off;
299 	int flush;
300 
301 	if ((to | len) & (PMD_SIZE - 1))
302 		return -EINVAL;
303 	if (len == 0 || to + len < to)
304 		return -EINVAL;
305 
306 	flush = 0;
307 	down_read(&gmap->mm->mmap_sem);
308 	for (off = 0; off < len; off += PMD_SIZE) {
309 		/* Walk the guest addr space page table */
310 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
311 		if (*table & _REGION_ENTRY_INV)
312 			break;
313 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
314 		table = table + (((to + off) >> 42) & 0x7ff);
315 		if (*table & _REGION_ENTRY_INV)
316 			break;
317 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
318 		table = table + (((to + off) >> 31) & 0x7ff);
319 		if (*table & _REGION_ENTRY_INV)
320 			break;
321 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
322 		table = table + (((to + off) >> 20) & 0x7ff);
323 
324 		/* Clear segment table entry in guest address space. */
325 		flush |= gmap_unlink_segment(gmap, table);
326 		*table = _SEGMENT_ENTRY_INV;
327 	}
328 	up_read(&gmap->mm->mmap_sem);
329 	if (flush)
330 		gmap_flush_tlb(gmap);
331 	return 0;
332 }
333 EXPORT_SYMBOL_GPL(gmap_unmap_segment);
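/*
 * The walk above follows the four level s390 layout used by the gmap:
 * bits 63-53 of a guest address index the region-first table, bits 52-42
 * the region-second table, bits 41-31 the region-third table and bits
 * 30-20 the segment table.  Each table has 2048 entries, hence the
 * "& 0x7ff" masks, and a segment covers 1 MB (PMD_SIZE), which is why
 * "to" and "len" have to be 1 MB aligned.
 */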
334 
335 /**
336  * gmap_map_segment - map a segment to the guest address space
337  * @gmap: pointer to the guest address space structure
338  * @from: source address in the parent address space
339  * @to: target address in the guest address space
340  *
341  * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
342  */
343 int gmap_map_segment(struct gmap *gmap, unsigned long from,
344 		     unsigned long to, unsigned long len)
345 {
346 	unsigned long *table;
347 	unsigned long off;
348 	int flush;
349 
350 	if ((from | to | len) & (PMD_SIZE - 1))
351 		return -EINVAL;
352 	if (len == 0 || from + len > PGDIR_SIZE ||
353 	    from + len < from || to + len < to)
354 		return -EINVAL;
355 
356 	flush = 0;
357 	down_read(&gmap->mm->mmap_sem);
358 	for (off = 0; off < len; off += PMD_SIZE) {
359 		/* Walk the gmap address space page table */
360 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
361 		if ((*table & _REGION_ENTRY_INV) &&
362 		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
363 			goto out_unmap;
364 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
365 		table = table + (((to + off) >> 42) & 0x7ff);
366 		if ((*table & _REGION_ENTRY_INV) &&
367 		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
368 			goto out_unmap;
369 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
370 		table = table + (((to + off) >> 31) & 0x7ff);
371 		if ((*table & _REGION_ENTRY_INV) &&
372 		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
373 			goto out_unmap;
374 		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
375 		table = table + (((to + off) >> 20) & 0x7ff);
376 
377 		/* Store 'from' address in an invalid segment table entry. */
378 		flush |= gmap_unlink_segment(gmap, table);
379 		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
380 	}
381 	up_read(&gmap->mm->mmap_sem);
382 	if (flush)
383 		gmap_flush_tlb(gmap);
384 	return 0;
385 
386 out_unmap:
387 	up_read(&gmap->mm->mmap_sem);
388 	gmap_unmap_segment(gmap, to, len);
389 	return -ENOMEM;
390 }
391 EXPORT_SYMBOL_GPL(gmap_map_segment);
392 
393 unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
394 {
395 	unsigned long *table, vmaddr, segment;
396 	struct mm_struct *mm;
397 	struct gmap_pgtable *mp;
398 	struct gmap_rmap *rmap;
399 	struct vm_area_struct *vma;
400 	struct page *page;
401 	pgd_t *pgd;
402 	pud_t *pud;
403 	pmd_t *pmd;
404 
405 	current->thread.gmap_addr = address;
406 	mm = gmap->mm;
407 	/* Walk the gmap address space page table */
408 	table = gmap->table + ((address >> 53) & 0x7ff);
409 	if (unlikely(*table & _REGION_ENTRY_INV))
410 		return -EFAULT;
411 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
412 	table = table + ((address >> 42) & 0x7ff);
413 	if (unlikely(*table & _REGION_ENTRY_INV))
414 		return -EFAULT;
415 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
416 	table = table + ((address >> 31) & 0x7ff);
417 	if (unlikely(*table & _REGION_ENTRY_INV))
418 		return -EFAULT;
419 	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
420 	table = table + ((address >> 20) & 0x7ff);
421 
422 	/* Convert the gmap address to an mm address. */
423 	segment = *table;
424 	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
425 		page = pfn_to_page(segment >> PAGE_SHIFT);
426 		mp = (struct gmap_pgtable *) page->index;
427 		return mp->vmaddr | (address & ~PMD_MASK);
428 	} else if (segment & _SEGMENT_ENTRY_RO) {
429 		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
430 		vma = find_vma(mm, vmaddr);
431 		if (!vma || vma->vm_start > vmaddr)
432 			return -EFAULT;
433 
434 		/* Walk the parent mm page table */
435 		pgd = pgd_offset(mm, vmaddr);
436 		pud = pud_alloc(mm, pgd, vmaddr);
437 		if (!pud)
438 			return -ENOMEM;
439 		pmd = pmd_alloc(mm, pud, vmaddr);
440 		if (!pmd)
441 			return -ENOMEM;
442 		if (!pmd_present(*pmd) &&
443 		    __pte_alloc(mm, vma, pmd, vmaddr))
444 			return -ENOMEM;
445 		/* pmd now points to a valid segment table entry. */
446 		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
447 		if (!rmap)
448 			return -ENOMEM;
449 		/* Link gmap segment table entry location to page table. */
450 		page = pmd_page(*pmd);
451 		mp = (struct gmap_pgtable *) page->index;
452 		rmap->entry = table;
453 		list_add(&rmap->list, &mp->mapper);
454 		/* Set gmap segment table entry to page table. */
455 		*table = pmd_val(*pmd) & PAGE_MASK;
456 		return vmaddr | (address & ~PMD_MASK);
457 	}
458 	return -EFAULT;
459 
460 }
461 EXPORT_SYMBOL_GPL(gmap_fault);
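/*
 * Summary of the gmap_fault() results: if the guest segment already points
 * to a parent page table, the parent (user space) address is returned,
 * i.e. mp->vmaddr plus the offset within the 1 MB segment.  If the entry
 * only carries the stored "from" address (invalid + read-only bits), the
 * parent page table is allocated and linked first.  Everything else yields
 * -EFAULT.  Since find_vma() and the p?d_alloc() helpers are used, the
 * caller has to hold the parent mm's mmap_sem.
 */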
462 
463 void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
464 {
465 	struct gmap_rmap *rmap, *next;
466 	struct gmap_pgtable *mp;
467 	struct page *page;
468 	int flush;
469 
470 	flush = 0;
471 	spin_lock(&mm->page_table_lock);
472 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
473 	mp = (struct gmap_pgtable *) page->index;
474 	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
475 		*rmap->entry =
476 			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
477 		list_del(&rmap->list);
478 		kfree(rmap);
479 		flush = 1;
480 	}
481 	spin_unlock(&mm->page_table_lock);
482 	if (flush)
483 		__tlb_flush_global();
484 }
485 
486 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
487 						    unsigned long vmaddr)
488 {
489 	struct page *page;
490 	unsigned long *table;
491 	struct gmap_pgtable *mp;
492 
493 	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
494 	if (!page)
495 		return NULL;
496 	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
497 	if (!mp) {
498 		__free_page(page);
499 		return NULL;
500 	}
501 	pgtable_page_ctor(page);
502 	mp->vmaddr = vmaddr & PMD_MASK;
503 	INIT_LIST_HEAD(&mp->mapper);
504 	page->index = (unsigned long) mp;
505 	atomic_set(&page->_mapcount, 3);
506 	table = (unsigned long *) page_to_phys(page);
507 	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
508 	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
509 	return table;
510 }
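/*
 * Layout of a pgste page table page as set up above: the lower 2K hold the
 * 256 page table entries (filled with _PAGE_TYPE_EMPTY), the upper 2K hold
 * the 256 page status table entries (pgstes) and are cleared to zero.
 * page->_mapcount is set to 3 (== FRAG_MASK), i.e. both 2K fragments count
 * as used; pgste pages never appear on mm->context.pgtable_list and are
 * always allocated and freed as a whole.
 */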
511 
512 static inline void page_table_free_pgste(unsigned long *table)
513 {
514 	struct page *page;
515 	struct gmap_pgtable *mp;
516 
517 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
518 	mp = (struct gmap_pgtable *) page->index;
519 	BUG_ON(!list_empty(&mp->mapper));
520 	pgtable_page_dtor(page);
521 	atomic_set(&page->_mapcount, -1);
522 	kfree(mp);
523 	__free_page(page);
524 }
525 
526 #else /* CONFIG_PGSTE */
527 
528 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
529 						    unsigned long vmaddr)
530 {
531 	return NULL;
532 }
533 
534 static inline void page_table_free_pgste(unsigned long *table)
535 {
536 }
537 
538 static inline void gmap_unmap_notifier(struct mm_struct *mm,
539 					  unsigned long *table)
540 {
541 }
542 
543 #endif /* CONFIG_PGSTE */
544 
545 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
546 {
547 	unsigned int old, new;
548 
549 	do {
550 		old = atomic_read(v);
551 		new = old ^ bits;
552 	} while (atomic_cmpxchg(v, old, new) != old);
553 	return new;
554 }
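/*
 * atomic_xor_bits() toggles the given bits in *v and returns the new value,
 * e.g. starting from *v == 0x01, atomic_xor_bits(v, 0x02) yields 0x03.  It
 * is used below to set and clear the fragment bits in page->_mapcount.
 */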
555 
556 /*
557  * page table entry allocation/free routines.
558  */
559 unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
560 {
561 	struct page *page;
562 	unsigned long *table;
563 	unsigned int mask, bit;
564 
565 	if (mm_has_pgste(mm))
566 		return page_table_alloc_pgste(mm, vmaddr);
567 	/* Allocate fragments of a 4K page as 1K/2K page table */
568 	spin_lock_bh(&mm->context.list_lock);
569 	mask = FRAG_MASK;
570 	if (!list_empty(&mm->context.pgtable_list)) {
571 		page = list_first_entry(&mm->context.pgtable_list,
572 					struct page, lru);
573 		table = (unsigned long *) page_to_phys(page);
574 		mask = atomic_read(&page->_mapcount);
575 		mask = mask | (mask >> 4);
576 	}
577 	if ((mask & FRAG_MASK) == FRAG_MASK) {
578 		spin_unlock_bh(&mm->context.list_lock);
579 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
580 		if (!page)
581 			return NULL;
582 		pgtable_page_ctor(page);
583 		atomic_set(&page->_mapcount, 1);
584 		table = (unsigned long *) page_to_phys(page);
585 		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
586 		spin_lock_bh(&mm->context.list_lock);
587 		list_add(&page->lru, &mm->context.pgtable_list);
588 	} else {
589 		for (bit = 1; mask & bit; bit <<= 1)
590 			table += PTRS_PER_PTE;
591 		mask = atomic_xor_bits(&page->_mapcount, bit);
592 		if ((mask & FRAG_MASK) == FRAG_MASK)
593 			list_del(&page->lru);
594 	}
595 	spin_unlock_bh(&mm->context.list_lock);
596 	return table;
597 }
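/*
 * Worked example for the fragment logic above (64-bit, FRAG_MASK == 0x03):
 * the first allocation from a fresh page returns the lower 2K and leaves
 * _mapcount == 0x01.  The next call sees mask 0x01, skips the used lower
 * fragment (bit 1), hands out the upper 2K via bit 2 and, since the mask
 * is now FRAG_MASK, removes the fully used page from
 * mm->context.pgtable_list.  The "mask | (mask >> 4)" step also treats
 * fragments whose RCU free is still pending (bits 4-7, see
 * page_table_free_rcu() below) as busy.
 */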
598 
599 void page_table_free(struct mm_struct *mm, unsigned long *table)
600 {
601 	struct page *page;
602 	unsigned int bit, mask;
603 
604 	if (mm_has_pgste(mm)) {
605 		gmap_unmap_notifier(mm, table);
606 		return page_table_free_pgste(table);
607 	}
608 	/* Free 1K/2K page table fragment of a 4K page */
609 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
610 	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
611 	spin_lock_bh(&mm->context.list_lock);
612 	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
613 		list_del(&page->lru);
614 	mask = atomic_xor_bits(&page->_mapcount, bit);
615 	if (mask & FRAG_MASK)
616 		list_add(&page->lru, &mm->context.pgtable_list);
617 	spin_unlock_bh(&mm->context.list_lock);
618 	if (mask == 0) {
619 		pgtable_page_dtor(page);
620 		atomic_set(&page->_mapcount, -1);
621 		__free_page(page);
622 	}
623 }
624 
625 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
626 
627 static void __page_table_free_rcu(void *table, unsigned bit)
628 {
629 	struct page *page;
630 
631 	if (bit == FRAG_MASK)
632 		return page_table_free_pgste(table);
633 	/* Free 1K/2K page table fragment of a 4K page */
634 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
635 	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
636 		pgtable_page_dtor(page);
637 		atomic_set(&page->_mapcount, -1);
638 		__free_page(page);
639 	}
640 }
641 
642 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
643 {
644 	struct mm_struct *mm;
645 	struct page *page;
646 	unsigned int bit, mask;
647 
648 	mm = tlb->mm;
649 	if (mm_has_pgste(mm)) {
650 		gmap_unmap_notifier(mm, table);
651 		table = (unsigned long *) (__pa(table) | FRAG_MASK);
652 		tlb_remove_table(tlb, table);
653 		return;
654 	}
655 	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
656 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
657 	spin_lock_bh(&mm->context.list_lock);
658 	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
659 		list_del(&page->lru);
660 	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
661 	if (mask & FRAG_MASK)
662 		list_add_tail(&page->lru, &mm->context.pgtable_list);
663 	spin_unlock_bh(&mm->context.list_lock);
664 	table = (unsigned long *) (__pa(table) | (bit << 4));
665 	tlb_remove_table(tlb, table);
666 }
667 
668 void __tlb_remove_table(void *_table)
669 {
670 	void *table = (void *)((unsigned long) _table & PAGE_MASK);
671 	unsigned type = (unsigned long) _table & ~PAGE_MASK;
672 
673 	if (type)
674 		__page_table_free_rcu(table, type);
675 	else
676 		free_pages((unsigned long) table, ALLOC_ORDER);
677 }
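/*
 * Encoding used for the deferred frees above: the low bits of the pointer
 * passed to tlb_remove_table() carry a tag.  FRAG_MASK tags a pgste page,
 * a fragment bit shifted left by four tags a page table fragment, and a
 * tag of 0 means a full crst table that is freed with ALLOC_ORDER.
 * page_table_free_rcu() clears the allocation bit and sets the pending bit
 * (bit << 4) in page->_mapcount so page_table_alloc() cannot hand the
 * fragment out again before the grace period; __page_table_free_rcu() then
 * clears the pending bit and releases the 4K page once no bits remain.
 */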
678 
679 #endif
680 
681 /*
682  * switch on pgstes for the current userspace process (for kvm)
683  */
684 int s390_enable_sie(void)
685 {
686 	struct task_struct *tsk = current;
687 	struct mm_struct *mm, *old_mm;
688 
689 	/* Do we have switched amode? If not, we cannot do sie */
690 	if (user_mode == HOME_SPACE_MODE)
691 		return -EINVAL;
692 
693 	/* Do we have pgstes? If yes, we are done */
694 	if (mm_has_pgste(tsk->mm))
695 		return 0;
696 
697 	/* let's check whether we are allowed to replace the mm */
698 	task_lock(tsk);
699 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
700 #ifdef CONFIG_AIO
701 	    !hlist_empty(&tsk->mm->ioctx_list) ||
702 #endif
703 	    tsk->mm != tsk->active_mm) {
704 		task_unlock(tsk);
705 		return -EINVAL;
706 	}
707 	task_unlock(tsk);
708 
709 	/* we copy the mm and let dup_mm create the page tables with pgstes */
710 	tsk->mm->context.alloc_pgste = 1;
711 	mm = dup_mm(tsk);
712 	tsk->mm->context.alloc_pgste = 0;
713 	if (!mm)
714 		return -ENOMEM;
715 
716 	/* Now let's check again whether something happened */
717 	task_lock(tsk);
718 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
719 #ifdef CONFIG_AIO
720 	    !hlist_empty(&tsk->mm->ioctx_list) ||
721 #endif
722 	    tsk->mm != tsk->active_mm) {
723 		mmput(mm);
724 		task_unlock(tsk);
725 		return -EINVAL;
726 	}
727 
728 	/* ok, we are alone. No ptrace, no threads, etc. */
729 	old_mm = tsk->mm;
730 	tsk->mm = tsk->active_mm = mm;
731 	preempt_disable();
732 	update_mm(mm, tsk);
733 	atomic_inc(&mm->context.attach_count);
734 	atomic_dec(&old_mm->context.attach_count);
735 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
736 	preempt_enable();
737 	task_unlock(tsk);
738 	mmput(old_mm);
739 	return 0;
740 }
741 EXPORT_SYMBOL_GPL(s390_enable_sie);
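/*
 * A hypothetical call-site sketch for s390_enable_sie(): it has to run
 * before the process creates threads or shares its mm (the checks above
 * enforce this), typically once when a SIE user such as KVM sets up a
 * virtual machine:
 *
 *	rc = s390_enable_sie();
 *	if (rc)
 *		return rc;	(refuse to set up the virtual machine)
 */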
742 
743 #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
744 bool kernel_page_present(struct page *page)
745 {
746 	unsigned long addr;
747 	int cc;
748 
749 	addr = page_to_phys(page);
750 	asm volatile(
751 		"	lra	%1,0(%1)\n"
752 		"	ipm	%0\n"
753 		"	srl	%0,28"
754 		: "=d" (cc), "+a" (addr) : : "cc");
755 	return cc == 0;
756 }
757 #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
758