/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

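/*
 * A CRST (region/segment) table occupies 1UL << ALLOC_ORDER pages.
 * Page tables are smaller than a full page, so TABLES_PER_PAGE of them
 * are carved out of one 4K page: four 1K tables on 31 bit, two 2K
 * tables on 64 bit.  FRAG_MASK has one bit per fragment; the bits live
 * in page->flags and track which fragments are in use.  SECOND_HALVES
 * marks the fragments that serve as the second half of a pair, i.e.
 * the pgste or shadow table that follows a pte table.
 */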
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL

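/*
 * Initialize a page holding pte/pgste pairs: the two 1K pte fragments
 * (at offsets 0 and 2K) are filled with empty ptes, the 1K pgste areas
 * behind them (at offsets 1K and 3K) are zeroed.
 */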
void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 256, 0, PAGE_SIZE/4);
	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 768, 0, PAGE_SIZE/4);
}

#else
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL

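/*
 * Initialize a page holding a single pte/pgste pair: 2K of empty ptes
 * followed by a zeroed 2K pgste area.
 */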
void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	memset(table + 256, 0, PAGE_SIZE/2);
}

#endif

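/*
 * Allocate a region/segment table and hang it off the mm's crst_list.
 * For a no-exec mm a shadow table is allocated as well; its physical
 * address is kept in page->index (0 otherwise) so that it can be
 * found again when the table is freed.
 */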
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->page_table_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->page_table_lock);
	return (unsigned long *) page_to_phys(page);
}

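/*
 * Unlink a region/segment table from the mm and free it, together
 * with its shadow table if one was allocated.
 */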
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->page_table_lock);
	list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
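/*
 * Grow the address space of an mm beyond its current asce_limit by
 * stacking a new top-level table on top of the old one: 2G -> 4T adds
 * a region-third table, 4T -> 8P a region-second table.  The new table
 * is allocated without the lock held, so the check and the upgrade
 * are repeated until the limit is large enough.
 */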
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

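/*
 * Shrink the address space back to at most the given limit by removing
 * top-level region tables again, one level per loop iteration.
 */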
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 */
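/*
 * Allocate one page table fragment.  A partly used page is taken from
 * the front of mm->context.pgtable_list if available, otherwise a
 * fresh page is allocated and initialized.  The low bits of
 * page->flags record which fragments are taken: one bit per fragment,
 * or two bits when a second half (pgste or shadow table) is needed.
 * Pages with no free fragments are moved to the tail of the list.
 */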
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
	spin_lock(&mm->page_table_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		spin_unlock(&mm->page_table_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;
		table = (unsigned long *) page_to_phys(page);
		if (mm->context.pgstes)
			clear_table_pgstes(table);
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->page_table_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->page_table_lock);
	return table;
}

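/*
 * Return a page table fragment.  The fragment bits are cleared in
 * page->flags; once no fragment of the page is in use any more the
 * page is taken off the list and freed.
 */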
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->page_table_lock);
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}

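/*
 * Switch off the no-exec feature for an mm: free all shadow region
 * and segment tables and clear the SECOND_HALVES bits so the shadow
 * halves of the page table pages can be handed out again.
 */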
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
	struct page *page;

	spin_lock(&mm->page_table_lock);
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
			free_pages((unsigned long) page->index, ALLOC_ORDER);
			page->index = 0;
		}
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->page_table_lock);
	mm->context.noexec = 0;
	update_mm(mm, tsk);
}

/*
 * Switch on pgstes for the mm of the current task (needed to run
 * KVM guests).
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm;
	int rc;

	task_lock(tsk);

	/* Nothing to do if pgstes are already enabled. Check that the
	 * mm exists before dereferencing it. */
	rc = 0;
	if (tsk->mm && tsk->mm->context.pgstes)
		goto unlock;

	rc = -EINVAL;
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
		goto unlock;

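	/*
	 * Temporarily flag pgstes on the old mm so that the mm copied
	 * by dup_mm() inherits the flag and allocates pgste-enabled
	 * page tables, then clear the flag on the old mm again.
	 */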
	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
	mm = dup_mm(tsk);
	tsk->mm->context.pgstes = 0;

	rc = -ENOMEM;
	if (!mm)
		goto unlock;
	mmput(tsk->mm);
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
	preempt_enable();
	rc = 0;
unlock:
	task_unlock(tsk);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);