xref: /openbmc/linux/arch/s390/mm/pgtable.c (revision 7587eb18)
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

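/*
 * In this kernel version mm->context.attach_count holds two counters:
 * the lower 16 bits count the CPUs that have the mm attached, the
 * upper 16 bits count concurrent TLB flushers.  Adding 0x10000
 * registers the caller as a flusher; if (count & 0xffff) <= active,
 * no other CPU has the mm attached and the invalidation may stay
 * local to this CPU (or, in the lazy variant below, be deferred).
 */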
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	int active, count;
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte_local(addr, ptep);
	else
		__ptep_ipte(addr, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

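/*
 * Lazy variant: if no other CPU has the mm attached, just mark the
 * pte invalid and record the pending flush in mm->context.flush_mm;
 * the TLB entries are then flushed wholesale at a later point.
 */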
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep)
{
	int active, count;
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		__ptep_ipte(addr, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

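/*
 * The PGSTE array is placed directly behind the page table, so
 * ptep[PTRS_PER_PTE] addresses the PGSTE belonging to *ptep.  The
 * PCL bit of the PGSTE serves as a bit spinlock, acquired here with
 * a compare-and-swap (csg) loop while preemption is disabled.
 */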
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	preempt_disable();
	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
	preempt_enable();
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

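/*
 * Fold the architected storage key state of the page into the PGSTE:
 * the real changed/referenced bits become the guest GC/GR bits, and
 * the access-control bits plus the fetch-protection bit are copied
 * alongside.
 */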
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

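/*
 * Install a pte and keep the software dirty bookkeeping consistent:
 * a pte that permits writes is flagged user-dirty via PGSTE_UC_BIT,
 * which test_and_clear_guest_dirty() evaluates and clears later.
 */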
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
					unsigned long addr,
					pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	if (pgste_val(pgste) & PGSTE_IN_BIT) {
		/* Invalidation notification requested, see ptep_set_notify() */
		pgste_val(pgste) &= ~PGSTE_IN_BIT;
		ptep_notify(mm, addr, ptep);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

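/*
 * Second half of a pte exchange: propagate the storage key and guest
 * usage state through the PGSTE (if the mm has one), install the new
 * pte and drop the PGSTE lock taken by ptep_xchg_start().
 */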
static inline void ptep_xchg_commit(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_direct(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

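/*
 * ptep_modify_prot_start/commit bracket a read-modify-write of a pte:
 * start invalidates the entry lazily and parks the updated PGSTE with
 * pgste_set() while keeping it locked; commit installs the new pte
 * and releases the lock.
 */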
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
}
EXPORT_SYMBOL(ptep_modify_prot_commit);

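/*
 * The pmd flush helpers mirror their pte counterparts: segment table
 * entries are invalidated with IDTE where the facility is available,
 * falling back to CSP otherwise.
 */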
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	int active, count;
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	if (!MACHINE_HAS_IDTE) {
		__pmdp_csp(pmdp);
		return old;
	}
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__pmdp_idte_local(addr, pmdp);
	else
		__pmdp_idte(addr, pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	int active, count;
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else if (MACHINE_HAS_IDTE)
		__pmdp_idte(addr, pmdp);
	else
		__pmdp_csp(pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
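/*
 * Deposited page tables are kept on a list that is threaded through
 * the pgtable pages themselves; the list_head overlays the first two
 * pte slots of each deposited table, which is why
 * pgtable_trans_huge_withdraw() resets those two entries to
 * _PAGE_INVALID before handing a table back out.
 */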
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

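/*
 * Clear a swap pte whose guest usage state, as recorded in the PGSTE,
 * marks the page as unused or logically zero, and release the swap
 * slot it references.
 */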
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
}

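/*
 * Reset the storage key of a single page to PAGE_DEFAULT_KEY and
 * clear the guest key state kept in the PGSTE.
 */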
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
}

/*
 * Test and clear the user-dirty state of a guest page.  If the page
 * was dirty, it is made write-protected again (or invalid on machines
 * without ESOP), so that the next write fault sets PGSTE_UC_BIT anew.
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

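/*
 * Set the guest view of the storage key for the page at addr.  The
 * nq flag requests the non-quiescing form of the SSKE instruction;
 * a key change is also recorded as user-dirty via PGSTE_UC_BIT.
 */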
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

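/*
 * Read the guest view of the storage key: taken from the PGSTE while
 * the pte is invalid, otherwise read from the real storage key and
 * merged with the guest's referenced/changed state.
 */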
unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	unsigned char key;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		/* note: -EFAULT is truncated by the unsigned char return type */
		return -EFAULT;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else {
		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

		/* Reflect guest's logical view, not physical */
		if (pgste_val(pgste) & PGSTE_GR_BIT)
			key |= _PAGE_REFERENCED;
		if (pgste_val(pgste) & PGSTE_GC_BIT)
			key |= _PAGE_CHANGED;
	}

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif