xref: /openbmc/linux/arch/s390/mm/gmap.c (revision 840565b1)
// SPDX-License-Identifier: GPL-2.0
/*
 *  KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2020
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *		 David Hildenbrand <david@redhat.com>
 *		 Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>

#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
#include <asm/tlb.h>

#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
	struct page *page;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	arch_set_page_dat(page, CRST_ALLOC_ORDER);
	return page;
}

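/*
 * Note (derived from the asm/pgalloc.h definitions): a crst table (region
 * or segment table) spans four pages, since CRST_ALLOC_ORDER is 2, i.e.
 * 16 KB for 2048 eight-byte entries; arch_set_page_dat() marks all four
 * pages as DAT table backing.
 */
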
/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
static struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < _REGION3_SIZE) {
		limit = _REGION3_SIZE - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < _REGION2_SIZE) {
		limit = _REGION2_SIZE - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < _REGION1_SIZE) {
		limit = _REGION1_SIZE - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_LIST_HEAD(&gmap->children);
	INIT_LIST_HEAD(&gmap->pt_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	refcount_set(&gmap->ref_count, 1);
	page = gmap_alloc_crst();
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = page_to_virt(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}

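/*
 * The ASCE type chosen above determines how much of the 64-bit guest
 * address space is addressable (sizes per arch/s390/include/asm/pgtable.h):
 *
 *	limit < 2 GB (_REGION3_SIZE)	-> segment table ASCE
 *	limit < 4 TB (_REGION2_SIZE)	-> region-3 table ASCE
 *	limit < 8 PB (_REGION1_SIZE)	-> region-2 table ASCE
 *	otherwise			-> region-1 table ASCE (full 64 bit)
 */
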
/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	unsigned long gmap_asce;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);

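/*
 * Illustrative sketch of the create/remove lifecycle as a hypothetical
 * KVM-like caller would drive it; the 4 TB - 1 limit is an arbitrary
 * choice for the example.
 */
static int __maybe_unused gmap_create_example(struct mm_struct *mm)
{
	struct gmap *gmap;

	/* a limit below 4 TB selects a region-3 table ASCE, see gmap_alloc() */
	gmap = gmap_create(mm, (1UL << 42) - 1);
	if (!gmap)
		return -ENOMEM;
	/* ... populate via gmap_map_segment() and resolve faults ... */
	gmap_remove(gmap);	/* drops the initial reference */
	return 0;
}
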
static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
static void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, CRST_ALLOC_ORDER);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		/* Free all page tables. */
		list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
			page_table_free_pgste(page);
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/**
 * gmap_get_enabled - get a pointer to the currently enabled gmap
 *
 * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
 */
struct gmap *gmap_get_enabled(void)
{
	return (struct gmap *) S390_lowcore.gmap;
}
EXPORT_SYMBOL_GPL(gmap_get_enabled);

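/*
 * Illustrative sketch: how a hypothetical caller would make a gmap the
 * active guest address space around guest execution (KVM does this per
 * vcpu around entering SIE).
 */
static void __maybe_unused gmap_enable_example(struct gmap *gmap)
{
	gmap_enable(gmap);		/* S390_lowcore.gmap = gmap */
	WARN_ON(gmap_get_enabled() != gmap);
	/* ... run the guest ... */
	gmap_disable(gmap);
}
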
/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	page = pmd_pgtable_page((pmd_t *) entry);
	return page->index + offset;
}

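/*
 * Worked example for __gmap_segment_gaddr(): a segment table holds
 * PTRS_PER_PMD == 2048 eight-byte entries and page->index stores the
 * guest address at which the table starts. For the entry at index 5
 * the function therefore returns page->index + 5 * PMD_SIZE, the base
 * guest address of the sixth 1 MB segment mapped by that table.
 */
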
/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_EMPTY);
		*entry = _SEGMENT_ENTRY_EMPTY;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

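/*
 * Illustrative sketch: mapping 16 MB of parent address space at guest
 * address 0. @vmaddr is a hypothetical, PMD_SIZE-aligned host address;
 * from, to and len must all be aligned to PMD_SIZE (1 MB segments).
 */
static int __maybe_unused gmap_map_example(struct gmap *gmap,
					   unsigned long vmaddr)
{
	/* map host [vmaddr, vmaddr + 16 MB) at guest [0, 16 MB) */
	return gmap_map_segment(gmap, vmaddr, 0, 16UL << 20);
}
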
/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	mmap_read_lock(gmap->mm);
	rc = __gmap_translate(gmap, gaddr);
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

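/*
 * Illustrative sketch: translating a guest address and checking for the
 * -EFAULT that is encoded in the unsigned return value.
 */
static unsigned long __maybe_unused gmap_translate_example(struct gmap *gmap,
							   unsigned long gaddr)
{
	unsigned long vmaddr = gmap_translate(gmap, gaddr);

	if (IS_ERR_VALUE(vmaddr))
		return 0;	/* no host mapping recorded for gaddr */
	return vmaddr;
}
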
/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
			   unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	u64 unprot;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & _REGION1_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & _REGION2_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & _REGION3_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, vmaddr);
	VM_BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_leaf(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* Are we allowed to use huge pages? */
	if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc) {
			if (pmd_large(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					| _SEGMENT_ENTRY_GMAP_UC;
			} else
				*table = pmd_val(*pmd) &
					_SEGMENT_ENTRY_HARDWARE_BITS;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;
	bool unlocked;

	mmap_read_lock(gmap->mm);

retry:
	unlocked = false;
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
			     &unlocked)) {
		rc = -EFAULT;
		goto out_up;
	}
	/*
	 * In the case that fixup_user_fault unlocked the mmap_lock during
	 * fault-in, redo __gmap_translate to not race with a map/unmap_segment.
	 */
	if (unlocked)
		goto retry;

	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

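/*
 * Illustrative sketch: resolving a guest write fault the way a
 * hypothetical intercept handler might, before resuming the guest.
 */
static int __maybe_unused gmap_fault_example(struct gmap *gmap,
					     unsigned long gaddr)
{
	/* fault in the backing page writable and link it into the gmap */
	return gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
}
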
/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	struct vm_area_struct *vma;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;

		vma = vma_lookup(gmap->mm, vmaddr);
		if (!vma || is_vm_hugetlb_page(vma))
			return;

		/* Get pointer to the page table entry */
		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
		if (likely(ptep)) {
			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
			pte_unmap_unlock(ptep, ptl);
		}
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	mmap_read_lock(gmap->mm);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		if (!vma)
			continue;
		/*
		 * We do not discard pages that are backed by
		 * hugetlbfs, so we don't have to refault them.
		 */
		if (is_vm_hugetlb_page(vma))
			continue;
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range_single(vma, vmaddr, size, NULL);
	}
	mmap_read_unlock(gmap->mm);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

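/*
 * Illustrative sketch: a pte invalidation callback as a hypothetical
 * consumer (such as KVM) would set it up. After
 * gmap_register_pte_notifier(&gmap_notifier_example) the callback is
 * invoked through gmap_call_notifier() below.
 */
static void __maybe_unused gmap_notifier_example_call(struct gmap *gmap,
						      unsigned long start,
						      unsigned long end)
{
	/* e.g. react to guest addresses [start, end] losing protection */
}

static struct gmap_notifier gmap_notifier_example __maybe_unused = {
	.notifier_call = gmap_notifier_example_call,
};
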
/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
					     unsigned long gaddr, int level)
{
	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
	unsigned long *table = gmap->table;

	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;

	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
		return NULL;

	if (asce_type != _ASCE_TYPE_REGION1 &&
	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
		return NULL;

	switch (asce_type) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
	}
	return table;
}

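/*
 * Worked example for gmap_table_walk(): with a region-2 ASCE
 * (_ASCE_TYPE_REGION2 == 0x08, so asce_type >> 2 == 2) the deepest
 * allowed @level is 3, and any gaddr with a bit at or above 2^53 set is
 * rejected by the (-1UL << (31 + 2 * 11)) mask. The walk then enters the
 * switch at the region-2 case label and descends region-2 -> region-3 ->
 * segment (-> page table for level 0) via the fallthroughs.
 */
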
857b2d73b2aSMartin Schwidefsky /**
858b2d73b2aSMartin Schwidefsky  * gmap_pte_op_walk - walk the gmap page table, get the page table lock
859b2d73b2aSMartin Schwidefsky  *		      and return the pte pointer
860b2d73b2aSMartin Schwidefsky  * @gmap: pointer to guest mapping meta data structure
861b2d73b2aSMartin Schwidefsky  * @gaddr: virtual address in the guest address space
862b2d73b2aSMartin Schwidefsky  * @ptl: pointer to the spinlock pointer
863b2d73b2aSMartin Schwidefsky  *
864b2d73b2aSMartin Schwidefsky  * Returns a pointer to the locked pte for a guest address, or NULL
865b2d73b2aSMartin Schwidefsky  */
gmap_pte_op_walk(struct gmap * gmap,unsigned long gaddr,spinlock_t ** ptl)866b2d73b2aSMartin Schwidefsky static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
867b2d73b2aSMartin Schwidefsky 			       spinlock_t **ptl)
868b2d73b2aSMartin Schwidefsky {
869b2d73b2aSMartin Schwidefsky 	unsigned long *table;
870b2d73b2aSMartin Schwidefsky 
87196965941SDavid Hildenbrand 	BUG_ON(gmap_is_shadow(gmap));
872b2d73b2aSMartin Schwidefsky 	/* Walk the gmap page table, lock and get pte pointer */
8734be130a0SMartin Schwidefsky 	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
87496965941SDavid Hildenbrand 	if (!table || *table & _SEGMENT_ENTRY_INVALID)
875b2d73b2aSMartin Schwidefsky 		return NULL;
876b2d73b2aSMartin Schwidefsky 	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
877b2d73b2aSMartin Schwidefsky }
878b2d73b2aSMartin Schwidefsky 
879b2d73b2aSMartin Schwidefsky /**
880b2d73b2aSMartin Schwidefsky  * gmap_pte_op_fixup - force a page in and connect the gmap page table
881b2d73b2aSMartin Schwidefsky  * @gmap: pointer to guest mapping meta data structure
882b2d73b2aSMartin Schwidefsky  * @gaddr: virtual address in the guest address space
883b2d73b2aSMartin Schwidefsky  * @vmaddr: address in the host process address space
88401f71917SDavid Hildenbrand  * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
885b2d73b2aSMartin Schwidefsky  *
886b2d73b2aSMartin Schwidefsky  * Returns 0 if the caller can retry __gmap_translate (might fail again),
887b2d73b2aSMartin Schwidefsky  * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
888b2d73b2aSMartin Schwidefsky  * up or connecting the gmap page table.
889b2d73b2aSMartin Schwidefsky  */
gmap_pte_op_fixup(struct gmap * gmap,unsigned long gaddr,unsigned long vmaddr,int prot)890b2d73b2aSMartin Schwidefsky static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
89101f71917SDavid Hildenbrand 			     unsigned long vmaddr, int prot)
892b2d73b2aSMartin Schwidefsky {
893b2d73b2aSMartin Schwidefsky 	struct mm_struct *mm = gmap->mm;
89401f71917SDavid Hildenbrand 	unsigned int fault_flags;
895b2d73b2aSMartin Schwidefsky 	bool unlocked = false;
896b2d73b2aSMartin Schwidefsky 
8974be130a0SMartin Schwidefsky 	BUG_ON(gmap_is_shadow(gmap));
89801f71917SDavid Hildenbrand 	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
89964019a2eSPeter Xu 	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
900b2d73b2aSMartin Schwidefsky 		return -EFAULT;
901b2d73b2aSMartin Schwidefsky 	if (unlocked)
902c1e8d7c6SMichel Lespinasse 		/* lost mmap_lock, caller has to retry __gmap_translate */
903b2d73b2aSMartin Schwidefsky 		return 0;
904b2d73b2aSMartin Schwidefsky 	/* Connect the page tables */
905b2d73b2aSMartin Schwidefsky 	return __gmap_link(gmap, gaddr, vmaddr);
906b2d73b2aSMartin Schwidefsky }
907b2d73b2aSMartin Schwidefsky 
908b2d73b2aSMartin Schwidefsky /**
909b2d73b2aSMartin Schwidefsky  * gmap_pte_op_end - release the page table lock
910b2f58941SHugh Dickins  * @ptep: pointer to the locked pte
911b2f58941SHugh Dickins  * @ptl: pointer to the page table spinlock
912b2d73b2aSMartin Schwidefsky  */
gmap_pte_op_end(pte_t * ptep,spinlock_t * ptl)913b2f58941SHugh Dickins static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
914b2d73b2aSMartin Schwidefsky {
915b2f58941SHugh Dickins 	pte_unmap_unlock(ptep, ptl);
916b2d73b2aSMartin Schwidefsky }
917b2d73b2aSMartin Schwidefsky 
9185a045bb9SJanosch Frank /**
9195a045bb9SJanosch Frank  * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
9205a045bb9SJanosch Frank  *		      and return the pmd pointer
9215a045bb9SJanosch Frank  * @gmap: pointer to guest mapping meta data structure
9225a045bb9SJanosch Frank  * @gaddr: virtual address in the guest address space
9235a045bb9SJanosch Frank  *
9245a045bb9SJanosch Frank  * Returns a pointer to the pmd for a guest address, or NULL
9255a045bb9SJanosch Frank  */
gmap_pmd_op_walk(struct gmap * gmap,unsigned long gaddr)9265a045bb9SJanosch Frank static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
9275a045bb9SJanosch Frank {
9285a045bb9SJanosch Frank 	pmd_t *pmdp;
9295a045bb9SJanosch Frank 
9305a045bb9SJanosch Frank 	BUG_ON(gmap_is_shadow(gmap));
9315a045bb9SJanosch Frank 	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
932af4bf6c3SDavid Hildenbrand 	if (!pmdp)
933af4bf6c3SDavid Hildenbrand 		return NULL;
9345a045bb9SJanosch Frank 
935af4bf6c3SDavid Hildenbrand 	/* without huge pages, there is no need to take the table lock */
936af4bf6c3SDavid Hildenbrand 	if (!gmap->mm->context.allow_gmap_hpage_1m)
937af4bf6c3SDavid Hildenbrand 		return pmd_none(*pmdp) ? NULL : pmdp;
938af4bf6c3SDavid Hildenbrand 
939af4bf6c3SDavid Hildenbrand 	spin_lock(&gmap->guest_table_lock);
940af4bf6c3SDavid Hildenbrand 	if (pmd_none(*pmdp)) {
9415a045bb9SJanosch Frank 		spin_unlock(&gmap->guest_table_lock);
9425a045bb9SJanosch Frank 		return NULL;
9435a045bb9SJanosch Frank 	}
9445a045bb9SJanosch Frank 
9455a045bb9SJanosch Frank 	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
9465a045bb9SJanosch Frank 	if (!pmd_large(*pmdp))
9475a045bb9SJanosch Frank 		spin_unlock(&gmap->guest_table_lock);
9485a045bb9SJanosch Frank 	return pmdp;
9495a045bb9SJanosch Frank }
9505a045bb9SJanosch Frank 
9515a045bb9SJanosch Frank /**
9525a045bb9SJanosch Frank  * gmap_pmd_op_end - release the guest_table_lock if needed
9535a045bb9SJanosch Frank  * @gmap: pointer to the guest mapping meta data structure
9545a045bb9SJanosch Frank  * @pmdp: pointer to the pmd
9555a045bb9SJanosch Frank  */
9565a045bb9SJanosch Frank static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
9575a045bb9SJanosch Frank {
9585a045bb9SJanosch Frank 	if (pmd_large(*pmdp))
9595a045bb9SJanosch Frank 		spin_unlock(&gmap->guest_table_lock);
9605a045bb9SJanosch Frank }
9615a045bb9SJanosch Frank 
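/*
 * Illustrative sketch, not part of gmap.c: the intended pairing of the
 * two helpers above. Only a large (1M) segment entry keeps the
 * guest_table_lock held between _walk and _end; 4k mappings are locked
 * at the pte level instead. The helper name is made up for illustration.
 */
static bool gmap_segment_is_large(struct gmap *gmap, unsigned long gaddr)
{
	bool large = false;
	pmd_t *pmdp;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (pmdp) {
		large = pmd_large(*pmdp);
		gmap_pmd_op_end(gmap, pmdp);
	}
	return large;
}
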
9625a045bb9SJanosch Frank /*
9637c4b13a7SJanosch Frank  * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
9647c4b13a7SJanosch Frank  * @pmdp: pointer to the pmd to be protected
9657c4b13a7SJanosch Frank  * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
9667c4b13a7SJanosch Frank  * @bits: notification bits to set
9677c4b13a7SJanosch Frank  *
9687c4b13a7SJanosch Frank  * Returns:
9697c4b13a7SJanosch Frank  * 0 if successfully protected
9707c4b13a7SJanosch Frank  * -EAGAIN if a fixup is needed
9717c4b13a7SJanosch Frank  * -EINVAL if unsupported notifier bits have been specified
9727c4b13a7SJanosch Frank  *
973c1e8d7c6SMichel Lespinasse  * Expected to be called with sg->mm->mmap_lock in read and
9747c4b13a7SJanosch Frank  * guest_table_lock held.
9757c4b13a7SJanosch Frank  */
9767c4b13a7SJanosch Frank static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
9777c4b13a7SJanosch Frank 			    pmd_t *pmdp, int prot, unsigned long bits)
9787c4b13a7SJanosch Frank {
9797c4b13a7SJanosch Frank 	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
9807c4b13a7SJanosch Frank 	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
9810959e168SJanosch Frank 	pmd_t new = *pmdp;
9827c4b13a7SJanosch Frank 
9837c4b13a7SJanosch Frank 	/* Fixup needed */
9847c4b13a7SJanosch Frank 	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
9857c4b13a7SJanosch Frank 		return -EAGAIN;
9867c4b13a7SJanosch Frank 
9870959e168SJanosch Frank 	if (prot == PROT_NONE && !pmd_i) {
988e1fc74ffSHeiko Carstens 		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
9890959e168SJanosch Frank 		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
9900959e168SJanosch Frank 	}
9910959e168SJanosch Frank 
9920959e168SJanosch Frank 	if (prot == PROT_READ && !pmd_p) {
993e1fc74ffSHeiko Carstens 		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
994e1fc74ffSHeiko Carstens 		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
9950959e168SJanosch Frank 		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
9960959e168SJanosch Frank 	}
9970959e168SJanosch Frank 
9987c4b13a7SJanosch Frank 	if (bits & GMAP_NOTIFY_MPROT)
999b8e3b379SHeiko Carstens 		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
10007c4b13a7SJanosch Frank 
10017c4b13a7SJanosch Frank 	/* Shadow GMAP protection needs split PMDs */
10027c4b13a7SJanosch Frank 	if (bits & GMAP_NOTIFY_SHADOW)
10037c4b13a7SJanosch Frank 		return -EINVAL;
10047c4b13a7SJanosch Frank 
10057c4b13a7SJanosch Frank 	return 0;
10067c4b13a7SJanosch Frank }
10077c4b13a7SJanosch Frank 
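/*
 * Worked illustration, not part of the original file: when the check in
 * gmap_protect_pmd() above demands a fixup (-EAGAIN):
 *
 *	entry state		PROT_NONE	PROT_READ	PROT_WRITE
 *	invalid (pmd_i)		0		-EAGAIN		-EAGAIN
 *	protected (pmd_p)	0		0		-EAGAIN
 *
 * Protection can only be tightened in place; making an entry present or
 * writable again requires a userspace fault via gmap_pte_op_fixup().
 */
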
10087c4b13a7SJanosch Frank /*
10095a045bb9SJanosch Frank  * gmap_protect_pte - remove access rights to memory and set pgste bits
10105a045bb9SJanosch Frank  * @gmap: pointer to guest mapping meta data structure
10115a045bb9SJanosch Frank  * @gaddr: virtual address in the guest address space
10125a045bb9SJanosch Frank  * @pmdp: pointer to the pmd associated with the pte
10135a045bb9SJanosch Frank  * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
10142c46e974SJanosch Frank  * @bits: notification bits to set
10155a045bb9SJanosch Frank  *
10165a045bb9SJanosch Frank  * Returns 0 if successfully protected, -ENOMEM if out of memory and
10175a045bb9SJanosch Frank  * -EAGAIN if a fixup is needed.
10185a045bb9SJanosch Frank  *
1019c1e8d7c6SMichel Lespinasse  * Expected to be called with sg->mm->mmap_lock in read
10205a045bb9SJanosch Frank  */
10215a045bb9SJanosch Frank static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
10225a045bb9SJanosch Frank 			    pmd_t *pmdp, int prot, unsigned long bits)
10235a045bb9SJanosch Frank {
10245a045bb9SJanosch Frank 	int rc;
10255a045bb9SJanosch Frank 	pte_t *ptep;
1026b2f58941SHugh Dickins 	spinlock_t *ptl;
10272c46e974SJanosch Frank 	unsigned long pbits = 0;
10285a045bb9SJanosch Frank 
10295a045bb9SJanosch Frank 	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
10305a045bb9SJanosch Frank 		return -EAGAIN;
10315a045bb9SJanosch Frank 
10325a045bb9SJanosch Frank 	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
10335a045bb9SJanosch Frank 	if (!ptep)
10345a045bb9SJanosch Frank 		return -ENOMEM;
10355a045bb9SJanosch Frank 
10362c46e974SJanosch Frank 	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
10372c46e974SJanosch Frank 	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
10385a045bb9SJanosch Frank 	/* Protect and unlock. */
10392c46e974SJanosch Frank 	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
1040b2f58941SHugh Dickins 	gmap_pte_op_end(ptep, ptl);
10415a045bb9SJanosch Frank 	return rc;
10425a045bb9SJanosch Frank }
10435a045bb9SJanosch Frank 
10444be130a0SMartin Schwidefsky /*
10454be130a0SMartin Schwidefsky  * gmap_protect_range - remove access rights to memory and set pgste bits
10461e133ab2SMartin Schwidefsky  * @gmap: pointer to guest mapping meta data structure
10471e133ab2SMartin Schwidefsky  * @gaddr: virtual address in the guest address space
10481e133ab2SMartin Schwidefsky  * @len: size of area
10494be130a0SMartin Schwidefsky  * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
10504be130a0SMartin Schwidefsky  * @bits: pgste notification bits to set
10511e133ab2SMartin Schwidefsky  *
10524be130a0SMartin Schwidefsky  * Returns 0 if successfully protected, -ENOMEM if out of memory and
10534be130a0SMartin Schwidefsky  * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
10544be130a0SMartin Schwidefsky  *
1055c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
10561e133ab2SMartin Schwidefsky  */
10574be130a0SMartin Schwidefsky static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
10584be130a0SMartin Schwidefsky 			      unsigned long len, int prot, unsigned long bits)
10591e133ab2SMartin Schwidefsky {
10607c4b13a7SJanosch Frank 	unsigned long vmaddr, dist;
10615a045bb9SJanosch Frank 	pmd_t *pmdp;
10624be130a0SMartin Schwidefsky 	int rc;
10631e133ab2SMartin Schwidefsky 
106496965941SDavid Hildenbrand 	BUG_ON(gmap_is_shadow(gmap));
10651e133ab2SMartin Schwidefsky 	while (len) {
10664be130a0SMartin Schwidefsky 		rc = -EAGAIN;
10675a045bb9SJanosch Frank 		pmdp = gmap_pmd_op_walk(gmap, gaddr);
10685a045bb9SJanosch Frank 		if (pmdp) {
10697c4b13a7SJanosch Frank 			if (!pmd_large(*pmdp)) {
10705a045bb9SJanosch Frank 				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
10715a045bb9SJanosch Frank 						      bits);
10725a045bb9SJanosch Frank 				if (!rc) {
10735a045bb9SJanosch Frank 					len -= PAGE_SIZE;
10745a045bb9SJanosch Frank 					gaddr += PAGE_SIZE;
10755a045bb9SJanosch Frank 				}
10767c4b13a7SJanosch Frank 			} else {
10777c4b13a7SJanosch Frank 				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
10787c4b13a7SJanosch Frank 						      bits);
10797c4b13a7SJanosch Frank 				if (!rc) {
10807c4b13a7SJanosch Frank 					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
10817c4b13a7SJanosch Frank 					len = len < dist ? 0 : len - dist;
10827c4b13a7SJanosch Frank 					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
10837c4b13a7SJanosch Frank 				}
10847c4b13a7SJanosch Frank 			}
10855a045bb9SJanosch Frank 			gmap_pmd_op_end(gmap, pmdp);
10861e133ab2SMartin Schwidefsky 		}
10874be130a0SMartin Schwidefsky 		if (rc) {
10887c4b13a7SJanosch Frank 			if (rc == -EINVAL)
10897c4b13a7SJanosch Frank 				return rc;
10907c4b13a7SJanosch Frank 
10917c4b13a7SJanosch Frank 			/* -EAGAIN, fixup of userspace mm and gmap */
10924be130a0SMartin Schwidefsky 			vmaddr = __gmap_translate(gmap, gaddr);
10934be130a0SMartin Schwidefsky 			if (IS_ERR_VALUE(vmaddr))
10944be130a0SMartin Schwidefsky 				return vmaddr;
109501f71917SDavid Hildenbrand 			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
10961e133ab2SMartin Schwidefsky 			if (rc)
10974be130a0SMartin Schwidefsky 				return rc;
10984be130a0SMartin Schwidefsky 		}
10991e133ab2SMartin Schwidefsky 	}
11004be130a0SMartin Schwidefsky 	return 0;
11011e133ab2SMartin Schwidefsky }
11024be130a0SMartin Schwidefsky 
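/*
 * Worked example, not part of the original file: after a whole 1M
 * segment is protected, gmap_protect_range() above advances to the next
 * segment boundary. For gaddr = 0x123000:
 *	dist  = HPAGE_SIZE - 0x23000 = 0xdd000
 *	len  -= 0xdd000
 *	gaddr = 0x200000
 */
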
1103b2d73b2aSMartin Schwidefsky /**
1104b2d73b2aSMartin Schwidefsky  * gmap_mprotect_notify - change access rights for a range of ptes and
1105b2d73b2aSMartin Schwidefsky  *                        call the notifier if any pte changes again
11061e133ab2SMartin Schwidefsky  * @gmap: pointer to guest mapping meta data structure
11071e133ab2SMartin Schwidefsky  * @gaddr: virtual address in the guest address space
11081e133ab2SMartin Schwidefsky  * @len: size of area
1109b2d73b2aSMartin Schwidefsky  * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
11101e133ab2SMartin Schwidefsky  *
1111b2d73b2aSMartin Schwidefsky  * Returns 0 if a gmap mapping exists for each page in the given range,
1112b2d73b2aSMartin Schwidefsky  * the new access rights could be set, and the notifier could be armed.
1113b2d73b2aSMartin Schwidefsky  * If the gmap mapping is missing for one or more pages, -EFAULT is
1114b2d73b2aSMartin Schwidefsky  * returned. If no memory could be allocated, -ENOMEM is returned.
1115b2d73b2aSMartin Schwidefsky  * This function establishes missing page table entries.
11161e133ab2SMartin Schwidefsky  */
1117b2d73b2aSMartin Schwidefsky int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
1118b2d73b2aSMartin Schwidefsky 			 unsigned long len, int prot)
11191e133ab2SMartin Schwidefsky {
11204be130a0SMartin Schwidefsky 	int rc;
11211e133ab2SMartin Schwidefsky 
11224be130a0SMartin Schwidefsky 	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
11231e133ab2SMartin Schwidefsky 		return -EINVAL;
1124b2d73b2aSMartin Schwidefsky 	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
1125b2d73b2aSMartin Schwidefsky 		return -EINVAL;
1126d8ed45c5SMichel Lespinasse 	mmap_read_lock(gmap->mm);
11272c46e974SJanosch Frank 	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
1128d8ed45c5SMichel Lespinasse 	mmap_read_unlock(gmap->mm);
11291e133ab2SMartin Schwidefsky 	return rc;
11301e133ab2SMartin Schwidefsky }
11314be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
11324be130a0SMartin Schwidefsky 
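/*
 * Hedged usage sketch, not part of gmap.c: how a KVM-side caller might
 * arm write notification on a single guest page. The function name
 * kvm_s390_wp_one_page() is made up for illustration; the gmap pointer
 * would come from the vm's arch data.
 */
static int kvm_s390_wp_one_page(struct gmap *gmap, unsigned long gaddr)
{
	/* write-protect one 4k guest page and arm the MPROT notifier */
	return gmap_mprotect_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE,
				    PROT_READ);
}
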
11334be130a0SMartin Schwidefsky /**
11344be130a0SMartin Schwidefsky  * gmap_read_table - get an unsigned long value from a guest page table using
11354be130a0SMartin Schwidefsky  *                   absolute addressing, without marking the page referenced.
11364be130a0SMartin Schwidefsky  * @gmap: pointer to guest mapping meta data structure
11374be130a0SMartin Schwidefsky  * @gaddr: virtual address in the guest address space
11384be130a0SMartin Schwidefsky  * @val: pointer to the unsigned long value to return
11394be130a0SMartin Schwidefsky  *
11404be130a0SMartin Schwidefsky  * Returns 0 if the value was read, -ENOMEM if out of memory, -EFAULT
114196965941SDavid Hildenbrand  * if reading using the virtual address failed, and -EINVAL if called
114296965941SDavid Hildenbrand  * on a gmap shadow.
11434be130a0SMartin Schwidefsky  *
1144c1e8d7c6SMichel Lespinasse  * Called with gmap->mm->mmap_lock in read.
11454be130a0SMartin Schwidefsky  */
11464be130a0SMartin Schwidefsky int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
11474be130a0SMartin Schwidefsky {
11484be130a0SMartin Schwidefsky 	unsigned long address, vmaddr;
11494be130a0SMartin Schwidefsky 	spinlock_t *ptl;
11504be130a0SMartin Schwidefsky 	pte_t *ptep, pte;
11514be130a0SMartin Schwidefsky 	int rc;
11524be130a0SMartin Schwidefsky 
115396965941SDavid Hildenbrand 	if (gmap_is_shadow(gmap))
115496965941SDavid Hildenbrand 		return -EINVAL;
115596965941SDavid Hildenbrand 
11564be130a0SMartin Schwidefsky 	while (1) {
1157b2d73b2aSMartin Schwidefsky 		rc = -EAGAIN;
1158b2d73b2aSMartin Schwidefsky 		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
1159b2d73b2aSMartin Schwidefsky 		if (ptep) {
11604be130a0SMartin Schwidefsky 			pte = *ptep;
11614be130a0SMartin Schwidefsky 			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
11624be130a0SMartin Schwidefsky 				address = pte_val(pte) & PAGE_MASK;
11634be130a0SMartin Schwidefsky 				address += gaddr & ~PAGE_MASK;
1164079f0c21SNico Boehr 				*val = *(unsigned long *)__va(address);
1165b8e3b379SHeiko Carstens 				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
11664be130a0SMartin Schwidefsky 				/* Do *NOT* clear the _PAGE_INVALID bit! */
11674be130a0SMartin Schwidefsky 				rc = 0;
11684be130a0SMartin Schwidefsky 			}
1169b2f58941SHugh Dickins 			gmap_pte_op_end(ptep, ptl);
1170b2d73b2aSMartin Schwidefsky 		}
11714be130a0SMartin Schwidefsky 		if (!rc)
11724be130a0SMartin Schwidefsky 			break;
1173b2d73b2aSMartin Schwidefsky 		vmaddr = __gmap_translate(gmap, gaddr);
1174b2d73b2aSMartin Schwidefsky 		if (IS_ERR_VALUE(vmaddr)) {
1175b2d73b2aSMartin Schwidefsky 			rc = vmaddr;
11761e133ab2SMartin Schwidefsky 			break;
11771e133ab2SMartin Schwidefsky 		}
117801f71917SDavid Hildenbrand 		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
11791e133ab2SMartin Schwidefsky 		if (rc)
11801e133ab2SMartin Schwidefsky 			break;
1181b2d73b2aSMartin Schwidefsky 	}
11821e133ab2SMartin Schwidefsky 	return rc;
11831e133ab2SMartin Schwidefsky }
11844be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_read_table);
11854be130a0SMartin Schwidefsky 
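/*
 * Hedged usage sketch, not part of gmap.c: gmap_read_table() documents
 * that it is called with mmap_lock held in read, so a caller outside a
 * fault path takes the lock itself. peek_guest_word() is a made-up name.
 */
static int peek_guest_word(struct gmap *gmap, unsigned long gaddr,
			   unsigned long *val)
{
	int rc;

	mmap_read_lock(gmap->mm);
	rc = gmap_read_table(gmap, gaddr, val);
	mmap_read_unlock(gmap->mm);
	return rc;
}
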
11864be130a0SMartin Schwidefsky /**
11874be130a0SMartin Schwidefsky  * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
11884be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
11894be130a0SMartin Schwidefsky  * @vmaddr: vm address associated with the rmap
11904be130a0SMartin Schwidefsky  * @rmap: pointer to the rmap structure
11914be130a0SMartin Schwidefsky  *
11924be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
11934be130a0SMartin Schwidefsky  */
11944be130a0SMartin Schwidefsky static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
11954be130a0SMartin Schwidefsky 				    struct gmap_rmap *rmap)
11964be130a0SMartin Schwidefsky {
1197a06afe83SChristian Borntraeger 	struct gmap_rmap *temp;
1198d12a3d60SHeiko Carstens 	void __rcu **slot;
11994be130a0SMartin Schwidefsky 
12004be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
12014be130a0SMartin Schwidefsky 	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
12024be130a0SMartin Schwidefsky 	if (slot) {
12034be130a0SMartin Schwidefsky 		rmap->next = radix_tree_deref_slot_protected(slot,
12044be130a0SMartin Schwidefsky 							&sg->guest_table_lock);
1205a06afe83SChristian Borntraeger 		for (temp = rmap->next; temp; temp = temp->next) {
1206a06afe83SChristian Borntraeger 			if (temp->raddr == rmap->raddr) {
1207a06afe83SChristian Borntraeger 				kfree(rmap);
1208a06afe83SChristian Borntraeger 				return;
1209a06afe83SChristian Borntraeger 			}
1210a06afe83SChristian Borntraeger 		}
12116d75f366SJohannes Weiner 		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
12124be130a0SMartin Schwidefsky 	} else {
12134be130a0SMartin Schwidefsky 		rmap->next = NULL;
12144be130a0SMartin Schwidefsky 		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
12154be130a0SMartin Schwidefsky 				  rmap);
12164be130a0SMartin Schwidefsky 	}
12174be130a0SMartin Schwidefsky }
12184be130a0SMartin Schwidefsky 
12194be130a0SMartin Schwidefsky /**
12205c528db0SDavid Hildenbrand  * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
12214be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
12224be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow gmap
12234be130a0SMartin Schwidefsky  * @paddr: address in the parent guest address space
12244be130a0SMartin Schwidefsky  * @len: length of the memory area to protect
12254be130a0SMartin Schwidefsky  *
12264be130a0SMartin Schwidefsky  * Returns 0 if successfully protected and the rmap was created, -ENOMEM
12274be130a0SMartin Schwidefsky  * if out of memory and -EFAULT if paddr is invalid.
12284be130a0SMartin Schwidefsky  */
12294be130a0SMartin Schwidefsky static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
12305c528db0SDavid Hildenbrand 			     unsigned long paddr, unsigned long len)
12314be130a0SMartin Schwidefsky {
12324be130a0SMartin Schwidefsky 	struct gmap *parent;
12334be130a0SMartin Schwidefsky 	struct gmap_rmap *rmap;
12344be130a0SMartin Schwidefsky 	unsigned long vmaddr;
12354be130a0SMartin Schwidefsky 	spinlock_t *ptl;
12364be130a0SMartin Schwidefsky 	pte_t *ptep;
12374be130a0SMartin Schwidefsky 	int rc;
12384be130a0SMartin Schwidefsky 
12394be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
12404be130a0SMartin Schwidefsky 	parent = sg->parent;
12414be130a0SMartin Schwidefsky 	while (len) {
12424be130a0SMartin Schwidefsky 		vmaddr = __gmap_translate(parent, paddr);
12434be130a0SMartin Schwidefsky 		if (IS_ERR_VALUE(vmaddr))
12444be130a0SMartin Schwidefsky 			return vmaddr;
12450cd2a787SChristian Borntraeger 		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
12464be130a0SMartin Schwidefsky 		if (!rmap)
12474be130a0SMartin Schwidefsky 			return -ENOMEM;
12484be130a0SMartin Schwidefsky 		rmap->raddr = raddr;
12490cd2a787SChristian Borntraeger 		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
12504be130a0SMartin Schwidefsky 		if (rc) {
12514be130a0SMartin Schwidefsky 			kfree(rmap);
12524be130a0SMartin Schwidefsky 			return rc;
12534be130a0SMartin Schwidefsky 		}
12544be130a0SMartin Schwidefsky 		rc = -EAGAIN;
12554be130a0SMartin Schwidefsky 		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
12564be130a0SMartin Schwidefsky 		if (ptep) {
12574be130a0SMartin Schwidefsky 			spin_lock(&sg->guest_table_lock);
12585c528db0SDavid Hildenbrand 			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
12594be130a0SMartin Schwidefsky 					     PGSTE_VSIE_BIT);
12604be130a0SMartin Schwidefsky 			if (!rc)
12614be130a0SMartin Schwidefsky 				gmap_insert_rmap(sg, vmaddr, rmap);
12624be130a0SMartin Schwidefsky 			spin_unlock(&sg->guest_table_lock);
1263b2f58941SHugh Dickins 			gmap_pte_op_end(ptep, ptl);
12644be130a0SMartin Schwidefsky 		}
12654be130a0SMartin Schwidefsky 		radix_tree_preload_end();
12664be130a0SMartin Schwidefsky 		if (rc) {
12674be130a0SMartin Schwidefsky 			kfree(rmap);
12685c528db0SDavid Hildenbrand 			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
12694be130a0SMartin Schwidefsky 			if (rc)
12704be130a0SMartin Schwidefsky 				return rc;
12714be130a0SMartin Schwidefsky 			continue;
12724be130a0SMartin Schwidefsky 		}
12734be130a0SMartin Schwidefsky 		paddr += PAGE_SIZE;
12744be130a0SMartin Schwidefsky 		len -= PAGE_SIZE;
12754be130a0SMartin Schwidefsky 	}
12764be130a0SMartin Schwidefsky 	return 0;
12774be130a0SMartin Schwidefsky }
12784be130a0SMartin Schwidefsky 
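/*
 * Illustrative note, not part of the original file: the preload pattern
 * in gmap_protect_rmap() above is the standard way to populate a radix
 * tree under a spinlock. radix_tree_preload() pre-allocates tree nodes
 * while sleeping is still allowed, so the radix_tree_insert() performed
 * by gmap_insert_rmap() under sg->guest_table_lock cannot fail with
 * -ENOMEM, and radix_tree_preload_end() releases the per-cpu reserve.
 */
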
12794be130a0SMartin Schwidefsky #define _SHADOW_RMAP_MASK	0x7
12804be130a0SMartin Schwidefsky #define _SHADOW_RMAP_REGION1	0x5
12814be130a0SMartin Schwidefsky #define _SHADOW_RMAP_REGION2	0x4
12824be130a0SMartin Schwidefsky #define _SHADOW_RMAP_REGION3	0x3
12834be130a0SMartin Schwidefsky #define _SHADOW_RMAP_SEGMENT	0x2
12844be130a0SMartin Schwidefsky #define _SHADOW_RMAP_PGTABLE	0x1
12854be130a0SMartin Schwidefsky 
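/*
 * Illustrative note, not part of the original file: the low three bits
 * of a gmap_rmap->raddr encode which shadow table level the rmap refers
 * to, e.g. gmap_shadow_r2t() below records
 *	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
 * so the invalidation path can mask with _SHADOW_RMAP_MASK and pick the
 * matching gmap_unshadow_*() function for that level.
 */
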
12864be130a0SMartin Schwidefsky /**
12874be130a0SMartin Schwidefsky  * gmap_idte_one - invalidate a single region or segment table entry
12884be130a0SMartin Schwidefsky  * @asce: region or segment table *origin* + table-type bits
12894be130a0SMartin Schwidefsky  * @vaddr: virtual address to identify the table entry to flush
12904be130a0SMartin Schwidefsky  *
12914be130a0SMartin Schwidefsky  * The invalid bit of a single region or segment table entry is set
12924be130a0SMartin Schwidefsky  * and the TLB entries that depend on it are flushed.
12934be130a0SMartin Schwidefsky  * The table-type of the @asce identifies the portion of the @vaddr
12944be130a0SMartin Schwidefsky  * that is used as the invalidation index.
12954be130a0SMartin Schwidefsky  */
12964be130a0SMartin Schwidefsky static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
12974be130a0SMartin Schwidefsky {
12984be130a0SMartin Schwidefsky 	asm volatile(
1299731efc96SVasily Gorbik 		"	idte	%0,0,%1"
13004be130a0SMartin Schwidefsky 		: : "a" (asce), "a" (vaddr) : "cc", "memory");
13014be130a0SMartin Schwidefsky }
13024be130a0SMartin Schwidefsky 
13034be130a0SMartin Schwidefsky /**
13044be130a0SMartin Schwidefsky  * gmap_unshadow_page - remove a page from a shadow page table
13054be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
13064be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
13074be130a0SMartin Schwidefsky  *
13084be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
13094be130a0SMartin Schwidefsky  */
13104be130a0SMartin Schwidefsky static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
13114be130a0SMartin Schwidefsky {
13124be130a0SMartin Schwidefsky 	unsigned long *table;
13134be130a0SMartin Schwidefsky 
13144be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
13154be130a0SMartin Schwidefsky 	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
13164be130a0SMartin Schwidefsky 	if (!table || *table & _PAGE_INVALID)
13174be130a0SMartin Schwidefsky 		return;
1318f1c1174fSHeiko Carstens 	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
13194be130a0SMartin Schwidefsky 	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
13204be130a0SMartin Schwidefsky }
13214be130a0SMartin Schwidefsky 
13224be130a0SMartin Schwidefsky /**
13234be130a0SMartin Schwidefsky  * __gmap_unshadow_pgt - remove all entries from a shadow page table
13244be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
13254be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
13264be130a0SMartin Schwidefsky  * @pgt: pointer to the start of a shadow page table
13274be130a0SMartin Schwidefsky  *
13284be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
13294be130a0SMartin Schwidefsky  */
13304be130a0SMartin Schwidefsky static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
13314be130a0SMartin Schwidefsky 				unsigned long *pgt)
13324be130a0SMartin Schwidefsky {
13334be130a0SMartin Schwidefsky 	int i;
13344be130a0SMartin Schwidefsky 
13354be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
1336f1c1174fSHeiko Carstens 	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
13374be130a0SMartin Schwidefsky 		pgt[i] = _PAGE_INVALID;
13384be130a0SMartin Schwidefsky }
13394be130a0SMartin Schwidefsky 
13404be130a0SMartin Schwidefsky /**
13414be130a0SMartin Schwidefsky  * gmap_unshadow_pgt - remove a shadow page table from a segment entry
13424be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
13434be130a0SMartin Schwidefsky  * @raddr: address in the shadow guest address space
13444be130a0SMartin Schwidefsky  *
13454be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
13464be130a0SMartin Schwidefsky  */
13474be130a0SMartin Schwidefsky static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
13484be130a0SMartin Schwidefsky {
1349079f0c21SNico Boehr 	unsigned long *ste;
1350079f0c21SNico Boehr 	phys_addr_t sto, pgt;
13514be130a0SMartin Schwidefsky 	struct page *page;
13524be130a0SMartin Schwidefsky 
13534be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
13544be130a0SMartin Schwidefsky 	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
1355998f637cSDavid Hildenbrand 	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
13564be130a0SMartin Schwidefsky 		return;
1357f1c1174fSHeiko Carstens 	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
1358079f0c21SNico Boehr 	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
13594be130a0SMartin Schwidefsky 	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
1360079f0c21SNico Boehr 	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
13614be130a0SMartin Schwidefsky 	*ste = _SEGMENT_ENTRY_EMPTY;
1362079f0c21SNico Boehr 	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
13634be130a0SMartin Schwidefsky 	/* Free page table */
1364079f0c21SNico Boehr 	page = phys_to_page(pgt);
13654be130a0SMartin Schwidefsky 	list_del(&page->lru);
13664be130a0SMartin Schwidefsky 	page_table_free_pgste(page);
13674be130a0SMartin Schwidefsky }
13684be130a0SMartin Schwidefsky 
13694be130a0SMartin Schwidefsky /**
13704be130a0SMartin Schwidefsky  * __gmap_unshadow_sgt - remove all entries from a shadow segment table
13714be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
13724be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
13734be130a0SMartin Schwidefsky  * @sgt: pointer to the start of a shadow segment table
13744be130a0SMartin Schwidefsky  *
13754be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
13764be130a0SMartin Schwidefsky  */
13774be130a0SMartin Schwidefsky static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
13784be130a0SMartin Schwidefsky 				unsigned long *sgt)
13794be130a0SMartin Schwidefsky {
13804be130a0SMartin Schwidefsky 	struct page *page;
1381079f0c21SNico Boehr 	phys_addr_t pgt;
13824be130a0SMartin Schwidefsky 	int i;
13834be130a0SMartin Schwidefsky 
13844be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
1385f1c1174fSHeiko Carstens 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
1386998f637cSDavid Hildenbrand 		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
13874be130a0SMartin Schwidefsky 			continue;
1388079f0c21SNico Boehr 		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
13894be130a0SMartin Schwidefsky 		sgt[i] = _SEGMENT_ENTRY_EMPTY;
1390079f0c21SNico Boehr 		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
13914be130a0SMartin Schwidefsky 		/* Free page table */
1392079f0c21SNico Boehr 		page = phys_to_page(pgt);
13934be130a0SMartin Schwidefsky 		list_del(&page->lru);
13944be130a0SMartin Schwidefsky 		page_table_free_pgste(page);
13954be130a0SMartin Schwidefsky 	}
13964be130a0SMartin Schwidefsky }
13974be130a0SMartin Schwidefsky 
13984be130a0SMartin Schwidefsky /**
13994be130a0SMartin Schwidefsky  * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
14004be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
14014be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
14024be130a0SMartin Schwidefsky  *
14034be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
14044be130a0SMartin Schwidefsky  */
14054be130a0SMartin Schwidefsky static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
14064be130a0SMartin Schwidefsky {
1407079f0c21SNico Boehr 	unsigned long r3o, *r3e;
1408079f0c21SNico Boehr 	phys_addr_t sgt;
14094be130a0SMartin Schwidefsky 	struct page *page;
14104be130a0SMartin Schwidefsky 
14114be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
14124be130a0SMartin Schwidefsky 	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
1413998f637cSDavid Hildenbrand 	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
14144be130a0SMartin Schwidefsky 		return;
1415f1c1174fSHeiko Carstens 	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
1416f1c1174fSHeiko Carstens 	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
1417079f0c21SNico Boehr 	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
1418079f0c21SNico Boehr 	sgt = *r3e & _REGION_ENTRY_ORIGIN;
14194be130a0SMartin Schwidefsky 	*r3e = _REGION3_ENTRY_EMPTY;
1420079f0c21SNico Boehr 	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
14214be130a0SMartin Schwidefsky 	/* Free segment table */
1422079f0c21SNico Boehr 	page = phys_to_page(sgt);
14234be130a0SMartin Schwidefsky 	list_del(&page->lru);
1424f1c1174fSHeiko Carstens 	__free_pages(page, CRST_ALLOC_ORDER);
14254be130a0SMartin Schwidefsky }
14264be130a0SMartin Schwidefsky 
14274be130a0SMartin Schwidefsky /**
14284be130a0SMartin Schwidefsky  * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
14294be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
14304be130a0SMartin Schwidefsky  * @raddr: address in the shadow guest address space
14314be130a0SMartin Schwidefsky  * @r3t: pointer to the start of a shadow region-3 table
14324be130a0SMartin Schwidefsky  *
14334be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
14344be130a0SMartin Schwidefsky  */
14354be130a0SMartin Schwidefsky static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
14364be130a0SMartin Schwidefsky 				unsigned long *r3t)
14374be130a0SMartin Schwidefsky {
14384be130a0SMartin Schwidefsky 	struct page *page;
1439079f0c21SNico Boehr 	phys_addr_t sgt;
14404be130a0SMartin Schwidefsky 	int i;
14414be130a0SMartin Schwidefsky 
14424be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
1443f1c1174fSHeiko Carstens 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
1444998f637cSDavid Hildenbrand 		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
14454be130a0SMartin Schwidefsky 			continue;
1446079f0c21SNico Boehr 		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
14474be130a0SMartin Schwidefsky 		r3t[i] = _REGION3_ENTRY_EMPTY;
1448079f0c21SNico Boehr 		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
14494be130a0SMartin Schwidefsky 		/* Free segment table */
1450079f0c21SNico Boehr 		page = phys_to_page(sgt);
14514be130a0SMartin Schwidefsky 		list_del(&page->lru);
1452f1c1174fSHeiko Carstens 		__free_pages(page, CRST_ALLOC_ORDER);
14534be130a0SMartin Schwidefsky 	}
14544be130a0SMartin Schwidefsky }
14554be130a0SMartin Schwidefsky 
14564be130a0SMartin Schwidefsky /**
14574be130a0SMartin Schwidefsky  * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
14584be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
14594be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
14604be130a0SMartin Schwidefsky  *
14614be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
14624be130a0SMartin Schwidefsky  */
14634be130a0SMartin Schwidefsky static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
14644be130a0SMartin Schwidefsky {
1465079f0c21SNico Boehr 	unsigned long r2o, *r2e;
1466079f0c21SNico Boehr 	phys_addr_t r3t;
14674be130a0SMartin Schwidefsky 	struct page *page;
14684be130a0SMartin Schwidefsky 
14694be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
14704be130a0SMartin Schwidefsky 	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
1471998f637cSDavid Hildenbrand 	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
14724be130a0SMartin Schwidefsky 		return;
1473f1c1174fSHeiko Carstens 	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
1474f1c1174fSHeiko Carstens 	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
1475079f0c21SNico Boehr 	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
1476079f0c21SNico Boehr 	r3t = *r2e & _REGION_ENTRY_ORIGIN;
14774be130a0SMartin Schwidefsky 	*r2e = _REGION2_ENTRY_EMPTY;
1478079f0c21SNico Boehr 	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
14794be130a0SMartin Schwidefsky 	/* Free region 3 table */
1480079f0c21SNico Boehr 	page = phys_to_page(r3t);
14814be130a0SMartin Schwidefsky 	list_del(&page->lru);
1482f1c1174fSHeiko Carstens 	__free_pages(page, CRST_ALLOC_ORDER);
14834be130a0SMartin Schwidefsky }
14844be130a0SMartin Schwidefsky 
14854be130a0SMartin Schwidefsky /**
14864be130a0SMartin Schwidefsky  * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
14874be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
14884be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
14894be130a0SMartin Schwidefsky  * @r2t: pointer to the start of a shadow region-2 table
14904be130a0SMartin Schwidefsky  *
14914be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
14924be130a0SMartin Schwidefsky  */
14934be130a0SMartin Schwidefsky static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
14944be130a0SMartin Schwidefsky 				unsigned long *r2t)
14954be130a0SMartin Schwidefsky {
1496079f0c21SNico Boehr 	phys_addr_t r3t;
14974be130a0SMartin Schwidefsky 	struct page *page;
14984be130a0SMartin Schwidefsky 	int i;
14994be130a0SMartin Schwidefsky 
15004be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
1501f1c1174fSHeiko Carstens 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
1502998f637cSDavid Hildenbrand 		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
15034be130a0SMartin Schwidefsky 			continue;
1504079f0c21SNico Boehr 		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
15054be130a0SMartin Schwidefsky 		r2t[i] = _REGION2_ENTRY_EMPTY;
1506079f0c21SNico Boehr 		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
15074be130a0SMartin Schwidefsky 		/* Free region 3 table */
1508079f0c21SNico Boehr 		page = phys_to_page(r3t);
15094be130a0SMartin Schwidefsky 		list_del(&page->lru);
1510f1c1174fSHeiko Carstens 		__free_pages(page, CRST_ALLOC_ORDER);
15114be130a0SMartin Schwidefsky 	}
15124be130a0SMartin Schwidefsky }
15134be130a0SMartin Schwidefsky 
15144be130a0SMartin Schwidefsky /**
15154be130a0SMartin Schwidefsky  * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
15164be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
15174be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
15184be130a0SMartin Schwidefsky  *
15194be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
15204be130a0SMartin Schwidefsky  */
15214be130a0SMartin Schwidefsky static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
15224be130a0SMartin Schwidefsky {
1523079f0c21SNico Boehr 	unsigned long r1o, *r1e;
15244be130a0SMartin Schwidefsky 	struct page *page;
1525079f0c21SNico Boehr 	phys_addr_t r2t;
15264be130a0SMartin Schwidefsky 
15274be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
15284be130a0SMartin Schwidefsky 	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
1529998f637cSDavid Hildenbrand 	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
15304be130a0SMartin Schwidefsky 		return;
1531f1c1174fSHeiko Carstens 	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
1532f1c1174fSHeiko Carstens 	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
1533079f0c21SNico Boehr 	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
1534079f0c21SNico Boehr 	r2t = *r1e & _REGION_ENTRY_ORIGIN;
15354be130a0SMartin Schwidefsky 	*r1e = _REGION1_ENTRY_EMPTY;
1536079f0c21SNico Boehr 	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
15374be130a0SMartin Schwidefsky 	/* Free region 2 table */
1538079f0c21SNico Boehr 	page = phys_to_page(r2t);
15394be130a0SMartin Schwidefsky 	list_del(&page->lru);
1540f1c1174fSHeiko Carstens 	__free_pages(page, CRST_ALLOC_ORDER);
15414be130a0SMartin Schwidefsky }
15424be130a0SMartin Schwidefsky 
15434be130a0SMartin Schwidefsky /**
15444be130a0SMartin Schwidefsky  * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
15454be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
15464be130a0SMartin Schwidefsky  * @raddr: rmap address in the shadow guest address space
15474be130a0SMartin Schwidefsky  * @r1t: pointer to the start of a shadow region-1 table
15484be130a0SMartin Schwidefsky  *
15494be130a0SMartin Schwidefsky  * Called with the sg->guest_table_lock held
15504be130a0SMartin Schwidefsky  */
15514be130a0SMartin Schwidefsky static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
15524be130a0SMartin Schwidefsky 				unsigned long *r1t)
15534be130a0SMartin Schwidefsky {
1554079f0c21SNico Boehr 	unsigned long asce;
15554be130a0SMartin Schwidefsky 	struct page *page;
1556079f0c21SNico Boehr 	phys_addr_t r2t;
15574be130a0SMartin Schwidefsky 	int i;
15584be130a0SMartin Schwidefsky 
15594be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
1560079f0c21SNico Boehr 	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
1561f1c1174fSHeiko Carstens 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
1562998f637cSDavid Hildenbrand 		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
15634be130a0SMartin Schwidefsky 			continue;
1564079f0c21SNico Boehr 		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
1565079f0c21SNico Boehr 		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
15664be130a0SMartin Schwidefsky 		/* Clear entry and flush translation r1t -> r2t */
15674be130a0SMartin Schwidefsky 		gmap_idte_one(asce, raddr);
15684be130a0SMartin Schwidefsky 		r1t[i] = _REGION1_ENTRY_EMPTY;
15694be130a0SMartin Schwidefsky 		/* Free region 2 table */
1570079f0c21SNico Boehr 		page = phys_to_page(r2t);
15714be130a0SMartin Schwidefsky 		list_del(&page->lru);
1572f1c1174fSHeiko Carstens 		__free_pages(page, CRST_ALLOC_ORDER);
15734be130a0SMartin Schwidefsky 	}
15744be130a0SMartin Schwidefsky }
15754be130a0SMartin Schwidefsky 
15764be130a0SMartin Schwidefsky /**
15774be130a0SMartin Schwidefsky  * gmap_unshadow - remove a shadow page table completely
15784be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
15794be130a0SMartin Schwidefsky  *
15804be130a0SMartin Schwidefsky  * Called with sg->guest_table_lock held
15814be130a0SMartin Schwidefsky  */
15824be130a0SMartin Schwidefsky static void gmap_unshadow(struct gmap *sg)
15834be130a0SMartin Schwidefsky {
15844be130a0SMartin Schwidefsky 	unsigned long *table;
15854be130a0SMartin Schwidefsky 
15864be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
15874be130a0SMartin Schwidefsky 	if (sg->removed)
15884be130a0SMartin Schwidefsky 		return;
15894be130a0SMartin Schwidefsky 	sg->removed = 1;
15904be130a0SMartin Schwidefsky 	gmap_call_notifier(sg, 0, -1UL);
1591eea3678dSDavid Hildenbrand 	gmap_flush_tlb(sg);
1592079f0c21SNico Boehr 	table = __va(sg->asce & _ASCE_ORIGIN);
15934be130a0SMartin Schwidefsky 	switch (sg->asce & _ASCE_TYPE_MASK) {
15944be130a0SMartin Schwidefsky 	case _ASCE_TYPE_REGION1:
15954be130a0SMartin Schwidefsky 		__gmap_unshadow_r1t(sg, 0, table);
15964be130a0SMartin Schwidefsky 		break;
15974be130a0SMartin Schwidefsky 	case _ASCE_TYPE_REGION2:
15984be130a0SMartin Schwidefsky 		__gmap_unshadow_r2t(sg, 0, table);
15994be130a0SMartin Schwidefsky 		break;
16004be130a0SMartin Schwidefsky 	case _ASCE_TYPE_REGION3:
16014be130a0SMartin Schwidefsky 		__gmap_unshadow_r3t(sg, 0, table);
16024be130a0SMartin Schwidefsky 		break;
16034be130a0SMartin Schwidefsky 	case _ASCE_TYPE_SEGMENT:
16044be130a0SMartin Schwidefsky 		__gmap_unshadow_sgt(sg, 0, table);
16054be130a0SMartin Schwidefsky 		break;
16064be130a0SMartin Schwidefsky 	}
16074be130a0SMartin Schwidefsky }
16084be130a0SMartin Schwidefsky 
16094be130a0SMartin Schwidefsky /**
16104be130a0SMartin Schwidefsky  * gmap_find_shadow - find a specific asce in the list of shadow tables
16114be130a0SMartin Schwidefsky  * @parent: pointer to the parent gmap
16124be130a0SMartin Schwidefsky  * @asce: ASCE for which the shadow table is created
16135b062bd4SDavid Hildenbrand  * @edat_level: edat level to be used for the shadow translation
16144be130a0SMartin Schwidefsky  *
16154be130a0SMartin Schwidefsky  * Returns the pointer to a gmap if a shadow table with the given asce is
16160f7f8489SDavid Hildenbrand  * already available, ERR_PTR(-EAGAIN) if another one is just being created,
16170f7f8489SDavid Hildenbrand  * otherwise NULL
16184be130a0SMartin Schwidefsky  */
16195b062bd4SDavid Hildenbrand static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
16205b062bd4SDavid Hildenbrand 				     int edat_level)
16214be130a0SMartin Schwidefsky {
16224be130a0SMartin Schwidefsky 	struct gmap *sg;
16234be130a0SMartin Schwidefsky 
16244be130a0SMartin Schwidefsky 	list_for_each_entry(sg, &parent->children, list) {
16255b062bd4SDavid Hildenbrand 		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
16265b062bd4SDavid Hildenbrand 		    sg->removed)
16274be130a0SMartin Schwidefsky 			continue;
16280f7f8489SDavid Hildenbrand 		if (!sg->initialized)
16290f7f8489SDavid Hildenbrand 			return ERR_PTR(-EAGAIN);
163040e90656SChuhong Yuan 		refcount_inc(&sg->ref_count);
16314be130a0SMartin Schwidefsky 		return sg;
16324be130a0SMartin Schwidefsky 	}
16334be130a0SMartin Schwidefsky 	return NULL;
16344be130a0SMartin Schwidefsky }
16354be130a0SMartin Schwidefsky 
16364be130a0SMartin Schwidefsky /**
16375b6c963bSDavid Hildenbrand  * gmap_shadow_valid - check if a shadow guest address space matches the
16385b6c963bSDavid Hildenbrand  *                     given properties and is still valid
16395b6c963bSDavid Hildenbrand  * @sg: pointer to the shadow guest address space structure
16405b6c963bSDavid Hildenbrand  * @asce: ASCE for which the shadow table is requested
16415b6c963bSDavid Hildenbrand  * @edat_level: edat level to be used for the shadow translation
16425b6c963bSDavid Hildenbrand  *
16435b6c963bSDavid Hildenbrand  * Returns 1 if the gmap shadow is still valid and matches the given
16445b6c963bSDavid Hildenbrand  * properties; the caller can continue using it. Returns 0 otherwise; the
16455b6c963bSDavid Hildenbrand  * caller has to request a new shadow gmap in this case.
16465b6c963bSDavid Hildenbrand  *
16475b6c963bSDavid Hildenbrand  */
16485b6c963bSDavid Hildenbrand int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
16495b6c963bSDavid Hildenbrand {
16505b6c963bSDavid Hildenbrand 	if (sg->removed)
16515b6c963bSDavid Hildenbrand 		return 0;
16525b6c963bSDavid Hildenbrand 	return sg->orig_asce == asce && sg->edat_level == edat_level;
16535b6c963bSDavid Hildenbrand }
16545b6c963bSDavid Hildenbrand EXPORT_SYMBOL_GPL(gmap_shadow_valid);
16555b6c963bSDavid Hildenbrand 
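/*
 * Hedged usage sketch, not part of gmap.c: revalidating a cached shadow
 * before reuse. The helper name is made up for illustration.
 */
static bool shadow_still_usable(struct gmap *sg, unsigned long asce,
				int edat_level)
{
	/* false means the caller must request a new shadow via gmap_shadow() */
	return sg && gmap_shadow_valid(sg, asce, edat_level);
}
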
16565b6c963bSDavid Hildenbrand /**
16574be130a0SMartin Schwidefsky  * gmap_shadow - create/find a shadow guest address space
16584be130a0SMartin Schwidefsky  * @parent: pointer to the parent gmap
16594be130a0SMartin Schwidefsky  * @asce: ASCE for which the shadow table is created
16605b062bd4SDavid Hildenbrand  * @edat_level: edat level to be used for the shadow translation
16614be130a0SMartin Schwidefsky  *
16624be130a0SMartin Schwidefsky  * The pages of the top level page table referred to by the asce parameter
16634be130a0SMartin Schwidefsky  * will be set to read-only and marked in the PGSTEs of the kvm process.
16644be130a0SMartin Schwidefsky  * The shadow table will be removed automatically on any change to the
16654be130a0SMartin Schwidefsky  * PTE mapping for the source table.
16664be130a0SMartin Schwidefsky  *
16670f7f8489SDavid Hildenbrand  * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
16680f7f8489SDavid Hildenbrand  * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
16690f7f8489SDavid Hildenbrand  * parent gmap table could not be protected.
16704be130a0SMartin Schwidefsky  */
16715b062bd4SDavid Hildenbrand struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
16725b062bd4SDavid Hildenbrand 			 int edat_level)
16734be130a0SMartin Schwidefsky {
16744be130a0SMartin Schwidefsky 	struct gmap *sg, *new;
16754be130a0SMartin Schwidefsky 	unsigned long limit;
16764be130a0SMartin Schwidefsky 	int rc;
16774be130a0SMartin Schwidefsky 
1678a9e00d83SJanosch Frank 	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
16794be130a0SMartin Schwidefsky 	BUG_ON(gmap_is_shadow(parent));
16804be130a0SMartin Schwidefsky 	spin_lock(&parent->shadow_lock);
16815b062bd4SDavid Hildenbrand 	sg = gmap_find_shadow(parent, asce, edat_level);
16824be130a0SMartin Schwidefsky 	spin_unlock(&parent->shadow_lock);
16834be130a0SMartin Schwidefsky 	if (sg)
16844be130a0SMartin Schwidefsky 		return sg;
16854be130a0SMartin Schwidefsky 	/* Create a new shadow gmap */
16864be130a0SMartin Schwidefsky 	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
16873218f709SDavid Hildenbrand 	if (asce & _ASCE_REAL_SPACE)
16883218f709SDavid Hildenbrand 		limit = -1UL;
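	/*
	 * Worked example, not part of the original file: the shift above
	 * maps the ASCE type to the space the table covers. With
	 * t = (asce & _ASCE_TYPE_MASK) >> 2:
	 *	segment  (t=0): -1UL >> 33 =  2G - 1
	 *	region-3 (t=1): -1UL >> 22 =  4T - 1
	 *	region-2 (t=2): -1UL >> 11 =  8P - 1
	 *	region-1 (t=3): -1UL >>  0 = 16E - 1 (also the real-space case)
	 */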
16894be130a0SMartin Schwidefsky 	new = gmap_alloc(limit);
16904be130a0SMartin Schwidefsky 	if (!new)
16910f7f8489SDavid Hildenbrand 		return ERR_PTR(-ENOMEM);
16924be130a0SMartin Schwidefsky 	new->mm = parent->mm;
16934be130a0SMartin Schwidefsky 	new->parent = gmap_get(parent);
1694f5572c03SChristian Borntraeger 	new->private = parent->private;
16954be130a0SMartin Schwidefsky 	new->orig_asce = asce;
16965b062bd4SDavid Hildenbrand 	new->edat_level = edat_level;
16970f7f8489SDavid Hildenbrand 	new->initialized = false;
16980f7f8489SDavid Hildenbrand 	spin_lock(&parent->shadow_lock);
16990f7f8489SDavid Hildenbrand 	/* Recheck if another CPU created the same shadow */
17005b062bd4SDavid Hildenbrand 	sg = gmap_find_shadow(parent, asce, edat_level);
17010f7f8489SDavid Hildenbrand 	if (sg) {
17020f7f8489SDavid Hildenbrand 		spin_unlock(&parent->shadow_lock);
17030f7f8489SDavid Hildenbrand 		gmap_free(new);
17040f7f8489SDavid Hildenbrand 		return sg;
17050f7f8489SDavid Hildenbrand 	}
1706717c0555SDavid Hildenbrand 	if (asce & _ASCE_REAL_SPACE) {
1707717c0555SDavid Hildenbrand 		/* only allow one real-space gmap shadow */
1708717c0555SDavid Hildenbrand 		list_for_each_entry(sg, &parent->children, list) {
1709717c0555SDavid Hildenbrand 			if (sg->orig_asce & _ASCE_REAL_SPACE) {
1710717c0555SDavid Hildenbrand 				spin_lock(&sg->guest_table_lock);
1711717c0555SDavid Hildenbrand 				gmap_unshadow(sg);
1712717c0555SDavid Hildenbrand 				spin_unlock(&sg->guest_table_lock);
1713717c0555SDavid Hildenbrand 				list_del(&sg->list);
1714717c0555SDavid Hildenbrand 				gmap_put(sg);
1715717c0555SDavid Hildenbrand 				break;
1716717c0555SDavid Hildenbrand 			}
1717717c0555SDavid Hildenbrand 		}
1718717c0555SDavid Hildenbrand 	}
171940e90656SChuhong Yuan 	refcount_set(&new->ref_count, 2);
17200f7f8489SDavid Hildenbrand 	list_add(&new->list, &parent->children);
17213218f709SDavid Hildenbrand 	if (asce & _ASCE_REAL_SPACE) {
17223218f709SDavid Hildenbrand 		/* nothing to protect, return right away */
17233218f709SDavid Hildenbrand 		new->initialized = true;
17243218f709SDavid Hildenbrand 		spin_unlock(&parent->shadow_lock);
17253218f709SDavid Hildenbrand 		return new;
17263218f709SDavid Hildenbrand 	}
17270f7f8489SDavid Hildenbrand 	spin_unlock(&parent->shadow_lock);
17280f7f8489SDavid Hildenbrand 	/* protect after insertion, so it will get properly invalidated */
1729d8ed45c5SMichel Lespinasse 	mmap_read_lock(parent->mm);
17304be130a0SMartin Schwidefsky 	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
1731f1c1174fSHeiko Carstens 				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
17322c46e974SJanosch Frank 				PROT_READ, GMAP_NOTIFY_SHADOW);
1733d8ed45c5SMichel Lespinasse 	mmap_read_unlock(parent->mm);
17344be130a0SMartin Schwidefsky 	spin_lock(&parent->shadow_lock);
17350f7f8489SDavid Hildenbrand 	new->initialized = true;
17360f7f8489SDavid Hildenbrand 	if (rc) {
17370f7f8489SDavid Hildenbrand 		list_del(&new->list);
17380f7f8489SDavid Hildenbrand 		gmap_free(new);
17390f7f8489SDavid Hildenbrand 		new = ERR_PTR(rc);
17404be130a0SMartin Schwidefsky 	}
17414be130a0SMartin Schwidefsky 	spin_unlock(&parent->shadow_lock);
17420f7f8489SDavid Hildenbrand 	return new;
17434be130a0SMartin Schwidefsky }
17444be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow);
17454be130a0SMartin Schwidefsky 
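/*
 * Hedged usage sketch, not part of gmap.c: a VSIE-style caller would
 * revalidate a cached shadow with gmap_shadow_valid() and otherwise
 * request a new one, being prepared for every ERR_PTR() outcome
 * documented above. vsie_get_shadow() is a made-up name; a real caller
 * would also drop its reference to a stale sg with gmap_put().
 */
static struct gmap *vsie_get_shadow(struct gmap *sg, struct gmap *parent,
				    unsigned long asce, int edat_level)
{
	if (sg && gmap_shadow_valid(sg, asce, edat_level))
		return sg;		/* cached shadow still matches */
	sg = gmap_shadow(parent, asce, edat_level);
	/*
	 * ERR_PTR(-EAGAIN): another CPU is initializing the same shadow,
	 * retry from the caller's main loop. -ENOMEM/-EFAULT: hard errors.
	 */
	return sg;
}
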
17464be130a0SMartin Schwidefsky /**
17474be130a0SMartin Schwidefsky  * gmap_shadow_r2t - create an empty shadow region 2 table
17484be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
17494be130a0SMartin Schwidefsky  * @saddr: faulting address in the shadow gmap
17504be130a0SMartin Schwidefsky  * @r2t: parent gmap address of the region 2 table to get shadowed
17513218f709SDavid Hildenbrand  * @fake: r2t references a contiguous guest memory block, not an r2t
17524be130a0SMartin Schwidefsky  *
17534be130a0SMartin Schwidefsky  * The r2t parameter specifies the address of the source table. The
17544be130a0SMartin Schwidefsky  * four pages of the source table are made read-only in the parent gmap
17554be130a0SMartin Schwidefsky  * address space. A write to the source table area @r2t will automatically
1756cada938aSHeiko Carstens  * remove the shadow r2 table and all of its descendants.
17574be130a0SMartin Schwidefsky  *
17584be130a0SMartin Schwidefsky  * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
17594be130a0SMartin Schwidefsky  * shadow table structure is incomplete, -ENOMEM if out of memory and
17604be130a0SMartin Schwidefsky  * -EFAULT if an address in the parent gmap could not be resolved.
17614be130a0SMartin Schwidefsky  *
1762c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
17634be130a0SMartin Schwidefsky  */
17643218f709SDavid Hildenbrand int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
17653218f709SDavid Hildenbrand 		    int fake)
17664be130a0SMartin Schwidefsky {
17674be130a0SMartin Schwidefsky 	unsigned long raddr, origin, offset, len;
1768079f0c21SNico Boehr 	unsigned long *table;
1769079f0c21SNico Boehr 	phys_addr_t s_r2t;
17704be130a0SMartin Schwidefsky 	struct page *page;
17714be130a0SMartin Schwidefsky 	int rc;
17724be130a0SMartin Schwidefsky 
17734be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
17744be130a0SMartin Schwidefsky 	/* Allocate a shadow region second table */
1775bfabe8d0SHeiko Carstens 	page = gmap_alloc_crst();
17764be130a0SMartin Schwidefsky 	if (!page)
17774be130a0SMartin Schwidefsky 		return -ENOMEM;
17784be130a0SMartin Schwidefsky 	page->index = r2t & _REGION_ENTRY_ORIGIN;
17793218f709SDavid Hildenbrand 	if (fake)
17803218f709SDavid Hildenbrand 		page->index |= GMAP_SHADOW_FAKE_TABLE;
1781079f0c21SNico Boehr 	s_r2t = page_to_phys(page);
17824be130a0SMartin Schwidefsky 	/* Install shadow region second table */
17834be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
17844be130a0SMartin Schwidefsky 	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
17854be130a0SMartin Schwidefsky 	if (!table) {
17864be130a0SMartin Schwidefsky 		rc = -EAGAIN;		/* Race with unshadow */
17874be130a0SMartin Schwidefsky 		goto out_free;
17884be130a0SMartin Schwidefsky 	}
17894be130a0SMartin Schwidefsky 	if (!(*table & _REGION_ENTRY_INVALID)) {
17904be130a0SMartin Schwidefsky 		rc = 0;			/* Already established */
17914be130a0SMartin Schwidefsky 		goto out_free;
1792998f637cSDavid Hildenbrand 	} else if (*table & _REGION_ENTRY_ORIGIN) {
1793998f637cSDavid Hildenbrand 		rc = -EAGAIN;		/* Race with shadow */
1794998f637cSDavid Hildenbrand 		goto out_free;
17954be130a0SMartin Schwidefsky 	}
1796079f0c21SNico Boehr 	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
1797998f637cSDavid Hildenbrand 	/* mark as invalid as long as the parent table is not protected */
1798079f0c21SNico Boehr 	*table = s_r2t | _REGION_ENTRY_LENGTH |
1799998f637cSDavid Hildenbrand 		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
1800fd8d4e3aSDavid Hildenbrand 	if (sg->edat_level >= 1)
1801fd8d4e3aSDavid Hildenbrand 		*table |= (r2t & _REGION_ENTRY_PROTECT);
18024be130a0SMartin Schwidefsky 	list_add(&page->lru, &sg->crst_list);
18033218f709SDavid Hildenbrand 	if (fake) {
18043218f709SDavid Hildenbrand 		/* nothing to protect for fake tables */
18053218f709SDavid Hildenbrand 		*table &= ~_REGION_ENTRY_INVALID;
18063218f709SDavid Hildenbrand 		spin_unlock(&sg->guest_table_lock);
18073218f709SDavid Hildenbrand 		return 0;
18083218f709SDavid Hildenbrand 	}
18094be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
18104be130a0SMartin Schwidefsky 	/* Make r2t read-only in parent gmap page table */
1811f1c1174fSHeiko Carstens 	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
18124be130a0SMartin Schwidefsky 	origin = r2t & _REGION_ENTRY_ORIGIN;
1813f1c1174fSHeiko Carstens 	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1814f1c1174fSHeiko Carstens 	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
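	/*
	 * Worked example, not part of the original file: a full region
	 * table is 2048 entries (4 pages); the table-offset and
	 * table-length bits of @r2t bound the valid part. For TF=1, TL=3:
	 *	offset = 1 * 4K = 4K
	 *	len    = (3 + 1) * 4K - 4K = 12K
	 * so only the last three pages of the source table are protected.
	 */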
18155c528db0SDavid Hildenbrand 	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
18164be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
1817998f637cSDavid Hildenbrand 	if (!rc) {
1818998f637cSDavid Hildenbrand 		table = gmap_table_walk(sg, saddr, 4);
1819079f0c21SNico Boehr 		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
1820998f637cSDavid Hildenbrand 			rc = -EAGAIN;		/* Race with unshadow */
1821998f637cSDavid Hildenbrand 		else
1822998f637cSDavid Hildenbrand 			*table &= ~_REGION_ENTRY_INVALID;
1823998f637cSDavid Hildenbrand 	} else {
18244be130a0SMartin Schwidefsky 		gmap_unshadow_r2t(sg, raddr);
18254be130a0SMartin Schwidefsky 	}
1826998f637cSDavid Hildenbrand 	spin_unlock(&sg->guest_table_lock);
18274be130a0SMartin Schwidefsky 	return rc;
18284be130a0SMartin Schwidefsky out_free:
18294be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
1830f1c1174fSHeiko Carstens 	__free_pages(page, CRST_ALLOC_ORDER);
18314be130a0SMartin Schwidefsky 	return rc;
18324be130a0SMartin Schwidefsky }
18334be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
18344be130a0SMartin Schwidefsky 
18354be130a0SMartin Schwidefsky /**
18364be130a0SMartin Schwidefsky  * gmap_shadow_r3t - create a shadow region 3 table
18374be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
18384be130a0SMartin Schwidefsky  * @saddr: faulting address in the shadow gmap
18394be130a0SMartin Schwidefsky  * @r3t: parent gmap address of the region 3 table to get shadowed
18403218f709SDavid Hildenbrand  * @fake: r3t references a contiguous guest memory block, not an r3t
18414be130a0SMartin Schwidefsky  *
18424be130a0SMartin Schwidefsky  * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
18434be130a0SMartin Schwidefsky  * shadow table structure is incomplete, -ENOMEM if out of memory and
18444be130a0SMartin Schwidefsky  * -EFAULT if an address in the parent gmap could not be resolved.
18454be130a0SMartin Schwidefsky  *
1846c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
18474be130a0SMartin Schwidefsky  */
18483218f709SDavid Hildenbrand int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
18493218f709SDavid Hildenbrand 		    int fake)
18504be130a0SMartin Schwidefsky {
18514be130a0SMartin Schwidefsky 	unsigned long raddr, origin, offset, len;
1852079f0c21SNico Boehr 	unsigned long *table;
1853079f0c21SNico Boehr 	phys_addr_t s_r3t;
18544be130a0SMartin Schwidefsky 	struct page *page;
18554be130a0SMartin Schwidefsky 	int rc;
18564be130a0SMartin Schwidefsky 
18574be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
18584be130a0SMartin Schwidefsky 	/* Allocate a shadow region third table */
1859bfabe8d0SHeiko Carstens 	page = gmap_alloc_crst();
18604be130a0SMartin Schwidefsky 	if (!page)
18614be130a0SMartin Schwidefsky 		return -ENOMEM;
18624be130a0SMartin Schwidefsky 	page->index = r3t & _REGION_ENTRY_ORIGIN;
18633218f709SDavid Hildenbrand 	if (fake)
18643218f709SDavid Hildenbrand 		page->index |= GMAP_SHADOW_FAKE_TABLE;
1865079f0c21SNico Boehr 	s_r3t = page_to_phys(page);
18664be130a0SMartin Schwidefsky 	/* Install shadow region third table */
18674be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
18684be130a0SMartin Schwidefsky 	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
18694be130a0SMartin Schwidefsky 	if (!table) {
18704be130a0SMartin Schwidefsky 		rc = -EAGAIN;		/* Race with unshadow */
18714be130a0SMartin Schwidefsky 		goto out_free;
18724be130a0SMartin Schwidefsky 	}
18734be130a0SMartin Schwidefsky 	if (!(*table & _REGION_ENTRY_INVALID)) {
18744be130a0SMartin Schwidefsky 		rc = 0;			/* Already established */
18754be130a0SMartin Schwidefsky 		goto out_free;
1876998f637cSDavid Hildenbrand 	} else if (*table & _REGION_ENTRY_ORIGIN) {
1877998f637cSDavid Hildenbrand 		rc = -EAGAIN;		/* Race with shadow */
18781493e0f9SDavid Hildenbrand 		goto out_free;
18794be130a0SMartin Schwidefsky 	}
1880079f0c21SNico Boehr 	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
1881998f637cSDavid Hildenbrand 	/* mark as invalid as long as the parent table is not protected */
1882079f0c21SNico Boehr 	*table = s_r3t | _REGION_ENTRY_LENGTH |
1883998f637cSDavid Hildenbrand 		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
1884fd8d4e3aSDavid Hildenbrand 	if (sg->edat_level >= 1)
1885fd8d4e3aSDavid Hildenbrand 		*table |= (r3t & _REGION_ENTRY_PROTECT);
18864be130a0SMartin Schwidefsky 	list_add(&page->lru, &sg->crst_list);
18873218f709SDavid Hildenbrand 	if (fake) {
18883218f709SDavid Hildenbrand 		/* nothing to protect for fake tables */
18893218f709SDavid Hildenbrand 		*table &= ~_REGION_ENTRY_INVALID;
18903218f709SDavid Hildenbrand 		spin_unlock(&sg->guest_table_lock);
18913218f709SDavid Hildenbrand 		return 0;
18923218f709SDavid Hildenbrand 	}
18934be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
18944be130a0SMartin Schwidefsky 	/* Make r3t read-only in parent gmap page table */
1895f1c1174fSHeiko Carstens 	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
18964be130a0SMartin Schwidefsky 	origin = r3t & _REGION_ENTRY_ORIGIN;
1897f1c1174fSHeiko Carstens 	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1898f1c1174fSHeiko Carstens 	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
18995c528db0SDavid Hildenbrand 	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
19004be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
1901998f637cSDavid Hildenbrand 	if (!rc) {
1902998f637cSDavid Hildenbrand 		table = gmap_table_walk(sg, saddr, 3);
1903079f0c21SNico Boehr 		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
1904998f637cSDavid Hildenbrand 			rc = -EAGAIN;		/* Race with unshadow */
1905998f637cSDavid Hildenbrand 		else
1906998f637cSDavid Hildenbrand 			*table &= ~_REGION_ENTRY_INVALID;
1907998f637cSDavid Hildenbrand 	} else {
19084be130a0SMartin Schwidefsky 		gmap_unshadow_r3t(sg, raddr);
19094be130a0SMartin Schwidefsky 	}
1910998f637cSDavid Hildenbrand 	spin_unlock(&sg->guest_table_lock);
19114be130a0SMartin Schwidefsky 	return rc;
19124be130a0SMartin Schwidefsky out_free:
19134be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
1914f1c1174fSHeiko Carstens 	__free_pages(page, CRST_ALLOC_ORDER);
19154be130a0SMartin Schwidefsky 	return rc;
19164be130a0SMartin Schwidefsky }
19174be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
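
/*
 * Illustrative sketch, not from the original source: the protection
 * window computed by gmap_shadow_r2t()/gmap_shadow_r3t() above. A
 * region-table entry's table-offset field (_REGION_ENTRY_OFFSET, 0..3
 * after the shift) and table-length field (_REGION_ENTRY_LENGTH, 0..3)
 * describe which 4 KB pages of the 16 KB parent table actually exist,
 * in units of one page (512 entries). The helper name is hypothetical.
 */
static inline void __maybe_unused
gmap_sketch_rt_window(unsigned long entry, unsigned long *offset,
		      unsigned long *len)
{
	/* byte offset of the first valid 4 KB page of the parent table */
	*offset = ((entry & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	/* bytes from there up to and including the last valid page */
	*len = ((entry & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - *offset;
}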
19184be130a0SMartin Schwidefsky 
19194be130a0SMartin Schwidefsky /**
19204be130a0SMartin Schwidefsky  * gmap_shadow_sgt - create a shadow segment table
19214be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
19224be130a0SMartin Schwidefsky  * @saddr: faulting address in the shadow gmap
19234be130a0SMartin Schwidefsky  * @sgt: parent gmap address of the segment table to get shadowed
192418b89809SDavid Hildenbrand  * @fake: sgt references contiguous guest memory block, not an sgt
19254be130a0SMartin Schwidefsky  *
19264be130a0SMartin Schwidefsky  * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
19274be130a0SMartin Schwidefsky  * shadow table structure is incomplete, -ENOMEM if out of memory and
19284be130a0SMartin Schwidefsky  * -EFAULT if an address in the parent gmap could not be resolved.
19294be130a0SMartin Schwidefsky  *
1930c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
19314be130a0SMartin Schwidefsky  */
gmap_shadow_sgt(struct gmap * sg,unsigned long saddr,unsigned long sgt,int fake)193218b89809SDavid Hildenbrand int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
193318b89809SDavid Hildenbrand 		    int fake)
19344be130a0SMartin Schwidefsky {
19354be130a0SMartin Schwidefsky 	unsigned long raddr, origin, offset, len;
1936079f0c21SNico Boehr 	unsigned long *table;
1937079f0c21SNico Boehr 	phys_addr_t s_sgt;
19384be130a0SMartin Schwidefsky 	struct page *page;
19394be130a0SMartin Schwidefsky 	int rc;
19404be130a0SMartin Schwidefsky 
194118b89809SDavid Hildenbrand 	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
19424be130a0SMartin Schwidefsky 	/* Allocate a shadow segment table */
1943bfabe8d0SHeiko Carstens 	page = gmap_alloc_crst();
19444be130a0SMartin Schwidefsky 	if (!page)
19454be130a0SMartin Schwidefsky 		return -ENOMEM;
19464be130a0SMartin Schwidefsky 	page->index = sgt & _REGION_ENTRY_ORIGIN;
194718b89809SDavid Hildenbrand 	if (fake)
194818b89809SDavid Hildenbrand 		page->index |= GMAP_SHADOW_FAKE_TABLE;
1949079f0c21SNico Boehr 	s_sgt = page_to_phys(page);
19504be130a0SMartin Schwidefsky 	/* Install shadow segment table */
19514be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
19524be130a0SMartin Schwidefsky 	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
19534be130a0SMartin Schwidefsky 	if (!table) {
19544be130a0SMartin Schwidefsky 		rc = -EAGAIN;		/* Race with unshadow */
19554be130a0SMartin Schwidefsky 		goto out_free;
19564be130a0SMartin Schwidefsky 	}
19574be130a0SMartin Schwidefsky 	if (!(*table & _REGION_ENTRY_INVALID)) {
19584be130a0SMartin Schwidefsky 		rc = 0;			/* Already established */
19594be130a0SMartin Schwidefsky 		goto out_free;
1960998f637cSDavid Hildenbrand 	} else if (*table & _REGION_ENTRY_ORIGIN) {
1961998f637cSDavid Hildenbrand 		rc = -EAGAIN;		/* Race with shadow */
1962998f637cSDavid Hildenbrand 		goto out_free;
19634be130a0SMartin Schwidefsky 	}
1964079f0c21SNico Boehr 	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
1965998f637cSDavid Hildenbrand 	/* mark as invalid as long as the parent table is not protected */
1966079f0c21SNico Boehr 	*table = s_sgt | _REGION_ENTRY_LENGTH |
1967998f637cSDavid Hildenbrand 		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
1968fd8d4e3aSDavid Hildenbrand 	if (sg->edat_level >= 1)
1969fd8d4e3aSDavid Hildenbrand 		*table |= sgt & _REGION_ENTRY_PROTECT;
19704be130a0SMartin Schwidefsky 	list_add(&page->lru, &sg->crst_list);
197118b89809SDavid Hildenbrand 	if (fake) {
197218b89809SDavid Hildenbrand 		/* nothing to protect for fake tables */
197318b89809SDavid Hildenbrand 		*table &= ~_REGION_ENTRY_INVALID;
197418b89809SDavid Hildenbrand 		spin_unlock(&sg->guest_table_lock);
197518b89809SDavid Hildenbrand 		return 0;
197618b89809SDavid Hildenbrand 	}
19774be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
19784be130a0SMartin Schwidefsky 	/* Make sgt read-only in parent gmap page table */
1979f1c1174fSHeiko Carstens 	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
19804be130a0SMartin Schwidefsky 	origin = sgt & _REGION_ENTRY_ORIGIN;
1981f1c1174fSHeiko Carstens 	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1982f1c1174fSHeiko Carstens 	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
19835c528db0SDavid Hildenbrand 	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
19844be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
1985998f637cSDavid Hildenbrand 	if (!rc) {
1986998f637cSDavid Hildenbrand 		table = gmap_table_walk(sg, saddr, 2);
1987079f0c21SNico Boehr 		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
1988998f637cSDavid Hildenbrand 			rc = -EAGAIN;		/* Race with unshadow */
1989998f637cSDavid Hildenbrand 		else
1990998f637cSDavid Hildenbrand 			*table &= ~_REGION_ENTRY_INVALID;
1991998f637cSDavid Hildenbrand 	} else {
19924be130a0SMartin Schwidefsky 		gmap_unshadow_sgt(sg, raddr);
19934be130a0SMartin Schwidefsky 	}
1994998f637cSDavid Hildenbrand 	spin_unlock(&sg->guest_table_lock);
19954be130a0SMartin Schwidefsky 	return rc;
19964be130a0SMartin Schwidefsky out_free:
19974be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
1998f1c1174fSHeiko Carstens 	__free_pages(page, CRST_ALLOC_ORDER);
19994be130a0SMartin Schwidefsky 	return rc;
20004be130a0SMartin Schwidefsky }
20014be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
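
/*
 * Illustrative note, not from the original source: gmap_shadow_r2t(),
 * gmap_shadow_r3t(), gmap_shadow_sgt() and gmap_shadow_pgt() all follow
 * the same three-phase handshake:
 *
 *  1. Allocate the shadow table and install its entry under
 *     guest_table_lock with the INVALID bit set, so it is not used yet.
 *  2. Drop the lock and write-protect the parent table through
 *     gmap_protect_rmap(), which may sleep and may fail.
 *  3. Retake the lock, re-walk the shadow tables and clear the INVALID
 *     bit only if the entry still points to the new table; otherwise
 *     return -EAGAIN (race with unshadow) and let the caller retry.
 *
 * Fake tables shadow a contiguous guest memory block instead of a real
 * parent table, so phase 2 is skipped and the entry is validated
 * immediately.
 */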
20024be130a0SMartin Schwidefsky 
20034be130a0SMartin Schwidefsky /**
20042e827528SHeiko Carstens  * gmap_shadow_pgt_lookup - find a shadow page table
20054be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
20064be130a0SMartin Schwidefsky  * @saddr: the address in the shadow guest address space
20074be130a0SMartin Schwidefsky  * @pgt: parent gmap address of the page table to get shadowed
20084be130a0SMartin Schwidefsky  * @dat_protection: if the pgtable is marked as protected by dat
2009fd8d4e3aSDavid Hildenbrand  * @fake: pgt references contiguous guest memory block, not a pgtable
20104be130a0SMartin Schwidefsky  *
20114be130a0SMartin Schwidefsky  * Returns 0 if the shadow page table was found and -EAGAIN if the page
20124be130a0SMartin Schwidefsky  * table was not found.
20134be130a0SMartin Schwidefsky  *
2014c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
20154be130a0SMartin Schwidefsky  */
gmap_shadow_pgt_lookup(struct gmap * sg,unsigned long saddr,unsigned long * pgt,int * dat_protection,int * fake)20164be130a0SMartin Schwidefsky int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
2017fd8d4e3aSDavid Hildenbrand 			   unsigned long *pgt, int *dat_protection,
2018fd8d4e3aSDavid Hildenbrand 			   int *fake)
20194be130a0SMartin Schwidefsky {
20204be130a0SMartin Schwidefsky 	unsigned long *table;
20214be130a0SMartin Schwidefsky 	struct page *page;
20224be130a0SMartin Schwidefsky 	int rc;
20234be130a0SMartin Schwidefsky 
20244be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
20254be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
20264be130a0SMartin Schwidefsky 	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
20274be130a0SMartin Schwidefsky 	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
20284be130a0SMartin Schwidefsky 		/* Shadow page tables are full pages (pte+pgste) */
20294be130a0SMartin Schwidefsky 		page = pfn_to_page(*table >> PAGE_SHIFT);
2030fd8d4e3aSDavid Hildenbrand 		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
20314be130a0SMartin Schwidefsky 		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
2032fd8d4e3aSDavid Hildenbrand 		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
20334be130a0SMartin Schwidefsky 		rc = 0;
21344be130a0SMartin Schwidefsky 	} else {
20354be130a0SMartin Schwidefsky 		rc = -EAGAIN;
20364be130a0SMartin Schwidefsky 	}
20374be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
20384be130a0SMartin Schwidefsky 	return rc;
20404be130a0SMartin Schwidefsky }
20414be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
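
/*
 * Illustrative sketch (hypothetical caller, not from the original
 * source): how the three outputs of gmap_shadow_pgt_lookup() fit
 * together. On success, @pgt holds the parent origin taken from
 * page->index, @fake tells whether it names a contiguous guest memory
 * block instead of a real page table, and @dat_protection mirrors the
 * segment protection bit. The function name is hypothetical.
 */
static int __maybe_unused
gmap_sketch_resolve_pgt(struct gmap *sg, unsigned long saddr)
{
	unsigned long pgt;
	int dat_protection, fake;
	int rc;

	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc == -EAGAIN) {
		/*
		 * No shadow page table yet; a real caller would walk the
		 * parent tables and create one via gmap_shadow_pgt().
		 */
		return rc;
	}
	return rc;
}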
20424be130a0SMartin Schwidefsky 
20434be130a0SMartin Schwidefsky /**
20444be130a0SMartin Schwidefsky  * gmap_shadow_pgt - instantiate a shadow page table
20454be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
20464be130a0SMartin Schwidefsky  * @saddr: faulting address in the shadow gmap
20474be130a0SMartin Schwidefsky  * @pgt: parent gmap address of the page table to get shadowed
2048fd8d4e3aSDavid Hildenbrand  * @fake: pgt references contiguous guest memory block, not a pgtable
20494be130a0SMartin Schwidefsky  *
20504be130a0SMartin Schwidefsky  * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
20514be130a0SMartin Schwidefsky  * shadow table structure is incomplete, -ENOMEM if out of memory and
20524be130a0SMartin Schwidefsky  * -EFAULT if an address in the parent gmap could not be resolved.
20534be130a0SMartin Schwidefsky  *
2054c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
20554be130a0SMartin Schwidefsky  */
gmap_shadow_pgt(struct gmap * sg,unsigned long saddr,unsigned long pgt,int fake)2056fd8d4e3aSDavid Hildenbrand int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
2057fd8d4e3aSDavid Hildenbrand 		    int fake)
20584be130a0SMartin Schwidefsky {
20594be130a0SMartin Schwidefsky 	unsigned long raddr, origin;
2060079f0c21SNico Boehr 	unsigned long *table;
20614be130a0SMartin Schwidefsky 	struct page *page;
2062079f0c21SNico Boehr 	phys_addr_t s_pgt;
20634be130a0SMartin Schwidefsky 	int rc;
20644be130a0SMartin Schwidefsky 
2065fd8d4e3aSDavid Hildenbrand 	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
20664be130a0SMartin Schwidefsky 	/* Allocate a shadow page table */
20674be130a0SMartin Schwidefsky 	page = page_table_alloc_pgste(sg->mm);
20684be130a0SMartin Schwidefsky 	if (!page)
20694be130a0SMartin Schwidefsky 		return -ENOMEM;
20704be130a0SMartin Schwidefsky 	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
2071fd8d4e3aSDavid Hildenbrand 	if (fake)
2072fd8d4e3aSDavid Hildenbrand 		page->index |= GMAP_SHADOW_FAKE_TABLE;
2073079f0c21SNico Boehr 	s_pgt = page_to_phys(page);
20744be130a0SMartin Schwidefsky 	/* Install shadow page table */
20754be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
20764be130a0SMartin Schwidefsky 	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
20774be130a0SMartin Schwidefsky 	if (!table) {
20784be130a0SMartin Schwidefsky 		rc = -EAGAIN;		/* Race with unshadow */
20794be130a0SMartin Schwidefsky 		goto out_free;
20804be130a0SMartin Schwidefsky 	}
20814be130a0SMartin Schwidefsky 	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
20824be130a0SMartin Schwidefsky 		rc = 0;			/* Already established */
20834be130a0SMartin Schwidefsky 		goto out_free;
2084998f637cSDavid Hildenbrand 	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
2085998f637cSDavid Hildenbrand 		rc = -EAGAIN;		/* Race with shadow */
2086998f637cSDavid Hildenbrand 		goto out_free;
20874be130a0SMartin Schwidefsky 	}
2088998f637cSDavid Hildenbrand 	/* mark as invalid as long as the parent table is not protected */
20894be130a0SMartin Schwidefsky 	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
2090998f637cSDavid Hildenbrand 		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
20914be130a0SMartin Schwidefsky 	list_add(&page->lru, &sg->pt_list);
2092fd8d4e3aSDavid Hildenbrand 	if (fake) {
2093fd8d4e3aSDavid Hildenbrand 		/* nothing to protect for fake tables */
2094fd8d4e3aSDavid Hildenbrand 		*table &= ~_SEGMENT_ENTRY_INVALID;
2095fd8d4e3aSDavid Hildenbrand 		spin_unlock(&sg->guest_table_lock);
2096fd8d4e3aSDavid Hildenbrand 		return 0;
2097fd8d4e3aSDavid Hildenbrand 	}
20984be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
20994be130a0SMartin Schwidefsky 	/* Make pgt read-only in parent gmap page table (not the pgste) */
2100f1c1174fSHeiko Carstens 	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
21014be130a0SMartin Schwidefsky 	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
21025c528db0SDavid Hildenbrand 	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
21034be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
2104998f637cSDavid Hildenbrand 	if (!rc) {
2105998f637cSDavid Hildenbrand 		table = gmap_table_walk(sg, saddr, 1);
2106079f0c21SNico Boehr 		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
2107998f637cSDavid Hildenbrand 			rc = -EAGAIN;		/* Race with unshadow */
2108998f637cSDavid Hildenbrand 		else
2109998f637cSDavid Hildenbrand 			*table &= ~_SEGMENT_ENTRY_INVALID;
2110998f637cSDavid Hildenbrand 	} else {
21114be130a0SMartin Schwidefsky 		gmap_unshadow_pgt(sg, raddr);
21124be130a0SMartin Schwidefsky 	}
2113998f637cSDavid Hildenbrand 	spin_unlock(&sg->guest_table_lock);
21144be130a0SMartin Schwidefsky 	return rc;
21154be130a0SMartin Schwidefsky out_free:
21164be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
21174be130a0SMartin Schwidefsky 	page_table_free_pgste(page);
21184be130a0SMartin Schwidefsky 	return rc;
21204be130a0SMartin Schwidefsky }
21214be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
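
/*
 * Illustrative sketch, not from the original source: the page->index
 * tagging used by all shadow table allocations above. The parent table
 * origin is aligned, leaving bit 0 free to carry GMAP_SHADOW_FAKE_TABLE.
 * Helper names are hypothetical.
 */
static inline unsigned long __maybe_unused
gmap_sketch_parent_origin(struct page *page)
{
	return page->index & ~GMAP_SHADOW_FAKE_TABLE;
}

static inline int __maybe_unused
gmap_sketch_is_fake(struct page *page)
{
	return !!(page->index & GMAP_SHADOW_FAKE_TABLE);
}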
21224be130a0SMartin Schwidefsky 
21234be130a0SMartin Schwidefsky /**
21244be130a0SMartin Schwidefsky  * gmap_shadow_page - create a shadow page mapping
21254be130a0SMartin Schwidefsky  * @sg: pointer to the shadow guest address space structure
21264be130a0SMartin Schwidefsky  * @saddr: faulting address in the shadow gmap
2127a9d23e71SDavid Hildenbrand  * @pte: pte in parent gmap address space to get shadowed
21284be130a0SMartin Schwidefsky  *
21294be130a0SMartin Schwidefsky  * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
21304be130a0SMartin Schwidefsky  * shadow table structure is incomplete, -ENOMEM if out of memory and
21314be130a0SMartin Schwidefsky  * -EFAULT if an address in the parent gmap could not be resolved.
21324be130a0SMartin Schwidefsky  *
2133c1e8d7c6SMichel Lespinasse  * Called with sg->mm->mmap_lock in read.
21344be130a0SMartin Schwidefsky  */
gmap_shadow_page(struct gmap * sg,unsigned long saddr,pte_t pte)2135a9d23e71SDavid Hildenbrand int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
21364be130a0SMartin Schwidefsky {
21374be130a0SMartin Schwidefsky 	struct gmap *parent;
21384be130a0SMartin Schwidefsky 	struct gmap_rmap *rmap;
2139a9d23e71SDavid Hildenbrand 	unsigned long vmaddr, paddr;
21404be130a0SMartin Schwidefsky 	spinlock_t *ptl;
21414be130a0SMartin Schwidefsky 	pte_t *sptep, *tptep;
214201f71917SDavid Hildenbrand 	int prot;
21434be130a0SMartin Schwidefsky 	int rc;
21444be130a0SMartin Schwidefsky 
21454be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
21464be130a0SMartin Schwidefsky 	parent = sg->parent;
214701f71917SDavid Hildenbrand 	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
21484be130a0SMartin Schwidefsky 
21490cd2a787SChristian Borntraeger 	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
21504be130a0SMartin Schwidefsky 	if (!rmap)
21514be130a0SMartin Schwidefsky 		return -ENOMEM;
21524be130a0SMartin Schwidefsky 	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
21534be130a0SMartin Schwidefsky 
21544be130a0SMartin Schwidefsky 	while (1) {
2155a9d23e71SDavid Hildenbrand 		paddr = pte_val(pte) & PAGE_MASK;
21564be130a0SMartin Schwidefsky 		vmaddr = __gmap_translate(parent, paddr);
21574be130a0SMartin Schwidefsky 		if (IS_ERR_VALUE(vmaddr)) {
21584be130a0SMartin Schwidefsky 			rc = vmaddr;
21594be130a0SMartin Schwidefsky 			break;
21604be130a0SMartin Schwidefsky 		}
21610cd2a787SChristian Borntraeger 		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
21624be130a0SMartin Schwidefsky 		if (rc)
21634be130a0SMartin Schwidefsky 			break;
21644be130a0SMartin Schwidefsky 		rc = -EAGAIN;
21654be130a0SMartin Schwidefsky 		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
21664be130a0SMartin Schwidefsky 		if (sptep) {
21674be130a0SMartin Schwidefsky 			spin_lock(&sg->guest_table_lock);
21684be130a0SMartin Schwidefsky 			/* Get page table pointer */
21694be130a0SMartin Schwidefsky 			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
21704be130a0SMartin Schwidefsky 			if (!tptep) {
21714be130a0SMartin Schwidefsky 				spin_unlock(&sg->guest_table_lock);
2172b2f58941SHugh Dickins 				gmap_pte_op_end(sptep, ptl);
21734be130a0SMartin Schwidefsky 				radix_tree_preload_end();
21744be130a0SMartin Schwidefsky 				break;
21754be130a0SMartin Schwidefsky 			}
2176a9d23e71SDavid Hildenbrand 			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
21774be130a0SMartin Schwidefsky 			if (rc > 0) {
21784be130a0SMartin Schwidefsky 				/* Success and a new mapping */
21794be130a0SMartin Schwidefsky 				gmap_insert_rmap(sg, vmaddr, rmap);
21804be130a0SMartin Schwidefsky 				rmap = NULL;
21814be130a0SMartin Schwidefsky 				rc = 0;
21824be130a0SMartin Schwidefsky 			}
2183b2f58941SHugh Dickins 			gmap_pte_op_end(sptep, ptl);
21844be130a0SMartin Schwidefsky 			spin_unlock(&sg->guest_table_lock);
21854be130a0SMartin Schwidefsky 		}
21864be130a0SMartin Schwidefsky 		radix_tree_preload_end();
21874be130a0SMartin Schwidefsky 		if (!rc)
21884be130a0SMartin Schwidefsky 			break;
218901f71917SDavid Hildenbrand 		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
21904be130a0SMartin Schwidefsky 		if (rc)
21914be130a0SMartin Schwidefsky 			break;
21924be130a0SMartin Schwidefsky 	}
21934be130a0SMartin Schwidefsky 	kfree(rmap);
21944be130a0SMartin Schwidefsky 	return rc;
21954be130a0SMartin Schwidefsky }
21964be130a0SMartin Schwidefsky EXPORT_SYMBOL_GPL(gmap_shadow_page);
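
/*
 * Illustrative note, not from the original source: the rmap is
 * preallocated outside the retry loop above. On the first successful
 * shadow (ptep_shadow_pte() returned > 0) it is handed over to the
 * host_to_rmap tree by gmap_insert_rmap(), which is why the local
 * pointer is cleared before the final kfree(). An -EAGAIN from the pte
 * walk is resolved by gmap_pte_op_fixup() faulting in the parent page
 * with the required protection, after which the loop retries.
 */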
21974be130a0SMartin Schwidefsky 
21982e827528SHeiko Carstens /*
21994be130a0SMartin Schwidefsky  * gmap_shadow_notify - handle notifications for shadow gmap
22004be130a0SMartin Schwidefsky  *
22014be130a0SMartin Schwidefsky  * Called with sg->parent->shadow_lock.
22024be130a0SMartin Schwidefsky  */
gmap_shadow_notify(struct gmap * sg,unsigned long vmaddr,unsigned long gaddr)22034be130a0SMartin Schwidefsky static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
2204c0b4bd21SJanosch Frank 			       unsigned long gaddr)
22054be130a0SMartin Schwidefsky {
22064be130a0SMartin Schwidefsky 	struct gmap_rmap *rmap, *rnext, *head;
22072fa5ed7dSJanosch Frank 	unsigned long start, end, bits, raddr;
22084be130a0SMartin Schwidefsky 
22094be130a0SMartin Schwidefsky 	BUG_ON(!gmap_is_shadow(sg));
22104be130a0SMartin Schwidefsky 
22114be130a0SMartin Schwidefsky 	spin_lock(&sg->guest_table_lock);
22124be130a0SMartin Schwidefsky 	if (sg->removed) {
22134be130a0SMartin Schwidefsky 		spin_unlock(&sg->guest_table_lock);
22144be130a0SMartin Schwidefsky 		return;
22154be130a0SMartin Schwidefsky 	}
22164be130a0SMartin Schwidefsky 	/* Check for top level table */
22174be130a0SMartin Schwidefsky 	start = sg->orig_asce & _ASCE_ORIGIN;
2218f1c1174fSHeiko Carstens 	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
22193218f709SDavid Hildenbrand 	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
22203218f709SDavid Hildenbrand 	    gaddr < end) {
22214be130a0SMartin Schwidefsky 		/* The complete shadow table has to go */
22224be130a0SMartin Schwidefsky 		gmap_unshadow(sg);
22234be130a0SMartin Schwidefsky 		spin_unlock(&sg->guest_table_lock);
22244be130a0SMartin Schwidefsky 		list_del(&sg->list);
22254be130a0SMartin Schwidefsky 		gmap_put(sg);
22264be130a0SMartin Schwidefsky 		return;
22274be130a0SMartin Schwidefsky 	}
22284be130a0SMartin Schwidefsky 	/* Remove the page table tree from one specific entry */
2229f1c1174fSHeiko Carstens 	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
22304be130a0SMartin Schwidefsky 	gmap_for_each_rmap_safe(rmap, rnext, head) {
22314be130a0SMartin Schwidefsky 		bits = rmap->raddr & _SHADOW_RMAP_MASK;
22324be130a0SMartin Schwidefsky 		raddr = rmap->raddr ^ bits;
22334be130a0SMartin Schwidefsky 		switch (bits) {
22344be130a0SMartin Schwidefsky 		case _SHADOW_RMAP_REGION1:
22354be130a0SMartin Schwidefsky 			gmap_unshadow_r2t(sg, raddr);
22364be130a0SMartin Schwidefsky 			break;
22374be130a0SMartin Schwidefsky 		case _SHADOW_RMAP_REGION2:
22384be130a0SMartin Schwidefsky 			gmap_unshadow_r3t(sg, raddr);
22394be130a0SMartin Schwidefsky 			break;
22404be130a0SMartin Schwidefsky 		case _SHADOW_RMAP_REGION3:
22414be130a0SMartin Schwidefsky 			gmap_unshadow_sgt(sg, raddr);
22424be130a0SMartin Schwidefsky 			break;
22434be130a0SMartin Schwidefsky 		case _SHADOW_RMAP_SEGMENT:
22444be130a0SMartin Schwidefsky 			gmap_unshadow_pgt(sg, raddr);
22454be130a0SMartin Schwidefsky 			break;
22464be130a0SMartin Schwidefsky 		case _SHADOW_RMAP_PGTABLE:
22474be130a0SMartin Schwidefsky 			gmap_unshadow_page(sg, raddr);
22484be130a0SMartin Schwidefsky 			break;
22494be130a0SMartin Schwidefsky 		}
22504be130a0SMartin Schwidefsky 		kfree(rmap);
22514be130a0SMartin Schwidefsky 	}
22524be130a0SMartin Schwidefsky 	spin_unlock(&sg->guest_table_lock);
22534be130a0SMartin Schwidefsky }
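
/*
 * Illustrative sketch, not from the original source: rmap entries store
 * the shadow table level in the low bits of an otherwise aligned raddr,
 * which is why the switch above can recover both with a mask and an
 * xor. The helper name is hypothetical.
 */
static inline void __maybe_unused
gmap_sketch_decode_rmap(struct gmap_rmap *rmap, unsigned long *raddr,
			unsigned long *level)
{
	*level = rmap->raddr & _SHADOW_RMAP_MASK; /* e.g. _SHADOW_RMAP_SEGMENT */
	*raddr = rmap->raddr ^ *level;		  /* strip the level tag */
}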
22541e133ab2SMartin Schwidefsky 
22551e133ab2SMartin Schwidefsky /**
22561e133ab2SMartin Schwidefsky  * ptep_notify - call all invalidation callbacks for a specific pte.
22571e133ab2SMartin Schwidefsky  * @mm: pointer to the process mm_struct
22582e827528SHeiko Carstens  * @vmaddr: virtual address in the process address space
22591e133ab2SMartin Schwidefsky  * @pte: pointer to the page table entry
22604be130a0SMartin Schwidefsky  * @bits: bits from the pgste that caused the notify call
22611e133ab2SMartin Schwidefsky  *
22621e133ab2SMartin Schwidefsky  * This function is assumed to be called with the page table lock held
22631e133ab2SMartin Schwidefsky  * for the pte to notify.
22641e133ab2SMartin Schwidefsky  */
ptep_notify(struct mm_struct * mm,unsigned long vmaddr,pte_t * pte,unsigned long bits)22654be130a0SMartin Schwidefsky void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
22664be130a0SMartin Schwidefsky 		 pte_t *pte, unsigned long bits)
22671e133ab2SMartin Schwidefsky {
22682fa5ed7dSJanosch Frank 	unsigned long offset, gaddr = 0;
22691e133ab2SMartin Schwidefsky 	unsigned long *table;
22704be130a0SMartin Schwidefsky 	struct gmap *gmap, *sg, *next;
22711e133ab2SMartin Schwidefsky 
22721e133ab2SMartin Schwidefsky 	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
2273f1c1174fSHeiko Carstens 	offset = offset * (PAGE_SIZE / sizeof(pte_t));
22748ecb1a59SMartin Schwidefsky 	rcu_read_lock();
22758ecb1a59SMartin Schwidefsky 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
22768ecb1a59SMartin Schwidefsky 		spin_lock(&gmap->guest_table_lock);
22771e133ab2SMartin Schwidefsky 		table = radix_tree_lookup(&gmap->host_to_guest,
22781e133ab2SMartin Schwidefsky 					  vmaddr >> PMD_SHIFT);
22798ecb1a59SMartin Schwidefsky 		if (table)
22801e133ab2SMartin Schwidefsky 			gaddr = __gmap_segment_gaddr(table) + offset;
22818ecb1a59SMartin Schwidefsky 		spin_unlock(&gmap->guest_table_lock);
22822fa5ed7dSJanosch Frank 		if (!table)
22832fa5ed7dSJanosch Frank 			continue;
22842fa5ed7dSJanosch Frank 
22852fa5ed7dSJanosch Frank 		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
22862fa5ed7dSJanosch Frank 			spin_lock(&gmap->shadow_lock);
22872fa5ed7dSJanosch Frank 			list_for_each_entry_safe(sg, next,
22882fa5ed7dSJanosch Frank 						 &gmap->children, list)
2289c0b4bd21SJanosch Frank 				gmap_shadow_notify(sg, vmaddr, gaddr);
22902fa5ed7dSJanosch Frank 			spin_unlock(&gmap->shadow_lock);
22912fa5ed7dSJanosch Frank 		}
22922fa5ed7dSJanosch Frank 		if (bits & PGSTE_IN_BIT)
2293414d3b07SMartin Schwidefsky 			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
22941e133ab2SMartin Schwidefsky 	}
22958ecb1a59SMartin Schwidefsky 	rcu_read_unlock();
22961e133ab2SMartin Schwidefsky }
22971e133ab2SMartin Schwidefsky EXPORT_SYMBOL_GPL(ptep_notify);
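
/*
 * Illustrative sketch, not from the original source: the offset
 * arithmetic in ptep_notify() above. An s390 page table holds 256
 * 8-byte ptes, so the pte pointer's byte offset within its 2 KB table,
 * scaled by PAGE_SIZE / sizeof(pte_t) (= 512), is the byte offset of
 * the mapped page within the segment: entry i sits at i * 8 and maps
 * segment base + i * PAGE_SIZE. The helper name is hypothetical.
 */
static inline unsigned long __maybe_unused
gmap_sketch_pte_to_segment_offset(pte_t *pte)
{
	unsigned long off = (unsigned long)pte & (255 * sizeof(pte_t));

	return off * (PAGE_SIZE / sizeof(pte_t));
}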
22981e133ab2SMartin Schwidefsky 
pmdp_notify_gmap(struct gmap * gmap,pmd_t * pmdp,unsigned long gaddr)22996a376277SJanosch Frank static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
23006a376277SJanosch Frank 			     unsigned long gaddr)
23016a376277SJanosch Frank {
2302b8e3b379SHeiko Carstens 	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
23036a376277SJanosch Frank 	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
23046a376277SJanosch Frank }
23056a376277SJanosch Frank 
23060959e168SJanosch Frank /**
23070959e168SJanosch Frank  * gmap_pmdp_xchg - exchange a gmap pmd with another
23080959e168SJanosch Frank  * @gmap: pointer to the guest address space structure
23090959e168SJanosch Frank  * @pmdp: pointer to the pmd entry
23100959e168SJanosch Frank  * @new: replacement entry
23110959e168SJanosch Frank  * @gaddr: the affected guest address
23120959e168SJanosch Frank  *
23130959e168SJanosch Frank  * This function is assumed to be called with the guest_table_lock
23140959e168SJanosch Frank  * held.
23150959e168SJanosch Frank  */
gmap_pmdp_xchg(struct gmap * gmap,pmd_t * pmdp,pmd_t new,unsigned long gaddr)23160959e168SJanosch Frank static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
23170959e168SJanosch Frank 			   unsigned long gaddr)
23180959e168SJanosch Frank {
23190959e168SJanosch Frank 	gaddr &= HPAGE_MASK;
23200959e168SJanosch Frank 	pmdp_notify_gmap(gmap, pmdp, gaddr);
2321e1fc74ffSHeiko Carstens 	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
23220959e168SJanosch Frank 	if (MACHINE_HAS_TLB_GUEST)
23230959e168SJanosch Frank 		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
23240959e168SJanosch Frank 			    IDTE_GLOBAL);
23250959e168SJanosch Frank 	else if (MACHINE_HAS_IDTE)
23260959e168SJanosch Frank 		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
23270959e168SJanosch Frank 	else
23280959e168SJanosch Frank 		__pmdp_csp(pmdp);
2329b8e3b379SHeiko Carstens 	set_pmd(pmdp, new);
23300959e168SJanosch Frank }
23310959e168SJanosch Frank 
gmap_pmdp_clear(struct mm_struct * mm,unsigned long vmaddr,int purge)23326a376277SJanosch Frank static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
23336a376277SJanosch Frank 			    int purge)
23346a376277SJanosch Frank {
23356a376277SJanosch Frank 	pmd_t *pmdp;
23366a376277SJanosch Frank 	struct gmap *gmap;
23376a376277SJanosch Frank 	unsigned long gaddr;
23386a376277SJanosch Frank 
23396a376277SJanosch Frank 	rcu_read_lock();
23406a376277SJanosch Frank 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
23416a376277SJanosch Frank 		spin_lock(&gmap->guest_table_lock);
23426a376277SJanosch Frank 		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
23436a376277SJanosch Frank 						  vmaddr >> PMD_SHIFT);
23446a376277SJanosch Frank 		if (pmdp) {
23456a376277SJanosch Frank 			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
23466a376277SJanosch Frank 			pmdp_notify_gmap(gmap, pmdp, gaddr);
23470959e168SJanosch Frank 			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
23480959e168SJanosch Frank 						   _SEGMENT_ENTRY_GMAP_UC));
23496a376277SJanosch Frank 			if (purge)
23506a376277SJanosch Frank 				__pmdp_csp(pmdp);
2351b8e3b379SHeiko Carstens 			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
23526a376277SJanosch Frank 		}
23536a376277SJanosch Frank 		spin_unlock(&gmap->guest_table_lock);
23546a376277SJanosch Frank 	}
23556a376277SJanosch Frank 	rcu_read_unlock();
23566a376277SJanosch Frank }
23576a376277SJanosch Frank 
23586a376277SJanosch Frank /**
23596a376277SJanosch Frank  * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
23606a376277SJanosch Frank  *                        flushing
23616a376277SJanosch Frank  * @mm: pointer to the process mm_struct
23626a376277SJanosch Frank  * @vmaddr: virtual address in the process address space
23636a376277SJanosch Frank  */
gmap_pmdp_invalidate(struct mm_struct * mm,unsigned long vmaddr)23646a376277SJanosch Frank void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
23656a376277SJanosch Frank {
23666a376277SJanosch Frank 	gmap_pmdp_clear(mm, vmaddr, 0);
23676a376277SJanosch Frank }
23686a376277SJanosch Frank EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
23696a376277SJanosch Frank 
23706a376277SJanosch Frank /**
23716a376277SJanosch Frank  * gmap_pmdp_csp - csp all affected guest pmd entries
23726a376277SJanosch Frank  * @mm: pointer to the process mm_struct
23736a376277SJanosch Frank  * @vmaddr: virtual address in the process address space
23746a376277SJanosch Frank  */
gmap_pmdp_csp(struct mm_struct * mm,unsigned long vmaddr)23756a376277SJanosch Frank void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
23766a376277SJanosch Frank {
23776a376277SJanosch Frank 	gmap_pmdp_clear(mm, vmaddr, 1);
23786a376277SJanosch Frank }
23796a376277SJanosch Frank EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
23806a376277SJanosch Frank 
23816a376277SJanosch Frank /**
23826a376277SJanosch Frank  * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
23836a376277SJanosch Frank  * @mm: pointer to the process mm_struct
23846a376277SJanosch Frank  * @vmaddr: virtual address in the process address space
23856a376277SJanosch Frank  */
gmap_pmdp_idte_local(struct mm_struct * mm,unsigned long vmaddr)23866a376277SJanosch Frank void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
23876a376277SJanosch Frank {
23886a376277SJanosch Frank 	unsigned long *entry, gaddr;
23896a376277SJanosch Frank 	struct gmap *gmap;
23906a376277SJanosch Frank 	pmd_t *pmdp;
23916a376277SJanosch Frank 
23926a376277SJanosch Frank 	rcu_read_lock();
23936a376277SJanosch Frank 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
23946a376277SJanosch Frank 		spin_lock(&gmap->guest_table_lock);
23956a376277SJanosch Frank 		entry = radix_tree_delete(&gmap->host_to_guest,
23966a376277SJanosch Frank 					  vmaddr >> PMD_SHIFT);
23976a376277SJanosch Frank 		if (entry) {
23986a376277SJanosch Frank 			pmdp = (pmd_t *)entry;
23996a376277SJanosch Frank 			gaddr = __gmap_segment_gaddr(entry);
24006a376277SJanosch Frank 			pmdp_notify_gmap(gmap, pmdp, gaddr);
24010959e168SJanosch Frank 			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
24020959e168SJanosch Frank 					   _SEGMENT_ENTRY_GMAP_UC));
24036a376277SJanosch Frank 			if (MACHINE_HAS_TLB_GUEST)
24046a376277SJanosch Frank 				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
24056a376277SJanosch Frank 					    gmap->asce, IDTE_LOCAL);
24066a376277SJanosch Frank 			else if (MACHINE_HAS_IDTE)
24076a376277SJanosch Frank 				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
24086a376277SJanosch Frank 			*entry = _SEGMENT_ENTRY_EMPTY;
24096a376277SJanosch Frank 		}
24106a376277SJanosch Frank 		spin_unlock(&gmap->guest_table_lock);
24116a376277SJanosch Frank 	}
24126a376277SJanosch Frank 	rcu_read_unlock();
24136a376277SJanosch Frank }
24146a376277SJanosch Frank EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
24156a376277SJanosch Frank 
24166a376277SJanosch Frank /**
24176a376277SJanosch Frank  * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
24186a376277SJanosch Frank  * @mm: pointer to the process mm_struct
24196a376277SJanosch Frank  * @vmaddr: virtual address in the process address space
24206a376277SJanosch Frank  */
gmap_pmdp_idte_global(struct mm_struct * mm,unsigned long vmaddr)24216a376277SJanosch Frank void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
24226a376277SJanosch Frank {
24236a376277SJanosch Frank 	unsigned long *entry, gaddr;
24246a376277SJanosch Frank 	struct gmap *gmap;
24256a376277SJanosch Frank 	pmd_t *pmdp;
24266a376277SJanosch Frank 
24276a376277SJanosch Frank 	rcu_read_lock();
24286a376277SJanosch Frank 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
24296a376277SJanosch Frank 		spin_lock(&gmap->guest_table_lock);
24306a376277SJanosch Frank 		entry = radix_tree_delete(&gmap->host_to_guest,
24316a376277SJanosch Frank 					  vmaddr >> PMD_SHIFT);
24326a376277SJanosch Frank 		if (entry) {
24336a376277SJanosch Frank 			pmdp = (pmd_t *)entry;
24346a376277SJanosch Frank 			gaddr = __gmap_segment_gaddr(entry);
24356a376277SJanosch Frank 			pmdp_notify_gmap(gmap, pmdp, gaddr);
24360959e168SJanosch Frank 			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
24370959e168SJanosch Frank 					   _SEGMENT_ENTRY_GMAP_UC));
24386a376277SJanosch Frank 			if (MACHINE_HAS_TLB_GUEST)
24396a376277SJanosch Frank 				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
24406a376277SJanosch Frank 					    gmap->asce, IDTE_GLOBAL);
24416a376277SJanosch Frank 			else if (MACHINE_HAS_IDTE)
24426a376277SJanosch Frank 				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
24436a376277SJanosch Frank 			else
24446a376277SJanosch Frank 				__pmdp_csp(pmdp);
24456a376277SJanosch Frank 			*entry = _SEGMENT_ENTRY_EMPTY;
24466a376277SJanosch Frank 		}
24476a376277SJanosch Frank 		spin_unlock(&gmap->guest_table_lock);
24486a376277SJanosch Frank 	}
24496a376277SJanosch Frank 	rcu_read_unlock();
24506a376277SJanosch Frank }
24516a376277SJanosch Frank EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
24526a376277SJanosch Frank 
24530959e168SJanosch Frank /**
24540959e168SJanosch Frank  * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
24550959e168SJanosch Frank  * @gmap: pointer to guest address space
24560959e168SJanosch Frank  * @pmdp: pointer to the pmd to be tested
24570959e168SJanosch Frank  * @gaddr: virtual address in the guest address space
24580959e168SJanosch Frank  *
24590959e168SJanosch Frank  * This function is assumed to be called with the guest_table_lock
24600959e168SJanosch Frank  * held.
24610959e168SJanosch Frank  */
gmap_test_and_clear_dirty_pmd(struct gmap * gmap,pmd_t * pmdp,unsigned long gaddr)2462ffbd2685SVasily Gorbik static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
24630959e168SJanosch Frank 					  unsigned long gaddr)
24640959e168SJanosch Frank {
24650959e168SJanosch Frank 	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
24660959e168SJanosch Frank 		return false;
24670959e168SJanosch Frank 
24680959e168SJanosch Frank 	/* Already protected memory, which did not change, is clean */
24690959e168SJanosch Frank 	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
24700959e168SJanosch Frank 	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
24710959e168SJanosch Frank 		return false;
24720959e168SJanosch Frank 
24730959e168SJanosch Frank 	/* Clear UC indication and reset protection */
2474b8e3b379SHeiko Carstens 	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
24750959e168SJanosch Frank 	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
24760959e168SJanosch Frank 	return true;
24770959e168SJanosch Frank }
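
/*
 * Illustrative note, not from the original source: a present large pmd
 * is reported clean only while it is still write-protected and the
 * GMAP_UC softbit is clear. In every other case it counts as dirty;
 * the UC bit is then cleared and the segment re-protected read-only so
 * that the next guest write marks it dirty again.
 */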
24780959e168SJanosch Frank 
24790959e168SJanosch Frank /**
24800959e168SJanosch Frank  * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
24810959e168SJanosch Frank  * @gmap: pointer to guest address space
24820959e168SJanosch Frank  * @bitmap: dirty bitmap for this pmd
24830959e168SJanosch Frank  * @gaddr: virtual address in the guest address space
24840959e168SJanosch Frank  * @vmaddr: virtual address in the host address space
24850959e168SJanosch Frank  *
24860959e168SJanosch Frank  * This function is assumed to be called with the guest_table_lock
24870959e168SJanosch Frank  * held.
24880959e168SJanosch Frank  */
gmap_sync_dirty_log_pmd(struct gmap * gmap,unsigned long bitmap[4],unsigned long gaddr,unsigned long vmaddr)24890959e168SJanosch Frank void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
24900959e168SJanosch Frank 			     unsigned long gaddr, unsigned long vmaddr)
24910959e168SJanosch Frank {
24920959e168SJanosch Frank 	int i;
24930959e168SJanosch Frank 	pmd_t *pmdp;
24940959e168SJanosch Frank 	pte_t *ptep;
24950959e168SJanosch Frank 	spinlock_t *ptl;
24960959e168SJanosch Frank 
24970959e168SJanosch Frank 	pmdp = gmap_pmd_op_walk(gmap, gaddr);
24980959e168SJanosch Frank 	if (!pmdp)
24990959e168SJanosch Frank 		return;
25000959e168SJanosch Frank 
25010959e168SJanosch Frank 	if (pmd_large(*pmdp)) {
25020959e168SJanosch Frank 		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
25030959e168SJanosch Frank 			bitmap_fill(bitmap, _PAGE_ENTRIES);
25040959e168SJanosch Frank 	} else {
25050959e168SJanosch Frank 		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
25060959e168SJanosch Frank 			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
25070959e168SJanosch Frank 			if (!ptep)
25080959e168SJanosch Frank 				continue;
25090959e168SJanosch Frank 			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
25100959e168SJanosch Frank 				set_bit(i, bitmap);
2511b2f58941SHugh Dickins 			pte_unmap_unlock(ptep, ptl);
25120959e168SJanosch Frank 		}
25130959e168SJanosch Frank 	}
25140959e168SJanosch Frank 	gmap_pmd_op_end(gmap, pmdp);
25150959e168SJanosch Frank }
25160959e168SJanosch Frank EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
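
/*
 * Illustrative sketch (hypothetical consumer, not from the original
 * source): the bitmap filled by gmap_sync_dirty_log_pmd() covers one
 * segment, i.e. _PAGE_ENTRIES (256) 4 KB pages in four unsigned longs.
 * Locking per the comment above is omitted; the function name is
 * hypothetical.
 */
static void __maybe_unused
gmap_sketch_collect_dirty(struct gmap *gmap, unsigned long gaddr,
			  unsigned long vmaddr)
{
	unsigned long bitmap[4] = { 0 };
	unsigned int i;

	gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
	for_each_set_bit(i, bitmap, _PAGE_ENTRIES) {
		/* page at gaddr + i * PAGE_SIZE changed since last sync */
	}
}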
25170959e168SJanosch Frank 
2518ba925fa3SGerald Schaefer #ifdef CONFIG_TRANSPARENT_HUGEPAGE
thp_split_walk_pmd_entry(pmd_t * pmd,unsigned long addr,unsigned long end,struct mm_walk * walk)2519ba925fa3SGerald Schaefer static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
2520ba925fa3SGerald Schaefer 				    unsigned long end, struct mm_walk *walk)
2521ba925fa3SGerald Schaefer {
2522ba925fa3SGerald Schaefer 	struct vm_area_struct *vma = walk->vma;
2523ba925fa3SGerald Schaefer 
2524ba925fa3SGerald Schaefer 	split_huge_pmd(vma, pmd, addr);
2525ba925fa3SGerald Schaefer 	return 0;
2526ba925fa3SGerald Schaefer }
2527ba925fa3SGerald Schaefer 
2528ba925fa3SGerald Schaefer static const struct mm_walk_ops thp_split_walk_ops = {
2529ba925fa3SGerald Schaefer 	.pmd_entry	= thp_split_walk_pmd_entry,
253049b06385SSuren Baghdasaryan 	.walk_lock	= PGWALK_WRLOCK_VERIFY,
2531ba925fa3SGerald Schaefer };
2532ba925fa3SGerald Schaefer 
thp_split_mm(struct mm_struct * mm)25331e133ab2SMartin Schwidefsky static inline void thp_split_mm(struct mm_struct *mm)
25341e133ab2SMartin Schwidefsky {
25351e133ab2SMartin Schwidefsky 	struct vm_area_struct *vma;
2536e7b6b990SMatthew Wilcox (Oracle) 	VMA_ITERATOR(vmi, mm, 0);
25371e133ab2SMartin Schwidefsky 
2538e7b6b990SMatthew Wilcox (Oracle) 	for_each_vma(vmi, vma) {
25391c71222eSSuren Baghdasaryan 		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
2540ba925fa3SGerald Schaefer 		walk_page_vma(vma, &thp_split_walk_ops, NULL);
25411e133ab2SMartin Schwidefsky 	}
25421e133ab2SMartin Schwidefsky 	mm->def_flags |= VM_NOHUGEPAGE;
25431e133ab2SMartin Schwidefsky }
2544ba925fa3SGerald Schaefer #else
thp_split_mm(struct mm_struct * mm)2545ba925fa3SGerald Schaefer static inline void thp_split_mm(struct mm_struct *mm)
2546ba925fa3SGerald Schaefer {
2547ba925fa3SGerald Schaefer }
2548ba925fa3SGerald Schaefer #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
25491e133ab2SMartin Schwidefsky 
25501e133ab2SMartin Schwidefsky /*
25511e133ab2SMartin Schwidefsky  * switch on pgstes for the current userspace process (for kvm)
25521e133ab2SMartin Schwidefsky  */
s390_enable_sie(void)25531e133ab2SMartin Schwidefsky int s390_enable_sie(void)
25541e133ab2SMartin Schwidefsky {
25551e133ab2SMartin Schwidefsky 	struct mm_struct *mm = current->mm;
25561e133ab2SMartin Schwidefsky 
25571e133ab2SMartin Schwidefsky 	/* Do we have pgstes? if yes, we are done */
25581e133ab2SMartin Schwidefsky 	if (mm_has_pgste(mm))
25591e133ab2SMartin Schwidefsky 		return 0;
25601e133ab2SMartin Schwidefsky 	/* Fail if the page tables are 2K */
25611e133ab2SMartin Schwidefsky 	if (!mm_alloc_pgste(mm))
25621e133ab2SMartin Schwidefsky 		return -EINVAL;
2563d8ed45c5SMichel Lespinasse 	mmap_write_lock(mm);
25641e133ab2SMartin Schwidefsky 	mm->context.has_pgste = 1;
25651e133ab2SMartin Schwidefsky 	/* split thp mappings and disable thp for future mappings */
25661e133ab2SMartin Schwidefsky 	thp_split_mm(mm);
2567d8ed45c5SMichel Lespinasse 	mmap_write_unlock(mm);
25681e133ab2SMartin Schwidefsky 	return 0;
25691e133ab2SMartin Schwidefsky }
25701e133ab2SMartin Schwidefsky EXPORT_SYMBOL_GPL(s390_enable_sie);
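
/*
 * Illustrative sketch (hypothetical caller, not from the original
 * source): s390_enable_sie() is idempotent, so a VM creation path can
 * call it unconditionally before relying on pgstes.
 */
static int __maybe_unused gmap_sketch_prepare_mm_for_kvm(void)
{
	int rc;

	rc = s390_enable_sie();	/* 0 once pgstes are enabled */
	if (rc)
		return rc;	/* -EINVAL: mm uses 2 KB page tables */
	return 0;
}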
25711e133ab2SMartin Schwidefsky 
find_zeropage_pte_entry(pte_t * pte,unsigned long addr,unsigned long end,struct mm_walk * walk)2572840565b1SDavid Hildenbrand static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
2573840565b1SDavid Hildenbrand 				   unsigned long end, struct mm_walk *walk)
2574fa0c5eabSJanosch Frank {
2575840565b1SDavid Hildenbrand 	unsigned long *found_addr = walk->private;
2576840565b1SDavid Hildenbrand 
2577840565b1SDavid Hildenbrand 	/* Return 1 if the page is a zeropage. */
2578840565b1SDavid Hildenbrand 	if (is_zero_pfn(pte_pfn(*pte))) {
2579840565b1SDavid Hildenbrand 		/*
2580840565b1SDavid Hildenbrand 		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
2581840565b1SDavid Hildenbrand 		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
2582840565b1SDavid Hildenbrand 		 * currently only works in COW mappings, which is also where
2583840565b1SDavid Hildenbrand 		 * mm_forbids_zeropage() is checked.
2584840565b1SDavid Hildenbrand 		 */
2585840565b1SDavid Hildenbrand 		if (!is_cow_mapping(walk->vma->vm_flags))
2586840565b1SDavid Hildenbrand 			return -EFAULT;
2587840565b1SDavid Hildenbrand 
2588840565b1SDavid Hildenbrand 		*found_addr = addr;
2589840565b1SDavid Hildenbrand 		return 1;
2590840565b1SDavid Hildenbrand 	}
2591840565b1SDavid Hildenbrand 	return 0;
2592840565b1SDavid Hildenbrand }
2593840565b1SDavid Hildenbrand 
2594840565b1SDavid Hildenbrand static const struct mm_walk_ops find_zeropage_ops = {
2595840565b1SDavid Hildenbrand 	.pte_entry	= find_zeropage_pte_entry,
2596840565b1SDavid Hildenbrand 	.walk_lock	= PGWALK_WRLOCK,
2597840565b1SDavid Hildenbrand };
2598840565b1SDavid Hildenbrand 
2599840565b1SDavid Hildenbrand /*
2600840565b1SDavid Hildenbrand  * Unshare all shared zeropages, replacing them by anonymous pages. Note that
2601840565b1SDavid Hildenbrand  * we cannot simply zap all shared zeropages, because this could later
2602840565b1SDavid Hildenbrand  * trigger unexpected userfaultfd missing events.
2603840565b1SDavid Hildenbrand  *
2604840565b1SDavid Hildenbrand  * This must be called after mm->context.allow_cow_sharing was
2605840565b1SDavid Hildenbrand  * set to 0, to avoid future mappings of shared zeropages.
2606840565b1SDavid Hildenbrand  *
2607840565b1SDavid Hildenbrand  * The mm contracts with s390 that, even if mm were to remove a page
2608840565b1SDavid Hildenbrand  * table so that a racing walk_page_range_vma() calling
2609840565b1SDavid Hildenbrand  * pte_offset_map_lock() would fail, it will never insert a page table
2610840565b1SDavid Hildenbrand  * containing empty zero pages once mm_forbids_zeropage(mm), i.e.
2611840565b1SDavid Hildenbrand  * mm->context.allow_cow_sharing, is set to 0.
2612840565b1SDavid Hildenbrand  */
__s390_unshare_zeropages(struct mm_struct * mm)2613840565b1SDavid Hildenbrand static int __s390_unshare_zeropages(struct mm_struct *mm)
2614840565b1SDavid Hildenbrand {
2615840565b1SDavid Hildenbrand 	struct vm_area_struct *vma;
2616840565b1SDavid Hildenbrand 	VMA_ITERATOR(vmi, mm, 0);
2617840565b1SDavid Hildenbrand 	unsigned long addr;
2618840565b1SDavid Hildenbrand 	vm_fault_t fault;
2619840565b1SDavid Hildenbrand 	int rc;
2620840565b1SDavid Hildenbrand 
2621840565b1SDavid Hildenbrand 	for_each_vma(vmi, vma) {
2622840565b1SDavid Hildenbrand 		/*
2623840565b1SDavid Hildenbrand 		 * We could only look at COW mappings, but it's more future
2624840565b1SDavid Hildenbrand 		 * proof to catch unexpected zeropages in other mappings and
2625840565b1SDavid Hildenbrand 		 * fail.
2626840565b1SDavid Hildenbrand 		 */
2627840565b1SDavid Hildenbrand 		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
2628840565b1SDavid Hildenbrand 			continue;
2629840565b1SDavid Hildenbrand 		addr = vma->vm_start;
2630840565b1SDavid Hildenbrand 
2631840565b1SDavid Hildenbrand retry:
2632840565b1SDavid Hildenbrand 		rc = walk_page_range_vma(vma, addr, vma->vm_end,
2633840565b1SDavid Hildenbrand 					 &find_zeropage_ops, &addr);
2634840565b1SDavid Hildenbrand 		if (rc < 0)
2635840565b1SDavid Hildenbrand 			return rc;
2636840565b1SDavid Hildenbrand 		else if (!rc)
2637840565b1SDavid Hildenbrand 			continue;
2638840565b1SDavid Hildenbrand 
2639840565b1SDavid Hildenbrand 		/* addr was updated by find_zeropage_pte_entry() */
2640840565b1SDavid Hildenbrand 		fault = handle_mm_fault(vma, addr,
2641840565b1SDavid Hildenbrand 					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
2642840565b1SDavid Hildenbrand 					NULL);
2643840565b1SDavid Hildenbrand 		if (fault & VM_FAULT_OOM)
2644840565b1SDavid Hildenbrand 			return -ENOMEM;
2645840565b1SDavid Hildenbrand 		/*
2646840565b1SDavid Hildenbrand 		 * See break_ksm(): even after handle_mm_fault() returned 0, we
2647840565b1SDavid Hildenbrand 		 * must start the lookup from the current address, because
2648840565b1SDavid Hildenbrand 		 * handle_mm_fault() may back out if there's any difficulty.
2649840565b1SDavid Hildenbrand 		 *
2650840565b1SDavid Hildenbrand 		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
2651840565b1SDavid Hildenbrand 		 * maybe they could trigger in the future on concurrent
2652840565b1SDavid Hildenbrand 		 * truncation. In that case, the shared zeropage would be gone
2653840565b1SDavid Hildenbrand 		 * and we can simply retry and make progress.
2654840565b1SDavid Hildenbrand 		 */
2655840565b1SDavid Hildenbrand 		cond_resched();
2656840565b1SDavid Hildenbrand 		goto retry;
2657840565b1SDavid Hildenbrand 	}
2658840565b1SDavid Hildenbrand 
2659840565b1SDavid Hildenbrand 	return 0;
2660840565b1SDavid Hildenbrand }
2661840565b1SDavid Hildenbrand 
__s390_disable_cow_sharing(struct mm_struct * mm)2662840565b1SDavid Hildenbrand static int __s390_disable_cow_sharing(struct mm_struct *mm)
2663840565b1SDavid Hildenbrand {
2664840565b1SDavid Hildenbrand 	int rc;
2665840565b1SDavid Hildenbrand 
2666840565b1SDavid Hildenbrand 	if (!mm->context.allow_cow_sharing)
2667840565b1SDavid Hildenbrand 		return 0;
2668840565b1SDavid Hildenbrand 
2669840565b1SDavid Hildenbrand 	mm->context.allow_cow_sharing = 0;
2670840565b1SDavid Hildenbrand 
2671840565b1SDavid Hildenbrand 	/* Replace all shared zeropages by anonymous pages. */
2672840565b1SDavid Hildenbrand 	rc = __s390_unshare_zeropages(mm);
2673d7597f59SStefan Roesch 	/*
2674d7597f59SStefan Roesch 	 * Make sure to disable KSM (if enabled for the whole process or
2675d7597f59SStefan Roesch 	 * individual VMAs). Note that nothing currently hinders user space
2676d7597f59SStefan Roesch 	 * from re-enabling it.
2677d7597f59SStefan Roesch 	 */
2678840565b1SDavid Hildenbrand 	if (!rc)
2679840565b1SDavid Hildenbrand 		rc = ksm_disable(mm);
2680840565b1SDavid Hildenbrand 	if (rc)
2681840565b1SDavid Hildenbrand 		mm->context.allow_cow_sharing = 1;
2682840565b1SDavid Hildenbrand 	return rc;
2683fa0c5eabSJanosch Frank }
2684840565b1SDavid Hildenbrand 
2685840565b1SDavid Hildenbrand /*
2686840565b1SDavid Hildenbrand  * Disable most COW-sharing of memory pages for the whole process:
2687840565b1SDavid Hildenbrand  * (1) Disable KSM and unmerge/unshare any KSM pages.
2688840565b1SDavid Hildenbrand  * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
2689840565b1SDavid Hildenbrand  *
2690840565b1SDavid Hildenbrand  * Note that we currently don't bother with COW-shared pages that are shared
2691840565b1SDavid Hildenbrand  * with parent/child processes due to fork().
2692840565b1SDavid Hildenbrand  */
s390_disable_cow_sharing(void)2693840565b1SDavid Hildenbrand int s390_disable_cow_sharing(void)
2694840565b1SDavid Hildenbrand {
2695840565b1SDavid Hildenbrand 	int rc;
2696840565b1SDavid Hildenbrand 
2697840565b1SDavid Hildenbrand 	mmap_write_lock(current->mm);
2698840565b1SDavid Hildenbrand 	rc = __s390_disable_cow_sharing(current->mm);
2699840565b1SDavid Hildenbrand 	mmap_write_unlock(current->mm);
2700840565b1SDavid Hildenbrand 	return rc;
2701840565b1SDavid Hildenbrand }
2702840565b1SDavid Hildenbrand EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
2703fa0c5eabSJanosch Frank 
27041e133ab2SMartin Schwidefsky /*
27051e133ab2SMartin Schwidefsky  * Enable storage key handling from now on and initialize the storage
27061e133ab2SMartin Schwidefsky  * keys with the default key.
27071e133ab2SMartin Schwidefsky  */
__s390_enable_skey_pte(pte_t * pte,unsigned long addr,unsigned long next,struct mm_walk * walk)2708964c2c05SDominik Dingel static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
27091e133ab2SMartin Schwidefsky 				  unsigned long next, struct mm_walk *walk)
27101e133ab2SMartin Schwidefsky {
27111e133ab2SMartin Schwidefsky 	/* Clear storage key */
27121e133ab2SMartin Schwidefsky 	ptep_zap_key(walk->mm, addr, pte);
27131e133ab2SMartin Schwidefsky 	return 0;
27141e133ab2SMartin Schwidefsky }
27151e133ab2SMartin Schwidefsky 
27166d594627SChristian Borntraeger /*
27176d594627SChristian Borntraeger  * Give a chance to schedule after setting a key to 256 pages.
27186d594627SChristian Borntraeger  * We only hold the mm lock, which is a rwsem and the kvm srcu.
27196d594627SChristian Borntraeger  * Both can sleep.
27206d594627SChristian Borntraeger  */
__s390_enable_skey_pmd(pmd_t * pmd,unsigned long addr,unsigned long next,struct mm_walk * walk)27216d594627SChristian Borntraeger static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
27226d594627SChristian Borntraeger 				  unsigned long next, struct mm_walk *walk)
27236d594627SChristian Borntraeger {
27246d594627SChristian Borntraeger 	cond_resched();
27256d594627SChristian Borntraeger 	return 0;
27266d594627SChristian Borntraeger }
27276d594627SChristian Borntraeger 
__s390_enable_skey_hugetlb(pte_t * pte,unsigned long addr,unsigned long hmask,unsigned long next,struct mm_walk * walk)2728964c2c05SDominik Dingel static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
2729964c2c05SDominik Dingel 				      unsigned long hmask, unsigned long next,
2730964c2c05SDominik Dingel 				      struct mm_walk *walk)
2731964c2c05SDominik Dingel {
2732964c2c05SDominik Dingel 	pmd_t *pmd = (pmd_t *)pte;
2733964c2c05SDominik Dingel 	unsigned long start, end;
27343afdfca6SJanosch Frank 	struct page *page = pmd_page(*pmd);
2735964c2c05SDominik Dingel 
2736964c2c05SDominik Dingel 	/*
2737964c2c05SDominik Dingel 	 * The write check makes sure we do not set a key on shared
2738964c2c05SDominik Dingel 	 * memory. This is needed as the walker does not differentiate
2739964c2c05SDominik Dingel 	 * between actual guest memory and the process executable or
2740964c2c05SDominik Dingel 	 * shared libraries.
2741964c2c05SDominik Dingel 	 */
2742964c2c05SDominik Dingel 	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
2743964c2c05SDominik Dingel 	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
2744964c2c05SDominik Dingel 		return 0;
2745964c2c05SDominik Dingel 
2746964c2c05SDominik Dingel 	start = pmd_val(*pmd) & HPAGE_MASK;
2747819c33d7SClaudio Imbrenda 	end = start + HPAGE_SIZE;
2748964c2c05SDominik Dingel 	__storage_key_init_range(start, end);
27493afdfca6SJanosch Frank 	set_bit(PG_arch_1, &page->flags);
27506d594627SChristian Borntraeger 	cond_resched();
2751964c2c05SDominik Dingel 	return 0;
2752964c2c05SDominik Dingel }
2753964c2c05SDominik Dingel 
27547b86ac33SChristoph Hellwig static const struct mm_walk_ops enable_skey_walk_ops = {
2755964c2c05SDominik Dingel 	.hugetlb_entry		= __s390_enable_skey_hugetlb,
2756964c2c05SDominik Dingel 	.pte_entry		= __s390_enable_skey_pte,
27576d594627SChristian Borntraeger 	.pmd_entry		= __s390_enable_skey_pmd,
275849b06385SSuren Baghdasaryan 	.walk_lock		= PGWALK_WRLOCK,
2759964c2c05SDominik Dingel };
27607b86ac33SChristoph Hellwig 
s390_enable_skey(void)27617b86ac33SChristoph Hellwig int s390_enable_skey(void)
27627b86ac33SChristoph Hellwig {
27631e133ab2SMartin Schwidefsky 	struct mm_struct *mm = current->mm;
27641e133ab2SMartin Schwidefsky 	int rc = 0;
27651e133ab2SMartin Schwidefsky 
2766d8ed45c5SMichel Lespinasse 	mmap_write_lock(mm);
276755531b74SJanosch Frank 	if (mm_uses_skeys(mm))
27681e133ab2SMartin Schwidefsky 		goto out_up;
27691e133ab2SMartin Schwidefsky 
277055531b74SJanosch Frank 	mm->context.uses_skeys = 1;
2771840565b1SDavid Hildenbrand 	rc = __s390_disable_cow_sharing(mm);
2772fa0c5eabSJanosch Frank 	if (rc) {
277355531b74SJanosch Frank 		mm->context.uses_skeys = 0;
27741e133ab2SMartin Schwidefsky 		goto out_up;
27751e133ab2SMartin Schwidefsky 	}
27767b86ac33SChristoph Hellwig 	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
27771e133ab2SMartin Schwidefsky 
27781e133ab2SMartin Schwidefsky out_up:
2779d8ed45c5SMichel Lespinasse 	mmap_write_unlock(mm);
27801e133ab2SMartin Schwidefsky 	return rc;
27811e133ab2SMartin Schwidefsky }
27821e133ab2SMartin Schwidefsky EXPORT_SYMBOL_GPL(s390_enable_skey);
27831e133ab2SMartin Schwidefsky 
27841e133ab2SMartin Schwidefsky /*
27851e133ab2SMartin Schwidefsky  * Reset CMMA state, make all pages stable again.
27861e133ab2SMartin Schwidefsky  */
__s390_reset_cmma(pte_t * pte,unsigned long addr,unsigned long next,struct mm_walk * walk)27871e133ab2SMartin Schwidefsky static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
27881e133ab2SMartin Schwidefsky 			     unsigned long next, struct mm_walk *walk)
27891e133ab2SMartin Schwidefsky {
27901e133ab2SMartin Schwidefsky 	ptep_zap_unused(walk->mm, addr, pte, 1);
27911e133ab2SMartin Schwidefsky 	return 0;
27921e133ab2SMartin Schwidefsky }
27931e133ab2SMartin Schwidefsky 
27947b86ac33SChristoph Hellwig static const struct mm_walk_ops reset_cmma_walk_ops = {
27957b86ac33SChristoph Hellwig 	.pte_entry		= __s390_reset_cmma,
279649b06385SSuren Baghdasaryan 	.walk_lock		= PGWALK_WRLOCK,
27977b86ac33SChristoph Hellwig };
27987b86ac33SChristoph Hellwig 
27991e133ab2SMartin Schwidefsky void s390_reset_cmma(struct mm_struct *mm)
28001e133ab2SMartin Schwidefsky {
2801d8ed45c5SMichel Lespinasse 	mmap_write_lock(mm);
28027b86ac33SChristoph Hellwig 	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
2803d8ed45c5SMichel Lespinasse 	mmap_write_unlock(mm);
28041e133ab2SMartin Schwidefsky }
28051e133ab2SMartin Schwidefsky EXPORT_SYMBOL_GPL(s390_reset_cmma);
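/*
 * Usage sketch (hypothetical, not from this file): a VM management path
 * that wants to reset the CMMA state of all guest pages, for example
 * when userspace requests a clean page-state slate. The kvm pointer is
 * an assumption.
 */
#if 0	/* example only */
	s390_reset_cmma(kvm->mm);	/* walks 0..TASK_SIZE, zaps unused ptes */
#endif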
280612748007SChristian Borntraeger 
28076f73517dSClaudio Imbrenda #define GATHER_GET_PAGES 32
280812748007SChristian Borntraeger 
28096f73517dSClaudio Imbrenda struct reset_walk_state {
28106f73517dSClaudio Imbrenda 	unsigned long next;
28116f73517dSClaudio Imbrenda 	unsigned long count;
28126f73517dSClaudio Imbrenda 	unsigned long pfns[GATHER_GET_PAGES];
281312748007SChristian Borntraeger };
281412748007SChristian Borntraeger 
28156f73517dSClaudio Imbrenda static int s390_gather_pages(pte_t *ptep, unsigned long addr,
28166f73517dSClaudio Imbrenda 			     unsigned long next, struct mm_walk *walk)
281712748007SChristian Borntraeger {
28186f73517dSClaudio Imbrenda 	struct reset_walk_state *p = walk->private;
28196f73517dSClaudio Imbrenda 	pte_t pte = READ_ONCE(*ptep);
28206f73517dSClaudio Imbrenda 
28216f73517dSClaudio Imbrenda 	if (pte_present(pte)) {
28226f73517dSClaudio Imbrenda 		/* we have a reference from the mapping, take an extra one */
28236f73517dSClaudio Imbrenda 		get_page(phys_to_page(pte_val(pte)));
28246f73517dSClaudio Imbrenda 		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
28256f73517dSClaudio Imbrenda 		p->next = next;
28266f73517dSClaudio Imbrenda 		p->count++;
282712748007SChristian Borntraeger 	}
28286f73517dSClaudio Imbrenda 	return p->count >= GATHER_GET_PAGES;
28296f73517dSClaudio Imbrenda }
28306f73517dSClaudio Imbrenda 
28316f73517dSClaudio Imbrenda static const struct mm_walk_ops gather_pages_ops = {
28326f73517dSClaudio Imbrenda 	.pte_entry = s390_gather_pages,
283349b06385SSuren Baghdasaryan 	.walk_lock = PGWALK_RDLOCK,
28346f73517dSClaudio Imbrenda };
28356f73517dSClaudio Imbrenda 
28366f73517dSClaudio Imbrenda /*
28376f73517dSClaudio Imbrenda  * Call the Destroy secure page UVC on each page in the given array of PFNs.
28386f73517dSClaudio Imbrenda  * Each page needs to have an extra reference, which will be released here.
28396f73517dSClaudio Imbrenda  */
28406f73517dSClaudio Imbrenda void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
28416f73517dSClaudio Imbrenda {
28426f73517dSClaudio Imbrenda 	unsigned long i;
28436f73517dSClaudio Imbrenda 
28446f73517dSClaudio Imbrenda 	for (i = 0; i < count; i++) {
28456f73517dSClaudio Imbrenda 		/* we always have an extra reference */
28466f73517dSClaudio Imbrenda 		uv_destroy_owned_page(pfn_to_phys(pfns[i]));
28476f73517dSClaudio Imbrenda 		/* get rid of the extra reference */
28486f73517dSClaudio Imbrenda 		put_page(pfn_to_page(pfns[i]));
28496f73517dSClaudio Imbrenda 		cond_resched();
28506f73517dSClaudio Imbrenda 	}
28516f73517dSClaudio Imbrenda }
28526f73517dSClaudio Imbrenda EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
28536f73517dSClaudio Imbrenda 
28546f73517dSClaudio Imbrenda /**
28556f73517dSClaudio Imbrenda  * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
28566f73517dSClaudio Imbrenda  * in the given range of the given address space.
28576f73517dSClaudio Imbrenda  * @mm: the mm to operate on
28586f73517dSClaudio Imbrenda  * @start: the start of the range
28596f73517dSClaudio Imbrenda  * @end: the end of the range
28606f73517dSClaudio Imbrenda  * @interruptible: if set, stop when a fatal signal is pending
28616f73517dSClaudio Imbrenda  *
28626f73517dSClaudio Imbrenda  * Walk the given range of the given address space and call the destroy
28636f73517dSClaudio Imbrenda  * secure page UVC on each page. Optionally exit early if a fatal signal is
28646f73517dSClaudio Imbrenda  * pending.
28656f73517dSClaudio Imbrenda  *
28666f73517dSClaudio Imbrenda  * Return: 0 on success, -EINTR if the function stopped before completing
28676f73517dSClaudio Imbrenda  */
28686f73517dSClaudio Imbrenda int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
28696f73517dSClaudio Imbrenda 			    unsigned long end, bool interruptible)
28706f73517dSClaudio Imbrenda {
28716f73517dSClaudio Imbrenda 	struct reset_walk_state state = { .next = start };
28726f73517dSClaudio Imbrenda 	int r = 1;
28736f73517dSClaudio Imbrenda 
28746f73517dSClaudio Imbrenda 	while (r > 0) {
28756f73517dSClaudio Imbrenda 		state.count = 0;
28766f73517dSClaudio Imbrenda 		mmap_read_lock(mm);
28776f73517dSClaudio Imbrenda 		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
28786f73517dSClaudio Imbrenda 		mmap_read_unlock(mm);
28796f73517dSClaudio Imbrenda 		cond_resched();
28806f73517dSClaudio Imbrenda 		s390_uv_destroy_pfns(state.count, state.pfns);
28816f73517dSClaudio Imbrenda 		if (interruptible && fatal_signal_pending(current))
28826f73517dSClaudio Imbrenda 			return -EINTR;
28836f73517dSClaudio Imbrenda 	}
28846f73517dSClaudio Imbrenda 	return 0;
28856f73517dSClaudio Imbrenda }
28866f73517dSClaudio Imbrenda EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
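/*
 * Usage sketch (hypothetical, not from this file): the two natural ways
 * to drive __s390_uv_destroy_range(). Only the exported function above
 * is real; the surrounding context is assumed.
 */
#if 0	/* example only */
	/* Best effort: stop with -EINTR if the task got a fatal signal. */
	rc = __s390_uv_destroy_range(mm, 0, TASK_SIZE, true);

	/* Must-complete variant, e.g. on final teardown: never -EINTR. */
	__s390_uv_destroy_range(mm, 0, TASK_SIZE, false);
#endif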
2887faa2f72cSClaudio Imbrenda 
2888faa2f72cSClaudio Imbrenda /**
2889faa2f72cSClaudio Imbrenda  * s390_unlist_old_asce - Remove the topmost level of page tables from the
2890faa2f72cSClaudio Imbrenda  * list of page tables of the gmap.
2891faa2f72cSClaudio Imbrenda  * @gmap: the gmap whose table is to be removed
2892faa2f72cSClaudio Imbrenda  *
2893faa2f72cSClaudio Imbrenda  * On s390x, KVM keeps a list of all pages containing the page tables of the
2894faa2f72cSClaudio Imbrenda  * gmap (the CRST list). This list is used at teardown time to free all
2895faa2f72cSClaudio Imbrenda  * pages that are no longer needed.
2896faa2f72cSClaudio Imbrenda  *
2897faa2f72cSClaudio Imbrenda  * This function removes the topmost page of the tree (the one pointed to by
2898faa2f72cSClaudio Imbrenda  * the ASCE) from the CRST list.
2899faa2f72cSClaudio Imbrenda  *
2900faa2f72cSClaudio Imbrenda  * This means that it will not be freed when the VM is torn down, and needs
2901faa2f72cSClaudio Imbrenda  * to be handled separately by the caller, unless a leak is actually
2902faa2f72cSClaudio Imbrenda  * intended. Note that this function only removes the page from the
2903faa2f72cSClaudio Imbrenda  * list; the page is still used as the top level page table (and ASCE).
2904faa2f72cSClaudio Imbrenda  */
2905faa2f72cSClaudio Imbrenda void s390_unlist_old_asce(struct gmap *gmap)
2906faa2f72cSClaudio Imbrenda {
2907faa2f72cSClaudio Imbrenda 	struct page *old;
2908faa2f72cSClaudio Imbrenda 
2909faa2f72cSClaudio Imbrenda 	old = virt_to_page(gmap->table);
2910faa2f72cSClaudio Imbrenda 	spin_lock(&gmap->guest_table_lock);
2911faa2f72cSClaudio Imbrenda 	list_del(&old->lru);
2912faa2f72cSClaudio Imbrenda 	/*
2913faa2f72cSClaudio Imbrenda 	 * Sometimes the topmost page might need to be "removed" multiple
2914faa2f72cSClaudio Imbrenda 	 * times, for example if the VM is rebooted into secure mode several
2915faa2f72cSClaudio Imbrenda 	 * times concurrently, or if s390_replace_asce fails after calling
2916faa2f72cSClaudio Imbrenda 	 * s390_unlist_old_asce and is attempted again later. In that case
2917faa2f72cSClaudio Imbrenda 	 * the old ASCE has been removed from the list, and therefore it
2918faa2f72cSClaudio Imbrenda 	 * will not be freed when the VM terminates, but the ASCE is still
2919faa2f72cSClaudio Imbrenda 	 * in use and still pointed to.
2920faa2f72cSClaudio Imbrenda 	 * A subsequent call to replace_asce will follow the pointer and try
2921faa2f72cSClaudio Imbrenda 	 * to remove the same page from the list again.
2922faa2f72cSClaudio Imbrenda 	 * Therefore it's necessary that the page of the ASCE has valid
2923faa2f72cSClaudio Imbrenda 	 * pointers, so list_del can work (and do nothing) without
2924faa2f72cSClaudio Imbrenda 	 * dereferencing stale or invalid pointers.
2925faa2f72cSClaudio Imbrenda 	 */
2926faa2f72cSClaudio Imbrenda 	INIT_LIST_HEAD(&old->lru);
2927faa2f72cSClaudio Imbrenda 	spin_unlock(&gmap->guest_table_lock);
2928faa2f72cSClaudio Imbrenda }
2929faa2f72cSClaudio Imbrenda EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
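/*
 * Illustrative sketch (not part of this file): why the INIT_LIST_HEAD
 * above makes a repeated removal harmless. On a self-linked entry,
 * list_del() only rewrites the entry's own pointers, so no other list
 * member is touched.
 */
#if 0	/* example only */
	list_del(&page->lru);		/* unlink from the CRST list */
	INIT_LIST_HEAD(&page->lru);	/* entry is now self-linked */
	list_del(&page->lru);		/* safe no-op on a self-linked entry */
#endif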
2930faa2f72cSClaudio Imbrenda 
2931faa2f72cSClaudio Imbrenda /**
2932faa2f72cSClaudio Imbrenda  * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
2933faa2f72cSClaudio Imbrenda  * @gmap: the gmap whose ASCE needs to be replaced
2934faa2f72cSClaudio Imbrenda  *
2935292a7d6fSClaudio Imbrenda  * If the ASCE is of SEGMENT type, this function returns -EINVAL: replacing
2936292a7d6fSClaudio Imbrenda  * such an ASCE would leave the pointers in the host_to_guest radix tree
2937292a7d6fSClaudio Imbrenda  * pointing to the wrong pages, causing use-after-free and memory corruption.
2938faa2f72cSClaudio Imbrenda  * If the allocation of the new top level page table fails, the ASCE is not
2939faa2f72cSClaudio Imbrenda  * replaced.
2940faa2f72cSClaudio Imbrenda  * In any case, the old ASCE is always removed from the gmap CRST list.
2941faa2f72cSClaudio Imbrenda  * Therefore the caller has to make sure to save a pointer to it
2942faa2f72cSClaudio Imbrenda  * beforehand, unless a leak is actually intended.
2943faa2f72cSClaudio Imbrenda  */
2944faa2f72cSClaudio Imbrenda int s390_replace_asce(struct gmap *gmap)
2945faa2f72cSClaudio Imbrenda {
2946faa2f72cSClaudio Imbrenda 	unsigned long asce;
2947faa2f72cSClaudio Imbrenda 	struct page *page;
2948faa2f72cSClaudio Imbrenda 	void *table;
2949faa2f72cSClaudio Imbrenda 
2950faa2f72cSClaudio Imbrenda 	s390_unlist_old_asce(gmap);
2951faa2f72cSClaudio Imbrenda 
2952292a7d6fSClaudio Imbrenda 	/* Replacing segment type ASCEs would cause serious issues */
2953292a7d6fSClaudio Imbrenda 	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
2954292a7d6fSClaudio Imbrenda 		return -EINVAL;
2955292a7d6fSClaudio Imbrenda 
2956bfabe8d0SHeiko Carstens 	page = gmap_alloc_crst();
2957faa2f72cSClaudio Imbrenda 	if (!page)
2958faa2f72cSClaudio Imbrenda 		return -ENOMEM;
2959c2fceb59SClaudio Imbrenda 	page->index = 0;
2960faa2f72cSClaudio Imbrenda 	table = page_to_virt(page);
2961faa2f72cSClaudio Imbrenda 	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
2962faa2f72cSClaudio Imbrenda 
2963faa2f72cSClaudio Imbrenda 	/*
2964faa2f72cSClaudio Imbrenda 	 * The caller has to deal with the old ASCE, but here we make sure
2965faa2f72cSClaudio Imbrenda 	 * the new one is properly added to the CRST list, so that
2966faa2f72cSClaudio Imbrenda 	 * it will be freed when the VM is torn down.
2967faa2f72cSClaudio Imbrenda 	 */
2968faa2f72cSClaudio Imbrenda 	spin_lock(&gmap->guest_table_lock);
2969faa2f72cSClaudio Imbrenda 	list_add(&page->lru, &gmap->crst_list);
2970faa2f72cSClaudio Imbrenda 	spin_unlock(&gmap->guest_table_lock);
2971faa2f72cSClaudio Imbrenda 
2972faa2f72cSClaudio Imbrenda 	/* Set new table origin while preserving existing ASCE control bits */
2973faa2f72cSClaudio Imbrenda 	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
2974faa2f72cSClaudio Imbrenda 	WRITE_ONCE(gmap->asce, asce);
2975faa2f72cSClaudio Imbrenda 	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
2976faa2f72cSClaudio Imbrenda 	WRITE_ONCE(gmap->table, table);
2977faa2f72cSClaudio Imbrenda 
2978faa2f72cSClaudio Imbrenda 	return 0;
2979faa2f72cSClaudio Imbrenda }
2980faa2f72cSClaudio Imbrenda EXPORT_SYMBOL_GPL(s390_replace_asce);
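/*
 * Usage sketch (hypothetical, not from this file): because the old top
 * level table is unconditionally unlisted, a caller that must not leak
 * it has to keep its own pointer before replacing the ASCE and dispose
 * of the page once it is guaranteed to be unused. The error handling
 * shown is an assumption.
 */
#if 0	/* example only */
	struct page *old = virt_to_page(gmap->table);
	int rc;

	rc = s390_replace_asce(gmap);
	if (rc)
		return rc;	/* old table unlisted but still in use */
	/* once nothing references the old table anymore: */
	__free_pages(old, CRST_ALLOC_ORDER);
#endif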
2981