/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <linux/cma.h>

#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>

#define KVM_CMA_CHUNK_ORDER	18

/*
 * The hash page table alignment on newer CPUs (CPU_FTR_ARCH_206)
 * should be a power of 2.
 */
#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
/*
 * By default we reserve 5% of memory for hash page table allocation.
 */
static unsigned long kvm_cma_resv_ratio = 5;
/*
 * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
 * Each RMA has to be physically contiguous and of a size that the
 * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
 * and other larger sizes.  Since we are unlikely to be able to allocate
 * that much physically contiguous memory after the system is up and
 * running, we preallocate a set of RMAs in early boot using CMA.
 */
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);

static struct cma *kvm_cma;

/*
 * Work out the RMLS (real mode limit selector) field value for a
 * given RMA size.  Assumes POWER7 or PPC970.
 */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}

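/*
 * Parse the kvm_rma_size= boot parameter, which overrides the default
 * RMA size.  The requested size must be one that lpcr_rmls() accepts,
 * i.e. one the hardware supports; otherwise it is rejected with -EINVAL.
 */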
static int __init early_parse_rma_size(char *p)
{
	unsigned long kvm_rma_size;

	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	kvm_rma_size = memparse(p, &p);
	/*
	 * Check that the requested size is one supported in hardware.
	 */
	if (lpcr_rmls(kvm_rma_size) < 0) {
		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
		return -EINVAL;
	}
	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
	return 0;
}
early_param("kvm_rma_size", early_parse_rma_size);

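/*
 * Allocate an RMA descriptor and kvm_rma_pages of physically contiguous
 * memory from the KVM CMA area.  The use count starts at 1 and is
 * dropped by kvm_release_rma().  Returns NULL if either allocation fails.
 */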
struct kvm_rma_info *kvm_alloc_rma(void)
{
	struct page *page;
	struct kvm_rma_info *ri;

	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
	if (!ri)
		return NULL;
	page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
	if (!page)
		goto err_out;
	atomic_set(&ri->use_count, 1);
	ri->base_pfn = page_to_pfn(page);
	return ri;
err_out:
	kfree(ri);
	return NULL;
}
EXPORT_SYMBOL_GPL(kvm_alloc_rma);

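/*
 * Drop a reference to an RMA; when the last reference goes away, return
 * its pages to the KVM CMA area and free the descriptor.
 */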
void kvm_release_rma(struct kvm_rma_info *ri)
{
	if (atomic_dec_and_test(&ri->use_count)) {
		cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
		kfree(ri);
	}
}
EXPORT_SYMBOL_GPL(kvm_release_rma);

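/*
 * Parse the kvm_cma_resv_ratio= boot parameter, which overrides the
 * default 5% of memory reserved for hash page table allocation.
 */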
static int __init early_parse_kvm_cma_resv(char *p)
{
	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
}
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);

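/*
 * Allocate nr_pages of physically contiguous memory from the KVM CMA
 * area for use as a guest hashed page table (HPT).  Pre-POWER7 CPUs
 * (!CPU_FTR_ARCH_206) need the HPT aligned on a multiple of its size;
 * newer CPUs only need HPT_ALIGN_PAGES alignment.
 */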
struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
	unsigned long align_pages = HPT_ALIGN_PAGES;

	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);

	/* Old CPUs require HPT aligned on a multiple of its size */
	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		align_pages = nr_pages;
	return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);

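/*
 * Return an HPT previously obtained from kvm_alloc_hpt() to the KVM
 * CMA area.
 */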
void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
	cma_release(kvm_cma, page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);

/**
 * kvm_cma_reserve() - reserve area for kvm hash page table
 *
 * This function reserves memory from the early allocator.  It should be
 * called by arch-specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init kvm_cma_reserve(void)
{
	unsigned long align_size;
	struct memblock_region *reg;
	phys_addr_t selected_size = 0;

	/*
	 * We cannot use memblock_phys_mem_size() here, because
	 * memblock_analyze() has not been called yet.
	 */
	for_each_memblock(memory, reg)
		selected_size += memblock_region_memory_end_pfn(reg) -
				 memblock_region_memory_base_pfn(reg);

	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
	if (selected_size) {
		pr_debug("%s: reserving %lu MiB for global area\n", __func__,
			 (unsigned long)selected_size / SZ_1M);
		/*
		 * Old CPUs require the HPT to be aligned on a multiple of
		 * its size, so for them make the alignment the maximum
		 * size we could request.
		 */
		if (!cpu_has_feature(CPU_FTR_ARCH_206))
			align_size = __rounddown_pow_of_two(selected_size);
		else
			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;

		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
		cma_declare_contiguous(0, selected_size, 0, align_size,
			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
	}
}

/*
 * When running HV mode KVM we need to block certain operations while KVM VMs
 * exist in the system. We use a counter of VMs to track this.
 *
 * One of the operations we need to block is onlining of secondaries, so we
 * protect hv_vm_count with get/put_online_cpus().
 */
static atomic_t hv_vm_count;

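/*
 * Called when an HV-mode VM is created: bump the VM count.  The
 * get/put_online_cpus() pair serializes the update against CPU
 * online/offline operations, per the comment above.
 */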
void kvm_hv_vm_activated(void)
{
	get_online_cpus();
	atomic_inc(&hv_vm_count);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);

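/*
 * Called when an HV-mode VM is destroyed: drop the VM count under the
 * same CPU hotplug protection.
 */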
void kvm_hv_vm_deactivated(void)
{
	get_online_cpus();
	atomic_dec(&hv_vm_count);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);

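/*
 * Returns true while at least one HV-mode VM exists, i.e. while the
 * operations mentioned above must remain blocked.
 */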
bool kvm_hv_mode_active(void)
{
	return atomic_read(&hv_vm_count) != 0;
}

extern int hcall_real_table[], hcall_real_table_end[];

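/*
 * Return 1 if the hcall numbered cmd has a real-mode handler in
 * hcall_real_table, 0 otherwise.  Hcall numbers are multiples of 4,
 * hence the division to index the table.
 */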
int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
{
	cmd /= 4;
	if (cmd < hcall_real_table_end - hcall_real_table &&
	    hcall_real_table[cmd])
		return 1;

	return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);