/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>

#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>

#include "book3s_hv_cma.h"

/*
 * On newer CPUs (CPU_FTR_ARCH_206) the hash page table only needs
 * power-of-2 alignment; we use a 256kB boundary.
 */
#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
/*
 * By default we reserve 5% of memory for hash pagetable allocation.
 */
static unsigned long kvm_cma_resv_ratio = 5;

/*
 * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
 * Each RMA has to be physically contiguous and of a size that the
 * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
 * and other larger sizes.  Since we are unlikely to be able to allocate
 * that much physically contiguous memory after the system is up and
 * running, we preallocate a set of RMAs in early boot using CMA.
 */
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);

/*
 * Work out the RMLS (real mode limit selector) field value for a given
 * RMA size.  Assumes POWER7 or PPC970.
 */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}
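
/*
 * Illustration only (not used in this file): a caller that has chosen
 * an RMA size would typically fold the RMLS value into the partition's
 * LPCR.  Assuming the LPCR_RMLS field definitions from asm/reg.h, a
 * sketch would look like:
 *
 *	long rmls = lpcr_rmls(kvm_rma_pages << PAGE_SHIFT);
 *
 *	if (rmls < 0)
 *		return -EINVAL;
 *	lpcr = (lpcr & ~LPCR_RMLS) | ((unsigned long)rmls << LPCR_RMLS_SH);
 */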

static int __init early_parse_rma_size(char *p)
{
	unsigned long kvm_rma_size;

	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	kvm_rma_size = memparse(p, &p);
	/*
	 * Check that the requested size is one supported by the hardware.
	 */
	if (lpcr_rmls(kvm_rma_size) < 0) {
		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
		return -EINVAL;
	}
	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
	return 0;
}
early_param("kvm_rma_size", early_parse_rma_size);
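
/*
 * Example (sketch): booting with "kvm_rma_size=256M" on the kernel
 * command line selects a 256MB RMA.  memparse() accepts the usual
 * K/M/G suffixes; sizes that lpcr_rmls() rejects leave the default
 * 128MB in place.
 */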

struct kvm_rma_info *kvm_alloc_rma(void)
{
	struct page *page;
	struct kvm_rma_info *ri;

	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
	if (!ri)
		return NULL;
	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
	if (!page)
		goto err_out;
	atomic_set(&ri->use_count, 1);
	ri->base_pfn = page_to_pfn(page);
	return ri;
err_out:
	kfree(ri);
	return NULL;
}
EXPORT_SYMBOL_GPL(kvm_alloc_rma);

void kvm_release_rma(struct kvm_rma_info *ri)
{
	if (atomic_dec_and_test(&ri->use_count)) {
		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
		kfree(ri);
	}
}
EXPORT_SYMBOL_GPL(kvm_release_rma);
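
/*
 * Typical pairing (sketch only; the real caller lives in book3s_hv.c).
 * The RMA is allocated once with use_count = 1 and freed back to the
 * CMA area when the last reference is dropped:
 *
 *	struct kvm_rma_info *ri = kvm_alloc_rma();
 *
 *	if (!ri)
 *		return -ENOMEM;
 *	...
 *	kvm_release_rma(ri);
 */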

static int __init early_parse_kvm_cma_resv(char *p)
{
	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
}
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
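
/*
 * Example (sketch): booting with "kvm_cma_resv_ratio=10" reserves 10%
 * of system memory for the KVM CMA area instead of the default 5%.
 */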

struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
	unsigned long align_pages = HPT_ALIGN_PAGES;

	/* Old CPUs require the HPT to be aligned on a multiple of its size */
	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		align_pages = nr_pages;
	return kvm_alloc_cma(nr_pages, align_pages);
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);

void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
	kvm_release_cma(page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
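
/*
 * Illustration only: a caller wanting a 16MB HPT (4096 pages with 4kB
 * pages) would pair the two helpers roughly like this:
 *
 *	struct page *page = kvm_alloc_hpt(4096);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	...
 *	kvm_release_hpt(page, 4096);
 */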

/**
 * kvm_cma_reserve() - reserve area for kvm hash pagetable
 *
 * This function reserves memory from the early allocator.  It should be
 * called by arch specific code once the early allocator (memblock or
 * bootmem) has been activated and all other subsystems have already
 * allocated/reserved memory.
 */
void __init kvm_cma_reserve(void)
{
	unsigned long align_size;
	struct memblock_region *reg;
	phys_addr_t selected_size = 0;

	/*
	 * We cannot use memblock_phys_mem_size() here, because
	 * memblock_analyze() has not been called yet.
	 */
	for_each_memblock(memory, reg)
		selected_size += memblock_region_memory_end_pfn(reg) -
				 memblock_region_memory_base_pfn(reg);

	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
	if (selected_size) {
		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
			 (unsigned long)selected_size / SZ_1M);
		/*
		 * Old CPUs require the HPT to be aligned on a multiple of
		 * its size, so for them make the alignment the maximum
		 * size we could request.
		 */
		if (!cpu_has_feature(CPU_FTR_ARCH_206))
			align_size = __rounddown_pow_of_two(selected_size);
		else
			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;

		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
		kvm_cma_declare_contiguous(selected_size, align_size);
	}
}
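
/*
 * Worked example (sketch): with 16GB of memory and the default
 * kvm_cma_resv_ratio of 5, selected_size comes to roughly 800MB.  On a
 * CPU_FTR_ARCH_206 machine the alignment is then
 * max(128MB RMA, 256kB HPT alignment) = 128MB.
 */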

/*
 * When running HV mode KVM we need to block certain operations while KVM VMs
 * exist in the system. We use a counter of VMs to track this.
 *
 * One of the operations we need to block is onlining of secondaries, so we
 * protect hv_vm_count with get/put_online_cpus().
 */
static atomic_t hv_vm_count;

void kvm_hv_vm_activated(void)
{
	get_online_cpus();
	atomic_inc(&hv_vm_count);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);

void kvm_hv_vm_deactivated(void)
{
	get_online_cpus();
	atomic_dec(&hv_vm_count);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);

bool kvm_hv_mode_active(void)
{
	return atomic_read(&hv_vm_count) != 0;
}
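
/*
 * Illustration only: because the helpers above bump the counter under
 * get/put_online_cpus(), the CPU online path can test it safely while
 * it holds the hotplug lock, e.g. a cpu-bootable check might refuse a
 * secondary with:
 *
 *	if (kvm_hv_mode_active())
 *		return 0;
 */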

extern int hcall_real_table[], hcall_real_table_end[];

int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
{
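	/*
	 * Hypercall numbers are multiples of 4, so the real-mode handler
	 * table is indexed by hcall number / 4; a non-zero entry means a
	 * real-mode handler exists for that hcall.
	 */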
	cmd /= 4;
	if (cmd < hcall_real_table_end - hcall_real_table &&
	    hcall_real_table[cmd])
		return 1;

	return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);