/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>

#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>

#include "book3s_hv_cma.h"

/*
 * Hash page table alignment on newer CPUs (CPU_FTR_ARCH_206)
 * should be a power of 2.
 */
#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
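/*
 * Worked example (illustrative): with 4kB pages (PAGE_SHIFT == 12),
 * HPT_ALIGN_PAGES is (1 << 18) >> 12 == 64 pages, i.e. 256kB of
 * alignment; with 64kB pages it works out to 4 pages.
 */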
/*
 * By default we reserve 5% of memory for hash pagetable allocation.
 */
static unsigned long kvm_cma_resv_ratio = 5;
/*
 * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
 * Each RMA has to be physically contiguous and of a size that the
 * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
 * and other larger sizes.  Since we are unlikely to be able to allocate
 * that much physically contiguous memory after the system is up and
 * running, we preallocate a set of RMAs in early boot using CMA.
 * The RMA size should be a power of 2.
 */
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);
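/*
 * For illustration: with 4kB pages, kvm_rma_pages is
 * (1 << 27) >> 12 == 32768 pages, i.e. the default 128MB RMA.
 */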

/*
 * Work out the RMLS (real mode limit selector) field value for a
 * given RMA size.  Assumes POWER7 or PPC970.
 */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}
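/*
 * Callers treat a negative return as "RMA size not supported by the
 * hardware", e.g. (sketch based on early_parse_rma_size() below):
 *
 *	if (lpcr_rmls(rma_size) < 0)
 *		return -EINVAL;
 *
 * A non-negative value is the encoding for the RMLS field of the LPCR.
 */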

static int __init early_parse_rma_size(char *p)
{
	unsigned long kvm_rma_size;

	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	kvm_rma_size = memparse(p, &p);
	/*
	 * Check that the requested size is one supported in hardware
	 */
	if (lpcr_rmls(kvm_rma_size) < 0) {
		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
		return -EINVAL;
	}
	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
	return 0;
}
early_param("kvm_rma_size", early_parse_rma_size);
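/*
 * Usage example (kernel command line): booting with
 *
 *	kvm_rma_size=256M
 *
 * requests 256MB RMAs; memparse() accepts the usual K/M/G suffixes.
 * A size that lpcr_rmls() rejects is ignored and the default kept.
 */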

struct kvm_rma_info *kvm_alloc_rma(void)
{
	struct page *page;
	struct kvm_rma_info *ri;

	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
	if (!ri)
		return NULL;
	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
	if (!page)
		goto err_out;
	atomic_set(&ri->use_count, 1);
	ri->base_pfn = page_to_pfn(page);
	return ri;
err_out:
	kfree(ri);
	return NULL;
}
EXPORT_SYMBOL_GPL(kvm_alloc_rma);

void kvm_release_rma(struct kvm_rma_info *ri)
{
	if (atomic_dec_and_test(&ri->use_count)) {
		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
		kfree(ri);
	}
}
EXPORT_SYMBOL_GPL(kvm_release_rma);
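/*
 * Illustrative pairing (sketch only, not lifted from an in-tree caller):
 *
 *	struct kvm_rma_info *ri = kvm_alloc_rma();
 *	if (!ri)
 *		return -ENOMEM;
 *	...
 *	kvm_release_rma(ri);
 *
 * use_count starts at 1 in kvm_alloc_rma(), so releasing the last
 * reference returns the CMA pages and frees the descriptor.
 */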

static int __init early_parse_kvm_cma_resv(char *p)
{
	pr_debug("%s(%s)\n", __func__, p);
	if (!p)
		return -EINVAL;
	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
}
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
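/*
 * Usage example (kernel command line): booting with
 *
 *	kvm_cma_resv_ratio=10
 *
 * reserves 10% of memory for the KVM CMA area instead of the default 5%.
 */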

struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
	unsigned long align_pages = HPT_ALIGN_PAGES;

	/* Old CPUs require HPT aligned on a multiple of its size */
	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		align_pages = nr_pages;
	return kvm_alloc_cma(nr_pages, align_pages);
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);

void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
	kvm_release_cma(page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
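/*
 * Illustrative pairing (sketch; the 16MB size is only an example):
 * a 16MB hash page table with 4kB pages is (1 << 24) >> 12 == 4096
 * pages, so a caller would do something like
 *
 *	struct page *hpt = kvm_alloc_hpt(4096);
 *	if (!hpt)
 *		return -ENOMEM;
 *	...
 *	kvm_release_hpt(hpt, 4096);
 */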

/**
 * kvm_cma_reserve() - reserve area for kvm hash pagetable
 *
 * This function reserves memory from the early allocator. It should be
 * called by arch-specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init kvm_cma_reserve(void)
{
	unsigned long align_size;
	struct memblock_region *reg;
	phys_addr_t selected_size = 0;
	/*
	 * We cannot use memblock_phys_mem_size() here, because
	 * memblock_analyze() has not been called yet.
	 */
	for_each_memblock(memory, reg)
		selected_size += memblock_region_memory_end_pfn(reg) -
				 memblock_region_memory_base_pfn(reg);

	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
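	/*
	 * Worked example (illustrative): with 8GB of memory, 4kB pages
	 * and the default 5% ratio, the loop counts 2097152 pages, so
	 * selected_size is 2097152 * 5 / 100 == 104857 pages, i.e.
	 * roughly 410MB.
	 */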
	if (selected_size) {
		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
			 (unsigned long)selected_size / SZ_1M);
		/*
		 * Old CPUs require the HPT to be aligned on a multiple of
		 * its size, so for them make the alignment the maximum
		 * size we could be asked for.
		 */
		if (!cpu_has_feature(CPU_FTR_ARCH_206))
			align_size = __rounddown_pow_of_two(selected_size);
		else
			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;

		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
		kvm_cma_declare_contiguous(selected_size, align_size);
	}
}