xref: /openbmc/linux/arch/x86/hyperv/mmu.c (revision 6396bb221514d2876fd6dc0aa2a1f240d99b37bb)
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
	u64 address_space;
	u64 flags;
	u64 processor_mask;
	u64 gva_list[];
};
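/*
 * Note: processor_mask above is a single u64, so it can only name VP
 * indices 0..63; hyperv_flush_tlb_others() below falls back to the
 * native path for anything larger. The Ex variants lift that limit.
 */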

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
	u64 address_space;
	u64 flags;
	struct hv_vpset hv_vp_set;
	u64 gva_list[];
};
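/*
 * hv_vp_set (struct hv_vpset) is variable-size: 'format' selects either
 * HV_GENERIC_SET_ALL or HV_GENERIC_SET_SPARSE_4K, and in the sparse case
 * valid_bank_mask says which 64-VP "banks" follow in bank_contents[],
 * so guests with more than 64 vCPUs can be described.
 */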

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
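/* With 4 KiB pages that is 16 MiB of virtual address space per entry. */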
/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT)
			gva_list[gva_n] |= ~PAGE_MASK;
		else if (diff)
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

		cur += HV_TLB_FLUSH_UNIT;
		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}
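/*
 * Example: a flush of three pages starting at 0xff000000 produces the
 * single entry 0xff000002 (base page 0xff000000, low bits = 2 extra
 * pages) and returns 1; a 32 MiB range produces two entries.
 */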

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_flush_pcpu **flush_pcpu;
	struct hv_flush_pcpu *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

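	/*
	 * hyperv_pcpu_input_arg is a pre-allocated per-CPU page used as
	 * hypercall input; interrupts stay disabled until after the
	 * hypercall so the page cannot be reused underneath us.
	 */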
	flush_pcpu = (struct hv_flush_pcpu **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

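	/*
	 * Encode the target CPUs: a mask covering every present CPU becomes
	 * HV_FLUSH_ALL_PROCESSORS; otherwise set one bit per VP index,
	 * falling back to the native path for VP numbers that do not fit
	 * in the 64-bit processor_mask.
	 */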
	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu >= 64) {
				/*
				 * Re-enable interrupts before bailing out;
				 * jumping straight to do_native would leak
				 * the local_irq_save() above.
				 */
				local_irq_restore(flags);
				goto do_native;
			}

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We can flush at most max_gvas entries with one hypercall. Flush
	 * the whole address space if we were asked for more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
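	/*
	 * With 4 KiB pages and a 24-byte fixed header this is
	 * (4096 - 24) / 8 = 509 list entries.
	 */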

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}

	local_irq_restore(flags);

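	/*
	 * The low 16 bits of the return value hold the Hyper-V status code;
	 * HV_STATUS_SUCCESS is 0, so any non-zero bits under
	 * HV_HYPERCALL_RESULT_MASK mean the hypercall failed and we fall
	 * back to flushing via IPIs.
	 */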
	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				       const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_flush_pcpu_ex **flush_pcpu;
	struct hv_flush_pcpu_ex *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush_pcpu = (struct hv_flush_pcpu_ex **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	if (!cpumask_equal(cpus, cpu_present_mask)) {
		flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
		nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
	}

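	/*
	 * nr_bank is still 0 when the mask covers every present CPU (we
	 * never called cpumask_to_vpset()); treat that, or a failed vpset
	 * encoding, as a request to flush all processors.
	 */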
	if (!nr_bank) {
		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	}

	/*
	 * We can flush at most max_gvas entries with one hypercall. Flush
	 * the whole address space if we were asked for more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);
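	/*
	 * Unlike the non-Ex case, the header is variable-size: each vpset
	 * bank consumes 8 bytes that would otherwise hold a gva_list entry.
	 * With 4 KiB pages, a 32-byte fixed header and one bank, that is
	 * (4096 - 32 - 8) / 8 = 507 entries.
	 */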

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
			gva_n, nr_bank, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

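/*
 * Hyper-V sets HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED when the flush
 * hypercalls are expected to be cheaper than IPI-based shootdowns, and
 * HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED when VP indices may exceed 64,
 * in which case the Ex variants (with sparse vpsets) must be used.
 */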
void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
		pr_info("Using hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
	} else {
		pr_info("Using ext hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
	}
}