#define pr_fmt(fmt) "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
	u64 address_space;
	u64 flags;
	u64 processor_mask;
	u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
	u64 address_space;
	u64 flags;
	struct {
		u64 format;
		u64 valid_bank_mask;
		u64 bank_contents[];
	} hv_vp_set;
	u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT)
			gva_list[gva_n] |= ~PAGE_MASK;
		else if (diff)
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

		cur += HV_TLB_FLUSH_UNIT;
		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
				    const struct cpumask *cpus)
{
	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

	/* valid_bank_mask can represent up to 64 banks */
	if (hv_max_vp_index / 64 >= 64)
		return 0;

	/*
	 * Clear all banks up to the maximum possible bank; hv_flush_pcpu_ex
	 * structs are not cleared between calls, so we risk flushing unneeded
	 * vCPUs otherwise.
	 */
	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

	/*
	 * Some banks may end up being empty but this is acceptable.
	 */
	for_each_cpu(cpu, cpus) {
		vcpu = hv_cpu_number_to_vp_number(cpu);
		vcpu_bank = vcpu / 64;
		vcpu_offset = vcpu % 64;
		__set_bit(vcpu_offset, (unsigned long *)
			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
		if (vcpu_bank >= nr_bank)
			nr_bank = vcpu_bank + 1;
	}
	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

	return nr_bank;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_flush_pcpu **flush_pcpu;
	struct hv_flush_pcpu *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush_pcpu = this_cpu_ptr(pcpu_flush);

	if (unlikely(!*flush_pcpu))
		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu >= 64) {
				/* VP number doesn't fit in processor_mask */
				local_irq_restore(flags);
				goto do_native;
			}

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We cannot flush more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				       const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_flush_pcpu_ex **flush_pcpu;
	struct hv_flush_pcpu_ex *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush_ex || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

	if (unlikely(!*flush_pcpu))
		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	if (!cpumask_equal(cpus, cpu_present_mask)) {
		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
		nr_bank = cpumask_to_vp_set(flush, cpus);
	}

	if (!nr_bank) {
		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	}

	/*
	 * We cannot flush more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
				gva_n, nr_bank, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
		pr_info("Using hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
	} else {
		pr_info("Using ext hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
	}
}

void hyper_alloc_mmu(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
	else
		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}