#define pr_fmt(fmt) "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

static struct hv_flush_pcpu __percpu *pcpu_flush;

static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT)
                        gva_list[gva_n] |= ~PAGE_MASK;
                else if (diff)
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

                cur += HV_TLB_FLUSH_UNIT;
                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}
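
/*
 * Illustrative worked example of the encoding above (addresses are made up,
 * purely for clarity): flushing [0x7f0000000000, 0x7f0000003000) with 4K
 * pages gives diff = 0x3000, so the entry becomes
 * 0x7f0000000000 | ((0x3000 - 1) >> PAGE_SHIFT) = 0x7f0000000000 | 2,
 * i.e. "flush this page plus 2 additional pages".  A range spanning
 * HV_TLB_FLUSH_UNIT or more sets all of the low 12 bits instead.
 */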

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /*
         * Some banks may end up being empty but this is acceptable.
         */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;

                /* valid_bank_mask can represent up to 64 banks */
                if (vcpu_bank >= 64)
                        return 0;

                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}
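
/*
 * Illustrative example (VP numbers assumed only for clarity): a cpumask that
 * maps to VP numbers 1 and 70 sets bit 1 in bank_contents[0] and bit 6
 * (70 % 64) in bank_contents[1], so nr_bank is 2 and valid_bank_mask is 0x3.
 * The bank count matters because hv_vp_set is variable-length; it is folded
 * into the variable header size passed to the rep hypercalls below.
 */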

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush = this_cpu_ptr(pcpu_flush);

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        if (vcpu >= 64) {
                                /*
                                 * processor_mask cannot represent this vCPU;
                                 * re-enable interrupts before falling back.
                                 */
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * We can flush not more than max_gvas with one hypercall. Flush the
         * whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush = this_cpu_ptr(pcpu_flush_ex);

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }

        /*
         * We can flush not more than max_gvas with one hypercall. Flush the
         * whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                                HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                                0, nr_bank + 2, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                                HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                                0, nr_bank + 2, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                                HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                                gva_n, nr_bank + 2, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        setup_clear_cpu_cap(X86_FEATURE_PCID);

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
        else
                pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
}