/*
 * Info about, and flushing the host cpu caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"


int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(CONFIG_DARWIN)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#else
/* POSIX */

static void sys_cache_info(int *isize, int *dsize)
{
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */


/*
 * Architecture (+ OS) specific cache detection mechanisms.
 */

#if defined(__powerpc__)
static bool have_coherent_icache;
#endif

#if defined(__aarch64__) && !defined(CONFIG_DARWIN)
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
static uint64_t save_ctr_el0;
static void arch_cache_info(int *isize, int *dsize)
{
    uint64_t ctr;

    /*
     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
     * but (at least under Linux) these are marked protected by the
     * kernel.  However, CTR_EL0 contains the minimum linesize in the
     * entire hierarchy, and is used by userspace cache flushing.
     *
     * We will also use this value in flush_idcache_range.
     */
    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
    save_ctr_el0 = ctr;

    if (*isize == 0 || *dsize == 0) {
        if (*isize == 0) {
            *isize = 4 << (ctr & 0xf);
        }
        if (*dsize == 0) {
            *dsize = 4 << ((ctr >> 16) & 0xf);
        }
    }
}

#elif defined(_ARCH_PPC) && defined(__linux__)
# include "elf.h"

static void arch_cache_info(int *isize, int *dsize)
{
    if (*isize == 0) {
        *isize = qemu_getauxval(AT_ICACHEBSIZE);
    }
    if (*dsize == 0) {
        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
    }
    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
}

#else
static void arch_cache_info(int *isize, int *dsize) { }
#endif /* arch_cache_info */

/*
 * ... and if all else fails ...
 */

static void fallback_cache_info(int *isize, int *dsize)
{
    /* If we can only find one of the two, assume they're the same. */
    if (*isize) {
        if (*dsize) {
            /* Success! */
        } else {
            *dsize = *isize;
        }
    } else if (*dsize) {
        *isize = *dsize;
    } else {
#if defined(_ARCH_PPC)
        /*
         * For PPC, we're going to use the cache sizes computed for
         * flush_idcache_range.  Which means that we must use the
         * architecture minimum.
         */
        *isize = *dsize = 16;
#else
        /* Otherwise, 64 bytes is not uncommon. */
        *isize = *dsize = 64;
#endif
    }
}

static void __attribute__((constructor)) init_cache_info(void)
{
    int isize = 0, dsize = 0;

    sys_cache_info(&isize, &dsize);
    arch_cache_info(&isize, &dsize);
    fallback_cache_info(&isize, &dsize);

    assert((isize & (isize - 1)) == 0);
    assert((dsize & (dsize - 1)) == 0);

    qemu_icache_linesize = isize;
    qemu_icache_linesize_log = ctz32(isize);
    qemu_dcache_linesize = dsize;
    qemu_dcache_linesize_log = ctz32(dsize);

    qatomic64_init();
}
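
/*
 * Illustrative note (added for clarity, not from the original sources):
 * the line sizes are asserted to be powers of two above so that ctz32()
 * really does yield log2, and the cached *_log values let callers do
 * cache-line arithmetic with shifts instead of divisions.  For example,
 *
 *     size_t lines = ((addr + len - 1) >> qemu_icache_linesize_log)
 *                  - (addr >> qemu_icache_linesize_log) + 1;
 *
 * counts the instruction cache lines touched by [addr, addr + len)
 * for any len > 0.
 */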

/*
 * Architecture (+ OS) specific cache flushing mechanisms.
 */

#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)

/* Caches are coherent and do not require flushing; symbol inline. */

#elif defined(__aarch64__)

#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
extern void sys_icache_invalidate(void *start, size_t len);
extern void sys_dcache_flush(void *start, size_t len);
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    sys_dcache_flush((void *)rw, len);
    sys_icache_invalidate((void *)rx, len);
}
#else
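
/*
 * Background note (added for clarity, per the architected CTR_EL0 layout):
 * IminLine is bits [3:0] and DminLine is bits [19:16], each holding the
 * log2 of the line size in 4-byte words, hence the "4 << field" decode
 * in arch_cache_info() above (a field value of 4 means 64 bytes).
 * IDC (bit 28) and DIC (bit 29) advertise that the data cache clean and
 * the instruction cache invalidate, respectively, may be skipped, which
 * is what the two tests below check.
 */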

/*
 * This is a copy of gcc's __aarch64_sync_cache_range, modified
 * to fit this three-operand interface.
 */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
    const uint64_t ctr_el0 = save_ctr_el0;
    const uintptr_t icache_lsize = qemu_icache_linesize;
    const uintptr_t dcache_lsize = qemu_dcache_linesize;
    uintptr_t p;

    /*
     * If CTR_EL0.IDC is set, data cache clean to the Point of Unification
     * is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_IDC)) {
        /*
         * Loop over the address range, cleaning one cache line at a time.
         * The data cache must be cleaned to the Point of Unification first
         * to make sure the instruction cache fetches the updated data.
         */
        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
        }
        asm volatile("dsb\tish" : : : "memory");
    }

    /*
     * If CTR_EL0.DIC is set, instruction cache invalidation to the Point
     * of Unification is not required for data to instruction coherence.
     */
    if (!(ctr_el0 & CTR_DIC)) {
        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
        }
        asm volatile ("dsb\tish" : : : "memory");
    }

    asm volatile("isb" : : : "memory");
}
#endif /* CONFIG_DARWIN */

#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    size_t dsize, isize;

    /*
     * Some processors have coherent caches and support a simplified
     * flushing procedure.  See
     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
     */
    if (have_coherent_icache) {
        asm volatile ("sync\n\t"
                      "icbi 0,%0\n\t"
                      "isync"
                      : : "r"(rx) : "memory");
        return;
    }

    dsize = qemu_dcache_linesize;
    isize = qemu_icache_linesize;

    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif
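
/*
 * Usage sketch (illustrative only; everything other than
 * flush_idcache_range itself is a placeholder): a JIT-style caller that
 * writes code through one mapping (rw) and executes it through another
 * (rx), or through the same address when rw == rx, would typically do
 *
 *     memcpy((void *)rw, code_bytes, len);     // emit via the RW view
 *     flush_idcache_range(rx, rw, len);        // make it visible to I-fetch
 *     ((void (*)(void))rx)();                  // run via the RX view
 *
 * Each implementation above applies data cache maintenance through the
 * rw alias and instruction cache maintenance through the rx alias, where
 * the host requires it.
 */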