/*
 * Info about, and flushing of, the host CPU caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"


int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(CONFIG_DARWIN)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#else
/* POSIX */

static void sys_cache_info(int *isize, int *dsize)
{
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */
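
/*
 * Illustrative sketch of the probe contract above (an assumption drawn
 * from how the layers below chain together; compiled out, not part of
 * the build, and example_probe_contract is a hypothetical name).  Each
 * probe writes *isize / *dsize only when it has a trustworthy answer,
 * so callers seed both with 0 and run the probes in order.
 */
#if 0
static void example_probe_contract(void)
{
    int isize = 0, dsize = 0;           /* 0 means "not yet known" */
    sys_cache_info(&isize, &dsize);     /* may fill one, both, or neither */
    /* arch_cache_info() and fallback_cache_info() resolve the rest */
}
#endif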

/*
 * Architecture (+ OS) specific cache detection mechanisms.
 */

#if defined(__powerpc__)
static bool have_coherent_icache;
#endif

#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
/*
 * Apple does not expose CTR_EL0, so we must use system interfaces.
 * Windows does not either, but there we use the generic implementation
 * of flush_idcache_range instead.
 */
static uint64_t save_ctr_el0;
static void arch_cache_info(int *isize, int *dsize)
{
    uint64_t ctr;

    /*
     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
     * but (at least under Linux) these are marked protected by the
     * kernel.  However, CTR_EL0 contains the minimum linesize in the
     * entire hierarchy, and is used by userspace cache flushing.
     *
     * We will also use this value in flush_idcache_range.
     */
    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
    save_ctr_el0 = ctr;

    if (*isize == 0) {
        *isize = 4 << (ctr & 0xf);
    }
    if (*dsize == 0) {
        *dsize = 4 << ((ctr >> 16) & 0xf);
    }
}

#elif defined(_ARCH_PPC) && defined(__linux__)
# include "elf.h"

static void arch_cache_info(int *isize, int *dsize)
{
    if (*isize == 0) {
        *isize = qemu_getauxval(AT_ICACHEBSIZE);
    }
    if (*dsize == 0) {
        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
    }
    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
}

#else
static void arch_cache_info(int *isize, int *dsize) { }
#endif /* arch_cache_info */

/*
 * ... and if all else fails ...
 */

static void fallback_cache_info(int *isize, int *dsize)
{
    /* If we can only find one of the two, assume they're the same. */
    if (*isize) {
        if (*dsize) {
            /* Success! */
        } else {
            *dsize = *isize;
        }
    } else if (*dsize) {
        *isize = *dsize;
    } else {
#if defined(_ARCH_PPC)
        /*
         * For PPC, we're going to use the cache sizes computed for
         * flush_idcache_range, which means that we must use the
         * architecture minimum.
         */
        *isize = *dsize = 16;
#else
        /* Otherwise, 64 bytes is not uncommon. */
        *isize = *dsize = 64;
#endif
    }
}

static void __attribute__((constructor)) init_cache_info(void)
{
    int isize = 0, dsize = 0;

    sys_cache_info(&isize, &dsize);
    arch_cache_info(&isize, &dsize);
    fallback_cache_info(&isize, &dsize);

    /* The line sizes must be powers of two for the _log values below. */
    assert((isize & (isize - 1)) == 0);
    assert((dsize & (dsize - 1)) == 0);

    qemu_icache_linesize = isize;
    qemu_icache_linesize_log = ctz32(isize);
    qemu_dcache_linesize = dsize;
    qemu_dcache_linesize_log = ctz32(dsize);

    qatomic64_init();
}
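
/*
 * A minimal usage sketch (hypothetical helper, compiled out): because the
 * line sizes are asserted to be powers of two, the _log variants exported
 * above let callers round to cache-line boundaries with shifts rather
 * than division or modulo.
 */
#if 0
static size_t example_dcache_lines_covered(uintptr_t addr, size_t len)
{
    uintptr_t first = addr >> qemu_dcache_linesize_log;
    uintptr_t last = (addr + len - 1) >> qemu_dcache_linesize_log;
    return last - first + 1;    /* number of d-cache lines spanned */
}
#endif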

/*
 * Architecture (+ OS) specific cache flushing mechanisms.
 */

#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)

/* Caches are coherent and do not require flushing; the symbol is inline. */

#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
/*
 * For Windows, we use the generic implementation of flush_idcache_range,
 * which calls FlushInstructionCache through __builtin___clear_cache.
 */

#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
extern void sys_icache_invalidate(void *start, size_t len);
extern void sys_dcache_flush(void *start, size_t len);
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    sys_dcache_flush((void *)rw, len);
    sys_icache_invalidate((void *)rx, len);
}
#else

/*
 * This is a copy of gcc's __aarch64_sync_cache_range, modified
 * to fit this three-operand interface.
 */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
    const uint64_t ctr_el0 = save_ctr_el0;
    const uintptr_t icache_lsize = qemu_icache_linesize;
    const uintptr_t dcache_lsize = qemu_dcache_linesize;
    uintptr_t p;

    /*
     * If CTR_EL0.IDC is set, data cache clean to the Point of Unification
     * is not required for instruction-to-data coherence.
     */
    if (!(ctr_el0 & CTR_IDC)) {
        /*
         * Loop over the address range, cleaning one cache line at a time.
         * The data cache must be flushed to the point of unification first,
         * so that the instruction cache fetches the updated data.
         */
        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
        }
        asm volatile("dsb\tish" : : : "memory");
    }

    /*
     * If CTR_EL0.DIC is set, instruction cache invalidation to the Point
     * of Unification is not required for instruction-to-data coherence.
     */
    if (!(ctr_el0 & CTR_DIC)) {
        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
        }
        asm volatile ("dsb\tish" : : : "memory");
    }

    asm volatile("isb" : : : "memory");
}
#endif /* CONFIG_DARWIN */

#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    size_t dsize, isize;

    /*
     * Some processors have coherent caches and support a simplified
     * flushing procedure.  See
     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
     */
    if (have_coherent_icache) {
        asm volatile ("sync\n\t"
                      "icbi 0,%0\n\t"
                      "isync"
                      : : "r"(rx) : "memory");
        return;
    }

    dsize = qemu_dcache_linesize;
    isize = qemu_icache_linesize;

    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif
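
/*
 * Usage sketch (an illustration only, compiled out; example_install_code
 * is a hypothetical name).  A JIT typically writes generated code through
 * a writable mapping and executes it through an executable alias of the
 * same memory; both addresses are passed so each cache is maintained
 * through the mapping that accesses it.  With a single mapping, rx == rw.
 */
#if 0
static void example_install_code(uintptr_t rx, uintptr_t rw,
                                 const void *code, size_t len)
{
    memcpy((void *)rw, code, len);      /* write through the RW alias */
    flush_idcache_range(rx, rw, len);   /* make it visible to execution */
    /* the code at rx may now be executed */
}
#endif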