/*
 * Information about, and flushing of, the host CPU caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"


int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(CONFIG_DARWIN)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#else
/* POSIX */

static void sys_cache_info(int *isize, int *dsize)
{
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */
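
/*
 * A minimal sketch of how the detected values are consumed (illustrative,
 * not part of the original file): both line sizes are asserted to be
 * powers of two in init_cache_info() below, so a range [addr, addr + len)
 * can be rounded outward to whole cache lines with mask arithmetic:
 *
 *   uintptr_t lsize = qemu_dcache_linesize;
 *   uintptr_t first = addr & -lsize;                       // round down
 *   uintptr_t last  = (addr + len + lsize - 1) & -lsize;   // round up
 *
 * The per-architecture flush loops further below use exactly this pattern.
 */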

/*
 * Architecture (+ OS) specific cache detection mechanisms.
 */

#if defined(__powerpc__)
static bool have_coherent_icache;
#endif

#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
/*
 * Apple does not expose CTR_EL0, so we must use system interfaces.
 * Windows does not expose it either; there we use the generic
 * implementation of flush_idcache_range instead.
 */
static uint64_t save_ctr_el0;
static void arch_cache_info(int *isize, int *dsize)
{
    uint64_t ctr;

    /*
     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
     * but (at least under Linux) these are marked protected by the
     * kernel.  However, CTR_EL0 contains the minimum linesize in the
     * entire hierarchy, and is used by userspace cache flushing.
     *
     * We will also use this value in flush_idcache_range.
     */
    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
    save_ctr_el0 = ctr;

    if (*isize == 0) {
        *isize = 4 << (ctr & 0xf);
    }
    if (*dsize == 0) {
        *dsize = 4 << ((ctr >> 16) & 0xf);
    }
}

#elif defined(_ARCH_PPC) && defined(__linux__)
# include "elf.h"

static void arch_cache_info(int *isize, int *dsize)
{
    if (*isize == 0) {
        *isize = qemu_getauxval(AT_ICACHEBSIZE);
    }
    if (*dsize == 0) {
        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
    }
    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
}

#else
static void arch_cache_info(int *isize, int *dsize) { }
#endif /* arch_cache_info */

/*
 * ... and if all else fails ...
 */

static void fallback_cache_info(int *isize, int *dsize)
{
    /* If we can only find one of the two, assume they're the same. */
    if (*isize) {
        if (*dsize == 0) {
            *dsize = *isize;
        }
    } else if (*dsize) {
        *isize = *dsize;
    } else {
#if defined(_ARCH_PPC)
        /*
         * For PPC, we're going to use the cache sizes computed for
         * flush_idcache_range, which means that we must use the
         * architecture minimum.
         */
        *isize = *dsize = 16;
#else
        /* Otherwise, 64 bytes is not uncommon. */
        *isize = *dsize = 64;
#endif
    }
}

static void __attribute__((constructor)) init_cache_info(void)
{
    int isize = 0, dsize = 0;

    sys_cache_info(&isize, &dsize);
    arch_cache_info(&isize, &dsize);
    fallback_cache_info(&isize, &dsize);

    /* Both line sizes must be powers of two. */
    assert((isize & (isize - 1)) == 0);
    assert((dsize & (dsize - 1)) == 0);

    qemu_icache_linesize = isize;
    qemu_icache_linesize_log = ctz32(isize);
    qemu_dcache_linesize = dsize;
    qemu_dcache_linesize_log = ctz32(dsize);

    qatomic64_init();
}


/*
 * Architecture (+ OS) specific cache flushing mechanisms.
 */
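
/*
 * A minimal usage sketch (hypothetical caller, not part of the original
 * file): a JIT that emits code through a writable mapping 'rw' aliased
 * to an executable mapping 'rx' must flush before running the new code:
 *
 *   memcpy((void *)rw, insns, len);      // store instructions via rw
 *   flush_idcache_range(rx, rw, len);    // make them visible to ifetch
 *
 * When the two mappings coincide (rx == rw), several of the
 * implementations below can skip the separate data-cache step.
 */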

#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)

/*
 * Caches are coherent and do not require flushing;
 * the flush_idcache_range symbol is provided inline in cacheflush.h.
 */

#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
/*
 * For Windows, we use the generic implementation of flush_idcache_range,
 * which calls FlushInstructionCache through __builtin___clear_cache.
 */

#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
#include <libkern/OSCacheControl.h>

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx == rw) {
        /*
         * sys_icache_invalidate() syncs the dcache and icache,
         * so no need to call sys_dcache_flush().
         */
    } else {
        sys_dcache_flush((void *)rw, len);
    }
    sys_icache_invalidate((void *)rx, len);
}
#else

/*
 * This is a copy of gcc's __aarch64_sync_cache_range, modified
 * to fit this three-operand interface.
 */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
    const uint64_t ctr_el0 = save_ctr_el0;
    const uintptr_t icache_lsize = qemu_icache_linesize;
    const uintptr_t dcache_lsize = qemu_dcache_linesize;
    uintptr_t p;

    /*
     * If CTR_EL0.IDC is set, data cache clean to the Point of Unification
     * is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_IDC)) {
        /*
         * Loop over the address range, cleaning one cache line at a time.
         * The data cache must be flushed to the point of unification first,
         * so that the instruction cache fetches the updated data.
         */
        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
        }
        asm volatile("dsb\tish" : : : "memory");
    }

    /*
     * If CTR_EL0.DIC is set, instruction cache invalidation to the Point
     * of Unification is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_DIC)) {
        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
        }
        asm volatile ("dsb\tish" : : : "memory");
    }

    asm volatile("isb" : : : "memory");
}
#endif /* CONFIG_DARWIN */

#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    size_t dsize, isize;

    /*
     * Some processors have coherent caches and support a simplified
     * flushing procedure.  See
     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
     */
    if (have_coherent_icache) {
        asm volatile ("sync\n\t"
                      "icbi 0,%0\n\t"
                      "isync"
                      : : "r"(rx) : "memory");
        return;
    }

    dsize = qemu_dcache_linesize;
    isize = qemu_icache_linesize;

    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif
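
/*
 * Background sketch on why the interface takes two addresses (assumed
 * setup, not part of the original file): a JIT such as TCG may map its
 * code buffer twice, once writable and once executable, e.g. on Linux:
 *
 *   int fd = memfd_create("jit", 0);
 *   ftruncate(fd, size);
 *   void *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *   void *rx = mmap(NULL, size, PROT_READ | PROT_EXEC,  MAP_SHARED, fd, 0);
 *
 * Code is written through 'rw', executed through 'rx', and both
 * addresses are handed to flush_idcache_range so each implementation
 * can clean the data side and invalidate the instruction side at the
 * correct alias.
 */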