/*
 * Info about, and flushing the host cpu caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"


/*
 * Host L1 cache line sizes in bytes (and their log2), published for the
 * rest of QEMU.  Filled in exactly once, before main(), by the
 * init_cache_info() constructor below; asserted there to be powers of two.
 */
int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 *
 * Contract for all sys_cache_info() variants: on success store the L1
 * instruction/data cache line sizes through *isize / *dsize; on any
 * failure leave them untouched (zero), so that the later architecture
 * and fallback stages can fill them in.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    /* Round the byte count to a whole number of entries for the real call. */
    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    /* Scan all entries for level-1 caches; unified caches set both sizes. */
    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(__APPLE__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes.  */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#else
/* POSIX */

static void sys_cache_info(int *isize, int *dsize)
{
    /*
     * sysconf() returns -1 (or 0 on some systems) when the value is
     * unknown; the "> 0" guards keep the outputs untouched in that case.
     */
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */


/*
 * Architecture (+ OS) specific cache detection mechanisms.
 */

#if defined(__aarch64__)

static void arch_cache_info(int *isize, int *dsize)
{
    /* Only consult the hardware for sizes the OS probe failed to supply. */
    if (*isize == 0 || *dsize == 0) {
        uint64_t ctr;

        /*
         * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
         * but (at least under Linux) these are marked protected by the
         * kernel.  However, CTR_EL0 contains the minimum linesize in the
         * entire hierarchy, and is used by userspace cache flushing.
         */
        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
        if (*isize == 0) {
            /* CTR_EL0.IminLine is log2 of the line size in 4-byte words. */
            *isize = 4 << (ctr & 0xf);
        }
        if (*dsize == 0) {
            /* CTR_EL0.DminLine, likewise in units of 4-byte words. */
            *dsize = 4 << ((ctr >> 16) & 0xf);
        }
    }
}

#elif defined(_ARCH_PPC) && defined(__linux__)
# include "elf.h"

static void arch_cache_info(int *isize, int *dsize)
{
    /* The kernel exports the cache block sizes via the ELF aux vector. */
    if (*isize == 0) {
        *isize = qemu_getauxval(AT_ICACHEBSIZE);
    }
    if (*dsize == 0) {
        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
    }
}

#else
static void arch_cache_info(int *isize, int *dsize) { }
#endif /* arch_cache_info */

/*
 * ... and if all else fails ...
 */

static void fallback_cache_info(int *isize, int *dsize)
{
    /* If we can only find one of the two, assume they're the same. */
    if (*isize) {
        if (*dsize) {
            /* Success! */
        } else {
            *dsize = *isize;
        }
    } else if (*dsize) {
        *isize = *dsize;
    } else {
#if defined(_ARCH_PPC)
        /*
         * For PPC, we're going to use the cache sizes computed for
         * flush_idcache_range.  Which means that we must use the
         * architecture minimum.
         */
        *isize = *dsize = 16;
#else
        /* Otherwise, 64 bytes is not uncommon. */
        *isize = *dsize = 64;
#endif
    }
}

/*
 * Run the three detection stages in order (OS, architecture, fallback)
 * and publish the results.  Runs before main() via the constructor
 * attribute, so the qemu_*_linesize globals are valid everywhere else.
 */
static void __attribute__((constructor)) init_cache_info(void)
{
    int isize = 0, dsize = 0;

    sys_cache_info(&isize, &dsize);
    arch_cache_info(&isize, &dsize);
    fallback_cache_info(&isize, &dsize);

    /* The flushing code below relies on line sizes being powers of two. */
    assert((isize & (isize - 1)) == 0);
    assert((dsize & (dsize - 1)) == 0);

    qemu_icache_linesize = isize;
    /* ctz32 == log2 here, since isize/dsize are powers of two. */
    qemu_icache_linesize_log = ctz32(isize);
    qemu_dcache_linesize = dsize;
    qemu_dcache_linesize_log = ctz32(dsize);

    qatomic64_init();
}


/*
 * Architecture (+ OS) specific cache flushing mechanisms.
 *
 * flush_idcache_range(rx, rw, len): make instruction fetches through the
 * executable mapping @rx observe data written through the writable
 * mapping @rw of the same buffer (the two may be equal or distinct
 * mappings, cf. the rx != rw checks below).
 */

#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)

/* Caches are coherent and do not require flushing; symbol inline. */

#elif defined(__aarch64__)

#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
extern void sys_icache_invalidate(void *start, size_t len);
extern void sys_dcache_flush(void *start, size_t len);
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* Flush dcache via the RW mapping first, then invalidate icache via RX. */
    sys_dcache_flush((void *)rw, len);
    sys_icache_invalidate((void *)rx, len);
}
#else

/*
 * TODO: unify this with cacheinfo.c.
 * We want to save the whole contents of CTR_EL0, so that we
 * have more than the linesize, but also IDC and DIC.
 */
static uint64_t save_ctr_el0;
static void __attribute__((constructor)) init_ctr_el0(void)
{
    asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0));
}

/*
 * This is a copy of gcc's __aarch64_sync_cache_range, modified
 * to fit this three-operand interface.
 */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
    const uint64_t ctr_el0 = save_ctr_el0;
    /* Minimum I/D line sizes from CTR_EL0, converted from words to bytes. */
    const uintptr_t icache_lsize = 4 << extract64(ctr_el0, 0, 4);
    const uintptr_t dcache_lsize = 4 << extract64(ctr_el0, 16, 4);
    uintptr_t p;

    /*
     * If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification
     * is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_IDC)) {
        /*
         * Loop over the address range, clearing one cache line at once.
         * Data cache must be flushed to unification first to make sure
         * the instruction cache fetches the updated data.
         */
        /* "& -lsize" == "& ~(lsize - 1)": align down to the line start. */
        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
        }
        /* Make the dcache cleans visible before touching the icache. */
        asm volatile("dsb\tish" : : : "memory");
    }

    /*
     * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point
     * of Unification is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_DIC)) {
        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
        }
        asm volatile ("dsb\tish" : : : "memory");
    }

    /* Discard any instructions already fetched down the pipeline. */
    asm volatile("isb" : : : "memory");
}
#endif /* CONFIG_DARWIN */

#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* With a split mapping, the dcache must be flushed via the RW alias. */
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    /* Line sizes detected at startup by init_cache_info() above. */
    size_t dsize = qemu_dcache_linesize;
    size_t isize = qemu_icache_linesize;

    /* Store dcache lines covering [rw, rw+len) back to memory ... */
    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    /* ... then invalidate the corresponding icache lines via RX. */
    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    /* isync discards prefetched instructions in this hardware thread. */
    asm volatile ("isync" : : : "memory");
}

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required.  */
    /* The "flush" instruction operates on an aligned doubleword. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

/* Generic fallback: let the compiler's builtin do whatever the host needs. */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif