1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Broadcom Brahma-B15 CPU read-ahead cache management functions 4 * 5 * Copyright (C) 2015-2016 Broadcom 6 */ 7 8 #include <linux/err.h> 9 #include <linux/spinlock.h> 10 #include <linux/io.h> 11 #include <linux/bitops.h> 12 #include <linux/of_address.h> 13 #include <linux/notifier.h> 14 #include <linux/cpu.h> 15 #include <linux/syscore_ops.h> 16 #include <linux/reboot.h> 17 18 #include <asm/cacheflush.h> 19 #include <asm/hardware/cache-b15-rac.h> 20 21 extern void v7_flush_kern_cache_all(void); 22 23 /* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */ 24 #define RAC_CONFIG0_REG (0x78) 25 #define RACENPREF_MASK (0x3) 26 #define RACPREFINST_SHIFT (0) 27 #define RACENINST_SHIFT (2) 28 #define RACPREFDATA_SHIFT (4) 29 #define RACENDATA_SHIFT (6) 30 #define RAC_CPU_SHIFT (8) 31 #define RACCFG_MASK (0xff) 32 #define RAC_CONFIG1_REG (0x7c) 33 /* Brahma-B15 is a quad-core only design */ 34 #define B15_RAC_FLUSH_REG (0x80) 35 /* Brahma-B53 is an octo-core design */ 36 #define B53_RAC_FLUSH_REG (0x84) 37 #define FLUSH_RAC (1 << 0) 38 39 /* Bitmask to enable instruction and data prefetching with a 256-bytes stride */ 40 #define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \ 41 RACENPREF_MASK << RACENINST_SHIFT | \ 42 1 << RACPREFDATA_SHIFT | \ 43 RACENPREF_MASK << RACENDATA_SHIFT) 44 45 #define RAC_ENABLED 0 46 /* Special state where we want to bypass the spinlock and call directly 47 * into the v7 cache maintenance operations during suspend/resume 48 */ 49 #define RAC_SUSPENDED 1 50 51 static void __iomem *b15_rac_base; 52 static DEFINE_SPINLOCK(rac_lock); 53 54 static u32 rac_config0_reg; 55 static u32 rac_flush_offset; 56 57 /* Initialization flag to avoid checking for b15_rac_base, and to prevent 58 * multi-platform kernels from crashing here as well. 59 */ 60 static unsigned long b15_rac_flags; 61 62 static inline u32 __b15_rac_disable(void) 63 { 64 u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); 65 __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG); 66 dmb(); 67 return val; 68 } 69 70 static inline void __b15_rac_flush(void) 71 { 72 u32 reg; 73 74 __raw_writel(FLUSH_RAC, b15_rac_base + rac_flush_offset); 75 do { 76 /* This dmb() is required to force the Bus Interface Unit 77 * to clean outstanding writes, and forces an idle cycle 78 * to be inserted. 79 */ 80 dmb(); 81 reg = __raw_readl(b15_rac_base + rac_flush_offset); 82 } while (reg & FLUSH_RAC); 83 } 84 85 static inline u32 b15_rac_disable_and_flush(void) 86 { 87 u32 reg; 88 89 reg = __b15_rac_disable(); 90 __b15_rac_flush(); 91 return reg; 92 } 93 94 static inline void __b15_rac_enable(u32 val) 95 { 96 __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG); 97 /* dsb() is required here to be consistent with __flush_icache_all() */ 98 dsb(); 99 } 100 101 #define BUILD_RAC_CACHE_OP(name, bar) \ 102 void b15_flush_##name(void) \ 103 { \ 104 unsigned int do_flush; \ 105 u32 val = 0; \ 106 \ 107 if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) { \ 108 v7_flush_##name(); \ 109 bar; \ 110 return; \ 111 } \ 112 \ 113 spin_lock(&rac_lock); \ 114 do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \ 115 if (do_flush) \ 116 val = b15_rac_disable_and_flush(); \ 117 v7_flush_##name(); \ 118 if (!do_flush) \ 119 bar; \ 120 else \ 121 __b15_rac_enable(val); \ 122 spin_unlock(&rac_lock); \ 123 } 124 125 #define nobarrier 126 127 /* The readahead cache present in the Brahma-B15 CPU is a special piece of 128 * hardware after the integrated L2 cache of the B15 CPU complex whose purpose 129 * is to prefetch instruction and/or data with a line size of either 64 bytes 130 * or 256 bytes. The rationale is that the data-bus of the CPU interface is 131 * optimized for 256-bytes transactions, and enabling the readahead cache 132 * provides a significant performance boost we want it enabled (typically 133 * twice the performance for a memcpy benchmark application). 134 * 135 * The readahead cache is transparent for Modified Virtual Addresses 136 * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and 137 * DCCIMVAC. 138 * 139 * It is however not transparent for the following cache maintenance 140 * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely 141 * what we are patching here with our BUILD_RAC_CACHE_OP here. 142 */ 143 BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier); 144 145 static void b15_rac_enable(void) 146 { 147 unsigned int cpu; 148 u32 enable = 0; 149 150 for_each_possible_cpu(cpu) 151 enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT)); 152 153 b15_rac_disable_and_flush(); 154 __b15_rac_enable(enable); 155 } 156 157 static int b15_rac_reboot_notifier(struct notifier_block *nb, 158 unsigned long action, 159 void *data) 160 { 161 /* During kexec, we are not yet migrated on the boot CPU, so we need to 162 * make sure we are SMP safe here. Once the RAC is disabled, flag it as 163 * suspended such that the hotplug notifier returns early. 164 */ 165 if (action == SYS_RESTART) { 166 spin_lock(&rac_lock); 167 b15_rac_disable_and_flush(); 168 clear_bit(RAC_ENABLED, &b15_rac_flags); 169 set_bit(RAC_SUSPENDED, &b15_rac_flags); 170 spin_unlock(&rac_lock); 171 } 172 173 return NOTIFY_DONE; 174 } 175 176 static struct notifier_block b15_rac_reboot_nb = { 177 .notifier_call = b15_rac_reboot_notifier, 178 }; 179 180 /* The CPU hotplug case is the most interesting one, we basically need to make 181 * sure that the RAC is disabled for the entire system prior to having a CPU 182 * die, in particular prior to this dying CPU having exited the coherency 183 * domain. 184 * 185 * Once this CPU is marked dead, we can safely re-enable the RAC for the 186 * remaining CPUs in the system which are still online. 187 * 188 * Offlining a CPU is the problematic case, onlining a CPU is not much of an 189 * issue since the CPU and its cache-level hierarchy will start filling with 190 * the RAC disabled, so L1 and L2 only. 191 * 192 * In this function, we should NOT have to verify any unsafe setting/condition 193 * b15_rac_base: 194 * 195 * It is protected by the RAC_ENABLED flag which is cleared by default, and 196 * being cleared when initial procedure is done. b15_rac_base had been set at 197 * that time. 198 * 199 * RAC_ENABLED: 200 * There is a small timing windows, in b15_rac_init(), between 201 * cpuhp_setup_state_*() 202 * ... 203 * set RAC_ENABLED 204 * However, there is no hotplug activity based on the Linux booting procedure. 205 * 206 * Since we have to disable RAC for all cores, we keep RAC on as long as as 207 * possible (disable it as late as possible) to gain the cache benefit. 208 * 209 * Thus, dying/dead states are chosen here 210 * 211 * We are choosing not do disable the RAC on a per-CPU basis, here, if we did 212 * we would want to consider disabling it as early as possible to benefit the 213 * other active CPUs. 214 */ 215 216 /* Running on the dying CPU */ 217 static int b15_rac_dying_cpu(unsigned int cpu) 218 { 219 /* During kexec/reboot, the RAC is disabled via the reboot notifier 220 * return early here. 221 */ 222 if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) 223 return 0; 224 225 spin_lock(&rac_lock); 226 227 /* Indicate that we are starting a hotplug procedure */ 228 __clear_bit(RAC_ENABLED, &b15_rac_flags); 229 230 /* Disable the readahead cache and save its value to a global */ 231 rac_config0_reg = b15_rac_disable_and_flush(); 232 233 spin_unlock(&rac_lock); 234 235 return 0; 236 } 237 238 /* Running on a non-dying CPU */ 239 static int b15_rac_dead_cpu(unsigned int cpu) 240 { 241 /* During kexec/reboot, the RAC is disabled via the reboot notifier 242 * return early here. 243 */ 244 if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) 245 return 0; 246 247 spin_lock(&rac_lock); 248 249 /* And enable it */ 250 __b15_rac_enable(rac_config0_reg); 251 __set_bit(RAC_ENABLED, &b15_rac_flags); 252 253 spin_unlock(&rac_lock); 254 255 return 0; 256 } 257 258 static int b15_rac_suspend(void) 259 { 260 /* Suspend the read-ahead cache oeprations, forcing our cache 261 * implementation to fallback to the regular ARMv7 calls. 262 * 263 * We are guaranteed to be running on the boot CPU at this point and 264 * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy 265 * here. 266 */ 267 rac_config0_reg = b15_rac_disable_and_flush(); 268 set_bit(RAC_SUSPENDED, &b15_rac_flags); 269 270 return 0; 271 } 272 273 static void b15_rac_resume(void) 274 { 275 /* Coming out of a S3 suspend/resume cycle, the read-ahead cache 276 * register RAC_CONFIG0_REG will be restored to its default value, make 277 * sure we re-enable it and set the enable flag, we are also guaranteed 278 * to run on the boot CPU, so not racy again. 279 */ 280 __b15_rac_enable(rac_config0_reg); 281 clear_bit(RAC_SUSPENDED, &b15_rac_flags); 282 } 283 284 static struct syscore_ops b15_rac_syscore_ops = { 285 .suspend = b15_rac_suspend, 286 .resume = b15_rac_resume, 287 }; 288 289 static int __init b15_rac_init(void) 290 { 291 struct device_node *dn, *cpu_dn; 292 int ret = 0, cpu; 293 u32 reg, en_mask = 0; 294 295 dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl"); 296 if (!dn) 297 return -ENODEV; 298 299 if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n")) 300 goto out; 301 302 b15_rac_base = of_iomap(dn, 0); 303 if (!b15_rac_base) { 304 pr_err("failed to remap BIU control base\n"); 305 ret = -ENOMEM; 306 goto out; 307 } 308 309 cpu_dn = of_get_cpu_node(0, NULL); 310 if (!cpu_dn) { 311 ret = -ENODEV; 312 goto out; 313 } 314 315 if (of_device_is_compatible(cpu_dn, "brcm,brahma-b15")) 316 rac_flush_offset = B15_RAC_FLUSH_REG; 317 else if (of_device_is_compatible(cpu_dn, "brcm,brahma-b53")) 318 rac_flush_offset = B53_RAC_FLUSH_REG; 319 else { 320 pr_err("Unsupported CPU\n"); 321 of_node_put(cpu_dn); 322 ret = -EINVAL; 323 goto out; 324 } 325 of_node_put(cpu_dn); 326 327 ret = register_reboot_notifier(&b15_rac_reboot_nb); 328 if (ret) { 329 pr_err("failed to register reboot notifier\n"); 330 iounmap(b15_rac_base); 331 goto out; 332 } 333 334 if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { 335 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, 336 "arm/cache-b15-rac:dead", 337 NULL, b15_rac_dead_cpu); 338 if (ret) 339 goto out_unmap; 340 341 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING, 342 "arm/cache-b15-rac:dying", 343 NULL, b15_rac_dying_cpu); 344 if (ret) 345 goto out_cpu_dead; 346 } 347 348 if (IS_ENABLED(CONFIG_PM_SLEEP)) 349 register_syscore_ops(&b15_rac_syscore_ops); 350 351 spin_lock(&rac_lock); 352 reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); 353 for_each_possible_cpu(cpu) 354 en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT)); 355 WARN(reg & en_mask, "Read-ahead cache not previously disabled\n"); 356 357 b15_rac_enable(); 358 set_bit(RAC_ENABLED, &b15_rac_flags); 359 spin_unlock(&rac_lock); 360 361 pr_info("%pOF: Broadcom Brahma-B15 readahead cache\n", dn); 362 363 goto out; 364 365 out_cpu_dead: 366 cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING); 367 out_unmap: 368 unregister_reboot_notifier(&b15_rac_reboot_nb); 369 iounmap(b15_rac_base); 370 out: 371 of_node_put(dn); 372 return ret; 373 } 374 arch_initcall(b15_rac_init); 375