1 /* 2 * Broadcom Brahma-B15 CPU read-ahead cache management functions 3 * 4 * Copyright (C) 2015-2016 Broadcom 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 11 #include <linux/err.h> 12 #include <linux/spinlock.h> 13 #include <linux/io.h> 14 #include <linux/bitops.h> 15 #include <linux/of_address.h> 16 #include <linux/notifier.h> 17 #include <linux/cpu.h> 18 #include <linux/syscore_ops.h> 19 #include <linux/reboot.h> 20 21 #include <asm/cacheflush.h> 22 #include <asm/hardware/cache-b15-rac.h> 23 24 extern void v7_flush_kern_cache_all(void); 25 26 /* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */ 27 #define RAC_CONFIG0_REG (0x78) 28 #define RACENPREF_MASK (0x3) 29 #define RACPREFINST_SHIFT (0) 30 #define RACENINST_SHIFT (2) 31 #define RACPREFDATA_SHIFT (4) 32 #define RACENDATA_SHIFT (6) 33 #define RAC_CPU_SHIFT (8) 34 #define RACCFG_MASK (0xff) 35 #define RAC_CONFIG1_REG (0x7c) 36 #define RAC_FLUSH_REG (0x80) 37 #define FLUSH_RAC (1 << 0) 38 39 /* Bitmask to enable instruction and data prefetching with a 256-bytes stride */ 40 #define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \ 41 RACENPREF_MASK << RACENINST_SHIFT | \ 42 1 << RACPREFDATA_SHIFT | \ 43 RACENPREF_MASK << RACENDATA_SHIFT) 44 45 #define RAC_ENABLED 0 46 /* Special state where we want to bypass the spinlock and call directly 47 * into the v7 cache maintenance operations during suspend/resume 48 */ 49 #define RAC_SUSPENDED 1 50 51 static void __iomem *b15_rac_base; 52 static DEFINE_SPINLOCK(rac_lock); 53 54 static u32 rac_config0_reg; 55 56 /* Initialization flag to avoid checking for b15_rac_base, and to prevent 57 * multi-platform kernels from crashing here as well. 58 */ 59 static unsigned long b15_rac_flags; 60 61 static inline u32 __b15_rac_disable(void) 62 { 63 u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); 64 __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG); 65 dmb(); 66 return val; 67 } 68 69 static inline void __b15_rac_flush(void) 70 { 71 u32 reg; 72 73 __raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG); 74 do { 75 /* This dmb() is required to force the Bus Interface Unit 76 * to clean oustanding writes, and forces an idle cycle 77 * to be inserted. 78 */ 79 dmb(); 80 reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG); 81 } while (reg & FLUSH_RAC); 82 } 83 84 static inline u32 b15_rac_disable_and_flush(void) 85 { 86 u32 reg; 87 88 reg = __b15_rac_disable(); 89 __b15_rac_flush(); 90 return reg; 91 } 92 93 static inline void __b15_rac_enable(u32 val) 94 { 95 __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG); 96 /* dsb() is required here to be consistent with __flush_icache_all() */ 97 dsb(); 98 } 99 100 #define BUILD_RAC_CACHE_OP(name, bar) \ 101 void b15_flush_##name(void) \ 102 { \ 103 unsigned int do_flush; \ 104 u32 val = 0; \ 105 \ 106 if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) { \ 107 v7_flush_##name(); \ 108 bar; \ 109 return; \ 110 } \ 111 \ 112 spin_lock(&rac_lock); \ 113 do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \ 114 if (do_flush) \ 115 val = b15_rac_disable_and_flush(); \ 116 v7_flush_##name(); \ 117 if (!do_flush) \ 118 bar; \ 119 else \ 120 __b15_rac_enable(val); \ 121 spin_unlock(&rac_lock); \ 122 } 123 124 #define nobarrier 125 126 /* The readahead cache present in the Brahma-B15 CPU is a special piece of 127 * hardware after the integrated L2 cache of the B15 CPU complex whose purpose 128 * is to prefetch instruction and/or data with a line size of either 64 bytes 129 * or 256 bytes. The rationale is that the data-bus of the CPU interface is 130 * optimized for 256-bytes transactions, and enabling the readahead cache 131 * provides a significant performance boost we want it enabled (typically 132 * twice the performance for a memcpy benchmark application). 133 * 134 * The readahead cache is transparent for Modified Virtual Addresses 135 * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and 136 * DCCIMVAC. 137 * 138 * It is however not transparent for the following cache maintenance 139 * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely 140 * what we are patching here with our BUILD_RAC_CACHE_OP here. 141 */ 142 BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier); 143 144 static void b15_rac_enable(void) 145 { 146 unsigned int cpu; 147 u32 enable = 0; 148 149 for_each_possible_cpu(cpu) 150 enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT)); 151 152 b15_rac_disable_and_flush(); 153 __b15_rac_enable(enable); 154 } 155 156 static int b15_rac_reboot_notifier(struct notifier_block *nb, 157 unsigned long action, 158 void *data) 159 { 160 /* During kexec, we are not yet migrated on the boot CPU, so we need to 161 * make sure we are SMP safe here. Once the RAC is disabled, flag it as 162 * suspended such that the hotplug notifier returns early. 163 */ 164 if (action == SYS_RESTART) { 165 spin_lock(&rac_lock); 166 b15_rac_disable_and_flush(); 167 clear_bit(RAC_ENABLED, &b15_rac_flags); 168 set_bit(RAC_SUSPENDED, &b15_rac_flags); 169 spin_unlock(&rac_lock); 170 } 171 172 return NOTIFY_DONE; 173 } 174 175 static struct notifier_block b15_rac_reboot_nb = { 176 .notifier_call = b15_rac_reboot_notifier, 177 }; 178 179 /* The CPU hotplug case is the most interesting one, we basically need to make 180 * sure that the RAC is disabled for the entire system prior to having a CPU 181 * die, in particular prior to this dying CPU having exited the coherency 182 * domain. 183 * 184 * Once this CPU is marked dead, we can safely re-enable the RAC for the 185 * remaining CPUs in the system which are still online. 186 * 187 * Offlining a CPU is the problematic case, onlining a CPU is not much of an 188 * issue since the CPU and its cache-level hierarchy will start filling with 189 * the RAC disabled, so L1 and L2 only. 190 * 191 * In this function, we should NOT have to verify any unsafe setting/condition 192 * b15_rac_base: 193 * 194 * It is protected by the RAC_ENABLED flag which is cleared by default, and 195 * being cleared when initial procedure is done. b15_rac_base had been set at 196 * that time. 197 * 198 * RAC_ENABLED: 199 * There is a small timing windows, in b15_rac_init(), between 200 * cpuhp_setup_state_*() 201 * ... 202 * set RAC_ENABLED 203 * However, there is no hotplug activity based on the Linux booting procedure. 204 * 205 * Since we have to disable RAC for all cores, we keep RAC on as long as as 206 * possible (disable it as late as possible) to gain the cache benefit. 207 * 208 * Thus, dying/dead states are chosen here 209 * 210 * We are choosing not do disable the RAC on a per-CPU basis, here, if we did 211 * we would want to consider disabling it as early as possible to benefit the 212 * other active CPUs. 213 */ 214 215 /* Running on the dying CPU */ 216 static int b15_rac_dying_cpu(unsigned int cpu) 217 { 218 /* During kexec/reboot, the RAC is disabled via the reboot notifier 219 * return early here. 220 */ 221 if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) 222 return 0; 223 224 spin_lock(&rac_lock); 225 226 /* Indicate that we are starting a hotplug procedure */ 227 __clear_bit(RAC_ENABLED, &b15_rac_flags); 228 229 /* Disable the readahead cache and save its value to a global */ 230 rac_config0_reg = b15_rac_disable_and_flush(); 231 232 spin_unlock(&rac_lock); 233 234 return 0; 235 } 236 237 /* Running on a non-dying CPU */ 238 static int b15_rac_dead_cpu(unsigned int cpu) 239 { 240 /* During kexec/reboot, the RAC is disabled via the reboot notifier 241 * return early here. 242 */ 243 if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) 244 return 0; 245 246 spin_lock(&rac_lock); 247 248 /* And enable it */ 249 __b15_rac_enable(rac_config0_reg); 250 __set_bit(RAC_ENABLED, &b15_rac_flags); 251 252 spin_unlock(&rac_lock); 253 254 return 0; 255 } 256 257 static int b15_rac_suspend(void) 258 { 259 /* Suspend the read-ahead cache oeprations, forcing our cache 260 * implementation to fallback to the regular ARMv7 calls. 261 * 262 * We are guaranteed to be running on the boot CPU at this point and 263 * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy 264 * here. 265 */ 266 rac_config0_reg = b15_rac_disable_and_flush(); 267 set_bit(RAC_SUSPENDED, &b15_rac_flags); 268 269 return 0; 270 } 271 272 static void b15_rac_resume(void) 273 { 274 /* Coming out of a S3 suspend/resume cycle, the read-ahead cache 275 * register RAC_CONFIG0_REG will be restored to its default value, make 276 * sure we re-enable it and set the enable flag, we are also guaranteed 277 * to run on the boot CPU, so not racy again. 278 */ 279 __b15_rac_enable(rac_config0_reg); 280 clear_bit(RAC_SUSPENDED, &b15_rac_flags); 281 } 282 283 static struct syscore_ops b15_rac_syscore_ops = { 284 .suspend = b15_rac_suspend, 285 .resume = b15_rac_resume, 286 }; 287 288 static int __init b15_rac_init(void) 289 { 290 struct device_node *dn; 291 int ret = 0, cpu; 292 u32 reg, en_mask = 0; 293 294 dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl"); 295 if (!dn) 296 return -ENODEV; 297 298 if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n")) 299 goto out; 300 301 b15_rac_base = of_iomap(dn, 0); 302 if (!b15_rac_base) { 303 pr_err("failed to remap BIU control base\n"); 304 ret = -ENOMEM; 305 goto out; 306 } 307 308 ret = register_reboot_notifier(&b15_rac_reboot_nb); 309 if (ret) { 310 pr_err("failed to register reboot notifier\n"); 311 iounmap(b15_rac_base); 312 goto out; 313 } 314 315 if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { 316 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, 317 "arm/cache-b15-rac:dead", 318 NULL, b15_rac_dead_cpu); 319 if (ret) 320 goto out_unmap; 321 322 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING, 323 "arm/cache-b15-rac:dying", 324 NULL, b15_rac_dying_cpu); 325 if (ret) 326 goto out_cpu_dead; 327 } 328 329 if (IS_ENABLED(CONFIG_PM_SLEEP)) 330 register_syscore_ops(&b15_rac_syscore_ops); 331 332 spin_lock(&rac_lock); 333 reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); 334 for_each_possible_cpu(cpu) 335 en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT)); 336 WARN(reg & en_mask, "Read-ahead cache not previously disabled\n"); 337 338 b15_rac_enable(); 339 set_bit(RAC_ENABLED, &b15_rac_flags); 340 spin_unlock(&rac_lock); 341 342 pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n", 343 b15_rac_base + RAC_CONFIG0_REG); 344 345 goto out; 346 347 out_cpu_dead: 348 cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING); 349 out_unmap: 350 unregister_reboot_notifier(&b15_rac_reboot_nb); 351 iounmap(b15_rac_base); 352 out: 353 of_node_put(dn); 354 return ret; 355 } 356 arch_initcall(b15_rac_init); 357