1 /* 2 * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support 3 * 4 * Copyright (C) 2008 Marvell Semiconductor 5 * 6 * This file is licensed under the terms of the GNU General Public 7 * License version 2. This program is licensed "as is" without any 8 * warranty of any kind, whether express or implied. 9 * 10 * References: 11 * - Unified Layer 2 Cache for Feroceon CPU Cores, 12 * Document ID MV-S104858-00, Rev. A, October 23 2007. 13 */ 14 15 #include <linux/init.h> 16 #include <linux/of.h> 17 #include <linux/of_address.h> 18 #include <linux/highmem.h> 19 #include <linux/io.h> 20 #include <asm/cacheflush.h> 21 #include <asm/cp15.h> 22 #include <asm/hardware/cache-feroceon-l2.h> 23 24 #define L2_WRITETHROUGH_KIRKWOOD BIT(4) 25 26 /* 27 * Low-level cache maintenance operations. 28 * 29 * As well as the regular 'clean/invalidate/flush L2 cache line by 30 * MVA' instructions, the Feroceon L2 cache controller also features 31 * 'clean/invalidate L2 range by MVA' operations. 32 * 33 * Cache range operations are initiated by writing the start and 34 * end addresses to successive cp15 registers, and process every 35 * cache line whose first byte address lies in the inclusive range 36 * [start:end]. 37 * 38 * The cache range operations stall the CPU pipeline until completion. 39 * 40 * The range operations require two successive cp15 writes, in 41 * between which we don't want to be preempted. 42 */ 43 44 static inline unsigned long l2_get_va(unsigned long paddr) 45 { 46 #ifdef CONFIG_HIGHMEM 47 /* 48 * Because range ops can't be done on physical addresses, 49 * we simply install a virtual mapping for it only for the 50 * TLB lookup to occur, hence no need to flush the untouched 51 * memory mapping afterwards (note: a cache flush may happen 52 * in some circumstances depending on the path taken in kunmap_atomic). 53 */ 54 void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); 55 return (unsigned long)vaddr + (paddr & ~PAGE_MASK); 56 #else 57 return __phys_to_virt(paddr); 58 #endif 59 } 60 61 static inline void l2_put_va(unsigned long vaddr) 62 { 63 #ifdef CONFIG_HIGHMEM 64 kunmap_atomic((void *)vaddr); 65 #endif 66 } 67 68 static inline void l2_clean_pa(unsigned long addr) 69 { 70 __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); 71 } 72 73 static inline void l2_clean_pa_range(unsigned long start, unsigned long end) 74 { 75 unsigned long va_start, va_end, flags; 76 77 /* 78 * Make sure 'start' and 'end' reference the same page, as 79 * L2 is PIPT and range operations only do a TLB lookup on 80 * the start address. 81 */ 82 BUG_ON((start ^ end) >> PAGE_SHIFT); 83 84 va_start = l2_get_va(start); 85 va_end = va_start + (end - start); 86 raw_local_irq_save(flags); 87 __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" 88 "mcr p15, 1, %1, c15, c9, 5" 89 : : "r" (va_start), "r" (va_end)); 90 raw_local_irq_restore(flags); 91 l2_put_va(va_start); 92 } 93 94 static inline void l2_clean_inv_pa(unsigned long addr) 95 { 96 __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); 97 } 98 99 static inline void l2_inv_pa(unsigned long addr) 100 { 101 __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); 102 } 103 104 static inline void l2_inv_pa_range(unsigned long start, unsigned long end) 105 { 106 unsigned long va_start, va_end, flags; 107 108 /* 109 * Make sure 'start' and 'end' reference the same page, as 110 * L2 is PIPT and range operations only do a TLB lookup on 111 * the start address. 112 */ 113 BUG_ON((start ^ end) >> PAGE_SHIFT); 114 115 va_start = l2_get_va(start); 116 va_end = va_start + (end - start); 117 raw_local_irq_save(flags); 118 __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" 119 "mcr p15, 1, %1, c15, c11, 5" 120 : : "r" (va_start), "r" (va_end)); 121 raw_local_irq_restore(flags); 122 l2_put_va(va_start); 123 } 124 125 static inline void l2_inv_all(void) 126 { 127 __asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0)); 128 } 129 130 /* 131 * Linux primitives. 132 * 133 * Note that the end addresses passed to Linux primitives are 134 * noninclusive, while the hardware cache range operations use 135 * inclusive start and end addresses. 136 */ 137 #define CACHE_LINE_SIZE 32 138 #define MAX_RANGE_SIZE 1024 139 140 static int l2_wt_override; 141 142 static unsigned long calc_range_end(unsigned long start, unsigned long end) 143 { 144 unsigned long range_end; 145 146 BUG_ON(start & (CACHE_LINE_SIZE - 1)); 147 BUG_ON(end & (CACHE_LINE_SIZE - 1)); 148 149 /* 150 * Try to process all cache lines between 'start' and 'end'. 151 */ 152 range_end = end; 153 154 /* 155 * Limit the number of cache lines processed at once, 156 * since cache range operations stall the CPU pipeline 157 * until completion. 158 */ 159 if (range_end > start + MAX_RANGE_SIZE) 160 range_end = start + MAX_RANGE_SIZE; 161 162 /* 163 * Cache range operations can't straddle a page boundary. 164 */ 165 if (range_end > (start | (PAGE_SIZE - 1)) + 1) 166 range_end = (start | (PAGE_SIZE - 1)) + 1; 167 168 return range_end; 169 } 170 171 static void feroceon_l2_inv_range(unsigned long start, unsigned long end) 172 { 173 /* 174 * Clean and invalidate partial first cache line. 175 */ 176 if (start & (CACHE_LINE_SIZE - 1)) { 177 l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); 178 start = (start | (CACHE_LINE_SIZE - 1)) + 1; 179 } 180 181 /* 182 * Clean and invalidate partial last cache line. 183 */ 184 if (start < end && end & (CACHE_LINE_SIZE - 1)) { 185 l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); 186 end &= ~(CACHE_LINE_SIZE - 1); 187 } 188 189 /* 190 * Invalidate all full cache lines between 'start' and 'end'. 191 */ 192 while (start < end) { 193 unsigned long range_end = calc_range_end(start, end); 194 l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); 195 start = range_end; 196 } 197 198 dsb(); 199 } 200 201 static void feroceon_l2_clean_range(unsigned long start, unsigned long end) 202 { 203 /* 204 * If L2 is forced to WT, the L2 will always be clean and we 205 * don't need to do anything here. 206 */ 207 if (!l2_wt_override) { 208 start &= ~(CACHE_LINE_SIZE - 1); 209 end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); 210 while (start != end) { 211 unsigned long range_end = calc_range_end(start, end); 212 l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); 213 start = range_end; 214 } 215 } 216 217 dsb(); 218 } 219 220 static void feroceon_l2_flush_range(unsigned long start, unsigned long end) 221 { 222 start &= ~(CACHE_LINE_SIZE - 1); 223 end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); 224 while (start != end) { 225 unsigned long range_end = calc_range_end(start, end); 226 if (!l2_wt_override) 227 l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); 228 l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); 229 start = range_end; 230 } 231 232 dsb(); 233 } 234 235 236 /* 237 * Routines to disable and re-enable the D-cache and I-cache at run 238 * time. These are necessary because the L2 cache can only be enabled 239 * or disabled while the L1 Dcache and Icache are both disabled. 240 */ 241 static int __init flush_and_disable_dcache(void) 242 { 243 u32 cr; 244 245 cr = get_cr(); 246 if (cr & CR_C) { 247 unsigned long flags; 248 249 raw_local_irq_save(flags); 250 flush_cache_all(); 251 set_cr(cr & ~CR_C); 252 raw_local_irq_restore(flags); 253 return 1; 254 } 255 return 0; 256 } 257 258 static void __init enable_dcache(void) 259 { 260 u32 cr; 261 262 cr = get_cr(); 263 set_cr(cr | CR_C); 264 } 265 266 static void __init __invalidate_icache(void) 267 { 268 __asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); 269 } 270 271 static int __init invalidate_and_disable_icache(void) 272 { 273 u32 cr; 274 275 cr = get_cr(); 276 if (cr & CR_I) { 277 set_cr(cr & ~CR_I); 278 __invalidate_icache(); 279 return 1; 280 } 281 return 0; 282 } 283 284 static void __init enable_icache(void) 285 { 286 u32 cr; 287 288 cr = get_cr(); 289 set_cr(cr | CR_I); 290 } 291 292 static inline u32 read_extra_features(void) 293 { 294 u32 u; 295 296 __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); 297 298 return u; 299 } 300 301 static inline void write_extra_features(u32 u) 302 { 303 __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); 304 } 305 306 static void __init disable_l2_prefetch(void) 307 { 308 u32 u; 309 310 /* 311 * Read the CPU Extra Features register and verify that the 312 * Disable L2 Prefetch bit is set. 313 */ 314 u = read_extra_features(); 315 if (!(u & 0x01000000)) { 316 pr_info("Feroceon L2: Disabling L2 prefetch.\n"); 317 write_extra_features(u | 0x01000000); 318 } 319 } 320 321 static void __init enable_l2(void) 322 { 323 u32 u; 324 325 u = read_extra_features(); 326 if (!(u & 0x00400000)) { 327 int i, d; 328 329 pr_info("Feroceon L2: Enabling L2\n"); 330 331 d = flush_and_disable_dcache(); 332 i = invalidate_and_disable_icache(); 333 l2_inv_all(); 334 write_extra_features(u | 0x00400000); 335 if (i) 336 enable_icache(); 337 if (d) 338 enable_dcache(); 339 } else 340 pr_err(FW_BUG 341 "Feroceon L2: bootloader left the L2 cache on!\n"); 342 } 343 344 void __init feroceon_l2_init(int __l2_wt_override) 345 { 346 l2_wt_override = __l2_wt_override; 347 348 disable_l2_prefetch(); 349 350 outer_cache.inv_range = feroceon_l2_inv_range; 351 outer_cache.clean_range = feroceon_l2_clean_range; 352 outer_cache.flush_range = feroceon_l2_flush_range; 353 354 enable_l2(); 355 356 pr_info("Feroceon L2: Cache support initialised%s.\n", 357 l2_wt_override ? ", in WT override mode" : ""); 358 } 359 #ifdef CONFIG_OF 360 static const struct of_device_id feroceon_ids[] __initconst = { 361 { .compatible = "marvell,kirkwood-cache"}, 362 { .compatible = "marvell,feroceon-cache"}, 363 {} 364 }; 365 366 int __init feroceon_of_init(void) 367 { 368 struct device_node *node; 369 void __iomem *base; 370 bool l2_wt_override = false; 371 372 #if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) 373 l2_wt_override = true; 374 #endif 375 376 node = of_find_matching_node(NULL, feroceon_ids); 377 if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { 378 base = of_iomap(node, 0); 379 if (!base) 380 return -ENOMEM; 381 382 if (l2_wt_override) 383 writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base); 384 else 385 writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base); 386 } 387 388 feroceon_l2_init(l2_wt_override); 389 390 return 0; 391 } 392 #endif 393