/*
 * arch/arm/mm/cache-l2x0.c - L210/L220/L310 cache controller support
 *
 * Copyright (C) 2007 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/log2.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>

#include <asm/cacheflush.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/hardware/cache-l2x0.h>
#include "cache-tauros3.h"
#include "cache-aurora-l2.h"

struct l2c_init_data {
	const char *type;
	unsigned way_size_0;
	unsigned num_lock;
	void (*of_parse)(const struct device_node *, u32 *, u32 *);
	void (*enable)(void __iomem *, unsigned);
	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
	void (*save)(void __iomem *);
	void (*configure)(void __iomem *);
	void (*unlock)(void __iomem *, unsigned);
	struct outer_cache_fns outer_cache;
};

#define CACHE_LINE_SIZE		32

static void __iomem *l2x0_base;
static const struct l2c_init_data *l2x0_data;
static DEFINE_RAW_SPINLOCK(l2x0_lock);
static u32 l2x0_way_mask;	/* Bitmask of active ways */
static u32 l2x0_size;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;

struct l2x0_regs l2x0_saved_regs;

/*
 * Common code for all cache controllers.
 */
static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
{
	/* wait for cache operation by line or way to complete */
	while (readl_relaxed(reg) & mask)
		cpu_relax();
}

/*
 * By default, we write directly to secure registers.  Platforms must
 * override this if they are running non-secure.
 */
static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
{
	if (val == readl_relaxed(base + reg))
		return;
	if (outer_cache.write_sec)
		outer_cache.write_sec(val, reg);
	else
		writel_relaxed(val, base + reg);
}

/*
 * This should only be called when we have a requirement that the
 * register be written due to a work-around, as platforms running
 * in non-secure mode may not be able to access this register.
 */
static inline void l2c_set_debug(void __iomem *base, unsigned long val)
{
	l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
}

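/*
 * Issue a background operation on all ways in l2x0_way_mask and spin
 * until the controller reports it has completed.
 */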
static void __l2c_op_way(void __iomem *reg)
{
	writel_relaxed(l2x0_way_mask, reg);
	l2c_wait_mask(reg, l2x0_way_mask);
}

static inline void l2c_unlock(void __iomem *base, unsigned num)
{
	unsigned i;

	for (i = 0; i < num; i++) {
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
	}
}

static void l2c_configure(void __iomem *base)
{
	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
}

/*
 * Enable the L2 cache controller.  This function must only be
 * called when the cache controller is known to be disabled.
 */
static void l2c_enable(void __iomem *base, unsigned num_lock)
{
	unsigned long flags;

	if (outer_cache.configure)
		outer_cache.configure(&l2x0_saved_regs);
	else
		l2x0_data->configure(base);

	l2x0_data->unlock(base, num_lock);

	local_irq_save(flags);
	__l2c_op_way(base + L2X0_INV_WAY);
	writel_relaxed(0, base + sync_reg_offset);
	l2c_wait_mask(base + sync_reg_offset, 1);
	local_irq_restore(flags);

	l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
}

static void l2c_disable(void)
{
	void __iomem *base = l2x0_base;

	l2x0_pmu_suspend();

	outer_cache.flush_all();
	l2c_write_sec(0, base, L2X0_CTRL);
	dsb(st);
}

static void l2c_save(void __iomem *base)
{
	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
}

static void l2c_resume(void)
{
	void __iomem *base = l2x0_base;

	/* Do not touch the controller if already enabled. */
	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
		l2c_enable(base, l2x0_data->num_lock);

	l2x0_pmu_resume();
}

/*
 * L2C-210 specific code.
 *
 * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
 * ensure that no background operation is running.  The way operations
 * are all background tasks.
 *
 * While a background operation is in progress, any new operation is
 * ignored (unspecified whether this causes an error.)  Thankfully, not
 * used on SMP.
 *
 * Never has a different sync register other than L2X0_CACHE_SYNC, but
 * we use sync_reg_offset here so we can share some of this with L2C-310.
 */
static void __l2c210_cache_sync(void __iomem *base)
{
	writel_relaxed(0, base + sync_reg_offset);
}

static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
	unsigned long end)
{
	while (start < end) {
		writel_relaxed(start, reg);
		start += CACHE_LINE_SIZE;
	}
}

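/*
 * Invalidate the physical address range [start, end).  Partial cache
 * lines at either end are cleaned and invalidated (rather than just
 * invalidated) so that adjacent data sharing the line is not lost.
 */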
static void l2c210_inv_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;

	if (start & (CACHE_LINE_SIZE - 1)) {
		start &= ~(CACHE_LINE_SIZE - 1);
		writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
		start += CACHE_LINE_SIZE;
	}

	if (end & (CACHE_LINE_SIZE - 1)) {
		end &= ~(CACHE_LINE_SIZE - 1);
		writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
	}

	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
	__l2c210_cache_sync(base);
}

static void l2c210_clean_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;

	start &= ~(CACHE_LINE_SIZE - 1);
	__l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
	__l2c210_cache_sync(base);
}

static void l2c210_flush_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;

	start &= ~(CACHE_LINE_SIZE - 1);
	__l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
	__l2c210_cache_sync(base);
}

static void l2c210_flush_all(void)
{
	void __iomem *base = l2x0_base;

	BUG_ON(!irqs_disabled());

	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
	__l2c210_cache_sync(base);
}

static void l2c210_sync(void)
{
	__l2c210_cache_sync(l2x0_base);
}

static const struct l2c_init_data l2c210_data __initconst = {
	.type = "L2C-210",
	.way_size_0 = SZ_8K,
	.num_lock = 1,
	.enable = l2c_enable,
	.save = l2c_save,
	.configure = l2c_configure,
	.unlock = l2c_unlock,
	.outer_cache = {
		.inv_range = l2c210_inv_range,
		.clean_range = l2c210_clean_range,
		.flush_range = l2c210_flush_range,
		.flush_all = l2c210_flush_all,
		.disable = l2c_disable,
		.sync = l2c210_sync,
		.resume = l2c_resume,
	},
};

/*
 * L2C-220 specific code.
 *
 * All operations are background operations: they have to be waited for.
 * Conflicting requests generate a slave error (which will cause an
 * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the
 * sync register here.
 *
 * However, we can re-use the l2c_resume call.
 */
static inline void __l2c220_cache_sync(void __iomem *base)
{
	writel_relaxed(0, base + L2X0_CACHE_SYNC);
	l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
}

static void l2c220_op_way(void __iomem *base, unsigned reg)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c_op_way(base + reg);
	__l2c220_cache_sync(base);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

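/*
 * Issue a line operation over [start, end) in blocks of at most 4096
 * bytes.  Each write must wait for the previous operation to complete,
 * so the l2x0_lock is dropped and re-acquired between blocks to bound
 * the time spent with interrupts disabled.
 */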
static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
	unsigned long end, unsigned long flags)
{
	raw_spinlock_t *lock = &l2x0_lock;

	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2c_wait_mask(reg, 1);
			writel_relaxed(start, reg);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(lock, flags);
			raw_spin_lock_irqsave(lock, flags);
		}
	}

	return flags;
}

static void l2c220_inv_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
		if (start & (CACHE_LINE_SIZE - 1)) {
			start &= ~(CACHE_LINE_SIZE - 1);
			writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
			start += CACHE_LINE_SIZE;
		}

		if (end & (CACHE_LINE_SIZE - 1)) {
			end &= ~(CACHE_LINE_SIZE - 1);
			l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
			writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
		}
	}

	flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
				   start, end, flags);
	l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
	__l2c220_cache_sync(base);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_clean_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	start &= ~(CACHE_LINE_SIZE - 1);
	if ((end - start) >= l2x0_size) {
		l2c220_op_way(base, L2X0_CLEAN_WAY);
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
				   start, end, flags);
	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
	__l2c220_cache_sync(base);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_flush_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	start &= ~(CACHE_LINE_SIZE - 1);
	if ((end - start) >= l2x0_size) {
		l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
				   start, end, flags);
	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
	__l2c220_cache_sync(base);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_flush_all(void)
{
	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
}

static void l2c220_sync(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c220_cache_sync(l2x0_base);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_enable(void __iomem *base, unsigned num_lock)
{
	/*
	 * Always enable non-secure access to the lockdown registers -
	 * we write to them as part of the L2C enable sequence so they
	 * need to be accessible.
	 */
	l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN;

	l2c_enable(base, num_lock);
}

static void l2c220_unlock(void __iomem *base, unsigned num_lock)
{
	if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN)
		l2c_unlock(base, num_lock);
}

static const struct l2c_init_data l2c220_data = {
	.type = "L2C-220",
	.way_size_0 = SZ_8K,
	.num_lock = 1,
	.enable = l2c220_enable,
	.save = l2c_save,
	.configure = l2c_configure,
	.unlock = l2c220_unlock,
	.outer_cache = {
		.inv_range = l2c220_inv_range,
		.clean_range = l2c220_clean_range,
		.flush_range = l2c220_flush_range,
		.flush_all = l2c220_flush_all,
		.disable = l2c_disable,
		.sync = l2c220_sync,
		.resume = l2c_resume,
	},
};

/*
 * L2C-310 specific code.
 *
 * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
 * and the way operations are all background tasks.  However, issuing an
 * operation while a background operation is in progress results in a
 * SLVERR response.  We can reuse:
 *
 *	__l2c210_cache_sync (using sync_reg_offset)
 *	l2c210_sync
 *	l2c210_inv_range (if 588369 is not applicable)
 *	l2c210_clean_range
 *	l2c210_flush_range (if 588369 is not applicable)
 *	l2c210_flush_all (if 727915 is not applicable)
 *
 * Errata:
 * 588369: PL310 R0P0->R1P0, fixed R2P0.
 *	Affects: all clean+invalidate operations
 *	clean and invalidate skips the invalidate step, so we need to issue
 *	separate operations.  We also require the above debug workaround
 *	enclosing this code fragment on affected parts.  On unaffected parts,
 *	we must not use this workaround without the debug register writes
 *	to avoid exposing a problem similar to 727915.
 *
 * 727915: PL310 R2P0->R3P0, fixed R3P1.
 *	Affects: clean+invalidate by way
 *	clean and invalidate by way runs in the background, and a store can
 *	hit the line between the clean operation and invalidate operation,
 *	resulting in the store being lost.
 *
 * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
 *	Affects: 8x64-bit (double fill) line fetches
 *	double fill line fetches can fail to cause dirty data to be evicted
 *	from the cache before the new data overwrites the second line.
 *
 * 753970: PL310 R3P0, fixed R3P1.
 *	Affects: sync
 *	prevents merging writes after the sync operation, until another L2C
 *	operation is performed (or a number of other conditions.)
 *
 * 769419: PL310 R0P0->R3P1, fixed R3P2.
 *	Affects: store buffer
 *	store buffer is not automatically drained.
 */
static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;

	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
		unsigned long flags;

		/* Erratum 588369 for both clean+invalidate operations */
		raw_spin_lock_irqsave(&l2x0_lock, flags);
		l2c_set_debug(base, 0x03);

		if (start & (CACHE_LINE_SIZE - 1)) {
			start &= ~(CACHE_LINE_SIZE - 1);
			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
			writel_relaxed(start, base + L2X0_INV_LINE_PA);
			start += CACHE_LINE_SIZE;
		}

		if (end & (CACHE_LINE_SIZE - 1)) {
			end &= ~(CACHE_LINE_SIZE - 1);
			writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
			writel_relaxed(end, base + L2X0_INV_LINE_PA);
		}

		l2c_set_debug(base, 0x00);
		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
	}

	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
	__l2c210_cache_sync(base);
}

static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
{
	raw_spinlock_t *lock = &l2x0_lock;
	unsigned long flags;
	void __iomem *base = l2x0_base;

	raw_spin_lock_irqsave(lock, flags);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		l2c_set_debug(base, 0x03);
		while (start < blk_end) {
			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
			writel_relaxed(start, base + L2X0_INV_LINE_PA);
			start += CACHE_LINE_SIZE;
		}
		l2c_set_debug(base, 0x00);

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(lock, flags);
			raw_spin_lock_irqsave(lock, flags);
		}
	}
	raw_spin_unlock_irqrestore(lock, flags);
	__l2c210_cache_sync(base);
}

static void l2c310_flush_all_erratum(void)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	l2c_set_debug(base, 0x03);
	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
	l2c_set_debug(base, 0x00);
	__l2c210_cache_sync(base);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void __init l2c310_save(void __iomem *base)
{
	unsigned revision;

	l2c_save(base);

	l2x0_saved_regs.tag_latency = readl_relaxed(base +
		L310_TAG_LATENCY_CTRL);
	l2x0_saved_regs.data_latency = readl_relaxed(base +
		L310_DATA_LATENCY_CTRL);
	l2x0_saved_regs.filter_end = readl_relaxed(base +
		L310_ADDR_FILTER_END);
	l2x0_saved_regs.filter_start = readl_relaxed(base +
		L310_ADDR_FILTER_START);

	revision = readl_relaxed(base + L2X0_CACHE_ID) &
			L2X0_CACHE_ID_RTL_MASK;

	/* From r2p0, there is Prefetch offset/control register */
	if (revision >= L310_CACHE_ID_RTL_R2P0)
		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
							L310_PREFETCH_CTRL);

	/* From r3p0, there is Power control register */
	if (revision >= L310_CACHE_ID_RTL_R3P0)
		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
							L310_POWER_CTRL);
}

static void l2c310_configure(void __iomem *base)
{
	unsigned revision;

	l2c_configure(base);

	/* restore pl310 setup */
	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
		      L310_TAG_LATENCY_CTRL);
	l2c_write_sec(l2x0_saved_regs.data_latency, base,
		      L310_DATA_LATENCY_CTRL);
	l2c_write_sec(l2x0_saved_regs.filter_end, base,
		      L310_ADDR_FILTER_END);
	l2c_write_sec(l2x0_saved_regs.filter_start, base,
		      L310_ADDR_FILTER_START);

	revision = readl_relaxed(base + L2X0_CACHE_ID) &
				 L2X0_CACHE_ID_RTL_MASK;

	if (revision >= L310_CACHE_ID_RTL_R2P0)
		l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
			      L310_PREFETCH_CTRL);
	if (revision >= L310_CACHE_ID_RTL_R3P0)
		l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
			      L310_POWER_CTRL);
}

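/*
 * CPU hotplug callbacks, registered from l2c310_enable() when full line
 * of zeros is in use: they set and clear the Cortex-A9 auxiliary control
 * register bits (including bit 3, write full line of zeros) as cores
 * come online and go down.
 */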
static int l2c310_starting_cpu(unsigned int cpu)
{
	set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
	return 0;
}

static int l2c310_dying_cpu(unsigned int cpu)
{
	set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
	return 0;
}

static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
{
	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
	bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
	u32 aux = l2x0_saved_regs.aux_ctrl;

	if (rev >= L310_CACHE_ID_RTL_R2P0) {
		if (cortex_a9) {
			aux |= L310_AUX_CTRL_EARLY_BRESP;
			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
			pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
			aux &= ~L310_AUX_CTRL_EARLY_BRESP;
		}
	}

	if (cortex_a9) {
		u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
		u32 acr = get_auxcr();

		pr_debug("Cortex-A9 ACR=0x%08x\n", acr);

		if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
			pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");

		if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
			pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");

		if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
			aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
			pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
		}
	} else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
		pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
	}

	/*
	 * Always enable non-secure access to the lockdown registers -
	 * we write to them as part of the L2C enable sequence so they
	 * need to be accessible.
	 */
	l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN;

	l2c_enable(base, num_lock);

	/* Read back resulting AUX_CTRL value as it could have been altered. */
	aux = readl_relaxed(base + L2X0_AUX_CTRL);

	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);

		pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
			aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
			aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
			1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
	}

	/* r3p0 or later has power control register */
	if (rev >= L310_CACHE_ID_RTL_R3P0) {
		u32 power_ctrl;

		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
	}

	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO)
		cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING,
				  "arm/l2x0:starting", l2c310_starting_cpu,
				  l2c310_dying_cpu);
}

static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
	struct outer_cache_fns *fns)
{
	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
	const char *errata[8];
	unsigned n = 0;

	if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
	    revision < L310_CACHE_ID_RTL_R2P0 &&
	    /* For bcm compatibility */
	    fns->inv_range == l2c210_inv_range) {
		fns->inv_range = l2c310_inv_range_erratum;
		fns->flush_range = l2c310_flush_range_erratum;
		errata[n++] = "588369";
	}

	if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
	    revision >= L310_CACHE_ID_RTL_R2P0 &&
	    revision < L310_CACHE_ID_RTL_R3P1) {
		fns->flush_all = l2c310_flush_all_erratum;
		errata[n++] = "727915";
	}

	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
	    revision < L310_CACHE_ID_RTL_R3P2) {
		u32 val = l2x0_saved_regs.prefetch_ctrl;
		if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) {
			val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
			l2x0_saved_regs.prefetch_ctrl = val;
			errata[n++] = "752271";
		}
	}

	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
	    revision == L310_CACHE_ID_RTL_R3P0) {
		sync_reg_offset = L2X0_DUMMY_REG;
		errata[n++] = "753970";
	}

	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
		errata[n++] = "769419";

	if (n) {
		unsigned i;

		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
		for (i = 0; i < n; i++)
			pr_cont(" %s", errata[i]);
		pr_cont(" enabled\n");
	}
}

static void l2c310_disable(void)
{
	/*
	 * If full-line-of-zeros is enabled, we must first disable it in the
	 * Cortex-A9 auxiliary control register before disabling the L2 cache.
	 */
	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
		set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));

	l2c_disable();
}

static void l2c310_resume(void)
{
	l2c_resume();

	/* Re-enable full-line-of-zeros for Cortex-A9 */
	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
}

static void l2c310_unlock(void __iomem *base, unsigned num_lock)
{
	if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN)
		l2c_unlock(base, num_lock);
}

static const struct l2c_init_data l2c310_init_fns __initconst = {
	.type = "L2C-310",
	.way_size_0 = SZ_8K,
	.num_lock = 8,
	.enable = l2c310_enable,
	.fixup = l2c310_fixup,
	.save = l2c310_save,
	.configure = l2c310_configure,
	.unlock = l2c310_unlock,
	.outer_cache = {
		.inv_range = l2c210_inv_range,
		.clean_range = l2c210_clean_range,
		.flush_range = l2c210_flush_range,
		.flush_all = l2c210_flush_all,
		.disable = l2c310_disable,
		.sync = l2c210_sync,
		.resume = l2c310_resume,
	},
};

static int __init __l2c_init(const struct l2c_init_data *data,
			     u32 aux_val, u32 aux_mask, u32 cache_id, bool nosync)
{
	struct outer_cache_fns fns;
	unsigned way_size_bits, ways;
	u32 aux, old_aux;

	/*
	 * Save the pointer globally so that callbacks which do not receive
	 * context from callers can access the structure.
	 */
	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
	if (!l2x0_data)
		return -ENOMEM;

	/*
	 * Sanity check the aux values.  aux_mask is the bits we preserve
	 * from reading the hardware register, and aux_val is the bits we
	 * set.
	 */
	if (aux_val & aux_mask)
		pr_alert("L2C: platform provided aux values permit register corruption.\n");

	old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
	aux &= aux_mask;
	aux |= aux_val;

	if (old_aux != aux)
		pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
			old_aux, aux);

	/* Determine the number of ways */
	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L310:
		if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
			pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
		if (aux & (1 << 16))
			ways = 16;
		else
			ways = 8;
		break;

	case L2X0_CACHE_ID_PART_L210:
	case L2X0_CACHE_ID_PART_L220:
		ways = (aux >> 13) & 0xf;
		break;

	case AURORA_CACHE_ID:
		ways = (aux >> 13) & 0xf;
		ways = 2 << ((ways + 1) >> 2);
		break;

	default:
		/* Assume unknown chips have 8 ways */
		ways = 8;
		break;
	}

	l2x0_way_mask = (1 << ways) - 1;

	/*
	 * way_size_0 is the size that a way_size value of zero would be
	 * given the calculation: way_size = way_size_0 << way_size_bits.
	 * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
	 * then way_size_0 would be 8k.
	 *
	 * L2 cache size = number of ways * way size.
	 */
	way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
			L2C_AUX_CTRL_WAY_SIZE_SHIFT;
	l2x0_size = ways * (data->way_size_0 << way_size_bits);

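	/*
	 * Worked example: an L2C-310 (way_size_0 = 8K) reporting
	 * way_size_bits = 3 and 16 ways gives a way size of 8K << 3 = 64K
	 * and a total cache size of 16 * 64K = 1MB.
	 */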
	fns = data->outer_cache;
	fns.write_sec = outer_cache.write_sec;
	fns.configure = outer_cache.configure;
	if (data->fixup)
		data->fixup(l2x0_base, cache_id, &fns);
	if (nosync) {
		pr_info("L2C: disabling outer sync\n");
		fns.sync = NULL;
	}

	/*
	 * Check if l2x0 controller is already enabled.  If we are booting
	 * in non-secure mode accessing the below registers will fault.
	 */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		l2x0_saved_regs.aux_ctrl = aux;

		data->enable(l2x0_base, data->num_lock);
	}

	outer_cache = fns;

	/*
	 * It is strange to save the register state before initialisation,
	 * but hey, this is what the DT implementations decided to do.
	 */
	if (data->save)
		data->save(l2x0_base);

	/* Re-read it in case some bits are reserved. */
	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	pr_info("%s cache controller enabled, %d ways, %d kB\n",
		data->type, ways, l2x0_size >> 10);
	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
		data->type, cache_id, aux);

	l2x0_pmu_register(l2x0_base, cache_id);

	return 0;
}

void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
{
	const struct l2c_init_data *data;
	u32 cache_id;

	l2x0_base = base;

	cache_id = readl_relaxed(base + L2X0_CACHE_ID);

	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	default:
	case L2X0_CACHE_ID_PART_L210:
		data = &l2c210_data;
		break;

	case L2X0_CACHE_ID_PART_L220:
		data = &l2c220_data;
		break;

	case L2X0_CACHE_ID_PART_L310:
		data = &l2c310_init_fns;
		break;
	}

	/* Read back current (default) hardware configuration */
	if (data->save)
		data->save(l2x0_base);

	__l2c_init(data, aux_val, aux_mask, cache_id, false);
}

#ifdef CONFIG_OF
static int l2_wt_override;

/*
 * Aurora doesn't have the cache ID register available, so we have to
 * pass it through the device tree.
 */
static u32 cache_id_part_number_from_dt;

/**
 * l2x0_cache_size_of_parse() - read cache size parameters from DT
 * @np: the device tree node for the l2 cache
 * @aux_val: pointer to machine-supplied auxiliary register value, to
 * be augmented by the call (bits to be set to 1)
 * @aux_mask: pointer to machine-supplied auxiliary register mask, to
 * be augmented by the call (bits to be set to 0)
 * @associativity: variable to return the calculated associativity in
 * @max_way_size: the maximum size in bytes for the cache ways
 */
static int __init l2x0_cache_size_of_parse(const struct device_node *np,
					   u32 *aux_val, u32 *aux_mask,
					   u32 *associativity,
					   u32 max_way_size)
{
	u32 mask = 0, val = 0;
	u32 cache_size = 0, sets = 0;
	u32 way_size_bits = 1;
	u32 way_size = 0;
	u32 block_size = 0;
	u32 line_size = 0;

	of_property_read_u32(np, "cache-size", &cache_size);
	of_property_read_u32(np, "cache-sets", &sets);
	of_property_read_u32(np, "cache-block-size", &block_size);
	of_property_read_u32(np, "cache-line-size", &line_size);

	if (!cache_size || !sets)
		return -ENODEV;

	/* All these l2 caches have the same line = block size actually */
	if (!line_size) {
		if (block_size) {
			/* If linesize is not given, it is equal to blocksize */
			line_size = block_size;
		} else {
			/* Fall back to known size */
			pr_warn("L2C OF: no cache block/line size given: "
				"falling back to default size %d bytes\n",
				CACHE_LINE_SIZE);
			line_size = CACHE_LINE_SIZE;
		}
	}

	if (line_size != CACHE_LINE_SIZE)
		pr_warn("L2C OF: DT supplied line size %d bytes does "
			"not match hardware line size of %d bytes\n",
			line_size,
			CACHE_LINE_SIZE);

	/*
	 * Since:
	 * set size = cache size / sets
	 * ways = cache size / (sets * line size)
	 * way size = cache size / (cache size / (sets * line size))
	 * way size = sets * line size
	 * associativity = ways = cache size / way size
	 */
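	/*
	 * Worked example: cache-size = 0x100000 (1MB), cache-sets = 4096
	 * and a 32 byte line give way size = 4096 * 32 = 128KB and
	 * associativity = 1MB / 128KB = 8.
	 */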
	way_size = sets * line_size;
	*associativity = cache_size / way_size;

	if (way_size > max_way_size) {
		pr_err("L2C OF: set size %d bytes is too large\n", way_size);
		return -EINVAL;
	}

	pr_info("L2C OF: override cache size: %d bytes (%dKB)\n",
		cache_size, cache_size >> 10);
	pr_info("L2C OF: override line size: %d bytes\n", line_size);
	pr_info("L2C OF: override way size: %d bytes (%dKB)\n",
		way_size, way_size >> 10);
	pr_info("L2C OF: override associativity: %d\n", *associativity);

	/*
	 * Calculate the bits 19:17 to set for way size:
	 * 512KB -> 6, 256KB -> 5, ... 16KB -> 1
	 */
	way_size_bits = ilog2(way_size >> 10) - 3;
	if (way_size_bits < 1 || way_size_bits > 6) {
		pr_err("L2C OF: cache way size illegal: %d bytes is not mapped\n",
		       way_size);
		return -EINVAL;
	}

	mask |= L2C_AUX_CTRL_WAY_SIZE_MASK;
	val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT);

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;

	return 0;
}

static void __init l2x0_of_parse(const struct device_node *np,
				 u32 *aux_val, u32 *aux_mask)
{
	u32 data[2] = { 0, 0 };
	u32 tag = 0;
	u32 dirty = 0;
	u32 val = 0, mask = 0;
	u32 assoc;
	int ret;

	of_property_read_u32(np, "arm,tag-latency", &tag);
	if (tag) {
		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
	}

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1]) {
		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
	}

	of_property_read_u32(np, "arm,dirty-latency", &dirty);
	if (dirty) {
		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
	}

	if (of_property_read_bool(np, "arm,parity-enable")) {
		mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
		val |= L2C_AUX_CTRL_PARITY_ENABLE;
	} else if (of_property_read_bool(np, "arm,parity-disable")) {
		mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
	}

	if (of_property_read_bool(np, "arm,shared-override")) {
		mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE;
		val |= L2C_AUX_CTRL_SHARED_OVERRIDE;
	}

	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
	if (ret)
		return;

	if (assoc > 8) {
		pr_err("l2x0 of: cache setting yields too high associativity\n");
		pr_err("l2x0 of: %d calculated, max 8\n", assoc);
	} else {
		mask |= L2X0_AUX_CTRL_ASSOC_MASK;
		val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT);
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}

static const struct l2c_init_data of_l2c210_data __initconst = {
	.type = "L2C-210",
	.way_size_0 = SZ_8K,
	.num_lock = 1,
	.of_parse = l2x0_of_parse,
	.enable = l2c_enable,
	.save = l2c_save,
	.configure = l2c_configure,
	.unlock = l2c_unlock,
	.outer_cache = {
		.inv_range = l2c210_inv_range,
		.clean_range = l2c210_clean_range,
		.flush_range = l2c210_flush_range,
		.flush_all = l2c210_flush_all,
		.disable = l2c_disable,
		.sync = l2c210_sync,
		.resume = l2c_resume,
	},
};

static const struct l2c_init_data of_l2c220_data __initconst = {
	.type = "L2C-220",
	.way_size_0 = SZ_8K,
	.num_lock = 1,
	.of_parse = l2x0_of_parse,
	.enable = l2c220_enable,
	.save = l2c_save,
	.configure = l2c_configure,
	.unlock = l2c220_unlock,
	.outer_cache = {
		.inv_range = l2c220_inv_range,
		.clean_range = l2c220_clean_range,
		.flush_range = l2c220_flush_range,
		.flush_all = l2c220_flush_all,
		.disable = l2c_disable,
		.sync = l2c220_sync,
		.resume = l2c_resume,
	},
};

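/*
 * Parse the L2C-310 specific device tree properties: tag/data latencies,
 * the address filter, associativity, prefetch and power control.  Results
 * land either in l2x0_saved_regs (applied by l2c310_configure()) or in
 * the aux value/mask pair.
 */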
static void __init l2c310_of_parse(const struct device_node *np,
	u32 *aux_val, u32 *aux_mask)
{
	u32 data[3] = { 0, 0, 0 };
	u32 tag[3] = { 0, 0, 0 };
	u32 filter[2] = { 0, 0 };
	u32 assoc;
	u32 prefetch;
	u32 power;
	u32 val;
	int ret;

	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
	if (tag[0] && tag[1] && tag[2])
		l2x0_saved_regs.tag_latency =
			L310_LATENCY_CTRL_RD(tag[0] - 1) |
			L310_LATENCY_CTRL_WR(tag[1] - 1) |
			L310_LATENCY_CTRL_SETUP(tag[2] - 1);

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1] && data[2])
		l2x0_saved_regs.data_latency =
			L310_LATENCY_CTRL_RD(data[0] - 1) |
			L310_LATENCY_CTRL_WR(data[1] - 1) |
			L310_LATENCY_CTRL_SETUP(data[2] - 1);

	of_property_read_u32_array(np, "arm,filter-ranges",
				   filter, ARRAY_SIZE(filter));
	if (filter[1]) {
		l2x0_saved_regs.filter_end =
			ALIGN(filter[0] + filter[1], SZ_1M);
		l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
			| L310_ADDR_FILTER_EN;
	}

	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
	if (!ret) {
		switch (assoc) {
		case 16:
			*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
			*aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
			*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
			break;
		case 8:
			*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
			*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
			break;
		default:
			pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
			       assoc);
			break;
		}
	}

	if (of_property_read_bool(np, "arm,shared-override")) {
		*aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE;
		*aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE;
	}

	if (of_property_read_bool(np, "arm,parity-enable")) {
		*aux_val |= L2C_AUX_CTRL_PARITY_ENABLE;
		*aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
	} else if (of_property_read_bool(np, "arm,parity-disable")) {
		*aux_val &= ~L2C_AUX_CTRL_PARITY_ENABLE;
		*aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
	}

	prefetch = l2x0_saved_regs.prefetch_ctrl;

	ret = of_property_read_u32(np, "arm,double-linefill", &val);
	if (ret == 0) {
		if (val)
			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
		else
			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
	}

	ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
	if (ret == 0) {
		if (val)
			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
		else
			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
	}

	ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
	if (ret == 0) {
		if (!val)
			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
		else
			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
	}

	ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
	if (ret == 0) {
		if (val)
			prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
		else
			prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
	}

	ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
	if (ret == 0) {
		prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
		prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
	}

	ret = of_property_read_u32(np, "prefetch-data", &val);
	if (ret == 0) {
		if (val)
			prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
		else
			prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF prefetch-data property value is missing\n");
	}

	ret = of_property_read_u32(np, "prefetch-instr", &val);
	if (ret == 0) {
		if (val)
			prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
		else
			prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF prefetch-instr property value is missing\n");
	}

	l2x0_saved_regs.prefetch_ctrl = prefetch;

	power = l2x0_saved_regs.pwr_ctrl |
		L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN;

	ret = of_property_read_u32(np, "arm,dynamic-clock-gating", &val);
	if (!ret) {
		if (!val)
			power &= ~L310_DYNAMIC_CLK_GATING_EN;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF dynamic-clock-gating property value is missing or invalid\n");
	}
	ret = of_property_read_u32(np, "arm,standby-mode", &val);
	if (!ret) {
		if (!val)
			power &= ~L310_STNDBY_MODE_EN;
	} else if (ret != -EINVAL) {
		pr_err("L2C-310 OF standby-mode property value is missing or invalid\n");
	}

	l2x0_saved_regs.pwr_ctrl = power;
}

static const struct l2c_init_data of_l2c310_data __initconst = {
	.type = "L2C-310",
	.way_size_0 = SZ_8K,
	.num_lock = 8,
	.of_parse = l2c310_of_parse,
	.enable = l2c310_enable,
	.fixup = l2c310_fixup,
	.save = l2c310_save,
	.configure = l2c310_configure,
	.unlock = l2c310_unlock,
	.outer_cache = {
		.inv_range = l2c210_inv_range,
		.clean_range = l2c210_clean_range,
		.flush_range = l2c210_flush_range,
		.flush_all = l2c210_flush_all,
		.disable = l2c310_disable,
		.sync = l2c210_sync,
		.resume = l2c310_resume,
	},
};

/*
 * This is a variant of the of_l2c310_data with .sync set to
 * NULL.  Outer sync operations are not needed when the system is I/O
 * coherent, and potentially harmful in certain situations (PCIe/PL310
 * deadlock on Armada 375/38x due to hardware I/O coherency).  The
 * other operations are kept because they are infrequent (therefore do
 * not cause the deadlock in practice) and needed for secondary CPU
 * boot and other power management activities.
 */
static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
	.type = "L2C-310 Coherent",
	.way_size_0 = SZ_8K,
	.num_lock = 8,
	.of_parse = l2c310_of_parse,
	.enable = l2c310_enable,
	.fixup = l2c310_fixup,
	.save = l2c310_save,
	.configure = l2c310_configure,
	.unlock = l2c310_unlock,
	.outer_cache = {
		.inv_range = l2c210_inv_range,
		.clean_range = l2c210_clean_range,
		.flush_range = l2c210_flush_range,
		.flush_all = l2c210_flush_all,
		.disable = l2c310_disable,
		.resume = l2c310_resume,
	},
};

/*
 * Note that the end addresses passed to Linux primitives are
 * noninclusive, while the hardware cache range operations use
 * inclusive start and end addresses.
 */
static unsigned long aurora_range_end(unsigned long start, unsigned long end)
{
	/*
	 * Limit the number of cache lines processed at once,
	 * since cache range operations stall the CPU pipeline
	 * until completion.
	 */
	if (end > start + MAX_RANGE_SIZE)
		end = start + MAX_RANGE_SIZE;

	/*
	 * Cache range operations can't straddle a page boundary.
	 */
	if (end > PAGE_ALIGN(start+1))
		end = PAGE_ALIGN(start+1);

	return end;
}

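/*
 * Issue one range operation (clean, invalidate or flush, selected by
 * 'offset') per block returned by aurora_range_end().  The hardware
 * takes an inclusive end address, hence 'range_end - CACHE_LINE_SIZE',
 * and each block is followed by a sync write.
 */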
static void aurora_pa_range(unsigned long start, unsigned long end,
			    unsigned long offset)
{
	void __iomem *base = l2x0_base;
	unsigned long range_end;
	unsigned long flags;

	/*
	 * round start and end addresses to cache line boundaries
	 */
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);

	/*
	 * perform operation on all full cache lines between 'start' and 'end'
	 */
	while (start < end) {
		range_end = aurora_range_end(start, end);

		raw_spin_lock_irqsave(&l2x0_lock, flags);
		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
		raw_spin_unlock_irqrestore(&l2x0_lock, flags);

		writel_relaxed(0, base + AURORA_SYNC_REG);
		start = range_end;
	}
}

static void aurora_inv_range(unsigned long start, unsigned long end)
{
	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
}

static void aurora_clean_range(unsigned long start, unsigned long end)
{
	/*
	 * If L2 is forced to WT, the L2 will always be clean and we
	 * don't need to do anything here.
	 */
	if (!l2_wt_override)
		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
}

static void aurora_flush_range(unsigned long start, unsigned long end)
{
	if (l2_wt_override)
		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
	else
		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
}

static void aurora_flush_all(void)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	/* clean all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);

	writel_relaxed(0, base + AURORA_SYNC_REG);
}

static void aurora_cache_sync(void)
{
	writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
}

static void aurora_disable(void)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
	writel_relaxed(0, base + AURORA_SYNC_REG);
	l2c_write_sec(0, base, L2X0_CTRL);
	dsb(st);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void aurora_save(void __iomem *base)
{
	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
}

/*
 * For Aurora cache in no outer mode, enable via the CP15 coprocessor
 * broadcasting of cache commands to L2.
 */
static void __init aurora_enable_no_outer(void __iomem *base,
	unsigned num_lock)
{
	u32 u;

	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
	u |= AURORA_CTRL_FW;		/* Set the FW bit */
	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));

	isb();

	l2c_enable(base, num_lock);
}

static void __init aurora_fixup(void __iomem *base, u32 cache_id,
	struct outer_cache_fns *fns)
{
	sync_reg_offset = AURORA_SYNC_REG;
}

static void __init aurora_of_parse(const struct device_node *np,
				   u32 *aux_val, u32 *aux_mask)
{
	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
	u32 mask = AURORA_ACR_REPLACEMENT_MASK;

	of_property_read_u32(np, "cache-id-part",
			&cache_id_part_number_from_dt);

	/* Determine and save the write policy */
	l2_wt_override = of_property_read_bool(np, "wt-override");

	if (l2_wt_override) {
		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}

static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
	.type = "Aurora",
	.way_size_0 = SZ_4K,
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = l2c_enable,
	.fixup = aurora_fixup,
	.save = aurora_save,
	.configure = l2c_configure,
	.unlock = l2c_unlock,
	.outer_cache = {
		.inv_range = aurora_inv_range,
		.clean_range = aurora_clean_range,
		.flush_range = aurora_flush_range,
		.flush_all = aurora_flush_all,
		.disable = aurora_disable,
		.sync = aurora_cache_sync,
		.resume = l2c_resume,
	},
};

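/*
 * Aurora in "system cache" (no outer cache) mode: CP15 cache maintenance
 * is broadcast to the L2 in hardware (see aurora_enable_no_outer()), so
 * no outer cache operations are needed apart from resume.
 */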
static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
	.type = "Aurora",
	.way_size_0 = SZ_4K,
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = aurora_enable_no_outer,
	.fixup = aurora_fixup,
	.save = aurora_save,
	.configure = l2c_configure,
	.unlock = l2c_unlock,
	.outer_cache = {
		.resume = l2c_resume,
	},
};

/*
 * For certain Broadcom SoCs, depending on the address range, different
 * offsets need to be added to the address before passing it to L2 for
 * invalidation/clean/flush
 *
 * Section	Address Range			Offset		EMI
 *   1		0x00000000 - 0x3FFFFFFF		0x80000000	VC
 *   2		0x40000000 - 0xBFFFFFFF		0x40000000	SYS
 *   3		0xC0000000 - 0xFFFFFFFF		0x80000000	VC
 *
 * When the start and end addresses fall in two different sections, we
 * need to break the L2 operation into two, each within its own section.
 * For example, to invalidate a range starting at 0xBFFF0000 and ending
 * at 0xC0001000, we invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and
 * 2) 0xC0000000 - 0xC0001000.
 *
 * Note 1:
 * By breaking a single L2 operation into two, we may potentially suffer
 * some performance hit, but keep in mind the cross-section case is very
 * rare.
 *
 * Note 2:
 * We do not need to handle the case when the start address is in
 * Section 1 and the end address is in Section 3, since it is not a valid
 * use case.
 *
 * Note 3:
 * Section 1 in practical terms can no longer be used on rev A2.  Because
 * of that the code does not need to handle section 1 at all.
 *
 */
#define BCM_SYS_EMI_START_ADDR		0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR	0xC0000000UL

#define BCM_SYS_EMI_OFFSET		0x40000000UL
#define BCM_VC_EMI_OFFSET		0x80000000UL

static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
	return (addr >= BCM_SYS_EMI_START_ADDR) &&
		(addr < BCM_VC_EMI_SEC3_START_ADDR);
}

static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
	if (bcm_addr_is_sys_emi(addr))
		return addr + BCM_SYS_EMI_OFFSET;
	else
		return addr + BCM_VC_EMI_OFFSET;
}

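/*
 * Example: a SYS EMI address such as 0x50000000 is issued to the L2C as
 * 0x50000000 + BCM_SYS_EMI_OFFSET = 0x90000000, while addresses outside
 * the SYS EMI window get BCM_VC_EMI_OFFSET added instead.
 */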
static void bcm_inv_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2c210_inv_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2c210_inv_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_clean_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2c210_clean_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2c210_clean_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_flush_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		outer_cache.flush_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2c210_flush_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2c210_flush_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

/* Broadcom L2C-310 is based on ARM's R3P2 or later, and requires no fixups */
static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
	.type = "BCM-L2C-310",
	.way_size_0 = SZ_8K,
	.num_lock = 8,
	.of_parse = l2c310_of_parse,
	.enable = l2c310_enable,
	.save = l2c310_save,
	.configure = l2c310_configure,
	.unlock = l2c310_unlock,
	.outer_cache = {
		.inv_range = bcm_inv_range,
		.clean_range = bcm_clean_range,
		.flush_range = bcm_flush_range,
		.flush_all = l2c210_flush_all,
		.disable = l2c310_disable,
		.sync = l2c210_sync,
		.resume = l2c310_resume,
	},
};

static void __init tauros3_save(void __iomem *base)
{
	l2c_save(base);

	l2x0_saved_regs.aux2_ctrl =
		readl_relaxed(base + TAUROS3_AUX2_CTRL);
	l2x0_saved_regs.prefetch_ctrl =
		readl_relaxed(base + L310_PREFETCH_CTRL);
}

static void tauros3_configure(void __iomem *base)
{
	l2c_configure(base);
	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
		       base + TAUROS3_AUX2_CTRL);
	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
		       base + L310_PREFETCH_CTRL);
}

static const struct l2c_init_data of_tauros3_data __initconst = {
	.type = "Tauros3",
	.way_size_0 = SZ_8K,
	.num_lock = 8,
	.enable = l2c_enable,
	.save = tauros3_save,
	.configure = tauros3_configure,
	.unlock = l2c_unlock,
	/* Tauros3 broadcasts L1 cache operations to L2 */
	.outer_cache = {
		.resume = l2c_resume,
	},
};

#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
static const struct of_device_id l2x0_ids[] __initconst = {
	L2C_ID("arm,l210-cache", of_l2c210_data),
	L2C_ID("arm,l220-cache", of_l2c220_data),
	L2C_ID("arm,pl310-cache", of_l2c310_data),
	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
	/* Deprecated IDs */
	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	{}
};

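/*
 * Probe the L2 cache controller described in the device tree: map its
 * registers, save the current hardware state, apply any DT overrides
 * (only possible while the cache is still disabled) and hand over to
 * __l2c_init().
 */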
int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
	const struct l2c_init_data *data;
	struct device_node *np;
	struct resource res;
	u32 cache_id, old_aux;
	u32 cache_level = 2;
	bool nosync = false;

	np = of_find_matching_node(NULL, l2x0_ids);
	if (!np)
		return -ENODEV;

	if (of_address_to_resource(np, 0, &res))
		return -ENODEV;

	l2x0_base = ioremap(res.start, resource_size(&res));
	if (!l2x0_base)
		return -ENOMEM;

	l2x0_saved_regs.phy_base = res.start;

	data = of_match_node(l2x0_ids, np)->data;

	if (of_device_is_compatible(np, "arm,pl310-cache") &&
	    of_property_read_bool(np, "arm,io-coherent"))
		data = &of_l2c310_coherent_data;

	old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
	if (old_aux != ((old_aux & aux_mask) | aux_val)) {
		pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
			old_aux, (old_aux & aux_mask) | aux_val);
	} else if (aux_mask != ~0U && aux_val != 0) {
		pr_alert("L2C: platform provided aux values match the hardware, so have no effect.  Please remove them.\n");
	}

	/* All L2 caches are unified, so this property should be specified */
	if (!of_property_read_bool(np, "cache-unified"))
		pr_err("L2C: device tree omits to specify unified cache\n");

	if (of_property_read_u32(np, "cache-level", &cache_level))
		pr_err("L2C: device tree omits to specify cache-level\n");

	if (cache_level != 2)
		pr_err("L2C: device tree specifies invalid cache level\n");

	nosync = of_property_read_bool(np, "arm,outer-sync-disable");

	/* Read back current (default) hardware configuration */
	if (data->save)
		data->save(l2x0_base);

	/* L2 configuration can only be changed if the cache is disabled */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
		if (data->of_parse)
			data->of_parse(np, &aux_val, &aux_mask);

	if (cache_id_part_number_from_dt)
		cache_id = cache_id_part_number_from_dt;
	else
		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);

	return __l2c_init(data, aux_val, aux_mask, cache_id, nosync);
}
#endif