/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <config.h>
#include <common.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <asm/arcregs.h>
#include <asm/arc-bcr.h>
#include <asm/cache.h>

/*
 * [ NOTE 1 ]:
 * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable
 * operation may result in unexpected behavior and data loss even if we flush
 * data cache right before invalidation. That may happen if we store any
 * context on the stack (like we store the BLINK register on the stack before
 * a function call). BLINK is the register where the return address is
 * automatically saved when we do a function call with instructions like 'bl'.
 *
 * Here is a real example:
 * We may hang in the code below as we store the BLINK register on the stack
 * in the invalidate_dcache_all() function.
 *
 * void flush_dcache_all() {
 *     __dc_entire_op(OP_FLUSH);
 *     // Other code //
 * }
 *
 * void invalidate_dcache_all() {
 *     __dc_entire_op(OP_INV);
 *     // Other code //
 * }
 *
 * void foo(void) {
 *     flush_dcache_all();
 *     invalidate_dcache_all();
 * }
 *
 * Now let's see what really happens during that code execution:
 *
 * foo()
 *   |->> call flush_dcache_all
 *     [return address is saved to BLINK register]
 *     [push BLINK] (save to stack) ![point 1]
 *     |->> call __dc_entire_op(OP_FLUSH)
 *         [return address is saved to BLINK register]
 *         [flush L1 D$]
 *         return [jump to BLINK]
 *     <<------
 *     [other flush_dcache_all code]
 *     [pop BLINK] (get from stack)
 *     return [jump to BLINK]
 *   <<------
 *   |->> call invalidate_dcache_all
 *     [return address is saved to BLINK register]
 *     [push BLINK] (save to stack) ![point 2]
 *     |->> call __dc_entire_op(OP_INV)
 *         [return address is saved to BLINK register]
 *         [invalidate L1 D$] ![point 3]
 *         // Oops!!!
 *         // We lose the return address of the invalidate_dcache_all
 *         // function: we saved it to the stack and invalidated L1 D$
 *         // after that!
 *         return [jump to BLINK]
 *     <<------
 *     [other invalidate_dcache_all code]
 *     [pop BLINK] (get from stack)
 *     // we don't have this data in L1 D$ as we invalidated it in [point 3],
 *     // so we get it from the next memory level (for example DDR memory),
 *     // but in memory we have the value saved in [point 1], which is the
 *     // return address of the flush_dcache_all function (instead of the
 *     // return address of the current invalidate_dcache_all function,
 *     // which we saved in [point 2]!)
 *     return [jump to BLINK]
 *   <<------
 *   // As BLINK points to invalidate_dcache_all, we call it again and
 *   // loop forever.
 *
 * Fortunately we may fix that by doing flush & invalidation of D$ with a
 * single instruction (instead of a flush and invalidation instruction pair)
 * and by forcing function inlining with the '__attribute__((always_inline))'
 * gcc attribute to avoid any function call (and BLINK store) between cache
 * flush and disable.
 *
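 * A minimal sketch of that pattern (hypothetical helper name, not this
 * file's exact code) could look like:
 *
 *	static inline __attribute__((always_inline)) void __dc_flush_n_inv(void)
 *	{
 *		// set the IM bit so the invalidate command also flushes,
 *		// then a single write to DC_IVDC flushes and invalidates
 *		// the whole D$ - no extra call, so no BLINK spill in between
 *	}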
 *
 * [ NOTE 2 ]:
 * As of today we only support the following cache configurations on ARC.
 * Other configurations may exist in HW (for example, since version 3.0 HS
 * supports SL$ (L2 system level cache) disable) but we don't support them
 * in SW.
 *
 * Configuration 1:
 *          ______________________
 *         |                      |
 *         |   ARC CPU            |
 *         |______________________|
 *          ___|___        ___|___
 *         |       |      |       |
 *         | L1 I$ |      | L1 D$ |
 *         |_______|      |_______|
 *          on/off         on/off
 *          ___|______________|____
 *         |                      |
 *         |   main memory        |
 *         |______________________|
 *
 * Configuration 2:
 *          ______________________
 *         |                      |
 *         |   ARC CPU            |
 *         |______________________|
 *          ___|___        ___|___
 *         |       |      |       |
 *         | L1 I$ |      | L1 D$ |
 *         |_______|      |_______|
 *          on/off         on/off
 *          ___|______________|____
 *         |                      |
 *         |   L2 (SL$)           |
 *         |______________________|
 *          always must be on
 *          ___|______________|____
 *         |                      |
 *         |   main memory        |
 *         |______________________|
 *
 * Configuration 3:
 *          ______________________
 *         |                      |
 *         |   ARC CPU            |
 *         |______________________|
 *          ___|___        ___|___
 *         |       |      |       |
 *         | L1 I$ |      | L1 D$ |
 *         |_______|      |_______|
 *          on/off        must be on
 *          ___|______________|____       _______
 *         |                      |      |       |
 *         |   L2 (SL$)           |------|  IOC  |
 *         |______________________|      |_______|
 *          always must be on             on/off
 *          ___|______________|____
 *         |                      |
 *         |   main memory        |
 *         |______________________|
 */

DECLARE_GLOBAL_DATA_PTR;

/* Bit values in IC_CTRL */
#define IC_CTRL_CACHE_DISABLE	BIT(0)

/* Bit values in DC_CTRL */
#define DC_CTRL_CACHE_DISABLE	BIT(0)
#define DC_CTRL_INV_MODE_FLUSH	BIT(6)
#define DC_CTRL_FLUSH_STATUS	BIT(8)

#define OP_INV			BIT(0)
#define OP_FLUSH		BIT(1)
#define OP_FLUSH_N_INV		(OP_FLUSH | OP_INV)

/* Bit values in SLC_CONTROL */
#define SLC_CTRL_DIS		0x001
#define SLC_CTRL_IM		0x040
#define SLC_CTRL_BUSY		0x100
#define SLC_CTRL_RGN_OP_INV	0x200

#define CACHE_LINE_MASK		(~(gd->arch.l1_line_sz - 1))

static inline bool pae_exists(void)
{
	/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
	union bcr_mmu_4 mmu4;

	mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR);

	if (mmu4.fields.pae)
		return true;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */

	return false;
}

static inline bool icache_exists(void)
{
	union bcr_di_cache ibcr;

	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
	return !!ibcr.fields.ver;
}

static inline bool icache_enabled(void)
{
	if (!icache_exists())
		return false;

	return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE);
}

static inline bool dcache_exists(void)
{
	union bcr_di_cache dbcr;

	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
	return !!dbcr.fields.ver;
}

static inline bool dcache_enabled(void)
{
	if (!dcache_exists())
		return false;

	return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE);
}

static inline bool slc_exists(void)
{
	if (is_isa_arcv2()) {
		union bcr_generic sbcr;

		sbcr.word = read_aux_reg(ARC_BCR_SLC);
		return !!sbcr.fields.ver;
	}

	return false;
}

static inline bool slc_data_bypass(void)
{
	/*
	 * If L1 data cache is disabled SL$ is bypassed and all load/store
	 * requests are sent directly to main memory.
	 */
	return !dcache_enabled();
}

static inline bool ioc_exists(void)
{
	if (is_isa_arcv2()) {
		union bcr_clust_cfg cbcr;

		cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
		return cbcr.fields.c;
	}

	return false;
}

static inline bool ioc_enabled(void)
{
	/*
	 * We only check the CONFIG option instead of the IOC HW state,
	 * as IOC must be disabled by default.
	 */
	if (is_ioc_enabled())
		return ioc_exists();

	return false;
}

static void __slc_entire_op(const int op)
{
	unsigned int ctrl;

	if (!slc_exists())
		return;

	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);

	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
	else
		ctrl |= SLC_CTRL_IM;

	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);

	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
		write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1);
	else
		write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1);

	/* Make sure "busy" bit reports correct status, see STAR 9001165532 */
	read_aux_reg(ARC_AUX_SLC_CTRL);

	/* Important to wait for flush to complete */
	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
}

static void slc_upper_region_init(void)
{
	/*
	 * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 registers exist
	 * only if PAE exists in the current HW, so we have to check
	 * pae_exists() before using them.
	 */
	if (!pae_exists())
		return;

	/*
	 * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
	 * as we don't use PAE40.
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END1, 0);
	write_aux_reg(ARC_AUX_SLC_RGN_START1, 0);
}
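
/*
 * Illustrative arithmetic for the SLC region operation below (the numbers
 * are made up, not taken from any board): with gd->arch.slc_line_sz = 128,
 * paddr = 0x80000000 and sz = 0x100, END is programmed as
 * 0x80000000 + 0x100 + 127 = 0x8000017f and START as 0x80000000. Lower bits
 * are ignored by HW, and adding (slc_line_sz - 1) to sz guarantees END never
 * falls in the same line as START.
 */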

static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2

	unsigned int ctrl;
	unsigned long end;

	if (!slc_exists())
		return;

	/*
	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
	 *  - b'000 (default) is Flush,
	 *  - b'001 is Invalidate if CTRL.IM == 0
	 *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
	 */
	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);

	/* Don't rely on default value of IM bit */
	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
	else
		ctrl |= SLC_CTRL_IM;

	if (op & OP_INV)
		ctrl |= SLC_CTRL_RGN_OP_INV;	/* Inv or flush-n-inv */
	else
		ctrl &= ~SLC_CTRL_RGN_OP_INV;

	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);

	/*
	 * Lower bits are ignored, no need to clip.
	 * END needs to be set up before START (the latter triggers the
	 * operation). END can't be the same as START, so add
	 * (slc_line_sz - 1) to sz.
	 */
	end = paddr + sz + gd->arch.slc_line_sz - 1;

	/*
	 * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1)
	 * are always == 0 as we don't use PAE40, so we only set up the lower
	 * ones (ARC_AUX_SLC_RGN_END and ARC_AUX_SLC_RGN_START).
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END, end);
	write_aux_reg(ARC_AUX_SLC_RGN_START, paddr);

	/* Make sure "busy" bit reports correct status, see STAR 9001165532 */
	read_aux_reg(ARC_AUX_SLC_CTRL);

	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);

#endif /* CONFIG_ISA_ARCV2 */
}

static void arc_ioc_setup(void)
{
	/* IOC Aperture start is equal to DDR start */
	unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
	/* IOC Aperture size is equal to DDR size */
	long ap_size = CONFIG_SYS_SDRAM_SIZE;

	/* Unsupported configuration. See [ NOTE 2 ] for more details. */
	if (!slc_exists())
		panic("Trying to enable IOC but SLC is not present");

	/* Unsupported configuration. See [ NOTE 2 ] for more details. */
	if (!dcache_enabled())
		panic("Trying to enable IOC but L1 D$ is disabled");

	flush_n_invalidate_dcache_all();

	if (!is_power_of_2(ap_size) || ap_size < 4096)
		panic("IOC Aperture size must be a power of 2 and larger than 4KiB");

	/*
	 * IOC Aperture size is decoded as 2 ^ (SIZE + 2) KB,
	 * so setting 0x11 implies 512M, 0x12 implies 1G...
	 */
	write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
		      order_base_2(ap_size / 1024) - 2);

	/* IOC Aperture start must be aligned to the size of the aperture */
	if (ap_base % ap_size != 0)
		panic("IOC Aperture start must be aligned to the size of the aperture");

	write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
	write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
	write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
}
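
/*
 * Illustrative check of the AP0_SIZE encoding used in arc_ioc_setup() above
 * (the numbers are made up, not taken from any particular board): for a
 * hypothetical 512 MiB DDR, ap_size / 1024 = 524288 KiB = 2 ^ 19 KiB, so
 * order_base_2(524288) - 2 = 19 - 2 = 17 = 0x11, matching "0x11 implies 512M"
 * in the comment above. A 1 GiB DDR gives 20 - 2 = 0x12 in the same way.
 */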

static void read_decode_cache_bcr_arcv2(void)
{
#ifdef CONFIG_ISA_ARCV2

	union bcr_slc_cfg slc_cfg;

	if (slc_exists()) {
		slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);
		gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;

		/*
		 * We don't support a configuration where L1 I$ or L1 D$ is
		 * absent but SL$ exists. See [ NOTE 2 ] for more details.
		 */
		if (!icache_exists() || !dcache_exists())
			panic("Unsupported cache configuration: SLC exists but one of L1 caches is absent");
	}

#endif /* CONFIG_ISA_ARCV2 */
}

void read_decode_cache_bcr(void)
{
	int dc_line_sz = 0, ic_line_sz = 0;
	union bcr_di_cache ibcr, dbcr;

	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
	if (ibcr.fields.ver) {
		gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
		if (!ic_line_sz)
			panic("Instruction cache exists but line length is 0\n");
	}

	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
	if (dbcr.fields.ver) {
		gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
		if (!dc_line_sz)
			panic("Data cache exists but line length is 0\n");
	}

	if (ic_line_sz && dc_line_sz && (ic_line_sz != dc_line_sz))
		panic("Instruction and data cache line lengths differ\n");
}

void cache_init(void)
{
	read_decode_cache_bcr();

	if (is_isa_arcv2())
		read_decode_cache_bcr_arcv2();

	if (is_isa_arcv2() && ioc_enabled())
		arc_ioc_setup();

	if (is_isa_arcv2() && slc_exists())
		slc_upper_region_init();
}

int icache_status(void)
{
	return icache_enabled();
}

void icache_enable(void)
{
	if (icache_exists())
		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
			      ~IC_CTRL_CACHE_DISABLE);
}

void icache_disable(void)
{
	if (icache_exists())
		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
			      IC_CTRL_CACHE_DISABLE);
}

/* IC supports only invalidation */
static inline void __ic_entire_invalidate(void)
{
	if (!icache_enabled())
		return;

	/* Any write to IC_IVIC register triggers invalidation of entire I$ */
	write_aux_reg(ARC_AUX_IC_IVIC, 1);
	/*
	 * As per ARC HS databook (see chapter 5.3.3.2)
	 * it is required to add 3 NOPs after each write to IC_IVIC.
	 */
	__builtin_arc_nop();
	__builtin_arc_nop();
	__builtin_arc_nop();
	read_aux_reg(ARC_AUX_IC_CTRL);	/* blocks */
}

void invalidate_icache_all(void)
{
	__ic_entire_invalidate();

	/*
	 * If SL$ is bypassed for data it is used only for instructions,
	 * so we need to invalidate it too.
	 * TODO: HS 3.0 supports SLC disable, so we need to check the SLC
	 * enable/disable status here.
	 */
	if (is_isa_arcv2() && slc_data_bypass())
		__slc_entire_op(OP_INV);
}

int dcache_status(void)
{
	return dcache_enabled();
}

void dcache_enable(void)
{
	if (!dcache_exists())
		return;

	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
		      ~(DC_CTRL_INV_MODE_FLUSH | DC_CTRL_CACHE_DISABLE));
}

void dcache_disable(void)
{
	if (!dcache_exists())
		return;

	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
		      DC_CTRL_CACHE_DISABLE);
}
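
/*
 * Illustrative arithmetic for the line loop below (made-up numbers, assuming
 * a 32-byte L1 line): for paddr = 0x1004 and sz = 0x40 the partial head is
 * folded into sz (sz becomes 0x44), paddr is rounded down to 0x1000, and
 * num_lines = DIV_ROUND_UP(0x44, 0x20) = 3, so lines 0x1000, 0x1020 and
 * 0x1040 are operated on - fully covering the original 0x1004..0x1043 range.
 */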

/* Common Helper for Line Operations on D-cache */
static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
				      const int cacheop)
{
	unsigned int aux_cmd;
	int num_lines;

	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
	aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;

	sz += paddr & ~CACHE_LINE_MASK;
	paddr &= CACHE_LINE_MASK;

	num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz);

	while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER == 3)
		write_aux_reg(ARC_AUX_DC_PTAG, paddr);
#endif
		write_aux_reg(aux_cmd, paddr);
		paddr += gd->arch.l1_line_sz;
	}
}

static void __before_dc_op(const int op)
{
	unsigned int ctrl;

	ctrl = read_aux_reg(ARC_AUX_DC_CTRL);

	/* IM bit implies flush-n-inv, instead of vanilla inv */
	if (op == OP_INV)
		ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
	else
		ctrl |= DC_CTRL_INV_MODE_FLUSH;

	write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
}

static void __after_dc_op(const int op)
{
	if (op & OP_FLUSH)	/* flush / flush-n-inv both wait */
		while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
}

static inline void __dc_entire_op(const int cacheop)
{
	int aux;

	if (!dcache_enabled())
		return;

	__before_dc_op(cacheop);

	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
		aux = ARC_AUX_DC_IVDC;
	else
		aux = ARC_AUX_DC_FLSH;

	write_aux_reg(aux, 0x1);

	__after_dc_op(cacheop);
}

static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
				const int cacheop)
{
	if (!dcache_enabled())
		return;

	__before_dc_op(cacheop);
	__dcache_line_loop(paddr, sz, cacheop);
	__after_dc_op(cacheop);
}

void invalidate_dcache_range(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;

	/*
	 * ARCv1                                 -> call __dc_line_op
	 * ARCv2 && L1 D$ disabled               -> nothing
	 * ARCv2 && L1 D$ enabled && IOC enabled -> nothing
	 * ARCv2 && L1 D$ enabled && no IOC      -> call __dc_line_op; call __slc_rgn_op
	 */
	if (!is_isa_arcv2() || !ioc_enabled())
		__dc_line_op(start, end - start, OP_INV);

	if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
		__slc_rgn_op(start, end - start, OP_INV);
}

void flush_dcache_range(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;

	/*
	 * ARCv1                                 -> call __dc_line_op
	 * ARCv2 && L1 D$ disabled               -> nothing
	 * ARCv2 && L1 D$ enabled && IOC enabled -> nothing
	 * ARCv2 && L1 D$ enabled && no IOC      -> call __dc_line_op; call __slc_rgn_op
	 */
	if (!is_isa_arcv2() || !ioc_enabled())
		__dc_line_op(start, end - start, OP_FLUSH);

	if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
		__slc_rgn_op(start, end - start, OP_FLUSH);
}

void flush_cache(unsigned long start, unsigned long size)
{
	flush_dcache_range(start, start + size);
}
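
/*
 * A typical caller-side use of the range operations above (illustrative only;
 * 'buf' and 'len' are hypothetical and assumed to be cache-line aligned):
 *
 *	// CPU prepared data in 'buf': write it back so the device sees it
 *	flush_dcache_range((unsigned long)buf, (unsigned long)buf + len);
 *	// ... device consumes 'buf' and then DMAs its response into 'buf' ...
 *	// drop stale cached copies before the CPU reads the response
 *	invalidate_dcache_range((unsigned long)buf, (unsigned long)buf + len);
 */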

/*
 * As invalidate_dcache_all() is not used in generic U-Boot code and we don't
 * need a pure invalidate (invalidate without flush) in arch/arc code either,
 * we implement flush_n_invalidate_dcache_all() (flush and invalidate in one
 * operation) instead, because it's much safer. See [ NOTE 1 ] for more
 * details.
 */
void flush_n_invalidate_dcache_all(void)
{
	__dc_entire_op(OP_FLUSH_N_INV);

	if (is_isa_arcv2() && !slc_data_bypass())
		__slc_entire_op(OP_FLUSH_N_INV);
}

void flush_dcache_all(void)
{
	__dc_entire_op(OP_FLUSH);

	if (is_isa_arcv2() && !slc_data_bypass())
		__slc_entire_op(OP_FLUSH);
}

/*
 * This function cleans up all caches (and therefore syncs the I/D caches).
 * It can be used for cleanup before a Linux launch or to sync caches during
 * relocation.
 */
void sync_n_cleanup_cache_all(void)
{
	__dc_entire_op(OP_FLUSH_N_INV);

	/*
	 * If SL$ is bypassed for data it is used only for instructions,
	 * and we shouldn't flush it. So invalidate it instead of flush_n_inv.
	 */
	if (is_isa_arcv2()) {
		if (slc_data_bypass())
			__slc_entire_op(OP_INV);
		else
			__slc_entire_op(OP_FLUSH_N_INV);
	}

	__ic_entire_invalidate();
}