/*
 *  linux/arch/arm/mm/cache-v7.S
 *
 *  Copyright (C) 2001 Deep Blue Solutions Ltd.
 *  Copyright (C) 2005 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  This is the "shell" of the ARMv7 processor support.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
#include <asm/hardware/cache-b15-rac.h>

#include "proc-macros.S"

/*
 * The secondary kernel init calls v7_flush_dcache_all before it enables
 * the L1; however, the L1 comes out of reset in an undefined state, so
 * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
 * of cache lines with uninitialized data and uninitialized tags to get
 * written out to memory, which does really unpleasant things to the main
 * processor.  We fix this by performing an invalidate, rather than a
 * clean + invalidate, before jumping into the kernel.
 *
 * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
 * to be called for both secondary core startup and primary core resume.
 */
ENTRY(v7_invalidate_l1)
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ select L1 data cache in CSSELR
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR for that cache

	movw	r1, #0x7fff
	and	r2, r1, r0, lsr #13	@ NumSets - 1

	movw	r1, #0x3ff

	and	r3, r1, r0, lsr #3	@ NumWays - 1
	add	r2, r2, #1		@ NumSets

	and	r0, r0, #0x7		@ LineSize field
	add	r0, r0, #4		@ SetShift

	clz	r1, r3			@ WayShift
	add	r4, r3, #1		@ NumWays
1:	sub	r2, r2, #1		@ NumSets--
	mov	r3, r4			@ Temp = NumWays
2:	subs	r3, r3, #1		@ Temp--
	mov	r5, r3, lsl r1
	mov	r6, r2, lsl r0
	orr	r5, r5, r6		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
	mcr	p15, 0, r5, c7, c6, 2	@ DCISW - invalidate D line by set/way
	bgt	2b
	cmp	r2, #0
	bgt	1b
	dsb	st
	isb
	ret	lr
ENDPROC(v7_invalidate_l1)

/*
 *	v7_flush_icache_all()
 *
 *	Flush the whole I-cache.
 *
 *	Registers:
 *	r0 - set to 0
 */
ENTRY(v7_flush_icache_all)
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
	ret	lr
ENDPROC(v7_flush_icache_all)

 /*
 *     v7_flush_dcache_louis()
 *
 *     Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *     Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 */

ENTRY(v7_flush_dcache_louis)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
	ands	r3, r3, #7 << 1			@ extract LoU*2 field from clidr
	bne	start_flush_levels		@ LoU != 0, start flushing
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrc	p15, 0, r2, c0, c0, 0)		@ read main ID register
ALT_UP(	ret	lr)				@ LoUU is zero, so nothing to do
	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
	movt	r1, #:upper16:(0x410fc090 >> 4)
	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
	moveq	r3, #1 << 1			@   fix LoUIS value
	beq	start_flush_levels		@   start flushing cache levels
#endif
	ret	lr
ENDPROC(v7_flush_dcache_louis)
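
/*
 * As a worked illustration of the set/way operand that start_flush_levels /
 * flush_levels below write to DCCISW (the cache geometry in the example is
 * made up, not taken from any particular part):
 *
 *	operand = (way << WayShift) | (set << SetShift) | (level index << 1)
 *
 * where, from the CCSIDR of the selected cache,
 *	SetShift = CCSIDR[2:0] + 4 = log2(line length in bytes)
 *	WayShift = CLZ(NumWays - 1)
 * and the level index is zero-based (L1 = 0), matching the CSSELR value
 * kept in r10 as "level * 2".
 *
 * For a 32 KB, 4-way, 64-byte-line L1 D-cache: 128 sets, SetShift = 6,
 * WayShift = CLZ(3) = 30, so cleaning way 2 / set 5 of L1 uses the operand
 * (2 << 30) | (5 << 6) | (0 << 1).
 */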

/*
 *	v7_flush_dcache_all()
 *
 *	Flush the whole D-cache.
 *
 *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 */
ENTRY(v7_flush_dcache_all)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
	mov	r3, r0, lsr #23			@ move LoC into position
	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
	beq	finished			@ if LoC is 0, then no need to clean
start_flush_levels:
	mov	r10, #0				@ start clean at cache level 0
flush_levels:
	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
	and	r1, r1, #7			@ mask off the bits for the current cache only
	cmp	r1, #2				@ see what cache we have at this level
	blt	skip				@ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPT
	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
#endif
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	isb					@ isb to sync the new cssr&csidr
	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
#ifdef CONFIG_PREEMPT
	restore_irqs_notrace r9
#endif
	and	r2, r1, #7			@ extract the length of the cache lines
	add	r2, r2, #4			@ add 4 (line length offset)
	movw	r4, #0x3ff
	ands	r4, r4, r1, lsr #3		@ find maximum way number (NumWays - 1)
	clz	r5, r4				@ find bit position of way size increment
	movw	r7, #0x7fff
	ands	r7, r7, r1, lsr #13		@ extract maximum index number (NumSets - 1)
loop1:
	mov	r9, r7				@ create working copy of max index
loop2:
 ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
 THUMB(	lsl	r6, r4, r5		)
 THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
 ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
 THUMB(	lsl	r6, r9, r2		)
 THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
	subs	r9, r9, #1			@ decrement the index
	bge	loop2
	subs	r4, r4, #1			@ decrement the way
	bge	loop1
skip:
	add	r10, r10, #2			@ increment cache number
	cmp	r3, r10
	bgt	flush_levels
finished:
	mov	r10, #0				@ switch back to cache level 0
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	dsb	st
	isb
	ret	lr
ENDPROC(v7_flush_dcache_all)

/*
 *	v7_flush_kern_cache_all()
 *
 *	Flush the entire cache system.
 *	The data cache flush is now achieved using atomic clean / invalidates
 *	working outwards from L1 cache. This is done using Set/Way based cache
 *	maintenance instructions.
 *	The instruction cache can still be invalidated back to the point of
 *	unification in a single instruction.
 */
ENTRY(v7_flush_kern_cache_all)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_all
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	ret	lr
ENDPROC(v7_flush_kern_cache_all)
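
/*
 * Note on the SMP variants above: set/way maintenance operations act only on
 * the cache of the CPU that executes them and are never broadcast, which is
 * why the D-cache has to be walked level by level in v7_flush_dcache_all,
 * whereas the ICIALLUIS write (c7, c1, 0) is broadcast within the Inner
 * Shareable domain and so handles the I-cache in a single instruction.
 */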

 /*
 *     v7_flush_kern_cache_louis(void)
 *
 *     Flush the data cache up to Level of Unification Inner Shareable.
 *     Invalidate the I-cache to the point of unification.
 */
ENTRY(v7_flush_kern_cache_louis)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_louis
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	ret	lr
ENDPROC(v7_flush_kern_cache_louis)

/*
 *	v7_flush_user_cache_all()
 *
 *	Flush all cache entries in a particular address space
 *
 *	- mm	- mm_struct describing address space
 */
ENTRY(v7_flush_user_cache_all)
	/*FALLTHROUGH*/

/*
 *	v7_flush_user_cache_range(start, end, flags)
 *
 *	Flush a range of cache entries in the specified address space.
 *
 *	- start	- start address (may not be aligned)
 *	- end	- end address (exclusive, may not be aligned)
 *	- flags	- vm_area_struct flags describing address space
 *
 *	It is assumed that:
 *	- we have a VIPT cache.
 */
ENTRY(v7_flush_user_cache_range)
	ret	lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
 *	v7_coherent_kern_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within the specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_kern_range)
	/* FALLTHROUGH */

/*
 *	v7_coherent_user_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within the specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart	)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r12, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
 USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
	add	r12, r12, r2
	cmp	r12, r1
	blo	1b
	dsb	ishst
	icache_line_size r2, r3
	sub	r3, r2, #1
	bic	r12, r0, r3
2:
 USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
	add	r12, r12, r2
	cmp	r12, r1
	blo	2b
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
	dsb	ishst
	isb
	ret	lr

/*
 * Fault handling for the cache operation above.  If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
	dsb
#endif
	mov	r0, #-EFAULT
	ret	lr
 UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)
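
/*
 * How the fault path above is reached: the USER() macro (see
 * arch/arm/include/asm/assembler.h) adds each wrapped cache-maintenance
 * instruction to the kernel exception fixup table with the 9001 label as
 * its fixup address.  If the D-cache clean or I-cache invalidate faults
 * because the user address is not mapped, the abort handler resumes
 * execution at 9001, which hands -EFAULT back to the caller instead of
 * taking the fault fatally.
 */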

/*
 *	v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure that the data held in the page kaddr is written back
 *	to the page in question.
 *
 *	- addr	- kernel address
 *	- size	- region size
 */
ENTRY(v7_flush_kern_dcache_area)
	dcache_line_size r2, r3
	add	r1, r0, r1
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_flush_kern_dcache_area)

/*
 *	v7_dma_inv_range(start,end)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
 *	purge old data in the cache.
 *
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 */
v7_dma_inv_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	tst	r0, r3
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line

	tst	r1, r3
	bic	r1, r1, r3
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
1:
	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_inv_range)

/*
 *	v7_dma_clean_range(start,end)
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 */
v7_dma_clean_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_clean_range)

/*
 *	v7_dma_flush_range(start,end)
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 */
ENTRY(v7_dma_flush_range)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_flush_range)

/*
 *	dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_map_area)
	add	r1, r1, r0
	teq	r2, #DMA_FROM_DEVICE
	beq	v7_dma_inv_range
	b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
 *	dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_unmap_area)
	add	r1, r1, r0
	teq	r2, #DMA_TO_DEVICE
	bne	v7_dma_inv_range
	ret	lr
ENDPROC(v7_dma_unmap_area)
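
/*
 * Direction handling for the two entry points above: a DMA_FROM_DEVICE
 * buffer is invalidated on map (and again on unmap) so stale cache lines
 * cannot shadow data written by the device; a DMA_TO_DEVICE or
 * DMA_BIDIRECTIONAL buffer is cleaned on map so the device sees the CPU's
 * latest data.  On unmap, nothing needs to be done for DMA_TO_DEVICE,
 * while DMA_BIDIRECTIONAL is invalidated again.
 */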

	__INITDATA

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions v7

	/* The Broadcom Brahma-B15 read-ahead cache requires some modifications
	 * to the v7_cache_fns; we only override the ones we need.
	 */
#ifndef CONFIG_CACHE_B15_RAC
	globl_equ	b15_flush_kern_cache_all,	v7_flush_kern_cache_all
#endif
	globl_equ	b15_flush_icache_all,		v7_flush_icache_all
	globl_equ	b15_flush_kern_cache_louis,	v7_flush_kern_cache_louis
	globl_equ	b15_flush_user_cache_all,	v7_flush_user_cache_all
	globl_equ	b15_flush_user_cache_range,	v7_flush_user_cache_range
	globl_equ	b15_coherent_kern_range,	v7_coherent_kern_range
	globl_equ	b15_coherent_user_range,	v7_coherent_user_range
	globl_equ	b15_flush_kern_dcache_area,	v7_flush_kern_dcache_area

	globl_equ	b15_dma_map_area,		v7_dma_map_area
	globl_equ	b15_dma_unmap_area,		v7_dma_unmap_area
	globl_equ	b15_dma_flush_range,		v7_dma_flush_range

	define_cache_functions b15