/*
 *  linux/arch/arm/mm/cache-v7.S
 *
 *  Copyright (C) 2001 Deep Blue Solutions Ltd.
 *  Copyright (C) 2005 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  This is the "shell" of the ARMv7 processor support.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>

#include "proc-macros.S"

/*
 * The secondary kernel init calls v7_flush_dcache_all before it enables
 * the L1; however, the L1 comes out of reset in an undefined state, so
 * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
 * of cache lines with uninitialized data and uninitialized tags to get
 * written out to memory, which does really unpleasant things to the main
 * processor.  We fix this by performing an invalidate, rather than a
 * clean + invalidate, before jumping into the kernel.
 *
 * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
 * to be called for both secondary cores startup and primary core resume
 * procedures.
 *
 * Takes no arguments.  Corrupted registers: r0-r6 and the condition flags.
 * Nothing is saved on the stack.
 */
ENTRY(v7_invalidate_l1)
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0		@ cache size selection = 0
	isb					@ selection must take effect before the size ID is read
	mrc	p15, 1, r0, c0, c0, 0		@ r0 = cache size ID of the selected cache

	ldr	r1, =0x7fff
	and	r2, r1, r0, lsr #13		@ NumSets - 1

	ldr	r1, =0x3ff

	and	r3, r1, r0, lsr #3		@ NumWays - 1
	add	r2, r2, #1			@ NumSets

	and	r0, r0, #0x7
	add	r0, r0, #4			@ SetShift

	clz	r1, r3				@ WayShift
	add	r4, r3, #1			@ NumWays
1:	sub	r2, r2, #1			@ NumSets--
	mov	r3, r4				@ Temp = NumWays
2:	subs	r3, r3, #1			@ Temp--
	mov	r5, r3, lsl r1
	mov	r6, r2, lsl r0
	orr	r5, r5, r6			@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
	mcr	p15, 0, r5, c7, c6, 2		@ invalidate (no clean) by set/way
	bgt	2b				@ flags still from "subs": more ways left
	cmp	r2, #0
	bgt	1b				@ more sets left
	dsb	st
	isb
	mov	pc, lr
ENDPROC(v7_invalidate_l1)

/*
 *	v7_flush_icache_all()
 *
 *	Flush the whole I-cache.
*
 *	Registers:
 *	r0 - set to 0
 */
ENTRY(v7_flush_icache_all)
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
	mov	pc, lr
ENDPROC(v7_flush_icache_all)

/*
 *	v7_flush_dcache_louis()
 *
 *	Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 *
 *	Returns immediately when the selected level-of-unification field is
 *	zero; otherwise branches into the flush_levels loop with r0 = clidr,
 *	r3 = level * 2 and r10 = 0.
 */

ENTRY(v7_flush_dcache_louis)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
#ifdef CONFIG_ARM_ERRATA_643719
	ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
	ALT_UP(moveq	pc, lr)			@ LoUU is zero, so nothing to do
	ldreq	r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
	biceq	r2, r2, #0x0000000f             @ clear minor revision number
	teqeq	r2, r1                          @ test for errata affected core and if so...
	orreqs	r3, #(1 << 21)			@   fix LoUIS value (and set flags state to 'ne')
#endif
	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
	/*
	 * Re-derive Z from r3 itself.  The erratum sequence above leaves the
	 * flags as 'ne' on an unaffected core even though r3 is still zero,
	 * so the flags from the initial "ands" cannot be trusted here.
	 */
	cmp	r3, #0
	moveq	pc, lr				@ return if level == 0
	mov	r10, #0				@ r10 (starting level) = 0
	b	flush_levels			@ start flushing cache levels
ENDPROC(v7_flush_dcache_louis)

/*
 *	v7_flush_dcache_all()
 *
 *	Flush the whole D-cache.
*
 *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 *
 *	No argument registers are read: r0-r3 are overwritten before use.
 *
 *	Register roles inside the flush_levels loop:
 *	r0  - clidr
 *	r3  - (last level to process) * 2, exclusive bound
 *	r10 - (current level) * 2, also the level field of the set/way operand
 *	r2  - shift for the index field, r5 - shift for the way field
 *	r4  - current way, r7 - maximum index, r9 - current index
 */
ENTRY(v7_flush_dcache_all)
	dmb					@ order against earlier memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ r0 = clidr
	ands	r3, r0, #0x7000000		@ isolate the loc field
	mov	r3, r3, lsr #23			@ r3 = loc * 2 (flags untouched)
	beq	finished			@ loc == 0: nothing to clean
	mov	r10, #0				@ begin with cache level 0
flush_levels:
	add	r2, r10, r10, lsr #1		@ r2 = 3 * level = bit offset of its clidr field
	mov	r1, r0, lsr r2			@ bring this level's cache type to bit 0
	and	r1, r1, #7			@ keep only the 3-bit cache type
	cmp	r1, #2				@ types below 2: no cache, or i-cache only
	blt	next_level
#ifdef CONFIG_PREEMPT
	save_and_disable_irqs_notrace r9	@ keep the cssr write / csidr read pair atomic
#endif
	mcr	p15, 2, r10, c0, c0, 0		@ select this cache level in cssr
	isb					@ make the selection visible to the csidr read
	mrc	p15, 1, r1, c0, c0, 0		@ r1 = csidr of the selected level
#ifdef CONFIG_PREEMPT
	restore_irqs_notrace r9
#endif
	and	r2, r1, #7			@ line length field
	add	r2, r2, #4			@ + 4 (line length offset) = index shift
	ldr	r4, =0x3ff
	ands	r4, r4, r1, lsr #3		@ r4 = maximum way number
	clz	r5, r4				@ r5 = bit position of the way field
	ldr	r7, =0x7fff
	ands	r7, r7, r1, lsr #13		@ r7 = maximum index number
way_loop:
	mov	r9, r7				@ restart the index countdown for this way
index_loop:
 ARM(	orr	r11, r10, r4, lsl r5	)	@ r11 = level | (way << wayshift)
 THUMB(	lsl	r6, r4, r5		)
 THUMB(	orr	r11, r10, r6		)	@ r11 = level | (way << wayshift)
 ARM(	orr	r11, r11, r9, lsl r2	)	@ r11 |= index << indexshift
 THUMB(	lsl	r6, r9, r2		)
 THUMB(	orr	r11, r11, r6		)	@ r11 |= index << indexshift
	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
	subs	r9, r9, #1			@ next index
	bge	index_loop
	subs	r4, r4, #1			@ next way
	bge	way_loop
next_level:
	add	r10, r10, #2			@ advance to the next cache level
	cmp	r3, r10
	bgt	flush_levels
finished:
	mov	r10, #0				@ switch back to cache level 0
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	dsb	st
	isb
	mov	pc, lr
ENDPROC(v7_flush_dcache_all)

/*
 *	v7_flush_kern_cache_all()
 *
 *	Flush the entire cache system.
 *
 *	The data cache is cleaned and invalidated by set/way, working
 *	outwards from the L1 cache (see v7_flush_dcache_all), after which
 *	the whole instruction cache is invalidated with one operation.
 *
 *	The registers that v7_flush_dcache_all corrupts beyond r0-r3 are
 *	saved and restored around the call.
 */
ENTRY(v7_flush_kern_cache_all)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_all
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	mov	pc, lr
ENDPROC(v7_flush_kern_cache_all)

/*
 *	v7_flush_kern_cache_louis(void)
 *
 *	Flush the data cache up to Level of Unification Inner Shareable.
 *	Invalidate the I-cache to the point of unification.
*/
ENTRY(v7_flush_kern_cache_louis)
	@ Preserve everything v7_flush_dcache_louis corrupts beyond r0-r3.
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_louis
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	mov	pc, lr
ENDPROC(v7_flush_kern_cache_louis)

/*
 *	v7_flush_user_cache_all()
 *
 *	Flush all cache entries belonging to a particular address space.
 *	This entry point shares the body of v7_flush_user_cache_range below,
 *	which simply returns.
 *
 *	- mm    - mm_struct describing address space
 */
ENTRY(v7_flush_user_cache_all)
	/*FALLTHROUGH*/

/*
 *	v7_flush_user_cache_range(start, end, flags)
 *
 *	Flush a range of cache entries in the specified address space.
 *	Nothing is done here: the function returns immediately.
 *
 *	- start - start address (may not be aligned)
 *	- end   - end address (exclusive, may not be aligned)
 *	- flags	- vm_area_struct flags describing address space
 *
 *	It is assumed that:
 *	- we have a VIPT cache.
 */
ENTRY(v7_flush_user_cache_range)
	mov	pc, lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
 *	v7_coherent_kern_range(start,end)
 *
 *	Make the I and D caches coherent over the given region, typically
 *	after code has been written to memory that is about to be executed.
 *	This entry point falls straight through into v7_coherent_user_range.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_kern_range)
	/* FALLTHROUGH */

/*
 *	v7_coherent_user_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
*
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	Returns 0 in r0 on success, or -EFAULT through the fixup at 9001
 *	below.  Corrupted registers: r0, r2, r3, r12.
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart		)
	dcache_line_size r2, r3			@ r2 = D line size (r3 is scratch)
	sub	r3, r2, #1			@ r3 = line offset mask
	bic	r12, r0, r3			@ r12 = start rounded down to a D line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
 USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
	add	r12, r12, r2
	cmp	r12, r1
	blo	1b				@ until the cursor reaches end
	dsb	ishst
	icache_line_size r2, r3			@ r2 = I line size (r3 is scratch)
	sub	r3, r2, #1
	bic	r12, r0, r3			@ r12 = start rounded down to an I line
2:
 USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
	add	r12, r12, r2
	cmp	r12, r1
	blo	2b
	mov	r0, #0				@ operand for the BTB op, and the success return value
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
	dsb	ishst
	isb
	mov	pc, lr

/*
 * Fault handling for the cache operation above. If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
	dsb
#endif
	mov	r0, #-EFAULT
	mov	pc, lr
 UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)

/*
 *	v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure that the data held in the page kaddr is written back
 *	to the page in question.
*
 *	- addr	- kernel address
 *	- size	- region size
 *
 *	Corrupted registers: r0-r3.
 */
ENTRY(v7_flush_kern_dcache_area)
	dcache_line_size r2, r3			@ r2 = D line size (r3 is scratch)
	add	r1, r0, r1			@ r1 = end address
	sub	r3, r2, #1
	bic	r0, r0, r3			@ round start down to a line boundary
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	mov	pc, lr
ENDPROC(v7_flush_kern_dcache_area)

/*
 *	v7_dma_inv_range(start,end)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
 *	purge old data in the cache.
 *
 *	A line only partly covered by the region (unaligned start or end)
 *	is cleaned & invalidated instead, so the bytes outside the region
 *	are written back rather than thrown away.  Such a line must not be
 *	invalidated a second time by the loop: anything written to it in
 *	between would be lost.  The first line is therefore stepped over
 *	once it has been handled, and the loop only runs while lines remain
 *	below the (rounded down) end.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	Corrupted registers: r0-r3.
 */
v7_dma_inv_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	tst	r0, r3				@ is start unaligned?
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	addne	r0, r0, r2			@ partial first line is done, skip it

	tst	r1, r3				@ is end unaligned?
	bic	r1, r1, r3
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
	cmp	r0, r1				@ any whole lines left?
1:
	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
	addlo	r0, r0, r2
	cmplo	r0, r1
	blo	1b
	dsb	st
	mov	pc, lr
ENDPROC(v7_dma_inv_range)

/*
 *	v7_dma_clean_range(start,end)
 *
 *	Clean every data cache line from start (rounded down to a line
 *	boundary) up to end.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	Corrupted registers: r0, r2, r3.
 */
v7_dma_clean_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	mov	pc, lr
ENDPROC(v7_dma_clean_range)

/*
 *	v7_dma_flush_range(start,end)
 *
 *	Clean and invalidate every data cache line from start (rounded
 *	down to a line boundary) up to end.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	Corrupted registers: r0, r2, r3.
 */
ENTRY(v7_dma_flush_range)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	mov	pc, lr
ENDPROC(v7_dma_flush_range)

/*
 *	dma_map_area(start, size, dir)
 *
 *	Invalidates the region when dir is DMA_FROM_DEVICE, cleans it for
 *	any other direction.
 *
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_map_area)
	add	r1, r1, r0			@ r1 = end address
	teq	r2, #DMA_FROM_DEVICE
	beq	v7_dma_inv_range
	b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
 *	dma_unmap_area(start, size, dir)
 *
 *	Invalidates the region unless dir is DMA_TO_DEVICE, in which case
 *	nothing is done.
 *
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_unmap_area)
	add	r1, r1, r0			@ r1 = end address
	teq	r2, #DMA_TO_DEVICE
	bne	v7_dma_inv_range
	mov	pc, lr
ENDPROC(v7_dma_unmap_area)

	__INITDATA

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions v7