/*
 *  linux/arch/arm/mm/proc-xsc3.S
 *
 *  Original Author: Matthew Gilbert
 *  Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
 *
 *  Copyright 2004 (C) Intel Corp.
 *  Copyright 2005 (C) MontaVista Software, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
 * an extension to Intel's original XScale core that adds the following
 * features:
 *
 * - ARMv6 Supersections
 * - Low Locality Reference pages (replaces mini-cache)
 * - 36-bit addressing
 * - L2 cache
 * - Cache coherency if chipset supports it
 *
 * Based on original XScale code by Nicolas Pitre.
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"

/*
 * This is the maximum size of an area which will be flushed.  If the
 * area is larger than this, then we flush the whole cache.
 */
#define MAX_AREA_SIZE	32768

/*
 * The cache line size of the L1 I, L1 D and unified L2 cache.
 */
#define CACHELINESIZE	32

/*
 * The size of the L1 D cache.
 */
#define CACHESIZE	32768

/*
 * This macro is used to wait for a CP15 write and is needed when we
 * have to ensure that the last operation to the coprocessor was
 * completed before continuing with the operation.
 */
	.macro	cpwait_ret, lr, rd
	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
						@ flush instruction pipeline
	.endm

/*
 * This macro cleans and invalidates the entire L1 D cache.
 */

	.macro	clean_d_cache rd, rs
	mov	\rd, #0x1f00
	orr	\rd, \rd, #0x00e0
1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
	adds	\rd, \rd, #0x40000000
	bcc	1b
	subs	\rd, \rd, #0x20
	bpl	1b
	.endm

	.text

/*
 * cpu_xsc3_proc_init()
 *
 * Nothing too exciting at the moment.
 */
ENTRY(cpu_xsc3_proc_init)
	mov	pc, lr

/*
 * cpu_xsc3_proc_fin()
 */
ENTRY(cpu_xsc3_proc_fin)
	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
	bic	r0, r0, #0x1800			@ ...IZ...........
	bic	r0, r0, #0x0006			@ .............CA.
	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
	mov	pc, lr

/*
 * cpu_xsc3_reset(loc)
 *
 * Perform a soft reset of the system.  Put the CPU into the
 * same state as it would be if it had been reset, and branch
 * to what would be the reset vector.
 *
 * loc: location to jump to for soft reset
 */
	.align	5
ENTRY(cpu_xsc3_reset)
	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r1			@ reset CPSR
	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
	bic	r1, r1, #0x3900			@ ..VIZ..S........
	bic	r1, r1, #0x0086			@ ........B....CA.
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	bic	r1, r1, #0x0001			@ ...............M
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	@ CAUTION: MMU turned off from this point.  We count on the pipeline
	@ already containing those two last instructions to survive.
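	@ Note: the TLB invalidate and the branch below are the two
	@ instructions counted on above; r0 is assumed to hold an address
	@ that remains valid with the MMU off (physical or identity-mapped),
	@ since instruction fetches from this point are no longer translated.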
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	mov	pc, r0

/*
 * cpu_xsc3_do_idle()
 *
 * Cause the processor to idle.
 *
 * For now we do nothing but go to idle mode for every case.
 *
 * XScale supports clock switching, but using idle mode support
 * allows external hardware to react to system state changes.
 */
	.align	5

ENTRY(cpu_xsc3_do_idle)
	mov	r0, #1
	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
	mov	pc, lr

/* ================================= CACHE ================================ */

/*
 * flush_icache_all()
 *
 * Unconditionally clean and invalidate the entire icache.
 */
ENTRY(xsc3_flush_icache_all)
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
	mov	pc, lr
ENDPROC(xsc3_flush_icache_all)

/*
 * flush_user_cache_all()
 *
 * Invalidate all cache entries in a particular address
 * space.
 */
ENTRY(xsc3_flush_user_cache_all)
	/* FALLTHROUGH */

/*
 * flush_kern_cache_all()
 *
 * Clean and invalidate the entire cache.
 */
ENTRY(xsc3_flush_kern_cache_all)
	mov	r2, #VM_EXEC
	mov	ip, #0
__flush_whole_cache:
	clean_d_cache r0, r1
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 * flush_user_cache_range(start, end, vm_flags)
 *
 * Invalidate a range of cache entries in the specified
 * address space.
 *
 * - start    - start address (may not be aligned)
 * - end      - end address (exclusive, may not be aligned)
 * - vm_flags - vma->vm_flags describing the address space
 */
	.align	5
ENTRY(xsc3_flush_user_cache_range)
	mov	ip, #0
	sub	r3, r1, r0			@ calculate total size
	cmp	r3, #MAX_AREA_SIZE
	bhs	__flush_whole_cache

1:	tst	r2, #VM_EXEC
	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 * coherent_kern_range(start, end)
 *
 * Ensure coherency between the I cache and the D cache in the
 * region described by start, end.  If you have non-snooping
 * Harvard caches, you need to implement this function.
 *
 * - start - virtual start address
 * - end   - virtual end address
 *
 * Note: single I-cache line invalidation isn't used here since
 * it also trashes the mini I-cache used by JTAG debuggers.
 */
ENTRY(xsc3_coherent_kern_range)
/* FALLTHROUGH */
ENTRY(xsc3_coherent_user_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 * flush_kern_dcache_area(void *addr, size_t size)
 *
 * Ensure no D cache aliasing occurs, either with itself or
 * the I cache.
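 *
 * (The range is cleaned and invalidated from the L1 D cache a line at
 * a time, after which the whole L1 I cache and BTB are invalidated,
 * since the Harvard caches are not snooped.)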
 *
 * - addr - kernel address
 * - size - region size
 */
ENTRY(xsc3_flush_kern_dcache_area)
	add	r1, r0, r1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 * dma_inv_range(start, end)
 *
 * Invalidate (discard) the specified virtual address range.
 * May not write back any entries.  If 'start' or 'end'
 * are not cache line aligned, those lines must be written
 * back.
 *
 * - start - virtual start address
 * - end   - virtual end address
 */
xsc3_dma_inv_range:
	tst	r0, #CACHELINESIZE - 1
	bic	r0, r0, #CACHELINESIZE - 1
	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	tst	r1, #CACHELINESIZE - 1
	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr

/*
 * dma_clean_range(start, end)
 *
 * Clean the specified virtual address range.
 *
 * - start - virtual start address
 * - end   - virtual end address
 */
xsc3_dma_clean_range:
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr

/*
 * dma_flush_range(start, end)
 *
 * Clean and invalidate the specified virtual address range.
 *
 * - start - virtual start address
 * - end   - virtual end address
 */
ENTRY(xsc3_dma_flush_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr

/*
 * dma_map_area(start, size, dir)
 * - start - kernel virtual start address
 * - size  - size of region
 * - dir   - DMA direction
 */
ENTRY(xsc3_dma_map_area)
	add	r1, r1, r0
	cmp	r2, #DMA_TO_DEVICE
	beq	xsc3_dma_clean_range
	bcs	xsc3_dma_inv_range
	b	xsc3_dma_flush_range
ENDPROC(xsc3_dma_map_area)

/*
 * dma_unmap_area(start, size, dir)
 * - start - kernel virtual start address
 * - size  - size of region
 * - dir   - DMA direction
 */
ENTRY(xsc3_dma_unmap_area)
	mov	pc, lr
ENDPROC(xsc3_dma_unmap_area)

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions xsc3

ENTRY(cpu_xsc3_dcache_clean_area)
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	subs	r1, r1, #CACHELINESIZE
	bhi	1b
	mov	pc, lr

/* =============================== PageTable ============================== */

/*
 * cpu_xsc3_switch_mm(pgd)
 *
 * Set the translation base pointer to be as described by pgd.
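 *
 * (The entire L1 D cache is cleaned, and the I cache, BTB and TLBs are
 * invalidated, before the new table is loaded; the virtually addressed
 * L1 caches would otherwise hold stale lines from the old address
 * space.)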
 *
 * pgd: new page tables
 */
	.align	5
ENTRY(cpu_xsc3_switch_mm)
	clean_d_cache r1, r2
	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	orr	r0, r0, #0x18			@ cache the page table in L2
	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	cpwait_ret lr, ip

/*
 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
 *
 * Set a PTE and flush it out.
 */
cpu_xsc3_mt_table:
	.long	0x00						@ L_PTE_MT_UNCACHED
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
	.long	0x00						@ unused
	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
	.long	0x00						@ unused
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
	.long	0x00						@ unused
	.long	0x00						@ unused
	.long	0x00						@ unused

	.align	5
ENTRY(cpu_xsc3_set_pte_ext)
	xscale_set_pte_ext_prologue

	tst	r1, #L_PTE_SHARED		@ shared?
	and	r1, r1, #L_PTE_MT_MASK
	adr	ip, cpu_xsc3_mt_table
	ldr	ip, [ip, r1]
	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
	bic	r2, r2, #0x0c			@ clear old C,B bits
	orr	r2, r2, ip

	xscale_set_pte_ext_epilogue
	mov	pc, lr

	.ltorg
	.align

.globl	cpu_xsc3_suspend_size
.equ	cpu_xsc3_suspend_size, 4 * 6
#ifdef CONFIG_PM_SLEEP
ENTRY(cpu_xsc3_do_suspend)
	stmfd	sp!, {r4 - r9, lr}
	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
	mrc	p15, 0, r6, c13, c0, 0	@ PID
	mrc	p15, 0, r7, c3, c0, 0	@ domain ID
	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
	mrc	p15, 0, r9, c1, c0, 0	@ control reg
	bic	r4, r4, #2		@ clear frequency change bit
	stmia	r0, {r4 - r9}		@ store cp regs
	ldmia	sp!, {r4 - r9, pc}
ENDPROC(cpu_xsc3_do_suspend)

ENTRY(cpu_xsc3_do_resume)
	ldmia	r0, {r4 - r9}		@ load cp regs
	mov	ip, #0
	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
	mcr	p15, 0, ip, c7, c5, 4	@ flush prefetch buffer
	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
	mcr	p14, 0, r4, c6, c0, 0	@ clock configuration, turbo mode.
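	@ Note: the saved control register value (r9) is deliberately not
	@ written here; it is passed to cpu_resume_mmu in r0 below, which
	@ re-enables the MMU once the translation table base has been
	@ restored.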
	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
	mcr	p15, 0, r6, c13, c0, 0	@ PID
	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
	orr	r1, r1, #0x18		@ cache the page table in L2
	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
	mov	r0, r9			@ control register
	b	cpu_resume_mmu
ENDPROC(cpu_xsc3_do_resume)
#endif

	__CPUINIT

	.type	__xsc3_setup, #function
__xsc3_setup:
	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r0
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	orr	r4, r4, #0x18			@ cache the page table in L2
	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer

	mov	r0, #1 << 6			@ cp6 access for early sched_clock
	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register

	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
	and	r0, r0, #2			@ preserve the P bit setting
	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg

	adr	r5, xsc3_crval
	ldmia	r5, {r5, r6}

#ifdef CONFIG_CACHE_XSC3L2
	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
	ands	r0, r0, #0xf8
	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
#endif

	mrc	p15, 0, r0, c1, c0, 0		@ get control register
	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
						@ ...I Z..S .... .... (uc)
	mov	pc, lr

	.size	__xsc3_setup, . - __xsc3_setup

	.type	xsc3_crval, #object
xsc3_crval:
	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900

	__INITDATA

	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
	define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1

	.section ".rodata"

	string	cpu_arch_name, "armv5te"
	string	cpu_elf_name, "v5"
	string	cpu_xsc3_name, "XScale-V3 based processor"

	.align

	.section ".proc.info.init", #alloc, #execinstr

.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
	.type	__\name\()_proc_info,#object
__\name\()_proc_info:
	.long	\cpu_val
	.long	\cpu_mask
	.long	PMD_TYPE_SECT | \
		PMD_SECT_BUFFERABLE | \
		PMD_SECT_CACHEABLE | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	.long	PMD_TYPE_SECT | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	b	__xsc3_setup
	.long	cpu_arch_name
	.long	cpu_elf_name
	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
	.long	cpu_xsc3_name
	.long	xsc3_processor_functions
	.long	v4wbi_tlb_fns
	.long	xsc3_mc_user_fns
	.long	xsc3_cache_fns
	.size	__\name\()_proc_info, . - __\name\()_proc_info
.endm

	xsc3_proc_info xsc3, 0x69056000, 0xffffe000

/* Note: PXA935 changed its implementor ID from Intel to Marvell */
	xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000
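/*
 * (The cpu_val/cpu_mask pairs match against the CP15 main ID register:
 * the top byte is the implementer code, 0x69 for Intel and 0x56 for
 * Marvell, and the low bits are masked off so that all core revisions
 * match.)
 */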