/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/mm/proc-xsc3.S
 *
 * Original Author: Matthew Gilbert
 * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
 *
 * Copyright 2004 (C) Intel Corp.
 * Copyright 2005 (C) MontaVista Software, Inc.
 *
 * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
 * an extension to Intel's original XScale core that adds the following
 * features:
 *
 * - ARMv6 Supersections
 * - Low Locality Reference pages (replaces mini-cache)
 * - 36-bit addressing
 * - L2 cache
 * - Cache coherency if chipset supports it
 *
 * Based on original XScale code by Nicolas Pitre.
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
#include <asm/pgtable-hwdef.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"

/*
 * This is the maximum size of an area which will be flushed.  If the
 * area is larger than this, then we flush the whole cache.
 */
#define MAX_AREA_SIZE	32768

/*
 * The cache line size of the L1 I, L1 D and unified L2 cache.
 */
#define CACHELINESIZE	32

/*
 * The size of the L1 D cache.
 */
#define CACHESIZE	32768

/*
 * This macro is used to wait for a CP15 write and is needed when we
 * have to ensure that the last operation to the coprocessor was
 * completed before continuing with operation.
 */
	.macro	cpwait_ret, lr, rd
	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
						@ flush instruction pipeline
	.endm

/*
 * This macro cleans and invalidates the entire L1 D cache.
 */

	.macro	clean_d_cache rd, rs
	mov	\rd, #0x1f00
	orr	\rd, \rd, #0x00e0		@ start with the last set index
1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
	adds	\rd, \rd, #0x40000000		@ next way; carry set after last way
	bcc	1b
	subs	\rd, \rd, #0x20			@ next set
	bpl	1b
	.endm

	.text

/*
 * cpu_xsc3_proc_init()
 *
 * Nothing too exciting at the moment.
 */
ENTRY(cpu_xsc3_proc_init)
	ret	lr

/*
 * cpu_xsc3_proc_fin()
 */
ENTRY(cpu_xsc3_proc_fin)
	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
	bic	r0, r0, #0x1800			@ ...IZ...........
	bic	r0, r0, #0x0006			@ .............CA.
	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
	ret	lr

/*
 * cpu_xsc3_reset(loc)
 *
 * Perform a soft reset of the system.  Put the CPU into the
 * same state as it would be if it had been reset, and branch
 * to what would be the reset vector.
 *
 * loc: location to jump to for soft reset
 */
	.align	5
	.pushsection	.idmap.text, "ax"
ENTRY(cpu_xsc3_reset)
	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r1			@ reset CPSR
	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
	bic	r1, r1, #0x3900			@ ..VIZ..S........
	bic	r1, r1, #0x0086			@ ........B....CA.
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	bic	r1, r1, #0x0001			@ ...............M
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	@ CAUTION: MMU turned off from this point.  We count on the pipeline
	@ already containing those two last instructions to survive.
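	@ The two instructions below (the TLB invalidate and "ret r0") are
	@ expected to already be in the pipeline, so they complete even
	@ though the mapping they were fetched through is no longer valid.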
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	ret	r0
ENDPROC(cpu_xsc3_reset)
	.popsection

/*
 * cpu_xsc3_do_idle()
 *
 * Cause the processor to idle.
 *
 * For now we do nothing but go to idle mode for every case.
 *
 * XScale supports clock switching, but using idle mode support
 * allows external hardware to react to system state changes.
 */
	.align	5

ENTRY(cpu_xsc3_do_idle)
	mov	r0, #1
	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
	ret	lr

/* ================================= CACHE ================================ */

/*
 * flush_icache_all()
 *
 * Unconditionally clean and invalidate the entire icache.
 */
ENTRY(xsc3_flush_icache_all)
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
	ret	lr
ENDPROC(xsc3_flush_icache_all)

/*
 * flush_user_cache_all()
 *
 * Invalidate all cache entries in a particular address
 * space.
 */
ENTRY(xsc3_flush_user_cache_all)
	/* FALLTHROUGH */

/*
 * flush_kern_cache_all()
 *
 * Clean and invalidate the entire cache.
 */
ENTRY(xsc3_flush_kern_cache_all)
	mov	r2, #VM_EXEC
	mov	ip, #0
__flush_whole_cache:
	clean_d_cache r0, r1
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	ret	lr

/*
 * flush_user_cache_range(start, end, vm_flags)
 *
 * Invalidate a range of cache entries in the specified
 * address space.
 *
 * - start - start address (may not be aligned)
 * - end   - end address (exclusive, may not be aligned)
 * - flags - vm_flags of the VMA describing the address space
 */
	.align	5
ENTRY(xsc3_flush_user_cache_range)
	mov	ip, #0
	sub	r3, r1, r0			@ calculate total size
	cmp	r3, #MAX_AREA_SIZE
	bhs	__flush_whole_cache

1:	tst	r2, #VM_EXEC
	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	ret	lr

/*
 * coherent_kern_range(start, end)
 *
 * Ensure coherency between the I cache and the D cache in the
 * region described by start, end.  If you have non-snooping
 * Harvard caches, you need to implement this function.
 *
 * - start - virtual start address
 * - end   - virtual end address
 *
 * Note: single I-cache line invalidation isn't used here since
 * it also trashes the mini I-cache used by JTAG debuggers.
 */
ENTRY(xsc3_coherent_kern_range)
/* FALLTHROUGH */
ENTRY(xsc3_coherent_user_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	ret	lr

/*
 * flush_kern_dcache_area(void *addr, size_t size)
 *
 * Ensure no D cache aliasing occurs, either with itself or
 * the I cache.
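 * The range is written back and invalidated one L1 D line at a time;
 * the whole I cache and BTB are then invalidated rather than single
 * I lines, for the same mini I-cache reason noted above for
 * coherent_kern_range.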
 *
 * - addr - kernel address
 * - size - region size
 */
ENTRY(xsc3_flush_kern_dcache_area)
	add	r1, r0, r1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	ret	lr

/*
 * dma_inv_range(start, end)
 *
 * Invalidate (discard) the specified virtual address range.
 * May not write back any entries.  If 'start' or 'end'
 * are not cache line aligned, those lines must be written
 * back.
 *
 * - start - virtual start address
 * - end   - virtual end address
 */
xsc3_dma_inv_range:
	tst	r0, #CACHELINESIZE - 1
	bic	r0, r0, #CACHELINESIZE - 1
	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	tst	r1, #CACHELINESIZE - 1
	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	ret	lr

/*
 * dma_clean_range(start, end)
 *
 * Clean the specified virtual address range.
 *
 * - start - virtual start address
 * - end   - virtual end address
 */
xsc3_dma_clean_range:
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	ret	lr

/*
 * dma_flush_range(start, end)
 *
 * Clean and invalidate the specified virtual address range.
 *
 * - start - virtual start address
 * - end   - virtual end address
 */
ENTRY(xsc3_dma_flush_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	ret	lr

/*
 * dma_map_area(start, size, dir)
 * - start - kernel virtual start address
 * - size  - size of region
 * - dir   - DMA direction
 */
ENTRY(xsc3_dma_map_area)
	add	r1, r1, r0
	cmp	r2, #DMA_TO_DEVICE
	beq	xsc3_dma_clean_range		@ DMA_TO_DEVICE: clean only
	bcs	xsc3_dma_inv_range		@ DMA_FROM_DEVICE: invalidate
	b	xsc3_dma_flush_range		@ DMA_BIDIRECTIONAL: clean + invalidate
ENDPROC(xsc3_dma_map_area)

/*
 * dma_unmap_area(start, size, dir)
 * - start - kernel virtual start address
 * - size  - size of region
 * - dir   - DMA direction
 */
ENTRY(xsc3_dma_unmap_area)
	ret	lr
ENDPROC(xsc3_dma_unmap_area)

	.globl	xsc3_flush_kern_cache_louis
	.equ	xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions xsc3

ENTRY(cpu_xsc3_dcache_clean_area)
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	subs	r1, r1, #CACHELINESIZE
	bhi	1b
	ret	lr

/* =============================== PageTable ============================== */

/*
 * cpu_xsc3_switch_mm(pgd)
 *
 * Set the translation base pointer to be as described by pgd.
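 * The entire L1 D cache is cleaned and the I cache, BTB and TLBs are
 * invalidated around the switch; pgd is ORed with 0x18 so that hardware
 * page table walks can hit the L2 (the same "cache the page table in L2"
 * setting used in __xsc3_setup and cpu_xsc3_do_resume below).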
 *
 * pgd: new page tables
 */
	.align	5
ENTRY(cpu_xsc3_switch_mm)
	clean_d_cache r1, r2
	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	orr	r0, r0, #0x18			@ cache the page table in L2
	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	cpwait_ret lr, ip

/*
 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
 *
 * Set a PTE and flush it out.
 */
cpu_xsc3_mt_table:
	.long	0x00						@ L_PTE_MT_UNCACHED
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
	.long	0x00						@ unused
	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
	.long	0x00						@ unused
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
	.long	0x00						@ unused
	.long	0x00						@ unused
	.long	0x00						@ unused

	.align	5
ENTRY(cpu_xsc3_set_pte_ext)
	xscale_set_pte_ext_prologue

	tst	r1, #L_PTE_SHARED		@ shared?
	and	r1, r1, #L_PTE_MT_MASK
	adr	ip, cpu_xsc3_mt_table
	ldr	ip, [ip, r1]
	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
	bic	r2, r2, #0x0c			@ clear old C,B bits
	orr	r2, r2, ip

	xscale_set_pte_ext_epilogue
	ret	lr

	.ltorg
	.align

.globl	cpu_xsc3_suspend_size
.equ	cpu_xsc3_suspend_size, 4 * 6
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_xsc3_do_suspend)
	stmfd	sp!, {r4 - r9, lr}
	mrc	p14, 0, r4, c6, c0, 0		@ clock configuration, for turbo mode
	mrc	p15, 0, r5, c15, c1, 0		@ CP access reg
	mrc	p15, 0, r6, c13, c0, 0		@ PID
	mrc	p15, 0, r7, c3, c0, 0		@ domain ID
	mrc	p15, 0, r8, c1, c0, 1		@ auxiliary control reg
	mrc	p15, 0, r9, c1, c0, 0		@ control reg
	bic	r4, r4, #2			@ clear frequency change bit
	stmia	r0, {r4 - r9}			@ store cp regs
	ldmia	sp!, {r4 - r9, pc}
ENDPROC(cpu_xsc3_do_suspend)

ENTRY(cpu_xsc3_do_resume)
	ldmia	r0, {r4 - r9}			@ load cp regs
	mov	ip, #0
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I & D caches, BTB
	mcr	p15, 0, ip, c7, c10, 4		@ drain write (&fill) buffer
	mcr	p15, 0, ip, c7, c5, 4		@ flush prefetch buffer
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
	mcr	p14, 0, r4, c6, c0, 0		@ clock configuration, turbo mode
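	@ restore the remaining system control state saved by do_suspend;
	@ the saved control register value is handed to cpu_resume_mmu
	@ (via r0) which re-enables the MMU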
	mcr	p15, 0, r5, c15, c1, 0		@ CP access reg
	mcr	p15, 0, r6, c13, c0, 0		@ PID
	mcr	p15, 0, r7, c3, c0, 0		@ domain ID
	orr	r1, r1, #0x18			@ cache the page table in L2
	mcr	p15, 0, r1, c2, c0, 0		@ translation table base addr
	mcr	p15, 0, r8, c1, c0, 1		@ auxiliary control reg
	mov	r0, r9				@ control register
	b	cpu_resume_mmu
ENDPROC(cpu_xsc3_do_resume)
#endif

	.type	__xsc3_setup, #function
__xsc3_setup:
	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r0
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	orr	r4, r4, #0x18			@ cache the page table in L2
	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer

	mov	r0, #1 << 6			@ cp6 access for early sched_clock
	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register

	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
	and	r0, r0, #2			@ preserve the P bit setting
	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg

	adr	r5, xsc3_crval
	ldmia	r5, {r5, r6}

#ifdef CONFIG_CACHE_XSC3L2
	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
	ands	r0, r0, #0xf8
	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
#endif

	mrc	p15, 0, r0, c1, c0, 0		@ get control register
	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
						@ ...I Z..S .... .... (uc)
	ret	lr

	.size	__xsc3_setup, . - __xsc3_setup

	.type	xsc3_crval, #object
xsc3_crval:
	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900

	__INITDATA

	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
	define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1

	.section ".rodata"

	string	cpu_arch_name, "armv5te"
	string	cpu_elf_name, "v5"
	string	cpu_xsc3_name, "XScale-V3 based processor"

	.align

	.section ".proc.info.init", "a"

.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
	.type	__\name\()_proc_info,#object
__\name\()_proc_info:
	.long	\cpu_val
	.long	\cpu_mask
	.long	PMD_TYPE_SECT | \
		PMD_SECT_BUFFERABLE | \
		PMD_SECT_CACHEABLE | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	.long	PMD_TYPE_SECT | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	initfn	__xsc3_setup, __\name\()_proc_info
	.long	cpu_arch_name
	.long	cpu_elf_name
	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
	.long	cpu_xsc3_name
	.long	xsc3_processor_functions
	.long	v4wbi_tlb_fns
	.long	xsc3_mc_user_fns
	.long	xsc3_cache_fns
	.size	__\name\()_proc_info, . - __\name\()_proc_info
.endm

	xsc3_proc_info xsc3, 0x69056000, 0xffffe000

/* Note: PXA935 changed its implementor ID from Intel to Marvell */
	xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000
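
/*
 * The two proc_info entries above differ only in the implementor field
 * of the CPU ID: 0x69 ('i', Intel) for the original XSC3 parts and
 * 0x56 ('V', Marvell) for the PXA935, as noted in the comment above.
 */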