/* xref: /openbmc/linux/arch/arm/mm/proc-xsc3.S (revision 3560adf6) */
/*
 * linux/arch/arm/mm/proc-xsc3.S
 *
 * Original Author: Matthew Gilbert
 * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
 *
 * Copyright 2004 (C) Intel Corp.
 * Copyright 2005 (C) MontaVista Software, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
 * an extension to Intel's original XScale core that adds the following
 * features:
 *
 * - ARMv6 Supersections
 * - Low Locality Reference pages (replaces mini-cache)
 * - 36-bit addressing
 * - L2 cache
 * - Cache coherency if chipset supports it
 *
 * Based on original XScale code by Nicolas Pitre.
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
#include <mach/hardware.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"

/*
 * This is the maximum size of an area which will be flushed line by
 * line.  If the area is this size or larger (the range code compares
 * with an unsigned ">="), then we flush the whole cache instead.
 */
#define MAX_AREA_SIZE	32768

/*
 * The cache line size of the L1 I, L1 D and unified L2 cache.
 *
 * Used both as the loop stride and, as (CACHELINESIZE - 1), as an
 * alignment mask, so it has to stay a power of two.
 */
#define CACHELINESIZE	32

/*
 * The size of the L1 D cache.
 *
 * NOTE(review): nothing visible in this file references CACHESIZE; the
 * whole-cache loop hard-codes its own start index.  Confirm before
 * changing either.
 */
#define CACHESIZE	32768

/*
 * cpwait_ret lr, rd
 *
 * This macro is used to wait for a CP15 write and is needed when we
 * have to ensure that the last operation to the coprocessor was
 * completed before continuing with operation.  It also performs the
 * function return, so it must be the last thing in a routine.
 *
 *	lr - register holding the return address
 *	rd - scratch register, overwritten with the CP15 value read
 *
 * "\rd, LSR #32" always evaluates to zero, so the subtraction leaves
 * pc = \lr.  The operand only exists to make the return depend on the
 * result of the preceding mrc.
 */
	.macro	cpwait_ret, lr, rd
	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
						@ flush instruction pipeline
	.endm

/*
 * clean_d_cache rd, rs
 *
 * This macro cleans and invalidates the entire L1 D cache.
 *
 *	rd - scratch register used as the loop index (clobbered)
 *	rs - accepted for the benefit of existing callers but never
 *	     referenced by the macro body
 *
 * The index starts at 0x1fe0.  The inner loop steps bits [31:30]
 * through all four values (the "adds" sets carry when they wrap back
 * to zero); the outer loop then lowers the index by 0x20 until it goes
 * negative.  That is 4 x 256 = 1024 line operations in total.
 * NOTE(review): presumably bits [31:30] select the way and the low
 * field the set - confirm against the core manual.
 *
 * Flags are clobbered.  The macro defines a local label "1", so a
 * "1b" placed after an expansion refers to the label inside it.
 */

	.macro	clean_d_cache rd, rs
	mov	\rd, #0x1f00
	orr	\rd, \rd, #0x00e0
1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
	adds	\rd, \rd, #0x40000000
	bcc	1b
	subs	\rd, \rd, #0x20
	bpl	1b
	.endm

79	.text
80
81/*
82 * cpu_xsc3_proc_init()
83 *
84 * Nothing too exciting at the moment
85 */
86ENTRY(cpu_xsc3_proc_init)
87	mov	pc, lr
88
/*
 * cpu_xsc3_proc_fin()
 *
 * Quiesce the processor: switch to SVC mode with IRQs and FIQs masked,
 * clean and invalidate the caches, then clear the I, Z, C and A bits
 * of the CP15 control register.
 *
 * The return address is pushed because the nested "bl" overwrites lr;
 * the final load pops it directly into pc.
 *
 * Clobbers r0, r2, ip and the flags (r2/ip via the cache flush).
 */
ENTRY(cpu_xsc3_proc_fin)
	str	lr, [sp, #-4]!
	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r0
	bl	xsc3_flush_kern_cache_all	@ clean caches
	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
	bic	r0, r0, #0x1800			@ ...IZ...........
	bic	r0, r0, #0x0006			@ .............CA.
	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
	ldr	pc, [sp], #4

/*
 * cpu_xsc3_reset(loc)
 *
 * Perform a soft reset of the system.  Put the CPU into the
 * same state as it would be if it had been reset, and branch
 * to what would be the reset vector.
 *
 * loc: location to jump to for soft reset (passed in r0)
 *
 * Does not return.  The control register is written twice: first with
 * the V, I, Z, S, B, C and A bits cleared, then, after the L1 caches
 * have been invalidated, with M cleared as well.
 *
 * NOTE(review): ip is used as the source register of the invalidate
 * operations without being initialised in this routine; presumably
 * the value written is ignored by those operations - confirm.
 */
	.align	5
ENTRY(cpu_xsc3_reset)
	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r1			@ reset CPSR
	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
	bic	r1, r1, #0x3900			@ ..VIZ..S........
	bic	r1, r1, #0x0086			@ ........B....CA.
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	bic	r1, r1, #0x0001			@ ...............M
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	@ CAUTION: MMU turned off from this point.  We count on the pipeline
	@ already containing those two last instructions to survive.
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	mov	pc, r0

/*
 * cpu_xsc3_do_idle()
 *
 * Cause the processor to idle
 *
 * For now we do nothing but go to idle mode for every case
 *
 * XScale supports clock switching, but using idle mode support
 * allows external hardware to react to system state changes.
 *
 * Writes the value 1 to coprocessor 14 register c7 and returns.
 * Clobbers r0.
 */
	.align	5

ENTRY(cpu_xsc3_do_idle)
	mov	r0, #1
	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
	mov	pc, lr

/* ================================= CACHE ================================ */

/*
 *	flush_user_cache_all()
 *
 *	Invalidate all cache entries in a particular address
 *	space.
 *
 *	Shares its body with flush_kern_cache_all() below.
 */
ENTRY(xsc3_flush_user_cache_all)
	/* FALLTHROUGH */

/*
 *	flush_kern_cache_all()
 *
 *	Clean and invalidate the entire cache.
 *
 *	__flush_whole_cache is a second way in, used by the range
 *	flush when the area is too large to walk line by line.  It
 *	expects:
 *
 *	- r2 - flags; the I cache is only invalidated if VM_EXEC is set
 *	- ip - zero
 *
 *	Clobbers r0 and the flags.  r1 is named in the macro call but
 *	the macro never references its second argument.
 */
ENTRY(xsc3_flush_kern_cache_all)
	mov	r2, #VM_EXEC
	mov	ip, #0
__flush_whole_cache:
	clean_d_cache r0, r1
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 *	flush_user_cache_range(start, end, vm_flags)
 *
 *	Invalidate a range of cache entries in the specified
 *	address space.
 *
 *	- start    - start address (may not be aligned)
 *	- end      - end address (exclusive, may not be aligned)
 *	- vm_flags - flags of the mapping; only VM_EXEC is examined,
 *		     to decide whether the I cache and BTB need
 *		     invalidating as well
 *
 *	Ranges of MAX_AREA_SIZE bytes or more are handed to
 *	__flush_whole_cache (with ip = 0 and r2 = vm_flags).
 *
 *	The size check uses the addresses as passed in; only afterwards
 *	is start rounded down to a cache line boundary.  Without that
 *	rounding, stepping an unaligned start by CACHELINESIZE could
 *	move past 'end' before the line holding the last byte had been
 *	visited.
 *
 *	Clobbers r0, r3, ip and the flags.
 */
	.align	5
ENTRY(xsc3_flush_user_cache_range)
	mov	ip, #0
	sub	r3, r1, r0			@ calculate total size
	cmp	r3, #MAX_AREA_SIZE
	bhs	__flush_whole_cache

	bic	r0, r0, #CACHELINESIZE - 1	@ walk from a line boundary
1:	tst	r2, #VM_EXEC
	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 *	coherent_kern_range(start, end)
 *	coherent_user_range(start, end)
 *
 *	Ensure coherency between the I cache and the D cache in the
 *	region described by start and end.  If you have non-snooping
 *	Harvard caches, you need to implement this function.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	Both entry points share one body: start is rounded down to a
 *	cache line, every D line up to 'end' is cleaned (not
 *	invalidated), and then the whole L1 I cache and BTB are
 *	invalidated.  At least one line is always cleaned, because the
 *	bounds check comes after the operation.
 *
 *	Note: single I-cache line invalidation isn't used here since
 *	it also trashes the mini I-cache used by JTAG debuggers.
 *
 *	Clobbers r0 and the flags.
 */
ENTRY(xsc3_coherent_kern_range)
/* FALLTHROUGH */
ENTRY(xsc3_coherent_user_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 *	flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure no D cache aliasing occurs, either with itself or
 *	the I cache.
 *
 *	- addr	- kernel address
 *	- size	- region size
 *
 *	Cleans and invalidates every D line from addr up to
 *	addr + size, then invalidates the whole L1 I cache and BTB.
 *	The first line is processed before the bounds check, so one
 *	line is touched even when size is zero.
 *
 *	NOTE(review): addr is not rounded down to a cache line here;
 *	presumably callers always pass line-aligned addresses - confirm.
 *
 *	Clobbers r0, r1 and the flags.
 */
ENTRY(xsc3_flush_kern_dcache_area)
	add	r1, r0, r1			@ r1 = end address
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	mov	pc, lr

/*
 *	dma_inv_range(start, end)
 *
 *	Invalidate (discard) the specified virtual address range.
 *	May not write back any entries.  If 'start' or 'end'
 *	are not cache line aligned, those lines must be written
 *	back.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	A partially covered first or last line is cleaned before the
 *	invalidate loop runs, so data sharing that line but lying
 *	outside the range is written back rather than discarded.
 *
 *	File-local label (no ENTRY); reached from dma_map_area.
 *	Clobbers r0 and the flags.
 */
xsc3_dma_inv_range:
	tst	r0, #CACHELINESIZE - 1
	bic	r0, r0, #CACHELINESIZE - 1
	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	tst	r1, #CACHELINESIZE - 1
	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr

/*
 *	dma_clean_range(start, end)
 *
 *	Clean the specified virtual address range.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	start is rounded down to a cache line; lines are cleaned but
 *	left valid in the cache.  Ends with a data write barrier.
 *
 *	File-local label (no ENTRY); reached from dma_map_area.
 *	Clobbers r0 and the flags.
 */
xsc3_dma_clean_range:
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr

/*
 *	dma_flush_range(start, end)
 *
 *	Clean and invalidate the specified virtual address range.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	start is rounded down to a cache line.  Ends with a data write
 *	barrier.  Clobbers r0 and the flags.
 */
ENTRY(xsc3_dma_flush_range)
	bic	r0, r0, #CACHELINESIZE - 1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr

/*
 *	dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 *
 *	Converts (start, size) into (start, end) and tail-calls one of
 *	the range routines, chosen by a single unsigned compare of dir
 *	against DMA_TO_DEVICE:
 *
 *	- equal  -> clean range
 *	- higher -> invalidate range
 *	- lower  -> clean and invalidate range
 *
 *	NOTE(review): this relies on the numeric ordering of the DMA
 *	direction constants, which are defined outside this file.
 */
ENTRY(xsc3_dma_map_area)
	add	r1, r1, r0			@ r1 = end address
	cmp	r2, #DMA_TO_DEVICE
	beq	xsc3_dma_clean_range
	bcs	xsc3_dma_inv_range
	b	xsc3_dma_flush_range
ENDPROC(xsc3_dma_map_area)

/*
 *	dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 *
 *	No cache maintenance is performed on unmap: the routine
 *	returns immediately and leaves every register untouched.
 */
ENTRY(xsc3_dma_unmap_area)
	mov	pc, lr
ENDPROC(xsc3_dma_unmap_area)

/*
 * Table of the cache maintenance entry points defined above, one
 * 32-bit pointer per operation.
 *
 * NOTE(review): the order presumably has to match the C structure
 * that consumers overlay on this table - confirm before reordering.
 */
ENTRY(xsc3_cache_fns)
	.long	xsc3_flush_kern_cache_all
	.long	xsc3_flush_user_cache_all
	.long	xsc3_flush_user_cache_range
	.long	xsc3_coherent_kern_range
	.long	xsc3_coherent_user_range
	.long	xsc3_flush_kern_dcache_area
	.long	xsc3_dma_map_area
	.long	xsc3_dma_unmap_area
	.long	xsc3_dma_flush_range

/*
 * cpu_xsc3_dcache_clean_area(addr, size)
 *
 * Clean (write back without invalidating) the D cache lines covering
 * size bytes starting at addr.
 *
 *	r0 - start address
 *	r1 - size in bytes
 *
 * The remaining size is counted down by one cache line per iteration
 * and the loop continues while it is still above zero (unsigned), so
 * at least one line is always cleaned.
 *
 * Clobbers r0, r1 and the flags.
 */
ENTRY(cpu_xsc3_dcache_clean_area)
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	subs	r1, r1, #CACHELINESIZE
	bhi	1b
	mov	pc, lr

/* =============================== PageTable ============================== */

/*
 * cpu_xsc3_switch_mm(pgd)
 *
 * Set the translation base pointer to be as described by pgd.
 *
 * pgd: new page tables (passed in r0)
 *
 * Sequence: clean and invalidate the whole L1 D cache, invalidate the
 * L1 I cache and BTB, load the new table base (with 0x18 or-ed in so
 * that table walks are cached in L2), invalidate both TLBs, and
 * return through cpwait_ret so that the CP15 writes have completed
 * before the caller resumes.
 *
 * NOTE(review): ip is used as the source register of the invalidate
 * and barrier operations without being initialised in this routine;
 * presumably the value written is ignored - confirm.
 *
 * Clobbers r0, r1, ip and the flags.
 */
	.align	5
ENTRY(cpu_xsc3_switch_mm)
	clean_d_cache r1, r2
	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	orr	r0, r0, #0x18			@ cache the page table in L2
	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	cpwait_ret lr, ip

/*
 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
 *
 * Set a PTE and flush it out
 *
 * cpu_xsc3_mt_table translates the memory type field of the Linux PTE
 * into the TEX/C/B bits of the hardware PTE.  It has sixteen 32-bit
 * entries and is indexed with (pte & L_PTE_MT_MASK) used directly as a
 * byte offset.
 * NOTE(review): that only works if the memory type field already sits
 * at a multiple-of-four position in the PTE - confirm against the
 * definition of L_PTE_MT_MASK.
 */
cpu_xsc3_mt_table:
	.long	0x00						@ L_PTE_MT_UNCACHED
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
	.long	0x00						@ unused
	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
	.long	0x00						@ unused
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
	.long	0x00						@ unused
	.long	0x00						@ unused
	.long	0x00						@ unused

/*
 * The prologue and epilogue macros are defined outside this file.
 * Between them, r1 holds the Linux PTE and r2 the hardware PTE being
 * built: the C and B bits left in r2 are replaced by the table entry,
 * and the coherent bit is added when the PTE is marked shared.
 *
 * The "tst" result has to survive until the "orrne"; the two
 * instructions in between do not set the flags.
 */
	.align	5
ENTRY(cpu_xsc3_set_pte_ext)
	xscale_set_pte_ext_prologue

	tst	r1, #L_PTE_SHARED		@ shared?
	and	r1, r1, #L_PTE_MT_MASK
	adr	ip, cpu_xsc3_mt_table
	ldr	ip, [ip, r1]
	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
	bic	r2, r2, #0x0c			@ clear old C,B bits
	orr	r2, r2, ip

	xscale_set_pte_ext_epilogue
	mov	pc, lr

	.ltorg

	.align

411	__INIT
412
413	.type	__xsc3_setup, #function
414__xsc3_setup:
415	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
416	msr	cpsr_c, r0
417	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
418	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
419	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
420	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
421	orr	r4, r4, #0x18			@ cache the page table in L2
422	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
423
424	mov	r0, #1 << 6			@ cp6 access for early sched_clock
425	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
426
427	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
428	and	r0, r0, #2			@ preserve bit P bit setting
429	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
430	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg
431
432	adr	r5, xsc3_crval
433	ldmia	r5, {r5, r6}
434
435#ifdef CONFIG_CACHE_XSC3L2
436	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
437	ands	r0, r0, #0xf8
438	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
439#endif
440
441	mrc	p15, 0, r0, c1, c0, 0		@ get control register
442	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
443	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
444						@ ...I Z..S .... .... (uc)
445	mov	pc, lr
446
447	.size	__xsc3_setup, . - __xsc3_setup
448
/*
 * Control register masks read by __xsc3_setup as two consecutive
 * words: bits to clear, then bits to set.
 * NOTE(review): the crval macro is defined outside this file;
 * presumably it emits "clear" followed by either "mmuset" or "ucset"
 * depending on the configuration - confirm.
 */
	.type	xsc3_crval, #object
xsc3_crval:
	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900

453	__INITDATA
454
455/*
456 * Purpose : Function pointers used to access above functions - all calls
457 *	     come through these
458 */
459
460	.type	xsc3_processor_functions, #object
461ENTRY(xsc3_processor_functions)
462	.word	v5t_early_abort
463	.word	legacy_pabort
464	.word	cpu_xsc3_proc_init
465	.word	cpu_xsc3_proc_fin
466	.word	cpu_xsc3_reset
467	.word	cpu_xsc3_do_idle
468	.word	cpu_xsc3_dcache_clean_area
469	.word	cpu_xsc3_switch_mm
470	.word	cpu_xsc3_set_pte_ext
471	.size	xsc3_processor_functions, . - xsc3_processor_functions
472
473	.section ".rodata"
474
475	.type	cpu_arch_name, #object
476cpu_arch_name:
477	.asciz	"armv5te"
478	.size	cpu_arch_name, . - cpu_arch_name
479
480	.type	cpu_elf_name, #object
481cpu_elf_name:
482	.asciz	"v5"
483	.size	cpu_elf_name, . - cpu_elf_name
484
485	.type	cpu_xsc3_name, #object
486cpu_xsc3_name:
487	.asciz	"XScale-V3 based processor"
488	.size	cpu_xsc3_name, . - cpu_xsc3_name
489
490	.align
491
492	.section ".proc.info.init", #alloc, #execinstr
493
494	.type	__xsc3_proc_info,#object
495__xsc3_proc_info:
496	.long	0x69056000
497	.long	0xffffe000
498	.long	PMD_TYPE_SECT | \
499		PMD_SECT_BUFFERABLE | \
500		PMD_SECT_CACHEABLE | \
501		PMD_SECT_AP_WRITE | \
502		PMD_SECT_AP_READ
503	.long	PMD_TYPE_SECT | \
504		PMD_SECT_AP_WRITE | \
505		PMD_SECT_AP_READ
506	b	__xsc3_setup
507	.long	cpu_arch_name
508	.long	cpu_elf_name
509	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
510	.long	cpu_xsc3_name
511	.long	xsc3_processor_functions
512	.long	v4wbi_tlb_fns
513	.long	xsc3_mc_user_fns
514	.long	xsc3_cache_fns
515	.size	__xsc3_proc_info, . - __xsc3_proc_info
516
/*
 * Note: PXA935 changed its implementor ID from Intel to Marvell
 *
 * This record is identical to __xsc3_proc_info apart from the top
 * byte of the first word (0x56 instead of 0x69).
 */

	.type	__xsc3_pxa935_proc_info,#object
__xsc3_pxa935_proc_info:
	.long	0x56056000			@ presumably CPU ID value
	.long	0xffffe000			@ presumably CPU ID mask
	.long	PMD_TYPE_SECT | \
		PMD_SECT_BUFFERABLE | \
		PMD_SECT_CACHEABLE | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	.long	PMD_TYPE_SECT | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	b	__xsc3_setup
	.long	cpu_arch_name
	.long	cpu_elf_name
	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
	.long	cpu_xsc3_name
	.long	xsc3_processor_functions
	.long	v4wbi_tlb_fns
	.long	xsc3_mc_user_fns
	.long	xsc3_cache_fns
	.size	__xsc3_pxa935_proc_info, . - __xsc3_pxa935_proc_info
