/*
 *  linux/arch/arm/mm/cache-v7.S
 *
 *  Copyright (C) 2001 Deep Blue Solutions Ltd.
 *  Copyright (C) 2005 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  This is the "shell" of the ARMv7 processor support.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>

#include "proc-macros.S"

/*
 * The secondary kernel init calls v7_flush_dcache_all before it enables
 * the L1; however, the L1 comes out of reset in an undefined state, so
 * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
 * of cache lines with uninitialized data and uninitialized tags to get
 * written out to memory, which does really unpleasant things to the main
 * processor.  We fix this by performing an invalidate, rather than a
 * clean + invalidate, before jumping into the kernel.
 *
 * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
 * to be called for both secondary cores startup and primary core resume
 * procedures.
 */
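/*
 * For reference, the set/way walk below is roughly equivalent to the
 * following C (a sketch only; the names are illustrative and not taken
 * from this file).  Field positions are those of the v7 CCSIDR and of
 * the DCISW argument:
 *
 *	set_shift = (ccsidr & 0x7) + 4;		// log2(line length in bytes)
 *	way_shift = clz(num_ways - 1);		// ways live in the top bits
 *	for (set = num_sets - 1; set >= 0; set--)
 *		for (way = num_ways - 1; way >= 0; way--)
 *			dcisw((way << way_shift) | (set << set_shift));
 */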
ENTRY(v7_invalidate_l1)
       mov     r0, #0
       mcr     p15, 2, r0, c0, c0, 0   @ select L1 data cache in CSSELR
       mrc     p15, 1, r0, c0, c0, 0   @ read CCSIDR for that cache

       ldr     r1, =0x7fff
       and     r2, r1, r0, lsr #13     @ NumSets - 1

       ldr     r1, =0x3ff

       and     r3, r1, r0, lsr #3      @ NumWays - 1
       add     r2, r2, #1              @ NumSets

       and     r0, r0, #0x7
       add     r0, r0, #4      @ SetShift

       clz     r1, r3          @ WayShift
       add     r4, r3, #1      @ NumWays
1:     sub     r2, r2, #1      @ NumSets--
       mov     r3, r4          @ Temp = NumWays
2:     subs    r3, r3, #1      @ Temp--
       mov     r5, r3, lsl r1
       mov     r6, r2, lsl r0
       orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
       mcr     p15, 0, r5, c7, c6, 2   @ DCISW - invalidate by set/way
       bgt     2b
       cmp     r2, #0
       bgt     1b
       dsb
       isb
       mov     pc, lr
ENDPROC(v7_invalidate_l1)

/*
 *	v7_flush_icache_all()
 *
 *	Flush the whole I-cache.
 *
 *	Registers:
 *	r0 - set to 0
 */
ENTRY(v7_flush_icache_all)
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
	mov	pc, lr
ENDPROC(v7_flush_icache_all)

/*
 *	v7_flush_dcache_louis()
 *
 *	Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 */

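/*
 * For reference (ARMv7 ARM): in CLIDR, LoUU lives in bits [29:27], LoC in
 * bits [26:24] and LoUIS in bits [23:21]; the SMP/UP alternatives below
 * pick LoUIS or LoUU respectively and turn it into a level * 2 value.
 */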
ENTRY(v7_flush_dcache_louis)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
	moveq	pc, lr				@ return if level == 0
	mov	r10, #0				@ r10 (starting level) = 0
	b	flush_levels			@ start flushing cache levels
ENDPROC(v7_flush_dcache_louis)

/*
 *	v7_flush_dcache_all()
 *
 *	Flush the whole D-cache.
 *
 *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 */
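/*
 * Reference for the CCSIDR fields decoded in flush_levels below (ARMv7 ARM):
 * bits [2:0]   = log2(words per line) - 2, so line shift = value + 4
 * bits [12:3]  = Associativity - 1 (maximum way number)
 * bits [27:13] = NumSets - 1 (maximum set/index number)
 */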
ENTRY(v7_flush_dcache_all)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
	ands	r3, r0, #0x7000000		@ extract loc from clidr
	mov	r3, r3, lsr #23			@ left align loc bit field
	beq	finished			@ if loc is 0, then no need to clean
	mov	r10, #0				@ start clean at cache level 0
flush_levels:
	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
	and	r1, r1, #7			@ mask off the bits for current cache only
	cmp	r1, #2				@ see what cache we have at this level
	blt	skip				@ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPT
	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
#endif
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	isb					@ isb to sync the new cssr&csidr
	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
#ifdef CONFIG_PREEMPT
	restore_irqs_notrace r9
#endif
	and	r2, r1, #7			@ extract the length of the cache lines
	add	r2, r2, #4			@ add 4 (line length offset)
	ldr	r4, =0x3ff
	ands	r4, r4, r1, lsr #3		@ find maximum number of the way size
	clz	r5, r4				@ find bit position of way size increment
	ldr	r7, =0x7fff
	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
loop1:
	mov	r9, r4				@ create working copy of max way size
loop2:
 ARM(	orr	r11, r10, r9, lsl r5	)	@ factor way and cache number into r11
 THUMB(	lsl	r6, r9, r5		)
 THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
 ARM(	orr	r11, r11, r7, lsl r2	)	@ factor index number into r11
 THUMB(	lsl	r6, r7, r2		)
 THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
	subs	r9, r9, #1			@ decrement the way
	bge	loop2
	subs	r7, r7, #1			@ decrement the index
	bge	loop1
skip:
	add	r10, r10, #2			@ increment cache number
	cmp	r3, r10
	bgt	flush_levels
finished:
	mov	r10, #0				@ switch back to cache level 0
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	dsb
	isb
	mov	pc, lr
ENDPROC(v7_flush_dcache_all)

/*
 *	v7_flush_kern_cache_all()
 *
 *	Flush the entire cache system.
 *	The data cache flush is achieved using atomic clean / invalidates
 *	working outwards from the L1 cache. This is done using Set/Way based
 *	cache maintenance instructions.
 *	The instruction cache can still be invalidated back to the point of
 *	unification in a single instruction.
 */
ENTRY(v7_flush_kern_cache_all)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_all
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	mov	pc, lr
ENDPROC(v7_flush_kern_cache_all)

/*
 *	v7_flush_kern_cache_louis(void)
 *
 *	Flush the data cache up to Level of Unification Inner Shareable.
 *	Invalidate the I-cache to the point of unification.
 */
ENTRY(v7_flush_kern_cache_louis)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_louis
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	mov	pc, lr
ENDPROC(v7_flush_kern_cache_louis)

/*
 *	v7_flush_user_cache_all()
 *
 *	Flush all cache entries in a particular address space
 *
 *	- mm    - mm_struct describing address space
 */
ENTRY(v7_flush_user_cache_all)
	/*FALLTHROUGH*/

/*
 *	v7_flush_user_cache_range(start, end, flags)
 *
 *	Flush a range of cache entries in the specified address space.
 *
 *	- start - start address (may not be aligned)
 *	- end   - end address (exclusive, may not be aligned)
 *	- flags	- vm_area_struct flags describing address space
 *
 *	It is assumed that:
 *	- we have a VIPT cache.
 */
ENTRY(v7_flush_user_cache_range)
	mov	pc, lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
 *	v7_coherent_kern_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_kern_range)
	/* FALLTHROUGH */

/*
 *	v7_coherent_user_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
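/*
 * Note on the body below: it returns 0 on success; a user address that
 * faults during the D-clean or I-invalidate loop is caught by the 9001
 * fixup and returns -EFAULT.  The kernel variant above falls through
 * into, and so shares, this body.
 */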
ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart		)
	dcache_line_size r2, r3			@ r2 = D-cache line size
	sub	r3, r2, #1
	bic	r12, r0, r3			@ align start down to a D line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
 USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
	add	r12, r12, r2
	cmp	r12, r1
	blo	1b
	dsb
	icache_line_size r2, r3			@ r2 = I-cache line size
	sub	r3, r2, #1
	bic	r12, r0, r3			@ align start down to an I line
2:
 USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
	add	r12, r12, r2
	cmp	r12, r1
	blo	2b
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
	dsb
	isb
	mov	pc, lr

/*
 * Fault handling for the cache operation above. If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
	dsb
#endif
	mov	r0, #-EFAULT
	mov	pc, lr
 UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)

/*
 *	v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure that the data held in the region at addr is written back
 *	to memory.
 *
 *	- addr	- kernel address
 *	- size	- region size
 */
ENTRY(v7_flush_kern_dcache_area)
	dcache_line_size r2, r3			@ r2 = D-cache line size
	add	r1, r0, r1			@ r1 = end address (addr + size)
	sub	r3, r2, #1
	bic	r0, r0, r3			@ align start down to a D line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_flush_kern_dcache_area)

/*
 *	v7_dma_inv_range(start,end)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
 *	purge old data in the cache.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 */
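/*
 * Note on the unaligned-edge handling below: if start or end is not
 * cache-line aligned, the partial line at that edge is cleaned and
 * invalidated rather than just invalidated, so that unrelated data
 * sharing the line is not thrown away.
 */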
v7_dma_inv_range:
	dcache_line_size r2, r3			@ r2 = D-cache line size
	sub	r3, r2, #1
	tst	r0, r3				@ start aligned to a cache line?
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line

	tst	r1, r3				@ end aligned to a cache line?
	bic	r1, r1, r3
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
1:
	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_dma_inv_range)

/*
 *	v7_dma_clean_range(start,end)
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 */
v7_dma_clean_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_dma_clean_range)

/*
 *	v7_dma_flush_range(start,end)
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 */
ENTRY(v7_dma_flush_range)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_dma_flush_range)

/*
 *	dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
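/*
 * Maintenance policy implemented by the two entry points below:
 * map:   DMA_FROM_DEVICE -> invalidate, otherwise -> clean
 * unmap: DMA_TO_DEVICE   -> nothing,    otherwise -> invalidate
 */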
ENTRY(v7_dma_map_area)
	add	r1, r1, r0			@ r1 = end address (start + size)
	teq	r2, #DMA_FROM_DEVICE
	beq	v7_dma_inv_range
	b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
 *	dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_unmap_area)
	add	r1, r1, r0			@ r1 = end address (start + size)
	teq	r2, #DMA_TO_DEVICE
	bne	v7_dma_inv_range
	mov	pc, lr
ENDPROC(v7_dma_unmap_area)

	__INITDATA

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions v7
