xref: /openbmc/linux/arch/arm/mm/cache-l2x0.c (revision 3a43b581)
1 /*
2  * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
3  *
4  * Copyright (C) 2007 ARM Limited
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 #include <linux/err.h>
20 #include <linux/init.h>
21 #include <linux/spinlock.h>
22 #include <linux/io.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 
26 #include <asm/cacheflush.h>
27 #include <asm/cputype.h>
28 #include <asm/hardware/cache-l2x0.h>
29 #include "cache-tauros3.h"
30 #include "cache-aurora-l2.h"
31 
32 struct l2c_init_data {
33 	const char *type;
34 	unsigned way_size_0;
35 	unsigned num_lock;
36 	void (*of_parse)(const struct device_node *, u32 *, u32 *);
37 	void (*enable)(void __iomem *, u32, unsigned);
38 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
39 	void (*save)(void __iomem *);
40 	struct outer_cache_fns outer_cache;
41 };
42 
43 #define CACHE_LINE_SIZE		32
44 
45 static void __iomem *l2x0_base;
46 static DEFINE_RAW_SPINLOCK(l2x0_lock);
47 static u32 l2x0_way_mask;	/* Bitmask of active ways */
48 static u32 l2x0_size;
49 static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
50 
51 struct l2x0_regs l2x0_saved_regs;
52 
53 /*
54  * Common code for all cache controllers.
55  */
56 static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
57 {
58 	/* wait for cache operation by line or way to complete */
59 	while (readl_relaxed(reg) & mask)
60 		cpu_relax();
61 }
62 
63 /*
64  * By default, we write directly to secure registers.  Platforms must
65  * override this if they are running non-secure.
66  */
67 static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
68 {
69 	if (val == readl_relaxed(base + reg))
70 		return;
71 	if (outer_cache.write_sec)
72 		outer_cache.write_sec(val, reg);
73 	else
74 		writel_relaxed(val, base + reg);
75 }
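
/*
 * Illustrative sketch (not part of this file): a platform that runs the
 * kernel non-secure would install its own write_sec hook before the L2C is
 * initialised, typically trapping to secure firmware.  The SMC helper and
 * its ID below are hypothetical placeholders:
 *
 *	static void my_plat_l2c_write_sec(unsigned long val, unsigned reg)
 *	{
 *		my_plat_smc(MY_PLAT_SMC_L2C_WRITE, reg, val);
 *	}
 *
 *	...
 *	outer_cache.write_sec = my_plat_l2c_write_sec;
 */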
76 
77 /*
78  * This should only be called when we have a requirement that the
79  * register be written due to a work-around, as platforms running
80  * in non-secure mode may not be able to access this register.
81  */
82 static inline void l2c_set_debug(void __iomem *base, unsigned long val)
83 {
84 	if (outer_cache.set_debug)
85 		outer_cache.set_debug(val);
86 	else
87 		l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
88 }
89 
90 static void __l2c_op_way(void __iomem *reg)
91 {
92 	writel_relaxed(l2x0_way_mask, reg);
93 	l2c_wait_mask(reg, l2x0_way_mask);
94 }
95 
96 static inline void l2c_unlock(void __iomem *base, unsigned num)
97 {
98 	unsigned i;
99 
100 	for (i = 0; i < num; i++) {
101 		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
102 			       i * L2X0_LOCKDOWN_STRIDE);
103 		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
104 			       i * L2X0_LOCKDOWN_STRIDE);
105 	}
106 }
107 
108 /*
109  * Enable the L2 cache controller.  This function must only be
110  * called when the cache controller is known to be disabled.
111  */
112 static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
113 {
114 	unsigned long flags;
115 
116 	l2c_write_sec(aux, base, L2X0_AUX_CTRL);
117 
118 	l2c_unlock(base, num_lock);
119 
120 	local_irq_save(flags);
121 	__l2c_op_way(base + L2X0_INV_WAY);
122 	writel_relaxed(0, base + sync_reg_offset);
123 	l2c_wait_mask(base + sync_reg_offset, 1);
124 	local_irq_restore(flags);
125 
126 	l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
127 }
128 
129 static void l2c_disable(void)
130 {
131 	void __iomem *base = l2x0_base;
132 
133 	outer_cache.flush_all();
134 	l2c_write_sec(0, base, L2X0_CTRL);
135 	dsb(st);
136 }
137 
138 #ifdef CONFIG_CACHE_PL310
139 static inline void cache_wait(void __iomem *reg, unsigned long mask)
140 {
141 	/* cache operations by line are atomic on PL310 */
142 }
143 #else
144 #define cache_wait	l2c_wait_mask
145 #endif
146 
147 static inline void cache_sync(void)
148 {
149 	void __iomem *base = l2x0_base;
150 
151 	writel_relaxed(0, base + sync_reg_offset);
152 	cache_wait(base + L2X0_CACHE_SYNC, 1);
153 }
154 
155 #if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
156 static inline void debug_writel(unsigned long val)
157 {
158 	if (outer_cache.set_debug || outer_cache.write_sec)
159 		l2c_set_debug(l2x0_base, val);
160 }
161 #else
162 /* Optimised out for non-errata case */
163 static inline void debug_writel(unsigned long val)
164 {
165 }
166 #endif
167 
168 static void l2x0_cache_sync(void)
169 {
170 	unsigned long flags;
171 
172 	raw_spin_lock_irqsave(&l2x0_lock, flags);
173 	cache_sync();
174 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
175 }
176 
177 static void __l2x0_flush_all(void)
178 {
179 	debug_writel(0x03);
180 	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
181 	cache_sync();
182 	debug_writel(0x00);
183 }
184 
185 static void l2x0_flush_all(void)
186 {
187 	unsigned long flags;
188 
189 	/* clean and invalidate all ways */
190 	raw_spin_lock_irqsave(&l2x0_lock, flags);
191 	__l2x0_flush_all();
192 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
193 }
194 
195 static void l2x0_disable(void)
196 {
197 	unsigned long flags;
198 
199 	raw_spin_lock_irqsave(&l2x0_lock, flags);
200 	__l2x0_flush_all();
201 	l2c_write_sec(0, l2x0_base, L2X0_CTRL);
202 	dsb(st);
203 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
204 }
205 
206 static void l2c_save(void __iomem *base)
207 {
208 	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
209 }
210 
211 /*
212  * L2C-210 specific code.
213  *
214  * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
215  * ensure that no background operation is running.  The way operations
216  * are all background tasks.
217  *
218  * While a background operation is in progress, any new operation is
219  * ignored (unspecified whether this causes an error.)  Thankfully, not
220  * used on SMP.
221  *
222  * Never has a sync register other than L2X0_CACHE_SYNC, but
223  * we use sync_reg_offset here so we can share some of this with L2C-310.
224  */
225 static void __l2c210_cache_sync(void __iomem *base)
226 {
227 	writel_relaxed(0, base + sync_reg_offset);
228 }
229 
230 static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
231 	unsigned long end)
232 {
233 	while (start < end) {
234 		writel_relaxed(start, reg);
235 		start += CACHE_LINE_SIZE;
236 	}
237 }
238 
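/*
 * Note that a partially covered cache line at either end of the range is
 * cleaned and invalidated rather than just invalidated, so that dirty data
 * outside the requested range but sharing the same line is not lost.
 */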
239 static void l2c210_inv_range(unsigned long start, unsigned long end)
240 {
241 	void __iomem *base = l2x0_base;
242 
243 	if (start & (CACHE_LINE_SIZE - 1)) {
244 		start &= ~(CACHE_LINE_SIZE - 1);
245 		writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
246 		start += CACHE_LINE_SIZE;
247 	}
248 
249 	if (end & (CACHE_LINE_SIZE - 1)) {
250 		end &= ~(CACHE_LINE_SIZE - 1);
251 		writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
252 	}
253 
254 	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
255 	__l2c210_cache_sync(base);
256 }
257 
258 static void l2c210_clean_range(unsigned long start, unsigned long end)
259 {
260 	void __iomem *base = l2x0_base;
261 
262 	start &= ~(CACHE_LINE_SIZE - 1);
263 	__l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
264 	__l2c210_cache_sync(base);
265 }
266 
267 static void l2c210_flush_range(unsigned long start, unsigned long end)
268 {
269 	void __iomem *base = l2x0_base;
270 
271 	start &= ~(CACHE_LINE_SIZE - 1);
272 	__l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
273 	__l2c210_cache_sync(base);
274 }
275 
276 static void l2c210_flush_all(void)
277 {
278 	void __iomem *base = l2x0_base;
279 
280 	BUG_ON(!irqs_disabled());
281 
282 	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
283 	__l2c210_cache_sync(base);
284 }
285 
286 static void l2c210_sync(void)
287 {
288 	__l2c210_cache_sync(l2x0_base);
289 }
290 
291 static void l2c210_resume(void)
292 {
293 	void __iomem *base = l2x0_base;
294 
295 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
296 		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
297 }
298 
299 static const struct l2c_init_data l2c210_data __initconst = {
300 	.type = "L2C-210",
301 	.way_size_0 = SZ_8K,
302 	.num_lock = 1,
303 	.enable = l2c_enable,
304 	.save = l2c_save,
305 	.outer_cache = {
306 		.inv_range = l2c210_inv_range,
307 		.clean_range = l2c210_clean_range,
308 		.flush_range = l2c210_flush_range,
309 		.flush_all = l2c210_flush_all,
310 		.disable = l2c_disable,
311 		.sync = l2c210_sync,
312 		.resume = l2c210_resume,
313 	},
314 };
315 
316 /*
317  * L2C-220 specific code.
318  *
319  * All operations are background operations: they have to be waited for.
320  * Conflicting requests generate a slave error (which will cause an
321  * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the
322  * sync register here.
323  *
324  * However, we can re-use the l2c210_resume call.
325  */
326 static inline void __l2c220_cache_sync(void __iomem *base)
327 {
328 	writel_relaxed(0, base + L2X0_CACHE_SYNC);
329 	l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
330 }
331 
332 static void l2c220_op_way(void __iomem *base, unsigned reg)
333 {
334 	unsigned long flags;
335 
336 	raw_spin_lock_irqsave(&l2x0_lock, flags);
337 	__l2c_op_way(base + reg);
338 	__l2c220_cache_sync(base);
339 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
340 }
341 
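/*
 * Operate on at most 4096 bytes of the range at a time, dropping and
 * re-taking l2x0_lock between blocks so that interrupts are not held off
 * for the duration of a large range operation.
 */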
342 static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
343 	unsigned long end, unsigned long flags)
344 {
345 	raw_spinlock_t *lock = &l2x0_lock;
346 
347 	while (start < end) {
348 		unsigned long blk_end = start + min(end - start, 4096UL);
349 
350 		while (start < blk_end) {
351 			l2c_wait_mask(reg, 1);
352 			writel_relaxed(start, reg);
353 			start += CACHE_LINE_SIZE;
354 		}
355 
356 		if (blk_end < end) {
357 			raw_spin_unlock_irqrestore(lock, flags);
358 			raw_spin_lock_irqsave(lock, flags);
359 		}
360 	}
361 
362 	return flags;
363 }
364 
365 static void l2c220_inv_range(unsigned long start, unsigned long end)
366 {
367 	void __iomem *base = l2x0_base;
368 	unsigned long flags;
369 
370 	raw_spin_lock_irqsave(&l2x0_lock, flags);
371 	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
372 		if (start & (CACHE_LINE_SIZE - 1)) {
373 			start &= ~(CACHE_LINE_SIZE - 1);
374 			writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
375 			start += CACHE_LINE_SIZE;
376 		}
377 
378 		if (end & (CACHE_LINE_SIZE - 1)) {
379 			end &= ~(CACHE_LINE_SIZE - 1);
380 			l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
381 			writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
382 		}
383 	}
384 
385 	flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
386 				   start, end, flags);
387 	l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
388 	__l2c220_cache_sync(base);
389 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
390 }
391 
392 static void l2c220_clean_range(unsigned long start, unsigned long end)
393 {
394 	void __iomem *base = l2x0_base;
395 	unsigned long flags;
396 
397 	start &= ~(CACHE_LINE_SIZE - 1);
398 	if ((end - start) >= l2x0_size) {
399 		l2c220_op_way(base, L2X0_CLEAN_WAY);
400 		return;
401 	}
402 
403 	raw_spin_lock_irqsave(&l2x0_lock, flags);
404 	flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
405 				   start, end, flags);
406 	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
407 	__l2c220_cache_sync(base);
408 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
409 }
410 
411 static void l2c220_flush_range(unsigned long start, unsigned long end)
412 {
413 	void __iomem *base = l2x0_base;
414 	unsigned long flags;
415 
416 	start &= ~(CACHE_LINE_SIZE - 1);
417 	if ((end - start) >= l2x0_size) {
418 		l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
419 		return;
420 	}
421 
422 	raw_spin_lock_irqsave(&l2x0_lock, flags);
423 	flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
424 				   start, end, flags);
425 	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
426 	__l2c220_cache_sync(base);
427 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
428 }
429 
430 static void l2c220_flush_all(void)
431 {
432 	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
433 }
434 
435 static void l2c220_sync(void)
436 {
437 	unsigned long flags;
438 
439 	raw_spin_lock_irqsave(&l2x0_lock, flags);
440 	__l2c220_cache_sync(l2x0_base);
441 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
442 }
443 
444 static const struct l2c_init_data l2c220_data = {
445 	.type = "L2C-220",
446 	.way_size_0 = SZ_8K,
447 	.num_lock = 1,
448 	.enable = l2c_enable,
449 	.save = l2c_save,
450 	.outer_cache = {
451 		.inv_range = l2c220_inv_range,
452 		.clean_range = l2c220_clean_range,
453 		.flush_range = l2c220_flush_range,
454 		.flush_all = l2c220_flush_all,
455 		.disable = l2c_disable,
456 		.sync = l2c220_sync,
457 		.resume = l2c210_resume,
458 	},
459 };
460 
461 /*
462  * L2C-310 specific code.
463  *
464  * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
465  * and the way operations are all background tasks.  However, issuing an
466  * operation while a background operation is in progress results in a
467  * SLVERR response.  We can reuse:
468  *
469  *  __l2c210_cache_sync (using sync_reg_offset)
470  *  l2c210_sync
471  *  l2c210_inv_range (if 588369 is not applicable)
472  *  l2c210_clean_range
473  *  l2c210_flush_range (if 588369 is not applicable)
474  *  l2c210_flush_all (if 727915 is not applicable)
475  *
476  * Errata:
477  * 588369: PL310 R0P0->R1P0, fixed R2P0.
478  *	Affects: all clean+invalidate operations
479  *	clean and invalidate skips the invalidate step, so we need to issue
480  *	separate operations.  We also require the above debug workaround
481  *	enclosing this code fragment on affected parts.  On unaffected parts,
482  *	we must not use this workaround without the debug register writes
483  *	to avoid exposing a problem similar to 727915.
484  *
485  * 727915: PL310 R2P0->R3P0, fixed R3P1.
486  *	Affects: clean+invalidate by way
487  *	clean and invalidate by way runs in the background, and a store can
488  *	hit the line between the clean operation and invalidate operation,
489  *	resulting in the store being lost.
490  *
491  * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
492  *	Affects: 8x64-bit (double fill) line fetches
493  *	double fill line fetches can fail to cause dirty data to be evicted
494  *	from the cache before the new data overwrites the second line.
495  *
496  * 753970: PL310 R3P0, fixed R3P1.
497  *	Affects: sync
498  *	prevents merging writes after the sync operation, until another L2C
499  *	operation is performed (or a number of other conditions.)
500  *
501  * 769419: PL310 R0P0->R3P1, fixed R3P2.
502  *	Affects: store buffer
503  *	store buffer is not automatically drained.
504  */
505 static void l2c310_set_debug(unsigned long val)
506 {
507 	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
508 }
509 
510 static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
511 {
512 	void __iomem *base = l2x0_base;
513 
514 	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
515 		unsigned long flags;
516 
517 		/* Erratum 588369 for both clean+invalidate operations */
518 		raw_spin_lock_irqsave(&l2x0_lock, flags);
519 		l2c_set_debug(base, 0x03);
520 
521 		if (start & (CACHE_LINE_SIZE - 1)) {
522 			start &= ~(CACHE_LINE_SIZE - 1);
523 			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
524 			writel_relaxed(start, base + L2X0_INV_LINE_PA);
525 			start += CACHE_LINE_SIZE;
526 		}
527 
528 		if (end & (CACHE_LINE_SIZE - 1)) {
529 			end &= ~(CACHE_LINE_SIZE - 1);
530 			writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
531 			writel_relaxed(end, base + L2X0_INV_LINE_PA);
532 		}
533 
534 		l2c_set_debug(base, 0x00);
535 		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
536 	}
537 
538 	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
539 	__l2c210_cache_sync(base);
540 }
541 
542 static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
543 {
544 	raw_spinlock_t *lock = &l2x0_lock;
545 	unsigned long flags;
546 	void __iomem *base = l2x0_base;
547 
548 	raw_spin_lock_irqsave(lock, flags);
549 	while (start < end) {
550 		unsigned long blk_end = start + min(end - start, 4096UL);
551 
552 		l2c_set_debug(base, 0x03);
553 		while (start < blk_end) {
554 			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
555 			writel_relaxed(start, base + L2X0_INV_LINE_PA);
556 			start += CACHE_LINE_SIZE;
557 		}
558 		l2c_set_debug(base, 0x00);
559 
560 		if (blk_end < end) {
561 			raw_spin_unlock_irqrestore(lock, flags);
562 			raw_spin_lock_irqsave(lock, flags);
563 		}
564 	}
565 	raw_spin_unlock_irqrestore(lock, flags);
566 	__l2c210_cache_sync(base);
567 }
568 
569 static void l2c310_flush_all_erratum(void)
570 {
571 	void __iomem *base = l2x0_base;
572 	unsigned long flags;
573 
574 	raw_spin_lock_irqsave(&l2x0_lock, flags);
575 	l2c_set_debug(base, 0x03);
576 	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
577 	l2c_set_debug(base, 0x00);
578 	__l2c210_cache_sync(base);
579 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
580 }
581 
582 static void __init l2c310_save(void __iomem *base)
583 {
584 	unsigned revision;
585 
586 	l2c_save(base);
587 
588 	l2x0_saved_regs.tag_latency = readl_relaxed(base +
589 		L310_TAG_LATENCY_CTRL);
590 	l2x0_saved_regs.data_latency = readl_relaxed(base +
591 		L310_DATA_LATENCY_CTRL);
592 	l2x0_saved_regs.filter_end = readl_relaxed(base +
593 		L310_ADDR_FILTER_END);
594 	l2x0_saved_regs.filter_start = readl_relaxed(base +
595 		L310_ADDR_FILTER_START);
596 
597 	revision = readl_relaxed(base + L2X0_CACHE_ID) &
598 			L2X0_CACHE_ID_RTL_MASK;
599 
600 	/* From r2p0, there is a Prefetch offset/control register */
601 	if (revision >= L310_CACHE_ID_RTL_R2P0)
602 		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
603 							L310_PREFETCH_CTRL);
604 
605 	/* From r3p0, there is a Power control register */
606 	if (revision >= L310_CACHE_ID_RTL_R3P0)
607 		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
608 							L310_POWER_CTRL);
609 }
610 
611 static void l2c310_resume(void)
612 {
613 	void __iomem *base = l2x0_base;
614 
615 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
616 		unsigned revision;
617 
618 		/* restore pl310 setup */
619 		writel_relaxed(l2x0_saved_regs.tag_latency,
620 			       base + L310_TAG_LATENCY_CTRL);
621 		writel_relaxed(l2x0_saved_regs.data_latency,
622 			       base + L310_DATA_LATENCY_CTRL);
623 		writel_relaxed(l2x0_saved_regs.filter_end,
624 			       base + L310_ADDR_FILTER_END);
625 		writel_relaxed(l2x0_saved_regs.filter_start,
626 			       base + L310_ADDR_FILTER_START);
627 
628 		revision = readl_relaxed(base + L2X0_CACHE_ID) &
629 				L2X0_CACHE_ID_RTL_MASK;
630 
631 		if (revision >= L310_CACHE_ID_RTL_R2P0)
632 			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
633 				      L310_PREFETCH_CTRL);
634 		if (revision >= L310_CACHE_ID_RTL_R3P0)
635 			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
636 				      L310_POWER_CTRL);
637 
638 		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
639 	}
640 }
641 
642 static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
643 {
644 	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK;
645 	bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
646 
647 	if (rev >= L310_CACHE_ID_RTL_R2P0) {
648 		if (cortex_a9) {
649 			aux |= L310_AUX_CTRL_EARLY_BRESP;
650 			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
651 		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
652 			pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
653 			aux &= ~L310_AUX_CTRL_EARLY_BRESP;
654 		}
655 	}
656 
657 	/* r3p0 or later has a power control register */
658 	if (rev >= L310_CACHE_ID_RTL_R3P0) {
659 		u32 power_ctrl;
660 
661 		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
662 			      base, L310_POWER_CTRL);
663 		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
664 		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
665 			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
666 			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
667 	}
668 
669 	l2c_enable(base, aux, num_lock);
670 }
671 
672 static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
673 	struct outer_cache_fns *fns)
674 {
675 	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
676 	const char *errata[8];
677 	unsigned n = 0;
678 
679 	/* For compatibility */
680 	if (revision <= L310_CACHE_ID_RTL_R3P0)
681 		fns->set_debug = l2c310_set_debug;
682 
683 	if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
684 	    revision < L310_CACHE_ID_RTL_R2P0 &&
685 	    /* For bcm compatibility */
686 	    fns->inv_range == l2c210_inv_range) {
687 		fns->inv_range = l2c310_inv_range_erratum;
688 		fns->flush_range = l2c310_flush_range_erratum;
689 		errata[n++] = "588369";
690 	}
691 
692 	if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
693 	    revision >= L310_CACHE_ID_RTL_R2P0 &&
694 	    revision < L310_CACHE_ID_RTL_R3P1) {
695 		fns->flush_all = l2c310_flush_all_erratum;
696 		errata[n++] = "727915";
697 	}
698 
699 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
700 	    revision < L310_CACHE_ID_RTL_R3P2) {
701 		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
702 		/* I don't think bit23 is required here... but iMX6 does so */
703 		if (val & (BIT(30) | BIT(23))) {
704 			val &= ~(BIT(30) | BIT(23));
705 			l2c_write_sec(val, base, L310_PREFETCH_CTRL);
706 			errata[n++] = "752271";
707 		}
708 	}
709 
710 	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
711 	    revision == L310_CACHE_ID_RTL_R3P0) {
712 		sync_reg_offset = L2X0_DUMMY_REG;
713 		errata[n++] = "753970";
714 	}
715 
716 	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
717 		errata[n++] = "769419";
718 
719 	if (n) {
720 		unsigned i;
721 
722 		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
723 		for (i = 0; i < n; i++)
724 			pr_cont(" %s", errata[i]);
725 		pr_cont(" enabled\n");
726 	}
727 }
728 
729 static const struct l2c_init_data l2c310_init_fns __initconst = {
730 	.type = "L2C-310",
731 	.way_size_0 = SZ_8K,
732 	.num_lock = 8,
733 	.enable = l2c310_enable,
734 	.fixup = l2c310_fixup,
735 	.save = l2c310_save,
736 	.outer_cache = {
737 		.inv_range = l2c210_inv_range,
738 		.clean_range = l2c210_clean_range,
739 		.flush_range = l2c210_flush_range,
740 		.flush_all = l2c210_flush_all,
741 		.disable = l2c_disable,
742 		.sync = l2c210_sync,
743 		.set_debug = l2c310_set_debug,
744 		.resume = l2c310_resume,
745 	},
746 };
747 
748 static void __init __l2c_init(const struct l2c_init_data *data,
749 	u32 aux_val, u32 aux_mask, u32 cache_id)
750 {
751 	struct outer_cache_fns fns;
752 	unsigned way_size_bits, ways;
753 	u32 aux;
754 
755 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
756 
757 	aux &= aux_mask;
758 	aux |= aux_val;
759 
760 	/* Determine the number of ways */
761 	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
762 	case L2X0_CACHE_ID_PART_L310:
763 		if (aux & (1 << 16))
764 			ways = 16;
765 		else
766 			ways = 8;
767 		break;
768 
769 	case L2X0_CACHE_ID_PART_L210:
770 	case L2X0_CACHE_ID_PART_L220:
771 		ways = (aux >> 13) & 0xf;
772 		break;
773 
774 	case AURORA_CACHE_ID:
775 		ways = (aux >> 13) & 0xf;
776 		ways = 2 << ((ways + 1) >> 2);
777 		break;
778 
779 	default:
780 		/* Assume unknown chips have 8 ways */
781 		ways = 8;
782 		break;
783 	}
784 
785 	l2x0_way_mask = (1 << ways) - 1;
786 
787 	/*
788 	 * way_size_0 is the size that a way_size value of zero would be
789 	 * given the calculation: way_size = way_size_0 << way_size_bits.
790 	 * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
791 	 * then way_size_0 would be 8k.
792 	 *
793 	 * L2 cache size = number of ways * way size.
794 	 */
795 	way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
796 			L2C_AUX_CTRL_WAY_SIZE_SHIFT;
797 	l2x0_size = ways * (data->way_size_0 << way_size_bits);
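	/*
	 * Worked example with illustrative numbers: a 16-way L2C-310 with a
	 * way-size field of 3 gives 16 * (SZ_8K << 3) = 16 * 64K = 1MB.
	 */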
798 
799 	fns = data->outer_cache;
800 	fns.write_sec = outer_cache.write_sec;
801 	if (data->fixup)
802 		data->fixup(l2x0_base, cache_id, &fns);
803 	if (fns.write_sec)
804 		fns.set_debug = NULL;
805 
806 	/*
807 	 * Check if l2x0 controller is already enabled.  If we are booting
808 	 * in non-secure mode accessing the below registers will fault.
809 	 */
810 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
811 		data->enable(l2x0_base, aux, data->num_lock);
812 
813 	outer_cache = fns;
814 
815 	/*
816 	 * It is strange to save the register state before initialisation,
817 	 * but hey, this is what the DT implementations decided to do.
818 	 */
819 	if (data->save)
820 		data->save(l2x0_base);
821 
822 	/* Re-read it in case some bits are reserved. */
823 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
824 
825 	pr_info("%s cache controller enabled, %d ways, %d kB\n",
826 		data->type, ways, l2x0_size >> 10);
827 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
828 		data->type, cache_id, aux);
829 }
830 
831 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
832 {
833 	const struct l2c_init_data *data;
834 	u32 cache_id;
835 
836 	l2x0_base = base;
837 
838 	cache_id = readl_relaxed(base + L2X0_CACHE_ID);
839 
840 	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
841 	default:
842 	case L2X0_CACHE_ID_PART_L210:
843 		data = &l2c210_data;
844 		break;
845 
846 	case L2X0_CACHE_ID_PART_L220:
847 		data = &l2c220_data;
848 		break;
849 
850 	case L2X0_CACHE_ID_PART_L310:
851 		data = &l2c310_init_fns;
852 		break;
853 	}
854 
855 	__l2c_init(data, aux_val, aux_mask, cache_id);
856 }
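
/*
 * Illustrative use (not part of this file): a non-DT platform maps the
 * controller and calls l2x0_init() from its machine init code, passing the
 * auxiliary control bits it wants set (aux_val) and a mask of the hardware
 * bits to preserve (aux_mask).  The physical address below is a made-up
 * placeholder:
 *
 *	void __iomem *l2c_base = ioremap(0xfff12000, SZ_4K);
 *
 *	if (l2c_base)
 *		l2x0_init(l2c_base, aux_val, aux_mask);
 */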
857 
858 #ifdef CONFIG_OF
859 static int l2_wt_override;
860 
861 /* Aurora doesn't have the cache ID register available, so we have to
862  * pass it through the device tree */
863 static u32 cache_id_part_number_from_dt;
864 
865 static void __init l2x0_of_parse(const struct device_node *np,
866 				 u32 *aux_val, u32 *aux_mask)
867 {
868 	u32 data[2] = { 0, 0 };
869 	u32 tag = 0;
870 	u32 dirty = 0;
871 	u32 val = 0, mask = 0;
872 
873 	of_property_read_u32(np, "arm,tag-latency", &tag);
874 	if (tag) {
875 		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
876 		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
877 	}
878 
879 	of_property_read_u32_array(np, "arm,data-latency",
880 				   data, ARRAY_SIZE(data));
881 	if (data[0] && data[1]) {
882 		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
883 			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
884 		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
885 		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
886 	}
887 
888 	of_property_read_u32(np, "arm,dirty-latency", &dirty);
889 	if (dirty) {
890 		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
891 		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
892 	}
893 
894 	*aux_val &= ~mask;
895 	*aux_val |= val;
896 	*aux_mask &= ~mask;
897 }
898 
899 static const struct l2c_init_data of_l2c210_data __initconst = {
900 	.type = "L2C-210",
901 	.way_size_0 = SZ_8K,
902 	.num_lock = 1,
903 	.of_parse = l2x0_of_parse,
904 	.enable = l2c_enable,
905 	.save = l2c_save,
906 	.outer_cache = {
907 		.inv_range   = l2c210_inv_range,
908 		.clean_range = l2c210_clean_range,
909 		.flush_range = l2c210_flush_range,
910 		.flush_all   = l2c210_flush_all,
911 		.disable     = l2c_disable,
912 		.sync        = l2c210_sync,
913 		.resume      = l2c210_resume,
914 	},
915 };
916 
917 static const struct l2c_init_data of_l2c220_data __initconst = {
918 	.type = "L2C-220",
919 	.way_size_0 = SZ_8K,
920 	.num_lock = 1,
921 	.of_parse = l2x0_of_parse,
922 	.enable = l2c_enable,
923 	.save = l2c_save,
924 	.outer_cache = {
925 		.inv_range   = l2c220_inv_range,
926 		.clean_range = l2c220_clean_range,
927 		.flush_range = l2c220_flush_range,
928 		.flush_all   = l2c220_flush_all,
929 		.disable     = l2c_disable,
930 		.sync        = l2c220_sync,
931 		.resume      = l2c210_resume,
932 	},
933 };
934 
935 static void __init l2c310_of_parse(const struct device_node *np,
936 	u32 *aux_val, u32 *aux_mask)
937 {
938 	u32 data[3] = { 0, 0, 0 };
939 	u32 tag[3] = { 0, 0, 0 };
940 	u32 filter[2] = { 0, 0 };
941 
942 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
943 	if (tag[0] && tag[1] && tag[2])
944 		writel_relaxed(
945 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
946 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
947 			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
948 			l2x0_base + L310_TAG_LATENCY_CTRL);
949 
950 	of_property_read_u32_array(np, "arm,data-latency",
951 				   data, ARRAY_SIZE(data));
952 	if (data[0] && data[1] && data[2])
953 		writel_relaxed(
954 			L310_LATENCY_CTRL_RD(data[0] - 1) |
955 			L310_LATENCY_CTRL_WR(data[1] - 1) |
956 			L310_LATENCY_CTRL_SETUP(data[2] - 1),
957 			l2x0_base + L310_DATA_LATENCY_CTRL);
958 
959 	of_property_read_u32_array(np, "arm,filter-ranges",
960 				   filter, ARRAY_SIZE(filter));
961 	if (filter[1]) {
962 		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
963 			       l2x0_base + L310_ADDR_FILTER_END);
964 		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
965 			       l2x0_base + L310_ADDR_FILTER_START);
966 	}
967 }
968 
969 static const struct l2c_init_data of_l2c310_data __initconst = {
970 	.type = "L2C-310",
971 	.way_size_0 = SZ_8K,
972 	.num_lock = 8,
973 	.of_parse = l2c310_of_parse,
974 	.enable = l2c310_enable,
975 	.fixup = l2c310_fixup,
976 	.save  = l2c310_save,
977 	.outer_cache = {
978 		.inv_range   = l2c210_inv_range,
979 		.clean_range = l2c210_clean_range,
980 		.flush_range = l2c210_flush_range,
981 		.flush_all   = l2c210_flush_all,
982 		.disable     = l2c_disable,
983 		.sync        = l2c210_sync,
984 		.set_debug   = l2c310_set_debug,
985 		.resume      = l2c310_resume,
986 	},
987 };
988 
989 /*
990  * Note that the end addresses passed to Linux primitives are
991  * noninclusive, while the hardware cache range operations use
992  * inclusive start and end addresses.
993  */
994 static unsigned long calc_range_end(unsigned long start, unsigned long end)
995 {
996 	/*
997 	 * Limit the number of cache lines processed at once,
998 	 * since cache range operations stall the CPU pipeline
999 	 * until completion.
1000 	 */
1001 	if (end > start + MAX_RANGE_SIZE)
1002 		end = start + MAX_RANGE_SIZE;
1003 
1004 	/*
1005 	 * Cache range operations can't straddle a page boundary.
1006 	 */
1007 	if (end > PAGE_ALIGN(start+1))
1008 		end = PAGE_ALIGN(start+1);
1009 
1010 	return end;
1011 }
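
/*
 * Illustrative example (hypothetical addresses): for a request covering
 * [0x1000f80, 0x1002000), the first chunk is limited to MAX_RANGE_SIZE
 * bytes and must not cross the page boundary at 0x1001000, so the callers
 * below loop, issuing one range operation per chunk.
 */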
1012 
1013 /*
1014  * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
1015  * and range operations only do a TLB lookup on the start address.
1016  */
1017 static void aurora_pa_range(unsigned long start, unsigned long end,
1018 			unsigned long offset)
1019 {
1020 	unsigned long flags;
1021 
1022 	raw_spin_lock_irqsave(&l2x0_lock, flags);
1023 	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
1024 	writel_relaxed(end, l2x0_base + offset);
1025 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
1026 
1027 	cache_sync();
1028 }
1029 
1030 static void aurora_inv_range(unsigned long start, unsigned long end)
1031 {
1032 	/*
1033 	 * align start down and end up to the cache line size
1034 	 */
1035 	start &= ~(CACHE_LINE_SIZE - 1);
1036 	end = ALIGN(end, CACHE_LINE_SIZE);
1037 
1038 	/*
1039 	 * Invalidate all full cache lines between 'start' and 'end'.
1040 	 */
1041 	while (start < end) {
1042 		unsigned long range_end = calc_range_end(start, end);
1043 		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1044 				AURORA_INVAL_RANGE_REG);
1045 		start = range_end;
1046 	}
1047 }
1048 
1049 static void aurora_clean_range(unsigned long start, unsigned long end)
1050 {
1051 	/*
1052 	 * If L2 is forced to WT, the L2 will always be clean and we
1053 	 * don't need to do anything here.
1054 	 */
1055 	if (!l2_wt_override) {
1056 		start &= ~(CACHE_LINE_SIZE - 1);
1057 		end = ALIGN(end, CACHE_LINE_SIZE);
1058 		while (start != end) {
1059 			unsigned long range_end = calc_range_end(start, end);
1060 			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1061 					AURORA_CLEAN_RANGE_REG);
1062 			start = range_end;
1063 		}
1064 	}
1065 }
1066 
1067 static void aurora_flush_range(unsigned long start, unsigned long end)
1068 {
1069 	start &= ~(CACHE_LINE_SIZE - 1);
1070 	end = ALIGN(end, CACHE_LINE_SIZE);
1071 	while (start != end) {
1072 		unsigned long range_end = calc_range_end(start, end);
1073 		/*
1074 		 * If L2 is forced to WT, the L2 will always be clean and we
1075 		 * just need to invalidate.
1076 		 */
1077 		if (l2_wt_override)
1078 			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1079 							AURORA_INVAL_RANGE_REG);
1080 		else
1081 			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1082 							AURORA_FLUSH_RANGE_REG);
1083 		start = range_end;
1084 	}
1085 }
1086 
1087 static void aurora_save(void __iomem *base)
1088 {
1089 	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
1090 	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
1091 }
1092 
1093 static void aurora_resume(void)
1094 {
1095 	void __iomem *base = l2x0_base;
1096 
1097 	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
1098 		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
1099 		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
1100 	}
1101 }
1102 
1103 /*
1104  * For the Aurora cache in no-outer mode, enable broadcasting of cache
1105  * commands to the L2 via the CP15 coprocessor.
1106  */
1107 static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
1108 	unsigned num_lock)
1109 {
1110 	u32 u;
1111 
1112 	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
1113 	u |= AURORA_CTRL_FW;		/* Set the FW bit */
1114 	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
1115 
1116 	isb();
1117 
1118 	l2c_enable(base, aux, num_lock);
1119 }
1120 
1121 static void __init aurora_fixup(void __iomem *base, u32 cache_id,
1122 	struct outer_cache_fns *fns)
1123 {
1124 	sync_reg_offset = AURORA_SYNC_REG;
1125 }
1126 
1127 static void __init aurora_of_parse(const struct device_node *np,
1128 				u32 *aux_val, u32 *aux_mask)
1129 {
1130 	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
1131 	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
1132 
1133 	of_property_read_u32(np, "cache-id-part",
1134 			&cache_id_part_number_from_dt);
1135 
1136 	/* Determine and save the write policy */
1137 	l2_wt_override = of_property_read_bool(np, "wt-override");
1138 
1139 	if (l2_wt_override) {
1140 		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
1141 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
1142 	}
1143 
1144 	*aux_val &= ~mask;
1145 	*aux_val |= val;
1146 	*aux_mask &= ~mask;
1147 }
1148 
1149 static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
1150 	.type = "Aurora",
1151 	.way_size_0 = SZ_4K,
1152 	.num_lock = 4,
1153 	.of_parse = aurora_of_parse,
1154 	.enable = l2c_enable,
1155 	.fixup = aurora_fixup,
1156 	.save  = aurora_save,
1157 	.outer_cache = {
1158 		.inv_range   = aurora_inv_range,
1159 		.clean_range = aurora_clean_range,
1160 		.flush_range = aurora_flush_range,
1161 		.flush_all   = l2x0_flush_all,
1162 		.disable     = l2x0_disable,
1163 		.sync        = l2x0_cache_sync,
1164 		.resume      = aurora_resume,
1165 	},
1166 };
1167 
1168 static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
1169 	.type = "Aurora",
1170 	.way_size_0 = SZ_4K,
1171 	.num_lock = 4,
1172 	.of_parse = aurora_of_parse,
1173 	.enable = aurora_enable_no_outer,
1174 	.fixup = aurora_fixup,
1175 	.save  = aurora_save,
1176 	.outer_cache = {
1177 		.resume      = aurora_resume,
1178 	},
1179 };
1180 
1181 /*
1182  * For certain Broadcom SoCs, depending on the address range, different offsets
1183  * need to be added to the address before passing it to L2 for
1184  * invalidation/clean/flush
1185  *
1186  * Section Address Range              Offset        EMI
1187  *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
1188  *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
1189  *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
1190  *
1191  * When the start and end addresses have crossed two different sections, we
1192  * need to break the L2 operation into two, each within its own section.
1193  * For example, if we need to invalidate a range that starts at 0xBFFF0000
1194  * and ends at 0xC0001000, we need to do two invalidations:
1195  * 1) 0xBFFF0000 - 0xBFFFFFFF and 2) 0xC0000000 - 0xC0001000
1196  *
1197  * Note 1:
1198  * By breaking a single L2 operation into two, we may suffer a small
1199  * performance hit, but keep in mind that the cross-section case is very rare
1200  *
1201  * Note 2:
1202  * We do not need to handle the case when the start address is in
1203  * Section 1 and the end address is in Section 3, since it is not a valid use
1204  * case
1205  *
1206  * Note 3:
1207  * In practical terms, section 1 can no longer be used on rev A2, so the
1208  * code does not need to handle it at all.
1209  *
1210  */
1211 #define BCM_SYS_EMI_START_ADDR        0x40000000UL
1212 #define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL
1213 
1214 #define BCM_SYS_EMI_OFFSET            0x40000000UL
1215 #define BCM_VC_EMI_OFFSET             0x80000000UL
1216 
1217 static inline int bcm_addr_is_sys_emi(unsigned long addr)
1218 {
1219 	return (addr >= BCM_SYS_EMI_START_ADDR) &&
1220 		(addr < BCM_VC_EMI_SEC3_START_ADDR);
1221 }
1222 
1223 static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
1224 {
1225 	if (bcm_addr_is_sys_emi(addr))
1226 		return addr + BCM_SYS_EMI_OFFSET;
1227 	else
1228 		return addr + BCM_VC_EMI_OFFSET;
1229 }
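
/*
 * Worked example (hypothetical address): 0x48000000 lies in the SYS EMI
 * section, so it is remapped to 0x48000000 + BCM_SYS_EMI_OFFSET =
 * 0x88000000 before being passed to the L2C-310 range operations.
 */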
1230 
1231 static void bcm_inv_range(unsigned long start, unsigned long end)
1232 {
1233 	unsigned long new_start, new_end;
1234 
1235 	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1236 
1237 	if (unlikely(end <= start))
1238 		return;
1239 
1240 	new_start = bcm_l2_phys_addr(start);
1241 	new_end = bcm_l2_phys_addr(end);
1242 
1243 	/* normal case, no cross section between start and end */
1244 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1245 		l2c210_inv_range(new_start, new_end);
1246 		return;
1247 	}
1248 
1249 	/* They cross sections, so it can only be a cross from section
1250 	 * 2 to section 3
1251 	 */
1252 	l2c210_inv_range(new_start,
1253 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1254 	l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1255 		new_end);
1256 }
1257 
1258 static void bcm_clean_range(unsigned long start, unsigned long end)
1259 {
1260 	unsigned long new_start, new_end;
1261 
1262 	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1263 
1264 	if (unlikely(end <= start))
1265 		return;
1266 
1267 	new_start = bcm_l2_phys_addr(start);
1268 	new_end = bcm_l2_phys_addr(end);
1269 
1270 	/* normal case, no cross section between start and end */
1271 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1272 		l2c210_clean_range(new_start, new_end);
1273 		return;
1274 	}
1275 
1276 	/* They cross sections, so it can only be a cross from section
1277 	 * 2 to section 3
1278 	 */
1279 	l2c210_clean_range(new_start,
1280 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1281 	l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1282 		new_end);
1283 }
1284 
1285 static void bcm_flush_range(unsigned long start, unsigned long end)
1286 {
1287 	unsigned long new_start, new_end;
1288 
1289 	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1290 
1291 	if (unlikely(end <= start))
1292 		return;
1293 
1294 	if ((end - start) >= l2x0_size) {
1295 		outer_cache.flush_all();
1296 		return;
1297 	}
1298 
1299 	new_start = bcm_l2_phys_addr(start);
1300 	new_end = bcm_l2_phys_addr(end);
1301 
1302 	/* normal case, no cross section between start and end */
1303 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1304 		l2c210_flush_range(new_start, new_end);
1305 		return;
1306 	}
1307 
1308 	/* They cross sections, so it can only be a cross from section
1309 	 * 2 to section 3
1310 	 */
1311 	l2c210_flush_range(new_start,
1312 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1313 	l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1314 		new_end);
1315 }
1316 
1317 /* Broadcom L2C-310 is based on ARM's R3P2 or later, and requires no fixups */
1318 static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
1319 	.type = "BCM-L2C-310",
1320 	.way_size_0 = SZ_8K,
1321 	.num_lock = 8,
1322 	.of_parse = l2c310_of_parse,
1323 	.enable = l2c310_enable,
1324 	.save  = l2c310_save,
1325 	.outer_cache = {
1326 		.inv_range   = bcm_inv_range,
1327 		.clean_range = bcm_clean_range,
1328 		.flush_range = bcm_flush_range,
1329 		.flush_all   = l2c210_flush_all,
1330 		.disable     = l2c_disable,
1331 		.sync        = l2c210_sync,
1332 		.resume      = l2c310_resume,
1333 	},
1334 };
1335 
1336 static void __init tauros3_save(void __iomem *base)
1337 {
1338 	l2c_save(base);
1339 
1340 	l2x0_saved_regs.aux2_ctrl =
1341 		readl_relaxed(base + TAUROS3_AUX2_CTRL);
1342 	l2x0_saved_regs.prefetch_ctrl =
1343 		readl_relaxed(base + L310_PREFETCH_CTRL);
1344 }
1345 
1346 static void tauros3_resume(void)
1347 {
1348 	void __iomem *base = l2x0_base;
1349 
1350 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
1351 		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
1352 			       base + TAUROS3_AUX2_CTRL);
1353 		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
1354 			       base + L310_PREFETCH_CTRL);
1355 
1356 		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
1357 	}
1358 }
1359 
1360 static const struct l2c_init_data of_tauros3_data __initconst = {
1361 	.type = "Tauros3",
1362 	.way_size_0 = SZ_8K,
1363 	.num_lock = 8,
1364 	.enable = l2c_enable,
1365 	.save  = tauros3_save,
1366 	/* Tauros3 broadcasts L1 cache operations to L2 */
1367 	.outer_cache = {
1368 		.resume      = tauros3_resume,
1369 	},
1370 };
1371 
1372 #define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
1373 static const struct of_device_id l2x0_ids[] __initconst = {
1374 	L2C_ID("arm,l210-cache", of_l2c210_data),
1375 	L2C_ID("arm,l220-cache", of_l2c220_data),
1376 	L2C_ID("arm,pl310-cache", of_l2c310_data),
1377 	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
1378 	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
1379 	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
1380 	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
1381 	/* Deprecated IDs */
1382 	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
1383 	{}
1384 };
1385 
1386 int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
1387 {
1388 	const struct l2c_init_data *data;
1389 	struct device_node *np;
1390 	struct resource res;
1391 	u32 cache_id;
1392 
1393 	np = of_find_matching_node(NULL, l2x0_ids);
1394 	if (!np)
1395 		return -ENODEV;
1396 
1397 	if (of_address_to_resource(np, 0, &res))
1398 		return -ENODEV;
1399 
1400 	l2x0_base = ioremap(res.start, resource_size(&res));
1401 	if (!l2x0_base)
1402 		return -ENOMEM;
1403 
1404 	l2x0_saved_regs.phy_base = res.start;
1405 
1406 	data = of_match_node(l2x0_ids, np)->data;
1407 
1408 	/* All L2 caches are unified, so this property should be specified */
1409 	if (!of_property_read_bool(np, "cache-unified"))
1410 		pr_err("L2C: device tree omits to specify unified cache\n");
1411 
1412 	/* L2 configuration can only be changed if the cache is disabled */
1413 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
1414 		if (data->of_parse)
1415 			data->of_parse(np, &aux_val, &aux_mask);
1416 
1417 	if (cache_id_part_number_from_dt)
1418 		cache_id = cache_id_part_number_from_dt;
1419 	else
1420 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
1421 
1422 	__l2c_init(data, aux_val, aux_mask, cache_id);
1423 
1424 	return 0;
1425 }
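
/*
 * Illustrative device tree node (all values are placeholders, not taken
 * from a real board):
 *
 *	l2: cache-controller@fff12000 {
 *		compatible = "arm,pl310-cache";
 *		reg = <0xfff12000 0x1000>;
 *		cache-unified;
 *		cache-level = <2>;
 *		arm,tag-latency = <2 2 1>;
 *		arm,data-latency = <3 2 1>;
 *	};
 *
 * l2x0_of_init() matches "arm,pl310-cache" against l2x0_ids, maps the
 * registers and, if the cache is still disabled, calls l2c310_of_parse()
 * to apply the latency and filter properties before __l2c_init() runs.
 */
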
1426 #endif
1427