xref: /openbmc/linux/arch/arm/mm/cache-l2x0.c (revision bda0b74e)
1 /*
2  * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
3  *
4  * Copyright (C) 2007 ARM Limited
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 #include <linux/err.h>
20 #include <linux/init.h>
21 #include <linux/spinlock.h>
22 #include <linux/io.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 
26 #include <asm/cacheflush.h>
27 #include <asm/hardware/cache-l2x0.h>
28 #include "cache-tauros3.h"
29 #include "cache-aurora-l2.h"
30 
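/*
 * Per-variant initialisation data.  @num_lock is the number of lockdown
 * register sets cleared by l2c_unlock(); @of_parse parses DT properties
 * into the auxiliary control value/mask; @enable turns the (currently
 * disabled) controller on; @fixup applies errata adjustments to the
 * outer cache operations based on the cache ID; @save records registers
 * for resume; @outer_cache is the operations template copied into the
 * global outer_cache.
 */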
31 struct l2c_init_data {
32 	unsigned num_lock;
33 	void (*of_parse)(const struct device_node *, u32 *, u32 *);
34 	void (*enable)(void __iomem *, u32, unsigned);
35 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
36 	void (*save)(void __iomem *);
37 	struct outer_cache_fns outer_cache;
38 };
39 
40 #define CACHE_LINE_SIZE		32
41 
42 static void __iomem *l2x0_base;
43 static DEFINE_RAW_SPINLOCK(l2x0_lock);
44 static u32 l2x0_way_mask;	/* Bitmask of active ways */
45 static u32 l2x0_size;
46 static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
47 
48 struct l2x0_regs l2x0_saved_regs;
49 
50 /*
51  * Common code for all cache controllers.
52  */
53 static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
54 {
55 	/* wait for cache operation by line or way to complete */
56 	while (readl_relaxed(reg) & mask)
57 		cpu_relax();
58 }
59 
60 /*
61  * This should only be called when we have a requirement that the
62  * register be written due to a work-around, as platforms running
63  * in non-secure mode may not be able to access this register.
64  */
65 static inline void l2c_set_debug(void __iomem *base, unsigned long val)
66 {
67 	outer_cache.set_debug(val);
68 }
69 
70 static void __l2c_op_way(void __iomem *reg)
71 {
72 	writel_relaxed(l2x0_way_mask, reg);
73 	l2c_wait_mask(reg, l2x0_way_mask);
74 }
75 
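/*
 * Clear the data and instruction lockdown registers for each of the
 * 'num' lockdown register sets, so that no ways are locked down when
 * the cache is enabled.
 */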
76 static inline void l2c_unlock(void __iomem *base, unsigned num)
77 {
78 	unsigned i;
79 
80 	for (i = 0; i < num; i++) {
81 		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
82 			       i * L2X0_LOCKDOWN_STRIDE);
83 		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
84 			       i * L2X0_LOCKDOWN_STRIDE);
85 	}
86 }
87 
88 /*
89  * Enable the L2 cache controller.  This function must only be
90  * called when the cache controller is known to be disabled.
91  */
92 static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
93 {
94 	unsigned long flags;
95 
96 	/* Only write the aux register if it needs changing */
97 	if (readl_relaxed(base + L2X0_AUX_CTRL) != aux)
98 		writel_relaxed(aux, base + L2X0_AUX_CTRL);
99 
100 	l2c_unlock(base, num_lock);
101 
102 	local_irq_save(flags);
103 	__l2c_op_way(base + L2X0_INV_WAY);
104 	writel_relaxed(0, base + sync_reg_offset);
105 	l2c_wait_mask(base + sync_reg_offset, 1);
106 	local_irq_restore(flags);
107 
108 	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
109 }
110 
111 static void l2c_disable(void)
112 {
113 	void __iomem *base = l2x0_base;
114 
115 	outer_cache.flush_all();
116 	writel_relaxed(0, base + L2X0_CTRL);
117 	dsb(st);
118 }
119 
120 #ifdef CONFIG_CACHE_PL310
121 static inline void cache_wait(void __iomem *reg, unsigned long mask)
122 {
123 	/* cache operations by line are atomic on PL310 */
124 }
125 #else
126 #define cache_wait	l2c_wait_mask
127 #endif
128 
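/*
 * Issue a cache sync: write the sync register (or the dummy register
 * when working around PL310 erratum 753970) to drain the controller's
 * buffers, then wait for any background operation to complete.
 */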
129 static inline void cache_sync(void)
130 {
131 	void __iomem *base = l2x0_base;
132 
133 	writel_relaxed(0, base + sync_reg_offset);
134 	cache_wait(base + L2X0_CACHE_SYNC, 1);
135 }
136 
137 static inline void l2x0_clean_line(unsigned long addr)
138 {
139 	void __iomem *base = l2x0_base;
140 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
141 	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
142 }
143 
144 static inline void l2x0_inv_line(unsigned long addr)
145 {
146 	void __iomem *base = l2x0_base;
147 	cache_wait(base + L2X0_INV_LINE_PA, 1);
148 	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
149 }
150 
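/*
 * The debug register writes bracketing the clean+invalidate operations
 * below implement the 588369/727915 workarounds: writing 0x03 sets the
 * disable-linefill and disable-write-back bits in the PL310 debug
 * control register, and writing 0x00 restores normal behaviour.
 */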
151 #if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
152 static inline void debug_writel(unsigned long val)
153 {
154 	if (outer_cache.set_debug)
155 		l2c_set_debug(l2x0_base, val);
156 }
157 #else
158 /* Optimised out for non-errata case */
159 static inline void debug_writel(unsigned long val)
160 {
161 }
162 #endif
163 
164 #ifdef CONFIG_PL310_ERRATA_588369
165 static inline void l2x0_flush_line(unsigned long addr)
166 {
167 	void __iomem *base = l2x0_base;
168 
169 	/* Clean by PA followed by Invalidate by PA */
170 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
171 	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
172 	cache_wait(base + L2X0_INV_LINE_PA, 1);
173 	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
174 }
175 #else
176 
177 static inline void l2x0_flush_line(unsigned long addr)
178 {
179 	void __iomem *base = l2x0_base;
180 	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
181 	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
182 }
183 #endif
184 
185 static void l2x0_cache_sync(void)
186 {
187 	unsigned long flags;
188 
189 	raw_spin_lock_irqsave(&l2x0_lock, flags);
190 	cache_sync();
191 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
192 }
193 
194 static void __l2x0_flush_all(void)
195 {
196 	debug_writel(0x03);
197 	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
198 	cache_sync();
199 	debug_writel(0x00);
200 }
201 
202 static void l2x0_flush_all(void)
203 {
204 	unsigned long flags;
205 
206 	/* clean and invalidate all ways */
207 	raw_spin_lock_irqsave(&l2x0_lock, flags);
208 	__l2x0_flush_all();
209 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
210 }
211 
212 static void l2x0_clean_all(void)
213 {
214 	unsigned long flags;
215 
216 	/* clean all ways */
217 	raw_spin_lock_irqsave(&l2x0_lock, flags);
218 	__l2c_op_way(l2x0_base + L2X0_CLEAN_WAY);
219 	cache_sync();
220 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
221 }
222 
223 static void l2x0_inv_all(void)
224 {
225 	unsigned long flags;
226 
227 	/* invalidate all ways */
228 	raw_spin_lock_irqsave(&l2x0_lock, flags);
229 	/* Invalidating when L2 is enabled is a no-no */
230 	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
231 	__l2c_op_way(l2x0_base + L2X0_INV_WAY);
232 	cache_sync();
233 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
234 }
235 
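/*
 * Invalidate the physical address range [start, end).  Partial cache
 * lines at either boundary are cleaned and invalidated rather than just
 * invalidated, so adjacent data is not lost, and the lock is dropped and
 * re-taken after each 4K block to bound interrupt latency.
 */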
236 static void l2x0_inv_range(unsigned long start, unsigned long end)
237 {
238 	void __iomem *base = l2x0_base;
239 	unsigned long flags;
240 
241 	raw_spin_lock_irqsave(&l2x0_lock, flags);
242 	if (start & (CACHE_LINE_SIZE - 1)) {
243 		start &= ~(CACHE_LINE_SIZE - 1);
244 		debug_writel(0x03);
245 		l2x0_flush_line(start);
246 		debug_writel(0x00);
247 		start += CACHE_LINE_SIZE;
248 	}
249 
250 	if (end & (CACHE_LINE_SIZE - 1)) {
251 		end &= ~(CACHE_LINE_SIZE - 1);
252 		debug_writel(0x03);
253 		l2x0_flush_line(end);
254 		debug_writel(0x00);
255 	}
256 
257 	while (start < end) {
258 		unsigned long blk_end = start + min(end - start, 4096UL);
259 
260 		while (start < blk_end) {
261 			l2x0_inv_line(start);
262 			start += CACHE_LINE_SIZE;
263 		}
264 
265 		if (blk_end < end) {
266 			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
267 			raw_spin_lock_irqsave(&l2x0_lock, flags);
268 		}
269 	}
270 	cache_wait(base + L2X0_INV_LINE_PA, 1);
271 	cache_sync();
272 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
273 }
274 
275 static void l2x0_clean_range(unsigned long start, unsigned long end)
276 {
277 	void __iomem *base = l2x0_base;
278 	unsigned long flags;
279 
280 	if ((end - start) >= l2x0_size) {
281 		l2x0_clean_all();
282 		return;
283 	}
284 
285 	raw_spin_lock_irqsave(&l2x0_lock, flags);
286 	start &= ~(CACHE_LINE_SIZE - 1);
287 	while (start < end) {
288 		unsigned long blk_end = start + min(end - start, 4096UL);
289 
290 		while (start < blk_end) {
291 			l2x0_clean_line(start);
292 			start += CACHE_LINE_SIZE;
293 		}
294 
295 		if (blk_end < end) {
296 			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
297 			raw_spin_lock_irqsave(&l2x0_lock, flags);
298 		}
299 	}
300 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
301 	cache_sync();
302 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
303 }
304 
305 static void l2x0_flush_range(unsigned long start, unsigned long end)
306 {
307 	void __iomem *base = l2x0_base;
308 	unsigned long flags;
309 
310 	if ((end - start) >= l2x0_size) {
311 		l2x0_flush_all();
312 		return;
313 	}
314 
315 	raw_spin_lock_irqsave(&l2x0_lock, flags);
316 	start &= ~(CACHE_LINE_SIZE - 1);
317 	while (start < end) {
318 		unsigned long blk_end = start + min(end - start, 4096UL);
319 
320 		debug_writel(0x03);
321 		while (start < blk_end) {
322 			l2x0_flush_line(start);
323 			start += CACHE_LINE_SIZE;
324 		}
325 		debug_writel(0x00);
326 
327 		if (blk_end < end) {
328 			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
329 			raw_spin_lock_irqsave(&l2x0_lock, flags);
330 		}
331 	}
332 	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
333 	cache_sync();
334 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
335 }
336 
337 static void l2x0_disable(void)
338 {
339 	unsigned long flags;
340 
341 	raw_spin_lock_irqsave(&l2x0_lock, flags);
342 	__l2x0_flush_all();
343 	writel_relaxed(0, l2x0_base + L2X0_CTRL);
344 	dsb(st);
345 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
346 }
347 
348 static void l2x0_enable(void __iomem *base, u32 aux, unsigned num_lock)
349 {
350 	unsigned id;
351 
352 	id = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK;
353 	if (id == L2X0_CACHE_ID_PART_L310)
354 		num_lock = 8;
355 	else
356 		num_lock = 1;
357 
358 	/* l2x0 controller is disabled */
359 	writel_relaxed(aux, base + L2X0_AUX_CTRL);
360 
361 	/* Make sure that I&D is not locked down when starting */
362 	l2c_unlock(base, num_lock);
363 
364 	l2x0_inv_all();
365 
366 	/* enable L2X0 */
367 	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
368 }
369 
370 static void l2x0_resume(void)
371 {
372 	void __iomem *base = l2x0_base;
373 
374 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
375 		l2x0_enable(base, l2x0_saved_regs.aux_ctrl, 0);
376 }
377 
378 static const struct l2c_init_data l2x0_init_fns __initconst = {
379 	.enable = l2x0_enable,
380 	.outer_cache = {
381 		.inv_range = l2x0_inv_range,
382 		.clean_range = l2x0_clean_range,
383 		.flush_range = l2x0_flush_range,
384 		.flush_all = l2x0_flush_all,
385 		.disable = l2x0_disable,
386 		.sync = l2x0_cache_sync,
387 		.resume = l2x0_resume,
388 	},
389 };
390 
391 /*
392  * L2C-310 specific code.
393  *
394  * Errata:
395  * 588369: PL310 R0P0->R1P0, fixed R2P0.
396  *	Affects: all clean+invalidate operations
397  *	clean and invalidate skips the invalidate step, so we need to issue
398  *	separate operations.  We also require the above debug workaround
399  *	enclosing this code fragment on affected parts.  On unaffected parts,
400  *	we must not use this workaround without the debug register writes
401  *	to avoid exposing a problem similar to 727915.
402  *
403  * 727915: PL310 R2P0->R3P0, fixed R3P1.
404  *	Affects: clean+invalidate by way
405  *	clean and invalidate by way runs in the background, and a store can
406  *	hit the line between the clean operation and invalidate operation,
407  *	resulting in the store being lost.
408  *
409  * 753970: PL310 R3P0, fixed R3P1.
410  *	Affects: sync
411  *	prevents merging writes after the sync operation, until another L2C
412  *	operation is performed (or a number of other conditions).
413  *
414  * 769419: PL310 R0P0->R3P1, fixed R3P2.
415  *	Affects: store buffer
416  *	store buffer is not automatically drained.
417  */
418 static void l2c310_set_debug(unsigned long val)
419 {
420 	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
421 }
422 
423 static void __init l2c310_save(void __iomem *base)
424 {
425 	unsigned revision;
426 
427 	l2x0_saved_regs.tag_latency = readl_relaxed(base +
428 		L2X0_TAG_LATENCY_CTRL);
429 	l2x0_saved_regs.data_latency = readl_relaxed(base +
430 		L2X0_DATA_LATENCY_CTRL);
431 	l2x0_saved_regs.filter_end = readl_relaxed(base +
432 		L2X0_ADDR_FILTER_END);
433 	l2x0_saved_regs.filter_start = readl_relaxed(base +
434 		L2X0_ADDR_FILTER_START);
435 
436 	revision = readl_relaxed(base + L2X0_CACHE_ID) &
437 			L2X0_CACHE_ID_RTL_MASK;
438 
439 	/* From r2p0, there is a Prefetch offset/control register */
440 	if (revision >= L310_CACHE_ID_RTL_R2P0)
441 		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
442 							L2X0_PREFETCH_CTRL);
443 
444 	/* From r3p0, there is a Power control register */
445 	if (revision >= L310_CACHE_ID_RTL_R3P0)
446 		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
447 							L2X0_POWER_CTRL);
448 }
449 
450 static void l2c310_resume(void)
451 {
452 	void __iomem *base = l2x0_base;
453 
454 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
455 		unsigned revision;
456 
457 		/* restore pl310 setup */
458 		writel_relaxed(l2x0_saved_regs.tag_latency,
459 			       base + L2X0_TAG_LATENCY_CTRL);
460 		writel_relaxed(l2x0_saved_regs.data_latency,
461 			       base + L2X0_DATA_LATENCY_CTRL);
462 		writel_relaxed(l2x0_saved_regs.filter_end,
463 			       base + L2X0_ADDR_FILTER_END);
464 		writel_relaxed(l2x0_saved_regs.filter_start,
465 			       base + L2X0_ADDR_FILTER_START);
466 
467 		revision = readl_relaxed(base + L2X0_CACHE_ID) &
468 				L2X0_CACHE_ID_RTL_MASK;
469 
470 		if (revision >= L310_CACHE_ID_RTL_R2P0)
471 			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
472 				       base + L2X0_PREFETCH_CTRL);
473 		if (revision >= L310_CACHE_ID_RTL_R3P0)
474 			writel_relaxed(l2x0_saved_regs.pwr_ctrl,
475 				       base + L2X0_POWER_CTRL);
476 
477 		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
478 	}
479 }
480 
481 static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
482 	struct outer_cache_fns *fns)
483 {
484 	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
485 	const char *errata[4];
486 	unsigned n = 0;
487 
488 	if (revision <= L310_CACHE_ID_RTL_R3P0)
489 		fns->set_debug = l2c310_set_debug;
490 
491 	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
492 	    revision == L310_CACHE_ID_RTL_R3P0) {
493 		sync_reg_offset = L2X0_DUMMY_REG;
494 		errata[n++] = "753970";
495 	}
496 
497 	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
498 		errata[n++] = "769419";
499 
500 	if (n) {
501 		unsigned i;
502 
503 		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
504 		for (i = 0; i < n; i++)
505 			pr_cont(" %s", errata[i]);
506 		pr_cont(" enabled\n");
507 	}
508 }
509 
510 static const struct l2c_init_data l2c310_init_fns __initconst = {
511 	.num_lock = 8,
512 	.enable = l2c_enable,
513 	.fixup = l2c310_fixup,
514 	.save = l2c310_save,
515 	.outer_cache = {
516 		.inv_range = l2x0_inv_range,
517 		.clean_range = l2x0_clean_range,
518 		.flush_range = l2x0_flush_range,
519 		.flush_all = l2x0_flush_all,
520 		.disable = l2x0_disable,
521 		.sync = l2x0_cache_sync,
522 		.resume = l2c310_resume,
523 	},
524 };
525 
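/*
 * Common initialisation: save the controller state if requested, merge
 * the caller's auxiliary control value, derive the geometry (number of
 * ways and way size) from the cache ID and aux register, apply any
 * errata fixups to the outer cache operations, enable the controller if
 * it is not already enabled, and install the outer_cache operations.
 */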
526 static void __init __l2c_init(const struct l2c_init_data *data,
527 	u32 aux_val, u32 aux_mask, u32 cache_id)
528 {
529 	struct outer_cache_fns fns;
530 	u32 aux;
531 	u32 way_size = 0;
532 	int ways;
533 	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
534 	const char *type;
535 
536 	/*
537 	 * It is strange to save the register state before initialisation,
538 	 * but hey, this is what the DT implementations decided to do.
539 	 */
540 	if (data->save)
541 		data->save(l2x0_base);
542 
543 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
544 
545 	aux &= aux_mask;
546 	aux |= aux_val;
547 
548 	/* Determine the number of ways */
549 	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
550 	case L2X0_CACHE_ID_PART_L310:
551 		if (aux & (1 << 16))
552 			ways = 16;
553 		else
554 			ways = 8;
555 		type = "L310";
556 		break;
557 
558 	case L2X0_CACHE_ID_PART_L210:
559 		ways = (aux >> 13) & 0xf;
560 		type = "L210";
561 		break;
562 
563 	case AURORA_CACHE_ID:
564 		ways = (aux >> 13) & 0xf;
565 		ways = 2 << ((ways + 1) >> 2);
566 		way_size_shift = AURORA_WAY_SIZE_SHIFT;
567 		type = "Aurora";
568 		break;
569 
570 	default:
571 		/* Assume unknown chips have 8 ways */
572 		ways = 8;
573 		type = "L2x0 series";
574 		break;
575 	}
576 
577 	l2x0_way_mask = (1 << ways) - 1;
578 
579 	/*
580 	 * L2 cache size = way size * number of ways
581 	 */
582 	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
583 	way_size = 1 << (way_size + way_size_shift);
584 
585 	l2x0_size = ways * way_size * SZ_1K;
586 
587 	fns = data->outer_cache;
588 	if (data->fixup)
589 		data->fixup(l2x0_base, cache_id, &fns);
590 
591 	/*
592 	 * Check if the l2x0 controller is already enabled.  If we are booting
593 	 * in non-secure mode, accessing the registers below will fault.
594 	 */
595 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
596 		data->enable(l2x0_base, aux, data->num_lock);
597 
598 	/* Re-read it in case some bits are reserved. */
599 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
600 
601 	/* Save the value for resuming. */
602 	l2x0_saved_regs.aux_ctrl = aux;
603 
604 	outer_cache = fns;
605 
606 	pr_info("%s cache controller enabled, %d ways, %d kB\n",
607 		type, ways, l2x0_size >> 10);
608 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
609 		type, cache_id, aux);
610 }
611 
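/*
 * Non-DT entry point: record the register base, read the cache ID and
 * select the matching init data before handing over to __l2c_init().
 */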
612 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
613 {
614 	const struct l2c_init_data *data;
615 	u32 cache_id;
616 
617 	l2x0_base = base;
618 
619 	cache_id = readl_relaxed(base + L2X0_CACHE_ID);
620 
621 	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
622 	default:
623 		data = &l2x0_init_fns;
624 		break;
625 
626 	case L2X0_CACHE_ID_PART_L310:
627 		data = &l2c310_init_fns;
628 		break;
629 	}
630 
631 	__l2c_init(data, aux_val, aux_mask, cache_id);
632 }
633 
634 #ifdef CONFIG_OF
635 static int l2_wt_override;
636 
637 /* Aurora doesn't have the cache ID register available, so we have to
638  * pass it through the device tree */
639 static u32 cache_id_part_number_from_dt;
640 
641 static void __init l2x0_of_parse(const struct device_node *np,
642 				 u32 *aux_val, u32 *aux_mask)
643 {
644 	u32 data[2] = { 0, 0 };
645 	u32 tag = 0;
646 	u32 dirty = 0;
647 	u32 val = 0, mask = 0;
648 
649 	of_property_read_u32(np, "arm,tag-latency", &tag);
650 	if (tag) {
651 		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
652 		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
653 	}
654 
655 	of_property_read_u32_array(np, "arm,data-latency",
656 				   data, ARRAY_SIZE(data));
657 	if (data[0] && data[1]) {
658 		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
659 			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
660 		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
661 		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
662 	}
663 
664 	of_property_read_u32(np, "arm,dirty-latency", &dirty);
665 	if (dirty) {
666 		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
667 		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
668 	}
669 
670 	*aux_val &= ~mask;
671 	*aux_val |= val;
672 	*aux_mask &= ~mask;
673 }
674 
675 static const struct l2c_init_data of_l2x0_data __initconst = {
676 	.of_parse = l2x0_of_parse,
677 	.enable = l2x0_enable,
678 	.outer_cache = {
679 		.inv_range   = l2x0_inv_range,
680 		.clean_range = l2x0_clean_range,
681 		.flush_range = l2x0_flush_range,
682 		.flush_all   = l2x0_flush_all,
683 		.disable     = l2x0_disable,
684 		.sync        = l2x0_cache_sync,
685 		.resume      = l2x0_resume,
686 	},
687 };
688 
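/*
 * A minimal sketch of the DT properties this parser consumes; the node
 * name, unit address and latency values below are illustrative only:
 *
 *	l2: cache-controller@fff12000 {
 *		compatible = "arm,pl310-cache";
 *		reg = <0xfff12000 0x1000>;
 *		cache-unified;
 *		cache-level = <2>;
 *		arm,tag-latency = <2 2 1>;	(read, write, setup cycles)
 *		arm,data-latency = <3 2 1>;	(read, write, setup cycles)
 *		arm,filter-ranges = <0x80000000 0x10000000>;	(start, length)
 *	};
 */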
689 static void __init pl310_of_parse(const struct device_node *np,
690 				  u32 *aux_val, u32 *aux_mask)
691 {
692 	u32 data[3] = { 0, 0, 0 };
693 	u32 tag[3] = { 0, 0, 0 };
694 	u32 filter[2] = { 0, 0 };
695 
696 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
697 	if (tag[0] && tag[1] && tag[2])
698 		writel_relaxed(
699 			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
700 			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
701 			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
702 			l2x0_base + L2X0_TAG_LATENCY_CTRL);
703 
704 	of_property_read_u32_array(np, "arm,data-latency",
705 				   data, ARRAY_SIZE(data));
706 	if (data[0] && data[1] && data[2])
707 		writel_relaxed(
708 			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
709 			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
710 			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
711 			l2x0_base + L2X0_DATA_LATENCY_CTRL);
712 
713 	of_property_read_u32_array(np, "arm,filter-ranges",
714 				   filter, ARRAY_SIZE(filter));
715 	if (filter[1]) {
716 		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
717 			       l2x0_base + L2X0_ADDR_FILTER_END);
718 		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
719 			       l2x0_base + L2X0_ADDR_FILTER_START);
720 	}
721 }
722 
723 static const struct l2c_init_data of_pl310_data __initconst = {
724 	.num_lock = 8,
725 	.of_parse = pl310_of_parse,
726 	.enable = l2c_enable,
727 	.fixup = l2c310_fixup,
728 	.save  = l2c310_save,
729 	.outer_cache = {
730 		.inv_range   = l2x0_inv_range,
731 		.clean_range = l2x0_clean_range,
732 		.flush_range = l2x0_flush_range,
733 		.flush_all   = l2x0_flush_all,
734 		.disable     = l2x0_disable,
735 		.sync        = l2x0_cache_sync,
736 		.resume      = l2c310_resume,
737 	},
738 };
739 
740 /*
741  * Note that the end addresses passed to Linux primitives are
742  * noninclusive, while the hardware cache range operations use
743  * inclusive start and end addresses.
744  */
745 static unsigned long calc_range_end(unsigned long start, unsigned long end)
746 {
747 	/*
748 	 * Limit the number of cache lines processed at once,
749 	 * since cache range operations stall the CPU pipeline
750 	 * until completion.
751 	 */
752 	if (end > start + MAX_RANGE_SIZE)
753 		end = start + MAX_RANGE_SIZE;
754 
755 	/*
756 	 * Cache range operations can't straddle a page boundary.
757 	 */
758 	if (end > PAGE_ALIGN(start+1))
759 		end = PAGE_ALIGN(start+1);
760 
761 	return end;
762 }
763 
764 /*
765  * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
766  * and range operations only do a TLB lookup on the start address.
767  */
768 static void aurora_pa_range(unsigned long start, unsigned long end,
769 			unsigned long offset)
770 {
771 	unsigned long flags;
772 
773 	raw_spin_lock_irqsave(&l2x0_lock, flags);
774 	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
775 	writel_relaxed(end, l2x0_base + offset);
776 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
777 
778 	cache_sync();
779 }
780 
781 static void aurora_inv_range(unsigned long start, unsigned long end)
782 {
783 	/*
784 	 * align start and end addresses to the cache line size
785 	 */
786 	start &= ~(CACHE_LINE_SIZE - 1);
787 	end = ALIGN(end, CACHE_LINE_SIZE);
788 
789 	/*
790 	 * Invalidate all full cache lines between 'start' and 'end'.
791 	 */
792 	while (start < end) {
793 		unsigned long range_end = calc_range_end(start, end);
794 		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
795 				AURORA_INVAL_RANGE_REG);
796 		start = range_end;
797 	}
798 }
799 
800 static void aurora_clean_range(unsigned long start, unsigned long end)
801 {
802 	/*
803 	 * If L2 is forced to WT, the L2 will always be clean and we
804 	 * don't need to do anything here.
805 	 */
806 	if (!l2_wt_override) {
807 		start &= ~(CACHE_LINE_SIZE - 1);
808 		end = ALIGN(end, CACHE_LINE_SIZE);
809 		while (start != end) {
810 			unsigned long range_end = calc_range_end(start, end);
811 			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
812 					AURORA_CLEAN_RANGE_REG);
813 			start = range_end;
814 		}
815 	}
816 }
817 
818 static void aurora_flush_range(unsigned long start, unsigned long end)
819 {
820 	start &= ~(CACHE_LINE_SIZE - 1);
821 	end = ALIGN(end, CACHE_LINE_SIZE);
822 	while (start != end) {
823 		unsigned long range_end = calc_range_end(start, end);
824 		/*
825 		 * If L2 is forced to WT, the L2 will always be clean and we
826 		 * just need to invalidate.
827 		 */
828 		if (l2_wt_override)
829 			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
830 							AURORA_INVAL_RANGE_REG);
831 		else
832 			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
833 							AURORA_FLUSH_RANGE_REG);
834 		start = range_end;
835 	}
836 }
837 
838 static void aurora_save(void __iomem *base)
839 {
840 	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
841 	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
842 }
843 
844 static void aurora_resume(void)
845 {
846 	void __iomem *base = l2x0_base;
847 
848 	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
849 		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
850 		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
851 	}
852 }
853 
854 /*
855  * For the Aurora cache in no-outer mode, enable broadcasting of cache
856  * maintenance commands to L2 via the CP15 coprocessor (the FW bit).
857  */
858 static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
859 	unsigned num_lock)
860 {
861 	u32 u;
862 
863 	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
864 	u |= AURORA_CTRL_FW;		/* Set the FW bit */
865 	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
866 
867 	isb();
868 
869 	l2c_enable(base, aux, num_lock);
870 }
871 
872 static void __init aurora_fixup(void __iomem *base, u32 cache_id,
873 	struct outer_cache_fns *fns)
874 {
875 	sync_reg_offset = AURORA_SYNC_REG;
876 }
877 
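/*
 * Aurora-specific DT properties consumed here: "cache-id-part" supplies
 * the part number that the missing cache ID register would otherwise
 * provide, and the boolean "wt-override" forces the L2 into
 * write-through mode.
 */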
878 static void __init aurora_of_parse(const struct device_node *np,
879 				u32 *aux_val, u32 *aux_mask)
880 {
881 	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
882 	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
883 
884 	of_property_read_u32(np, "cache-id-part",
885 			&cache_id_part_number_from_dt);
886 
887 	/* Determine and save the write policy */
888 	l2_wt_override = of_property_read_bool(np, "wt-override");
889 
890 	if (l2_wt_override) {
891 		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
892 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
893 	}
894 
895 	*aux_val &= ~mask;
896 	*aux_val |= val;
897 	*aux_mask &= ~mask;
898 }
899 
900 static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
901 	.num_lock = 4,
902 	.of_parse = aurora_of_parse,
903 	.enable = l2c_enable,
904 	.fixup = aurora_fixup,
905 	.save  = aurora_save,
906 	.outer_cache = {
907 		.inv_range   = aurora_inv_range,
908 		.clean_range = aurora_clean_range,
909 		.flush_range = aurora_flush_range,
910 		.flush_all   = l2x0_flush_all,
911 		.disable     = l2x0_disable,
912 		.sync        = l2x0_cache_sync,
913 		.resume      = aurora_resume,
914 	},
915 };
916 
917 static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
918 	.num_lock = 4,
919 	.of_parse = aurora_of_parse,
920 	.enable = aurora_enable_no_outer,
921 	.fixup = aurora_fixup,
922 	.save  = aurora_save,
923 	.outer_cache = {
924 		.resume      = aurora_resume,
925 	},
926 };
927 
928 /*
929  * For certain Broadcom SoCs, depending on the address range, different offsets
930  * need to be added to the address before passing it to L2 for
931  * invalidation/clean/flush
932  *
933  * Section Address Range              Offset        EMI
934  *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
935  *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
936  *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
937  *
938  * When the start and end addresses fall in two different sections, we
939  * need to break the L2 operation into two, each within its own section.
940  * For example, to invalidate a range that starts at 0xBFFF0000 and ends
941  * at 0xC0001000, we invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2)
942  * 0xC0000000 - 0xC0001000.
943  *
944  * Note 1:
945  * By breaking a single L2 operation into two, we may suffer a small
946  * performance hit, but keep in mind that the cross-section case is very rare.
947  *
948  * Note 2:
949  * We do not need to handle the case when the start address is in
950  * Section 1 and the end address is in Section 3, since it is not a valid use
951  * case
952  *
953  * Note 3:
954  * In practical terms, Section 1 can no longer be used on rev A2, so the
955  * code does not need to handle Section 1 at all.
956  *
957  */
958 #define BCM_SYS_EMI_START_ADDR        0x40000000UL
959 #define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL
960 
961 #define BCM_SYS_EMI_OFFSET            0x40000000UL
962 #define BCM_VC_EMI_OFFSET             0x80000000UL
963 
964 static inline int bcm_addr_is_sys_emi(unsigned long addr)
965 {
966 	return (addr >= BCM_SYS_EMI_START_ADDR) &&
967 		(addr < BCM_VC_EMI_SEC3_START_ADDR);
968 }
969 
970 static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
971 {
972 	if (bcm_addr_is_sys_emi(addr))
973 		return addr + BCM_SYS_EMI_OFFSET;
974 	else
975 		return addr + BCM_VC_EMI_OFFSET;
976 }
977 
978 static void bcm_inv_range(unsigned long start, unsigned long end)
979 {
980 	unsigned long new_start, new_end;
981 
982 	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
983 
984 	if (unlikely(end <= start))
985 		return;
986 
987 	new_start = bcm_l2_phys_addr(start);
988 	new_end = bcm_l2_phys_addr(end);
989 
990 	/* normal case, no cross section between start and end */
991 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
992 		l2x0_inv_range(new_start, new_end);
993 		return;
994 	}
995 
996 	/* They cross sections, so it can only be a crossing from section
997 	 * 2 to section 3
998 	 */
999 	l2x0_inv_range(new_start,
1000 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1001 	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1002 		new_end);
1003 }
1004 
1005 static void bcm_clean_range(unsigned long start, unsigned long end)
1006 {
1007 	unsigned long new_start, new_end;
1008 
1009 	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1010 
1011 	if (unlikely(end <= start))
1012 		return;
1013 
1014 	if ((end - start) >= l2x0_size) {
1015 		l2x0_clean_all();
1016 		return;
1017 	}
1018 
1019 	new_start = bcm_l2_phys_addr(start);
1020 	new_end = bcm_l2_phys_addr(end);
1021 
1022 	/* normal case, no cross section between start and end */
1023 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1024 		l2x0_clean_range(new_start, new_end);
1025 		return;
1026 	}
1027 
1028 	/* They cross sections, so it can only be a crossing from section
1029 	 * 2 to section 3
1030 	 */
1031 	l2x0_clean_range(new_start,
1032 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1033 	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1034 		new_end);
1035 }
1036 
1037 static void bcm_flush_range(unsigned long start, unsigned long end)
1038 {
1039 	unsigned long new_start, new_end;
1040 
1041 	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
1042 
1043 	if (unlikely(end <= start))
1044 		return;
1045 
1046 	if ((end - start) >= l2x0_size) {
1047 		l2x0_flush_all();
1048 		return;
1049 	}
1050 
1051 	new_start = bcm_l2_phys_addr(start);
1052 	new_end = bcm_l2_phys_addr(end);
1053 
1054 	/* normal case, no cross section between start and end */
1055 	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
1056 		l2x0_flush_range(new_start, new_end);
1057 		return;
1058 	}
1059 
1060 	/* They cross sections, so it can only be a crossing from section
1061 	 * 2 to section 3
1062 	 */
1063 	l2x0_flush_range(new_start,
1064 		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
1065 	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
1066 		new_end);
1067 }
1068 
1069 static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
1070 	.num_lock = 8,
1071 	.of_parse = pl310_of_parse,
1072 	.enable = l2c_enable,
1073 	.fixup = l2c310_fixup,
1074 	.save  = l2c310_save,
1075 	.outer_cache = {
1076 		.inv_range   = bcm_inv_range,
1077 		.clean_range = bcm_clean_range,
1078 		.flush_range = bcm_flush_range,
1079 		.flush_all   = l2x0_flush_all,
1080 		.disable     = l2x0_disable,
1081 		.sync        = l2x0_cache_sync,
1082 		.resume      = l2c310_resume,
1083 	},
1084 };
1085 
1086 static void __init tauros3_save(void __iomem *base)
1087 {
1088 	l2x0_saved_regs.aux2_ctrl =
1089 		readl_relaxed(base + TAUROS3_AUX2_CTRL);
1090 	l2x0_saved_regs.prefetch_ctrl =
1091 		readl_relaxed(base + L2X0_PREFETCH_CTRL);
1092 }
1093 
1094 static void tauros3_resume(void)
1095 {
1096 	void __iomem *base = l2x0_base;
1097 
1098 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
1099 		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
1100 			       base + TAUROS3_AUX2_CTRL);
1101 		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
1102 			       base + L2X0_PREFETCH_CTRL);
1103 
1104 		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
1105 	}
1106 }
1107 
1108 static const struct l2c_init_data of_tauros3_data __initconst = {
1109 	.num_lock = 8,
1110 	.enable = l2c_enable,
1111 	.save  = tauros3_save,
1112 	/* Tauros3 broadcasts L1 cache operations to L2 */
1113 	.outer_cache = {
1114 		.resume      = tauros3_resume,
1115 	},
1116 };
1117 
1118 #define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
1119 static const struct of_device_id l2x0_ids[] __initconst = {
1120 	L2C_ID("arm,l210-cache", of_l2x0_data),
1121 	L2C_ID("arm,l220-cache", of_l2x0_data),
1122 	L2C_ID("arm,pl310-cache", of_pl310_data),
1123 	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
1124 	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
1125 	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
1126 	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
1127 	/* Deprecated IDs */
1128 	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
1129 	{}
1130 };
1131 
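/*
 * DT entry point: find a matching cache controller node, map its
 * registers, parse the optional tuning properties (only while the cache
 * is still disabled), then initialise via __l2c_init().
 */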
1132 int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
1133 {
1134 	const struct l2c_init_data *data;
1135 	struct device_node *np;
1136 	struct resource res;
1137 	u32 cache_id;
1138 
1139 	np = of_find_matching_node(NULL, l2x0_ids);
1140 	if (!np)
1141 		return -ENODEV;
1142 
1143 	if (of_address_to_resource(np, 0, &res))
1144 		return -ENODEV;
1145 
1146 	l2x0_base = ioremap(res.start, resource_size(&res));
1147 	if (!l2x0_base)
1148 		return -ENOMEM;
1149 
1150 	l2x0_saved_regs.phy_base = res.start;
1151 
1152 	data = of_match_node(l2x0_ids, np)->data;
1153 
1154 	/* L2 configuration can only be changed if the cache is disabled */
1155 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
1156 		if (data->of_parse)
1157 			data->of_parse(np, &aux_val, &aux_mask);
1158 
1159 	if (cache_id_part_number_from_dt)
1160 		cache_id = cache_id_part_number_from_dt;
1161 	else
1162 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
1163 
1164 	__l2c_init(data, aux_val, aux_mask, cache_id);
1165 
1166 	return 0;
1167 }
1168 #endif
1169