/*
 * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
 *
 * Copyright (C) 2007 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/err.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>

#include <asm/cacheflush.h>
#include <asm/hardware/cache-l2x0.h>
#include "cache-tauros3.h"
#include "cache-aurora-l2.h"

struct l2c_init_data {
	unsigned num_lock;
	void (*of_parse)(const struct device_node *, u32 *, u32 *);
	void (*enable)(void __iomem *, u32, unsigned);
	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
	void (*save)(void __iomem *);
	struct outer_cache_fns outer_cache;
};

#define CACHE_LINE_SIZE		32

static void __iomem *l2x0_base;
static DEFINE_RAW_SPINLOCK(l2x0_lock);
static u32 l2x0_way_mask;	/* Bitmask of active ways */
static u32 l2x0_size;
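/*
 * Offset of the register used for cache sync.  The per-controller fixup
 * hooks may redirect this: l2c310_fixup() points it at L2X0_DUMMY_REG
 * when the PL310 errata 753970 workaround applies, and aurora_fixup()
 * points it at AURORA_SYNC_REG.
 */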
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;

struct l2x0_regs l2x0_saved_regs;

/*
 * Common code for all cache controllers.
 */
static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
{
	/* wait for cache operation by line or way to complete */
	while (readl_relaxed(reg) & mask)
		cpu_relax();
}

/*
 * This should only be called when we have a requirement that the
 * register be written due to a work-around, as platforms running
 * in non-secure mode may not be able to access this register.
 */
static inline void l2c_set_debug(void __iomem *base, unsigned long val)
{
	outer_cache.set_debug(val);
}

static void __l2c_op_way(void __iomem *reg)
{
	writel_relaxed(l2x0_way_mask, reg);
	l2c_wait_mask(reg, l2x0_way_mask);
}

static inline void l2c_unlock(void __iomem *base, unsigned num)
{
	unsigned i;

	for (i = 0; i < num; i++) {
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
	}
}

/*
 * Enable the L2 cache controller.  This function must only be
 * called when the cache controller is known to be disabled.
 */
static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
	unsigned long flags;

	/* Only write the aux register if it needs changing */
	if (readl_relaxed(base + L2X0_AUX_CTRL) != aux)
		writel_relaxed(aux, base + L2X0_AUX_CTRL);

	l2c_unlock(base, num_lock);

	local_irq_save(flags);
	__l2c_op_way(base + L2X0_INV_WAY);
	writel_relaxed(0, base + sync_reg_offset);
	l2c_wait_mask(base + sync_reg_offset, 1);
	local_irq_restore(flags);

	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
}

static void l2c_disable(void)
{
	void __iomem *base = l2x0_base;

	outer_cache.flush_all();
	writel_relaxed(0, base + L2X0_CTRL);
	dsb(st);
}

#ifdef CONFIG_CACHE_PL310
static inline void cache_wait(void __iomem *reg, unsigned long mask)
{
	/* cache operations by line are atomic on PL310 */
}
#else
#define cache_wait	l2c_wait_mask
#endif

static inline void cache_sync(void)
{
	void __iomem *base = l2x0_base;

	writel_relaxed(0, base + sync_reg_offset);
	cache_wait(base + L2X0_CACHE_SYNC, 1);
}

static inline void l2x0_clean_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
}

static inline void l2x0_inv_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
}

#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
static inline void debug_writel(unsigned long val)
{
	if (outer_cache.set_debug)
		l2c_set_debug(l2x0_base, val);
}

static void pl310_set_debug(unsigned long val)
{
	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
}
#else
/* Optimised out for non-errata case */
static inline void debug_writel(unsigned long val)
{
}

#define pl310_set_debug	NULL
#endif

#ifdef CONFIG_PL310_ERRATA_588369
static inline void l2x0_flush_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;

	/* Clean by PA followed by Invalidate by PA */
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
}
#else

static inline void l2x0_flush_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
}
#endif

static void l2x0_cache_sync(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void __l2x0_flush_all(void)
{
	debug_writel(0x03);
	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
	cache_sync();
	debug_writel(0x00);
}

static void l2x0_flush_all(void)
{
	unsigned long flags;

	/* clean and invalidate all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2x0_flush_all();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_clean_all(void)
{
	unsigned long flags;

	/* clean all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c_op_way(l2x0_base + L2X0_CLEAN_WAY);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_inv_all(void)
{
	unsigned long flags;

	/* invalidate all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	/* Invalidating when L2 is enabled is a no-no */
	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
	__l2c_op_way(l2x0_base + L2X0_INV_WAY);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

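/*
 * The range operations below process at most 4096 bytes of the range at
 * a time while holding l2x0_lock; the lock is dropped and re-taken
 * between blocks so that local interrupts (disabled while the lock is
 * held) and other CPUs spinning on l2x0_lock are not held off for the
 * duration of a large range.
 */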
static void l2x0_inv_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	if (start & (CACHE_LINE_SIZE - 1)) {
		start &= ~(CACHE_LINE_SIZE - 1);
		debug_writel(0x03);
		l2x0_flush_line(start);
		debug_writel(0x00);
		start += CACHE_LINE_SIZE;
	}

	if (end & (CACHE_LINE_SIZE - 1)) {
		end &= ~(CACHE_LINE_SIZE - 1);
		debug_writel(0x03);
		l2x0_flush_line(end);
		debug_writel(0x00);
	}

	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2x0_inv_line(start);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_clean_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2x0_clean_line(start);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_flush_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	if ((end - start) >= l2x0_size) {
		l2x0_flush_all();
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		debug_writel(0x03);
		while (start < blk_end) {
			l2x0_flush_line(start);
			start += CACHE_LINE_SIZE;
		}
		debug_writel(0x00);

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_disable(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2x0_flush_all();
	writel_relaxed(0, l2x0_base + L2X0_CTRL);
	dsb(st);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
	unsigned id;

	id = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK;
	if (id == L2X0_CACHE_ID_PART_L310)
		num_lock = 8;
	else
		num_lock = 1;

	/* l2x0 controller is disabled */
	writel_relaxed(aux, base + L2X0_AUX_CTRL);

	/* Make sure that I&D is not locked down when starting */
	l2c_unlock(base, num_lock);

	l2x0_inv_all();

	/* enable L2X0 */
	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
}

static void l2x0_resume(void)
{
	void __iomem *base = l2x0_base;

	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
		l2x0_enable(base, l2x0_saved_regs.aux_ctrl, 0);
}

static const struct l2c_init_data l2x0_init_fns __initconst = {
	.enable = l2x0_enable,
	.outer_cache = {
		.inv_range = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all = l2x0_flush_all,
		.disable = l2x0_disable,
		.sync = l2x0_cache_sync,
		.resume = l2x0_resume,
	},
};

/*
 * L2C-310 specific code.
 *
 * Errata:
 * 588369: PL310 R0P0->R1P0, fixed R2P0.
 *	Affects: all clean+invalidate operations
 *	clean and invalidate skips the invalidate step, so we need to issue
 *	separate operations.  We also require the above debug workaround
 *	enclosing this code fragment on affected parts.  On unaffected parts,
 *	we must not use this workaround without the debug register writes
 *	to avoid exposing a problem similar to 727915.
 *
 * 727915: PL310 R2P0->R3P0, fixed R3P1.
 *	Affects: clean+invalidate by way
 *	clean and invalidate by way runs in the background, and a store can
 *	hit the line between the clean operation and invalidate operation,
 *	resulting in the store being lost.
 *
 * 753970: PL310 R3P0, fixed R3P1.
 *	Affects: sync
 *	prevents merging writes after the sync operation, until another L2C
 *	operation is performed (or a number of other conditions.)
 *
 * 769419: PL310 R0P0->R3P1, fixed R3P2.
 *	Affects: store buffer
 *	store buffer is not automatically drained.
 */
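/*
 * The workarounds for the above errata live in this file: the
 * CONFIG_PL310_ERRATA_588369 variant of l2x0_flush_line() splits
 * clean+invalidate into separate clean and invalidate operations, the
 * debug_writel(0x03)/debug_writel(0x00) pairs around clean+invalidate
 * operations implement the debug-register workaround for 588369/727915,
 * and l2c310_fixup() below redirects cache sync to L2X0_DUMMY_REG for
 * 753970.  Erratum 769419 is only reported by l2c310_fixup(); no
 * workaround is applied in this file.
 */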
static void __init l2c310_save(void __iomem *base)
{
	unsigned revision;

	l2x0_saved_regs.tag_latency = readl_relaxed(base +
		L2X0_TAG_LATENCY_CTRL);
	l2x0_saved_regs.data_latency = readl_relaxed(base +
		L2X0_DATA_LATENCY_CTRL);
	l2x0_saved_regs.filter_end = readl_relaxed(base +
		L2X0_ADDR_FILTER_END);
	l2x0_saved_regs.filter_start = readl_relaxed(base +
		L2X0_ADDR_FILTER_START);

	revision = readl_relaxed(base + L2X0_CACHE_ID) &
			L2X0_CACHE_ID_RTL_MASK;

	/* From r2p0, there is a Prefetch offset/control register */
	if (revision >= L310_CACHE_ID_RTL_R2P0)
		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
							L2X0_PREFETCH_CTRL);

	/* From r3p0, there is a Power control register */
	if (revision >= L310_CACHE_ID_RTL_R3P0)
		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
							L2X0_POWER_CTRL);
}

static void l2c310_resume(void)
{
	void __iomem *base = l2x0_base;

	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		unsigned revision;

		/* restore pl310 setup */
		writel_relaxed(l2x0_saved_regs.tag_latency,
			       base + L2X0_TAG_LATENCY_CTRL);
		writel_relaxed(l2x0_saved_regs.data_latency,
			       base + L2X0_DATA_LATENCY_CTRL);
		writel_relaxed(l2x0_saved_regs.filter_end,
			       base + L2X0_ADDR_FILTER_END);
		writel_relaxed(l2x0_saved_regs.filter_start,
			       base + L2X0_ADDR_FILTER_START);

		revision = readl_relaxed(base + L2X0_CACHE_ID) &
				L2X0_CACHE_ID_RTL_MASK;

		if (revision >= L310_CACHE_ID_RTL_R2P0)
			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
				       base + L2X0_PREFETCH_CTRL);
		if (revision >= L310_CACHE_ID_RTL_R3P0)
			writel_relaxed(l2x0_saved_regs.pwr_ctrl,
				       base + L2X0_POWER_CTRL);

		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
	}
}

static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
	struct outer_cache_fns *fns)
{
	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
	const char *errata[4];
	unsigned n = 0;

	if (revision <= L310_CACHE_ID_RTL_R3P0)
		fns->set_debug = pl310_set_debug;

	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
	    revision == L310_CACHE_ID_RTL_R3P0) {
		sync_reg_offset = L2X0_DUMMY_REG;
		errata[n++] = "753970";
	}

	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
		errata[n++] = "769419";

	if (n) {
		unsigned i;

		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
		for (i = 0; i < n; i++)
			pr_cont(" %s", errata[i]);
		pr_cont(" enabled\n");
	}
}

static const struct l2c_init_data l2c310_init_fns __initconst = {
	.num_lock = 8,
	.enable = l2c_enable,
	.fixup = l2c310_fixup,
	.save = l2c310_save,
	.outer_cache = {
		.inv_range = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all = l2x0_flush_all,
		.disable = l2x0_disable,
		.sync = l2x0_cache_sync,
		.resume = l2c310_resume,
	},
};

static void __init __l2c_init(const struct l2c_init_data *data,
	u32 aux_val, u32 aux_mask, u32 cache_id)
{
	struct outer_cache_fns fns;
	u32 aux;
	u32 way_size = 0;
	int ways;
	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
	const char *type;

	/*
	 * It is strange to save the register state before initialisation,
	 * but hey, this is what the DT implementations decided to do.
	 */
	if (data->save)
		data->save(l2x0_base);

	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	aux &= aux_mask;
	aux |= aux_val;

	/* Determine the number of ways */
	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L310:
		if (aux & (1 << 16))
			ways = 16;
		else
			ways = 8;
		type = "L310";
		break;

	case L2X0_CACHE_ID_PART_L210:
		ways = (aux >> 13) & 0xf;
		type = "L210";
		break;

	case AURORA_CACHE_ID:
		ways = (aux >> 13) & 0xf;
		ways = 2 << ((ways + 1) >> 2);
		way_size_shift = AURORA_WAY_SIZE_SHIFT;
		type = "Aurora";
		break;

	default:
		/* Assume unknown chips have 8 ways */
		ways = 8;
		type = "L2x0 series";
		break;
	}

	l2x0_way_mask = (1 << ways) - 1;

	/*
	 * L2 cache Size =  Way size * Number of ways
	 */
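	/*
	 * e.g. an 8-way L310 with 64 KB ways gives
	 * l2x0_size = 8 * 64 * SZ_1K = 512 KB.
	 */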
	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
	way_size = 1 << (way_size + way_size_shift);

	l2x0_size = ways * way_size * SZ_1K;

	fns = data->outer_cache;
	if (data->fixup)
		data->fixup(l2x0_base, cache_id, &fns);

	/*
	 * Check if the l2x0 controller is already enabled.  If we are
	 * booting in non-secure mode, accessing the registers below
	 * will fault.
	 */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
		data->enable(l2x0_base, aux, data->num_lock);

	/* Re-read it in case some bits are reserved. */
	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	/* Save the value for resuming. */
	l2x0_saved_regs.aux_ctrl = aux;

	outer_cache = fns;

	pr_info("%s cache controller enabled, %d ways, %d kB\n",
		type, ways, l2x0_size >> 10);
	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
		type, cache_id, aux);
}

void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
{
	const struct l2c_init_data *data;
	u32 cache_id;

	l2x0_base = base;

	cache_id = readl_relaxed(base + L2X0_CACHE_ID);

	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	default:
		data = &l2x0_init_fns;
		break;

	case L2X0_CACHE_ID_PART_L310:
		data = &l2c310_init_fns;
		break;
	}

	__l2c_init(data, aux_val, aux_mask, cache_id);
}

#ifdef CONFIG_OF
static int l2_wt_override;

/* Aurora doesn't have the cache ID register available, so we have to
 * pass it through the device tree */
static u32 cache_id_part_number_from_dt;

static void __init l2x0_of_parse(const struct device_node *np,
				 u32 *aux_val, u32 *aux_mask)
{
	u32 data[2] = { 0, 0 };
	u32 tag = 0;
	u32 dirty = 0;
	u32 val = 0, mask = 0;

	of_property_read_u32(np, "arm,tag-latency", &tag);
	if (tag) {
		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
	}

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1]) {
		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
	}

	of_property_read_u32(np, "arm,dirty-latency", &dirty);
	if (dirty) {
		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}

static const struct l2c_init_data of_l2x0_data __initconst = {
	.of_parse = l2x0_of_parse,
	.enable = l2x0_enable,
	.outer_cache = {
		.inv_range   = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = l2x0_resume,
	},
};

static void __init pl310_of_parse(const struct device_node *np,
				  u32 *aux_val, u32 *aux_mask)
{
	u32 data[3] = { 0, 0, 0 };
	u32 tag[3] = { 0, 0, 0 };
	u32 filter[2] = { 0, 0 };

	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
	if (tag[0] && tag[1] && tag[2])
		writel_relaxed(
			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
			l2x0_base + L2X0_TAG_LATENCY_CTRL);

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1] && data[2])
		writel_relaxed(
			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
			l2x0_base + L2X0_DATA_LATENCY_CTRL);

	of_property_read_u32_array(np, "arm,filter-ranges",
				   filter, ARRAY_SIZE(filter));
	if (filter[1]) {
		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
			       l2x0_base + L2X0_ADDR_FILTER_END);
		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
			       l2x0_base + L2X0_ADDR_FILTER_START);
	}
}
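
/*
 * Illustrative device tree node for the properties parsed above (names
 * match the bindings used here; the node label, addresses and latency
 * values are placeholders, not a recommendation for any particular SoC):
 *
 *	L2: cache-controller@fff12000 {
 *		compatible = "arm,pl310-cache";
 *		reg = <0xfff12000 0x1000>;
 *		arm,tag-latency = <1 1 1>;
 *		arm,data-latency = <2 2 2>;
 *		arm,filter-ranges = <0x80000000 0x40000000>;
 *	};
 */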

static const struct l2c_init_data of_pl310_data __initconst = {
	.num_lock = 8,
	.of_parse = pl310_of_parse,
	.enable = l2c_enable,
	.fixup = l2c310_fixup,
	.save  = l2c310_save,
	.outer_cache = {
		.inv_range   = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = l2c310_resume,
	},
};

/*
 * Note that the end addresses passed to Linux primitives are
 * noninclusive, while the hardware cache range operations use
 * inclusive start and end addresses.
 */
static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
	/*
	 * Limit the number of cache lines processed at once,
	 * since cache range operations stall the CPU pipeline
	 * until completion.
	 */
	if (end > start + MAX_RANGE_SIZE)
		end = start + MAX_RANGE_SIZE;

	/*
	 * Cache range operations can't straddle a page boundary.
	 */
	if (end > PAGE_ALIGN(start+1))
		end = PAGE_ALIGN(start+1);

	return end;
}
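
/*
 * calc_range_end() returns a non-inclusive end address; callers convert
 * it to the inclusive address the hardware expects by passing
 * range_end - CACHE_LINE_SIZE to aurora_pa_range(), as in
 * aurora_inv_range() below.
 */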

/*
 * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
 * and range operations only do a TLB lookup on the start address.
 */
static void aurora_pa_range(unsigned long start, unsigned long end,
			unsigned long offset)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
	writel_relaxed(end, l2x0_base + offset);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);

	cache_sync();
}

static void aurora_inv_range(unsigned long start, unsigned long end)
{
	/*
	 * Round the start address down and the end address up to
	 * cache line boundaries.
	 */
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);

	/*
	 * Invalidate all full cache lines between 'start' and 'end'.
	 */
	while (start < end) {
		unsigned long range_end = calc_range_end(start, end);
		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
				AURORA_INVAL_RANGE_REG);
		start = range_end;
	}
}

static void aurora_clean_range(unsigned long start, unsigned long end)
{
	/*
	 * If L2 is forced to WT, the L2 will always be clean and we
	 * don't need to do anything here.
	 */
	if (!l2_wt_override) {
		start &= ~(CACHE_LINE_SIZE - 1);
		end = ALIGN(end, CACHE_LINE_SIZE);
		while (start != end) {
			unsigned long range_end = calc_range_end(start, end);
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
					AURORA_CLEAN_RANGE_REG);
			start = range_end;
		}
	}
}

static void aurora_flush_range(unsigned long start, unsigned long end)
{
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);
	while (start != end) {
		unsigned long range_end = calc_range_end(start, end);
		/*
		 * If L2 is forced to WT, the L2 will always be clean and we
		 * just need to invalidate.
		 */
		if (l2_wt_override)
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
							AURORA_INVAL_RANGE_REG);
		else
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
							AURORA_FLUSH_RANGE_REG);
		start = range_end;
	}
}

static void aurora_save(void __iomem *base)
{
	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
}

static void aurora_resume(void)
{
	void __iomem *base = l2x0_base;

	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
	}
}

/*
 * For Aurora cache in no outer mode, enable via the CP15 coprocessor
 * broadcasting of cache commands to L2.
 */
static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
	unsigned num_lock)
{
	u32 u;

	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
	u |= AURORA_CTRL_FW;		/* Set the FW bit */
	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));

	isb();

	l2c_enable(base, aux, num_lock);
}

static void __init aurora_fixup(void __iomem *base, u32 cache_id,
	struct outer_cache_fns *fns)
{
	sync_reg_offset = AURORA_SYNC_REG;
}

static void __init aurora_of_parse(const struct device_node *np,
				u32 *aux_val, u32 *aux_mask)
{
	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;

	of_property_read_u32(np, "cache-id-part",
			&cache_id_part_number_from_dt);

	/* Determine and save the write policy */
	l2_wt_override = of_property_read_bool(np, "wt-override");

	if (l2_wt_override) {
		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}

static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = l2c_enable,
	.fixup = aurora_fixup,
	.save  = aurora_save,
	.outer_cache = {
		.inv_range   = aurora_inv_range,
		.clean_range = aurora_clean_range,
		.flush_range = aurora_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = aurora_resume,
	},
};

static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = aurora_enable_no_outer,
	.fixup = aurora_fixup,
	.save  = aurora_save,
	.outer_cache = {
		.resume      = aurora_resume,
	},
};

/*
 * For certain Broadcom SoCs, depending on the address range, different offsets
 * need to be added to the address before passing it to L2 for
 * invalidation/clean/flush
 *
 * Section Address Range              Offset        EMI
 *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
 *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
 *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
 *
 * When the start and end addresses fall in two different sections, we need
 * to break the L2 operation into two, each within its own section.
 * For example, to invalidate a range that starts at 0xBFFF0000 and ends at
 * 0xC0001000, we invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) 0xC0000000 -
 * 0xC0001000.
 *
 * Note 1:
 * By breaking a single L2 operation into two, we may potentially suffer some
 * performance hit, but keep in mind that crossing sections is very rare.
 *
 * Note 2:
 * We do not need to handle the case where the start address is in
 * Section 1 and the end address is in Section 3, since that is not a valid
 * use case.
 *
 * Note 3:
 * Section 1 in practical terms can no longer be used on rev A2.  Because of
 * that, the code does not need to handle section 1 at all.
 *
 */
#define BCM_SYS_EMI_START_ADDR        0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL

#define BCM_SYS_EMI_OFFSET            0x40000000UL
#define BCM_VC_EMI_OFFSET             0x80000000UL

static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
	return (addr >= BCM_SYS_EMI_START_ADDR) &&
		(addr < BCM_VC_EMI_SEC3_START_ADDR);
}

static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
	if (bcm_addr_is_sys_emi(addr))
		return addr + BCM_SYS_EMI_OFFSET;
	else
		return addr + BCM_VC_EMI_OFFSET;
}
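
/*
 * e.g. a SYS EMI address such as 0x40001000 is issued to the L2 as
 * 0x40001000 + BCM_SYS_EMI_OFFSET = 0x80001000, while addresses outside
 * the SYS EMI window get BCM_VC_EMI_OFFSET added instead.
 */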

static void bcm_inv_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_inv_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2x0_inv_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_clean_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_clean_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2x0_clean_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_flush_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		l2x0_flush_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_flush_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2x0_flush_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
	.num_lock = 8,
	.of_parse = pl310_of_parse,
	.enable = l2c_enable,
	.fixup = l2c310_fixup,
	.save  = l2c310_save,
	.outer_cache = {
		.inv_range   = bcm_inv_range,
		.clean_range = bcm_clean_range,
		.flush_range = bcm_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = l2c310_resume,
	},
};

static void __init tauros3_save(void __iomem *base)
{
	l2x0_saved_regs.aux2_ctrl =
		readl_relaxed(base + TAUROS3_AUX2_CTRL);
	l2x0_saved_regs.prefetch_ctrl =
		readl_relaxed(base + L2X0_PREFETCH_CTRL);
}

static void tauros3_resume(void)
{
	void __iomem *base = l2x0_base;

	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
			       base + TAUROS3_AUX2_CTRL);
		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
			       base + L2X0_PREFETCH_CTRL);

		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
	}
}

static const struct l2c_init_data of_tauros3_data __initconst = {
	.num_lock = 8,
	.enable = l2c_enable,
	.save  = tauros3_save,
	/* Tauros3 broadcasts L1 cache operations to L2 */
	.outer_cache = {
		.resume      = tauros3_resume,
	},
};

#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
static const struct of_device_id l2x0_ids[] __initconst = {
	L2C_ID("arm,l210-cache", of_l2x0_data),
	L2C_ID("arm,l220-cache", of_l2x0_data),
	L2C_ID("arm,pl310-cache", of_pl310_data),
	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
	/* Deprecated IDs */
	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	{}
};

int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
	const struct l2c_init_data *data;
	struct device_node *np;
	struct resource res;
	u32 cache_id;

	np = of_find_matching_node(NULL, l2x0_ids);
	if (!np)
		return -ENODEV;

	if (of_address_to_resource(np, 0, &res))
		return -ENODEV;

	l2x0_base = ioremap(res.start, resource_size(&res));
	if (!l2x0_base)
		return -ENOMEM;

	l2x0_saved_regs.phy_base = res.start;

	data = of_match_node(l2x0_ids, np)->data;

	/* L2 configuration can only be changed if the cache is disabled */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
		if (data->of_parse)
			data->of_parse(np, &aux_val, &aux_mask);

	if (cache_id_part_number_from_dt)
		cache_id = cache_id_part_number_from_dt;
	else
		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);

	__l2c_init(data, aux_val, aux_mask, cache_id);

	return 0;
}
#endif