/*
 * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
 *
 * Copyright (C) 2007 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/err.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>

#include <asm/cacheflush.h>
#include <asm/hardware/cache-l2x0.h>
#include "cache-tauros3.h"
#include "cache-aurora-l2.h"

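/*
 * Per-variant initialisation data.  @num_lock is the number of lockdown
 * register sets to clear before enabling the cache, @of_parse adjusts the
 * auxiliary control value/mask from device tree properties, @enable and
 * @save handle controller bring-up and suspend state, and @outer_cache
 * holds the methods installed into the global outer_cache structure.
 */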
struct l2c_init_data {
	unsigned num_lock;
	void (*of_parse)(const struct device_node *, u32 *, u32 *);
	void (*enable)(void __iomem *, u32, unsigned);
	void (*save)(void __iomem *);
	struct outer_cache_fns outer_cache;
};

#define CACHE_LINE_SIZE		32

static void __iomem *l2x0_base;
static DEFINE_RAW_SPINLOCK(l2x0_lock);
static u32 l2x0_way_mask;	/* Bitmask of active ways */
static u32 l2x0_size;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;

struct l2x0_regs l2x0_saved_regs;

/*
 * Common code for all cache controllers.
 */
static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
{
	/* wait for cache operation by line or way to complete */
	while (readl_relaxed(reg) & mask)
		cpu_relax();
}

/*
 * This should only be called when we have a requirement that the
 * register be written due to a work-around, as platforms running
 * in non-secure mode may not be able to access this register.
 */
static inline void l2c_set_debug(void __iomem *base, unsigned long val)
{
	outer_cache.set_debug(val);
}

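/*
 * Perform a background maintenance operation on all active ways by
 * writing the way mask to @reg, then poll until the hardware clears
 * those bits to signal completion.
 */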
static void __l2c_op_way(void __iomem *reg)
{
	writel_relaxed(l2x0_way_mask, reg);
	l2c_wait_mask(reg, l2x0_way_mask);
}

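/*
 * Clear the data and instruction lockdown registers for @num lockdown
 * register sets so that no cache ways are left locked down.
 */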
static inline void l2c_unlock(void __iomem *base, unsigned num)
{
	unsigned i;

	for (i = 0; i < num; i++) {
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
	}
}

/*
 * Enable the L2 cache controller.  This function must only be
 * called when the cache controller is known to be disabled.
 */
static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
	unsigned long flags;

	l2c_unlock(base, num_lock);

	writel_relaxed(aux, base + L2X0_AUX_CTRL);

	local_irq_save(flags);
	__l2c_op_way(base + L2X0_INV_WAY);
	writel_relaxed(0, base + sync_reg_offset);
	l2c_wait_mask(base + sync_reg_offset, 1);
	local_irq_restore(flags);

	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
}

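/*
 * Disable the L2 cache controller: flush it via the installed
 * outer_cache.flush_all() method, then clear the control register
 * and drain any outstanding writes.
 */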
static void l2c_disable(void)
{
	void __iomem *base = l2x0_base;

	outer_cache.flush_all();
	writel_relaxed(0, base + L2X0_CTRL);
	dsb(st);
}

#ifdef CONFIG_CACHE_PL310
static inline void cache_wait(void __iomem *reg, unsigned long mask)
{
	/* cache operations by line are atomic on PL310 */
}
#else
#define cache_wait	l2c_wait_mask
#endif

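/*
 * Issue a cache sync and wait for it to complete; the wait is a no-op
 * on PL310, where sync and line operations are atomic.
 */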
static inline void cache_sync(void)
{
	void __iomem *base = l2x0_base;

	writel_relaxed(0, base + sync_reg_offset);
	cache_wait(base + L2X0_CACHE_SYNC, 1);
}

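/*
 * Post a single clean or invalidate by physical address.  On L210/L220
 * we must wait for any previous operation on the register to complete
 * before posting a new one; on PL310 the wait is compiled out because
 * line operations are atomic.
 */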
static inline void l2x0_clean_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
}

static inline void l2x0_inv_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
}

#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
static inline void debug_writel(unsigned long val)
{
	if (outer_cache.set_debug)
		l2c_set_debug(l2x0_base, val);
}

static void pl310_set_debug(unsigned long val)
{
	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
}
#else
/* Optimised out for non-errata case */
static inline void debug_writel(unsigned long val)
{
}

#define pl310_set_debug	NULL
#endif

#ifdef CONFIG_PL310_ERRATA_588369
static inline void l2x0_flush_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;

	/* Clean by PA followed by Invalidate by PA */
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
}
#else

static inline void l2x0_flush_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
}
#endif

static void l2x0_cache_sync(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void __l2x0_flush_all(void)
{
	debug_writel(0x03);
	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
	cache_sync();
	debug_writel(0x00);
}

static void l2x0_flush_all(void)
{
	unsigned long flags;

	/* clean all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2x0_flush_all();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_clean_all(void)
{
	unsigned long flags;

	/* clean all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c_op_way(l2x0_base + L2X0_CLEAN_WAY);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_inv_all(void)
{
	unsigned long flags;

	/* invalidate all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	/* Invalidating when L2 is enabled is a no-no */
	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
	__l2c_op_way(l2x0_base + L2X0_INV_WAY);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

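/*
 * The range operations below work on cache-line aligned physical
 * addresses.  Partial lines at either end of an invalidate are
 * cleaned+invalidated instead so adjacent data is not lost, and the
 * lock is dropped and retaken every 4K to bound interrupt latency.
 */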
static void l2x0_inv_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	if (start & (CACHE_LINE_SIZE - 1)) {
		start &= ~(CACHE_LINE_SIZE - 1);
		debug_writel(0x03);
		l2x0_flush_line(start);
		debug_writel(0x00);
		start += CACHE_LINE_SIZE;
	}

	if (end & (CACHE_LINE_SIZE - 1)) {
		end &= ~(CACHE_LINE_SIZE - 1);
		debug_writel(0x03);
		l2x0_flush_line(end);
		debug_writel(0x00);
	}

	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2x0_inv_line(start);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_clean_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2x0_clean_line(start);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_flush_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	if ((end - start) >= l2x0_size) {
		l2x0_flush_all();
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		debug_writel(0x03);
		while (start < blk_end) {
			l2x0_flush_line(start);
			start += CACHE_LINE_SIZE;
		}
		debug_writel(0x00);

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_disable(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2x0_flush_all();
	writel_relaxed(0, l2x0_base + L2X0_CTRL);
	dsb(st);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

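/*
 * The L310 implements eight lockdown register sets, while the L210 and
 * unknown parts have a single set; clear them all so that no ways remain
 * locked before the cache is (re-)enabled.
 */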
static void l2x0_unlock(u32 cache_id)
{
	int lockregs;

	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L310:
		lockregs = 8;
		break;
	default:
		/* L210 and unknown types */
		lockregs = 1;
		break;
	}

	l2c_unlock(l2x0_base, lockregs);
}

static void l2x0_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
	/* Make sure that I&D is not locked down when starting */
	l2x0_unlock(readl_relaxed(base + L2X0_CACHE_ID));

	/* l2x0 controller is disabled */
	writel_relaxed(aux, base + L2X0_AUX_CTRL);

	l2x0_inv_all();

	/* enable L2X0 */
	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
}

static const struct l2c_init_data l2x0_init_fns __initconst = {
	.enable = l2x0_enable,
	.outer_cache = {
		.inv_range = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all = l2x0_flush_all,
		.disable = l2x0_disable,
		.sync = l2x0_cache_sync,
	},
};

static void __init __l2c_init(const struct l2c_init_data *data,
	u32 aux_val, u32 aux_mask, u32 cache_id)
{
	u32 aux;
	u32 way_size = 0;
	int ways;
	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
	const char *type;

	/*
	 * It is strange to save the register state before initialisation,
	 * but hey, this is what the DT implementations decided to do.
	 */
	if (data->save)
		data->save(l2x0_base);

	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	aux &= aux_mask;
	aux |= aux_val;

	/* Determine the number of ways */
	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L310:
		if (aux & (1 << 16))
			ways = 16;
		else
			ways = 8;
		type = "L310";
#ifdef CONFIG_PL310_ERRATA_753970
		/* Unmapped register. */
		sync_reg_offset = L2X0_DUMMY_REG;
#endif
		break;
	case L2X0_CACHE_ID_PART_L210:
		ways = (aux >> 13) & 0xf;
		type = "L210";
		break;

	case AURORA_CACHE_ID:
		sync_reg_offset = AURORA_SYNC_REG;
		ways = (aux >> 13) & 0xf;
		ways = 2 << ((ways + 1) >> 2);
		way_size_shift = AURORA_WAY_SIZE_SHIFT;
		type = "Aurora";
		break;
	default:
		/* Assume unknown chips have 8 ways */
		ways = 8;
		type = "L2x0 series";
		break;
	}

	l2x0_way_mask = (1 << ways) - 1;

	/*
	 * L2 cache Size =  Way size * Number of ways
	 */
	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
	way_size = 1 << (way_size + way_size_shift);

	l2x0_size = ways * way_size * SZ_1K;

	/*
	 * Check if l2x0 controller is already enabled.  If we are booting
	 * in non-secure mode accessing the below registers will fault.
	 */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
		data->enable(l2x0_base, aux, data->num_lock);

	/* Re-read it in case some bits are reserved. */
	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	/* Save the value for resuming. */
	l2x0_saved_regs.aux_ctrl = aux;

	outer_cache = data->outer_cache;

	if ((cache_id & L2X0_CACHE_ID_PART_MASK) == L2X0_CACHE_ID_PART_L310 &&
	    (cache_id & L2X0_CACHE_ID_RTL_MASK) <= L310_CACHE_ID_RTL_R3P0)
		outer_cache.set_debug = pl310_set_debug;

	pr_info("%s cache controller enabled\n", type);
	pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n",
		ways, cache_id, aux, l2x0_size >> 10);
}

void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
{
	u32 cache_id;

	l2x0_base = base;

	cache_id = readl_relaxed(base + L2X0_CACHE_ID);

	__l2c_init(&l2x0_init_fns, aux_val, aux_mask, cache_id);
}

#ifdef CONFIG_OF
static int l2_wt_override;

/* Aurora doesn't have the cache ID register available, so we have to
 * pass it through the device tree */
static u32 cache_id_part_number_from_dt;

static void __init l2x0_of_parse(const struct device_node *np,
				 u32 *aux_val, u32 *aux_mask)
{
	u32 data[2] = { 0, 0 };
	u32 tag = 0;
	u32 dirty = 0;
	u32 val = 0, mask = 0;

	of_property_read_u32(np, "arm,tag-latency", &tag);
	if (tag) {
		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
	}

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1]) {
		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
	}

	of_property_read_u32(np, "arm,dirty-latency", &dirty);
	if (dirty) {
		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}

static void l2x0_resume(void)
{
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		/* restore aux ctrl and enable l2 */
		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));

		writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base +
			L2X0_AUX_CTRL);

		l2x0_inv_all();

		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
	}
}

static const struct l2c_init_data of_l2x0_data __initconst = {
	.of_parse = l2x0_of_parse,
	.enable = l2x0_enable,
	.outer_cache = {
		.inv_range   = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = l2x0_resume,
	},
};

static void __init pl310_of_parse(const struct device_node *np,
				  u32 *aux_val, u32 *aux_mask)
{
	u32 data[3] = { 0, 0, 0 };
	u32 tag[3] = { 0, 0, 0 };
	u32 filter[2] = { 0, 0 };

	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
	if (tag[0] && tag[1] && tag[2])
		writel_relaxed(
			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
			l2x0_base + L2X0_TAG_LATENCY_CTRL);

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1] && data[2])
		writel_relaxed(
			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
			l2x0_base + L2X0_DATA_LATENCY_CTRL);

	of_property_read_u32_array(np, "arm,filter-ranges",
				   filter, ARRAY_SIZE(filter));
	if (filter[1]) {
		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
			       l2x0_base + L2X0_ADDR_FILTER_END);
		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
			       l2x0_base + L2X0_ADDR_FILTER_START);
	}
}

static void __init pl310_save(void __iomem *base)
{
	u32 l2x0_revision = readl_relaxed(base + L2X0_CACHE_ID) &
		L2X0_CACHE_ID_RTL_MASK;

	l2x0_saved_regs.tag_latency = readl_relaxed(base +
		L2X0_TAG_LATENCY_CTRL);
	l2x0_saved_regs.data_latency = readl_relaxed(base +
		L2X0_DATA_LATENCY_CTRL);
	l2x0_saved_regs.filter_end = readl_relaxed(base +
		L2X0_ADDR_FILTER_END);
	l2x0_saved_regs.filter_start = readl_relaxed(base +
		L2X0_ADDR_FILTER_START);

	if (l2x0_revision >= L310_CACHE_ID_RTL_R2P0) {
		/*
		 * From r2p0, there is Prefetch offset/control register
		 */
		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
			L2X0_PREFETCH_CTRL);
		/*
		 * From r3p0, there is Power control register
		 */
		if (l2x0_revision >= L310_CACHE_ID_RTL_R3P0)
			l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
				L2X0_POWER_CTRL);
	}
}

static void pl310_resume(void)
{
	u32 l2x0_revision;

	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		/* restore pl310 setup */
		writel_relaxed(l2x0_saved_regs.tag_latency,
			l2x0_base + L2X0_TAG_LATENCY_CTRL);
		writel_relaxed(l2x0_saved_regs.data_latency,
			l2x0_base + L2X0_DATA_LATENCY_CTRL);
		writel_relaxed(l2x0_saved_regs.filter_end,
			l2x0_base + L2X0_ADDR_FILTER_END);
		writel_relaxed(l2x0_saved_regs.filter_start,
			l2x0_base + L2X0_ADDR_FILTER_START);

		l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
			L2X0_CACHE_ID_RTL_MASK;

		if (l2x0_revision >= L310_CACHE_ID_RTL_R2P0) {
			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
				l2x0_base + L2X0_PREFETCH_CTRL);
			if (l2x0_revision >= L310_CACHE_ID_RTL_R3P0)
				writel_relaxed(l2x0_saved_regs.pwr_ctrl,
					l2x0_base + L2X0_POWER_CTRL);
		}
	}

	l2x0_resume();
}

static const struct l2c_init_data of_pl310_data __initconst = {
	.num_lock = 8,
	.of_parse = pl310_of_parse,
	.enable = l2c_enable,
	.save  = pl310_save,
	.outer_cache = {
		.inv_range   = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = pl310_resume,
	},
};

/*
 * Note that the end addresses passed to Linux primitives are
 * noninclusive, while the hardware cache range operations use
 * inclusive start and end addresses.
 */
static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
	/*
	 * Limit the number of cache lines processed at once,
	 * since cache range operations stall the CPU pipeline
	 * until completion.
	 */
	if (end > start + MAX_RANGE_SIZE)
		end = start + MAX_RANGE_SIZE;

	/*
	 * Cache range operations can't straddle a page boundary.
	 */
	if (end > PAGE_ALIGN(start+1))
		end = PAGE_ALIGN(start+1);

	return end;
}

/*
 * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
 * and range operations only do a TLB lookup on the start address.
 */
static void aurora_pa_range(unsigned long start, unsigned long end,
			unsigned long offset)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
	writel_relaxed(end, l2x0_base + offset);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);

	cache_sync();
}

static void aurora_inv_range(unsigned long start, unsigned long end)
{
	/*
	 * align start and end addresses to the cache line size
	 */
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);

	/*
	 * Invalidate all full cache lines between 'start' and 'end'.
	 */
	while (start < end) {
		unsigned long range_end = calc_range_end(start, end);
		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
				AURORA_INVAL_RANGE_REG);
		start = range_end;
	}
}

static void aurora_clean_range(unsigned long start, unsigned long end)
{
	/*
	 * If L2 is forced to WT, the L2 will always be clean and we
	 * don't need to do anything here.
	 */
	if (!l2_wt_override) {
		start &= ~(CACHE_LINE_SIZE - 1);
		end = ALIGN(end, CACHE_LINE_SIZE);
		while (start != end) {
			unsigned long range_end = calc_range_end(start, end);
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
					AURORA_CLEAN_RANGE_REG);
			start = range_end;
		}
	}
}

static void aurora_flush_range(unsigned long start, unsigned long end)
{
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);
	while (start != end) {
		unsigned long range_end = calc_range_end(start, end);
		/*
		 * If L2 is forced to WT, the L2 will always be clean and we
		 * just need to invalidate.
		 */
		if (l2_wt_override)
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
							AURORA_INVAL_RANGE_REG);
		else
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
							AURORA_FLUSH_RANGE_REG);
		start = range_end;
	}
}

static void aurora_save(void __iomem *base)
{
	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
}

static void aurora_resume(void)
{
	if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		writel_relaxed(l2x0_saved_regs.aux_ctrl,
				l2x0_base + L2X0_AUX_CTRL);
		writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL);
	}
}

static void __init aurora_broadcast_l2_commands(void)
{
	__u32 u;
	/* Enable Broadcasting of cache commands to L2 */
	__asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u));
	u |= AURORA_CTRL_FW;		/* Set the FW bit */
	__asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u));
	isb();
}

static void __init aurora_of_parse(const struct device_node *np,
				u32 *aux_val, u32 *aux_mask)
{
	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;

	of_property_read_u32(np, "cache-id-part",
			&cache_id_part_number_from_dt);

	/* Determine and save the write policy */
	l2_wt_override = of_property_read_bool(np, "wt-override");

	if (l2_wt_override) {
		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}

static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = l2c_enable,
	.save  = aurora_save,
	.outer_cache = {
		.inv_range   = aurora_inv_range,
		.clean_range = aurora_clean_range,
		.flush_range = aurora_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = aurora_resume,
	},
};

static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = l2c_enable,
	.save  = aurora_save,
	.outer_cache = {
		.resume      = aurora_resume,
	},
};

/*
 * For certain Broadcom SoCs, depending on the address range, different offsets
 * need to be added to the address before passing it to L2 for
 * invalidation/clean/flush
 *
 * Section Address Range              Offset        EMI
 *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
 *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
 *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
 *
 * When the start and end addresses fall in two different sections, we
 * need to break the L2 operation into two, each within its own section.
 * For example, if we need to invalidate a range that starts at 0xBFFF0000
 * and ends at 0xC0001000, we must issue two invalidations:
 * 1) 0xBFFF0000 - 0xBFFFFFFF and 2) 0xC0000000 - 0xC0001000
 *
 * Note 1:
 * By breaking a single L2 operation into two, we may suffer a performance
 * hit, but keep in mind that the cross-section case is very rare
 *
 * Note 2:
 * We do not need to handle the case when the start address is in
 * Section 1 and the end address is in Section 3, since it is not a valid use
 * case
 *
 * Note 3:
 * Section 1 in practical terms can no longer be used on rev A2. Because of
 * that the code does not need to handle section 1 at all.
 *
 */
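/*
 * Illustrative example: an address in the SYS EMI section such as
 * 0x48000000 is presented to the L2 as 0x48000000 + 0x40000000 =
 * 0x88000000 (see bcm_l2_phys_addr() below).
 */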
#define BCM_SYS_EMI_START_ADDR        0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL

#define BCM_SYS_EMI_OFFSET            0x40000000UL
#define BCM_VC_EMI_OFFSET             0x80000000UL

static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
	return (addr >= BCM_SYS_EMI_START_ADDR) &&
		(addr < BCM_VC_EMI_SEC3_START_ADDR);
}

static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
	if (bcm_addr_is_sys_emi(addr))
		return addr + BCM_SYS_EMI_OFFSET;
	else
		return addr + BCM_VC_EMI_OFFSET;
}

static void bcm_inv_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_inv_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2x0_inv_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_clean_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_clean_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2x0_clean_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_flush_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		l2x0_flush_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_flush_range(new_start, new_end);
		return;
	}

	/* They cross sections, so it can only be a cross from section
	 * 2 to section 3
	 */
	l2x0_flush_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
	.num_lock = 8,
	.of_parse = pl310_of_parse,
	.enable = l2c_enable,
	.save  = pl310_save,
	.outer_cache = {
		.inv_range   = bcm_inv_range,
		.clean_range = bcm_clean_range,
		.flush_range = bcm_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = pl310_resume,
	},
};

static void __init tauros3_save(void __iomem *base)
{
	l2x0_saved_regs.aux2_ctrl =
		readl_relaxed(base + TAUROS3_AUX2_CTRL);
	l2x0_saved_regs.prefetch_ctrl =
		readl_relaxed(base + L2X0_PREFETCH_CTRL);
}

static void tauros3_resume(void)
{
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
			       l2x0_base + TAUROS3_AUX2_CTRL);
		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
			       l2x0_base + L2X0_PREFETCH_CTRL);
	}

	l2x0_resume();
}

static const struct l2c_init_data of_tauros3_data __initconst = {
	.num_lock = 8,
	.enable = l2c_enable,
	.save  = tauros3_save,
	/* Tauros3 broadcasts L1 cache operations to L2 */
	.outer_cache = {
		.resume      = tauros3_resume,
	},
};

#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
static const struct of_device_id l2x0_ids[] __initconst = {
	L2C_ID("arm,l210-cache", of_l2x0_data),
	L2C_ID("arm,l220-cache", of_l2x0_data),
	L2C_ID("arm,pl310-cache", of_pl310_data),
	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
	/* Deprecated IDs */
	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	{}
};

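/*
 * Illustrative device tree fragment (not taken from this file) showing a
 * PL310 node with only the properties parsed above; the unit address and
 * latency values are made-up placeholders:
 *
 *	l2: cache-controller@fffef000 {
 *		compatible = "arm,pl310-cache";
 *		reg = <0xfffef000 0x1000>;
 *		arm,tag-latency = <1 1 1>;
 *		arm,data-latency = <2 1 1>;
 *		arm,filter-ranges = <0x80000000 0x40000000>;
 *	};
 */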
int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
	const struct l2c_init_data *data;
	struct device_node *np;
	struct resource res;
	u32 cache_id;

	np = of_find_matching_node(NULL, l2x0_ids);
	if (!np)
		return -ENODEV;

	if (of_address_to_resource(np, 0, &res))
		return -ENODEV;

	l2x0_base = ioremap(res.start, resource_size(&res));
	if (!l2x0_base)
		return -ENOMEM;

	l2x0_saved_regs.phy_base = res.start;

	data = of_match_node(l2x0_ids, np)->data;

	/* L2 configuration can only be changed if the cache is disabled */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		if (data->of_parse)
			data->of_parse(np, &aux_val, &aux_mask);

		/* For the Aurora cache in no-outer mode, select the
		 * correct mode using the coprocessor */
		if (data == &of_aurora_no_outer_data)
			aurora_broadcast_l2_commands();
	}

	if (cache_id_part_number_from_dt)
		cache_id = cache_id_part_number_from_dt;
	else
		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);

	__l2c_init(data, aux_val, aux_mask, cache_id);

	return 0;
}
#endif