/*
 * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
 *
 * Copyright (C) 2007 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/err.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>

#include <asm/cacheflush.h>
#include <asm/hardware/cache-l2x0.h>
#include "cache-tauros3.h"
#include "cache-aurora-l2.h"

struct l2c_init_data {
	unsigned num_lock;
	void (*of_parse)(const struct device_node *, u32 *, u32 *);
	void (*enable)(void __iomem *, u32, unsigned);
	void (*save)(void __iomem *);
	struct outer_cache_fns outer_cache;
};

#define CACHE_LINE_SIZE		32

static void __iomem *l2x0_base;
static DEFINE_RAW_SPINLOCK(l2x0_lock);
static u32 l2x0_way_mask;	/* Bitmask of active ways */
static u32 l2x0_size;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;

struct l2x0_regs l2x0_saved_regs;

/*
 * Common code for all cache controllers.
 */
static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
{
	/* wait for cache operation by line or way to complete */
	while (readl_relaxed(reg) & mask)
		cpu_relax();
}

/*
 * This should only be called when we have a requirement that the
 * register be written due to a work-around, as platforms running
 * in non-secure mode may not be able to access this register.
 */
static inline void l2c_set_debug(void __iomem *base, unsigned long val)
{
	outer_cache.set_debug(val);
}

static void __l2c_op_way(void __iomem *reg)
{
	writel_relaxed(l2x0_way_mask, reg);
	l2c_wait_mask(reg, l2x0_way_mask);
}

static inline void l2c_unlock(void __iomem *base, unsigned num)
{
	unsigned i;

	for (i = 0; i < num; i++) {
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
			       i * L2X0_LOCKDOWN_STRIDE);
	}
}

/*
 * Enable the L2 cache controller.  This function must only be
 * called when the cache controller is known to be disabled.
 */
static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
	unsigned long flags;

	/* Only write the aux register if it needs changing */
	if (readl_relaxed(base + L2X0_AUX_CTRL) != aux)
		writel_relaxed(aux, base + L2X0_AUX_CTRL);

	l2c_unlock(base, num_lock);

	local_irq_save(flags);
	__l2c_op_way(base + L2X0_INV_WAY);
	writel_relaxed(0, base + sync_reg_offset);
	l2c_wait_mask(base + sync_reg_offset, 1);
	local_irq_restore(flags);

	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
}

static void l2c_disable(void)
{
	void __iomem *base = l2x0_base;

	outer_cache.flush_all();
	writel_relaxed(0, base + L2X0_CTRL);
	dsb(st);
}

#ifdef CONFIG_CACHE_PL310
static inline void cache_wait(void __iomem *reg, unsigned long mask)
{
	/* cache operations by line are atomic on PL310 */
}
#else
#define cache_wait	l2c_wait_mask
#endif

static inline void cache_sync(void)
{
	void __iomem *base = l2x0_base;

	writel_relaxed(0, base + sync_reg_offset);
	cache_wait(base + L2X0_CACHE_SYNC, 1);
}

static inline void l2x0_clean_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
}

static inline void l2x0_inv_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
}

#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
static inline void debug_writel(unsigned long val)
{
	if (outer_cache.set_debug)
		l2c_set_debug(l2x0_base, val);
}

static void pl310_set_debug(unsigned long val)
{
	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
}
#else
/* Optimised out for non-errata case */
static inline void debug_writel(unsigned long val)
{
}

#define pl310_set_debug	NULL
#endif

#ifdef CONFIG_PL310_ERRATA_588369
static inline void l2x0_flush_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;

	/* Clean by PA followed by Invalidate by PA */
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
}
#else

static inline void l2x0_flush_line(unsigned long addr)
{
	void __iomem *base = l2x0_base;
	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
}
#endif

static void l2x0_cache_sync(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void __l2x0_flush_all(void)
{
	debug_writel(0x03);
	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
	cache_sync();
	debug_writel(0x00);
}

static void l2x0_flush_all(void)
{
	unsigned long flags;

	/* clean all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2x0_flush_all();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_clean_all(void)
{
	unsigned long flags;

	/* clean all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2c_op_way(l2x0_base + L2X0_CLEAN_WAY);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_inv_all(void)
{
	unsigned long flags;

	/* invalidate all ways */
	raw_spin_lock_irqsave(&l2x0_lock, flags);
	/* Invalidating when L2 is enabled is a no-no */
	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
	__l2c_op_way(l2x0_base + L2X0_INV_WAY);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_inv_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	if (start & (CACHE_LINE_SIZE - 1)) {
		start &= ~(CACHE_LINE_SIZE - 1);
		debug_writel(0x03);
		l2x0_flush_line(start);
		debug_writel(0x00);
		start += CACHE_LINE_SIZE;
	}

	if (end & (CACHE_LINE_SIZE - 1)) {
		end &= ~(CACHE_LINE_SIZE - 1);
		debug_writel(0x03);
		l2x0_flush_line(end);
		debug_writel(0x00);
	}

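	/*
	 * Work through the range in chunks of at most 4096 bytes, dropping
	 * and re-taking the lock between chunks so that interrupt latency
	 * stays bounded while a large range is processed.
	 */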
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2x0_inv_line(start);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_INV_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_clean_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		while (start < blk_end) {
			l2x0_clean_line(start);
			start += CACHE_LINE_SIZE;
		}

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_flush_range(unsigned long start, unsigned long end)
{
	void __iomem *base = l2x0_base;
	unsigned long flags;

	if ((end - start) >= l2x0_size) {
		l2x0_flush_all();
		return;
	}

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long blk_end = start + min(end - start, 4096UL);

		debug_writel(0x03);
		while (start < blk_end) {
			l2x0_flush_line(start);
			start += CACHE_LINE_SIZE;
		}
		debug_writel(0x00);

		if (blk_end < end) {
			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
			raw_spin_lock_irqsave(&l2x0_lock, flags);
		}
	}
	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
	cache_sync();
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_disable(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	__l2x0_flush_all();
	writel_relaxed(0, l2x0_base + L2X0_CTRL);
	dsb(st);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_unlock(u32 cache_id)
{
	int lockregs;

	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L310:
		lockregs = 8;
		break;
	default:
		/* L210 and unknown types */
		lockregs = 1;
		break;
	}

	l2c_unlock(l2x0_base, lockregs);
}

static void l2x0_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
	/* l2x0 controller is disabled */
	writel_relaxed(aux, base + L2X0_AUX_CTRL);

	/* Make sure that I&D is not locked down when starting */
	l2x0_unlock(readl_relaxed(base + L2X0_CACHE_ID));

	l2x0_inv_all();

	/* enable L2X0 */
	writel_relaxed(L2X0_CTRL_EN, base + L2X0_CTRL);
}

static const struct l2c_init_data l2x0_init_fns __initconst = {
	.enable = l2x0_enable,
	.outer_cache = {
		.inv_range = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all = l2x0_flush_all,
		.disable = l2x0_disable,
		.sync = l2x0_cache_sync,
	},
};

static void __init __l2c_init(const struct l2c_init_data *data,
	u32 aux_val, u32 aux_mask, u32 cache_id)
{
	u32 aux;
	u32 way_size = 0;
	int ways;
	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
	const char *type;

	/*
	 * It is strange to save the register state before initialisation,
	 * but hey, this is what the DT implementations decided to do.
	 */
	if (data->save)
		data->save(l2x0_base);

	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	aux &= aux_mask;
	aux |= aux_val;

	/* Determine the number of ways */
	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L310:
		if (aux & (1 << 16))
			ways = 16;
		else
			ways = 8;
		type = "L310";
#ifdef CONFIG_PL310_ERRATA_753970
		/* Unmapped register. */
		sync_reg_offset = L2X0_DUMMY_REG;
#endif
		break;
	case L2X0_CACHE_ID_PART_L210:
		ways = (aux >> 13) & 0xf;
		type = "L210";
		break;

	case AURORA_CACHE_ID:
		sync_reg_offset = AURORA_SYNC_REG;
		ways = (aux >> 13) & 0xf;
		ways = 2 << ((ways + 1) >> 2);
		way_size_shift = AURORA_WAY_SIZE_SHIFT;
		type = "Aurora";
		break;
	default:
		/* Assume unknown chips have 8 ways */
		ways = 8;
		type = "L2x0 series";
		break;
	}

	l2x0_way_mask = (1 << ways) - 1;

	/*
	 * L2 cache Size =  Way size * Number of ways
	 */
	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
	way_size = 1 << (way_size + way_size_shift);

	l2x0_size = ways * way_size * SZ_1K;
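	/*
	 * Worked example (illustrative only, assuming L2X0_WAY_SIZE_SHIFT
	 * is 3): a way-size field of 3 gives way_size = 1 << (3 + 3) = 64,
	 * so a 16-way L310 ends up with l2x0_size = 16 * 64 * SZ_1K = 1 MB.
	 */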

	/*
	 * Check if l2x0 controller is already enabled.  If we are booting
	 * in non-secure mode accessing the below registers will fault.
	 */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
		data->enable(l2x0_base, aux, data->num_lock);

	/* Re-read it in case some bits are reserved. */
	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

	/* Save the value for resuming. */
	l2x0_saved_regs.aux_ctrl = aux;

	outer_cache = data->outer_cache;

	if ((cache_id & L2X0_CACHE_ID_PART_MASK) == L2X0_CACHE_ID_PART_L310 &&
	    (cache_id & L2X0_CACHE_ID_RTL_MASK) <= L310_CACHE_ID_RTL_R3P0)
		outer_cache.set_debug = pl310_set_debug;

	pr_info("%s cache controller enabled\n", type);
	pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n",
		ways, cache_id, aux, l2x0_size >> 10);
}

void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
{
	u32 cache_id;

	l2x0_base = base;

	cache_id = readl_relaxed(base + L2X0_CACHE_ID);

	__l2c_init(&l2x0_init_fns, aux_val, aux_mask, cache_id);
}
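
/*
 * Illustrative (non-mainline) board-file usage; the IO_ADDRESS() mapping,
 * physical base and aux values below are made-up examples only:
 *
 *	l2x0_init(IO_ADDRESS(0x1f002000), 0x30400000, 0x8200c3fe);
 *
 * The current AUX_CTRL value is AND-ed with aux_mask and then OR-ed with
 * aux_val before being applied, so bits cleared in aux_mask are forced to
 * the caller's values while the rest is kept as the boot loader left it.
 */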

#ifdef CONFIG_OF
static int l2_wt_override;

/*
 * Aurora doesn't have the cache ID register available, so we have to
 * pass it through the device tree.
 */
static u32 cache_id_part_number_from_dt;

static void __init l2x0_of_parse(const struct device_node *np,
				 u32 *aux_val, u32 *aux_mask)
{
	u32 data[2] = { 0, 0 };
	u32 tag = 0;
	u32 dirty = 0;
	u32 val = 0, mask = 0;

	of_property_read_u32(np, "arm,tag-latency", &tag);
	if (tag) {
		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
	}

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1]) {
		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
	}

	of_property_read_u32(np, "arm,dirty-latency", &dirty);
	if (dirty) {
		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}
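
/*
 * Illustrative device tree node consumed by l2x0_of_parse(); the unit
 * address and latency values are examples only (see
 * Documentation/devicetree/bindings/arm/l2cc.txt for the binding):
 *
 *	L2: cache-controller@fff12000 {
 *		compatible = "arm,l220-cache";
 *		reg = <0xfff12000 0x1000>;
 *		cache-unified;
 *		cache-level = <2>;
 *		arm,tag-latency = <2>;
 *		arm,data-latency = <2 2>;
 *		arm,dirty-latency = <2>;
 *	};
 */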

static void l2x0_resume(void)
{
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		/* restore aux ctrl and enable l2 */
		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));

		writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base +
			L2X0_AUX_CTRL);

		l2x0_inv_all();

		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
	}
}

static const struct l2c_init_data of_l2x0_data __initconst = {
	.of_parse = l2x0_of_parse,
	.enable = l2x0_enable,
	.outer_cache = {
		.inv_range   = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = l2x0_resume,
	},
};

static void __init pl310_of_parse(const struct device_node *np,
				  u32 *aux_val, u32 *aux_mask)
{
	u32 data[3] = { 0, 0, 0 };
	u32 tag[3] = { 0, 0, 0 };
	u32 filter[2] = { 0, 0 };

	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
	if (tag[0] && tag[1] && tag[2])
		writel_relaxed(
			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
			l2x0_base + L2X0_TAG_LATENCY_CTRL);

	of_property_read_u32_array(np, "arm,data-latency",
				   data, ARRAY_SIZE(data));
	if (data[0] && data[1] && data[2])
		writel_relaxed(
			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
			l2x0_base + L2X0_DATA_LATENCY_CTRL);

	of_property_read_u32_array(np, "arm,filter-ranges",
				   filter, ARRAY_SIZE(filter));
	if (filter[1]) {
		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
			       l2x0_base + L2X0_ADDR_FILTER_END);
		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
			       l2x0_base + L2X0_ADDR_FILTER_START);
	}
}
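
/*
 * Illustrative PL310 node for pl310_of_parse(); unit address and values
 * are examples only.  Both latency properties take <rd wr setup> cycles,
 * and arm,filter-ranges is <start length>, programmed into the address
 * filter registers with the end rounded up to 1MB:
 *
 *	L2: cache-controller@fff12000 {
 *		compatible = "arm,pl310-cache";
 *		reg = <0xfff12000 0x1000>;
 *		cache-unified;
 *		cache-level = <2>;
 *		arm,tag-latency = <1 1 1>;
 *		arm,data-latency = <1 2 1>;
 *		arm,filter-ranges = <0x80000000 0x40000000>;
 *	};
 */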

static void __init pl310_save(void __iomem *base)
{
	u32 l2x0_revision = readl_relaxed(base + L2X0_CACHE_ID) &
		L2X0_CACHE_ID_RTL_MASK;

	l2x0_saved_regs.tag_latency = readl_relaxed(base +
		L2X0_TAG_LATENCY_CTRL);
	l2x0_saved_regs.data_latency = readl_relaxed(base +
		L2X0_DATA_LATENCY_CTRL);
	l2x0_saved_regs.filter_end = readl_relaxed(base +
		L2X0_ADDR_FILTER_END);
	l2x0_saved_regs.filter_start = readl_relaxed(base +
		L2X0_ADDR_FILTER_START);

	if (l2x0_revision >= L310_CACHE_ID_RTL_R2P0) {
		/*
		 * From r2p0, there is Prefetch offset/control register
		 */
		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
			L2X0_PREFETCH_CTRL);
		/*
		 * From r3p0, there is Power control register
		 */
		if (l2x0_revision >= L310_CACHE_ID_RTL_R3P0)
			l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
				L2X0_POWER_CTRL);
	}
}

static void pl310_resume(void)
{
	u32 l2x0_revision;

	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		/* restore pl310 setup */
		writel_relaxed(l2x0_saved_regs.tag_latency,
			l2x0_base + L2X0_TAG_LATENCY_CTRL);
		writel_relaxed(l2x0_saved_regs.data_latency,
			l2x0_base + L2X0_DATA_LATENCY_CTRL);
		writel_relaxed(l2x0_saved_regs.filter_end,
			l2x0_base + L2X0_ADDR_FILTER_END);
		writel_relaxed(l2x0_saved_regs.filter_start,
			l2x0_base + L2X0_ADDR_FILTER_START);

		l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
			L2X0_CACHE_ID_RTL_MASK;

		if (l2x0_revision >= L310_CACHE_ID_RTL_R2P0) {
			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
				l2x0_base + L2X0_PREFETCH_CTRL);
			if (l2x0_revision >= L310_CACHE_ID_RTL_R3P0)
				writel_relaxed(l2x0_saved_regs.pwr_ctrl,
					l2x0_base + L2X0_POWER_CTRL);
		}
	}

	l2x0_resume();
}

static const struct l2c_init_data of_pl310_data __initconst = {
	.num_lock = 8,
	.of_parse = pl310_of_parse,
	.enable = l2c_enable,
	.save  = pl310_save,
	.outer_cache = {
		.inv_range   = l2x0_inv_range,
		.clean_range = l2x0_clean_range,
		.flush_range = l2x0_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = pl310_resume,
	},
};

/*
 * Note that the end addresses passed to Linux primitives are
 * noninclusive, while the hardware cache range operations use
 * inclusive start and end addresses.
 */
static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
	/*
	 * Limit the number of cache lines processed at once,
	 * since cache range operations stall the CPU pipeline
	 * until completion.
	 */
	if (end > start + MAX_RANGE_SIZE)
		end = start + MAX_RANGE_SIZE;

	/*
	 * Cache range operations can't straddle a page boundary.
	 */
	if (end > PAGE_ALIGN(start+1))
		end = PAGE_ALIGN(start+1);

	return end;
}
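
/*
 * Worked example (illustrative): with 4K pages, a chunk starting at
 * 0x10000020 is first capped to MAX_RANGE_SIZE and then clipped to the
 * end of its page, so it ends no later than 0x10001000; the caller's
 * loop then continues with the remainder of the range.
 */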

/*
 * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
 * and range operations only do a TLB lookup on the start address.
 */
static void aurora_pa_range(unsigned long start, unsigned long end,
			unsigned long offset)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&l2x0_lock, flags);
	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
	writel_relaxed(end, l2x0_base + offset);
	raw_spin_unlock_irqrestore(&l2x0_lock, flags);

	cache_sync();
}

static void aurora_inv_range(unsigned long start, unsigned long end)
{
	/*
	 * Round start down and end up to the cache line size.
	 */
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);

	/*
	 * Invalidate all full cache lines between 'start' and 'end'.
	 */
	while (start < end) {
		unsigned long range_end = calc_range_end(start, end);
		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
				AURORA_INVAL_RANGE_REG);
		start = range_end;
	}
}

static void aurora_clean_range(unsigned long start, unsigned long end)
{
	/*
	 * If L2 is forced to WT, the L2 will always be clean and we
	 * don't need to do anything here.
	 */
	if (!l2_wt_override) {
		start &= ~(CACHE_LINE_SIZE - 1);
		end = ALIGN(end, CACHE_LINE_SIZE);
		while (start != end) {
			unsigned long range_end = calc_range_end(start, end);
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
					AURORA_CLEAN_RANGE_REG);
			start = range_end;
		}
	}
}

static void aurora_flush_range(unsigned long start, unsigned long end)
{
	start &= ~(CACHE_LINE_SIZE - 1);
	end = ALIGN(end, CACHE_LINE_SIZE);
	while (start != end) {
		unsigned long range_end = calc_range_end(start, end);
		/*
		 * If L2 is forced to WT, the L2 will always be clean and we
		 * just need to invalidate.
		 */
		if (l2_wt_override)
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
							AURORA_INVAL_RANGE_REG);
		else
			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
							AURORA_FLUSH_RANGE_REG);
		start = range_end;
	}
}

static void aurora_save(void __iomem *base)
{
	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
}

static void aurora_resume(void)
{
	if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		writel_relaxed(l2x0_saved_regs.aux_ctrl,
				l2x0_base + L2X0_AUX_CTRL);
		writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL);
	}
}

static void __init aurora_broadcast_l2_commands(void)
{
	__u32 u;
	/* Enable Broadcasting of cache commands to L2*/
	__asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u));
	u |= AURORA_CTRL_FW;		/* Set the FW bit */
	__asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u));
	isb();
}

static void __init aurora_of_parse(const struct device_node *np,
				u32 *aux_val, u32 *aux_mask)
{
	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;

	of_property_read_u32(np, "cache-id-part",
			&cache_id_part_number_from_dt);

	/* Determine and save the write policy */
	l2_wt_override = of_property_read_bool(np, "wt-override");

	if (l2_wt_override) {
		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
	}

	*aux_val &= ~mask;
	*aux_val |= val;
	*aux_mask &= ~mask;
}
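
/*
 * Illustrative Aurora node consumed by aurora_of_parse(); the node name,
 * unit address and cache-id-part value below are examples only:
 *
 *	l2: l2-cache@8000 {
 *		compatible = "marvell,aurora-outer-cache";
 *		reg = <0x8000 0x1000>;
 *		cache-id-part = <0x100>;
 *		wt-override;
 *	};
 */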

static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = l2c_enable,
	.save  = aurora_save,
	.outer_cache = {
		.inv_range   = aurora_inv_range,
		.clean_range = aurora_clean_range,
		.flush_range = aurora_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = aurora_resume,
	},
};

static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
	.num_lock = 4,
	.of_parse = aurora_of_parse,
	.enable = l2c_enable,
	.save  = aurora_save,
	.outer_cache = {
		.resume      = aurora_resume,
	},
};

/*
 * For certain Broadcom SoCs, depending on the address range, different offsets
 * need to be added to the address before passing it to L2 for
 * invalidation/clean/flush
 *
 * Section Address Range              Offset        EMI
 *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
 *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
 *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
 *
 * When the start and end addresses fall in two different sections, we
 * need to break the L2 operation into two, each within its own section.
 * For example, to invalidate a range that starts at 0xBFFF0000 and ends
 * at 0xC0001000, we invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2)
 * 0xC0000000 - 0xC0001000.
 *
 * Note 1:
 * By breaking a single L2 operation into two, we may potentially suffer some
 * performance hit, but keep in mind the cross section case is very rare
 *
 * Note 2:
 * We do not need to handle the case when the start address is in
 * Section 1 and the end address is in Section 3, since it is not a valid use
 * case
 *
 * Note 3:
 * Section 1 in practical terms can no longer be used on rev A2. Because of
 * that the code does not need to handle section 1 at all.
 *
 */
#define BCM_SYS_EMI_START_ADDR        0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL

#define BCM_SYS_EMI_OFFSET            0x40000000UL
#define BCM_VC_EMI_OFFSET             0x80000000UL

static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
	return (addr >= BCM_SYS_EMI_START_ADDR) &&
		(addr < BCM_VC_EMI_SEC3_START_ADDR);
}

static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
	if (bcm_addr_is_sys_emi(addr))
		return addr + BCM_SYS_EMI_OFFSET;
	else
		return addr + BCM_VC_EMI_OFFSET;
}
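
/*
 * Illustrative only: a SYS EMI address such as 0x50000000 is presented to
 * the L2 as 0x90000000 (+BCM_SYS_EMI_OFFSET), while addresses at or above
 * 0xC0000000 get BCM_VC_EMI_OFFSET added instead.
 */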

static void bcm_inv_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_inv_range(new_start, new_end);
		return;
	}

	/*
	 * The range crosses sections, so this can only be a crossing from
	 * section 2 into section 3.
	 */
	l2x0_inv_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_clean_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_clean_range(new_start, new_end);
		return;
	}

	/*
	 * The range crosses sections, so this can only be a crossing from
	 * section 2 into section 3.
	 */
	l2x0_clean_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static void bcm_flush_range(unsigned long start, unsigned long end)
{
	unsigned long new_start, new_end;

	BUG_ON(start < BCM_SYS_EMI_START_ADDR);

	if (unlikely(end <= start))
		return;

	if ((end - start) >= l2x0_size) {
		l2x0_flush_all();
		return;
	}

	new_start = bcm_l2_phys_addr(start);
	new_end = bcm_l2_phys_addr(end);

	/* normal case, no cross section between start and end */
	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
		l2x0_flush_range(new_start, new_end);
		return;
	}

	/*
	 * The range crosses sections, so this can only be a crossing from
	 * section 2 into section 3.
	 */
	l2x0_flush_range(new_start,
		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
		new_end);
}

static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
	.num_lock = 8,
	.of_parse = pl310_of_parse,
	.enable = l2c_enable,
	.save  = pl310_save,
	.outer_cache = {
		.inv_range   = bcm_inv_range,
		.clean_range = bcm_clean_range,
		.flush_range = bcm_flush_range,
		.flush_all   = l2x0_flush_all,
		.disable     = l2x0_disable,
		.sync        = l2x0_cache_sync,
		.resume      = pl310_resume,
	},
};

static void __init tauros3_save(void __iomem *base)
{
	l2x0_saved_regs.aux2_ctrl =
		readl_relaxed(base + TAUROS3_AUX2_CTRL);
	l2x0_saved_regs.prefetch_ctrl =
		readl_relaxed(base + L2X0_PREFETCH_CTRL);
}

static void tauros3_resume(void)
{
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
			       l2x0_base + TAUROS3_AUX2_CTRL);
		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
			       l2x0_base + L2X0_PREFETCH_CTRL);
	}

	l2x0_resume();
}

static const struct l2c_init_data of_tauros3_data __initconst = {
	.num_lock = 8,
	.enable = l2c_enable,
	.save  = tauros3_save,
	/* Tauros3 broadcasts L1 cache operations to L2 */
	.outer_cache = {
		.resume      = tauros3_resume,
	},
};

#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
static const struct of_device_id l2x0_ids[] __initconst = {
	L2C_ID("arm,l210-cache", of_l2x0_data),
	L2C_ID("arm,l220-cache", of_l2x0_data),
	L2C_ID("arm,pl310-cache", of_pl310_data),
	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
	/* Deprecated IDs */
	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
	{}
};

int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
	const struct l2c_init_data *data;
	struct device_node *np;
	struct resource res;
	u32 cache_id;

	np = of_find_matching_node(NULL, l2x0_ids);
	if (!np)
		return -ENODEV;

	if (of_address_to_resource(np, 0, &res))
		return -ENODEV;

	l2x0_base = ioremap(res.start, resource_size(&res));
	if (!l2x0_base)
		return -ENOMEM;

	l2x0_saved_regs.phy_base = res.start;

	data = of_match_node(l2x0_ids, np)->data;

	/* L2 configuration can only be changed if the cache is disabled */
	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
		if (data->of_parse)
			data->of_parse(np, &aux_val, &aux_mask);

		/*
		 * For the Aurora cache in no-outer mode, select the
		 * correct mode using the coprocessor.
		 */
		if (data == &of_aurora_no_outer_data)
			aurora_broadcast_l2_commands();
	}

	if (cache_id_part_number_from_dt)
		cache_id = cache_id_part_number_from_dt;
	else
		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);

	__l2c_init(data, aux_val, aux_mask, cache_id);

	return 0;
}
#endif