xref: /openbmc/u-boot/arch/arc/lib/cache.c (revision c27814be)
1 /*
2  * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
3  *
4  * SPDX-License-Identifier:	GPL-2.0+
5  */
6 
7 #include <config.h>
8 #include <common.h>
9 #include <linux/compiler.h>
10 #include <linux/kernel.h>
11 #include <linux/log2.h>
12 #include <asm/arcregs.h>
13 #include <asm/cache.h>
14 
15 /*
16  * [ NOTE 1 ]:
17  * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable
18  * operation may result in unexpected behavior and data loss even if we flush
 * data cache right before invalidation. That may happen if we store any context
20  * on stack (like we store BLINK register on stack before function call).
21  * BLINK register is the register where return address is automatically saved
22  * when we do function call with instructions like 'bl'.
23  *
24  * There is the real example:
 * We may hang in the following code because the BLINK register is stored on
 * the stack in the invalidate_dcache_all() function.
27  *
28  * void flush_dcache_all() {
29  *     __dc_entire_op(OP_FLUSH);
30  *     // Other code //
31  * }
32  *
33  * void invalidate_dcache_all() {
34  *     __dc_entire_op(OP_INV);
35  *     // Other code //
36  * }
37  *
38  * void foo(void) {
39  *     flush_dcache_all();
40  *     invalidate_dcache_all();
41  * }
42  *
43  * Now let's see what really happens during that code execution:
44  *
45  * foo()
46  *   |->> call flush_dcache_all
47  *     [return address is saved to BLINK register]
48  *     [push BLINK] (save to stack)              ![point 1]
49  *     |->> call __dc_entire_op(OP_FLUSH)
50  *         [return address is saved to BLINK register]
51  *         [flush L1 D$]
52  *         return [jump to BLINK]
53  *     <<------
54  *     [other flush_dcache_all code]
55  *     [pop BLINK] (get from stack)
56  *     return [jump to BLINK]
57  *   <<------
58  *   |->> call invalidate_dcache_all
59  *     [return address is saved to BLINK register]
60  *     [push BLINK] (save to stack)               ![point 2]
 *     |->> call __dc_entire_op(OP_INV)
62  *         [return address is saved to BLINK register]
63  *         [invalidate L1 D$]                 ![point 3]
64  *         // Oops!!!
65  *         // We lose return address from invalidate_dcache_all function:
66  *         // we save it to stack and invalidate L1 D$ after that!
67  *         return [jump to BLINK]
68  *     <<------
69  *     [other invalidate_dcache_all code]
70  *     [pop BLINK] (get from stack)
71  *     // we don't have this data in L1 dcache as we invalidated it in [point 3]
72  *     // so we get it from next memory level (for example DDR memory)
73  *     // but in the memory we have value which we save in [point 1], which
74  *     // is return address from flush_dcache_all function (instead of
75  *     // address from current invalidate_dcache_all function which we
76  *     // saved in [point 2] !)
77  *     return [jump to BLINK]
78  *   <<------
79  *   // As BLINK points to invalidate_dcache_all, we call it again and
80  *   // loop forever.
81  *
 * Fortunately we may fix that by using flush & invalidation of D$ with a
 * single instruction (instead of a flush and invalidation instruction pair)
 * and by forcing function inlining with the '__attribute__((always_inline))'
 * gcc attribute to avoid any function call (and BLINK store) between cache
 * flush and disable.
87  */
88 
/* Bit values in IC_CTRL */
/* When set, the instruction cache is reported as disabled */
#define IC_CTRL_CACHE_DISABLE	BIT(0)

/* Bit values in DC_CTRL */
/* When set, the data cache is reported as disabled */
#define DC_CTRL_CACHE_DISABLE	BIT(0)
/* IM bit: when set, an invalidate is performed as flush-n-invalidate */
#define DC_CTRL_INV_MODE_FLUSH	BIT(6)
/* Polled after a flush; the operation is finished once it reads as 0 */
#define DC_CTRL_FLUSH_STATUS	BIT(8)
#define CACHE_VER_NUM_MASK	0xF

/* Cache operation selectors passed to the __dc_* and __slc_* helpers */
#define OP_INV			BIT(0)
#define OP_FLUSH		BIT(1)
#define OP_FLUSH_N_INV		(OP_FLUSH | OP_INV)

/* Bit val in SLC_CONTROL */
#define SLC_CTRL_DIS		0x001
/* IM bit: when set, an SLC invalidate flushes before invalidating */
#define SLC_CTRL_IM		0x040
/* Polled after an SLC operation; the operation is finished once clear */
#define SLC_CTRL_BUSY		0x100
/* Selects invalidate (instead of the default flush) for region operations */
#define SLC_CTRL_RGN_OP_INV	0x200
107 
/*
 * By default these variables would fall into the .bss section.
 * But the .bss section is not relocated, so they would be initialized before
 * relocation and then used after being zeroed. To avoid that they are
 * placed in the .data section explicitly.
 */
int l1_line_sz __section(".data");
bool dcache_exists __section(".data") = false;
bool icache_exists __section(".data") = false;

/* Mask that clears the offset-within-line bits of an address (L1 line size) */
#define CACHE_LINE_MASK		(~(l1_line_sz - 1))
118 
119 #ifdef CONFIG_ISA_ARCV2
/* SLC line size in bytes; set by read_decode_cache_bcr_arcv2() if SLC exists */
int slc_line_sz __section(".data");
/* Set when the SLC build register reports a non-zero version */
bool slc_exists __section(".data") = false;
/* Set when the cluster build register reports IOC and ioc_enable is true */
bool ioc_exists __section(".data") = false;
/* Set by read_decode_mmu_bcr() from the PAE field of the MMU build register */
bool pae_exists __section(".data") = false;

/* To force enable IOC set ioc_enable to 'true' */
bool ioc_enable __section(".data") = false;
127 
/*
 * Read the MMU build configuration register and record in pae_exists
 * whether its PAE field is set.
 *
 * Only MMU version 4 and newer (CONFIG_ARC_MMU_VER >= 4) is decoded; for
 * older versions this function does nothing and pae_exists keeps its value.
 */
void read_decode_mmu_bcr(void)
{
	/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
	/*
	 * Overlay the bit-fields on the raw register word with a union (as
	 * the cache BCR decoders in this file do) rather than casting a
	 * pointer to the word, which would access it through an incompatible
	 * type.
	 */
	union {
		struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
			unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
				     n_ways:2, n_entry:2, n_super:2, u_itlb:3,
				     u_dtlb:3;
#else
			/*           DTLB      ITLB      JES        JE */
			unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2,
			/*           JA */
				     n_ways:2, pae:1, res:2, sz0:4, sz1:4,
				     sasid:1, ver:8;
#endif /* CONFIG_CPU_BIG_ENDIAN */
		} fields;
		unsigned int word;
	} mmu4;

	mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR);

	pae_exists = !!mmu4.fields.pae;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */
}
152 
153 static void __slc_entire_op(const int op)
154 {
155 	unsigned int ctrl;
156 
157 	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);
158 
159 	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
160 		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
161 	else
162 		ctrl |= SLC_CTRL_IM;
163 
164 	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);
165 
166 	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
167 		write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1);
168 	else
169 		write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1);
170 
171 	/* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
172 	read_aux_reg(ARC_AUX_SLC_CTRL);
173 
174 	/* Important to wait for flush to complete */
175 	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
176 }
177 
/*
 * Clear the upper parts of the SLC region start/end registers.
 *
 * The lower parts are programmed per operation by __slc_rgn_op(); the upper
 * ones are written once here and then left alone.
 */
static void slc_upper_region_init(void)
{
	/*
	 * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
	 * as we don't use PAE40.
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END1, 0);
	write_aux_reg(ARC_AUX_SLC_RGN_START1, 0);
}
187 
188 static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
189 {
190 	unsigned int ctrl;
191 	unsigned long end;
192 
193 	/*
194 	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
195 	 *  - b'000 (default) is Flush,
196 	 *  - b'001 is Invalidate if CTRL.IM == 0
197 	 *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
198 	 */
199 	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);
200 
201 	/* Don't rely on default value of IM bit */
202 	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
203 		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
204 	else
205 		ctrl |= SLC_CTRL_IM;
206 
207 	if (op & OP_INV)
208 		ctrl |= SLC_CTRL_RGN_OP_INV;	/* Inv or flush-n-inv */
209 	else
210 		ctrl &= ~SLC_CTRL_RGN_OP_INV;
211 
212 	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);
213 
214 	/*
215 	 * Lower bits are ignored, no need to clip
216 	 * END needs to be setup before START (latter triggers the operation)
217 	 * END can't be same as START, so add (l2_line_sz - 1) to sz
218 	 */
219 	end = paddr + sz + slc_line_sz - 1;
220 
221 	/*
222 	 * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1)
223 	 * are always == 0 as we don't use PAE40, so we only setup lower ones
224 	 * (ARC_AUX_SLC_RGN_END and ARC_AUX_SLC_RGN_START)
225 	 */
226 	write_aux_reg(ARC_AUX_SLC_RGN_END, end);
227 	write_aux_reg(ARC_AUX_SLC_RGN_START, paddr);
228 
229 	/* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
230 	read_aux_reg(ARC_AUX_SLC_CTRL);
231 
232 	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
233 }
234 #endif /* CONFIG_ISA_ARCV2 */
235 
236 #ifdef CONFIG_ISA_ARCV2
237 static void read_decode_cache_bcr_arcv2(void)
238 {
239 	union {
240 		struct {
241 #ifdef CONFIG_CPU_BIG_ENDIAN
242 			unsigned int pad:24, way:2, lsz:2, sz:4;
243 #else
244 			unsigned int sz:4, lsz:2, way:2, pad:24;
245 #endif
246 		} fields;
247 		unsigned int word;
248 	} slc_cfg;
249 
250 	union {
251 		struct {
252 #ifdef CONFIG_CPU_BIG_ENDIAN
253 			unsigned int pad:24, ver:8;
254 #else
255 			unsigned int ver:8, pad:24;
256 #endif
257 		} fields;
258 		unsigned int word;
259 	} sbcr;
260 
261 	sbcr.word = read_aux_reg(ARC_BCR_SLC);
262 	if (sbcr.fields.ver) {
263 		slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);
264 		slc_exists = true;
265 		slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;
266 	}
267 
268 	union {
269 		struct bcr_clust_cfg {
270 #ifdef CONFIG_CPU_BIG_ENDIAN
271 			unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
272 #else
273 			unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
274 #endif
275 		} fields;
276 		unsigned int word;
277 	} cbcr;
278 
279 	cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
280 	if (cbcr.fields.c && ioc_enable)
281 		ioc_exists = true;
282 }
283 #endif
284 
285 void read_decode_cache_bcr(void)
286 {
287 	int dc_line_sz = 0, ic_line_sz = 0;
288 
289 	union {
290 		struct {
291 #ifdef CONFIG_CPU_BIG_ENDIAN
292 			unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
293 #else
294 			unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
295 #endif
296 		} fields;
297 		unsigned int word;
298 	} ibcr, dbcr;
299 
300 	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
301 	if (ibcr.fields.ver) {
302 		icache_exists = true;
303 		l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
304 		if (!ic_line_sz)
305 			panic("Instruction exists but line length is 0\n");
306 	}
307 
308 	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
309 	if (dbcr.fields.ver) {
310 		dcache_exists = true;
311 		l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
312 		if (!dc_line_sz)
313 			panic("Data cache exists but line length is 0\n");
314 	}
315 
316 	if (ic_line_sz && dc_line_sz && (ic_line_sz != dc_line_sz))
317 		panic("Instruction and data cache line lengths differ\n");
318 }
319 
/*
 * Detect the caches present in hardware and perform their one-time setup.
 *
 * The L1 cache build registers are always decoded. On ARCv2 the SLC and
 * cluster build registers are decoded as well, the IO coherency aperture
 * is programmed over DDR and enabled when IOC is in use, the MMU build
 * register is decoded and the upper SLC region registers are cleared if
 * both SLC and PAE exist.
 *
 * Panics if the IOC aperture derived from CONFIG_SYS_SDRAM_BASE and
 * CONFIG_SYS_SDRAM_SIZE is not valid.
 */
void cache_init(void)
{
	read_decode_cache_bcr();

#ifdef CONFIG_ISA_ARCV2
	read_decode_cache_bcr_arcv2();

	if (ioc_exists) {
		/* IOC Aperture start is equal to DDR start */
		unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
		/*
		 * IOC Aperture size is equal to DDR size. Kept unsigned so
		 * that a size with the top bit set (e.g. 2 GiB where long is
		 * 32 bits wide) is not seen as negative by the checks below.
		 */
		unsigned long ap_size = CONFIG_SYS_SDRAM_SIZE;

		flush_n_invalidate_dcache_all();

		/* Validate the whole aperture before touching IOC registers */
		if (!is_power_of_2(ap_size) || ap_size < 4096)
			panic("IOC Aperture size must be power of 2 and bigger 4Kib");

		/* IOC Aperture start must be aligned to the size of the aperture */
		if (ap_base % ap_size != 0)
			panic("IOC Aperture start must be aligned to the size of the aperture");

		/*
		 * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
		 * so setting 0x11 implies 512M, 0x12 implies 1G...
		 */
		write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
			      order_base_2(ap_size / 1024) - 2);

		write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
		write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
		write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
	}

	read_decode_mmu_bcr();

	/*
	 * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 registers exist
	 * only if PAE exists in the current HW, so pae_exists has to be
	 * checked before using them.
	 */
	if (slc_exists && pae_exists)
		slc_upper_region_init();
#endif /* CONFIG_ISA_ARCV2 */
}
365 
366 int icache_status(void)
367 {
368 	if (!icache_exists)
369 		return 0;
370 
371 	if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE)
372 		return 0;
373 	else
374 		return 1;
375 }
376 
377 void icache_enable(void)
378 {
379 	if (icache_exists)
380 		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
381 			      ~IC_CTRL_CACHE_DISABLE);
382 }
383 
384 void icache_disable(void)
385 {
386 	if (icache_exists)
387 		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
388 			      IC_CTRL_CACHE_DISABLE);
389 }
390 
/*
 * Invalidate the entire instruction cache.
 *
 * IC supports only invalidation. Does nothing when icache_status() reports
 * that the I$ is absent or disabled.
 */
static inline void __ic_entire_invalidate(void)
{
	if (!icache_status())
		return;

	/* Any write to IC_IVIC register triggers invalidation of entire I$ */
	write_aux_reg(ARC_AUX_IC_IVIC, 1);
	/*
	 * As per ARC HS databook (see chapter 5.3.3.2)
	 * it is required to add 3 NOPs after each write to IC_IVIC.
	 */
	__builtin_arc_nop();
	__builtin_arc_nop();
	__builtin_arc_nop();
	read_aux_reg(ARC_AUX_IC_CTRL);  /* blocks */
}
408 
/*
 * Invalidate the entire instruction cache and, on ARCv2 with an SLC, drop
 * the SLC contents as well.
 */
void invalidate_icache_all(void)
{
	__ic_entire_invalidate();

#ifdef CONFIG_ISA_ARCV2
	/*
	 * Other functions in this file flush data through the SLC as well, so
	 * it may hold dirty data lines. A plain invalidate of the whole SLC
	 * would discard them without write-back, hence use flush-n-invalidate:
	 * the lines are still dropped, but dirty data is written back first.
	 */
	if (slc_exists)
		__slc_entire_op(OP_FLUSH_N_INV);
#endif
}
418 
419 int dcache_status(void)
420 {
421 	if (!dcache_exists)
422 		return 0;
423 
424 	if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE)
425 		return 0;
426 	else
427 		return 1;
428 }
429 
430 void dcache_enable(void)
431 {
432 	if (!dcache_exists)
433 		return;
434 
435 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
436 		      ~(DC_CTRL_INV_MODE_FLUSH | DC_CTRL_CACHE_DISABLE));
437 }
438 
439 void dcache_disable(void)
440 {
441 	if (!dcache_exists)
442 		return;
443 
444 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
445 		      DC_CTRL_CACHE_DISABLE);
446 }
447 
448 #ifndef CONFIG_SYS_DCACHE_OFF
449 /* Common Helper for Line Operations on D-cache */
450 static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
451 				      const int cacheop)
452 {
453 	unsigned int aux_cmd;
454 	int num_lines;
455 
456 	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
457 	aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
458 
459 	sz += paddr & ~CACHE_LINE_MASK;
460 	paddr &= CACHE_LINE_MASK;
461 
462 	num_lines = DIV_ROUND_UP(sz, l1_line_sz);
463 
464 	while (num_lines-- > 0) {
465 #if (CONFIG_ARC_MMU_VER == 3)
466 		write_aux_reg(ARC_AUX_DC_PTAG, paddr);
467 #endif
468 		write_aux_reg(aux_cmd, paddr);
469 		paddr += l1_line_sz;
470 	}
471 }
472 
473 static void __before_dc_op(const int op)
474 {
475 	unsigned int ctrl;
476 
477 	ctrl = read_aux_reg(ARC_AUX_DC_CTRL);
478 
479 	/* IM bit implies flush-n-inv, instead of vanilla inv */
480 	if (op == OP_INV)
481 		ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
482 	else
483 		ctrl |= DC_CTRL_INV_MODE_FLUSH;
484 
485 	write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
486 }
487 
488 static void __after_dc_op(const int op)
489 {
490 	if (op & OP_FLUSH)	/* flush / flush-n-inv both wait */
491 		while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
492 }
493 
494 static inline void __dc_entire_op(const int cacheop)
495 {
496 	int aux;
497 
498 	__before_dc_op(cacheop);
499 
500 	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
501 		aux = ARC_AUX_DC_IVDC;
502 	else
503 		aux = ARC_AUX_DC_FLSH;
504 
505 	write_aux_reg(aux, 0x1);
506 
507 	__after_dc_op(cacheop);
508 }
509 
/*
 * Run an operation on every L1 data cache line of an address range.
 *
 * @paddr:   start address of the range
 * @sz:      size of the range in bytes
 * @cacheop: OP_INV, OP_FLUSH or OP_FLUSH_N_INV
 */
static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
				const int cacheop)
{
	/* Select the invalidate mode first, then issue the line commands */
	__before_dc_op(cacheop);
	__dcache_line_loop(paddr, sz, cacheop);
	/* Waits for completion when the operation included a flush */
	__after_dc_op(cacheop);
}
517 #else
/* With CONFIG_SYS_DCACHE_OFF the D$ helpers expand to nothing */
#define __dc_entire_op(cacheop)
#define __dc_line_op(paddr, sz, cacheop)
520 #endif /* !CONFIG_SYS_DCACHE_OFF */
521 
522 void invalidate_dcache_range(unsigned long start, unsigned long end)
523 {
524 	if (start >= end)
525 		return;
526 
527 #ifdef CONFIG_ISA_ARCV2
528 	if (!ioc_exists)
529 #endif
530 		__dc_line_op(start, end - start, OP_INV);
531 
532 #ifdef CONFIG_ISA_ARCV2
533 	if (slc_exists && !ioc_exists)
534 		__slc_rgn_op(start, end - start, OP_INV);
535 #endif
536 }
537 
538 void flush_dcache_range(unsigned long start, unsigned long end)
539 {
540 	if (start >= end)
541 		return;
542 
543 #ifdef CONFIG_ISA_ARCV2
544 	if (!ioc_exists)
545 #endif
546 		__dc_line_op(start, end - start, OP_FLUSH);
547 
548 #ifdef CONFIG_ISA_ARCV2
549 	if (slc_exists && !ioc_exists)
550 		__slc_rgn_op(start, end - start, OP_FLUSH);
551 #endif
552 }
553 
/*
 * Flush the data caches for @size bytes starting at @start.
 *
 * Thin wrapper that converts (start, size) into the (start, end) form
 * expected by flush_dcache_range().
 */
void flush_cache(unsigned long start, unsigned long size)
{
	const unsigned long end = start + size;

	flush_dcache_range(start, end);
}
558 
/*
 * As invalidate_dcache_all() is not used in generic U-Boot code and as we
 * don't need it in arch/arc code alone (invalidate without flush) we implement
 * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because
 * it's much safer. See [ NOTE 1 ] for more details.
 *
 * The entire L1 D$ is processed first, then the entire SLC when one exists
 * (ARCv2 only).
 */
void flush_n_invalidate_dcache_all(void)
{
	__dc_entire_op(OP_FLUSH_N_INV);

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists)
		__slc_entire_op(OP_FLUSH_N_INV);
#endif
}
574 
/*
 * Flush (write back) the entire L1 data cache and then, on ARCv2, the
 * entire SLC when one exists. Nothing is invalidated.
 */
void flush_dcache_all(void)
{
	__dc_entire_op(OP_FLUSH);

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists)
		__slc_entire_op(OP_FLUSH);
#endif
}
584