xref: /openbmc/u-boot/arch/arc/lib/cache.c (revision a6f557c4)
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <config.h>
#include <common.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <asm/arcregs.h>
#include <asm/cache.h>

/*
 * [ NOTE 1 ]:
 * An entire-invalidate operation on the data cache (L1 D$ or SL$), or a
 * data cache disable operation, may result in unexpected behavior and data
 * loss even if we flush the data cache right before the invalidation. That
 * may happen if we store any context on the stack (as we store the BLINK
 * register on the stack before a function call). The BLINK register is the
 * register where the return address is automatically saved when we do a
 * function call with instructions like 'bl'.
 *
 * Here is a real example:
 * We may hang in the code below because we store the BLINK register on the
 * stack in the invalidate_dcache_all() function.
 *
 * void flush_dcache_all() {
 *     __dc_entire_op(OP_FLUSH);
 *     // Other code //
 * }
 *
 * void invalidate_dcache_all() {
 *     __dc_entire_op(OP_INV);
 *     // Other code //
 * }
 *
 * void foo(void) {
 *     flush_dcache_all();
 *     invalidate_dcache_all();
 * }
 *
 * Now let's see what really happens during that code execution:
 *
 * foo()
 *   |->> call flush_dcache_all
 *     [return address is saved to BLINK register]
 *     [push BLINK] (save to stack)              ![point 1]
 *     |->> call __dc_entire_op(OP_FLUSH)
 *         [return address is saved to BLINK register]
 *         [flush L1 D$]
 *         return [jump to BLINK]
 *     <<------
 *     [other flush_dcache_all code]
 *     [pop BLINK] (get from stack)
 *     return [jump to BLINK]
 *   <<------
 *   |->> call invalidate_dcache_all
 *     [return address is saved to BLINK register]
 *     [push BLINK] (save to stack)               ![point 2]
 *     |->> call __dc_entire_op(OP_INV)
 *         [return address is saved to BLINK register]
 *         [invalidate L1 D$]                 ![point 3]
 *         // Oops!!!
 *         // We lose the return address of the invalidate_dcache_all
 *         // function: we saved it on the stack and then invalidated
 *         // the L1 D$!
 *         return [jump to BLINK]
 *     <<------
 *     [other invalidate_dcache_all code]
 *     [pop BLINK] (get from stack)
 *     // We don't have this data in the L1 dcache as we invalidated it in
 *     // [point 3], so we get it from the next memory level (for example
 *     // DDR memory). But in memory we have the value saved in [point 1],
 *     // which is the return address of the flush_dcache_all function
 *     // (instead of the return address of the current
 *     // invalidate_dcache_all function, which we saved in [point 2]!)
 *     return [jump to BLINK]
 *   <<------
 *   // As BLINK points into invalidate_dcache_all, we call it again and
 *   // loop forever.
 *
 * Fortunately we may fix that by doing the flush and invalidation of the D$
 * with a single instruction (instead of a flush/invalidate instruction
 * pair) and by forcing function inlining with the
 * '__attribute__((always_inline))' gcc attribute, to avoid any function
 * call (and hence BLINK store) between the cache flush and disable.
 */

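/*
 * A minimal sketch of the fix described in [ NOTE 1 ] (illustrative only;
 * the helper below is hypothetical and not part of this file). The flush
 * and the invalidate happen as one flush-n-inv command, and always_inline
 * guarantees no BLINK value is pushed to (and later read back from) a
 * stale stack line in between:
 *
 * static inline __attribute__((always_inline))
 * void dc_flush_n_inv_entire(void)
 * {
 *     // IM bit set => a single write to the "invalidate" command
 *     // register performs flush-then-invalidate as one operation.
 *     write_aux_reg(ARC_AUX_DC_CTRL,
 *                   read_aux_reg(ARC_AUX_DC_CTRL) | DC_CTRL_INV_MODE_FLUSH);
 *     write_aux_reg(ARC_AUX_DC_IVDC, 0x1);
 *     // Wait for the flush part to complete.
 *     while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
 *         ;
 * }
 */
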
/* Bit values in IC_CTRL */
#define IC_CTRL_CACHE_DISABLE	BIT(0)

/* Bit values in DC_CTRL */
#define DC_CTRL_CACHE_DISABLE	BIT(0)
#define DC_CTRL_INV_MODE_FLUSH	BIT(6)
#define DC_CTRL_FLUSH_STATUS	BIT(8)
#define CACHE_VER_NUM_MASK	0xF

#define OP_INV			BIT(0)
#define OP_FLUSH		BIT(1)
#define OP_FLUSH_N_INV		(OP_FLUSH | OP_INV)

/* Bit values in SLC_CTRL */
#define SLC_CTRL_DIS		0x001
#define SLC_CTRL_IM		0x040
#define SLC_CTRL_BUSY		0x100
#define SLC_CTRL_RGN_OP_INV	0x200
/*
 * By default these variables would fall into the .bss section.
 * But the .bss section is not relocated: the variables would be
 * initialized before relocation and then lost when .bss is zeroed
 * afterwards. Placing them in .data avoids that.
 */
int l1_line_sz __section(".data");
bool dcache_exists __section(".data") = false;
bool icache_exists __section(".data") = false;

#define CACHE_LINE_MASK		(~(l1_line_sz - 1))

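/*
 * Worked example (assuming a 64-byte L1 line, i.e. l1_line_sz == 64):
 * CACHE_LINE_MASK == ~0x3F, so 'addr & CACHE_LINE_MASK' rounds an address
 * down to the start of its cache line, e.g. 0x1234 -> 0x1200, while
 * 'addr & ~CACHE_LINE_MASK' (== 0x34 here) is the offset within the line.
 * __dcache_line_loop() below relies on exactly this.
 */
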
#ifdef CONFIG_ISA_ARCV2
int slc_line_sz __section(".data");
bool slc_exists __section(".data") = false;
bool ioc_exists __section(".data") = false;
bool pae_exists __section(".data") = false;

/* To force enable IOC set ioc_enable to 'true' */
bool ioc_enable __section(".data") = false;

void read_decode_mmu_bcr(void)
{
	/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
	u32 tmp;

	tmp = read_aux_reg(ARC_AUX_MMU_BCR);

	struct bcr_mmu_4 {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
		     n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
#else
	/*           DTLB      ITLB      JES        JE         JA      */
	unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
		     pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
#endif /* CONFIG_CPU_BIG_ENDIAN */
	} *mmu4;

	mmu4 = (struct bcr_mmu_4 *)&tmp;

	pae_exists = !!mmu4->pae;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */
}

static void __slc_entire_op(const int op)
{
	unsigned int ctrl;

	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);

	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
	else
		ctrl |= SLC_CTRL_IM;

	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);

	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
		write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1);
	else
		write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1);

	/* Make sure "busy" bit reports correct status, see STAR 9001165532 */
	read_aux_reg(ARC_AUX_SLC_CTRL);

	/* Important to wait for flush to complete */
	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
}

static void slc_upper_region_init(void)
{
	/*
	 * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
	 * as we don't use PAE40.
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END1, 0);
	write_aux_reg(ARC_AUX_SLC_RGN_START1, 0);
}

static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
{
	unsigned int ctrl;
	unsigned long end;

	/*
	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
	 *  - b'000 (default) is Flush,
	 *  - b'001 is Invalidate if CTRL.IM == 0
	 *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
	 */
	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);

	/* Don't rely on default value of IM bit */
	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
	else
		ctrl |= SLC_CTRL_IM;

	if (op & OP_INV)
		ctrl |= SLC_CTRL_RGN_OP_INV;	/* Inv or flush-n-inv */
	else
		ctrl &= ~SLC_CTRL_RGN_OP_INV;

	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);

	/*
	 * Lower bits are ignored, no need to clip.
	 * END needs to be set up before START (the latter triggers the
	 * operation). END can't be the same as START, so add
	 * (slc_line_sz - 1) to sz.
	 */
	end = paddr + sz + slc_line_sz - 1;

	/*
	 * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1)
	 * are always == 0 as we don't use PAE40, so we only set up the lower
	 * ones (ARC_AUX_SLC_RGN_END and ARC_AUX_SLC_RGN_START).
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END, end);
	write_aux_reg(ARC_AUX_SLC_RGN_START, paddr);

	/* Make sure "busy" bit reports correct status, see STAR 9001165532 */
	read_aux_reg(ARC_AUX_SLC_CTRL);

	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
}

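/*
 * Worked example for the END calculation above (assuming a 128-byte SLC
 * line, i.e. slc_line_sz == 128): with paddr == 0x80000040 and sz == 0x100
 * we program END = 0x80000040 + 0x100 + 0x7F = 0x800001BF and
 * START = 0x80000040. The HW ignores the low bits, so this covers every
 * SLC line touched by [paddr, paddr + sz), and the '+ slc_line_sz - 1'
 * guarantees END never equals START, even for a tiny region.
 */
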
static void arc_ioc_setup(void)
{
	/* IOC Aperture start is equal to DDR start */
	unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
	/* IOC Aperture size is equal to DDR size */
	long ap_size = CONFIG_SYS_SDRAM_SIZE;

	flush_n_invalidate_dcache_all();

	if (!is_power_of_2(ap_size) || ap_size < 4096)
		panic("IOC Aperture size must be power of 2 and bigger than 4KiB");

	/*
	 * IOC Aperture size is decoded as 2 ^ (SIZE + 2) KB,
	 * so setting 0x11 implies 512M, 0x12 implies 1G...
	 */
	write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
		      order_base_2(ap_size / 1024) - 2);

	/* IOC Aperture start must be aligned to the size of the aperture */
	if (ap_base % ap_size != 0)
		panic("IOC Aperture start must be aligned to the size of the aperture");

	write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
	write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
	write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
}
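
/*
 * Worked example for the AP0_SIZE encoding above: for a 512 MiB aperture,
 * ap_size / 1024 == 524288 KiB == 2^19 KiB, so order_base_2() gives 19 and
 * we program 19 - 2 == 17 == 0x11, matching "0x11 implies 512M" above.
 * Likewise a 1 GiB aperture gives 20 - 2 == 18 == 0x12.
 */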
#endif /* CONFIG_ISA_ARCV2 */

#ifdef CONFIG_ISA_ARCV2
static void read_decode_cache_bcr_arcv2(void)
{
	union {
		struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
			unsigned int pad:24, way:2, lsz:2, sz:4;
#else
			unsigned int sz:4, lsz:2, way:2, pad:24;
#endif
		} fields;
		unsigned int word;
	} slc_cfg;

	union {
		struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
			unsigned int pad:24, ver:8;
#else
			unsigned int ver:8, pad:24;
#endif
		} fields;
		unsigned int word;
	} sbcr;

	sbcr.word = read_aux_reg(ARC_BCR_SLC);
	if (sbcr.fields.ver) {
		slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);
		slc_exists = true;
		slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;
	}

	union {
		struct bcr_clust_cfg {
#ifdef CONFIG_CPU_BIG_ENDIAN
			unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
#else
			unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
#endif
		} fields;
		unsigned int word;
	} cbcr;

	cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
	if (cbcr.fields.c && ioc_enable)
		ioc_exists = true;
}
#endif

void read_decode_cache_bcr(void)
{
	int dc_line_sz = 0, ic_line_sz = 0;

	union {
		struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
			unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
#else
			unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
#endif
		} fields;
		unsigned int word;
	} ibcr, dbcr;

	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
	if (ibcr.fields.ver) {
		icache_exists = true;
		l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
		if (!ic_line_sz)
			panic("Instruction cache exists but line length is 0\n");
	}

	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
	if (dbcr.fields.ver) {
		dcache_exists = true;
		l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
		if (!dc_line_sz)
			panic("Data cache exists but line length is 0\n");
	}

	if (ic_line_sz && dc_line_sz && (ic_line_sz != dc_line_sz))
		panic("Instruction and data cache line lengths differ\n");
}

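/*
 * Worked example for the BCR decode above: the I$ line length is
 * 8 << IC_BUILD.line_len while the D$ line length is
 * 16 << DC_BUILD.line_len, so a CPU with 64-byte lines reports
 * line_len == 3 in IC_BUILD but line_len == 2 in DC_BUILD; both decode to
 * l1_line_sz == 64 and pass the "lengths differ" check.
 */
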
void cache_init(void)
{
	read_decode_cache_bcr();

#ifdef CONFIG_ISA_ARCV2
	read_decode_cache_bcr_arcv2();

	if (ioc_exists)
		arc_ioc_setup();

	read_decode_mmu_bcr();

	/*
	 * The ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 registers
	 * exist only if PAE exists in the current HW, so we have to check
	 * pae_exists before using them.
	 */
	if (slc_exists && pae_exists)
		slc_upper_region_init();
#endif /* CONFIG_ISA_ARCV2 */
}

int icache_status(void)
{
	if (!icache_exists)
		return 0;

	if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE)
		return 0;
	else
		return 1;
}

void icache_enable(void)
{
	if (icache_exists)
		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
			      ~IC_CTRL_CACHE_DISABLE);
}

void icache_disable(void)
{
	if (icache_exists)
		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
			      IC_CTRL_CACHE_DISABLE);
}

/* IC supports only invalidation */
static inline void __ic_entire_invalidate(void)
{
	if (!icache_status())
		return;

	/* Any write to IC_IVIC register triggers invalidation of entire I$ */
	write_aux_reg(ARC_AUX_IC_IVIC, 1);
	/*
	 * As per the ARC HS databook (see chapter 5.3.3.2)
	 * it is required to add 3 NOPs after each write to IC_IVIC.
	 */
	__builtin_arc_nop();
	__builtin_arc_nop();
	__builtin_arc_nop();
	read_aux_reg(ARC_AUX_IC_CTRL);  /* blocks */
}

void invalidate_icache_all(void)
{
	__ic_entire_invalidate();

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists)
		__slc_entire_op(OP_INV);
#endif
}

int dcache_status(void)
{
	if (!dcache_exists)
		return 0;

	if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE)
		return 0;
	else
		return 1;
}

void dcache_enable(void)
{
	if (!dcache_exists)
		return;

	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
		      ~(DC_CTRL_INV_MODE_FLUSH | DC_CTRL_CACHE_DISABLE));
}

void dcache_disable(void)
{
	if (!dcache_exists)
		return;

	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
		      DC_CTRL_CACHE_DISABLE);
}

#ifndef CONFIG_SYS_DCACHE_OFF
/* Common Helper for Line Operations on D-cache */
static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
				      const int cacheop)
{
	unsigned int aux_cmd;
	int num_lines;

	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
	aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;

	sz += paddr & ~CACHE_LINE_MASK;
	paddr &= CACHE_LINE_MASK;

	num_lines = DIV_ROUND_UP(sz, l1_line_sz);

	while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER == 3)
		write_aux_reg(ARC_AUX_DC_PTAG, paddr);
#endif
		write_aux_reg(aux_cmd, paddr);
		paddr += l1_line_sz;
	}
}

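/*
 * Worked example for the alignment math in __dcache_line_loop() (assuming
 * l1_line_sz == 64): for paddr == 0x1234 and sz == 0x100, sz becomes
 * 0x100 + 0x34 == 0x134 and paddr becomes 0x1200, so
 * num_lines == DIV_ROUND_UP(0x134, 0x40) == 5 -- exactly the lines at
 * 0x1200, 0x1240, 0x1280, 0x12C0 and 0x1300 that the original
 * [0x1234, 0x1334) range touches.
 */
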
static void __before_dc_op(const int op)
{
	unsigned int ctrl;

	ctrl = read_aux_reg(ARC_AUX_DC_CTRL);

	/* IM bit implies flush-n-inv, instead of vanilla inv */
	if (op == OP_INV)
		ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
	else
		ctrl |= DC_CTRL_INV_MODE_FLUSH;

	write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
}

static void __after_dc_op(const int op)
{
	if (op & OP_FLUSH)	/* flush / flush-n-inv both wait */
		while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
}

static inline void __dc_entire_op(const int cacheop)
{
	int aux;

	__before_dc_op(cacheop);

	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
		aux = ARC_AUX_DC_IVDC;
	else
		aux = ARC_AUX_DC_FLSH;

	write_aux_reg(aux, 0x1);

	__after_dc_op(cacheop);
}

static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
				const int cacheop)
{
	__before_dc_op(cacheop);
	__dcache_line_loop(paddr, sz, cacheop);
	__after_dc_op(cacheop);
}
#else
#define __dc_entire_op(cacheop)
#define __dc_line_op(paddr, sz, cacheop)
#endif /* !CONFIG_SYS_DCACHE_OFF */

void invalidate_dcache_range(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;

#ifdef CONFIG_ISA_ARCV2
	if (!ioc_exists)
#endif
		__dc_line_op(start, end - start, OP_INV);

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists && !ioc_exists)
		__slc_rgn_op(start, end - start, OP_INV);
#endif
}

void flush_dcache_range(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;

#ifdef CONFIG_ISA_ARCV2
	if (!ioc_exists)
#endif
		__dc_line_op(start, end - start, OP_FLUSH);

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists && !ioc_exists)
		__slc_rgn_op(start, end - start, OP_FLUSH);
#endif
}

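/*
 * Typical usage of the two range operations above in a driver doing DMA
 * (a hedged sketch; 'buf' and 'len' are illustrative names):
 *
 * // CPU -> device: write back dirty lines so the device sees the data
 * flush_dcache_range((unsigned long)buf, (unsigned long)buf + len);
 * // ... start DMA from 'buf' ...
 *
 * // device -> CPU: drop stale cached copies before reading
 * invalidate_dcache_range((unsigned long)buf, (unsigned long)buf + len);
 * // ... now read the DMA'ed data from 'buf' ...
 */
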
void flush_cache(unsigned long start, unsigned long size)
{
	flush_dcache_range(start, start + size);
}

/*
 * As invalidate_dcache_all() is not used in generic U-Boot code, and as we
 * don't need a pure invalidate (without flush) in arch/arc code either, we
 * implement flush_n_invalidate_dcache_all() (flush and invalidate in one
 * operation) instead, because it's much safer. See [ NOTE 1 ] for details.
 */
void flush_n_invalidate_dcache_all(void)
{
	__dc_entire_op(OP_FLUSH_N_INV);

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists)
		__slc_entire_op(OP_FLUSH_N_INV);
#endif
}

void flush_dcache_all(void)
{
	__dc_entire_op(OP_FLUSH);

#ifdef CONFIG_ISA_ARCV2
	if (slc_exists)
		__slc_entire_op(OP_FLUSH);
#endif
}