xref: /openbmc/u-boot/arch/arc/lib/cache.c (revision ea9f6f1e873cc1b6fe220e1017357532045ce988)
1 /*
2  * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
3  *
4  * SPDX-License-Identifier:	GPL-2.0+
5  */
6 
7 #include <config.h>
8 #include <common.h>
9 #include <linux/compiler.h>
10 #include <linux/kernel.h>
11 #include <linux/log2.h>
12 #include <asm/arcregs.h>
13 #include <asm/arc-bcr.h>
14 #include <asm/cache.h>
15 
16 /*
17  * [ NOTE 1 ]:
18  * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable
19  * operation may result in unexpected behavior and data loss even if we flush
 * data cache right before invalidation. That may happen if we store any context
21  * on stack (like we store BLINK register on stack before function call).
22  * BLINK register is the register where return address is automatically saved
23  * when we do function call with instructions like 'bl'.
24  *
25  * There is the real example:
 * The following code may hang because the BLINK register is stored on the
 * stack in the invalidate_dcache_all() function.
28  *
29  * void flush_dcache_all() {
30  *     __dc_entire_op(OP_FLUSH);
31  *     // Other code //
32  * }
33  *
34  * void invalidate_dcache_all() {
35  *     __dc_entire_op(OP_INV);
36  *     // Other code //
37  * }
38  *
39  * void foo(void) {
40  *     flush_dcache_all();
41  *     invalidate_dcache_all();
42  * }
43  *
44  * Now let's see what really happens during that code execution:
45  *
46  * foo()
47  *   |->> call flush_dcache_all
48  *     [return address is saved to BLINK register]
49  *     [push BLINK] (save to stack)              ![point 1]
50  *     |->> call __dc_entire_op(OP_FLUSH)
51  *         [return address is saved to BLINK register]
52  *         [flush L1 D$]
53  *         return [jump to BLINK]
54  *     <<------
55  *     [other flush_dcache_all code]
56  *     [pop BLINK] (get from stack)
57  *     return [jump to BLINK]
58  *   <<------
59  *   |->> call invalidate_dcache_all
60  *     [return address is saved to BLINK register]
61  *     [push BLINK] (save to stack)               ![point 2]
 *     |->> call __dc_entire_op(OP_INV)
63  *         [return address is saved to BLINK register]
64  *         [invalidate L1 D$]                 ![point 3]
65  *         // Oops!!!
66  *         // We lose return address from invalidate_dcache_all function:
67  *         // we save it to stack and invalidate L1 D$ after that!
68  *         return [jump to BLINK]
69  *     <<------
70  *     [other invalidate_dcache_all code]
71  *     [pop BLINK] (get from stack)
72  *     // we don't have this data in L1 dcache as we invalidated it in [point 3]
73  *     // so we get it from next memory level (for example DDR memory)
74  *     // but in the memory we have value which we save in [point 1], which
75  *     // is return address from flush_dcache_all function (instead of
76  *     // address from current invalidate_dcache_all function which we
77  *     // saved in [point 2] !)
78  *     return [jump to BLINK]
79  *   <<------
 *   // As the restored BLINK points back to the call of invalidate_dcache_all
 *   // in foo(), we call it again and loop forever.
82  *
 * Fortunately we may fix that by flushing and invalidating D$ with a single
 * instruction (instead of a flush and invalidate instruction pair) and by
 * forcing function inlining with the '__attribute__((always_inline))' gcc
 * attribute to avoid any function call (and BLINK store) between cache flush
 * and disable.
88  */
89 
/*
 * Bit values in IC_CTRL.
 * CACHE_DISABLE is set to turn the instruction cache off and cleared to
 * turn it on (see icache_enable() / icache_disable()).
 */
#define IC_CTRL_CACHE_DISABLE	BIT(0)

/*
 * Bit values in DC_CTRL.
 *  - CACHE_DISABLE:  set to turn the data cache off.
 *  - INV_MODE_FLUSH: when set, an invalidate command performs
 *                    flush-n-invalidate instead of a plain invalidate.
 *  - FLUSH_STATUS:   polled after a flush until it reads back as zero.
 * CACHE_VER_NUM_MASK is not referenced by the code in this file.
 */
#define DC_CTRL_CACHE_DISABLE	BIT(0)
#define DC_CTRL_INV_MODE_FLUSH	BIT(6)
#define DC_CTRL_FLUSH_STATUS	BIT(8)
#define CACHE_VER_NUM_MASK	0xF

/*
 * Cache operation selectors passed to the __dc_*() and __slc_*() helpers.
 * They are bit flags, so OP_FLUSH_N_INV matches both "op & OP_INV" and
 * "op & OP_FLUSH" tests.
 */
#define OP_INV			BIT(0)
#define OP_FLUSH		BIT(1)
#define OP_FLUSH_N_INV		(OP_FLUSH | OP_INV)

/*
 * Bit values in SLC_CONTROL.
 *  - IM:         when set, an invalidate command also flushes first.
 *  - BUSY:       polled after an operation until it reads back as zero.
 *  - RGN_OP_INV: selects invalidate (rather than flush) for region operations.
 * SLC_CTRL_DIS is not referenced by the code in this file.
 */
#define SLC_CTRL_DIS		0x001
#define SLC_CTRL_IM		0x040
#define SLC_CTRL_BUSY		0x100
#define SLC_CTRL_RGN_OP_INV	0x200
108 
/*
 * By default these variables would fall into the .bss section.
 * But the .bss section is not relocated, so they would be initialized before
 * relocation and then used after being zeroed. They are therefore placed
 * explicitly into the .data section.
 */
/* L1 cache line size in bytes, filled in by read_decode_cache_bcr() */
int l1_line_sz __section(".data");
/* Set by read_decode_cache_bcr() when the corresponding L1 cache is present */
bool dcache_exists __section(".data") = false;
bool icache_exists __section(".data") = false;

/* Mask that rounds an address down to the start of its L1 cache line */
#define CACHE_LINE_MASK		(~(l1_line_sz - 1))

/* SLC line size in bytes and presence flag, set by read_decode_cache_bcr_arcv2() */
int slc_line_sz __section(".data");
bool slc_exists __section(".data") = false;
/* Set only when the cluster BCR reports IOC and ioc_enable is true */
bool ioc_exists __section(".data") = false;
/* Set by read_decode_mmu_bcr() from the MMU build configuration register */
bool pae_exists __section(".data") = false;

/* To force enable IOC set ioc_enable to 'true' */
bool ioc_enable __section(".data") = false;
127 
/*
 * Read the MMU build configuration register and latch its "pae" field into
 * pae_exists. Compiles to an empty function for MMU versions older than 4.
 */
void read_decode_mmu_bcr(void)
{
	/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
	union bcr_mmu_4 bcr;

	bcr.word = read_aux_reg(ARC_AUX_MMU_BCR);
	pae_exists = bcr.fields.pae ? true : false;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */
}
139 
140 static void __slc_entire_op(const int op)
141 {
142 	unsigned int ctrl;
143 
144 	if (!slc_exists)
145 		return;
146 
147 	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);
148 
149 	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
150 		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
151 	else
152 		ctrl |= SLC_CTRL_IM;
153 
154 	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);
155 
156 	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
157 		write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1);
158 	else
159 		write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1);
160 
161 	/* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
162 	read_aux_reg(ARC_AUX_SLC_CTRL);
163 
164 	/* Important to wait for flush to complete */
165 	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
166 }
167 
168 static void slc_upper_region_init(void)
169 {
170 	/*
171 	 * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
172 	 * as we don't use PAE40.
173 	 */
174 	write_aux_reg(ARC_AUX_SLC_RGN_END1, 0);
175 	write_aux_reg(ARC_AUX_SLC_RGN_START1, 0);
176 }
177 
/*
 * Run a region operation on the SLC and wait for it to finish.
 *
 * @paddr: start address of the region
 * @sz:    size of the region in bytes
 * @op:    OP_INV, OP_FLUSH or OP_FLUSH_N_INV
 *
 * Compiles to an empty function unless CONFIG_ISA_ARCV2 is defined, and does
 * nothing at run time when no SLC was detected.
 */
static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2

	unsigned int slc_ctrl;
	unsigned long rgn_end;

	if (!slc_exists)
		return;

	/*
	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
	 *  - b'000 (default) is Flush,
	 *  - b'001 is Invalidate if CTRL.IM == 0
	 *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
	 */
	slc_ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);

	/* Program IM explicitly, don't rely on its default value */
	if (op & OP_FLUSH)
		slc_ctrl |= SLC_CTRL_IM;
	else				/* plain OP_INV: no flush before inv */
		slc_ctrl &= ~SLC_CTRL_IM;

	/* Invalidate and flush-n-invalidate both use the "inv" region op */
	if (!(op & OP_INV))
		slc_ctrl &= ~SLC_CTRL_RGN_OP_INV;
	else
		slc_ctrl |= SLC_CTRL_RGN_OP_INV;

	write_aux_reg(ARC_AUX_SLC_CTRL, slc_ctrl);

	/*
	 * Lower bits are ignored, no need to clip.
	 * END can't be same as START, so (slc_line_sz - 1) is added to sz.
	 */
	rgn_end = paddr + sz + slc_line_sz - 1;

	/*
	 * Only the lower address registers are programmed: the upper ones
	 * (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1) are always == 0 as
	 * we don't use PAE40.
	 * END must be written before START because the latter triggers the
	 * operation.
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END, rgn_end);
	write_aux_reg(ARC_AUX_SLC_RGN_START, paddr);

	/* Dummy read so that "busy" reports correct status, see STAR 9001165532 */
	read_aux_reg(ARC_AUX_SLC_CTRL);

	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY)
		;

#endif /* CONFIG_ISA_ARCV2 */
}
231 
232 static void arc_ioc_setup(void)
233 {
234 	/* IOC Aperture start is equal to DDR start */
235 	unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
236 	/* IOC Aperture size is equal to DDR size */
237 	long ap_size = CONFIG_SYS_SDRAM_SIZE;
238 
239 	flush_n_invalidate_dcache_all();
240 
241 	if (!is_power_of_2(ap_size) || ap_size < 4096)
242 		panic("IOC Aperture size must be power of 2 and bigger 4Kib");
243 
244 	/*
245 	 * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
246 	 * so setting 0x11 implies 512M, 0x12 implies 1G...
247 	 */
248 	write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
249 		      order_base_2(ap_size / 1024) - 2);
250 
251 	/* IOC Aperture start must be aligned to the size of the aperture */
252 	if (ap_base % ap_size != 0)
253 		panic("IOC Aperture start must be aligned to the size of the aperture");
254 
255 	write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
256 	write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
257 	write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
258 }
259 
/*
 * Detect the ARCv2-only cache features from their build configuration
 * registers: SLC presence and line size, and IOC presence.
 *
 * ioc_exists is set only when the cluster BCR reports IOC and ioc_enable is
 * true. Compiles to an empty function unless CONFIG_ISA_ARCV2 is defined.
 */
static void read_decode_cache_bcr_arcv2(void)
{
#ifdef CONFIG_ISA_ARCV2

	union bcr_generic slc_bcr;
	union bcr_clust_cfg clust_bcr;

	slc_bcr.word = read_aux_reg(ARC_BCR_SLC);
	if (slc_bcr.fields.ver) {
		union bcr_slc_cfg cfg;

		cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);
		slc_exists = true;
		/* lsz == 0 selects 128 byte lines, anything else 64 byte lines */
		slc_line_sz = cfg.fields.lsz ? 64 : 128;
	}

	clust_bcr.word = read_aux_reg(ARC_BCR_CLUSTER);
	if (ioc_enable && clust_bcr.fields.c)
		ioc_exists = true;

#endif /* CONFIG_ISA_ARCV2 */
}
281 
282 void read_decode_cache_bcr(void)
283 {
284 	int dc_line_sz = 0, ic_line_sz = 0;
285 	union bcr_di_cache ibcr, dbcr;
286 
287 	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
288 	if (ibcr.fields.ver) {
289 		icache_exists = true;
290 		l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
291 		if (!ic_line_sz)
292 			panic("Instruction exists but line length is 0\n");
293 	}
294 
295 	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
296 	if (dbcr.fields.ver) {
297 		dcache_exists = true;
298 		l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
299 		if (!dc_line_sz)
300 			panic("Data cache exists but line length is 0\n");
301 	}
302 
303 	if (ic_line_sz && dc_line_sz && (ic_line_sz != dc_line_sz))
304 		panic("Instruction and data cache line lengths differ\n");
305 }
306 
307 void cache_init(void)
308 {
309 	read_decode_cache_bcr();
310 
311 	if (is_isa_arcv2())
312 		read_decode_cache_bcr_arcv2();
313 
314 	if (is_isa_arcv2() && ioc_exists)
315 		arc_ioc_setup();
316 
317 	read_decode_mmu_bcr();
318 
319 	/*
320 	 * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist
321 	 * only if PAE exists in current HW. So we had to check pae_exist
322 	 * before using them.
323 	 */
324 	if (is_isa_arcv2() && slc_exists && pae_exists)
325 		slc_upper_region_init();
326 }
327 
328 int icache_status(void)
329 {
330 	if (!icache_exists)
331 		return 0;
332 
333 	if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE)
334 		return 0;
335 	else
336 		return 1;
337 }
338 
339 void icache_enable(void)
340 {
341 	if (icache_exists)
342 		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
343 			      ~IC_CTRL_CACHE_DISABLE);
344 }
345 
346 void icache_disable(void)
347 {
348 	if (icache_exists)
349 		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
350 			      IC_CTRL_CACHE_DISABLE);
351 }
352 
353 /* IC supports only invalidation */
354 static inline void __ic_entire_invalidate(void)
355 {
356 	if (!icache_status())
357 		return;
358 
359 	/* Any write to IC_IVIC register triggers invalidation of entire I$ */
360 	write_aux_reg(ARC_AUX_IC_IVIC, 1);
361 	/*
362 	 * As per ARC HS databook (see chapter 5.3.3.2)
363 	 * it is required to add 3 NOPs after each write to IC_IVIC.
364 	 */
365 	__builtin_arc_nop();
366 	__builtin_arc_nop();
367 	__builtin_arc_nop();
368 	read_aux_reg(ARC_AUX_IC_CTRL);  /* blocks */
369 }
370 
371 void invalidate_icache_all(void)
372 {
373 	__ic_entire_invalidate();
374 
375 	if (is_isa_arcv2())
376 		__slc_entire_op(OP_INV);
377 }
378 
379 int dcache_status(void)
380 {
381 	if (!dcache_exists)
382 		return 0;
383 
384 	if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE)
385 		return 0;
386 	else
387 		return 1;
388 }
389 
390 void dcache_enable(void)
391 {
392 	if (!dcache_exists)
393 		return;
394 
395 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
396 		      ~(DC_CTRL_INV_MODE_FLUSH | DC_CTRL_CACHE_DISABLE));
397 }
398 
399 void dcache_disable(void)
400 {
401 	if (!dcache_exists)
402 		return;
403 
404 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
405 		      DC_CTRL_CACHE_DISABLE);
406 }
407 
408 /* Common Helper for Line Operations on D-cache */
409 static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
410 				      const int cacheop)
411 {
412 	unsigned int aux_cmd;
413 	int num_lines;
414 
415 	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
416 	aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
417 
418 	sz += paddr & ~CACHE_LINE_MASK;
419 	paddr &= CACHE_LINE_MASK;
420 
421 	num_lines = DIV_ROUND_UP(sz, l1_line_sz);
422 
423 	while (num_lines-- > 0) {
424 #if (CONFIG_ARC_MMU_VER == 3)
425 		write_aux_reg(ARC_AUX_DC_PTAG, paddr);
426 #endif
427 		write_aux_reg(aux_cmd, paddr);
428 		paddr += l1_line_sz;
429 	}
430 }
431 
432 static void __before_dc_op(const int op)
433 {
434 	unsigned int ctrl;
435 
436 	ctrl = read_aux_reg(ARC_AUX_DC_CTRL);
437 
438 	/* IM bit implies flush-n-inv, instead of vanilla inv */
439 	if (op == OP_INV)
440 		ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
441 	else
442 		ctrl |= DC_CTRL_INV_MODE_FLUSH;
443 
444 	write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
445 }
446 
447 static void __after_dc_op(const int op)
448 {
449 	if (op & OP_FLUSH)	/* flush / flush-n-inv both wait */
450 		while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
451 }
452 
453 static inline void __dc_entire_op(const int cacheop)
454 {
455 	int aux;
456 
457 	if (!dcache_status())
458 		return;
459 
460 	__before_dc_op(cacheop);
461 
462 	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
463 		aux = ARC_AUX_DC_IVDC;
464 	else
465 		aux = ARC_AUX_DC_FLSH;
466 
467 	write_aux_reg(aux, 0x1);
468 
469 	__after_dc_op(cacheop);
470 }
471 
/*
 * Run a per-line operation on the L1 D-cache over an address range.
 *
 * @paddr:   start address of the range
 * @sz:      size of the range in bytes
 * @cacheop: OP_INV, OP_FLUSH or OP_FLUSH_N_INV
 *
 * Does nothing when the D$ is absent or disabled.
 */
static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
				const int cacheop)
{
	if (dcache_status()) {
		__before_dc_op(cacheop);
		__dcache_line_loop(paddr, sz, cacheop);
		__after_dc_op(cacheop);
	}
}
482 
483 void invalidate_dcache_range(unsigned long start, unsigned long end)
484 {
485 	if (start >= end)
486 		return;
487 
488 	/*
489 	 * ARCv1                  -> call __dc_line_op
490 	 * ARCv2 && no IOC        -> call __dc_line_op; call __slc_rgn_op
491 	 * ARCv2 && IOC enabled   -> nothing
492 	 */
493 	if (!is_isa_arcv2() || !ioc_exists)
494 		__dc_line_op(start, end - start, OP_INV);
495 
496 	if (is_isa_arcv2() && !ioc_exists)
497 		__slc_rgn_op(start, end - start, OP_INV);
498 }
499 
500 void flush_dcache_range(unsigned long start, unsigned long end)
501 {
502 	if (start >= end)
503 		return;
504 
505 	/*
506 	 * ARCv1                  -> call __dc_line_op
507 	 * ARCv2 && no IOC        -> call __dc_line_op; call __slc_rgn_op
508 	 * ARCv2 && IOC enabled   -> nothing
509 	 */
510 	if (!is_isa_arcv2() || !ioc_exists)
511 		__dc_line_op(start, end - start, OP_FLUSH);
512 
513 	if (is_isa_arcv2() && !ioc_exists)
514 		__slc_rgn_op(start, end - start, OP_FLUSH);
515 }
516 
/* Flush the data caches for @size bytes starting at @start */
void flush_cache(unsigned long start, unsigned long size)
{
	const unsigned long end = start + size;

	flush_dcache_range(start, end);
}
521 
522 /*
523  * As invalidate_dcache_all() is not used in generic U-Boot code and as we
524  * don't need it in arch/arc code alone (invalidate without flush) we implement
525  * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because
526  * it's much safer. See [ NOTE 1 ] for more details.
527  */
528 void flush_n_invalidate_dcache_all(void)
529 {
530 	__dc_entire_op(OP_FLUSH_N_INV);
531 
532 	if (is_isa_arcv2())
533 		__slc_entire_op(OP_FLUSH_N_INV);
534 }
535 
536 void flush_dcache_all(void)
537 {
538 	__dc_entire_op(OP_FLUSH);
539 
540 	if (is_isa_arcv2())
541 		__slc_entire_op(OP_FLUSH);
542 }
543