xref: /openbmc/u-boot/arch/arc/lib/cache.c (revision 375945ba)
1 /*
2  * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
3  *
4  * SPDX-License-Identifier:	GPL-2.0+
5  */
6 
7 #include <config.h>
8 #include <common.h>
9 #include <linux/compiler.h>
10 #include <linux/kernel.h>
11 #include <linux/log2.h>
12 #include <asm/arcregs.h>
13 #include <asm/arc-bcr.h>
14 #include <asm/cache.h>
15 
16 /*
17  * [ NOTE 1 ]:
18  * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable
19  * operation may result in unexpected behavior and data loss even if we flush
20  * data cache right before invalidation. That may happen if we store any context
21  * on stack (like we store BLINK register on stack before function call).
22  * BLINK register is the register where return address is automatically saved
23  * when we do function call with instructions like 'bl'.
24  *
25  * There is the real example:
26  * We may hang in the next code as we store any BLINK register on stack in
27  * invalidate_dcache_all() function.
28  *
29  * void flush_dcache_all() {
30  *     __dc_entire_op(OP_FLUSH);
31  *     // Other code //
32  * }
33  *
34  * void invalidate_dcache_all() {
35  *     __dc_entire_op(OP_INV);
36  *     // Other code //
37  * }
38  *
39  * void foo(void) {
40  *     flush_dcache_all();
41  *     invalidate_dcache_all();
42  * }
43  *
44  * Now let's see what really happens during that code execution:
45  *
46  * foo()
47  *   |->> call flush_dcache_all
48  *     [return address is saved to BLINK register]
49  *     [push BLINK] (save to stack)              ![point 1]
50  *     |->> call __dc_entire_op(OP_FLUSH)
51  *         [return address is saved to BLINK register]
52  *         [flush L1 D$]
53  *         return [jump to BLINK]
54  *     <<------
55  *     [other flush_dcache_all code]
56  *     [pop BLINK] (get from stack)
57  *     return [jump to BLINK]
58  *   <<------
59  *   |->> call invalidate_dcache_all
60  *     [return address is saved to BLINK register]
61  *     [push BLINK] (save to stack)               ![point 2]
62  *     |->> call __dc_entire_op(OP_INV)
63  *         [return address is saved to BLINK register]
64  *         [invalidate L1 D$]                 ![point 3]
65  *         // Oops!!!
66  *         // We lose return address from invalidate_dcache_all function:
67  *         // we save it to stack and invalidate L1 D$ after that!
68  *         return [jump to BLINK]
69  *     <<------
70  *     [other invalidate_dcache_all code]
71  *     [pop BLINK] (get from stack)
72  *     // we don't have this data in L1 dcache as we invalidated it in [point 3]
73  *     // so we get it from next memory level (for example DDR memory)
74  *     // but in the memory we have value which we save in [point 1], which
75  *     // is return address from flush_dcache_all function (instead of
76  *     // address from current invalidate_dcache_all function which we
77  *     // saved in [point 2] !)
78  *     return [jump to BLINK]
79  *   <<------
80  *   // As BLINK points to invalidate_dcache_all, we call it again and
81  *   // loop forever.
82  *
83  * Fortunately we may fix that by flushing & invalidating D$ with a single
84  * instruction (instead of a flush and invalidate instruction pair) and by
85  * forcing function inlining with the '__attribute__((always_inline))' gcc
86  * attribute to avoid any function call (and BLINK store) between cache flush
87  * and disable.
88  */
89 
90 DECLARE_GLOBAL_DATA_PTR;
91 
/* IC_CTRL register bits */
#define IC_CTRL_CACHE_DISABLE	BIT(0)	/* set: I$ is disabled */

/* DC_CTRL register bits */
#define DC_CTRL_CACHE_DISABLE	BIT(0)	/* set: D$ is disabled */
#define DC_CTRL_INV_MODE_FLUSH	BIT(6)	/* IM: invalidate becomes flush-n-inv */
#define DC_CTRL_FLUSH_STATUS	BIT(8)	/* polled until a flush has finished */

/* Cache operation selectors passed around inside this file */
#define OP_INV			BIT(0)
#define OP_FLUSH		BIT(1)
#define OP_FLUSH_N_INV		(OP_INV | OP_FLUSH)

/* SLC_CONTROL register bits */
#define SLC_CTRL_DIS		0x001
#define SLC_CTRL_IM		0x040	/* IM: invalidate becomes flush-n-inv */
#define SLC_CTRL_BUSY		0x100	/* polled until an SLC op has finished */
#define SLC_CTRL_RGN_OP_INV	0x200	/* region op is (flush-n-)invalidate */

/*
 * Mask that strips the offset-within-line bits from an address. Evaluates
 * gd->arch.l1_line_sz on every use, so it is only meaningful once the L1
 * line size has been stored there.
 */
#define CACHE_LINE_MASK		(~(gd->arch.l1_line_sz - 1))
111 
/*
 * Report whether the MMU build configuration register advertises PAE.
 * Always false when built for an MMU older than version 4.
 */
static inline bool pae_exists(void)
{
	/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
	union bcr_mmu_4 mmu_bcr;

	mmu_bcr.word = read_aux_reg(ARC_AUX_MMU_BCR);

	return mmu_bcr.fields.pae ? true : false;
#else
	return false;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */
}
126 
127 static inline bool icache_exists(void)
128 {
129 	union bcr_di_cache ibcr;
130 
131 	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
132 	return !!ibcr.fields.ver;
133 }
134 
135 static inline bool icache_enabled(void)
136 {
137 	if (!icache_exists())
138 		return false;
139 
140 	return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE);
141 }
142 
143 static inline bool dcache_exists(void)
144 {
145 	union bcr_di_cache dbcr;
146 
147 	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
148 	return !!dbcr.fields.ver;
149 }
150 
151 static inline bool dcache_enabled(void)
152 {
153 	if (!dcache_exists())
154 		return false;
155 
156 	return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE);
157 }
158 
159 static inline bool slc_exists(void)
160 {
161 	if (is_isa_arcv2()) {
162 		union bcr_generic sbcr;
163 
164 		sbcr.word = read_aux_reg(ARC_BCR_SLC);
165 		return !!sbcr.fields.ver;
166 	}
167 
168 	return false;
169 }
170 
/*
 * Tell whether data accesses bypass SL$.
 *
 * With L1 D$ disabled, SL$ is bypassed and every load/store request goes
 * straight to main memory, so the answer is simply "D$ is not enabled".
 */
static inline bool slc_data_bypass(void)
{
	if (dcache_enabled())
		return false;

	return true;
}
179 
180 static inline bool ioc_exists(void)
181 {
182 	if (is_isa_arcv2()) {
183 		union bcr_clust_cfg cbcr;
184 
185 		cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
186 		return cbcr.fields.c;
187 	}
188 
189 	return false;
190 }
191 
/*
 * IOC counts as enabled when the configuration asks for it and the HW has it.
 *
 * Only the CONFIG-driven switch is consulted (not the IOC HW state) because
 * IOC must be disabled by default.
 */
static inline bool ioc_enabled(void)
{
	return is_ioc_enabled() && ioc_exists();
}
203 
204 static void __slc_entire_op(const int op)
205 {
206 	unsigned int ctrl;
207 
208 	if (!slc_exists())
209 		return;
210 
211 	ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);
212 
213 	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
214 		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
215 	else
216 		ctrl |= SLC_CTRL_IM;
217 
218 	write_aux_reg(ARC_AUX_SLC_CTRL, ctrl);
219 
220 	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
221 		write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1);
222 	else
223 		write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1);
224 
225 	/* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
226 	read_aux_reg(ARC_AUX_SLC_CTRL);
227 
228 	/* Important to wait for flush to complete */
229 	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
230 }
231 
232 static void slc_upper_region_init(void)
233 {
234 	/*
235 	 * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist
236 	 * only if PAE exists in current HW. So we had to check pae_exist
237 	 * before using them.
238 	 */
239 	if (!pae_exists())
240 		return;
241 
242 	/*
243 	 * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
244 	 * as we don't use PAE40.
245 	 */
246 	write_aux_reg(ARC_AUX_SLC_RGN_END1, 0);
247 	write_aux_reg(ARC_AUX_SLC_RGN_START1, 0);
248 }
249 
/*
 * Run a region operation on SL$ and wait until it has finished.
 *
 * @paddr: start address of the region
 * @sz:    size of the region in bytes
 * @op:    OP_INV, OP_FLUSH or OP_FLUSH_N_INV
 *
 * Compiles to an empty function unless CONFIG_ISA_ARCV2 is set, and does
 * nothing at run time when no SLC is present.
 */
static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2

	unsigned long rgn_end;
	unsigned int slc_ctrl;

	if (!slc_exists())
		return;

	/*
	 * The region operation is selected through CTRL.RGN_OP[11..9]:
	 *  - b'000 (default): Flush
	 *  - b'001 with CTRL.IM == 0: Invalidate
	 *  - b'001 with CTRL.IM == 1: Flush-n-Invalidate
	 */
	slc_ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);

	/* Program IM explicitly rather than trusting its current value */
	if (op & OP_FLUSH)
		slc_ctrl |= SLC_CTRL_IM;
	else
		slc_ctrl &= ~SLC_CTRL_IM;

	/* Invalidate and flush-n-invalidate both use RGN_OP == b'001 */
	if (!(op & OP_INV))
		slc_ctrl &= ~SLC_CTRL_RGN_OP_INV;
	else
		slc_ctrl |= SLC_CTRL_RGN_OP_INV;

	write_aux_reg(ARC_AUX_SLC_CTRL, slc_ctrl);

	/*
	 * Low address bits are ignored, so no clipping is needed. END must
	 * not equal START, therefore (slc_line_sz - 1) is added on top of
	 * the size.
	 */
	rgn_end = paddr + sz + gd->arch.slc_line_sz - 1;

	/*
	 * Only the lower registers are written: the upper ones
	 * (ARC_AUX_SLC_RGN_END1 / ARC_AUX_SLC_RGN_START1) stay 0 as PAE40 is
	 * not used. END goes first because writing START triggers the
	 * operation.
	 */
	write_aux_reg(ARC_AUX_SLC_RGN_END, rgn_end);
	write_aux_reg(ARC_AUX_SLC_RGN_START, paddr);

	/* Dummy read so "busy" reports correct status, see STAR 9001165532 */
	read_aux_reg(ARC_AUX_SLC_CTRL);

	while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY)
		;

#endif /* CONFIG_ISA_ARCV2 */
}
303 
304 static void arc_ioc_setup(void)
305 {
306 	/* IOC Aperture start is equal to DDR start */
307 	unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
308 	/* IOC Aperture size is equal to DDR size */
309 	long ap_size = CONFIG_SYS_SDRAM_SIZE;
310 
311 	flush_n_invalidate_dcache_all();
312 
313 	if (!is_power_of_2(ap_size) || ap_size < 4096)
314 		panic("IOC Aperture size must be power of 2 and bigger 4Kib");
315 
316 	/*
317 	 * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
318 	 * so setting 0x11 implies 512M, 0x12 implies 1G...
319 	 */
320 	write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
321 		      order_base_2(ap_size / 1024) - 2);
322 
323 	/* IOC Aperture start must be aligned to the size of the aperture */
324 	if (ap_base % ap_size != 0)
325 		panic("IOC Aperture start must be aligned to the size of the aperture");
326 
327 	write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
328 	write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
329 	write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
330 }
331 
/*
 * Decode the SLC line size from the SLC config register and store it in
 * gd->arch.slc_line_sz: an lsz field of 0 means 128 bytes, anything else
 * means 64 bytes. No-op without CONFIG_ISA_ARCV2 or when no SLC is present.
 */
static void read_decode_cache_bcr_arcv2(void)
{
#ifdef CONFIG_ISA_ARCV2

	union bcr_slc_cfg cfg;

	if (!slc_exists())
		return;

	cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);

	if (cfg.fields.lsz == 0)
		gd->arch.slc_line_sz = 128;
	else
		gd->arch.slc_line_sz = 64;

#endif /* CONFIG_ISA_ARCV2 */
}
345 
346 void read_decode_cache_bcr(void)
347 {
348 	int dc_line_sz = 0, ic_line_sz = 0;
349 	union bcr_di_cache ibcr, dbcr;
350 
351 	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
352 	if (ibcr.fields.ver) {
353 		gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
354 		if (!ic_line_sz)
355 			panic("Instruction exists but line length is 0\n");
356 	}
357 
358 	dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
359 	if (dbcr.fields.ver) {
360 		gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
361 		if (!dc_line_sz)
362 			panic("Data cache exists but line length is 0\n");
363 	}
364 
365 	if (ic_line_sz && dc_line_sz && (ic_line_sz != dc_line_sz))
366 		panic("Instruction and data cache line lengths differ\n");
367 }
368 
/*
 * One-time cache setup: decode the L1 line size, then on ARCv2 decode the
 * SLC line size, set up IOC if it is enabled and initialise the upper SLC
 * region registers if an SLC is present.
 */
void cache_init(void)
{
	read_decode_cache_bcr();

	/* Everything below is specific to ARCv2 */
	if (!is_isa_arcv2())
		return;

	read_decode_cache_bcr_arcv2();

	if (ioc_enabled())
		arc_ioc_setup();

	if (slc_exists())
		slc_upper_region_init();
}
382 
/* Return 1 if the instruction cache exists and is enabled, 0 otherwise. */
int icache_status(void)
{
	return icache_enabled() ? 1 : 0;
}
387 
388 void icache_enable(void)
389 {
390 	if (icache_exists())
391 		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
392 			      ~IC_CTRL_CACHE_DISABLE);
393 }
394 
395 void icache_disable(void)
396 {
397 	if (icache_exists())
398 		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
399 			      IC_CTRL_CACHE_DISABLE);
400 }
401 
402 /* IC supports only invalidation */
403 static inline void __ic_entire_invalidate(void)
404 {
405 	if (!icache_enabled())
406 		return;
407 
408 	/* Any write to IC_IVIC register triggers invalidation of entire I$ */
409 	write_aux_reg(ARC_AUX_IC_IVIC, 1);
410 	/*
411 	 * As per ARC HS databook (see chapter 5.3.3.2)
412 	 * it is required to add 3 NOPs after each write to IC_IVIC.
413 	 */
414 	__builtin_arc_nop();
415 	__builtin_arc_nop();
416 	__builtin_arc_nop();
417 	read_aux_reg(ARC_AUX_IC_CTRL);  /* blocks */
418 }
419 
420 void invalidate_icache_all(void)
421 {
422 	__ic_entire_invalidate();
423 
424 	/*
425 	 * If SL$ is bypassed for data it is used only for instructions,
426 	 * so we need to invalidate it too.
427 	 * TODO: HS 3.0 supports SLC disable so we need to check slc
428 	 * enable/disable status here.
429 	 */
430 	if (is_isa_arcv2() && slc_data_bypass())
431 		__slc_entire_op(OP_INV);
432 }
433 
/* Return 1 if the data cache exists and is enabled, 0 otherwise. */
int dcache_status(void)
{
	return dcache_enabled() ? 1 : 0;
}
438 
439 void dcache_enable(void)
440 {
441 	if (!dcache_exists())
442 		return;
443 
444 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
445 		      ~(DC_CTRL_INV_MODE_FLUSH | DC_CTRL_CACHE_DISABLE));
446 }
447 
448 void dcache_disable(void)
449 {
450 	if (!dcache_exists())
451 		return;
452 
453 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
454 		      DC_CTRL_CACHE_DISABLE);
455 }
456 
457 /* Common Helper for Line Operations on D-cache */
458 static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
459 				      const int cacheop)
460 {
461 	unsigned int aux_cmd;
462 	int num_lines;
463 
464 	/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
465 	aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
466 
467 	sz += paddr & ~CACHE_LINE_MASK;
468 	paddr &= CACHE_LINE_MASK;
469 
470 	num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz);
471 
472 	while (num_lines-- > 0) {
473 #if (CONFIG_ARC_MMU_VER == 3)
474 		write_aux_reg(ARC_AUX_DC_PTAG, paddr);
475 #endif
476 		write_aux_reg(aux_cmd, paddr);
477 		paddr += gd->arch.l1_line_sz;
478 	}
479 }
480 
481 static void __before_dc_op(const int op)
482 {
483 	unsigned int ctrl;
484 
485 	ctrl = read_aux_reg(ARC_AUX_DC_CTRL);
486 
487 	/* IM bit implies flush-n-inv, instead of vanilla inv */
488 	if (op == OP_INV)
489 		ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
490 	else
491 		ctrl |= DC_CTRL_INV_MODE_FLUSH;
492 
493 	write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
494 }
495 
496 static void __after_dc_op(const int op)
497 {
498 	if (op & OP_FLUSH)	/* flush / flush-n-inv both wait */
499 		while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
500 }
501 
502 static inline void __dc_entire_op(const int cacheop)
503 {
504 	int aux;
505 
506 	if (!dcache_enabled())
507 		return;
508 
509 	__before_dc_op(cacheop);
510 
511 	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
512 		aux = ARC_AUX_DC_IVDC;
513 	else
514 		aux = ARC_AUX_DC_FLSH;
515 
516 	write_aux_reg(aux, 0x1);
517 
518 	__after_dc_op(cacheop);
519 }
520 
/*
 * Run a line-by-line operation on L1 D$ over [paddr, paddr + sz).
 *
 * @paddr:   start address of the range
 * @sz:      length of the range in bytes
 * @cacheop: OP_INV, OP_FLUSH or OP_FLUSH_N_INV
 *
 * Does nothing when D$ is absent or disabled.
 */
static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
				const int cacheop)
{
	if (dcache_enabled()) {
		__before_dc_op(cacheop);
		__dcache_line_loop(paddr, sz, cacheop);
		__after_dc_op(cacheop);
	}
}
531 
532 void invalidate_dcache_range(unsigned long start, unsigned long end)
533 {
534 	if (start >= end)
535 		return;
536 
537 	/*
538 	 * ARCv1                                 -> call __dc_line_op
539 	 * ARCv2 && L1 D$ disabled               -> nothing
540 	 * ARCv2 && L1 D$ enabled && IOC enabled -> nothing
541 	 * ARCv2 && L1 D$ enabled && no IOC      -> call __dc_line_op; call __slc_rgn_op
542 	 */
543 	if (!is_isa_arcv2() || !ioc_enabled())
544 		__dc_line_op(start, end - start, OP_INV);
545 
546 	if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
547 		__slc_rgn_op(start, end - start, OP_INV);
548 }
549 
550 void flush_dcache_range(unsigned long start, unsigned long end)
551 {
552 	if (start >= end)
553 		return;
554 
555 	/*
556 	 * ARCv1                                 -> call __dc_line_op
557 	 * ARCv2 && L1 D$ disabled               -> nothing
558 	 * ARCv2 && L1 D$ enabled && IOC enabled -> nothing
559 	 * ARCv2 && L1 D$ enabled && no IOC      -> call __dc_line_op; call __slc_rgn_op
560 	 */
561 	if (!is_isa_arcv2() || !ioc_enabled())
562 		__dc_line_op(start, end - start, OP_FLUSH);
563 
564 	if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
565 		__slc_rgn_op(start, end - start, OP_FLUSH);
566 }
567 
/* Flush data caches for @size bytes starting at @start. */
void flush_cache(unsigned long start, unsigned long size)
{
	unsigned long end = start + size;

	flush_dcache_range(start, end);
}
572 
573 /*
574  * As invalidate_dcache_all() is not used in generic U-Boot code and as we
575  * don't need it in arch/arc code alone (invalidate without flush) we implement
576  * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because
577  * it's much safer. See [ NOTE 1 ] for more details.
578  */
579 void flush_n_invalidate_dcache_all(void)
580 {
581 	__dc_entire_op(OP_FLUSH_N_INV);
582 
583 	if (is_isa_arcv2() && !slc_data_bypass())
584 		__slc_entire_op(OP_FLUSH_N_INV);
585 }
586 
587 void flush_dcache_all(void)
588 {
589 	__dc_entire_op(OP_FLUSH);
590 
591 	if (is_isa_arcv2() && !slc_data_bypass())
592 		__slc_entire_op(OP_FLUSH);
593 }
594 
595 /*
596  * This is function to cleanup all caches (and therefore sync I/D caches) which
597  * can be used for cleanup before linux launch or to sync caches during
598  * relocation.
599  */
600 void sync_n_cleanup_cache_all(void)
601 {
602 	__dc_entire_op(OP_FLUSH_N_INV);
603 
604 	/*
605 	 * If SL$ is bypassed for data it is used only for instructions,
606 	 * and we shouldn't flush it. So invalidate it instead of flush_n_inv.
607 	 */
608 	if (is_isa_arcv2()) {
609 		if (slc_data_bypass())
610 			__slc_entire_op(OP_INV);
611 		else
612 			__slc_entire_op(OP_FLUSH_N_INV);
613 	}
614 
615 	__ic_entire_invalidate();
616 }
617