xref: /openbmc/u-boot/arch/arm/cpu/armv8/cache_v8.c (revision 715dad6d7ede576bcd37fa6664226ec8ce58119a)
1 /*
2  * (C) Copyright 2013
3  * David Feng <fenghua@phytium.com.cn>
4  *
5  * (C) Copyright 2016
6  * Alexander Graf <agraf@suse.de>
7  *
8  * SPDX-License-Identifier:	GPL-2.0+
9  */
10 
11 #include <common.h>
12 #include <asm/system.h>
13 #include <asm/armv8/mmu.h>
14 
15 DECLARE_GLOBAL_DATA_PTR;
16 
17 #ifndef CONFIG_SYS_DCACHE_OFF
18 
19 /*
20  *  With 4k page granule, a virtual address is split into 4 lookup parts
21  *  spanning 9 bits each:
22  *
23  *    _______________________________________________
24  *   |       |       |       |       |       |       |
25  *   |   0   |  Lv0  |  Lv1  |  Lv2  |  Lv3  |  off  |
26  *   |_______|_______|_______|_______|_______|_______|
27  *     63-48   47-39   38-30   29-21   20-12   11-00
28  *
29  *             mask        page size
30  *
31  *    Lv0: FF8000000000       --
32  *    Lv1:   7FC0000000       1G
33  *    Lv2:     3FE00000       2M
34  *    Lv3:       1FF000       4K
35  *    off:          FFF
36  */
37 
38 u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
39 {
40 	u64 max_addr = 0;
41 	u64 ips, va_bits;
42 	u64 tcr;
43 	int i;
44 
45 	/* Find the largest address we need to support */
46 	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
47 		max_addr = max(max_addr, mem_map[i].virt + mem_map[i].size);
48 
49 	/* Calculate the maximum physical (and thus virtual) address */
50 	if (max_addr > (1ULL << 44)) {
51 		ips = 5;
52 		va_bits = 48;
53 	} else  if (max_addr > (1ULL << 42)) {
54 		ips = 4;
55 		va_bits = 44;
56 	} else  if (max_addr > (1ULL << 40)) {
57 		ips = 3;
58 		va_bits = 42;
59 	} else  if (max_addr > (1ULL << 36)) {
60 		ips = 2;
61 		va_bits = 40;
62 	} else  if (max_addr > (1ULL << 32)) {
63 		ips = 1;
64 		va_bits = 36;
65 	} else {
66 		ips = 0;
67 		va_bits = 32;
68 	}
69 
70 	if (el == 1) {
71 		tcr = TCR_EL1_RSVD | (ips << 32) | TCR_EPD1_DISABLE;
72 	} else if (el == 2) {
73 		tcr = TCR_EL2_RSVD | (ips << 16);
74 	} else {
75 		tcr = TCR_EL3_RSVD | (ips << 16);
76 	}
77 
78 	/* PTWs cacheable, inner/outer WBWA and inner shareable */
79 	tcr |= TCR_TG0_4K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
80 	tcr |= TCR_T0SZ(va_bits);
81 
82 	if (pips)
83 		*pips = ips;
84 	if (pva_bits)
85 		*pva_bits = va_bits;
86 
87 	return tcr;
88 }
89 
90 #define MAX_PTE_ENTRIES 512
91 
92 static int pte_type(u64 *pte)
93 {
94 	return *pte & PTE_TYPE_MASK;
95 }
96 
/*
 * Returns the bit position of the lowest address bit translated by a PTE
 * on level <level> (12 for level 3, growing by 9 per level above it).
 */
static int level2shift(int level)
{
	const int page_bits = 12;	/* offset within a 4K page */
	const int bits_per_level = 9;	/* index bits per lookup level */
	int levels_above_page = 3 - level;

	return page_bits + bits_per_level * levels_above_page;
}
103 
104 static u64 *find_pte(u64 addr, int level)
105 {
106 	int start_level = 0;
107 	u64 *pte;
108 	u64 idx;
109 	u64 va_bits;
110 	int i;
111 
112 	debug("addr=%llx level=%d\n", addr, level);
113 
114 	get_tcr(0, NULL, &va_bits);
115 	if (va_bits < 39)
116 		start_level = 1;
117 
118 	if (level < start_level)
119 		return NULL;
120 
121 	/* Walk through all page table levels to find our PTE */
122 	pte = (u64*)gd->arch.tlb_addr;
123 	for (i = start_level; i < 4; i++) {
124 		idx = (addr >> level2shift(i)) & 0x1FF;
125 		pte += idx;
126 		debug("idx=%llx PTE %p at level %d: %llx\n", idx, pte, i, *pte);
127 
128 		/* Found it */
129 		if (i == level)
130 			return pte;
131 		/* PTE is no table (either invalid or block), can't traverse */
132 		if (pte_type(pte) != PTE_TYPE_TABLE)
133 			return NULL;
134 		/* Off to the next level */
135 		pte = (u64*)(*pte & 0x0000fffffffff000ULL);
136 	}
137 
138 	/* Should never reach here */
139 	return NULL;
140 }
141 
142 /* Returns and creates a new full table (512 entries) */
143 static u64 *create_table(void)
144 {
145 	u64 *new_table = (u64*)gd->arch.tlb_fillptr;
146 	u64 pt_len = MAX_PTE_ENTRIES * sizeof(u64);
147 
148 	/* Allocate MAX_PTE_ENTRIES pte entries */
149 	gd->arch.tlb_fillptr += pt_len;
150 
151 	if (gd->arch.tlb_fillptr - gd->arch.tlb_addr > gd->arch.tlb_size)
152 		panic("Insufficient RAM for page table: 0x%lx > 0x%lx. "
153 		      "Please increase the size in get_page_table_size()",
154 			gd->arch.tlb_fillptr - gd->arch.tlb_addr,
155 			gd->arch.tlb_size);
156 
157 	/* Mark all entries as invalid */
158 	memset(new_table, 0, pt_len);
159 
160 	return new_table;
161 }
162 
163 static void set_pte_table(u64 *pte, u64 *table)
164 {
165 	/* Point *pte to the new table */
166 	debug("Setting %p to addr=%p\n", pte, table);
167 	*pte = PTE_TYPE_TABLE | (ulong)table;
168 }
169 
170 /* Splits a block PTE into table with subpages spanning the old block */
171 static void split_block(u64 *pte, int level)
172 {
173 	u64 old_pte = *pte;
174 	u64 *new_table;
175 	u64 i = 0;
176 	/* level describes the parent level, we need the child ones */
177 	int levelshift = level2shift(level + 1);
178 
179 	if (pte_type(pte) != PTE_TYPE_BLOCK)
180 		panic("PTE %p (%llx) is not a block. Some driver code wants to "
181 		      "modify dcache settings for an range not covered in "
182 		      "mem_map.", pte, old_pte);
183 
184 	new_table = create_table();
185 	debug("Splitting pte %p (%llx) into %p\n", pte, old_pte, new_table);
186 
187 	for (i = 0; i < MAX_PTE_ENTRIES; i++) {
188 		new_table[i] = old_pte | (i << levelshift);
189 
190 		/* Level 3 block PTEs have the table type */
191 		if ((level + 1) == 3)
192 			new_table[i] |= PTE_TYPE_TABLE;
193 
194 		debug("Setting new_table[%lld] = %llx\n", i, new_table[i]);
195 	}
196 
197 	/* Set the new table into effect */
198 	set_pte_table(pte, new_table);
199 }
200 
201 /* Add one mm_region map entry to the page tables */
202 static void add_map(struct mm_region *map)
203 {
204 	u64 *pte;
205 	u64 virt = map->virt;
206 	u64 phys = map->phys;
207 	u64 size = map->size;
208 	u64 attrs = map->attrs | PTE_TYPE_BLOCK | PTE_BLOCK_AF;
209 	u64 blocksize;
210 	int level;
211 	u64 *new_table;
212 
213 	while (size) {
214 		pte = find_pte(virt, 0);
215 		if (pte && (pte_type(pte) == PTE_TYPE_FAULT)) {
216 			debug("Creating table for virt 0x%llx\n", virt);
217 			new_table = create_table();
218 			set_pte_table(pte, new_table);
219 		}
220 
221 		for (level = 1; level < 4; level++) {
222 			pte = find_pte(virt, level);
223 			if (!pte)
224 				panic("pte not found\n");
225 
226 			blocksize = 1ULL << level2shift(level);
227 			debug("Checking if pte fits for virt=%llx size=%llx blocksize=%llx\n",
228 			      virt, size, blocksize);
229 			if (size >= blocksize && !(virt & (blocksize - 1))) {
230 				/* Page fits, create block PTE */
231 				debug("Setting PTE %p to block virt=%llx\n",
232 				      pte, virt);
233 				*pte = phys | attrs;
234 				virt += blocksize;
235 				phys += blocksize;
236 				size -= blocksize;
237 				break;
238 			} else if (pte_type(pte) == PTE_TYPE_FAULT) {
239 				/* Page doesn't fit, create subpages */
240 				debug("Creating subtable for virt 0x%llx blksize=%llx\n",
241 				      virt, blocksize);
242 				new_table = create_table();
243 				set_pte_table(pte, new_table);
244 			} else if (pte_type(pte) == PTE_TYPE_BLOCK) {
245 				debug("Split block into subtable for virt 0x%llx blksize=0x%llx\n",
246 				      virt, blocksize);
247 				split_block(pte, level);
248 			}
249 		}
250 	}
251 }
252 
253 enum pte_type {
254 	PTE_INVAL,
255 	PTE_BLOCK,
256 	PTE_LEVEL,
257 };
258 
259 /*
260  * This is a recursively called function to count the number of
261  * page tables we need to cover a particular PTE range. If you
262  * call this with level = -1 you basically get the full 48 bit
263  * coverage.
264  */
265 static int count_required_pts(u64 addr, int level, u64 maxaddr)
266 {
267 	int levelshift = level2shift(level);
268 	u64 levelsize = 1ULL << levelshift;
269 	u64 levelmask = levelsize - 1;
270 	u64 levelend = addr + levelsize;
271 	int r = 0;
272 	int i;
273 	enum pte_type pte_type = PTE_INVAL;
274 
275 	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++) {
276 		struct mm_region *map = &mem_map[i];
277 		u64 start = map->virt;
278 		u64 end = start + map->size;
279 
280 		/* Check if the PTE would overlap with the map */
281 		if (max(addr, start) <= min(levelend, end)) {
282 			start = max(addr, start);
283 			end = min(levelend, end);
284 
285 			/* We need a sub-pt for this level */
286 			if ((start & levelmask) || (end & levelmask)) {
287 				pte_type = PTE_LEVEL;
288 				break;
289 			}
290 
291 			/* Lv0 can not do block PTEs, so do levels here too */
292 			if (level <= 0) {
293 				pte_type = PTE_LEVEL;
294 				break;
295 			}
296 
297 			/* PTE is active, but fits into a block */
298 			pte_type = PTE_BLOCK;
299 		}
300 	}
301 
302 	/*
303 	 * Block PTEs at this level are already covered by the parent page
304 	 * table, so we only need to count sub page tables.
305 	 */
306 	if (pte_type == PTE_LEVEL) {
307 		int sublevel = level + 1;
308 		u64 sublevelsize = 1ULL << level2shift(sublevel);
309 
310 		/* Account for the new sub page table ... */
311 		r = 1;
312 
313 		/* ... and for all child page tables that one might have */
314 		for (i = 0; i < MAX_PTE_ENTRIES; i++) {
315 			r += count_required_pts(addr, sublevel, maxaddr);
316 			addr += sublevelsize;
317 
318 			if (addr >= maxaddr) {
319 				/*
320 				 * We reached the end of address space, no need
321 				 * to look any further.
322 				 */
323 				break;
324 			}
325 		}
326 	}
327 
328 	return r;
329 }
330 
331 /* Returns the estimated required size of all page tables */
332 __weak u64 get_page_table_size(void)
333 {
334 	u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
335 	u64 size = 0;
336 	u64 va_bits;
337 	int start_level = 0;
338 
339 	get_tcr(0, NULL, &va_bits);
340 	if (va_bits < 39)
341 		start_level = 1;
342 
343 	/* Account for all page tables we would need to cover our memory map */
344 	size = one_pt * count_required_pts(0, start_level - 1, 1ULL << va_bits);
345 
346 	/*
347 	 * We need to duplicate our page table once to have an emergency pt to
348 	 * resort to when splitting page tables later on
349 	 */
350 	size *= 2;
351 
352 	/*
353 	 * We may need to split page tables later on if dcache settings change,
354 	 * so reserve up to 4 (random pick) page tables for that.
355 	 */
356 	size += one_pt * 4;
357 
358 	return size;
359 }
360 
361 void setup_pgtables(void)
362 {
363 	int i;
364 
365 	if (!gd->arch.tlb_fillptr || !gd->arch.tlb_addr)
366 		panic("Page table pointer not setup.");
367 
368 	/*
369 	 * Allocate the first level we're on with invalidate entries.
370 	 * If the starting level is 0 (va_bits >= 39), then this is our
371 	 * Lv0 page table, otherwise it's the entry Lv1 page table.
372 	 */
373 	create_table();
374 
375 	/* Now add all MMU table entries one after another to the table */
376 	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
377 		add_map(&mem_map[i]);
378 }
379 
380 static void setup_all_pgtables(void)
381 {
382 	u64 tlb_addr = gd->arch.tlb_addr;
383 	u64 tlb_size = gd->arch.tlb_size;
384 
385 	/* Reset the fill ptr */
386 	gd->arch.tlb_fillptr = tlb_addr;
387 
388 	/* Create normal system page tables */
389 	setup_pgtables();
390 
391 	/* Create emergency page tables */
392 	gd->arch.tlb_size -= (uintptr_t)gd->arch.tlb_fillptr -
393 			     (uintptr_t)gd->arch.tlb_addr;
394 	gd->arch.tlb_addr = gd->arch.tlb_fillptr;
395 	setup_pgtables();
396 	gd->arch.tlb_emerg = gd->arch.tlb_addr;
397 	gd->arch.tlb_addr = tlb_addr;
398 	gd->arch.tlb_size = tlb_size;
399 }
400 
401 /* to activate the MMU we need to set up virtual memory */
402 __weak void mmu_setup(void)
403 {
404 	int el;
405 
406 	/* Set up page tables only once */
407 	if (!gd->arch.tlb_fillptr)
408 		setup_all_pgtables();
409 
410 	el = current_el();
411 	set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
412 			  MEMORY_ATTRIBUTES);
413 
414 	/* enable the mmu */
415 	set_sctlr(get_sctlr() | CR_M);
416 }
417 
/*
 * Performs an invalidation of the entire data cache at all levels:
 * first through __asm_invalidate_dcache_all(), then through the separate
 * L3 hook.
 */
void invalidate_dcache_all(void)
{
	__asm_invalidate_dcache_all();
	__asm_invalidate_l3_dcache();
}
426 
/*
 * Cleans and invalidates the entire data cache at all levels.
 * This function has to stay inline so that it does not use the stack.
 * A non-zero result from __asm_flush_l3_dcache() (a timeout status,
 * according to the original note) is only reported via debug().
 */
inline void flush_dcache_all(void)
{
	int l3_status;

	__asm_flush_dcache_all();
	l3_status = __asm_flush_l3_dcache();
	if (!l3_status)
		debug("flushing dcache successfully.\n");
	else
		debug("flushing dcache returns 0x%x\n", l3_status);
}
443 
/*
 * Invalidates the range [start, stop) in all levels of D-cache/unified
 * cache.
 *
 * NOTE(review): this goes through __asm_flush_dcache_range(), the very
 * helper flush_dcache_range() uses, so dirty lines in the range are
 * presumably written back as well - confirm against the assembly helper.
 */
void invalidate_dcache_range(unsigned long start, unsigned long stop)
{
	__asm_flush_dcache_range(start, stop);
}
451 
/*
 * Flushes (cleans and invalidates) the range from <start> to <stop> in
 * all levels of D-cache/unified cache.
 */
void flush_dcache_range(unsigned long start, unsigned long stop)
{
	__asm_flush_dcache_range(start, stop);
}
459 
460 void dcache_enable(void)
461 {
462 	/* The data cache is not active unless the mmu is enabled */
463 	if (!(get_sctlr() & CR_M)) {
464 		invalidate_dcache_all();
465 		__asm_invalidate_tlb_all();
466 		mmu_setup();
467 	}
468 
469 	set_sctlr(get_sctlr() | CR_C);
470 }
471 
472 void dcache_disable(void)
473 {
474 	uint32_t sctlr;
475 
476 	sctlr = get_sctlr();
477 
478 	/* if cache isn't enabled no need to disable */
479 	if (!(sctlr & CR_C))
480 		return;
481 
482 	set_sctlr(sctlr & ~(CR_C|CR_M));
483 
484 	flush_dcache_all();
485 	__asm_invalidate_tlb_all();
486 }
487 
488 int dcache_status(void)
489 {
490 	return (get_sctlr() & CR_C) != 0;
491 }
492 
493 u64 *__weak arch_get_page_table(void) {
494 	puts("No page table offset defined\n");
495 
496 	return NULL;
497 }
498 
499 static bool is_aligned(u64 addr, u64 size, u64 align)
500 {
501 	return !(addr & (align - 1)) && !(size & (align - 1));
502 }
503 
504 static u64 set_one_region(u64 start, u64 size, u64 attrs, int level)
505 {
506 	int levelshift = level2shift(level);
507 	u64 levelsize = 1ULL << levelshift;
508 	u64 *pte = find_pte(start, level);
509 
510 	/* Can we can just modify the current level block PTE? */
511 	if (is_aligned(start, size, levelsize)) {
512 		*pte &= ~PMD_ATTRINDX_MASK;
513 		*pte |= attrs;
514 		debug("Set attrs=%llx pte=%p level=%d\n", attrs, pte, level);
515 
516 		return levelsize;
517 	}
518 
519 	/* Unaligned or doesn't fit, maybe split block into table */
520 	debug("addr=%llx level=%d pte=%p (%llx)\n", start, level, pte, *pte);
521 
522 	/* Maybe we need to split the block into a table */
523 	if (pte_type(pte) == PTE_TYPE_BLOCK)
524 		split_block(pte, level);
525 
526 	/* And then double-check it became a table or already is one */
527 	if (pte_type(pte) != PTE_TYPE_TABLE)
528 		panic("PTE %p (%llx) for addr=%llx should be a table",
529 		      pte, *pte, start);
530 
531 	/* Roll on to the next page table level */
532 	return 0;
533 }
534 
535 void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
536 				     enum dcache_option option)
537 {
538 	u64 attrs = PMD_ATTRINDX(option);
539 	u64 real_start = start;
540 	u64 real_size = size;
541 
542 	debug("start=%lx size=%lx\n", (ulong)start, (ulong)size);
543 
544 	if (!gd->arch.tlb_emerg)
545 		panic("Emergency page table not setup.");
546 
547 	/*
548 	 * We can not modify page tables that we're currently running on,
549 	 * so we first need to switch to the "emergency" page tables where
550 	 * we can safely modify our primary page tables and then switch back
551 	 */
552 	__asm_switch_ttbr(gd->arch.tlb_emerg);
553 
554 	/*
555 	 * Loop through the address range until we find a page granule that fits
556 	 * our alignment constraints, then set it to the new cache attributes
557 	 */
558 	while (size > 0) {
559 		int level;
560 		u64 r;
561 
562 		for (level = 1; level < 4; level++) {
563 			r = set_one_region(start, size, attrs, level);
564 			if (r) {
565 				/* PTE successfully replaced */
566 				size -= r;
567 				start += r;
568 				break;
569 			}
570 		}
571 
572 	}
573 
574 	/* We're done modifying page tables, switch back to our primary ones */
575 	__asm_switch_ttbr(gd->arch.tlb_addr);
576 
577 	/*
578 	 * Make sure there's nothing stale in dcache for a region that might
579 	 * have caches off now
580 	 */
581 	flush_dcache_range(real_start, real_start + real_size);
582 }
583 
584 #else	/* CONFIG_SYS_DCACHE_OFF */
585 
586 /*
587  * For SPL builds, we may want to not have dcache enabled. Any real U-Boot
588  * running however really wants to have dcache and the MMU active. Check that
589  * everything is sane and give the developer a hint if it isn't.
590  */
591 #ifndef CONFIG_SPL_BUILD
592 #error Please describe your MMU layout in CONFIG_SYS_MEM_MAP and enable dcache.
593 #endif
594 
595 void invalidate_dcache_all(void)
596 {
597 }
598 
599 void flush_dcache_all(void)
600 {
601 }
602 
603 void dcache_enable(void)
604 {
605 }
606 
607 void dcache_disable(void)
608 {
609 }
610 
611 int dcache_status(void)
612 {
613 	return 0;
614 }
615 
616 void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
617 				     enum dcache_option option)
618 {
619 }
620 
621 #endif	/* CONFIG_SYS_DCACHE_OFF */
622 
623 #ifndef CONFIG_SYS_ICACHE_OFF
624 
625 void icache_enable(void)
626 {
627 	invalidate_icache_all();
628 	set_sctlr(get_sctlr() | CR_I);
629 }
630 
631 void icache_disable(void)
632 {
633 	set_sctlr(get_sctlr() & ~CR_I);
634 }
635 
636 int icache_status(void)
637 {
638 	return (get_sctlr() & CR_I) != 0;
639 }
640 
/*
 * Invalidate the entire instruction cache, including the separate L3
 * hook.
 */
void invalidate_icache_all(void)
{
	__asm_invalidate_icache_all();
	__asm_invalidate_l3_icache();
}
646 
647 #else	/* CONFIG_SYS_ICACHE_OFF */
648 
/*
 * With CONFIG_SYS_ICACHE_OFF the instruction cache interface is kept, but
 * every operation is an empty stub.
 */

/* No-op: the instruction cache is never enabled in this configuration */
void icache_enable(void)
{
}

/* No-op: there is nothing to disable */
void icache_disable(void)
{
}

/* Always reports the instruction cache as disabled */
int icache_status(void)
{
	return 0;
}

/* No-op: instruction cache support is compiled out */
void invalidate_icache_all(void)
{
}
665 
666 #endif	/* CONFIG_SYS_ICACHE_OFF */
667 
668 /*
669  * Enable dCache & iCache, whether cache is actually enabled
670  * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
671  */
672 void __weak enable_caches(void)
673 {
674 	icache_enable();
675 	dcache_enable();
676 }
677