xref: /openbmc/u-boot/arch/arm/cpu/armv8/cache_v8.c (revision 07d538d2814fa03be243c71879372f4263030b78)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * (C) Copyright 2013
4  * David Feng <fenghua@phytium.com.cn>
5  *
6  * (C) Copyright 2016
7  * Alexander Graf <agraf@suse.de>
8  */
9 
10 #include <common.h>
11 #include <asm/system.h>
12 #include <asm/armv8/mmu.h>
13 
14 DECLARE_GLOBAL_DATA_PTR;
15 
16 #ifndef CONFIG_SYS_DCACHE_OFF
17 
18 /*
19  *  With 4k page granule, a virtual address is split into 4 lookup parts
20  *  spanning 9 bits each:
21  *
22  *    _______________________________________________
23  *   |       |       |       |       |       |       |
24  *   |   0   |  Lv0  |  Lv1  |  Lv2  |  Lv3  |  off  |
25  *   |_______|_______|_______|_______|_______|_______|
26  *     63-48   47-39   38-30   29-21   20-12   11-00
27  *
28  *             mask        page size
29  *
30  *    Lv0: FF8000000000       --
31  *    Lv1:   7FC0000000       1G
32  *    Lv2:     3FE00000       2M
33  *    Lv3:       1FF000       4K
34  *    off:          FFF
35  */
36 
37 u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
38 {
39 	u64 max_addr = 0;
40 	u64 ips, va_bits;
41 	u64 tcr;
42 	int i;
43 
44 	/* Find the largest address we need to support */
45 	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
46 		max_addr = max(max_addr, mem_map[i].virt + mem_map[i].size);
47 
48 	/* Calculate the maximum physical (and thus virtual) address */
49 	if (max_addr > (1ULL << 44)) {
50 		ips = 5;
51 		va_bits = 48;
52 	} else  if (max_addr > (1ULL << 42)) {
53 		ips = 4;
54 		va_bits = 44;
55 	} else  if (max_addr > (1ULL << 40)) {
56 		ips = 3;
57 		va_bits = 42;
58 	} else  if (max_addr > (1ULL << 36)) {
59 		ips = 2;
60 		va_bits = 40;
61 	} else  if (max_addr > (1ULL << 32)) {
62 		ips = 1;
63 		va_bits = 36;
64 	} else {
65 		ips = 0;
66 		va_bits = 32;
67 	}
68 
69 	if (el == 1) {
70 		tcr = TCR_EL1_RSVD | (ips << 32) | TCR_EPD1_DISABLE;
71 	} else if (el == 2) {
72 		tcr = TCR_EL2_RSVD | (ips << 16);
73 	} else {
74 		tcr = TCR_EL3_RSVD | (ips << 16);
75 	}
76 
77 	/* PTWs cacheable, inner/outer WBWA and inner shareable */
78 	tcr |= TCR_TG0_4K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
79 	tcr |= TCR_T0SZ(va_bits);
80 
81 	if (pips)
82 		*pips = ips;
83 	if (pva_bits)
84 		*pva_bits = va_bits;
85 
86 	return tcr;
87 }
88 
/* Entries in one full page table: each level translates 9 bits (2^9) */
#define MAX_PTE_ENTRIES 512
90 
91 static int pte_type(u64 *pte)
92 {
93 	return *pte & PTE_TYPE_MASK;
94 }
95 
/*
 * Returns the lowest address bit translated by a PTE on level <level>:
 * 12 bits of page offset plus 9 bits for every level below it.
 */
static int level2shift(int level)
{
	const int page_bits = 12;
	const int bits_per_level = 9;
	int levels_below = 3 - level;

	return page_bits + bits_per_level * levels_below;
}
102 
103 static u64 *find_pte(u64 addr, int level)
104 {
105 	int start_level = 0;
106 	u64 *pte;
107 	u64 idx;
108 	u64 va_bits;
109 	int i;
110 
111 	debug("addr=%llx level=%d\n", addr, level);
112 
113 	get_tcr(0, NULL, &va_bits);
114 	if (va_bits < 39)
115 		start_level = 1;
116 
117 	if (level < start_level)
118 		return NULL;
119 
120 	/* Walk through all page table levels to find our PTE */
121 	pte = (u64*)gd->arch.tlb_addr;
122 	for (i = start_level; i < 4; i++) {
123 		idx = (addr >> level2shift(i)) & 0x1FF;
124 		pte += idx;
125 		debug("idx=%llx PTE %p at level %d: %llx\n", idx, pte, i, *pte);
126 
127 		/* Found it */
128 		if (i == level)
129 			return pte;
130 		/* PTE is no table (either invalid or block), can't traverse */
131 		if (pte_type(pte) != PTE_TYPE_TABLE)
132 			return NULL;
133 		/* Off to the next level */
134 		pte = (u64*)(*pte & 0x0000fffffffff000ULL);
135 	}
136 
137 	/* Should never reach here */
138 	return NULL;
139 }
140 
141 /* Returns and creates a new full table (512 entries) */
142 static u64 *create_table(void)
143 {
144 	u64 *new_table = (u64*)gd->arch.tlb_fillptr;
145 	u64 pt_len = MAX_PTE_ENTRIES * sizeof(u64);
146 
147 	/* Allocate MAX_PTE_ENTRIES pte entries */
148 	gd->arch.tlb_fillptr += pt_len;
149 
150 	if (gd->arch.tlb_fillptr - gd->arch.tlb_addr > gd->arch.tlb_size)
151 		panic("Insufficient RAM for page table: 0x%lx > 0x%lx. "
152 		      "Please increase the size in get_page_table_size()",
153 			gd->arch.tlb_fillptr - gd->arch.tlb_addr,
154 			gd->arch.tlb_size);
155 
156 	/* Mark all entries as invalid */
157 	memset(new_table, 0, pt_len);
158 
159 	return new_table;
160 }
161 
162 static void set_pte_table(u64 *pte, u64 *table)
163 {
164 	/* Point *pte to the new table */
165 	debug("Setting %p to addr=%p\n", pte, table);
166 	*pte = PTE_TYPE_TABLE | (ulong)table;
167 }
168 
169 /* Splits a block PTE into table with subpages spanning the old block */
170 static void split_block(u64 *pte, int level)
171 {
172 	u64 old_pte = *pte;
173 	u64 *new_table;
174 	u64 i = 0;
175 	/* level describes the parent level, we need the child ones */
176 	int levelshift = level2shift(level + 1);
177 
178 	if (pte_type(pte) != PTE_TYPE_BLOCK)
179 		panic("PTE %p (%llx) is not a block. Some driver code wants to "
180 		      "modify dcache settings for an range not covered in "
181 		      "mem_map.", pte, old_pte);
182 
183 	new_table = create_table();
184 	debug("Splitting pte %p (%llx) into %p\n", pte, old_pte, new_table);
185 
186 	for (i = 0; i < MAX_PTE_ENTRIES; i++) {
187 		new_table[i] = old_pte | (i << levelshift);
188 
189 		/* Level 3 block PTEs have the table type */
190 		if ((level + 1) == 3)
191 			new_table[i] |= PTE_TYPE_TABLE;
192 
193 		debug("Setting new_table[%lld] = %llx\n", i, new_table[i]);
194 	}
195 
196 	/* Set the new table into effect */
197 	set_pte_table(pte, new_table);
198 }
199 
200 /* Add one mm_region map entry to the page tables */
201 static void add_map(struct mm_region *map)
202 {
203 	u64 *pte;
204 	u64 virt = map->virt;
205 	u64 phys = map->phys;
206 	u64 size = map->size;
207 	u64 attrs = map->attrs | PTE_TYPE_BLOCK | PTE_BLOCK_AF;
208 	u64 blocksize;
209 	int level;
210 	u64 *new_table;
211 
212 	while (size) {
213 		pte = find_pte(virt, 0);
214 		if (pte && (pte_type(pte) == PTE_TYPE_FAULT)) {
215 			debug("Creating table for virt 0x%llx\n", virt);
216 			new_table = create_table();
217 			set_pte_table(pte, new_table);
218 		}
219 
220 		for (level = 1; level < 4; level++) {
221 			pte = find_pte(virt, level);
222 			if (!pte)
223 				panic("pte not found\n");
224 
225 			blocksize = 1ULL << level2shift(level);
226 			debug("Checking if pte fits for virt=%llx size=%llx blocksize=%llx\n",
227 			      virt, size, blocksize);
228 			if (size >= blocksize && !(virt & (blocksize - 1))) {
229 				/* Page fits, create block PTE */
230 				debug("Setting PTE %p to block virt=%llx\n",
231 				      pte, virt);
232 				if (level == 3)
233 					*pte = phys | attrs | PTE_TYPE_PAGE;
234 				else
235 					*pte = phys | attrs;
236 				virt += blocksize;
237 				phys += blocksize;
238 				size -= blocksize;
239 				break;
240 			} else if (pte_type(pte) == PTE_TYPE_FAULT) {
241 				/* Page doesn't fit, create subpages */
242 				debug("Creating subtable for virt 0x%llx blksize=%llx\n",
243 				      virt, blocksize);
244 				new_table = create_table();
245 				set_pte_table(pte, new_table);
246 			} else if (pte_type(pte) == PTE_TYPE_BLOCK) {
247 				debug("Split block into subtable for virt 0x%llx blksize=0x%llx\n",
248 				      virt, blocksize);
249 				split_block(pte, level);
250 			}
251 		}
252 	}
253 }
254 
/* How count_required_pts() classifies the PTE covering an address range */
enum pte_type {
	PTE_INVAL = 0,	/* no map entry touches the range */
	PTE_BLOCK = 1,	/* range can be covered by a block PTE */
	PTE_LEVEL = 2,	/* range needs a sub page table */
};
260 
261 /*
262  * This is a recursively called function to count the number of
263  * page tables we need to cover a particular PTE range. If you
264  * call this with level = -1 you basically get the full 48 bit
265  * coverage.
266  */
267 static int count_required_pts(u64 addr, int level, u64 maxaddr)
268 {
269 	int levelshift = level2shift(level);
270 	u64 levelsize = 1ULL << levelshift;
271 	u64 levelmask = levelsize - 1;
272 	u64 levelend = addr + levelsize;
273 	int r = 0;
274 	int i;
275 	enum pte_type pte_type = PTE_INVAL;
276 
277 	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++) {
278 		struct mm_region *map = &mem_map[i];
279 		u64 start = map->virt;
280 		u64 end = start + map->size;
281 
282 		/* Check if the PTE would overlap with the map */
283 		if (max(addr, start) <= min(levelend, end)) {
284 			start = max(addr, start);
285 			end = min(levelend, end);
286 
287 			/* We need a sub-pt for this level */
288 			if ((start & levelmask) || (end & levelmask)) {
289 				pte_type = PTE_LEVEL;
290 				break;
291 			}
292 
293 			/* Lv0 can not do block PTEs, so do levels here too */
294 			if (level <= 0) {
295 				pte_type = PTE_LEVEL;
296 				break;
297 			}
298 
299 			/* PTE is active, but fits into a block */
300 			pte_type = PTE_BLOCK;
301 		}
302 	}
303 
304 	/*
305 	 * Block PTEs at this level are already covered by the parent page
306 	 * table, so we only need to count sub page tables.
307 	 */
308 	if (pte_type == PTE_LEVEL) {
309 		int sublevel = level + 1;
310 		u64 sublevelsize = 1ULL << level2shift(sublevel);
311 
312 		/* Account for the new sub page table ... */
313 		r = 1;
314 
315 		/* ... and for all child page tables that one might have */
316 		for (i = 0; i < MAX_PTE_ENTRIES; i++) {
317 			r += count_required_pts(addr, sublevel, maxaddr);
318 			addr += sublevelsize;
319 
320 			if (addr >= maxaddr) {
321 				/*
322 				 * We reached the end of address space, no need
323 				 * to look any further.
324 				 */
325 				break;
326 			}
327 		}
328 	}
329 
330 	return r;
331 }
332 
333 /* Returns the estimated required size of all page tables */
334 __weak u64 get_page_table_size(void)
335 {
336 	u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
337 	u64 size = 0;
338 	u64 va_bits;
339 	int start_level = 0;
340 
341 	get_tcr(0, NULL, &va_bits);
342 	if (va_bits < 39)
343 		start_level = 1;
344 
345 	/* Account for all page tables we would need to cover our memory map */
346 	size = one_pt * count_required_pts(0, start_level - 1, 1ULL << va_bits);
347 
348 	/*
349 	 * We need to duplicate our page table once to have an emergency pt to
350 	 * resort to when splitting page tables later on
351 	 */
352 	size *= 2;
353 
354 	/*
355 	 * We may need to split page tables later on if dcache settings change,
356 	 * so reserve up to 4 (random pick) page tables for that.
357 	 */
358 	size += one_pt * 4;
359 
360 	return size;
361 }
362 
363 void setup_pgtables(void)
364 {
365 	int i;
366 
367 	if (!gd->arch.tlb_fillptr || !gd->arch.tlb_addr)
368 		panic("Page table pointer not setup.");
369 
370 	/*
371 	 * Allocate the first level we're on with invalidate entries.
372 	 * If the starting level is 0 (va_bits >= 39), then this is our
373 	 * Lv0 page table, otherwise it's the entry Lv1 page table.
374 	 */
375 	create_table();
376 
377 	/* Now add all MMU table entries one after another to the table */
378 	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
379 		add_map(&mem_map[i]);
380 }
381 
382 static void setup_all_pgtables(void)
383 {
384 	u64 tlb_addr = gd->arch.tlb_addr;
385 	u64 tlb_size = gd->arch.tlb_size;
386 
387 	/* Reset the fill ptr */
388 	gd->arch.tlb_fillptr = tlb_addr;
389 
390 	/* Create normal system page tables */
391 	setup_pgtables();
392 
393 	/* Create emergency page tables */
394 	gd->arch.tlb_size -= (uintptr_t)gd->arch.tlb_fillptr -
395 			     (uintptr_t)gd->arch.tlb_addr;
396 	gd->arch.tlb_addr = gd->arch.tlb_fillptr;
397 	setup_pgtables();
398 	gd->arch.tlb_emerg = gd->arch.tlb_addr;
399 	gd->arch.tlb_addr = tlb_addr;
400 	gd->arch.tlb_size = tlb_size;
401 }
402 
403 /* to activate the MMU we need to set up virtual memory */
404 __weak void mmu_setup(void)
405 {
406 	int el;
407 
408 	/* Set up page tables only once */
409 	if (!gd->arch.tlb_fillptr)
410 		setup_all_pgtables();
411 
412 	el = current_el();
413 	set_ttbr_tcr_mair(el, gd->arch.tlb_addr, get_tcr(el, NULL, NULL),
414 			  MEMORY_ATTRIBUTES);
415 
416 	/* enable the mmu */
417 	set_sctlr(get_sctlr() | CR_M);
418 }
419 
/*
 * Invalidate the entire data cache at all levels: first through
 * __asm_invalidate_dcache_all(), then through the separate L3 hook.
 */
void invalidate_dcache_all(void)
{
	__asm_invalidate_dcache_all();

	/* L3 has its own routine */
	__asm_invalidate_l3_dcache();
}
428 
/*
 * Clean & invalidate the entire data cache at all levels.
 * This function needs to be inline to avoid using stack.
 * A non-zero return of __asm_flush_l3_dcache() is its timeout status; it is
 * only reported through debug().
 */
inline void flush_dcache_all(void)
{
	int ret;

	__asm_flush_dcache_all();
	ret = __asm_flush_l3_dcache();
	if (!ret) {
		debug("flushing dcache successfully.\n");
		return;
	}

	debug("flushing dcache returns 0x%x\n", ret);
}
445 
/*
 * Invalidate the range <start>..<stop> in all levels of D-cache/unified
 * cache. Thin wrapper around __asm_invalidate_dcache_range().
 */
void invalidate_dcache_range(unsigned long start, unsigned long stop)
{
	/* Both bounds are handed to the assembly routine unchanged */
	__asm_invalidate_dcache_range(start, stop);
}
453 
/*
 * Flush (clean & invalidate) the range <start>..<stop> from all levels of
 * D-cache/unified cache. Thin wrapper around __asm_flush_dcache_range().
 */
void flush_dcache_range(unsigned long start, unsigned long stop)
{
	/* Both bounds are handed to the assembly routine unchanged */
	__asm_flush_dcache_range(start, stop);
}
461 
462 void dcache_enable(void)
463 {
464 	/* The data cache is not active unless the mmu is enabled */
465 	if (!(get_sctlr() & CR_M)) {
466 		invalidate_dcache_all();
467 		__asm_invalidate_tlb_all();
468 		mmu_setup();
469 	}
470 
471 	set_sctlr(get_sctlr() | CR_C);
472 }
473 
474 void dcache_disable(void)
475 {
476 	uint32_t sctlr;
477 
478 	sctlr = get_sctlr();
479 
480 	/* if cache isn't enabled no need to disable */
481 	if (!(sctlr & CR_C))
482 		return;
483 
484 	set_sctlr(sctlr & ~(CR_C|CR_M));
485 
486 	flush_dcache_all();
487 	__asm_invalidate_tlb_all();
488 }
489 
490 int dcache_status(void)
491 {
492 	return (get_sctlr() & CR_C) != 0;
493 }
494 
495 u64 *__weak arch_get_page_table(void) {
496 	puts("No page table offset defined\n");
497 
498 	return NULL;
499 }
500 
501 static bool is_aligned(u64 addr, u64 size, u64 align)
502 {
503 	return !(addr & (align - 1)) && !(size & (align - 1));
504 }
505 
506 /* Use flag to indicate if attrs has more than d-cache attributes */
507 static u64 set_one_region(u64 start, u64 size, u64 attrs, bool flag, int level)
508 {
509 	int levelshift = level2shift(level);
510 	u64 levelsize = 1ULL << levelshift;
511 	u64 *pte = find_pte(start, level);
512 
513 	/* Can we can just modify the current level block PTE? */
514 	if (is_aligned(start, size, levelsize)) {
515 		if (flag) {
516 			*pte &= ~PMD_ATTRMASK;
517 			*pte |= attrs & PMD_ATTRMASK;
518 		} else {
519 			*pte &= ~PMD_ATTRINDX_MASK;
520 			*pte |= attrs & PMD_ATTRINDX_MASK;
521 		}
522 		debug("Set attrs=%llx pte=%p level=%d\n", attrs, pte, level);
523 
524 		return levelsize;
525 	}
526 
527 	/* Unaligned or doesn't fit, maybe split block into table */
528 	debug("addr=%llx level=%d pte=%p (%llx)\n", start, level, pte, *pte);
529 
530 	/* Maybe we need to split the block into a table */
531 	if (pte_type(pte) == PTE_TYPE_BLOCK)
532 		split_block(pte, level);
533 
534 	/* And then double-check it became a table or already is one */
535 	if (pte_type(pte) != PTE_TYPE_TABLE)
536 		panic("PTE %p (%llx) for addr=%llx should be a table",
537 		      pte, *pte, start);
538 
539 	/* Roll on to the next page table level */
540 	return 0;
541 }
542 
543 void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
544 				     enum dcache_option option)
545 {
546 	u64 attrs = PMD_ATTRINDX(option);
547 	u64 real_start = start;
548 	u64 real_size = size;
549 
550 	debug("start=%lx size=%lx\n", (ulong)start, (ulong)size);
551 
552 	if (!gd->arch.tlb_emerg)
553 		panic("Emergency page table not setup.");
554 
555 	/*
556 	 * We can not modify page tables that we're currently running on,
557 	 * so we first need to switch to the "emergency" page tables where
558 	 * we can safely modify our primary page tables and then switch back
559 	 */
560 	__asm_switch_ttbr(gd->arch.tlb_emerg);
561 
562 	/*
563 	 * Loop through the address range until we find a page granule that fits
564 	 * our alignment constraints, then set it to the new cache attributes
565 	 */
566 	while (size > 0) {
567 		int level;
568 		u64 r;
569 
570 		for (level = 1; level < 4; level++) {
571 			/* Set d-cache attributes only */
572 			r = set_one_region(start, size, attrs, false, level);
573 			if (r) {
574 				/* PTE successfully replaced */
575 				size -= r;
576 				start += r;
577 				break;
578 			}
579 		}
580 
581 	}
582 
583 	/* We're done modifying page tables, switch back to our primary ones */
584 	__asm_switch_ttbr(gd->arch.tlb_addr);
585 
586 	/*
587 	 * Make sure there's nothing stale in dcache for a region that might
588 	 * have caches off now
589 	 */
590 	flush_dcache_range(real_start, real_start + real_size);
591 }
592 
593 /*
594  * Modify MMU table for a region with updated PXN/UXN/Memory type/valid bits.
595  * The procecess is break-before-make. The target region will be marked as
596  * invalid during the process of changing.
597  */
598 void mmu_change_region_attr(phys_addr_t addr, size_t siz, u64 attrs)
599 {
600 	int level;
601 	u64 r, size, start;
602 
603 	start = addr;
604 	size = siz;
605 	/*
606 	 * Loop through the address range until we find a page granule that fits
607 	 * our alignment constraints, then set it to "invalid".
608 	 */
609 	while (size > 0) {
610 		for (level = 1; level < 4; level++) {
611 			/* Set PTE to fault */
612 			r = set_one_region(start, size, PTE_TYPE_FAULT, true,
613 					   level);
614 			if (r) {
615 				/* PTE successfully invalidated */
616 				size -= r;
617 				start += r;
618 				break;
619 			}
620 		}
621 	}
622 
623 	flush_dcache_range(gd->arch.tlb_addr,
624 			   gd->arch.tlb_addr + gd->arch.tlb_size);
625 	__asm_invalidate_tlb_all();
626 
627 	/*
628 	 * Loop through the address range until we find a page granule that fits
629 	 * our alignment constraints, then set it to the new cache attributes
630 	 */
631 	start = addr;
632 	size = siz;
633 	while (size > 0) {
634 		for (level = 1; level < 4; level++) {
635 			/* Set PTE to new attributes */
636 			r = set_one_region(start, size, attrs, true, level);
637 			if (r) {
638 				/* PTE successfully updated */
639 				size -= r;
640 				start += r;
641 				break;
642 			}
643 		}
644 	}
645 	flush_dcache_range(gd->arch.tlb_addr,
646 			   gd->arch.tlb_addr + gd->arch.tlb_size);
647 	__asm_invalidate_tlb_all();
648 }
649 
650 #else	/* CONFIG_SYS_DCACHE_OFF */
651 
652 /*
653  * For SPL builds, we may want to not have dcache enabled. Any real U-Boot
654  * running however really wants to have dcache and the MMU active. Check that
655  * everything is sane and give the developer a hint if it isn't.
656  */
657 #ifndef CONFIG_SPL_BUILD
658 #error Please describe your MMU layout in CONFIG_SYS_MEM_MAP and enable dcache.
659 #endif
660 
/* CONFIG_SYS_DCACHE_OFF variant: empty stub */
void invalidate_dcache_all(void)
{
	/* Nothing to do: d-cache support is compiled out */
}
664 
/* CONFIG_SYS_DCACHE_OFF variant: empty stub */
void flush_dcache_all(void)
{
	/* Nothing to do: d-cache support is compiled out */
}
668 
/* CONFIG_SYS_DCACHE_OFF variant: empty stub */
void dcache_enable(void)
{
	/* Nothing to do: d-cache support is compiled out */
}
672 
/* CONFIG_SYS_DCACHE_OFF variant: empty stub */
void dcache_disable(void)
{
	/* Nothing to do: d-cache support is compiled out */
}
676 
/* CONFIG_SYS_DCACHE_OFF variant: always reports the d-cache as off (0) */
int dcache_status(void)
{
	const int dcache_on = 0;

	return dcache_on;
}
681 
682 void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
683 				     enum dcache_option option)
684 {
685 }
686 
687 #endif	/* CONFIG_SYS_DCACHE_OFF */
688 
689 #ifndef CONFIG_SYS_ICACHE_OFF
690 
691 void icache_enable(void)
692 {
693 	invalidate_icache_all();
694 	set_sctlr(get_sctlr() | CR_I);
695 }
696 
697 void icache_disable(void)
698 {
699 	set_sctlr(get_sctlr() & ~CR_I);
700 }
701 
702 int icache_status(void)
703 {
704 	return (get_sctlr() & CR_I) != 0;
705 }
706 
/*
 * Invalidate the entire instruction cache: first through
 * __asm_invalidate_icache_all(), then through the separate L3 hook.
 */
void invalidate_icache_all(void)
{
	__asm_invalidate_icache_all();

	/* L3 has its own routine */
	__asm_invalidate_l3_icache();
}
712 
713 #else	/* CONFIG_SYS_ICACHE_OFF */
714 
/* CONFIG_SYS_ICACHE_OFF variant: empty stub */
void icache_enable(void)
{
	/* Nothing to do: i-cache support is compiled out */
}
718 
/* CONFIG_SYS_ICACHE_OFF variant: empty stub */
void icache_disable(void)
{
	/* Nothing to do: i-cache support is compiled out */
}
722 
/* CONFIG_SYS_ICACHE_OFF variant: always reports the i-cache as off (0) */
int icache_status(void)
{
	const int icache_on = 0;

	return icache_on;
}
727 
/* CONFIG_SYS_ICACHE_OFF variant: empty stub */
void invalidate_icache_all(void)
{
	/* Nothing to do: i-cache support is compiled out */
}
731 
732 #endif	/* CONFIG_SYS_ICACHE_OFF */
733 
734 /*
735  * Enable dCache & iCache, whether cache is actually enabled
736  * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
737  */
738 void __weak enable_caches(void)
739 {
740 	icache_enable();
741 	dcache_enable();
742 }
743