xref: /openbmc/linux/arch/arc/mm/tlb.c (revision 9b9c2cd4)
1 /*
2  * TLB Management (flush/create/diagnostics) for ARC700
3  *
4  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * vineetg: Aug 2011
11  *  -Reintroduce duplicate PD fixup - some customer chips still have the issue
12  *
13  * vineetg: May 2011
14  *  -No need to flush_cache_page( ) for each call to update_mmu_cache()
15  *   some of the LMBench tests improved amazingly
16  *      = page-fault thrice as fast (75 usec to 28 usec)
17  *      = mmap twice as fast (9.6 msec to 4.6 msec),
18  *      = fork (5.3 msec to 3.7 msec)
19  *
20  * vineetg: April 2011 :
21  *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
22  *      helps avoid a shift when preparing PD0 from PTE
23  *
24  * vineetg: April 2011 : Preparing for MMU V3
25  *  -MMU v2/v3 BCRs decoded differently
26  *  -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
27  *  -tlb_entry_erase( ) can be void
28  *  -local_flush_tlb_range( ):
29  *      = need not "ceil" @end
30  *      = walks MMU only if range spans < 32 entries, as opposed to 256
31  *
32  * Vineetg: Sept 10th 2008
33  *  -Changes related to MMU v2 (Rel 4.8)
34  *
35  * Vineetg: Aug 29th 2008
36  *  -In TLB Flush operations (Metal Fix MMU) there is an explicit command to
37  *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
38  *    it fails. Thus need to load it with ANY valid value before invoking
39  *    TLBIVUTLB cmd
40  *
41  * Vineetg: Aug 21th 2008:
42  *  -Reduced the duration of IRQ lockouts in TLB Flush routines
43  *  -Multiple copies of TLB erase code separated into a "single" function
44  *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
45  *       in interrupt-safe region.
46  *
47  * Vineetg: April 23rd Bug #93131
48  *    Problem: tlb_flush_kernel_range() doesnt do anything if the range to
49  *              flush is more than the size of TLB itself.
50  *
51  * Rahul Trivedi : Codito Technologies 2004
52  */
53 
54 #include <linux/module.h>
55 #include <linux/bug.h>
56 #include <asm/arcregs.h>
57 #include <asm/setup.h>
58 #include <asm/mmu_context.h>
59 #include <asm/mmu.h>
60 
61 /*			Need for ARC MMU v2
62  *
63  * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
64  * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
65  * map into same set, there would be contention for the 2 ways causing severe
66  * Thrashing.
67  *
68  * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
69  * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
70  * Given this, the thrashing problem should never happen because once the 3
71  * J-TLB entries are created (even though 3rd will knock out one of the prev
72  * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
73  *
74  * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs.
75  * This is a simple design for keeping them in sync. So what do we do?
76  * The solution which James came up was pretty neat. It utilised the assoc
77  * of uTLBs by not invalidating always but only when absolutely necessary.
78  *
79  * - Existing TLB commands work as before
80  * - New command (TLBWriteNI) for TLB write without clearing uTLBs
81  * - New command (TLBIVUTLB) to invalidate uTLBs.
82  *
83  * The uTLBs need only be invalidated when pages are being removed from the
84  * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
85  * as a result of a miss, the removed entry is still allowed to exist in the
86  * uTLBs as it is still valid and present in the OS page table. This allows the
87  * full associativity of the uTLBs to hide the limited associativity of the main
88  * TLB.
89  *
90  * During a miss handler, the new "TLBWriteNI" command is used to load
91  * entries without clearing the uTLBs.
92  *
93  * When the OS page table is updated, TLB entries that may be associated with a
94  * removed page are removed (flushed) from the TLB using TLBWrite. In this
95  * circumstance, the uTLBs must also be cleared. This is done by using the
96  * existing TLBWrite command. An explicit IVUTLB is also required for those
97  * corner cases when TLBWrite was not executed at all because the corresp
98  * J-TLB entry got evicted/replaced.
99  */
100 
101 
/*
 * A copy of the ASID from the PID reg is kept in asid_cache.
 * One instance per CPU, starting out at MM_CTXT_FIRST_CYCLE.
 */
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
104 
/*
 * Utility Routine to erase a J-TLB entry
 * Caller needs to setup Index Reg (manually or via getIndex)
 *
 * Loads an all-zero descriptor (PD1, PD1HI when PAE40 is enabled, PD0) and
 * then issues TLBWrite, so the blank entry is committed at the index the
 * caller selected beforehand.
 */
static inline void __tlb_entry_erase(void)
{
	write_aux_reg(ARC_REG_TLBPD1, 0);

	/* PD1HI is only touched when PAE40 is enabled */
	if (is_pae40_enabled())
		write_aux_reg(ARC_REG_TLBPD1HI, 0);

	write_aux_reg(ARC_REG_TLBPD0, 0);

	/* Commit the blank descriptor */
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
119 
120 #if (CONFIG_ARC_MMU_VER < 4)
121 
122 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
123 {
124 	unsigned int idx;
125 
126 	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
127 
128 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
129 	idx = read_aux_reg(ARC_REG_TLBINDEX);
130 
131 	return idx;
132 }
133 
134 static void tlb_entry_erase(unsigned int vaddr_n_asid)
135 {
136 	unsigned int idx;
137 
138 	/* Locate the TLB entry for this vaddr + ASID */
139 	idx = tlb_entry_lkup(vaddr_n_asid);
140 
141 	/* No error means entry found, zero it out */
142 	if (likely(!(idx & TLB_LKUP_ERR))) {
143 		__tlb_entry_erase();
144 	} else {
145 		/* Duplicate entry error */
146 		WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n",
147 					   vaddr_n_asid);
148 	}
149 }
150 
151 /****************************************************************************
152  * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs)
153  *
154  * New IVUTLB cmd in MMU v2 explicitly invalidates the uTLB
155  *
156  * utlb_invalidate ( )
157  *  -For v2 MMU calls Flush uTLB Cmd
158  *  -For v1 MMU does nothing (except for Metal Fix v1 MMU)
159  *      This is because in v1 TLBWrite itself invalidate uTLBs
160  ***************************************************************************/
161 
/*
 * Invalidate the uTLBs (build for MMU older than v4).
 *
 * Compiles to an empty function unless CONFIG_ARC_MMU_VER >= 2, in which
 * case the TLBIVUTLB command is issued. For exactly v2 the INDEX register
 * is first made valid (see the workaround note below).
 */
static void utlb_invalidate(void)
{
#if (CONFIG_ARC_MMU_VER >= 2)

#if (CONFIG_ARC_MMU_VER == 2)
	/* MMU v2 introduced the uTLB Flush command.
	 * There was however an obscure hardware bug, where uTLB flush would
	 * fail when a prior probe for J-TLB (both totally unrelated) would
	 * return lkup err - because the entry didn't exist in MMU.
	 * The Workaround was to set Index reg with some valid value, prior to
	 * flush. This was fixed in MMU v3 hence not needed any more
	 */
	unsigned int idx;

	/* make sure INDEX Reg is valid */
	idx = read_aux_reg(ARC_REG_TLBINDEX);

	/* If not, write some dummy val (0xa is just a placeholder index) */
	if (unlikely(idx & TLB_LKUP_ERR))
		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
#endif

	/* Explicit uTLB flush command */
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
#endif

}
188 
/*
 * Install a TLB entry built from @pd0 / @pd1 (build for MMU older than v4).
 *
 * @pd0: first descriptor word, used as the probe key and written via PD0
 * @pd1: second descriptor word, written via PD1
 *
 * An existing entry for the same key is overwritten in place; otherwise the
 * MMU is asked for a slot with TLBGetIndex. The entry is committed with
 * TLBWrite.
 */
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
{
	unsigned int idx;

	/*
	 * First verify if entry for this vaddr+ASID already exists
	 * This also sets up PD0 (vaddr, ASID..) for final commit
	 */
	idx = tlb_entry_lkup(pd0);

	/*
	 * If Not already present get a free slot from MMU.
	 * Otherwise, Probe would have located the entry and set INDEX Reg
	 * with existing location. This will cause Write CMD to over-write
	 * existing entry with new PD0 and PD1
	 */
	if (likely(idx & TLB_LKUP_ERR))
		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);

	/* setup the other half of TLB entry (pfn, rwx..) */
	write_aux_reg(ARC_REG_TLBPD1, pd1);

	/*
	 * Commit the Entry to MMU
	 * It doesn't sound safe to use the TLBWriteNI cmd here
	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
	 */
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
218 
219 #else	/* CONFIG_ARC_MMU_VER >= 4) */
220 
/*
 * uTLB invalidate for MMU v4 and later: intentionally empty, kept so the
 * flush routines can call it unconditionally.
 */
static void utlb_invalidate(void)
{
	/* No need since uTLB is always in sync with JTLB */
}
225 
/*
 * Erase the entry matching @vaddr_n_asid (MMU v4 and later).
 *
 * No probe is done here: PD0 is loaded with the key OR'ed with
 * _PAGE_PRESENT and the TLBDeleteEntry command is issued.
 * NOTE(review): presumably the present bit is needed for the delete to
 * match a live entry - confirm against the MMU documentation.
 */
static void tlb_entry_erase(unsigned int vaddr_n_asid)
{
	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
}
231 
/*
 * Install a TLB entry built from @pd0 / @pd1 (MMU v4 and later).
 *
 * @pd0: first descriptor word, written to PD0
 * @pd1: second descriptor word, written to PD1; with PAE40 enabled its
 *       bits above 32 additionally go to PD1HI
 *
 * The entry is committed with the single TLBInsertEntry command; no probe
 * or index setup is done by this routine.
 */
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
{
	write_aux_reg(ARC_REG_TLBPD0, pd0);
	write_aux_reg(ARC_REG_TLBPD1, pd1);

	/* upper 32 bits of the descriptor, only relevant with PAE40 */
	if (is_pae40_enabled())
		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);

	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
}
242 
243 #endif
244 
245 /*
246  * Un-conditionally (without lookup) erase the entire MMU contents
247  */
248 
249 noinline void local_flush_tlb_all(void)
250 {
251 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
252 	unsigned long flags;
253 	unsigned int entry;
254 	int num_tlb = mmu->sets * mmu->ways;
255 
256 	local_irq_save(flags);
257 
258 	/* Load PD0 and PD1 with template for a Blank Entry */
259 	write_aux_reg(ARC_REG_TLBPD1, 0);
260 
261 	if (is_pae40_enabled())
262 		write_aux_reg(ARC_REG_TLBPD1HI, 0);
263 
264 	write_aux_reg(ARC_REG_TLBPD0, 0);
265 
266 	for (entry = 0; entry < num_tlb; entry++) {
267 		/* write this entry to the TLB */
268 		write_aux_reg(ARC_REG_TLBINDEX, entry);
269 		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
270 	}
271 
272 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
273 		const int stlb_idx = 0x800;
274 
275 		/* Blank sTLB entry */
276 		write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ);
277 
278 		for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
279 			write_aux_reg(ARC_REG_TLBINDEX, entry);
280 			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
281 		}
282 	}
283 
284 	utlb_invalidate();
285 
286 	local_irq_restore(flags);
287 }
288 
289 /*
290  * Flush the entrie MM for userland. The fastest way is to move to Next ASID
291  */
292 noinline void local_flush_tlb_mm(struct mm_struct *mm)
293 {
294 	/*
295 	 * Small optimisation courtesy IA64
296 	 * flush_mm called during fork,exit,munmap etc, multiple times as well.
297 	 * Only for fork( ) do we need to move parent to a new MMU ctxt,
298 	 * all other cases are NOPs, hence this check.
299 	 */
300 	if (atomic_read(&mm->mm_users) == 0)
301 		return;
302 
303 	/*
304 	 * - Move to a new ASID, but only if the mm is still wired in
305 	 *   (Android Binder ended up calling this for vma->mm != tsk->mm,
306 	 *    causing h/w - s/w ASID to get out of sync)
307 	 * - Also get_new_mmu_context() new implementation allocates a new
308 	 *   ASID only if it is not allocated already - so unallocate first
309 	 */
310 	destroy_context(mm);
311 	if (current->mm == mm)
312 		get_new_mmu_context(mm);
313 }
314 
315 /*
316  * Flush a Range of TLB entries for userland.
317  * @start is inclusive, while @end is exclusive
318  * Difference between this and Kernel Range Flush is
319  *  -Here the fastest way (if range is too large) is to move to next ASID
320  *      without doing any explicit Shootdown
321  *  -In case of kernel Flush, entry has to be shot down explictly
322  */
323 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
324 			   unsigned long end)
325 {
326 	const unsigned int cpu = smp_processor_id();
327 	unsigned long flags;
328 
329 	/* If range @start to @end is more than 32 TLB entries deep,
330 	 * its better to move to a new ASID rather than searching for
331 	 * individual entries and then shooting them down
332 	 *
333 	 * The calc above is rough, doesn't account for unaligned parts,
334 	 * since this is heuristics based anyways
335 	 */
336 	if (unlikely((end - start) >= PAGE_SIZE * 32)) {
337 		local_flush_tlb_mm(vma->vm_mm);
338 		return;
339 	}
340 
341 	/*
342 	 * @start moved to page start: this alone suffices for checking
343 	 * loop end condition below, w/o need for aligning @end to end
344 	 * e.g. 2000 to 4001 will anyhow loop twice
345 	 */
346 	start &= PAGE_MASK;
347 
348 	local_irq_save(flags);
349 
350 	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
351 		while (start < end) {
352 			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
353 			start += PAGE_SIZE;
354 		}
355 	}
356 
357 	utlb_invalidate();
358 
359 	local_irq_restore(flags);
360 }
361 
362 /* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective)
363  *  @start, @end interpreted as kvaddr
364  * Interestingly, shared TLB entries can also be flushed using just
365  * @start,@end alone (interpreted as user vaddr), although technically SASID
366  * is also needed. However our smart TLbProbe lookup takes care of that.
367  */
368 void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
369 {
370 	unsigned long flags;
371 
372 	/* exactly same as above, except for TLB entry not taking ASID */
373 
374 	if (unlikely((end - start) >= PAGE_SIZE * 32)) {
375 		local_flush_tlb_all();
376 		return;
377 	}
378 
379 	start &= PAGE_MASK;
380 
381 	local_irq_save(flags);
382 	while (start < end) {
383 		tlb_entry_erase(start);
384 		start += PAGE_SIZE;
385 	}
386 
387 	utlb_invalidate();
388 
389 	local_irq_restore(flags);
390 }
391 
392 /*
393  * Delete TLB entry in MMU for a given page (??? address)
394  * NOTE One TLB entry contains translation for single PAGE
395  */
396 
397 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
398 {
399 	const unsigned int cpu = smp_processor_id();
400 	unsigned long flags;
401 
402 	/* Note that it is critical that interrupts are DISABLED between
403 	 * checking the ASID and using it flush the TLB entry
404 	 */
405 	local_irq_save(flags);
406 
407 	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
408 		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
409 		utlb_invalidate();
410 	}
411 
412 	local_irq_restore(flags);
413 }
414 
415 #ifdef CONFIG_SMP
416 
/*
 * Argument bundle handed to the ipi_flush_tlb_*() cross-call helpers.
 * Not every helper reads every field.
 */
struct tlb_args {
	struct vm_area_struct *ta_vma;	/* vma for the user-space flushes */
	unsigned long ta_start;		/* range start, or the single page address */
	unsigned long ta_end;		/* range end, passed through to the local flush */
};
422 
423 static inline void ipi_flush_tlb_page(void *arg)
424 {
425 	struct tlb_args *ta = arg;
426 
427 	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
428 }
429 
430 static inline void ipi_flush_tlb_range(void *arg)
431 {
432 	struct tlb_args *ta = arg;
433 
434 	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
435 }
436 
437 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
438 static inline void ipi_flush_pmd_tlb_range(void *arg)
439 {
440 	struct tlb_args *ta = arg;
441 
442 	local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
443 }
444 #endif
445 
446 static inline void ipi_flush_tlb_kernel_range(void *arg)
447 {
448 	struct tlb_args *ta = (struct tlb_args *)arg;
449 
450 	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
451 }
452 
453 void flush_tlb_all(void)
454 {
455 	on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1);
456 }
457 
/*
 * Cross-call trampoline with the exact smp_call_func_t signature.
 * local_flush_tlb_mm() takes a struct mm_struct *, not a void *, so it must
 * not be invoked through a cast function pointer; convert the argument here.
 */
static void ipi_flush_tlb_mm(void *arg)
{
	local_flush_tlb_mm(arg);
}

/*
 * Flush all user TLB entries of @mm on every CPU in mm_cpumask(@mm),
 * waiting for completion.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
}
463 
464 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
465 {
466 	struct tlb_args ta = {
467 		.ta_vma = vma,
468 		.ta_start = uaddr
469 	};
470 
471 	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
472 }
473 
474 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
475 		     unsigned long end)
476 {
477 	struct tlb_args ta = {
478 		.ta_vma = vma,
479 		.ta_start = start,
480 		.ta_end = end
481 	};
482 
483 	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
484 }
485 
486 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
487 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
488 			 unsigned long end)
489 {
490 	struct tlb_args ta = {
491 		.ta_vma = vma,
492 		.ta_start = start,
493 		.ta_end = end
494 	};
495 
496 	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1);
497 }
498 #endif
499 
500 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
501 {
502 	struct tlb_args ta = {
503 		.ta_start = start,
504 		.ta_end = end
505 	};
506 
507 	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
508 }
509 #endif
510 
/*
 * Routine to create a TLB entry
 *
 * @vma:   vma the faulting address belongs to
 * @vaddr: user virtual address (page-aligned internally)
 * @ptep:  PTE to build the entry from; it is also updated in place with
 *         _PAGE_PRESENT | _PAGE_ACCESSED
 *
 * Does nothing when @vma's mm is not current->active_mm. Otherwise the
 * PD0/PD1 pair is derived from the PTE and inserted with interrupts
 * disabled.
 */
void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
{
	unsigned long flags;
	unsigned int asid_or_sasid, rwx;
	unsigned long pd0;
	pte_t pd1;

	/*
	 * create_tlb() assumes that current->mm == vma->mm, since
	 * -the ASID for TLB entry is fetched from MMU ASID reg (valid for curr)
	 * -completes the lazy write to SASID reg (again valid for curr tsk)
	 *
	 * Removing the assumption involves
	 * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
	 * -Fix the TLB paranoid debug code to not trigger false negatives.
	 * -More importantly it makes this handler inconsistent with fast-path
	 *  TLB Refill handler which always deals with "current"
	 *
	 * Lets see the use cases when current->mm != vma->mm and we land here
	 *  1. execve->copy_strings()->__get_user_pages->handle_mm_fault
	 *     Here VM wants to pre-install a TLB entry for user stack while
	 *     current->mm still points to pre-execve mm (hence the condition).
	 *     However the stack vaddr is soon relocated (randomization) and
	 *     move_page_tables() tries to undo that TLB entry.
	 *     Thus not creating TLB entry is not any worse.
	 *
	 *  2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a
	 *     breakpoint in debugged task. Not creating a TLB now is not
	 *     performance critical.
	 *
	 * Both the cases above are not good enough for code churn.
	 */
	if (current->active_mm != vma->vm_mm)
		return;

	local_irq_save(flags);

	/* debug-only sanity check of s/w ASID vs h/w ASID for this address */
	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);

	vaddr &= PAGE_MASK;

	/* update this PTE credentials */
	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);

	/* Create HW TLB(PD0,PD1) from PTE  */

	/* ASID for this task: low 8 bits of the MMU PID register */
	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;

	pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);

	/*
	 * ARC MMU provides fully orthogonal access bits for K/U mode,
	 * however Linux only saves 1 set to save PTE real-estate
	 * Here we convert 3 PTE bits into 6 MMU bits:
	 * -Kernel only entries have Kr Kw Kx 0 0 0
	 * -User entries have mirrored K and U bits
	 */
	rwx = pte_val(*ptep) & PTE_BITS_RWX;

	if (pte_val(*ptep) & _PAGE_GLOBAL)
		rwx <<= 3;		/* r w x => Kr Kw Kx 0 0 0 */
	else
		rwx |= (rwx << 3);	/* r w x => Kr Kw Kx Ur Uw Ux */

	pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1);

	tlb_entry_insert(pd0, pd1);

	local_irq_restore(flags);
}
585 
586 /*
587  * Called at the end of pagefault, for a userspace mapped page
588  *  -pre-install the corresponding TLB entry into MMU
589  *  -Finalize the delayed D-cache flush of kernel mapping of page due to
590  *  	flush_dcache_page(), copy_user_page()
591  *
592  * Note that flush (when done) involves both WBACK - so physical page is
593  * in sync as well as INV - so any non-congruent aliases don't remain
594  */
595 void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
596 		      pte_t *ptep)
597 {
598 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
599 	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
600 	struct page *page = pfn_to_page(pte_pfn(*ptep));
601 
602 	create_tlb(vma, vaddr, ptep);
603 
604 	if (page == ZERO_PAGE(0)) {
605 		return;
606 	}
607 
608 	/*
609 	 * Exec page : Independent of aliasing/page-color considerations,
610 	 *	       since icache doesn't snoop dcache on ARC, any dirty
611 	 *	       K-mapping of a code page needs to be wback+inv so that
612 	 *	       icache fetch by userspace sees code correctly.
613 	 * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it
614 	 *	       so userspace sees the right data.
615 	 *  (Avoids the flush for Non-exec + congruent mapping case)
616 	 */
617 	if ((vma->vm_flags & VM_EXEC) ||
618 	     addr_not_cache_congruent(paddr, vaddr)) {
619 
620 		int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
621 		if (dirty) {
622 			/* wback + inv dcache lines (K-mapping) */
623 			__flush_dcache_page(paddr, paddr);
624 
625 			/* invalidate any existing icache lines (U-mapping) */
626 			if (vma->vm_flags & VM_EXEC)
627 				__inv_icache_page(paddr, vaddr);
628 		}
629 	}
630 }
631 
632 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
633 
634 /*
635  * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
636  * support.
637  *
638  * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a
639  * new bit "SZ" in TLB page descriptor to distinguish between them.
640  * Super Page size is configurable in hardware (4K to 16M), but fixed once
641  * RTL builds.
642  *
643  * The exact THP size a Linux configuration will support is a function of:
644  *  - MMU page size (typical 8K, RTL fixed)
645  *  - software page walker address split between PGD:PTE:PFN (typical
646  *    11:8:13, but can be changed with 1 line)
647  * So for above default, THP size supported is 8K * (2^8) = 2M
648  *
649  * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
650  * reduces to 1 level (as PTE is folded into PGD and canonically referred
651  * to as PMD).
652  * Thus THP PMD accessors are implemented in terms of PTE (just like sparc)
653  */
654 
655 void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
656 				 pmd_t *pmd)
657 {
658 	pte_t pte = __pte(pmd_val(*pmd));
659 	update_mmu_cache(vma, addr, &pte);
660 }
661 
662 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
663 				pgtable_t pgtable)
664 {
665 	struct list_head *lh = (struct list_head *) pgtable;
666 
667 	assert_spin_locked(&mm->page_table_lock);
668 
669 	/* FIFO */
670 	if (!pmd_huge_pte(mm, pmdp))
671 		INIT_LIST_HEAD(lh);
672 	else
673 		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
674 	pmd_huge_pte(mm, pmdp) = pgtable;
675 }
676 
677 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
678 {
679 	struct list_head *lh;
680 	pgtable_t pgtable;
681 
682 	assert_spin_locked(&mm->page_table_lock);
683 
684 	pgtable = pmd_huge_pte(mm, pmdp);
685 	lh = (struct list_head *) pgtable;
686 	if (list_empty(lh))
687 		pmd_huge_pte(mm, pmdp) = NULL;
688 	else {
689 		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
690 		list_del(lh);
691 	}
692 
693 	pte_val(pgtable[0]) = 0;
694 	pte_val(pgtable[1]) = 0;
695 
696 	return pgtable;
697 }
698 
699 void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
700 			       unsigned long end)
701 {
702 	unsigned int cpu;
703 	unsigned long flags;
704 
705 	local_irq_save(flags);
706 
707 	cpu = smp_processor_id();
708 
709 	if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) {
710 		unsigned int asid = hw_pid(vma->vm_mm, cpu);
711 
712 		/* No need to loop here: this will always be for 1 Huge Page */
713 		tlb_entry_erase(start | _PAGE_HW_SZ | asid);
714 	}
715 
716 	local_irq_restore(flags);
717 }
718 
719 #endif
720 
721 /* Read the Cache Build Confuration Registers, Decode them and save into
722  * the cpuinfo structure for later use.
723  * No Validation is done here, simply read/convert the BCRs
724  */
725 void read_decode_mmu_bcr(void)
726 {
727 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
728 	unsigned int tmp;
729 	struct bcr_mmu_1_2 {
730 #ifdef CONFIG_CPU_BIG_ENDIAN
731 		unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
732 #else
733 		unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
734 #endif
735 	} *mmu2;
736 
737 	struct bcr_mmu_3 {
738 #ifdef CONFIG_CPU_BIG_ENDIAN
739 	unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
740 		     u_itlb:4, u_dtlb:4;
741 #else
742 	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
743 		     ways:4, ver:8;
744 #endif
745 	} *mmu3;
746 
747 	struct bcr_mmu_4 {
748 #ifdef CONFIG_CPU_BIG_ENDIAN
749 	unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
750 		     n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
751 #else
752 	/*           DTLB      ITLB      JES        JE         JA      */
753 	unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
754 		     pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
755 #endif
756 	} *mmu4;
757 
758 	tmp = read_aux_reg(ARC_REG_MMU_BCR);
759 	mmu->ver = (tmp >> 24);
760 
761 	if (mmu->ver <= 2) {
762 		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
763 		mmu->pg_sz_k = TO_KB(0x2000);
764 		mmu->sets = 1 << mmu2->sets;
765 		mmu->ways = 1 << mmu2->ways;
766 		mmu->u_dtlb = mmu2->u_dtlb;
767 		mmu->u_itlb = mmu2->u_itlb;
768 	} else if (mmu->ver == 3) {
769 		mmu3 = (struct bcr_mmu_3 *)&tmp;
770 		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
771 		mmu->sets = 1 << mmu3->sets;
772 		mmu->ways = 1 << mmu3->ways;
773 		mmu->u_dtlb = mmu3->u_dtlb;
774 		mmu->u_itlb = mmu3->u_itlb;
775 		mmu->sasid = mmu3->sasid;
776 	} else {
777 		mmu4 = (struct bcr_mmu_4 *)&tmp;
778 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
779 		mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
780 		mmu->sets = 64 << mmu4->n_entry;
781 		mmu->ways = mmu4->n_ways * 2;
782 		mmu->u_dtlb = mmu4->u_dtlb * 4;
783 		mmu->u_itlb = mmu4->u_itlb * 4;
784 		mmu->sasid = mmu4->sasid;
785 		mmu->pae = mmu4->pae;
786 	}
787 }
788 
789 char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
790 {
791 	int n = 0;
792 	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
793 	char super_pg[64] = "";
794 
795 	if (p_mmu->s_pg_sz_m)
796 		scnprintf(super_pg, 64, "%dM Super Page%s, ",
797 			  p_mmu->s_pg_sz_m,
798 			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
799 
800 	n += scnprintf(buf + n, len - n,
801 		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
802 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
803 		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
804 		       p_mmu->u_dtlb, p_mmu->u_itlb,
805 		       IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
806 
807 	return buf;
808 }
809 
810 void arc_mmu_init(void)
811 {
812 	char str[256];
813 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
814 
815 	printk(arc_mmu_mumbojumbo(0, str, sizeof(str)));
816 
817 	/* For efficiency sake, kernel is compile time built for a MMU ver
818 	 * This must match the hardware it is running on.
819 	 * Linux built for MMU V2, if run on MMU V1 will break down because V1
820 	 *  hardware doesn't understand cmds such as WriteNI, or IVUTLB
821 	 * On the other hand, Linux built for V1 if run on MMU V2 will do
822 	 *   un-needed workarounds to prevent memcpy thrashing.
823 	 * Similarly MMU V3 has new features which won't work on older MMU
824 	 */
825 	if (mmu->ver != CONFIG_ARC_MMU_VER) {
826 		panic("MMU ver %d doesn't match kernel built for %d...\n",
827 		      mmu->ver, CONFIG_ARC_MMU_VER);
828 	}
829 
830 	if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
831 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
832 
833 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
834 	    mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE))
835 		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
836 		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
837 
838 	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
839 		panic("Hardware doesn't support PAE40\n");
840 
841 	/* Enable the MMU */
842 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
843 
844 	/* In smp we use this reg for interrupt 1 scratch */
845 #ifndef CONFIG_SMP
846 	/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
847 	write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
848 #endif
849 }
850 
851 /*
852  * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4}
853  * The mapping is Column-first.
854  *		---------------------	-----------
855  *		|way0|way1|way2|way3|	|way0|way1|
856  *		---------------------	-----------
857  * [set0]	|  0 |  1 |  2 |  3 |	|  0 |  1 |
858  * [set1]	|  4 |  5 |  6 |  7 |	|  2 |  3 |
859  *		~		    ~	~	  ~
860  * [set127]	| 508| 509| 510| 511|	| 254| 255|
861  *		---------------------	-----------
862  * For normal operations we don't(must not) care how above works since
863  * MMU cmd getIndex(vaddr) abstracts that out.
864  * However for walking WAYS of a SET, we need to know this
865  */
/* All arguments are parenthesised so any expression (e.g. &obj) may be passed */
#define SET_WAY_TO_IDX(mmu, set, way)  ((set) * (mmu)->ways + (way))
867 
868 /* Handling of Duplicate PD (TLB entry) in MMU.
869  * -Could be due to buggy customer tapeouts or obscure kernel bugs
870  * -MMU complaints not at the time of duplicate PD installation, but at the
871  *      time of lookup matching multiple ways.
872  * -Ideally these should never happen - but if they do - workaround by deleting
873  *      the duplicate one.
874  * -Knob to be verbose abt it.(TODO: hook them up to debugfs)
875  */
/* Non-zero: stay silent about duplicate PDs; zero (default): log each one */
volatile int dup_pd_silent;
877 
878 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
879 			  struct pt_regs *regs)
880 {
881 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
882 	unsigned int pd0[mmu->ways];
883 	unsigned long flags;
884 	int set;
885 
886 	local_irq_save(flags);
887 
888 	/* re-enable the MMU */
889 	write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
890 
891 	/* loop thru all sets of TLB */
892 	for (set = 0; set < mmu->sets; set++) {
893 
894 		int is_valid, way;
895 
896 		/* read out all the ways of current set */
897 		for (way = 0, is_valid = 0; way < mmu->ways; way++) {
898 			write_aux_reg(ARC_REG_TLBINDEX,
899 					  SET_WAY_TO_IDX(mmu, set, way));
900 			write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
901 			pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
902 			is_valid |= pd0[way] & _PAGE_PRESENT;
903 			pd0[way] &= PAGE_MASK;
904 		}
905 
906 		/* If all the WAYS in SET are empty, skip to next SET */
907 		if (!is_valid)
908 			continue;
909 
910 		/* Scan the set for duplicate ways: needs a nested loop */
911 		for (way = 0; way < mmu->ways - 1; way++) {
912 
913 			int n;
914 
915 			if (!pd0[way])
916 				continue;
917 
918 			for (n = way + 1; n < mmu->ways; n++) {
919 				if (pd0[way] != pd0[n])
920 					continue;
921 
922 				if (!dup_pd_silent)
923 					pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n",
924 						pd0[way], set, way, n);
925 
926 				/*
927 				 * clear entry @way and not @n.
928 				 * This is critical to our optimised loop
929 				 */
930 				pd0[way] = 0;
931 				write_aux_reg(ARC_REG_TLBINDEX,
932 						SET_WAY_TO_IDX(mmu, set, way));
933 				__tlb_entry_erase();
934 			}
935 		}
936 	}
937 
938 	local_irq_restore(flags);
939 }
940 
941 /***********************************************************************
942  * Diagnostic Routines
943  *  -Called from Low Level TLB Handlers if things don't look good
944  **********************************************************************/
945 
946 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
947 
/*
 * Invoked by the Low Level ASM TLB handler (and by tlb_paranoid_check())
 * when the HW and SW ASIDs disagree: logs both values, then executes the
 * "flag 1" instruction.
 */
void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
{
	const char *path = is_fast_path ? "Fast" : "Slow";

	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
	       path, mm_asid, mmu_asid);

	__asm__ __volatile__("flag 1");
}
959 
960 void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
961 {
962 	unsigned int mmu_asid;
963 
964 	mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
965 
966 	/*
967 	 * At the time of a TLB miss/installation
968 	 *   - HW version needs to match SW version
969 	 *   - SW needs to have a valid ASID
970 	 */
971 	if (addr < 0x70000000 &&
972 	    ((mm_asid == MM_CTXT_NO_ASID) ||
973 	      (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
974 		print_asid_mismatch(mm_asid, mmu_asid, 0);
975 }
976 #endif
977