xref: /openbmc/linux/arch/sparc/mm/srmmu.c (revision 93707cbabcc8baf2b2b5f4a99c1f08ee83eb7abd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * srmmu.c:  SRMMU specific routines for memory management.
4  *
5  * Copyright (C) 1995 David S. Miller  (davem@caip.rutgers.edu)
6  * Copyright (C) 1995,2002 Pete Zaitcev (zaitcev@yahoo.com)
7  * Copyright (C) 1996 Eddie C. Dost    (ecd@skynet.be)
8  * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
9  * Copyright (C) 1999,2000 Anton Blanchard (anton@samba.org)
10  */
11 
12 #include <linux/seq_file.h>
13 #include <linux/spinlock.h>
14 #include <linux/bootmem.h>
15 #include <linux/pagemap.h>
16 #include <linux/vmalloc.h>
17 #include <linux/kdebug.h>
18 #include <linux/export.h>
19 #include <linux/kernel.h>
20 #include <linux/init.h>
21 #include <linux/log2.h>
22 #include <linux/gfp.h>
23 #include <linux/fs.h>
24 #include <linux/mm.h>
25 
26 #include <asm/mmu_context.h>
27 #include <asm/cacheflush.h>
28 #include <asm/tlbflush.h>
29 #include <asm/io-unit.h>
30 #include <asm/pgalloc.h>
31 #include <asm/pgtable.h>
32 #include <asm/bitext.h>
33 #include <asm/vaddrs.h>
34 #include <asm/cache.h>
35 #include <asm/traps.h>
36 #include <asm/oplib.h>
37 #include <asm/mbus.h>
38 #include <asm/page.h>
39 #include <asm/asi.h>
40 #include <asm/msi.h>
41 #include <asm/smp.h>
42 #include <asm/io.h>
43 
44 /* Now the cpu specific definitions. */
45 #include <asm/turbosparc.h>
46 #include <asm/tsunami.h>
47 #include <asm/viking.h>
48 #include <asm/swift.h>
49 #include <asm/leon.h>
50 #include <asm/mxcc.h>
51 #include <asm/ross.h>
52 
53 #include "mm_32.h"
54 
enum mbus_module srmmu_modtype;
static unsigned int hwbug_bitmask;
/* VAC geometry; init_vac_layout() fills these from PROM cpu-node properties. */
int vac_cache_size;
EXPORT_SYMBOL(vac_cache_size);
int vac_line_size;

extern struct resource sparc_iomap;

extern unsigned long last_valid_pfn;

/* Kernel page directory; carved out of the nocache pool in srmmu_nocache_init(). */
static pgd_t *srmmu_swapper_pg_dir;

const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops;
EXPORT_SYMBOL(sparc32_cachetlb_ops);

#ifdef CONFIG_SMP
/* srmmu_paging_init() calls local_ops->tlb_all() on SMP instead of flush_tlb_all(). */
const struct sparc32_cachetlb_ops *local_ops;

/* On SMP the code bracketed by FLUSH_BEGIN/FLUSH_END runs unconditionally. */
#define FLUSH_BEGIN(mm)
#define FLUSH_END
#else
/* On UP the bracketed code is skipped for an mm that has no MMU context. */
#define FLUSH_BEGIN(mm) if ((mm)->context != NO_CONTEXT) {
#define FLUSH_END	}
#endif

int flush_page_for_dma_global = 1;

char *srmmu_name;

/*
 * Context table: physical address (handed to srmmu_set_ctable_ptr() in
 * srmmu_paging_init()) and the nocache virtual address used to index it.
 */
ctxd_t *srmmu_ctx_table_phys;
static ctxd_t *srmmu_context_table;

int viking_mxcc_present;
/* Taken around alloc_context()/free_context(), i.e. the ctx_free/ctx_used lists. */
static DEFINE_SPINLOCK(srmmu_context_spinlock);

/* When non-zero, switch_mm() flushes the whole icache on every switch. */
static int is_hypersparc;

/* When non-zero, srmmu_nocache_init() maps the nocache pool with SRMMU_CACHE set. */
static int srmmu_cache_pagetables;

/* these will be initialized in srmmu_nocache_calcsize() */
static unsigned long srmmu_nocache_size;
static unsigned long srmmu_nocache_end;

/* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */
#define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4)

/* The context table is a nocache user with the biggest alignment needs. */
#define SRMMU_NOCACHE_ALIGN_MAX (sizeof(ctxd_t)*SRMMU_MAX_CONTEXTS)

/* Backing memory of the nocache pool and the bitmap tracking its allocations. */
void *srmmu_nocache_pool;
static struct bit_map srmmu_nocache_map;
106 
107 static inline int srmmu_pmd_none(pmd_t pmd)
108 { return !(pmd_val(pmd) & 0xFFFFFFF); }
109 
110 /* XXX should we hyper_flush_whole_icache here - Anton */
111 static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
112 {
113 	pte_t pte;
114 
115 	pte = __pte((SRMMU_ET_PTD | (__nocache_pa(pgdp) >> 4)));
116 	set_pte((pte_t *)ctxp, pte);
117 }
118 
119 void pmd_set(pmd_t *pmdp, pte_t *ptep)
120 {
121 	unsigned long ptp;	/* Physical address, shifted right by 4 */
122 	int i;
123 
124 	ptp = __nocache_pa(ptep) >> 4;
125 	for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
126 		set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
127 		ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
128 	}
129 }
130 
131 void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
132 {
133 	unsigned long ptp;	/* Physical address, shifted right by 4 */
134 	int i;
135 
136 	ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4);	/* watch for overflow */
137 	for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
138 		set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
139 		ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
140 	}
141 }
142 
143 /* Find an entry in the third-level page table.. */
144 pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address)
145 {
146 	void *pte;
147 
148 	pte = __nocache_va((dir->pmdv[0] & SRMMU_PTD_PMASK) << 4);
149 	return (pte_t *) pte +
150 	    ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
151 }
152 
153 /*
154  * size: bytes to allocate in the nocache area.
155  * align: bytes, number to align at.
156  * Returns the virtual address of the allocated area.
157  */
158 static void *__srmmu_get_nocache(int size, int align)
159 {
160 	int offset;
161 	unsigned long addr;
162 
163 	if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
164 		printk(KERN_ERR "Size 0x%x too small for nocache request\n",
165 		       size);
166 		size = SRMMU_NOCACHE_BITMAP_SHIFT;
167 	}
168 	if (size & (SRMMU_NOCACHE_BITMAP_SHIFT - 1)) {
169 		printk(KERN_ERR "Size 0x%x unaligned int nocache request\n",
170 		       size);
171 		size += SRMMU_NOCACHE_BITMAP_SHIFT - 1;
172 	}
173 	BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX);
174 
175 	offset = bit_map_string_get(&srmmu_nocache_map,
176 				    size >> SRMMU_NOCACHE_BITMAP_SHIFT,
177 				    align >> SRMMU_NOCACHE_BITMAP_SHIFT);
178 	if (offset == -1) {
179 		printk(KERN_ERR "srmmu: out of nocache %d: %d/%d\n",
180 		       size, (int) srmmu_nocache_size,
181 		       srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
182 		return NULL;
183 	}
184 
185 	addr = SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT);
186 	return (void *)addr;
187 }
188 
/*
 * Zeroing front end for __srmmu_get_nocache(): returns size bytes of
 * cleared nocache memory aligned to align, or NULL on failure.
 */
void *srmmu_get_nocache(int size, int align)
{
	void *chunk = __srmmu_get_nocache(size, align);

	if (!chunk)
		return NULL;

	memset(chunk, 0, size);
	return chunk;
}
200 
201 void srmmu_free_nocache(void *addr, int size)
202 {
203 	unsigned long vaddr;
204 	int offset;
205 
206 	vaddr = (unsigned long)addr;
207 	if (vaddr < SRMMU_NOCACHE_VADDR) {
208 		printk("Vaddr %lx is smaller than nocache base 0x%lx\n",
209 		    vaddr, (unsigned long)SRMMU_NOCACHE_VADDR);
210 		BUG();
211 	}
212 	if (vaddr + size > srmmu_nocache_end) {
213 		printk("Vaddr %lx is bigger than nocache end 0x%lx\n",
214 		    vaddr, srmmu_nocache_end);
215 		BUG();
216 	}
217 	if (!is_power_of_2(size)) {
218 		printk("Size 0x%x is not a power of 2\n", size);
219 		BUG();
220 	}
221 	if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
222 		printk("Size 0x%x is too small\n", size);
223 		BUG();
224 	}
225 	if (vaddr & (size - 1)) {
226 		printk("Vaddr %lx is not aligned to size 0x%x\n", vaddr, size);
227 		BUG();
228 	}
229 
230 	offset = (vaddr - SRMMU_NOCACHE_VADDR) >> SRMMU_NOCACHE_BITMAP_SHIFT;
231 	size = size >> SRMMU_NOCACHE_BITMAP_SHIFT;
232 
233 	bit_map_clear(&srmmu_nocache_map, offset, size);
234 }
235 
236 static void srmmu_early_allocate_ptable_skeleton(unsigned long start,
237 						 unsigned long end);
238 
239 /* Return how much physical memory we have.  */
240 static unsigned long __init probe_memory(void)
241 {
242 	unsigned long total = 0;
243 	int i;
244 
245 	for (i = 0; sp_banks[i].num_bytes; i++)
246 		total += sp_banks[i].num_bytes;
247 
248 	return total;
249 }
250 
251 /*
252  * Reserve nocache dynamically proportionally to the amount of
253  * system RAM. -- Tomas Szepe <szepe@pinerecords.com>, June 2002
254  */
255 static void __init srmmu_nocache_calcsize(void)
256 {
257 	unsigned long sysmemavail = probe_memory() / 1024;
258 	int srmmu_nocache_npages;
259 
260 	srmmu_nocache_npages =
261 		sysmemavail / SRMMU_NOCACHE_ALCRATIO / 1024 * 256;
262 
263  /* P3 XXX The 4x overuse: corroborated by /proc/meminfo. */
264 	// if (srmmu_nocache_npages < 256) srmmu_nocache_npages = 256;
265 	if (srmmu_nocache_npages < SRMMU_MIN_NOCACHE_PAGES)
266 		srmmu_nocache_npages = SRMMU_MIN_NOCACHE_PAGES;
267 
268 	/* anything above 1280 blows up */
269 	if (srmmu_nocache_npages > SRMMU_MAX_NOCACHE_PAGES)
270 		srmmu_nocache_npages = SRMMU_MAX_NOCACHE_PAGES;
271 
272 	srmmu_nocache_size = srmmu_nocache_npages * PAGE_SIZE;
273 	srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size;
274 }
275 
/*
 * Set up the nocache pool: allocate its backing memory and allocation
 * bitmap from bootmem, carve the kernel page directory out of the pool,
 * build the page-table skeleton for the nocache virtual range and map the
 * pool page by page at SRMMU_NOCACHE_VADDR.
 *
 * Relies on srmmu_nocache_size/srmmu_nocache_end having been set by
 * srmmu_nocache_calcsize().
 */
static void __init srmmu_nocache_init(void)
{
	void *srmmu_nocache_bitmap;
	unsigned int bitmap_bits;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long paddr, vaddr;
	unsigned long pteval;

	/* One bitmap bit per (1 << SRMMU_NOCACHE_BITMAP_SHIFT) bytes of pool. */
	bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT;

	srmmu_nocache_pool = __alloc_bootmem(srmmu_nocache_size,
		SRMMU_NOCACHE_ALIGN_MAX, 0UL);
	memset(srmmu_nocache_pool, 0, srmmu_nocache_size);

	srmmu_nocache_bitmap =
		__alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long),
				SMP_CACHE_BYTES, 0UL);
	bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits);

	/*
	 * First allocation from the pool: the kernel page directory.
	 * NOTE(review): the result is not checked for NULL before use.
	 * Accesses below go through __nocache_fix(), presumably because
	 * the nocache virtual mapping is not live yet - confirm.
	 */
	srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
	memset(__nocache_fix(srmmu_swapper_pg_dir), 0, SRMMU_PGD_TABLE_SIZE);
	init_mm.pgd = srmmu_swapper_pg_dir;

	/* Make sure pmd and pte tables exist for the whole nocache range. */
	srmmu_early_allocate_ptable_skeleton(SRMMU_NOCACHE_VADDR, srmmu_nocache_end);

	paddr = __pa((unsigned long)srmmu_nocache_pool);
	vaddr = SRMMU_NOCACHE_VADDR;

	/* Map the pool linearly, one page per iteration. */
	while (vaddr < srmmu_nocache_end) {
		pgd = pgd_offset_k(vaddr);
		pmd = pmd_offset(__nocache_fix(pgd), vaddr);
		pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);

		pteval = ((paddr >> 4) | SRMMU_ET_PTE | SRMMU_PRIV);

		/* The pool is mapped cacheable only if the CPU setup asked for it. */
		if (srmmu_cache_pagetables)
			pteval |= SRMMU_CACHE;

		set_pte(__nocache_fix(pte), __pte(pteval));

		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_cache_all();
	flush_tlb_all();
}
325 
326 pgd_t *get_pgd_fast(void)
327 {
328 	pgd_t *pgd = NULL;
329 
330 	pgd = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
331 	if (pgd) {
332 		pgd_t *init = pgd_offset_k(0);
333 		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
334 		memcpy(pgd + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
335 						(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
336 	}
337 
338 	return pgd;
339 }
340 
341 /*
342  * Hardware needs alignment to 256 only, but we align to whole page size
343  * to reduce fragmentation problems due to the buddy principle.
344  * XXX Provide actual fragmentation statistics in /proc.
345  *
346  * Alignments up to the page size are the same for physical and virtual
347  * addresses of the nocache area.
348  */
349 pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
350 {
351 	unsigned long pte;
352 	struct page *page;
353 
354 	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
355 		return NULL;
356 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
357 	if (!pgtable_page_ctor(page)) {
358 		__free_page(page);
359 		return NULL;
360 	}
361 	return page;
362 }
363 
364 void pte_free(struct mm_struct *mm, pgtable_t pte)
365 {
366 	unsigned long p;
367 
368 	pgtable_page_dtor(pte);
369 	p = (unsigned long)page_address(pte);	/* Cached address (for test) */
370 	if (p == 0)
371 		BUG();
372 	p = page_to_pfn(pte) << PAGE_SHIFT;	/* Physical address */
373 
374 	/* free non cached virtual address*/
375 	srmmu_free_nocache(__nocache_va(p), PTE_SIZE);
376 }
377 
/* context handling - a dynamically sized pool is used */
#define NO_CONTEXT	-1

/*
 * One MMU context slot.  Slots are kept on circular doubly linked lists
 * whose heads (ctx_free, ctx_used) are themselves struct ctx_list objects.
 */
struct ctx_list {
	struct ctx_list *next;
	struct ctx_list *prev;
	unsigned int ctx_number;	/* index of this slot in ctx_list_pool */
	struct mm_struct *ctx_mm;	/* mm currently owning the context */
};

/* Array of all context slots, allocated in sparc_context_init(). */
static struct ctx_list *ctx_list_pool;
/* List heads: unassigned slots and slots currently bound to an mm. */
static struct ctx_list ctx_free;
static struct ctx_list ctx_used;

/* At boot time we determine the number of contexts */
static int num_contexts;

/* Unlink entry from whichever list it is on; entry's own links are left as-is. */
static inline void remove_from_ctx_list(struct ctx_list *entry)
{
	struct ctx_list *before = entry->prev;
	struct ctx_list *after = entry->next;

	after->prev = before;
	before->next = after;
}

/* Append entry at the tail of the circular list headed by head. */
static inline void add_to_ctx_list(struct ctx_list *head, struct ctx_list *entry)
{
	struct ctx_list *tail = head->prev;

	entry->next = head;
	entry->prev = tail;
	tail->next = entry;
	head->prev = entry;
}
#define add_to_free_ctxlist(entry) add_to_ctx_list(&ctx_free, entry)
#define add_to_used_ctxlist(entry) add_to_ctx_list(&ctx_used, entry)
409 
410 
411 static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm)
412 {
413 	struct ctx_list *ctxp;
414 
415 	ctxp = ctx_free.next;
416 	if (ctxp != &ctx_free) {
417 		remove_from_ctx_list(ctxp);
418 		add_to_used_ctxlist(ctxp);
419 		mm->context = ctxp->ctx_number;
420 		ctxp->ctx_mm = mm;
421 		return;
422 	}
423 	ctxp = ctx_used.next;
424 	if (ctxp->ctx_mm == old_mm)
425 		ctxp = ctxp->next;
426 	if (ctxp == &ctx_used)
427 		panic("out of mmu contexts");
428 	flush_cache_mm(ctxp->ctx_mm);
429 	flush_tlb_mm(ctxp->ctx_mm);
430 	remove_from_ctx_list(ctxp);
431 	add_to_used_ctxlist(ctxp);
432 	ctxp->ctx_mm->context = NO_CONTEXT;
433 	ctxp->ctx_mm = mm;
434 	mm->context = ctxp->ctx_number;
435 }
436 
437 static inline void free_context(int context)
438 {
439 	struct ctx_list *ctx_old;
440 
441 	ctx_old = ctx_list_pool + context;
442 	remove_from_ctx_list(ctx_old);
443 	add_to_free_ctxlist(ctx_old);
444 }
445 
446 static void __init sparc_context_init(int numctx)
447 {
448 	int ctx;
449 	unsigned long size;
450 
451 	size = numctx * sizeof(struct ctx_list);
452 	ctx_list_pool = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
453 
454 	for (ctx = 0; ctx < numctx; ctx++) {
455 		struct ctx_list *clist;
456 
457 		clist = (ctx_list_pool + ctx);
458 		clist->ctx_number = ctx;
459 		clist->ctx_mm = NULL;
460 	}
461 	ctx_free.next = ctx_free.prev = &ctx_free;
462 	ctx_used.next = ctx_used.prev = &ctx_used;
463 	for (ctx = 0; ctx < numctx; ctx++)
464 		add_to_free_ctxlist(ctx_list_pool + ctx);
465 }
466 
/*
 * Switch the MMU to the address space of mm.
 *
 * If mm has no hardware context yet, one is allocated under
 * srmmu_context_spinlock and the matching context-table entry is pointed
 * at mm->pgd.  After the CPU-specific hooks below, the context register
 * is loaded with mm->context.  tsk is not used here.
 */
void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
	       struct task_struct *tsk)
{
	unsigned long flags;

	if (mm->context == NO_CONTEXT) {
		spin_lock_irqsave(&srmmu_context_spinlock, flags);
		alloc_context(old_mm, mm);
		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
		/* Done after dropping the lock; only the list update is locked. */
		srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
	}

	/* LEON-specific switch hook. */
	if (sparc_cpu_model == sparc_leon)
		leon_switch_mm();

	/* HyperSPARC: the whole icache is flushed on every switch. */
	if (is_hypersparc)
		hyper_flush_whole_icache();

	srmmu_set_context(mm->context);
}
487 
488 /* Low level IO area allocation on the SRMMU. */
489 static inline void srmmu_mapioaddr(unsigned long physaddr,
490 				   unsigned long virt_addr, int bus_type)
491 {
492 	pgd_t *pgdp;
493 	pmd_t *pmdp;
494 	pte_t *ptep;
495 	unsigned long tmp;
496 
497 	physaddr &= PAGE_MASK;
498 	pgdp = pgd_offset_k(virt_addr);
499 	pmdp = pmd_offset(pgdp, virt_addr);
500 	ptep = pte_offset_kernel(pmdp, virt_addr);
501 	tmp = (physaddr >> 4) | SRMMU_ET_PTE;
502 
503 	/* I need to test whether this is consistent over all
504 	 * sun4m's.  The bus_type represents the upper 4 bits of
505 	 * 36-bit physical address on the I/O space lines...
506 	 */
507 	tmp |= (bus_type << 28);
508 	tmp |= SRMMU_PRIV;
509 	__flush_page_to_ram(virt_addr);
510 	set_pte(ptep, __pte(tmp));
511 }
512 
513 void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
514 		      unsigned long xva, unsigned int len)
515 {
516 	while (len != 0) {
517 		len -= PAGE_SIZE;
518 		srmmu_mapioaddr(xpa, xva, bus);
519 		xva += PAGE_SIZE;
520 		xpa += PAGE_SIZE;
521 	}
522 	flush_tlb_all();
523 }
524 
525 static inline void srmmu_unmapioaddr(unsigned long virt_addr)
526 {
527 	pgd_t *pgdp;
528 	pmd_t *pmdp;
529 	pte_t *ptep;
530 
531 	pgdp = pgd_offset_k(virt_addr);
532 	pmdp = pmd_offset(pgdp, virt_addr);
533 	ptep = pte_offset_kernel(pmdp, virt_addr);
534 
535 	/* No need to flush uncacheable page. */
536 	__pte_clear(ptep);
537 }
538 
539 void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len)
540 {
541 	while (len != 0) {
542 		len -= PAGE_SIZE;
543 		srmmu_unmapioaddr(virt_addr);
544 		virt_addr += PAGE_SIZE;
545 	}
546 	flush_tlb_all();
547 }
548 
549 /* tsunami.S */
550 extern void tsunami_flush_cache_all(void);
551 extern void tsunami_flush_cache_mm(struct mm_struct *mm);
552 extern void tsunami_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
553 extern void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
554 extern void tsunami_flush_page_to_ram(unsigned long page);
555 extern void tsunami_flush_page_for_dma(unsigned long page);
556 extern void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
557 extern void tsunami_flush_tlb_all(void);
558 extern void tsunami_flush_tlb_mm(struct mm_struct *mm);
559 extern void tsunami_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
560 extern void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
561 extern void tsunami_setup_blockops(void);
562 
563 /* swift.S */
564 extern void swift_flush_cache_all(void);
565 extern void swift_flush_cache_mm(struct mm_struct *mm);
566 extern void swift_flush_cache_range(struct vm_area_struct *vma,
567 				    unsigned long start, unsigned long end);
568 extern void swift_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
569 extern void swift_flush_page_to_ram(unsigned long page);
570 extern void swift_flush_page_for_dma(unsigned long page);
571 extern void swift_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
572 extern void swift_flush_tlb_all(void);
573 extern void swift_flush_tlb_mm(struct mm_struct *mm);
574 extern void swift_flush_tlb_range(struct vm_area_struct *vma,
575 				  unsigned long start, unsigned long end);
576 extern void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
577 
578 #if 0  /* P3: deadwood to debug precise flushes on Swift. */
579 void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
580 {
581 	int cctx, ctx1;
582 
583 	page &= PAGE_MASK;
584 	if ((ctx1 = vma->vm_mm->context) != -1) {
585 		cctx = srmmu_get_context();
586 /* Is context # ever different from current context? P3 */
587 		if (cctx != ctx1) {
588 			printk("flush ctx %02x curr %02x\n", ctx1, cctx);
589 			srmmu_set_context(ctx1);
590 			swift_flush_page(page);
591 			__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
592 					"r" (page), "i" (ASI_M_FLUSH_PROBE));
593 			srmmu_set_context(cctx);
594 		} else {
595 			 /* Rm. prot. bits from virt. c. */
596 			/* swift_flush_cache_all(); */
597 			/* swift_flush_cache_page(vma, page); */
598 			swift_flush_page(page);
599 
600 			__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
601 				"r" (page), "i" (ASI_M_FLUSH_PROBE));
602 			/* same as above: srmmu_flush_tlb_page() */
603 		}
604 	}
605 }
606 #endif
607 
/*
 * The following are all MBUS based SRMMU modules, and therefore could
 * be found in a multiprocessor configuration.  On the whole, these
 * chips seem to be much more touchy about DVMA and page tables
 * with respect to cache coherency.
 */
614 
615 /* viking.S */
616 extern void viking_flush_cache_all(void);
617 extern void viking_flush_cache_mm(struct mm_struct *mm);
618 extern void viking_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
619 				     unsigned long end);
620 extern void viking_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
621 extern void viking_flush_page_to_ram(unsigned long page);
622 extern void viking_flush_page_for_dma(unsigned long page);
623 extern void viking_flush_sig_insns(struct mm_struct *mm, unsigned long addr);
624 extern void viking_flush_page(unsigned long page);
625 extern void viking_mxcc_flush_page(unsigned long page);
626 extern void viking_flush_tlb_all(void);
627 extern void viking_flush_tlb_mm(struct mm_struct *mm);
628 extern void viking_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
629 				   unsigned long end);
630 extern void viking_flush_tlb_page(struct vm_area_struct *vma,
631 				  unsigned long page);
632 extern void sun4dsmp_flush_tlb_all(void);
633 extern void sun4dsmp_flush_tlb_mm(struct mm_struct *mm);
634 extern void sun4dsmp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
635 				   unsigned long end);
636 extern void sun4dsmp_flush_tlb_page(struct vm_area_struct *vma,
637 				  unsigned long page);
638 
639 /* hypersparc.S */
640 extern void hypersparc_flush_cache_all(void);
641 extern void hypersparc_flush_cache_mm(struct mm_struct *mm);
642 extern void hypersparc_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
643 extern void hypersparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
644 extern void hypersparc_flush_page_to_ram(unsigned long page);
645 extern void hypersparc_flush_page_for_dma(unsigned long page);
646 extern void hypersparc_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
647 extern void hypersparc_flush_tlb_all(void);
648 extern void hypersparc_flush_tlb_mm(struct mm_struct *mm);
649 extern void hypersparc_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
650 extern void hypersparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
651 extern void hypersparc_setup_blockops(void);
652 
653 /*
654  * NOTE: All of this startup code assumes the low 16mb (approx.) of
655  *       kernel mappings are done with one single contiguous chunk of
656  *       ram.  On small ram machines (classics mainly) we only get
657  *       around 8mb mapped for us.
658  */
659 
660 static void __init early_pgtable_allocfail(char *type)
661 {
662 	prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
663 	prom_halt();
664 }
665 
666 static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
667 							unsigned long end)
668 {
669 	pgd_t *pgdp;
670 	pmd_t *pmdp;
671 	pte_t *ptep;
672 
673 	while (start < end) {
674 		pgdp = pgd_offset_k(start);
675 		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
676 			pmdp = __srmmu_get_nocache(
677 			    SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
678 			if (pmdp == NULL)
679 				early_pgtable_allocfail("pmd");
680 			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
681 			pgd_set(__nocache_fix(pgdp), pmdp);
682 		}
683 		pmdp = pmd_offset(__nocache_fix(pgdp), start);
684 		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
685 			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
686 			if (ptep == NULL)
687 				early_pgtable_allocfail("pte");
688 			memset(__nocache_fix(ptep), 0, PTE_SIZE);
689 			pmd_set(__nocache_fix(pmdp), ptep);
690 		}
691 		if (start > (0xffffffffUL - PMD_SIZE))
692 			break;
693 		start = (start + PMD_SIZE) & PMD_MASK;
694 	}
695 }
696 
697 static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
698 						  unsigned long end)
699 {
700 	pgd_t *pgdp;
701 	pmd_t *pmdp;
702 	pte_t *ptep;
703 
704 	while (start < end) {
705 		pgdp = pgd_offset_k(start);
706 		if (pgd_none(*pgdp)) {
707 			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
708 			if (pmdp == NULL)
709 				early_pgtable_allocfail("pmd");
710 			memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
711 			pgd_set(pgdp, pmdp);
712 		}
713 		pmdp = pmd_offset(pgdp, start);
714 		if (srmmu_pmd_none(*pmdp)) {
715 			ptep = __srmmu_get_nocache(PTE_SIZE,
716 							     PTE_SIZE);
717 			if (ptep == NULL)
718 				early_pgtable_allocfail("pte");
719 			memset(ptep, 0, PTE_SIZE);
720 			pmd_set(pmdp, ptep);
721 		}
722 		if (start > (0xffffffffUL - PMD_SIZE))
723 			break;
724 		start = (start + PMD_SIZE) & PMD_MASK;
725 	}
726 }
727 
728 /* These flush types are not available on all chips... */
729 static inline unsigned long srmmu_probe(unsigned long vaddr)
730 {
731 	unsigned long retval;
732 
733 	if (sparc_cpu_model != sparc_leon) {
734 
735 		vaddr &= PAGE_MASK;
736 		__asm__ __volatile__("lda [%1] %2, %0\n\t" :
737 				     "=r" (retval) :
738 				     "r" (vaddr | 0x400), "i" (ASI_M_FLUSH_PROBE));
739 	} else {
740 		retval = leon_swprobe(vaddr, NULL);
741 	}
742 	return retval;
743 }
744 
/*
 * This is much cleaner than poking around physical address space
 * looking at the prom's page table directly which is what most
 * other OS's do.  Yuck... this is much better.
 *
 * Walk the virtual range [start, end] and copy every translation the MMU
 * probe reports into the kernel page tables.  For each hit the code works
 * out whether the probed entry is a page-level PTE, a pmd-level entry or a
 * pgd-level entry, stores it at the matching level, and advances by the
 * size that level covers.  All table accesses go through __nocache_fix().
 */
static void __init srmmu_inherit_prom_mappings(unsigned long start,
					       unsigned long end)
{
	unsigned long probed;
	unsigned long addr;
	pgd_t *pgdp;
	pmd_t *pmdp;
	pte_t *ptep;
	int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */

	while (start <= end) {
		if (start == 0)
			break; /* probably wrap around */
		/* Jump from 0xfef00000 straight to KADB_DEBUGGER_BEGVM. */
		if (start == 0xfef00000)
			start = KADB_DEBUGGER_BEGVM;
		probed = srmmu_probe(start);
		if (!probed) {
			/* continue probing until we find an entry */
			start += PAGE_SIZE;
			continue;
		}

		/* A red snapper, see what it really is. */
		what = 0;
		/*
		 * addr + <region size> is the last page of the region that
		 * begins at start; if that page probes to the same value,
		 * the whole region is covered by one entry.
		 */
		addr = start - PAGE_SIZE;

		/* Only a start aligned to a hardware pmd can be a pmd-level entry. */
		if (!(start & ~(SRMMU_REAL_PMD_MASK))) {
			if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed)
				what = 1;
		}

		/* Likewise for pgd alignment; this check overrides the pmd result. */
		if (!(start & ~(SRMMU_PGDIR_MASK))) {
			if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed)
				what = 2;
		}

		pgdp = pgd_offset_k(start);
		if (what == 2) {
			/* Store the probed entry directly in the pgd slot. */
			*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
			start += SRMMU_PGDIR_SIZE;
			continue;
		}
		/* Lower levels are needed: allocate missing pmd/pte tables. */
		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
						   SRMMU_PMD_TABLE_SIZE);
			if (pmdp == NULL)
				early_pgtable_allocfail("pmd");
			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
			pgd_set(__nocache_fix(pgdp), pmdp);
		}
		pmdp = pmd_offset(__nocache_fix(pgdp), start);
		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
			if (ptep == NULL)
				early_pgtable_allocfail("pte");
			memset(__nocache_fix(ptep), 0, PTE_SIZE);
			pmd_set(__nocache_fix(pmdp), ptep);
		}
		if (what == 1) {
			/* We bend the rule where all 16 PTPs in a pmd_t point
			 * inside the same PTE page, and we leak a perfectly
			 * good hardware PTE piece. Alternatives seem worse.
			 */
			unsigned int x;	/* Index of HW PMD in soft cluster */
			unsigned long *val;
			x = (start >> PMD_SHIFT) & 15;
			val = &pmdp->pmdv[x];
			/* Overwrite that one hardware slot with the probed entry. */
			*(unsigned long *)__nocache_fix(val) = probed;
			start += SRMMU_REAL_PMD_SIZE;
			continue;
		}
		/* Ordinary page: store the probed value as the PTE for start. */
		ptep = pte_offset_kernel(__nocache_fix(pmdp), start);
		*(pte_t *)__nocache_fix(ptep) = __pte(probed);
		start += PAGE_SIZE;
	}
}
826 
827 #define KERNEL_PTE(page_shifted) ((page_shifted)|SRMMU_CACHE|SRMMU_PRIV|SRMMU_VALID)
828 
829 /* Create a third-level SRMMU 16MB page mapping. */
830 static void __init do_large_mapping(unsigned long vaddr, unsigned long phys_base)
831 {
832 	pgd_t *pgdp = pgd_offset_k(vaddr);
833 	unsigned long big_pte;
834 
835 	big_pte = KERNEL_PTE(phys_base >> 4);
836 	*(pgd_t *)__nocache_fix(pgdp) = __pgd(big_pte);
837 }
838 
839 /* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */
840 static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
841 {
842 	unsigned long pstart = (sp_banks[sp_entry].base_addr & SRMMU_PGDIR_MASK);
843 	unsigned long vstart = (vbase & SRMMU_PGDIR_MASK);
844 	unsigned long vend = SRMMU_PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
845 	/* Map "low" memory only */
846 	const unsigned long min_vaddr = PAGE_OFFSET;
847 	const unsigned long max_vaddr = PAGE_OFFSET + SRMMU_MAXMEM;
848 
849 	if (vstart < min_vaddr || vstart >= max_vaddr)
850 		return vstart;
851 
852 	if (vend > max_vaddr || vend < min_vaddr)
853 		vend = max_vaddr;
854 
855 	while (vstart < vend) {
856 		do_large_mapping(vstart, pstart);
857 		vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE;
858 	}
859 	return vstart;
860 }
861 
862 static void __init map_kernel(void)
863 {
864 	int i;
865 
866 	if (phys_base > 0) {
867 		do_large_mapping(PAGE_OFFSET, phys_base);
868 	}
869 
870 	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
871 		map_spbank((unsigned long)__va(sp_banks[i].base_addr), i);
872 	}
873 }
874 
875 void (*poke_srmmu)(void) = NULL;
876 
/*
 * Boot-time MMU bring-up: determine the number of hardware contexts, set
 * up bootmem and the nocache pool, take over the PROM's mappings, map the
 * kernel, install the context table, allocate page-table skeletons for
 * the fixed kernel virtual regions and finally describe the memory zones
 * to the page allocator.
 */
void __init srmmu_paging_init(void)
{
	int i;
	phandle cpunode;
	char node_str[128];
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long pages_avail;

	init_mm.context = (unsigned long) NO_CONTEXT;
	sparc_iomap.start = SUN4M_IOBASE_VADDR;	/* 16MB of IOSPACE on all sun4m's. */

	if (sparc_cpu_model == sun4d)
		num_contexts = 65536; /* We know it is Viking */
	else {
		/* Find the number of contexts on the srmmu. */
		cpunode = prom_getchild(prom_root_node);
		num_contexts = 0;
		/* Use "mmu-nctx" of the first cpu node found (default 8). */
		while (cpunode != 0) {
			prom_getstring(cpunode, "device_type", node_str, sizeof(node_str));
			if (!strcmp(node_str, "cpu")) {
				num_contexts = prom_getintdefault(cpunode, "mmu-nctx", 0x8);
				break;
			}
			cpunode = prom_getsibling(cpunode);
		}
	}

	if (!num_contexts) {
		prom_printf("Something wrong, can't find cpu node in paging_init.\n");
		prom_halt();
	}

	pages_avail = 0;
	last_valid_pfn = bootmem_init(&pages_avail);

	/* Nocache pool first: every page table below is allocated from it. */
	srmmu_nocache_calcsize();
	srmmu_nocache_init();
	srmmu_inherit_prom_mappings(0xfe400000, (LINUX_OPPROM_ENDVM - PAGE_SIZE));
	map_kernel();

	/* ctx table has to be physically aligned to its size */
	/* NOTE(review): the allocation result is not checked for NULL. */
	srmmu_context_table = __srmmu_get_nocache(num_contexts * sizeof(ctxd_t), num_contexts * sizeof(ctxd_t));
	srmmu_ctx_table_phys = (ctxd_t *)__nocache_pa(srmmu_context_table);

	/* Every context initially points at the kernel page directory. */
	for (i = 0; i < num_contexts; i++)
		srmmu_ctxd_set((ctxd_t *)__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir);

	flush_cache_all();
	/* Hand the new context table to the MMU. */
	srmmu_set_ctable_ptr((unsigned long)srmmu_ctx_table_phys);
#ifdef CONFIG_SMP
	/* Stop from hanging here... */
	local_ops->tlb_all();
#else
	flush_tlb_all();
#endif
	/* NOTE(review): poke_srmmu is called without a NULL check; it is presumably set during CPU-type init - confirm. */
	poke_srmmu();

	/* Page-table skeletons for the I/O, DVMA, fixmap and pkmap regions. */
	srmmu_allocate_ptable_skeleton(sparc_iomap.start, IOBASE_END);
	srmmu_allocate_ptable_skeleton(DVMA_VADDR, DVMA_END);

	srmmu_allocate_ptable_skeleton(
		__fix_to_virt(__end_of_fixed_addresses - 1), FIXADDR_TOP);
	srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);

	/* Remember the PTE that maps PKMAP_BASE. */
	pgd = pgd_offset_k(PKMAP_BASE);
	pmd = pmd_offset(pgd, PKMAP_BASE);
	pte = pte_offset_kernel(pmd, PKMAP_BASE);
	pkmap_page_table = pte;

	flush_cache_all();
	flush_tlb_all();

	sparc_context_init(num_contexts);

	kmap_init();

	{
		unsigned long zones_size[MAX_NR_ZONES];
		unsigned long zholes_size[MAX_NR_ZONES];
		unsigned long npages;
		int znum;

		for (znum = 0; znum < MAX_NR_ZONES; znum++)
			zones_size[znum] = zholes_size[znum] = 0;

		/* Low memory goes to ZONE_DMA; holes are pages bootmem did not report as available. */
		npages = max_low_pfn - pfn_base;

		zones_size[ZONE_DMA] = npages;
		zholes_size[ZONE_DMA] = npages - pages_avail;

		/* Everything above max_low_pfn goes to ZONE_HIGHMEM. */
		npages = highend_pfn - max_low_pfn;
		zones_size[ZONE_HIGHMEM] = npages;
		zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();

		free_area_init_node(0, zones_size, pfn_base, zholes_size);
	}
}
976 
977 void mmu_info(struct seq_file *m)
978 {
979 	seq_printf(m,
980 		   "MMU type\t: %s\n"
981 		   "contexts\t: %d\n"
982 		   "nocache total\t: %ld\n"
983 		   "nocache used\t: %d\n",
984 		   srmmu_name,
985 		   num_contexts,
986 		   srmmu_nocache_size,
987 		   srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
988 }
989 
990 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
991 {
992 	mm->context = NO_CONTEXT;
993 	return 0;
994 }
995 
996 void destroy_context(struct mm_struct *mm)
997 {
998 	unsigned long flags;
999 
1000 	if (mm->context != NO_CONTEXT) {
1001 		flush_cache_mm(mm);
1002 		srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
1003 		flush_tlb_mm(mm);
1004 		spin_lock_irqsave(&srmmu_context_spinlock, flags);
1005 		free_context(mm->context);
1006 		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
1007 		mm->context = NO_CONTEXT;
1008 	}
1009 }
1010 
1011 /* Init various srmmu chip types. */
1012 static void __init srmmu_is_bad(void)
1013 {
1014 	prom_printf("Could not determine SRMMU chip type.\n");
1015 	prom_halt();
1016 }
1017 
1018 static void __init init_vac_layout(void)
1019 {
1020 	phandle nd;
1021 	int cache_lines;
1022 	char node_str[128];
1023 #ifdef CONFIG_SMP
1024 	int cpu = 0;
1025 	unsigned long max_size = 0;
1026 	unsigned long min_line_size = 0x10000000;
1027 #endif
1028 
1029 	nd = prom_getchild(prom_root_node);
1030 	while ((nd = prom_getsibling(nd)) != 0) {
1031 		prom_getstring(nd, "device_type", node_str, sizeof(node_str));
1032 		if (!strcmp(node_str, "cpu")) {
1033 			vac_line_size = prom_getint(nd, "cache-line-size");
1034 			if (vac_line_size == -1) {
1035 				prom_printf("can't determine cache-line-size, halting.\n");
1036 				prom_halt();
1037 			}
1038 			cache_lines = prom_getint(nd, "cache-nlines");
1039 			if (cache_lines == -1) {
1040 				prom_printf("can't determine cache-nlines, halting.\n");
1041 				prom_halt();
1042 			}
1043 
1044 			vac_cache_size = cache_lines * vac_line_size;
1045 #ifdef CONFIG_SMP
1046 			if (vac_cache_size > max_size)
1047 				max_size = vac_cache_size;
1048 			if (vac_line_size < min_line_size)
1049 				min_line_size = vac_line_size;
1050 			//FIXME: cpus not contiguous!!
1051 			cpu++;
1052 			if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1053 				break;
1054 #else
1055 			break;
1056 #endif
1057 		}
1058 	}
1059 	if (nd == 0) {
1060 		prom_printf("No CPU nodes found, halting.\n");
1061 		prom_halt();
1062 	}
1063 #ifdef CONFIG_SMP
1064 	vac_cache_size = max_size;
1065 	vac_line_size = min_line_size;
1066 #endif
1067 	printk("SRMMU: Using VAC size of %d bytes, line size %d bytes.\n",
1068 	       (int)vac_cache_size, (int)vac_line_size);
1069 }
1070 
/*
 * Program the HyperSparc MMU control register and the register written
 * by put_ross_icr().  init_hypersparc() installs this function as
 * poke_srmmu.
 */
static void poke_hypersparc(void)
{
	/* Sink for the two register reads at the end; never read back. */
	volatile unsigned long clear;
	/* The control register is sampled before the flush below. */
	unsigned long mreg = srmmu_get_mmureg();

	hyper_flush_unconditional_combined();

	/* Clear CWENABLE, then set CENABLE, WBENABLE and CMODE. */
	mreg &= ~(HYPERSPARC_CWENABLE);
	mreg |= (HYPERSPARC_CENABLE | HYPERSPARC_WBENABLE);
	mreg |= (HYPERSPARC_CMODE);

	srmmu_set_mmureg(mreg);

#if 0 /* XXX I think this is bad news... -DaveM */
	hyper_clear_all_tags();
#endif

	put_ross_icr(HYPERSPARC_ICCR_FTD | HYPERSPARC_ICCR_ICE);
	hyper_flush_whole_icache();
	/*
	 * Read the fault address and fault status registers and throw the
	 * values away.  NOTE(review): presumably the reads clear stale
	 * fault state, as the variable name suggests - confirm.
	 */
	clear = srmmu_get_faddr();
	clear = srmmu_get_fstatus();
}
1093 
1094 static const struct sparc32_cachetlb_ops hypersparc_ops = {
1095 	.cache_all	= hypersparc_flush_cache_all,
1096 	.cache_mm	= hypersparc_flush_cache_mm,
1097 	.cache_page	= hypersparc_flush_cache_page,
1098 	.cache_range	= hypersparc_flush_cache_range,
1099 	.tlb_all	= hypersparc_flush_tlb_all,
1100 	.tlb_mm		= hypersparc_flush_tlb_mm,
1101 	.tlb_page	= hypersparc_flush_tlb_page,
1102 	.tlb_range	= hypersparc_flush_tlb_range,
1103 	.page_to_ram	= hypersparc_flush_page_to_ram,
1104 	.sig_insns	= hypersparc_flush_sig_insns,
1105 	.page_for_dma	= hypersparc_flush_page_for_dma,
1106 };
1107 
/*
 * Select the HyperSparc implementation: record the chip name and module
 * type, read the cache layout from the PROM, and install the HyperSparc
 * flush ops and poke routine.
 */
static void __init init_hypersparc(void)
{
	srmmu_name = "ROSS HyperSparc";
	srmmu_modtype = HyperSparc;

	/* Fills in vac_cache_size and vac_line_size. */
	init_vac_layout();

	is_hypersparc = 1;
	sparc32_cachetlb_ops = &hypersparc_ops;

	poke_srmmu = poke_hypersparc;

	/*
	 * NOTE(review): defined outside this chunk; kept as the last step,
	 * after the cache layout is known - confirm whether it depends on it.
	 */
	hypersparc_setup_blockops();
}
1122 
/*
 * Flush the Swift cache, then set the I/D cache enable bits and clear
 * the branch folding bit in the MMU control register.  init_swift()
 * installs this function as poke_srmmu.
 */
static void poke_swift(void)
{
	unsigned long mreg;

	/* Clear any crap from the cache or else... */
	swift_flush_cache_all();

	/* Enable I & D caches */
	mreg = srmmu_get_mmureg();
	mreg |= (SWIFT_IE | SWIFT_DE);
	/*
	 * The Swift branch folding logic is completely broken.  At
	 * trap time, if things are just right, it can mistakenly
	 * think that a trap is coming from kernel mode when in fact
	 * it is coming from user mode (it mis-executes the branch in
	 * the trap code).  So you see things like crashme completely
	 * hosing your machine which is completely unacceptable.  Turn
	 * this shit off... nice job Fujitsu.
	 */
	mreg &= ~(SWIFT_BF);
	srmmu_set_mmureg(mreg);
}
1145 
1146 static const struct sparc32_cachetlb_ops swift_ops = {
1147 	.cache_all	= swift_flush_cache_all,
1148 	.cache_mm	= swift_flush_cache_mm,
1149 	.cache_page	= swift_flush_cache_page,
1150 	.cache_range	= swift_flush_cache_range,
1151 	.tlb_all	= swift_flush_tlb_all,
1152 	.tlb_mm		= swift_flush_tlb_mm,
1153 	.tlb_page	= swift_flush_tlb_page,
1154 	.tlb_range	= swift_flush_tlb_range,
1155 	.page_to_ram	= swift_flush_page_to_ram,
1156 	.sig_insns	= swift_flush_sig_insns,
1157 	.page_for_dma	= swift_flush_page_for_dma,
1158 };
1159 
/* Address read through ASI_M_BYPASS to obtain the Swift revision. */
#define SWIFT_MASKID_ADDR  0x10003018
/*
 * Select the Swift implementation.  The revision is read from
 * SWIFT_MASKID_ADDR and decides srmmu_modtype and which hardware bug
 * bits are added to hwbug_bitmask; then the Swift flush ops and poke
 * routine are installed.
 */
static void __init init_swift(void)
{
	unsigned long swift_rev;

	/*
	 * Load a word from SWIFT_MASKID_ADDR using ASI_M_BYPASS, then shift
	 * it right by 0x18 (24) bits so only the top byte remains.
	 */
	__asm__ __volatile__("lda [%1] %2, %0\n\t"
			     "srl %0, 0x18, %0\n\t" :
			     "=r" (swift_rev) :
			     "r" (SWIFT_MASKID_ADDR), "i" (ASI_M_BYPASS));
	srmmu_name = "Fujitsu Swift";
	switch (swift_rev) {
	case 0x11:
	case 0x20:
	case 0x23:
	case 0x30:
		srmmu_modtype = Swift_lots_o_bugs;
		hwbug_bitmask |= (HWBUG_KERN_ACCBROKEN | HWBUG_KERN_CBITBROKEN);
		/*
		 * Gee george, I wonder why Sun is so hush hush about
		 * this hardware bug... really braindamage stuff going
		 * on here.  However I think we can find a way to avoid
		 * all of the workaround overhead under Linux.  Basically,
		 * any page fault can cause kernel pages to become user
		 * accessible (the mmu gets confused and clears some of
		 * the ACC bits in kernel ptes).  Aha, sounds pretty
		 * horrible eh?  But wait, after extensive testing it appears
		 * that if you use pgd_t level large kernel pte's (like the
		 * 4MB pages on the Pentium) the bug does not get tripped
		 * at all.  This avoids almost all of the major overhead.
		 * Welcome to a world where your vendor tells you to,
		 * "apply this kernel patch" instead of "sorry for the
		 * broken hardware, send it back and we'll give you
		 * properly functioning parts"
		 */
		break;
	case 0x25:
	case 0x31:
		srmmu_modtype = Swift_bad_c;
		hwbug_bitmask |= HWBUG_KERN_CBITBROKEN;
		/*
		 * You see Sun allude to this hardware bug but never
		 * admit things directly, they'll say things like,
		 * "the Swift chip cache problems" or similar.
		 */
		break;
	default:
		/* Any other revision: no hardware bug bits are set. */
		srmmu_modtype = Swift_ok;
		break;
	}

	sparc32_cachetlb_ops = &swift_ops;
	flush_page_for_dma_global = 0;

	/*
	 * Are you now convinced that the Swift is one of the
	 * biggest VLSI abortions of all time?  Bravo Fujitsu!
	 * Fujitsu, the !#?!%$'d up processor people.  I bet if
	 * you examined the microcode of the Swift you'd find
	 * XXX's all over the place.
	 */
	poke_srmmu = poke_swift;
}
1222 
/*
 * TurboSparc "flush all caches" hook: flush the user register windows,
 * then call turbosparc_idflash_clear().
 */
static void turbosparc_flush_cache_all(void)
{
	flush_user_windows();

	turbosparc_idflash_clear();
}
1228 
/*
 * TurboSparc per-mm cache flush hook.  The work is the same as in
 * turbosparc_flush_cache_all(), wrapped in FLUSH_BEGIN/FLUSH_END.
 *
 * NOTE(review): FLUSH_BEGIN/FLUSH_END are macros defined outside this
 * chunk; presumably they make the body conditional on @mm - confirm.
 */
static void turbosparc_flush_cache_mm(struct mm_struct *mm)
{
	FLUSH_BEGIN(mm)
	flush_user_windows();
	turbosparc_idflash_clear();
	FLUSH_END
}
1236 
/*
 * TurboSparc range cache flush hook.  @start and @end are not used in
 * the body: the same flush as turbosparc_flush_cache_mm() is done for
 * the mm that owns @vma.
 *
 * NOTE(review): FLUSH_BEGIN/FLUSH_END are macros defined outside this
 * chunk; presumably they make the body conditional on the mm - confirm.
 */
static void turbosparc_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	FLUSH_BEGIN(vma->vm_mm)
	flush_user_windows();
	turbosparc_idflash_clear();
	FLUSH_END
}
1244 
/*
 * TurboSparc single-page cache flush hook.  @page is not used in the
 * body: the dcache is always flushed, and the icache too when @vma is
 * executable (VM_EXEC).
 *
 * NOTE(review): FLUSH_BEGIN/FLUSH_END are macros defined outside this
 * chunk; presumably they make the body conditional on the mm - confirm.
 */
static void turbosparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
{
	FLUSH_BEGIN(vma->vm_mm)
	flush_user_windows();
	/* Only executable mappings need the icache flushed. */
	if (vma->vm_flags & VM_EXEC)
		turbosparc_flush_icache();
	turbosparc_flush_dcache();
	FLUSH_END
}
1254 
/*
 * TurboSparc is copy-back, if we turn it on, but this does not work.
 *
 * Consequently this hook only has a body when TURBOSPARC_WRITEBACK is
 * defined: the page cache flush is issued if srmmu_probe() reports a
 * mapping for @page, and the fault status register is read afterwards
 * with the value discarded.  Otherwise the function is empty.
 */
static void turbosparc_flush_page_to_ram(unsigned long page)
{
#ifdef TURBOSPARC_WRITEBACK
	volatile unsigned long fstatus_sink;

	if (srmmu_probe(page))
		turbosparc_flush_page_cache(page);

	fstatus_sink = srmmu_get_fstatus();
#endif
}
1266 
/* TurboSparc sig_insns hook: intentionally does nothing. */
static void turbosparc_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
{
	/* Nothing to do. */
}
1270 
/*
 * TurboSparc page_for_dma hook.  @page is ignored; the dcache flush
 * routine is called unconditionally.
 */
static void turbosparc_flush_page_for_dma(unsigned long page)
{
	turbosparc_flush_dcache();
}
1275 
/* TurboSparc tlb_all hook: flush the whole TLB. */
static void turbosparc_flush_tlb_all(void)
{
	srmmu_flush_whole_tlb();
}
1280 
/*
 * TurboSparc per-mm TLB flush hook: flushes the whole TLB, wrapped in
 * FLUSH_BEGIN/FLUSH_END.
 *
 * NOTE(review): FLUSH_BEGIN/FLUSH_END are macros defined outside this
 * chunk; presumably they make the body conditional on @mm - confirm.
 */
static void turbosparc_flush_tlb_mm(struct mm_struct *mm)
{
	FLUSH_BEGIN(mm)
	srmmu_flush_whole_tlb();
	FLUSH_END
}
1287 
/*
 * TurboSparc range TLB flush hook.  @start and @end are not used in the
 * body: the whole TLB is flushed, wrapped in FLUSH_BEGIN/FLUSH_END.
 *
 * NOTE(review): FLUSH_BEGIN/FLUSH_END are macros defined outside this
 * chunk; presumably they make the body conditional on the mm - confirm.
 */
static void turbosparc_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	FLUSH_BEGIN(vma->vm_mm)
	srmmu_flush_whole_tlb();
	FLUSH_END
}
1294 
/*
 * TurboSparc single-page TLB flush hook.  @page is not used in the
 * body: the whole TLB is flushed, wrapped in FLUSH_BEGIN/FLUSH_END.
 *
 * NOTE(review): FLUSH_BEGIN/FLUSH_END are macros defined outside this
 * chunk; presumably they make the body conditional on the mm - confirm.
 */
static void turbosparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
	FLUSH_BEGIN(vma->vm_mm)
	srmmu_flush_whole_tlb();
	FLUSH_END
}
1301 
1302 
/*
 * Program the TurboSparc MMU control register and the register accessed
 * through turbosparc_get_ccreg()/turbosparc_set_ccreg().  The I and D
 * caches are switched off while that register is rewritten and switched
 * back on at the end.  init_turbosparc() installs this function as
 * poke_srmmu.
 */
static void poke_turbosparc(void)
{
	/* The control register is sampled before the cache flush below. */
	unsigned long mreg = srmmu_get_mmureg();
	unsigned long ccreg;

	/* Clear any crap from the cache or else... */
	turbosparc_flush_cache_all();
	/* Temporarily disable I & D caches */
	mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE);
	mreg &= ~(TURBOSPARC_PCENABLE);		/* Don't check parity */
	srmmu_set_mmureg(mreg);

	ccreg = turbosparc_get_ccreg();

#ifdef TURBOSPARC_WRITEBACK
	ccreg |= (TURBOSPARC_SNENABLE);		/* Do DVMA snooping in Dcache */
	ccreg &= ~(TURBOSPARC_uS2 | TURBOSPARC_WTENABLE);
			/* Write-back D-cache, emulate VLSI
			 * abortion number three, not number one */
#else
	/* For now let's play safe, optimize later */
	ccreg |= (TURBOSPARC_SNENABLE | TURBOSPARC_WTENABLE);
			/* Do DVMA snooping in Dcache, Write-thru D-cache */
	ccreg &= ~(TURBOSPARC_uS2);
			/* Emulate VLSI abortion number three, not number one */
#endif

	/*
	 * The low three bits of ccreg select the case: SCENABLE is set for
	 * every value except 0 and 7.
	 */
	switch (ccreg & 7) {
	case 0: /* No SE cache */
	case 7: /* Test mode */
		break;
	default:
		ccreg |= (TURBOSPARC_SCENABLE);
	}
	turbosparc_set_ccreg(ccreg);

	mreg |= (TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* I & D caches on */
	mreg |= (TURBOSPARC_ICSNOOP);		/* Icache snooping on */
	srmmu_set_mmureg(mreg);
}
1343 
1344 static const struct sparc32_cachetlb_ops turbosparc_ops = {
1345 	.cache_all	= turbosparc_flush_cache_all,
1346 	.cache_mm	= turbosparc_flush_cache_mm,
1347 	.cache_page	= turbosparc_flush_cache_page,
1348 	.cache_range	= turbosparc_flush_cache_range,
1349 	.tlb_all	= turbosparc_flush_tlb_all,
1350 	.tlb_mm		= turbosparc_flush_tlb_mm,
1351 	.tlb_page	= turbosparc_flush_tlb_page,
1352 	.tlb_range	= turbosparc_flush_tlb_range,
1353 	.page_to_ram	= turbosparc_flush_page_to_ram,
1354 	.sig_insns	= turbosparc_flush_sig_insns,
1355 	.page_for_dma	= turbosparc_flush_page_for_dma,
1356 };
1357 
1358 static void __init init_turbosparc(void)
1359 {
1360 	srmmu_name = "Fujitsu TurboSparc";
1361 	srmmu_modtype = TurboSparc;
1362 	sparc32_cachetlb_ops = &turbosparc_ops;
1363 	poke_srmmu = poke_turbosparc;
1364 }
1365 
/*
 * Flush the Tsunami I and D caches, then clear TSUNAMI_ITD and set the
 * TSUNAMI_IENAB and TSUNAMI_DENAB bits in the MMU control register.
 * init_tsunami() installs this function as poke_srmmu.
 */
static void poke_tsunami(void)
{
	/* The control register is sampled before the flushes below. */
	unsigned long mreg = srmmu_get_mmureg();

	tsunami_flush_icache();
	tsunami_flush_dcache();
	mreg &= ~TSUNAMI_ITD;
	mreg |= (TSUNAMI_IENAB | TSUNAMI_DENAB);
	srmmu_set_mmureg(mreg);
}
1376 
1377 static const struct sparc32_cachetlb_ops tsunami_ops = {
1378 	.cache_all	= tsunami_flush_cache_all,
1379 	.cache_mm	= tsunami_flush_cache_mm,
1380 	.cache_page	= tsunami_flush_cache_page,
1381 	.cache_range	= tsunami_flush_cache_range,
1382 	.tlb_all	= tsunami_flush_tlb_all,
1383 	.tlb_mm		= tsunami_flush_tlb_mm,
1384 	.tlb_page	= tsunami_flush_tlb_page,
1385 	.tlb_range	= tsunami_flush_tlb_range,
1386 	.page_to_ram	= tsunami_flush_page_to_ram,
1387 	.sig_insns	= tsunami_flush_sig_insns,
1388 	.page_for_dma	= tsunami_flush_page_for_dma,
1389 };
1390 
1391 static void __init init_tsunami(void)
1392 {
1393 	/*
1394 	 * Tsunami's pretty sane, Sun and TI actually got it
1395 	 * somewhat right this time.  Fujitsu should have
1396 	 * taken some lessons from them.
1397 	 */
1398 
1399 	srmmu_name = "TI Tsunami";
1400 	srmmu_modtype = Tsunami;
1401 	sparc32_cachetlb_ops = &tsunami_ops;
1402 	poke_srmmu = poke_tsunami;
1403 
1404 	tsunami_setup_blockops();
1405 }
1406 
/*
 * Program the Viking MMU control register, plus the MXCC control
 * register when viking_mxcc_present is set.  init_viking() installs this
 * function as poke_srmmu.
 */
static void poke_viking(void)
{
	unsigned long mreg = srmmu_get_mmureg();
	/*
	 * Counts calls that take the non-MXCC branch.  It is zero on the
	 * first such call, so the breakpoint register fix-up below only
	 * runs from the second call onwards.
	 */
	static int smp_catch;

	if (viking_mxcc_present) {
		unsigned long mxcc_control = mxcc_get_creg();

		/* Set ECE, PRE and MCE; clear RRC. */
		mxcc_control |= (MXCC_CTL_ECE | MXCC_CTL_PRE | MXCC_CTL_MCE);
		mxcc_control &= ~(MXCC_CTL_RRC);
		mxcc_set_creg(mxcc_control);

		/*
		 * We don't need memory parity checks.
		 * XXX This is a mess, have to dig out later. ecd.
		viking_mxcc_turn_off_parity(&mreg, &mxcc_control);
		 */

		/* We do cache ptables on MXCC. */
		mreg |= VIKING_TCENABLE;
	} else {
		unsigned long bpreg;

		mreg &= ~(VIKING_TCENABLE);
		if (smp_catch++) {
			/* Must disable mixed-cmd mode here for other cpu's. */
			bpreg = viking_get_bpreg();
			bpreg &= ~(VIKING_ACTION_MIX);
			viking_set_bpreg(bpreg);

			/* Just in case PROM does something funny. */
			msi_set_sync();
		}
	}

	/* Common part: set SPENABLE, ICENABLE, DCENABLE, SBENABLE; clear ACENABLE. */
	mreg |= VIKING_SPENABLE;
	mreg |= (VIKING_ICENABLE | VIKING_DCENABLE);
	mreg |= VIKING_SBENABLE;
	mreg &= ~(VIKING_ACENABLE);
	srmmu_set_mmureg(mreg);
}
1448 
1449 static struct sparc32_cachetlb_ops viking_ops __ro_after_init = {
1450 	.cache_all	= viking_flush_cache_all,
1451 	.cache_mm	= viking_flush_cache_mm,
1452 	.cache_page	= viking_flush_cache_page,
1453 	.cache_range	= viking_flush_cache_range,
1454 	.tlb_all	= viking_flush_tlb_all,
1455 	.tlb_mm		= viking_flush_tlb_mm,
1456 	.tlb_page	= viking_flush_tlb_page,
1457 	.tlb_range	= viking_flush_tlb_range,
1458 	.page_to_ram	= viking_flush_page_to_ram,
1459 	.sig_insns	= viking_flush_sig_insns,
1460 	.page_for_dma	= viking_flush_page_for_dma,
1461 };
1462 
#ifdef CONFIG_SMP
/*
 * On sun4d the cpu broadcasts local TLB flushes, so performing the flush
 * locally is enough for all the other cpus to see it.  Unfortunately
 * there is a bug in the sun4d XBUS backplane that forces us to add some
 * synchronization to these flushes.
 *
 * The fifo which keeps track of all the pending TLB broadcasts in the
 * system is an entry or two too small, so with too many flushes in
 * flight at once it overflows and a TLB flush is lost, resulting in
 * corruption.
 *
 * The workaround is to take a global spinlock around the TLB flushes,
 * which guarantees we never have too many pending.  It's a big hammer,
 * but a semaphore-like scheme that allows N flushes at once would need
 * SMP locking anyway, so there is no real value in trying any harder
 * than this.
 *
 * Only the four TLB hooks differ from viking_ops.  Not const:
 * init_viking() may replace .page_for_dma during boot.
 */
static struct sparc32_cachetlb_ops viking_sun4d_smp_ops __ro_after_init = {
	/* TLB flushes: sun4d SMP variants */
	.tlb_all = sun4dsmp_flush_tlb_all,
	.tlb_mm = sun4dsmp_flush_tlb_mm,
	.tlb_range = sun4dsmp_flush_tlb_range,
	.tlb_page = sun4dsmp_flush_tlb_page,

	/* cache flushes: plain Viking */
	.cache_all = viking_flush_cache_all,
	.cache_mm = viking_flush_cache_mm,
	.cache_range = viking_flush_cache_range,
	.cache_page = viking_flush_cache_page,

	/* remaining hooks: plain Viking */
	.page_to_ram = viking_flush_page_to_ram,
	.sig_insns = viking_flush_sig_insns,
	.page_for_dma = viking_flush_page_for_dma,
};
#endif
1494 
1495 static void __init init_viking(void)
1496 {
1497 	unsigned long mreg = srmmu_get_mmureg();
1498 
1499 	/* Ahhh, the viking.  SRMMU VLSI abortion number two... */
1500 	if (mreg & VIKING_MMODE) {
1501 		srmmu_name = "TI Viking";
1502 		viking_mxcc_present = 0;
1503 		msi_set_sync();
1504 
1505 		/*
1506 		 * We need this to make sure old viking takes no hits
1507 		 * on it's cache for dma snoops to workaround the
1508 		 * "load from non-cacheable memory" interrupt bug.
1509 		 * This is only necessary because of the new way in
1510 		 * which we use the IOMMU.
1511 		 */
1512 		viking_ops.page_for_dma = viking_flush_page;
1513 #ifdef CONFIG_SMP
1514 		viking_sun4d_smp_ops.page_for_dma = viking_flush_page;
1515 #endif
1516 		flush_page_for_dma_global = 0;
1517 	} else {
1518 		srmmu_name = "TI Viking/MXCC";
1519 		viking_mxcc_present = 1;
1520 		srmmu_cache_pagetables = 1;
1521 	}
1522 
1523 	sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
1524 		&viking_ops;
1525 #ifdef CONFIG_SMP
1526 	if (sparc_cpu_model == sun4d)
1527 		sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
1528 			&viking_sun4d_smp_ops;
1529 #endif
1530 
1531 	poke_srmmu = poke_viking;
1532 }
1533 
1534 /* Probe for the srmmu chip version. */
1535 static void __init get_srmmu_type(void)
1536 {
1537 	unsigned long mreg, psr;
1538 	unsigned long mod_typ, mod_rev, psr_typ, psr_vers;
1539 
1540 	srmmu_modtype = SRMMU_INVAL_MOD;
1541 	hwbug_bitmask = 0;
1542 
1543 	mreg = srmmu_get_mmureg(); psr = get_psr();
1544 	mod_typ = (mreg & 0xf0000000) >> 28;
1545 	mod_rev = (mreg & 0x0f000000) >> 24;
1546 	psr_typ = (psr >> 28) & 0xf;
1547 	psr_vers = (psr >> 24) & 0xf;
1548 
1549 	/* First, check for sparc-leon. */
1550 	if (sparc_cpu_model == sparc_leon) {
1551 		init_leon();
1552 		return;
1553 	}
1554 
1555 	/* Second, check for HyperSparc or Cypress. */
1556 	if (mod_typ == 1) {
1557 		switch (mod_rev) {
1558 		case 7:
1559 			/* UP or MP Hypersparc */
1560 			init_hypersparc();
1561 			break;
1562 		case 0:
1563 		case 2:
1564 		case 10:
1565 		case 11:
1566 		case 12:
1567 		case 13:
1568 		case 14:
1569 		case 15:
1570 		default:
1571 			prom_printf("Sparc-Linux Cypress support does not longer exit.\n");
1572 			prom_halt();
1573 			break;
1574 		}
1575 		return;
1576 	}
1577 
1578 	/* Now Fujitsu TurboSparc. It might happen that it is
1579 	 * in Swift emulation mode, so we will check later...
1580 	 */
1581 	if (psr_typ == 0 && psr_vers == 5) {
1582 		init_turbosparc();
1583 		return;
1584 	}
1585 
1586 	/* Next check for Fujitsu Swift. */
1587 	if (psr_typ == 0 && psr_vers == 4) {
1588 		phandle cpunode;
1589 		char node_str[128];
1590 
1591 		/* Look if it is not a TurboSparc emulating Swift... */
1592 		cpunode = prom_getchild(prom_root_node);
1593 		while ((cpunode = prom_getsibling(cpunode)) != 0) {
1594 			prom_getstring(cpunode, "device_type", node_str, sizeof(node_str));
1595 			if (!strcmp(node_str, "cpu")) {
1596 				if (!prom_getintdefault(cpunode, "psr-implementation", 1) &&
1597 				    prom_getintdefault(cpunode, "psr-version", 1) == 5) {
1598 					init_turbosparc();
1599 					return;
1600 				}
1601 				break;
1602 			}
1603 		}
1604 
1605 		init_swift();
1606 		return;
1607 	}
1608 
1609 	/* Now the Viking family of srmmu. */
1610 	if (psr_typ == 4 &&
1611 	   ((psr_vers == 0) ||
1612 	    ((psr_vers == 1) && (mod_typ == 0) && (mod_rev == 0)))) {
1613 		init_viking();
1614 		return;
1615 	}
1616 
1617 	/* Finally the Tsunami. */
1618 	if (psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) {
1619 		init_tsunami();
1620 		return;
1621 	}
1622 
1623 	/* Oh well */
1624 	srmmu_is_bad();
1625 }
1626 
1627 #ifdef CONFIG_SMP
1628 /* Local cross-calls. */
1629 static void smp_flush_page_for_dma(unsigned long page)
1630 {
1631 	xc1((smpfunc_t) local_ops->page_for_dma, page);
1632 	local_ops->page_for_dma(page);
1633 }
1634 
1635 static void smp_flush_cache_all(void)
1636 {
1637 	xc0((smpfunc_t) local_ops->cache_all);
1638 	local_ops->cache_all();
1639 }
1640 
1641 static void smp_flush_tlb_all(void)
1642 {
1643 	xc0((smpfunc_t) local_ops->tlb_all);
1644 	local_ops->tlb_all();
1645 }
1646 
1647 static void smp_flush_cache_mm(struct mm_struct *mm)
1648 {
1649 	if (mm->context != NO_CONTEXT) {
1650 		cpumask_t cpu_mask;
1651 		cpumask_copy(&cpu_mask, mm_cpumask(mm));
1652 		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1653 		if (!cpumask_empty(&cpu_mask))
1654 			xc1((smpfunc_t) local_ops->cache_mm, (unsigned long) mm);
1655 		local_ops->cache_mm(mm);
1656 	}
1657 }
1658 
1659 static void smp_flush_tlb_mm(struct mm_struct *mm)
1660 {
1661 	if (mm->context != NO_CONTEXT) {
1662 		cpumask_t cpu_mask;
1663 		cpumask_copy(&cpu_mask, mm_cpumask(mm));
1664 		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1665 		if (!cpumask_empty(&cpu_mask)) {
1666 			xc1((smpfunc_t) local_ops->tlb_mm, (unsigned long) mm);
1667 			if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
1668 				cpumask_copy(mm_cpumask(mm),
1669 					     cpumask_of(smp_processor_id()));
1670 		}
1671 		local_ops->tlb_mm(mm);
1672 	}
1673 }
1674 
1675 static void smp_flush_cache_range(struct vm_area_struct *vma,
1676 				  unsigned long start,
1677 				  unsigned long end)
1678 {
1679 	struct mm_struct *mm = vma->vm_mm;
1680 
1681 	if (mm->context != NO_CONTEXT) {
1682 		cpumask_t cpu_mask;
1683 		cpumask_copy(&cpu_mask, mm_cpumask(mm));
1684 		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1685 		if (!cpumask_empty(&cpu_mask))
1686 			xc3((smpfunc_t) local_ops->cache_range,
1687 			    (unsigned long) vma, start, end);
1688 		local_ops->cache_range(vma, start, end);
1689 	}
1690 }
1691 
1692 static void smp_flush_tlb_range(struct vm_area_struct *vma,
1693 				unsigned long start,
1694 				unsigned long end)
1695 {
1696 	struct mm_struct *mm = vma->vm_mm;
1697 
1698 	if (mm->context != NO_CONTEXT) {
1699 		cpumask_t cpu_mask;
1700 		cpumask_copy(&cpu_mask, mm_cpumask(mm));
1701 		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1702 		if (!cpumask_empty(&cpu_mask))
1703 			xc3((smpfunc_t) local_ops->tlb_range,
1704 			    (unsigned long) vma, start, end);
1705 		local_ops->tlb_range(vma, start, end);
1706 	}
1707 }
1708 
1709 static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
1710 {
1711 	struct mm_struct *mm = vma->vm_mm;
1712 
1713 	if (mm->context != NO_CONTEXT) {
1714 		cpumask_t cpu_mask;
1715 		cpumask_copy(&cpu_mask, mm_cpumask(mm));
1716 		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1717 		if (!cpumask_empty(&cpu_mask))
1718 			xc2((smpfunc_t) local_ops->cache_page,
1719 			    (unsigned long) vma, page);
1720 		local_ops->cache_page(vma, page);
1721 	}
1722 }
1723 
1724 static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
1725 {
1726 	struct mm_struct *mm = vma->vm_mm;
1727 
1728 	if (mm->context != NO_CONTEXT) {
1729 		cpumask_t cpu_mask;
1730 		cpumask_copy(&cpu_mask, mm_cpumask(mm));
1731 		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1732 		if (!cpumask_empty(&cpu_mask))
1733 			xc2((smpfunc_t) local_ops->tlb_page,
1734 			    (unsigned long) vma, page);
1735 		local_ops->tlb_page(vma, page);
1736 	}
1737 }
1738 
1739 static void smp_flush_page_to_ram(unsigned long page)
1740 {
1741 	/* Current theory is that those who call this are the one's
1742 	 * who have just dirtied their cache with the pages contents
1743 	 * in kernel space, therefore we only run this on local cpu.
1744 	 *
1745 	 * XXX This experiment failed, research further... -DaveM
1746 	 */
1747 #if 1
1748 	xc1((smpfunc_t) local_ops->page_to_ram, page);
1749 #endif
1750 	local_ops->page_to_ram(page);
1751 }
1752 
1753 static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
1754 {
1755 	cpumask_t cpu_mask;
1756 	cpumask_copy(&cpu_mask, mm_cpumask(mm));
1757 	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
1758 	if (!cpumask_empty(&cpu_mask))
1759 		xc2((smpfunc_t) local_ops->sig_insns,
1760 		    (unsigned long) mm, insn_addr);
1761 	local_ops->sig_insns(mm, insn_addr);
1762 }
1763 
1764 static struct sparc32_cachetlb_ops smp_cachetlb_ops __ro_after_init = {
1765 	.cache_all	= smp_flush_cache_all,
1766 	.cache_mm	= smp_flush_cache_mm,
1767 	.cache_page	= smp_flush_cache_page,
1768 	.cache_range	= smp_flush_cache_range,
1769 	.tlb_all	= smp_flush_tlb_all,
1770 	.tlb_mm		= smp_flush_tlb_mm,
1771 	.tlb_page	= smp_flush_tlb_page,
1772 	.tlb_range	= smp_flush_tlb_range,
1773 	.page_to_ram	= smp_flush_page_to_ram,
1774 	.sig_insns	= smp_flush_sig_insns,
1775 	.page_for_dma	= smp_flush_page_for_dma,
1776 };
1777 #endif
1778 
1779 /* Load up routines and constants for sun4m and sun4d mmu */
1780 void __init load_mmu(void)
1781 {
1782 	/* Functions */
1783 	get_srmmu_type();
1784 
1785 #ifdef CONFIG_SMP
1786 	/* El switcheroo... */
1787 	local_ops = sparc32_cachetlb_ops;
1788 
1789 	if (sparc_cpu_model == sun4d || sparc_cpu_model == sparc_leon) {
1790 		smp_cachetlb_ops.tlb_all = local_ops->tlb_all;
1791 		smp_cachetlb_ops.tlb_mm = local_ops->tlb_mm;
1792 		smp_cachetlb_ops.tlb_range = local_ops->tlb_range;
1793 		smp_cachetlb_ops.tlb_page = local_ops->tlb_page;
1794 	}
1795 
1796 	if (poke_srmmu == poke_viking) {
1797 		/* Avoid unnecessary cross calls. */
1798 		smp_cachetlb_ops.cache_all = local_ops->cache_all;
1799 		smp_cachetlb_ops.cache_mm = local_ops->cache_mm;
1800 		smp_cachetlb_ops.cache_range = local_ops->cache_range;
1801 		smp_cachetlb_ops.cache_page = local_ops->cache_page;
1802 
1803 		smp_cachetlb_ops.page_to_ram = local_ops->page_to_ram;
1804 		smp_cachetlb_ops.sig_insns = local_ops->sig_insns;
1805 		smp_cachetlb_ops.page_for_dma = local_ops->page_for_dma;
1806 	}
1807 
1808 	/* It really is const after this point. */
1809 	sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
1810 		&smp_cachetlb_ops;
1811 #endif
1812 
1813 	if (sparc_cpu_model == sun4d)
1814 		ld_mmu_iounit();
1815 	else
1816 		ld_mmu_iommu();
1817 #ifdef CONFIG_SMP
1818 	if (sparc_cpu_model == sun4d)
1819 		sun4d_init_smp();
1820 	else if (sparc_cpu_model == sparc_leon)
1821 		leon_init_smp();
1822 	else
1823 		sun4m_init_smp();
1824 #endif
1825 }
1826