xref: /openbmc/linux/arch/arm/mm/mmu.c (revision eca73214c9c50e290b8dc823b41730b01788872d)
1 /*
2  *  linux/arch/arm/mm/mmu.c
3  *
4  *  Copyright (C) 1995-2005 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #include <linux/module.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/init.h>
14 #include <linux/bootmem.h>
15 #include <linux/mman.h>
16 #include <linux/nodemask.h>
17 
18 #include <asm/cputype.h>
19 #include <asm/mach-types.h>
20 #include <asm/setup.h>
21 #include <asm/sizes.h>
22 #include <asm/tlb.h>
23 
24 #include <asm/mach/arch.h>
25 #include <asm/mach/map.h>
26 
27 #include "mm.h"
28 
29 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
30 
31 /*
32  * empty_zero_page is a special page that is used for
33  * zero-initialized data and COW.
34  */
35 struct page *empty_zero_page;
36 EXPORT_SYMBOL(empty_zero_page);
37 
38 /*
39  * The pmd table for the upper-most set of pages.
40  */
41 pmd_t *top_pmd;
42 
43 #define CPOLICY_UNCACHED	0
44 #define CPOLICY_BUFFERED	1
45 #define CPOLICY_WRITETHROUGH	2
46 #define CPOLICY_WRITEBACK	3
47 #define CPOLICY_WRITEALLOC	4
48 
49 static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
50 static unsigned int ecc_mask __initdata = 0;
51 pgprot_t pgprot_user;
52 pgprot_t pgprot_kernel;
53 
54 EXPORT_SYMBOL(pgprot_user);
55 EXPORT_SYMBOL(pgprot_kernel);
56 
57 struct cachepolicy {
58 	const char	policy[16];
59 	unsigned int	cr_mask;
60 	unsigned int	pmd;
61 	unsigned int	pte;
62 };
63 
64 static struct cachepolicy cache_policies[] __initdata = {
65 	{
66 		.policy		= "uncached",
67 		.cr_mask	= CR_W|CR_C,
68 		.pmd		= PMD_SECT_UNCACHED,
69 		.pte		= 0,
70 	}, {
71 		.policy		= "buffered",
72 		.cr_mask	= CR_C,
73 		.pmd		= PMD_SECT_BUFFERED,
74 		.pte		= PTE_BUFFERABLE,
75 	}, {
76 		.policy		= "writethrough",
77 		.cr_mask	= 0,
78 		.pmd		= PMD_SECT_WT,
79 		.pte		= PTE_CACHEABLE,
80 	}, {
81 		.policy		= "writeback",
82 		.cr_mask	= 0,
83 		.pmd		= PMD_SECT_WB,
84 		.pte		= PTE_BUFFERABLE|PTE_CACHEABLE,
85 	}, {
86 		.policy		= "writealloc",
87 		.cr_mask	= 0,
88 		.pmd		= PMD_SECT_WBWA,
89 		.pte		= PTE_BUFFERABLE|PTE_CACHEABLE,
90 	}
91 };
92 
93 /*
94  * These are useful for identifying cache coherency
95  * problems by allowing the cache or the cache and
96  * write buffer to be turned off.  (Note: the write
97  * buffer should not be left on while the cache is off.)
98  */
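/*
 * Handled via the "cachepolicy=" early parameter below; e.g. booting
 * with "cachepolicy=writethrough" selects the write-through policy
 * from the table above.
 */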
99 static void __init early_cachepolicy(char **p)
100 {
101 	int i;
102 
103 	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
104 		int len = strlen(cache_policies[i].policy);
105 
106 		if (memcmp(*p, cache_policies[i].policy, len) == 0) {
107 			cachepolicy = i;
108 			cr_alignment &= ~cache_policies[i].cr_mask;
109 			cr_no_alignment &= ~cache_policies[i].cr_mask;
110 			*p += len;
111 			break;
112 		}
113 	}
114 	if (i == ARRAY_SIZE(cache_policies))
115 		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
116 	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
117 		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
118 		cachepolicy = CPOLICY_WRITEBACK;
119 	}
120 	flush_cache_all();
121 	set_cr(cr_alignment);
122 }
123 __early_param("cachepolicy=", early_cachepolicy);
124 
125 static void __init early_nocache(char **__unused)
126 {
127 	char *p = "buffered";
128 	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
129 	early_cachepolicy(&p);
130 }
131 __early_param("nocache", early_nocache);
132 
133 static void __init early_nowrite(char **__unused)
134 {
135 	char *p = "uncached";
136 	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
137 	early_cachepolicy(&p);
138 }
139 __early_param("nowb", early_nowrite);
140 
141 static void __init early_ecc(char **p)
142 {
143 	if (memcmp(*p, "on", 2) == 0) {
144 		ecc_mask = PMD_PROTECTION;
145 		*p += 2;
146 	} else if (memcmp(*p, "off", 3) == 0) {
147 		ecc_mask = 0;
148 		*p += 3;
149 	}
150 }
151 __early_param("ecc=", early_ecc);
152 
153 static int __init noalign_setup(char *__unused)
154 {
155 	cr_alignment &= ~CR_A;
156 	cr_no_alignment &= ~CR_A;
157 	set_cr(cr_alignment);
158 	return 1;
159 }
160 __setup("noalign", noalign_setup);
161 
162 #ifndef CONFIG_SMP
163 void adjust_cr(unsigned long mask, unsigned long set)
164 {
165 	unsigned long flags;
166 
167 	mask &= ~CR_A;
168 
169 	set &= mask;
170 
171 	local_irq_save(flags);
172 
173 	cr_no_alignment = (cr_no_alignment & ~mask) | set;
174 	cr_alignment = (cr_alignment & ~mask) | set;
175 
176 	set_cr((get_cr() & ~mask) | set);
177 
178 	local_irq_restore(flags);
179 }
180 #endif
181 
182 #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
183 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_XN|PMD_SECT_AP_WRITE
184 
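/*
 * Each mem_type supplies the protection bits for the different mapping
 * granularities: prot_pte/prot_pte_ext go into L2 page table entries,
 * prot_l1 into the L1 entry pointing at an L2 table, and prot_sect into
 * 1MB section entries written directly into the L1 table.  The domain
 * field selects the ARM protection domain, ORed in at the end of
 * build_mem_type_table().
 */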
185 static struct mem_type mem_types[] = {
186 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
187 		.prot_pte	= PROT_PTE_DEVICE,
188 		.prot_l1	= PMD_TYPE_TABLE,
189 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_UNCACHED,
190 		.domain		= DOMAIN_IO,
191 	},
192 	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
193 		.prot_pte	= PROT_PTE_DEVICE,
194 		.prot_pte_ext	= PTE_EXT_TEX(2),
195 		.prot_l1	= PMD_TYPE_TABLE,
196 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_TEX(2),
197 		.domain		= DOMAIN_IO,
198 	},
199 	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
200 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_CACHEABLE | L_PTE_BUFFERABLE,
201 		.prot_l1	= PMD_TYPE_TABLE,
202 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
203 		.domain		= DOMAIN_IO,
204 	},
205 	[MT_DEVICE_IXP2000] = {	  /* IXP2400 requires XCB=101 for on-chip I/O */
206 		.prot_pte	= PROT_PTE_DEVICE,
207 		.prot_l1	= PMD_TYPE_TABLE,
208 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE |
209 				  PMD_SECT_TEX(1),
210 		.domain		= DOMAIN_IO,
211 	},
212 	[MT_CACHECLEAN] = {
213 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
214 		.domain    = DOMAIN_KERNEL,
215 	},
216 	[MT_MINICLEAN] = {
217 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
218 		.domain    = DOMAIN_KERNEL,
219 	},
220 	[MT_LOW_VECTORS] = {
221 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
222 				L_PTE_EXEC,
223 		.prot_l1   = PMD_TYPE_TABLE,
224 		.domain    = DOMAIN_USER,
225 	},
226 	[MT_HIGH_VECTORS] = {
227 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
228 				L_PTE_USER | L_PTE_EXEC,
229 		.prot_l1   = PMD_TYPE_TABLE,
230 		.domain    = DOMAIN_USER,
231 	},
232 	[MT_MEMORY] = {
233 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
234 		.domain    = DOMAIN_KERNEL,
235 	},
236 	[MT_ROM] = {
237 		.prot_sect = PMD_TYPE_SECT,
238 		.domain    = DOMAIN_KERNEL,
239 	},
240 };
241 
242 const struct mem_type *get_mem_type(unsigned int type)
243 {
244 	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
245 }
246 
247 /*
248  * Adjust the PMD section entries according to the CPU in use.
249  */
250 static void __init build_mem_type_table(void)
251 {
252 	struct cachepolicy *cp;
253 	unsigned int cr = get_cr();
254 	unsigned int user_pgprot, kern_pgprot;
255 	int cpu_arch = cpu_architecture();
256 	int i;
257 
258 	if (cpu_arch < CPU_ARCH_ARMv6) {
259 #if defined(CONFIG_CPU_DCACHE_DISABLE)
260 		if (cachepolicy > CPOLICY_BUFFERED)
261 			cachepolicy = CPOLICY_BUFFERED;
262 #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
263 		if (cachepolicy > CPOLICY_WRITETHROUGH)
264 			cachepolicy = CPOLICY_WRITETHROUGH;
265 #endif
266 	}
267 	if (cpu_arch < CPU_ARCH_ARMv5) {
268 		if (cachepolicy >= CPOLICY_WRITEALLOC)
269 			cachepolicy = CPOLICY_WRITEBACK;
270 		ecc_mask = 0;
271 	}
272 
273 	/*
274 	 * On ARMv5 and lower, bit 4 must be set for page tables.
275 	 * (was: cache "update-able on write" bit on ARM610)
276 	 * However, Xscale cores require this bit to be cleared.
277 	 */
278 	if (cpu_is_xscale()) {
279 		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
280 			mem_types[i].prot_sect &= ~PMD_BIT4;
281 			mem_types[i].prot_l1 &= ~PMD_BIT4;
282 		}
283 	} else if (cpu_arch < CPU_ARCH_ARMv6) {
284 		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
285 			if (mem_types[i].prot_l1)
286 				mem_types[i].prot_l1 |= PMD_BIT4;
287 			if (mem_types[i].prot_sect)
288 				mem_types[i].prot_sect |= PMD_BIT4;
289 		}
290 	}
291 
292 	cp = &cache_policies[cachepolicy];
293 	kern_pgprot = user_pgprot = cp->pte;
294 
295 	/*
296 	 * Enable CPU-specific coherency if supported.
297 	 * (Only available on XSC3 at the moment.)
298 	 */
299 	if (arch_is_coherent()) {
300 		if (cpu_is_xsc3()) {
301 			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
302 			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
303 		}
304 	}
305 
306 	/*
307 	 * ARMv6 and above have extended page tables.
308 	 */
309 	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
310 		/*
311 		 * Mark cache clean areas and XIP ROM read only
312 		 * from SVC mode and no access from userspace.
313 		 */
314 		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
315 		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
316 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
317 
318 		/*
319 		 * Mark the device area as "shared device"
320 		 */
321 		mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE;
322 		mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
323 
324 #ifdef CONFIG_SMP
325 		/*
326 		 * Mark memory with the "shared" attribute for SMP systems
327 		 */
328 		user_pgprot |= L_PTE_SHARED;
329 		kern_pgprot |= L_PTE_SHARED;
330 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
331 #endif
332 	}
333 
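	/*
	 * protection_map has 16 entries, one per combination of the
	 * VM_READ/VM_WRITE/VM_EXEC/VM_SHARED flags; strip the cacheability
	 * bits from each and fold in the policy chosen above.
	 */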
334 	for (i = 0; i < 16; i++) {
335 		unsigned long v = pgprot_val(protection_map[i]);
336 		v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
337 		protection_map[i] = __pgprot(v);
338 	}
339 
340 	mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
341 	mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
342 
343 	if (cpu_arch >= CPU_ARCH_ARMv5) {
344 #ifndef CONFIG_SMP
345 		/*
346 		 * Only use write-through for non-SMP systems
347 		 */
348 		mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
349 		mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
350 #endif
351 	} else {
352 		mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
353 	}
354 
355 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
356 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
357 				 L_PTE_DIRTY | L_PTE_WRITE |
358 				 L_PTE_EXEC | kern_pgprot);
359 
360 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
361 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
362 	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
363 	mem_types[MT_ROM].prot_sect |= cp->pmd;
364 
365 	switch (cp->pmd) {
366 	case PMD_SECT_WT:
367 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
368 		break;
369 	case PMD_SECT_WB:
370 	case PMD_SECT_WBWA:
371 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
372 		break;
373 	}
374 	printk("Memory policy: ECC %sabled, Data cache %s\n",
375 		ecc_mask ? "en" : "dis", cp->policy);
376 
377 	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
378 		struct mem_type *t = &mem_types[i];
379 		if (t->prot_l1)
380 			t->prot_l1 |= PMD_DOMAIN(t->domain);
381 		if (t->prot_sect)
382 			t->prot_sect |= PMD_DOMAIN(t->domain);
383 	}
384 }
385 
386 #define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
387 
388 static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
389 				  unsigned long end, unsigned long pfn,
390 				  const struct mem_type *type)
391 {
392 	pte_t *pte;
393 
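	/*
	 * A single bootmem allocation provides the two hardware L2 tables
	 * for this 2MB pgd entry plus the Linux "shadow" PTE tables that
	 * carry the young/dirty state the hardware format lacks.
	 */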
394 	if (pmd_none(*pmd)) {
395 		pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
396 		__pmd_populate(pmd, __pa(pte) | type->prot_l1);
397 	}
398 
399 	pte = pte_offset_kernel(pmd, addr);
400 	do {
401 		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
402 			    type->prot_pte_ext);
403 		pfn++;
404 	} while (pte++, addr += PAGE_SIZE, addr != end);
405 }
406 
407 static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
408 				      unsigned long end, unsigned long phys,
409 				      const struct mem_type *type)
410 {
411 	pmd_t *pmd = pmd_offset(pgd, addr);
412 
413 	/*
414 	 * Try a section mapping - end, addr and phys must all be aligned
415 	 * to a section boundary.  Note that PMDs refer to the individual
416 	 * L1 entries, whereas PGDs refer to a group of L1 entries making
417 	 * up one logical pointer to an L2 table.
418 	 */
419 	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
420 		pmd_t *p = pmd;
421 
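		/*
		 * Each pgd entry spans two 1MB sections; if the region
		 * starts in the odd section, step to the second L1 entry
		 * of the pair.
		 */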
422 		if (addr & SECTION_SIZE)
423 			pmd++;
424 
425 		do {
426 			*pmd = __pmd(phys | type->prot_sect);
427 			phys += SECTION_SIZE;
428 		} while (pmd++, addr += SECTION_SIZE, addr != end);
429 
430 		flush_pmd_entry(p);
431 	} else {
432 		/*
433 		 * No need to loop; PTEs aren't interested in the
434 		 * individual L1 entries.
435 		 */
436 		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
437 	}
438 }
439 
440 static void __init create_36bit_mapping(struct map_desc *md,
441 					const struct mem_type *type)
442 {
443 	unsigned long phys, addr, length, end;
444 	pgd_t *pgd;
445 
446 	addr = md->virtual;
447 	phys = (unsigned long)__pfn_to_phys(md->pfn);
448 	length = PAGE_ALIGN(md->length);
449 
450 	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
451 		printk(KERN_ERR "MM: CPU does not support supersection "
452 		       "mapping for 0x%08llx at 0x%08lx\n",
453 		       __pfn_to_phys((u64)md->pfn), addr);
454 		return;
455 	}
456 
457 	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
458 	 *	Since domain assignments can in fact be arbitrary, the
459 	 *	'domain == 0' check below is required to ensure that ARMv6
460 	 *	supersections are only allocated for domain 0 regardless
461 	 *	of the actual domain assignments in use.
462 	 */
463 	if (type->domain) {
464 		printk(KERN_ERR "MM: invalid domain in supersection "
465 		       "mapping for 0x%08llx at 0x%08lx\n",
466 		       __pfn_to_phys((u64)md->pfn), addr);
467 		return;
468 	}
469 
470 	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
471 		printk(KERN_ERR "MM: cannot create mapping for "
472 		       "0x%08llx at 0x%08lx invalid alignment\n",
473 		       __pfn_to_phys((u64)md->pfn), addr);
474 		return;
475 	}
476 
477 	/*
478 	 * Shift bits [35:32] of address into bits [23:20] of PMD
479 	 * (See ARMv6 spec).
480 	 */
481 	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
482 
483 	pgd = pgd_offset_k(addr);
484 	end = addr + length;
485 	do {
486 		pmd_t *pmd = pmd_offset(pgd, addr);
487 		int i;
488 
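		/*
		 * A supersection descriptor must be replicated into all 16
		 * consecutive L1 entries covering its 16MB range.
		 */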
489 		for (i = 0; i < 16; i++)
490 			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
491 
492 		addr += SUPERSECTION_SIZE;
493 		phys += SUPERSECTION_SIZE;
494 		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
495 	} while (addr != end);
496 }
497 
498 /*
499  * Create the page directory entries and any necessary
500  * page tables for the mapping specified by `md'.  We
501  * are able to cope here with varying sizes and address
502  * offsets, and we take full advantage of sections and
503  * supersections.
504  */
505 void __init create_mapping(struct map_desc *md)
506 {
507 	unsigned long phys, addr, length, end;
508 	const struct mem_type *type;
509 	pgd_t *pgd;
510 
511 	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
512 		printk(KERN_WARNING "BUG: not creating mapping for "
513 		       "0x%08llx at 0x%08lx in user region\n",
514 		       __pfn_to_phys((u64)md->pfn), md->virtual);
515 		return;
516 	}
517 
518 	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
519 	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
520 		printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
521 		       "overlaps vmalloc space\n",
522 		       __pfn_to_phys((u64)md->pfn), md->virtual);
523 	}
524 
525 	type = &mem_types[md->type];
526 
527 	/*
528 	 * Catch 36-bit addresses
529 	 */
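	/*
	 * With 4KB pages, pfn 0x100000 is the 4GB boundary, so anything at
	 * or above it can only be reached via a supersection mapping.
	 */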
530 	if (md->pfn >= 0x100000) {
531 		create_36bit_mapping(md, type);
532 		return;
533 	}
534 
535 	addr = md->virtual & PAGE_MASK;
536 	phys = (unsigned long)__pfn_to_phys(md->pfn);
537 	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
538 
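	/*
	 * Types without prot_l1 (e.g. MT_MEMORY, MT_ROM) have no page-table
	 * variant and can only be mapped with 1MB sections, so the request
	 * must be section-aligned.
	 */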
539 	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
540 		printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
541 		       "be mapped using pages, ignoring.\n",
542 		       __pfn_to_phys(md->pfn), addr);
543 		return;
544 	}
545 
546 	pgd = pgd_offset_k(addr);
547 	end = addr + length;
548 	do {
549 		unsigned long next = pgd_addr_end(addr, end);
550 
551 		alloc_init_section(pgd, addr, next, phys, type);
552 
553 		phys += next - addr;
554 		addr = next;
555 	} while (pgd++, addr != end);
556 }
557 
558 /*
559  * Create the architecture specific mappings
560  */
561 void __init iotable_init(struct map_desc *io_desc, int nr)
562 {
563 	int i;
564 
565 	for (i = 0; i < nr; i++)
566 		create_mapping(io_desc + i);
567 }
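
/*
 * Typical use from a machine's ->map_io() callback, sketched here with
 * made-up addresses purely for illustration:
 *
 *	static struct map_desc board_io_desc[] __initdata = {
 *		{
 *			.virtual	= 0xf8000000,
 *			.pfn		= __phys_to_pfn(0x10000000),
 *			.length		= SZ_1M,
 *			.type		= MT_DEVICE,
 *		},
 *	};
 *
 *	iotable_init(board_io_desc, ARRAY_SIZE(board_io_desc));
 */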
568 
569 static int __init check_membank_valid(struct membank *mb)
570 {
571 	/*
572 	 * Reject this memory region if it has zero size or an
573 	 * invalid node number.
574 	 */
575 	if (mb->size == 0 || mb->node >= MAX_NUMNODES)
576 		return 0;
577 
578 	/*
579 	 * Check whether this memory region would entirely overlap
580 	 * the vmalloc area.
581 	 */
582 	if (phys_to_virt(mb->start) >= VMALLOC_MIN) {
583 		printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
584 			"(vmalloc region overlap).\n",
585 			mb->start, mb->start + mb->size - 1);
586 		return 0;
587 	}
588 
589 	/*
590 	 * Check whether this memory region would partially overlap
591 	 * the vmalloc area.
592 	 */
593 	if (phys_to_virt(mb->start + mb->size) < phys_to_virt(mb->start) ||
594 	    phys_to_virt(mb->start + mb->size) > VMALLOC_MIN) {
595 		unsigned long newsize = VMALLOC_MIN - phys_to_virt(mb->start);
596 
597 		printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
598 			"to -%.8lx (vmalloc region overlap).\n",
599 			mb->start, mb->start + mb->size - 1,
600 			mb->start + newsize - 1);
601 		mb->size = newsize;
602 	}
603 
604 	return 1;
605 }
606 
607 static void __init sanity_check_meminfo(struct meminfo *mi)
608 {
609 	int i, j;
610 
611 	for (i = 0, j = 0; i < mi->nr_banks; i++) {
612 		if (check_membank_valid(&mi->bank[i]))
613 			mi->bank[j++] = mi->bank[i];
614 	}
615 	mi->nr_banks = j;
616 }
617 
618 static inline void prepare_page_table(struct meminfo *mi)
619 {
620 	unsigned long addr;
621 
622 	/*
623 	 * Clear out all the mappings below the kernel image.
624 	 */
625 	for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE)
626 		pmd_clear(pmd_off_k(addr));
627 
628 #ifdef CONFIG_XIP_KERNEL
629 	/* The XIP kernel is mapped in the module area -- skip over it */
630 	addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
631 #endif
632 	for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
633 		pmd_clear(pmd_off_k(addr));
634 
635 	/*
636 	 * Clear out all the kernel space mappings, except for the first
637 	 * memory bank, up to the end of the vmalloc region.
638 	 */
639 	for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
640 	     addr < VMALLOC_END; addr += PGDIR_SIZE)
641 		pmd_clear(pmd_off_k(addr));
642 }
643 
644 /*
645  * Reserve the various regions of node 0
646  */
647 void __init reserve_node_zero(pg_data_t *pgdat)
648 {
649 	unsigned long res_size = 0;
650 
651 	/*
652 	 * Register the kernel text and data with bootmem.
653 	 * Note that this can only be in node 0.
654 	 */
655 #ifdef CONFIG_XIP_KERNEL
656 	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
657 			BOOTMEM_DEFAULT);
658 #else
659 	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
660 			BOOTMEM_DEFAULT);
661 #endif
662 
663 	/*
664 	 * Reserve the page tables.  These are already in use,
665 	 * and can only be in node 0.
666 	 */
667 	reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
668 			     PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
669 
670 	/*
671 	 * Hmm... This should go elsewhere, but we really really need to
672 	 * stop things allocating the low memory; ideally we need a better
673 	 * implementation of GFP_DMA which does not assume that DMA-able
674 	 * memory starts at zero.
675 	 */
676 	if (machine_is_integrator() || machine_is_cintegrator())
677 		res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
678 
679 	/*
680 	 * These should likewise go elsewhere.  They pre-reserve the
681 	 * screen memory region at the start of main system memory.
682 	 */
683 	if (machine_is_edb7211())
684 		res_size = 0x00020000;
685 	if (machine_is_p720t())
686 		res_size = 0x00014000;
687 
688 	/* H1940 and RX3715 need to reserve this for suspend */
689 
690 	if (machine_is_h1940() || machine_is_rx3715()) {
691 		reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
692 				BOOTMEM_DEFAULT);
693 		reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
694 				BOOTMEM_DEFAULT);
695 	}
696 
697 #ifdef CONFIG_SA1111
698 	/*
699 	 * Because of the SA1111 DMA bug, we want to preserve our
700 	 * precious DMA-able memory...
701 	 */
702 	res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
703 #endif
704 	if (res_size)
705 		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
706 				BOOTMEM_DEFAULT);
707 }
708 
709 /*
710  * Set up the device mappings.  Since we clear out the page tables for all
711  * mappings above VMALLOC_END, we will remove any debug device mappings.
712  * This means you have to be careful how you debug this function, or any
713  * called function: you can't use any function or debugging method
714  * which may touch any device, otherwise the kernel _will_ crash.
715  */
716 static void __init devicemaps_init(struct machine_desc *mdesc)
717 {
718 	struct map_desc map;
719 	unsigned long addr;
720 	void *vectors;
721 
722 	/*
723 	 * Allocate the vector page early.
724 	 */
725 	vectors = alloc_bootmem_low_pages(PAGE_SIZE);
726 	BUG_ON(!vectors);
727 
728 	for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
729 		pmd_clear(pmd_off_k(addr));
730 
731 	/*
732 	 * Map the kernel if it is XIP.
733 	 * It is always first in the modulearea.
734 	 */
735 #ifdef CONFIG_XIP_KERNEL
736 	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
737 	map.virtual = MODULE_START;
738 	map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
739 	map.type = MT_ROM;
740 	create_mapping(&map);
741 #endif
742 
743 	/*
744 	 * Map the cache flushing regions.
745 	 */
746 #ifdef FLUSH_BASE
747 	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
748 	map.virtual = FLUSH_BASE;
749 	map.length = SZ_1M;
750 	map.type = MT_CACHECLEAN;
751 	create_mapping(&map);
752 #endif
753 #ifdef FLUSH_BASE_MINICACHE
754 	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
755 	map.virtual = FLUSH_BASE_MINICACHE;
756 	map.length = SZ_1M;
757 	map.type = MT_MINICLEAN;
758 	create_mapping(&map);
759 #endif
760 
761 	/*
762 	 * Create a mapping for the machine vectors at the high-vectors
763 	 * location (0xffff0000).  If we aren't using high-vectors, also
764 	 * create a mapping at the low-vectors virtual address.
765 	 */
766 	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
767 	map.virtual = 0xffff0000;
768 	map.length = PAGE_SIZE;
769 	map.type = MT_HIGH_VECTORS;
770 	create_mapping(&map);
771 
772 	if (!vectors_high()) {
773 		map.virtual = 0;
774 		map.type = MT_LOW_VECTORS;
775 		create_mapping(&map);
776 	}
777 
778 	/*
779 	 * Ask the machine support to map in the statically mapped devices.
780 	 */
781 	if (mdesc->map_io)
782 		mdesc->map_io();
783 
784 	/*
785 	 * Finally flush the caches and tlb to ensure that we're in a
786 	 * consistent state wrt the writebuffer.  This also ensures that
787 	 * any write-allocated cache lines in the vector page are written
788 	 * back.  After this point, we can start to touch devices again.
789 	 */
790 	local_flush_tlb_all();
791 	flush_cache_all();
792 }
793 
794 /*
795  * paging_init() sets up the page tables, initialises the zone memory
796  * maps, and sets up the zero page, bad page and bad page tables.
797  */
798 void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
799 {
800 	void *zero_page;
801 
802 	build_mem_type_table();
803 	sanity_check_meminfo(mi);
804 	prepare_page_table(mi);
805 	bootmem_init(mi);
806 	devicemaps_init(mdesc);
807 
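	/*
	 * Remember the pmd covering the vector page at 0xffff0000 so other
	 * code can reach it through top_pmd (declared near the top of this
	 * file).
	 */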
808 	top_pmd = pmd_off_k(0xffff0000);
809 
810 	/*
811 	 * allocate the zero page.  Note that we count on this going ok.
812 	 */
813 	zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
814 	memzero(zero_page, PAGE_SIZE);
815 	empty_zero_page = virt_to_page(zero_page);
816 	flush_dcache_page(empty_zero_page);
817 }
818 
819 /*
820  * In order to soft-boot, we need to insert a 1:1 mapping in place of
821  * the user-mode pages.  This will then ensure that we have predictable
822  * results when turning the mmu off.
823  */
824 void setup_mm_for_reboot(char mode)
825 {
826 	unsigned long base_pmdval;
827 	pgd_t *pgd;
828 	int i;
829 
830 	if (current->mm && current->mm->pgd)
831 		pgd = current->mm->pgd;
832 	else
833 		pgd = init_mm.pgd;
834 
835 	base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
836 	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
837 		base_pmdval |= PMD_BIT4;
838 
839 	for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
840 		unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
841 		pmd_t *pmd;
842 
843 		pmd = pmd_off(pgd, i << PGDIR_SHIFT);
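		/*
		 * Each pgd entry covers two 1MB sections; fill both halves
		 * ((1 << (PGDIR_SHIFT - 1)) is one section here) with an
		 * identity mapping.
		 */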
844 		pmd[0] = __pmd(pmdval);
845 		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
846 		flush_pmd_entry(pmd);
847 	}
848 }
849