xref: /openbmc/linux/arch/arm/mm/mmu.c (revision 1ab142d4)
1 /*
2  *  linux/arch/arm/mm/mmu.c
3  *
4  *  Copyright (C) 1995-2005 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #include <linux/module.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/init.h>
14 #include <linux/mman.h>
15 #include <linux/nodemask.h>
16 #include <linux/memblock.h>
17 #include <linux/fs.h>
18 #include <linux/vmalloc.h>
19 
20 #include <asm/cputype.h>
21 #include <asm/sections.h>
22 #include <asm/cachetype.h>
23 #include <asm/setup.h>
24 #include <asm/sizes.h>
25 #include <asm/smp_plat.h>
26 #include <asm/tlb.h>
27 #include <asm/highmem.h>
28 #include <asm/traps.h>
29 
30 #include <asm/mach/arch.h>
31 #include <asm/mach/map.h>
32 
33 #include "mm.h"
34 
35 /*
36  * empty_zero_page is a special page that is used for
37  * zero-initialized data and COW.
38  */
39 struct page *empty_zero_page;
40 EXPORT_SYMBOL(empty_zero_page);
41 
42 /*
43  * The pmd table for the upper-most set of pages.
44  */
45 pmd_t *top_pmd;
46 
47 #define CPOLICY_UNCACHED	0
48 #define CPOLICY_BUFFERED	1
49 #define CPOLICY_WRITETHROUGH	2
50 #define CPOLICY_WRITEBACK	3
51 #define CPOLICY_WRITEALLOC	4
52 
53 static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
54 static unsigned int ecc_mask __initdata = 0;
55 pgprot_t pgprot_user;
56 pgprot_t pgprot_kernel;
57 
58 EXPORT_SYMBOL(pgprot_user);
59 EXPORT_SYMBOL(pgprot_kernel);
60 
61 struct cachepolicy {
62 	const char	policy[16];
63 	unsigned int	cr_mask;
64 	pmdval_t	pmd;
65 	pteval_t	pte;
66 };
67 
68 static struct cachepolicy cache_policies[] __initdata = {
69 	{
70 		.policy		= "uncached",
71 		.cr_mask	= CR_W|CR_C,
72 		.pmd		= PMD_SECT_UNCACHED,
73 		.pte		= L_PTE_MT_UNCACHED,
74 	}, {
75 		.policy		= "buffered",
76 		.cr_mask	= CR_C,
77 		.pmd		= PMD_SECT_BUFFERED,
78 		.pte		= L_PTE_MT_BUFFERABLE,
79 	}, {
80 		.policy		= "writethrough",
81 		.cr_mask	= 0,
82 		.pmd		= PMD_SECT_WT,
83 		.pte		= L_PTE_MT_WRITETHROUGH,
84 	}, {
85 		.policy		= "writeback",
86 		.cr_mask	= 0,
87 		.pmd		= PMD_SECT_WB,
88 		.pte		= L_PTE_MT_WRITEBACK,
89 	}, {
90 		.policy		= "writealloc",
91 		.cr_mask	= 0,
92 		.pmd		= PMD_SECT_WBWA,
93 		.pte		= L_PTE_MT_WRITEALLOC,
94 	}
95 };
96 
97 /*
98  * These are useful for identifying cache coherency
99  * problems by allowing the cache or the cache and
100  * write buffer to be turned off.  (Note: the write
101  * buffer should not be left on with the cache off.)
102  */
103 static int __init early_cachepolicy(char *p)
104 {
105 	int i;
106 
107 	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
108 		int len = strlen(cache_policies[i].policy);
109 
110 		if (memcmp(p, cache_policies[i].policy, len) == 0) {
111 			cachepolicy = i;
112 			cr_alignment &= ~cache_policies[i].cr_mask;
113 			cr_no_alignment &= ~cache_policies[i].cr_mask;
114 			break;
115 		}
116 	}
117 	if (i == ARRAY_SIZE(cache_policies))
118 		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
119 	/*
120 	 * This restriction is partly to do with the way we boot; it is
121 	 * unpredictable to have memory mapped using two different sets of
122 	 * memory attributes (shared, type, and cache attributes).  We cannot
123 	 * change these attributes once the initial assembly has set up the
124 	 * page tables.
125 	 */
126 	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
127 		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
128 		cachepolicy = CPOLICY_WRITEBACK;
129 	}
130 	flush_cache_all();
131 	set_cr(cr_alignment);
132 	return 0;
133 }
134 early_param("cachepolicy", early_cachepolicy);
135 
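/*
 * Legacy boot parameters: "nocache" and "nowb" are deprecated aliases
 * for cachepolicy=buffered and cachepolicy=uncached respectively.
 */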
136 static int __init early_nocache(char *__unused)
137 {
138 	char *p = "buffered";
139 	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
140 	early_cachepolicy(p);
141 	return 0;
142 }
143 early_param("nocache", early_nocache);
144 
145 static int __init early_nowrite(char *__unused)
146 {
147 	char *p = "uncached";
148 	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
149 	early_cachepolicy(p);
150 	return 0;
151 }
152 early_param("nowb", early_nowrite);
153 
154 #ifndef CONFIG_ARM_LPAE
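/*
 * "ecc=on" sets the protection (P) bit in first-level descriptors so
 * that the mapped memory is flagged as ECC-protected on CPUs which
 * support it; classic (non-LPAE) page tables only.
 */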
155 static int __init early_ecc(char *p)
156 {
157 	if (memcmp(p, "on", 2) == 0)
158 		ecc_mask = PMD_PROTECTION;
159 	else if (memcmp(p, "off", 3) == 0)
160 		ecc_mask = 0;
161 	return 0;
162 }
163 early_param("ecc", early_ecc);
164 #endif
165 
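/* "noalign" clears the A bit so that unaligned accesses no longer fault. */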
166 static int __init noalign_setup(char *__unused)
167 {
168 	cr_alignment &= ~CR_A;
169 	cr_no_alignment &= ~CR_A;
170 	set_cr(cr_alignment);
171 	return 1;
172 }
173 __setup("noalign", noalign_setup);
174 
175 #ifndef CONFIG_SMP
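/*
 * Modify selected bits of the CP15 control register (UP only), keeping
 * the cached cr_alignment/cr_no_alignment copies in step with the
 * hardware.  The alignment bit is managed separately and is excluded.
 */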
176 void adjust_cr(unsigned long mask, unsigned long set)
177 {
178 	unsigned long flags;
179 
180 	mask &= ~CR_A;
181 
182 	set &= mask;
183 
184 	local_irq_save(flags);
185 
186 	cr_no_alignment = (cr_no_alignment & ~mask) | set;
187 	cr_alignment = (cr_alignment & ~mask) | set;
188 
189 	set_cr((get_cr() & ~mask) | set);
190 
191 	local_irq_restore(flags);
192 }
193 #endif
194 
195 #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
196 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
197 
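/*
 * Per-mapping-type templates for the L1/L2 descriptor bits and the
 * access domain.  build_mem_type_table() fixes these up for the CPU
 * architecture actually running before any mappings are created.
 */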
198 static struct mem_type mem_types[] = {
199 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
200 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
201 				  L_PTE_SHARED,
202 		.prot_l1	= PMD_TYPE_TABLE,
203 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
204 		.domain		= DOMAIN_IO,
205 	},
206 	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
207 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
208 		.prot_l1	= PMD_TYPE_TABLE,
209 		.prot_sect	= PROT_SECT_DEVICE,
210 		.domain		= DOMAIN_IO,
211 	},
212 	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
213 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
214 		.prot_l1	= PMD_TYPE_TABLE,
215 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
216 		.domain		= DOMAIN_IO,
217 	},
218 	[MT_DEVICE_WC] = {	/* ioremap_wc */
219 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
220 		.prot_l1	= PMD_TYPE_TABLE,
221 		.prot_sect	= PROT_SECT_DEVICE,
222 		.domain		= DOMAIN_IO,
223 	},
224 	[MT_UNCACHED] = {
225 		.prot_pte	= PROT_PTE_DEVICE,
226 		.prot_l1	= PMD_TYPE_TABLE,
227 		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
228 		.domain		= DOMAIN_IO,
229 	},
230 	[MT_CACHECLEAN] = {
231 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
232 		.domain    = DOMAIN_KERNEL,
233 	},
234 #ifndef CONFIG_ARM_LPAE
235 	[MT_MINICLEAN] = {
236 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
237 		.domain    = DOMAIN_KERNEL,
238 	},
239 #endif
240 	[MT_LOW_VECTORS] = {
241 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
242 				L_PTE_RDONLY,
243 		.prot_l1   = PMD_TYPE_TABLE,
244 		.domain    = DOMAIN_USER,
245 	},
246 	[MT_HIGH_VECTORS] = {
247 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
248 				L_PTE_USER | L_PTE_RDONLY,
249 		.prot_l1   = PMD_TYPE_TABLE,
250 		.domain    = DOMAIN_USER,
251 	},
252 	[MT_MEMORY] = {
253 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
254 		.prot_l1   = PMD_TYPE_TABLE,
255 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
256 		.domain    = DOMAIN_KERNEL,
257 	},
258 	[MT_ROM] = {
259 		.prot_sect = PMD_TYPE_SECT,
260 		.domain    = DOMAIN_KERNEL,
261 	},
262 	[MT_MEMORY_NONCACHED] = {
263 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
264 				L_PTE_MT_BUFFERABLE,
265 		.prot_l1   = PMD_TYPE_TABLE,
266 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
267 		.domain    = DOMAIN_KERNEL,
268 	},
269 	[MT_MEMORY_DTCM] = {
270 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
271 				L_PTE_XN,
272 		.prot_l1   = PMD_TYPE_TABLE,
273 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
274 		.domain    = DOMAIN_KERNEL,
275 	},
276 	[MT_MEMORY_ITCM] = {
277 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
278 		.prot_l1   = PMD_TYPE_TABLE,
279 		.domain    = DOMAIN_KERNEL,
280 	},
281 	[MT_MEMORY_SO] = {
282 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
283 				L_PTE_MT_UNCACHED,
284 		.prot_l1   = PMD_TYPE_TABLE,
285 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
286 				PMD_SECT_UNCACHED | PMD_SECT_XN,
287 		.domain    = DOMAIN_KERNEL,
288 	},
289 };
290 
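/*
 * Return the mem_type entry for a mapping type, or NULL if the type is
 * out of range.
 */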
291 const struct mem_type *get_mem_type(unsigned int type)
292 {
293 	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
294 }
295 EXPORT_SYMBOL(get_mem_type);
296 
297 /*
298  * Adjust the PMD section entries according to the CPU in use.
299  */
300 static void __init build_mem_type_table(void)
301 {
302 	struct cachepolicy *cp;
303 	unsigned int cr = get_cr();
304 	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
305 	int cpu_arch = cpu_architecture();
306 	int i;
307 
308 	if (cpu_arch < CPU_ARCH_ARMv6) {
309 #if defined(CONFIG_CPU_DCACHE_DISABLE)
310 		if (cachepolicy > CPOLICY_BUFFERED)
311 			cachepolicy = CPOLICY_BUFFERED;
312 #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
313 		if (cachepolicy > CPOLICY_WRITETHROUGH)
314 			cachepolicy = CPOLICY_WRITETHROUGH;
315 #endif
316 	}
317 	if (cpu_arch < CPU_ARCH_ARMv5) {
318 		if (cachepolicy >= CPOLICY_WRITEALLOC)
319 			cachepolicy = CPOLICY_WRITEBACK;
320 		ecc_mask = 0;
321 	}
322 	if (is_smp())
323 		cachepolicy = CPOLICY_WRITEALLOC;
324 
325 	/*
326 	 * Strip out features not present on earlier architectures.
327 	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
328 	 * without extended page tables don't have the 'Shared' bit.
329 	 */
330 	if (cpu_arch < CPU_ARCH_ARMv5)
331 		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
332 			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
333 	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
334 		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
335 			mem_types[i].prot_sect &= ~PMD_SECT_S;
336 
337 	/*
338 	 * On ARMv5 and lower, bit 4 must be set for page tables (it was the
339 	 * cache "update-able on write" bit on ARM610).  However, Xscale and
340 	 * Xscale3 require this bit to be cleared.
341 	 */
342 	if (cpu_is_xscale() || cpu_is_xsc3()) {
343 		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
344 			mem_types[i].prot_sect &= ~PMD_BIT4;
345 			mem_types[i].prot_l1 &= ~PMD_BIT4;
346 		}
347 	} else if (cpu_arch < CPU_ARCH_ARMv6) {
348 		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
349 			if (mem_types[i].prot_l1)
350 				mem_types[i].prot_l1 |= PMD_BIT4;
351 			if (mem_types[i].prot_sect)
352 				mem_types[i].prot_sect |= PMD_BIT4;
353 		}
354 	}
355 
356 	/*
357 	 * Mark the device areas according to the CPU/architecture.
358 	 */
359 	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
360 		if (!cpu_is_xsc3()) {
361 			/*
362 			 * Mark device regions on ARMv6+ as execute-never
363 			 * to prevent speculative instruction fetches.
364 			 */
365 			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
366 			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
367 			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
368 			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
369 		}
370 		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
371 			/*
372 			 * For ARMv7 with TEX remapping,
373 			 * - shared device is SXCB=1100
374 			 * - nonshared device is SXCB=0100
375 			 * - write combine device mem is SXCB=0001
376 			 * (Uncached Normal memory)
377 			 */
378 			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
379 			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
380 			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
381 		} else if (cpu_is_xsc3()) {
382 			/*
383 			 * For Xscale3,
384 			 * - shared device is TEXCB=00101
385 			 * - nonshared device is TEXCB=01000
386 			 * - write combine device mem is TEXCB=00100
387 			 * (Inner/Outer Uncacheable in xsc3 parlance)
388 			 */
389 			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
390 			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
391 			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
392 		} else {
393 			/*
394 			 * For ARMv6 and ARMv7 without TEX remapping,
395 			 * - shared device is TEXCB=00001
396 			 * - nonshared device is TEXCB=01000
397 			 * - write combine device mem is TEXCB=00100
398 			 * (Uncached Normal in ARMv6 parlance).
399 			 */
400 			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
401 			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
402 			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
403 		}
404 	} else {
405 		/*
406 		 * On others, write combining is "Uncached/Buffered"
407 		 */
408 		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
409 	}
410 
411 	/*
412 	 * Now deal with the memory-type mappings
413 	 */
414 	cp = &cache_policies[cachepolicy];
415 	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
416 
417 	/*
418 	 * Only use write-through for non-SMP systems
419 	 */
420 	if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
421 		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
422 
423 	/*
424 	 * Enable CPU-specific coherency if supported.
425 	 * (Only available on XSC3 at the moment.)
426 	 */
427 	if (arch_is_coherent() && cpu_is_xsc3()) {
428 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
429 		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
430 		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
431 		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
432 	}
433 	/*
434 	 * ARMv6 and above have extended page tables.
435 	 */
436 	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
437 #ifndef CONFIG_ARM_LPAE
438 		/*
439 		 * Mark cache clean areas and XIP ROM read-only from
440 		 * SVC mode, with no access from userspace.
441 		 */
442 		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
443 		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
444 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
445 #endif
446 
447 		if (is_smp()) {
448 			/*
449 			 * Mark memory with the "shared" attribute
450 			 * for SMP systems
451 			 */
452 			user_pgprot |= L_PTE_SHARED;
453 			kern_pgprot |= L_PTE_SHARED;
454 			vecs_pgprot |= L_PTE_SHARED;
455 			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
456 			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
457 			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
458 			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
459 			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
460 			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
461 			mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
462 			mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
463 		}
464 	}
465 
466 	/*
467 	 * Non-cacheable Normal - intended for memory areas that must
468 	 * not cause dirty cache line writebacks when used
469 	 */
470 	if (cpu_arch >= CPU_ARCH_ARMv6) {
471 		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
472 			/* Non-cacheable Normal is XCB = 001 */
473 			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
474 				PMD_SECT_BUFFERED;
475 		} else {
476 			/* For both ARMv6 and non-TEX-remapping ARMv7 */
477 			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
478 				PMD_SECT_TEX(1);
479 		}
480 	} else {
481 		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
482 	}
483 
484 #ifdef CONFIG_ARM_LPAE
485 	/*
486 	 * Do not generate access flag faults for the kernel mappings.
487 	 */
488 	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
489 		mem_types[i].prot_pte |= PTE_EXT_AF;
490 		mem_types[i].prot_sect |= PMD_SECT_AF;
491 	}
492 	kern_pgprot |= PTE_EXT_AF;
493 	vecs_pgprot |= PTE_EXT_AF;
494 #endif
495 
496 	for (i = 0; i < 16; i++) {
497 		unsigned long v = pgprot_val(protection_map[i]);
498 		protection_map[i] = __pgprot(v | user_pgprot);
499 	}
500 
501 	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
502 	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
503 
504 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
505 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
506 				 L_PTE_DIRTY | kern_pgprot);
507 
508 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
509 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
510 	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
511 	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
512 	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
513 	mem_types[MT_ROM].prot_sect |= cp->pmd;
514 
515 	switch (cp->pmd) {
516 	case PMD_SECT_WT:
517 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
518 		break;
519 	case PMD_SECT_WB:
520 	case PMD_SECT_WBWA:
521 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
522 		break;
523 	}
524 	printk("Memory policy: ECC %sabled, Data cache %s\n",
525 		ecc_mask ? "en" : "dis", cp->policy);
526 
527 	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
528 		struct mem_type *t = &mem_types[i];
529 		if (t->prot_l1)
530 			t->prot_l1 |= PMD_DOMAIN(t->domain);
531 		if (t->prot_sect)
532 			t->prot_sect |= PMD_DOMAIN(t->domain);
533 	}
534 }
535 
536 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
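/*
 * Choose the protections for a /dev/mem style mapping: addresses that
 * are not valid RAM are mapped uncached, RAM opened with O_SYNC is
 * mapped write-combining, and ordinary RAM keeps the caller's
 * protections.
 */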
537 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
538 			      unsigned long size, pgprot_t vma_prot)
539 {
540 	if (!pfn_valid(pfn))
541 		return pgprot_noncached(vma_prot);
542 	else if (file->f_flags & O_SYNC)
543 		return pgprot_writecombine(vma_prot);
544 	return vma_prot;
545 }
546 EXPORT_SYMBOL(phys_mem_access_prot);
547 #endif
548 
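/* The vector page sits at 0xffff0000 with high vectors, otherwise at 0. */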
549 #define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
550 
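/*
 * Boot-time allocators: grab zeroed memory directly from memblock for
 * page tables and other early structures, before the page allocator
 * is available.
 */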
551 static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
552 {
553 	void *ptr = __va(memblock_alloc(sz, align));
554 	memset(ptr, 0, sz);
555 	return ptr;
556 }
557 
558 static void __init *early_alloc(unsigned long sz)
559 {
560 	return early_alloc_aligned(sz, sz);
561 }
562 
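/*
 * Make sure a PTE table exists for this PMD entry, allocating and
 * installing one if necessary, and return the PTE for @addr.
 */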
563 static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
564 {
565 	if (pmd_none(*pmd)) {
566 		pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
567 		__pmd_populate(pmd, __pa(pte), prot);
568 	}
569 	BUG_ON(pmd_bad(*pmd));
570 	return pte_offset_kernel(pmd, addr);
571 }
572 
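/*
 * Populate the PTEs covering [addr, end) with the given memory type,
 * one page frame at a time.
 */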
573 static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
574 				  unsigned long end, unsigned long pfn,
575 				  const struct mem_type *type)
576 {
577 	pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
578 	do {
579 		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
580 		pfn++;
581 	} while (pte++, addr += PAGE_SIZE, addr != end);
582 }
583 
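/*
 * Map [addr, end) at the PMD level: use section mappings when addr,
 * end and phys are all section-aligned, otherwise fall back to
 * building a second-level page table.
 */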
584 static void __init alloc_init_section(pud_t *pud, unsigned long addr,
585 				      unsigned long end, phys_addr_t phys,
586 				      const struct mem_type *type)
587 {
588 	pmd_t *pmd = pmd_offset(pud, addr);
589 
590 	/*
591 	 * Try a section mapping - end, addr and phys must all be aligned
592 	 * to a section boundary.  Note that PMDs refer to the individual
593 	 * L1 entries, whereas PGDs refer to a group of L1 entries making
594 	 * up one logical pointer to an L2 table.
595 	 */
596 	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
597 		pmd_t *p = pmd;
598 
599 #ifndef CONFIG_ARM_LPAE
600 		if (addr & SECTION_SIZE)
601 			pmd++;
602 #endif
603 
604 		do {
605 			*pmd = __pmd(phys | type->prot_sect);
606 			phys += SECTION_SIZE;
607 		} while (pmd++, addr += SECTION_SIZE, addr != end);
608 
609 		flush_pmd_entry(p);
610 	} else {
611 		/*
612 		 * No need to loop; pte's aren't interested in the
613 		 * individual L1 entries.
614 		 */
615 		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
616 	}
617 }
618 
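/*
 * Iterate over the (folded) PUD range and hand each piece down to
 * alloc_init_section().
 */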
619 static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
620 	unsigned long phys, const struct mem_type *type)
621 {
622 	pud_t *pud = pud_offset(pgd, addr);
623 	unsigned long next;
624 
625 	do {
626 		next = pud_addr_end(addr, end);
627 		alloc_init_section(pud, addr, next, phys, type);
628 		phys += next - addr;
629 	} while (pud++, addr = next, addr != end);
630 }
631 
632 #ifndef CONFIG_ARM_LPAE
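/*
 * Map physical addresses above 4GB using supersections (ARMv6/XSC3
 * 36-bit addressing).  A supersection maps 16MB through 16 identical
 * L1 entries, must be 16MB aligned, and is only defined for domain 0.
 */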
633 static void __init create_36bit_mapping(struct map_desc *md,
634 					const struct mem_type *type)
635 {
636 	unsigned long addr, length, end;
637 	phys_addr_t phys;
638 	pgd_t *pgd;
639 
640 	addr = md->virtual;
641 	phys = __pfn_to_phys(md->pfn);
642 	length = PAGE_ALIGN(md->length);
643 
644 	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
645 		printk(KERN_ERR "MM: CPU does not support supersection "
646 		       "mapping for 0x%08llx at 0x%08lx\n",
647 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
648 		return;
649 	}
650 
651 	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
652 	 *	Since domain assignments can in fact be arbitrary, the
653 	 *	'domain == 0' check below is required to ensure that ARMv6
654 	 *	supersections are only allocated for domain 0 regardless
655 	 *	of the actual domain assignments in use.
656 	 */
657 	if (type->domain) {
658 		printk(KERN_ERR "MM: invalid domain in supersection "
659 		       "mapping for 0x%08llx at 0x%08lx\n",
660 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
661 		return;
662 	}
663 
664 	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
665 		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
666 		       " at 0x%08lx invalid alignment\n",
667 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
668 		return;
669 	}
670 
671 	/*
672 	 * Shift bits [35:32] of address into bits [23:20] of PMD
673 	 * (See ARMv6 spec).
674 	 */
675 	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
676 
677 	pgd = pgd_offset_k(addr);
678 	end = addr + length;
679 	do {
680 		pud_t *pud = pud_offset(pgd, addr);
681 		pmd_t *pmd = pmd_offset(pud, addr);
682 		int i;
683 
684 		for (i = 0; i < 16; i++)
685 			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
686 
687 		addr += SUPERSECTION_SIZE;
688 		phys += SUPERSECTION_SIZE;
689 		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
690 	} while (addr != end);
691 }
692 #endif	/* !CONFIG_ARM_LPAE */
693 
694 /*
695  * Create the page directory entries and any necessary
696  * page tables for the mapping specified by `md'.  We
697  * are able to cope here with varying sizes and address
698  * offsets, and we take full advantage of sections and
699  * supersections.
700  */
701 static void __init create_mapping(struct map_desc *md)
702 {
703 	unsigned long addr, length, end;
704 	phys_addr_t phys;
705 	const struct mem_type *type;
706 	pgd_t *pgd;
707 
708 	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
709 		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
710 		       " at 0x%08lx in user region\n",
711 		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
712 		return;
713 	}
714 
715 	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
716 	    md->virtual >= PAGE_OFFSET &&
717 	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
718 		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
719 		       " at 0x%08lx out of vmalloc space\n",
720 		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
721 	}
722 
723 	type = &mem_types[md->type];
724 
725 #ifndef CONFIG_ARM_LPAE
726 	/*
727 	 * Catch 36-bit addresses
728 	 */
729 	if (md->pfn >= 0x100000) {
730 		create_36bit_mapping(md, type);
731 		return;
732 	}
733 #endif
734 
735 	addr = md->virtual & PAGE_MASK;
736 	phys = __pfn_to_phys(md->pfn);
737 	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
738 
739 	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
740 		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
741 		       "be mapped using pages, ignoring.\n",
742 		       (long long)__pfn_to_phys(md->pfn), addr);
743 		return;
744 	}
745 
746 	pgd = pgd_offset_k(addr);
747 	end = addr + length;
748 	do {
749 		unsigned long next = pgd_addr_end(addr, end);
750 
751 		alloc_init_pud(pgd, addr, next, phys, type);
752 
753 		phys += next - addr;
754 		addr = next;
755 	} while (pgd++, addr != end);
756 }
757 
758 /*
759  * Create the architecture specific mappings
760  */
761 void __init iotable_init(struct map_desc *io_desc, int nr)
762 {
763 	struct map_desc *md;
764 	struct vm_struct *vm;
765 
766 	if (!nr)
767 		return;
768 
769 	vm = early_alloc_aligned(sizeof(*vm) * nr, __alignof__(*vm));
770 
771 	for (md = io_desc; nr; md++, nr--) {
772 		create_mapping(md);
773 		vm->addr = (void *)(md->virtual & PAGE_MASK);
774 		vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
775 		vm->phys_addr = __pfn_to_phys(md->pfn);
776 		vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
777 		vm->flags |= VM_ARM_MTYPE(md->type);
778 		vm->caller = iotable_init;
779 		vm_area_add_early(vm++);
780 	}
781 }
782 
783 static void * __initdata vmalloc_min =
784 	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
785 
786 /*
787  * vmalloc=size forces the vmalloc area to be exactly 'size'
788  * bytes. This can be used to increase (or decrease) the vmalloc
789  * area - the default is 240m.
790  */
791 static int __init early_vmalloc(char *arg)
792 {
793 	unsigned long vmalloc_reserve = memparse(arg, NULL);
794 
795 	if (vmalloc_reserve < SZ_16M) {
796 		vmalloc_reserve = SZ_16M;
797 		printk(KERN_WARNING
798 			"vmalloc area too small, limiting to %luMB\n",
799 			vmalloc_reserve >> 20);
800 	}
801 
802 	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
803 		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
804 		printk(KERN_WARNING
805 			"vmalloc area is too big, limiting to %luMB\n",
806 			vmalloc_reserve >> 20);
807 	}
808 
809 	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
810 	return 0;
811 }
812 early_param("vmalloc", early_vmalloc);
813 
814 static phys_addr_t lowmem_limit __initdata = 0;
815 
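/*
 * Walk the meminfo bank array: flag or drop highmem banks, split or
 * trim banks that overlap the vmalloc area, and record the highest
 * lowmem physical address in lowmem_limit.
 */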
816 void __init sanity_check_meminfo(void)
817 {
818 	int i, j, highmem = 0;
819 
820 	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
821 		struct membank *bank = &meminfo.bank[j];
822 		*bank = meminfo.bank[i];
823 
824 		if (bank->start > ULONG_MAX)
825 			highmem = 1;
826 
827 #ifdef CONFIG_HIGHMEM
828 		if (__va(bank->start) >= vmalloc_min ||
829 		    __va(bank->start) < (void *)PAGE_OFFSET)
830 			highmem = 1;
831 
832 		bank->highmem = highmem;
833 
834 		/*
835 		 * Split those memory banks which are partially overlapping
836 		 * the vmalloc area greatly simplifying things later.
837 		 */
838 		if (!highmem && __va(bank->start) < vmalloc_min &&
839 		    bank->size > vmalloc_min - __va(bank->start)) {
840 			if (meminfo.nr_banks >= NR_BANKS) {
841 				printk(KERN_CRIT "NR_BANKS too low, "
842 						 "ignoring high memory\n");
843 			} else {
844 				memmove(bank + 1, bank,
845 					(meminfo.nr_banks - i) * sizeof(*bank));
846 				meminfo.nr_banks++;
847 				i++;
848 				bank[1].size -= vmalloc_min - __va(bank->start);
849 				bank[1].start = __pa(vmalloc_min - 1) + 1;
850 				bank[1].highmem = highmem = 1;
851 				j++;
852 			}
853 			bank->size = vmalloc_min - __va(bank->start);
854 		}
855 #else
856 		bank->highmem = highmem;
857 
858 		/*
859 		 * Highmem banks not allowed with !CONFIG_HIGHMEM.
860 		 */
861 		if (highmem) {
862 			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
863 			       "(!CONFIG_HIGHMEM).\n",
864 			       (unsigned long long)bank->start,
865 			       (unsigned long long)bank->start + bank->size - 1);
866 			continue;
867 		}
868 
869 		/*
870 		 * Check whether this memory bank would entirely overlap
871 		 * the vmalloc area.
872 		 */
873 		if (__va(bank->start) >= vmalloc_min ||
874 		    __va(bank->start) < (void *)PAGE_OFFSET) {
875 			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
876 			       "(vmalloc region overlap).\n",
877 			       (unsigned long long)bank->start,
878 			       (unsigned long long)bank->start + bank->size - 1);
879 			continue;
880 		}
881 
882 		/*
883 		 * Check whether this memory bank would partially overlap
884 		 * the vmalloc area.
885 		 */
886 		if (__va(bank->start + bank->size) > vmalloc_min ||
887 		    __va(bank->start + bank->size) < __va(bank->start)) {
888 			unsigned long newsize = vmalloc_min - __va(bank->start);
889 			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
890 			       "to -%.8llx (vmalloc region overlap).\n",
891 			       (unsigned long long)bank->start,
892 			       (unsigned long long)bank->start + bank->size - 1,
893 			       (unsigned long long)bank->start + newsize - 1);
894 			bank->size = newsize;
895 		}
896 #endif
897 		if (!bank->highmem && bank->start + bank->size > lowmem_limit)
898 			lowmem_limit = bank->start + bank->size;
899 
900 		j++;
901 	}
902 #ifdef CONFIG_HIGHMEM
903 	if (highmem) {
904 		const char *reason = NULL;
905 
906 		if (cache_is_vipt_aliasing()) {
907 			/*
908 			 * Interactions between kmap and other mappings
909 			 * make highmem support with aliasing VIPT caches
910 			 * rather difficult.
911 			 */
912 			reason = "with VIPT aliasing cache";
913 		}
914 		if (reason) {
915 			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
916 				reason);
917 			while (j > 0 && meminfo.bank[j - 1].highmem)
918 				j--;
919 		}
920 	}
921 #endif
922 	meminfo.nr_banks = j;
923 	high_memory = __va(lowmem_limit - 1) + 1;
924 	memblock_set_current_limit(lowmem_limit);
925 }
926 
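/*
 * Clear the boot-time section mappings we no longer want: everything
 * below PAGE_OFFSET (except an XIP kernel in the module area) and the
 * kernel-space mappings above the first lowmem block up to the start
 * of the vmalloc region.
 */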
927 static inline void prepare_page_table(void)
928 {
929 	unsigned long addr;
930 	phys_addr_t end;
931 
932 	/*
933 	 * Clear out all the mappings below the kernel image.
934 	 */
935 	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
936 		pmd_clear(pmd_off_k(addr));
937 
938 #ifdef CONFIG_XIP_KERNEL
939 	/* The XIP kernel is mapped in the module area -- skip over it */
940 	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
941 #endif
942 	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
943 		pmd_clear(pmd_off_k(addr));
944 
945 	/*
946 	 * Find the end of the first block of lowmem.
947 	 */
948 	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
949 	if (end >= lowmem_limit)
950 		end = lowmem_limit;
951 
952 	/*
953 	 * Clear out all the kernel space mappings, except for the first
954 	 * memory bank, up to the vmalloc region.
955 	 */
956 	for (addr = __phys_to_virt(end);
957 	     addr < VMALLOC_START; addr += PMD_SIZE)
958 		pmd_clear(pmd_off_k(addr));
959 }
960 
961 #ifdef CONFIG_ARM_LPAE
962 /* the first page is reserved for pgd */
963 #define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
964 				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
965 #else
966 #define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
967 #endif
968 
969 /*
970  * Reserve the special regions of memory
971  */
972 void __init arm_mm_memblock_reserve(void)
973 {
974 	/*
975 	 * Reserve the page tables.  These are already in use,
976 	 * and can only be in node 0.
977 	 */
978 	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);
979 
980 #ifdef CONFIG_SA1111
981 	/*
982 	 * Because of the SA1111 DMA bug, we want to preserve our
983 	 * precious DMA-able memory...
984 	 */
985 	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
986 #endif
987 }
988 
989 /*
990  * Set up the device mappings.  Since we clear out the page tables for all
991  * mappings above VMALLOC_START, we will remove any debug device mappings.
992  * This means you have to be careful how you debug this function, or any
993  * called function.  This means you can't use any function or debugging
994  * method which may touch any device, otherwise the kernel _will_ crash.
995  */
996 static void __init devicemaps_init(struct machine_desc *mdesc)
997 {
998 	struct map_desc map;
999 	unsigned long addr;
1000 
1001 	/*
1002 	 * Allocate the vector page early.
1003 	 */
1004 	vectors_page = early_alloc(PAGE_SIZE);
1005 
1006 	for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
1007 		pmd_clear(pmd_off_k(addr));
1008 
1009 	/*
1010 	 * Map the kernel if it is XIP.
1011 	 * It is always first in the module area.
1012 	 */
1013 #ifdef CONFIG_XIP_KERNEL
1014 	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
1015 	map.virtual = MODULES_VADDR;
1016 	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
1017 	map.type = MT_ROM;
1018 	create_mapping(&map);
1019 #endif
1020 
1021 	/*
1022 	 * Map the cache flushing regions.
1023 	 */
1024 #ifdef FLUSH_BASE
1025 	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
1026 	map.virtual = FLUSH_BASE;
1027 	map.length = SZ_1M;
1028 	map.type = MT_CACHECLEAN;
1029 	create_mapping(&map);
1030 #endif
1031 #ifdef FLUSH_BASE_MINICACHE
1032 	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
1033 	map.virtual = FLUSH_BASE_MINICACHE;
1034 	map.length = SZ_1M;
1035 	map.type = MT_MINICLEAN;
1036 	create_mapping(&map);
1037 #endif
1038 
1039 	/*
1040 	 * Create a mapping for the machine vectors at the high-vectors
1041 	 * location (0xffff0000).  If we aren't using high-vectors, also
1042 	 * create a mapping at the low-vectors virtual address.
1043 	 */
1044 	map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
1045 	map.virtual = 0xffff0000;
1046 	map.length = PAGE_SIZE;
1047 	map.type = MT_HIGH_VECTORS;
1048 	create_mapping(&map);
1049 
1050 	if (!vectors_high()) {
1051 		map.virtual = 0;
1052 		map.type = MT_LOW_VECTORS;
1053 		create_mapping(&map);
1054 	}
1055 
1056 	/*
1057 	 * Ask the machine support to map in the statically mapped devices.
1058 	 */
1059 	if (mdesc->map_io)
1060 		mdesc->map_io();
1061 
1062 	/*
1063 	 * Finally flush the caches and tlb to ensure that we're in a
1064 	 * consistent state wrt the writebuffer.  This also ensures that
1065 	 * any write-allocated cache lines in the vector page are written
1066 	 * back.  After this point, we can start to touch devices again.
1067 	 */
1068 	local_flush_tlb_all();
1069 	flush_cache_all();
1070 }
1071 
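/*
 * With CONFIG_HIGHMEM, allocate the PTE table backing the persistent
 * kmap area up front so kmap() never needs to allocate page tables.
 */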
1072 static void __init kmap_init(void)
1073 {
1074 #ifdef CONFIG_HIGHMEM
1075 	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
1076 		PKMAP_BASE, _PAGE_KERNEL_TABLE);
1077 #endif
1078 }
1079 
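/*
 * Create the kernel's linear mapping for every memblock region,
 * clamped to lowmem_limit.
 */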
1080 static void __init map_lowmem(void)
1081 {
1082 	struct memblock_region *reg;
1083 
1084 	/* Map all the lowmem memory banks. */
1085 	for_each_memblock(memory, reg) {
1086 		phys_addr_t start = reg->base;
1087 		phys_addr_t end = start + reg->size;
1088 		struct map_desc map;
1089 
1090 		if (end > lowmem_limit)
1091 			end = lowmem_limit;
1092 		if (start >= end)
1093 			break;
1094 
1095 		map.pfn = __phys_to_pfn(start);
1096 		map.virtual = __phys_to_virt(start);
1097 		map.length = end - start;
1098 		map.type = MT_MEMORY;
1099 
1100 		create_mapping(&map);
1101 	}
1102 }
1103 
1104 /*
1105  * paging_init() sets up the page tables, initialises the zone memory
1106  * maps, and sets up the zero page, bad page and bad page tables.
1107  */
1108 void __init paging_init(struct machine_desc *mdesc)
1109 {
1110 	void *zero_page;
1111 
1112 	memblock_set_current_limit(lowmem_limit);
1113 
1114 	build_mem_type_table();
1115 	prepare_page_table();
1116 	map_lowmem();
1117 	devicemaps_init(mdesc);
1118 	kmap_init();
1119 
1120 	top_pmd = pmd_off_k(0xffff0000);
1121 
1122 	/* allocate the zero page. */
1123 	zero_page = early_alloc(PAGE_SIZE);
1124 
1125 	bootmem_init();
1126 
1127 	empty_zero_page = virt_to_page(zero_page);
1128 	__flush_dcache_page(NULL, empty_zero_page);
1129 }
1130