/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>

#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/procinfo.h>
#include <asm/memory.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include <asm/mach/pci.h>

#include "mm.h"
#include "tcm.h"

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
pgprot_t pgprot_hyp_device;
pgprot_t pgprot_s2;
pgprot_t pgprot_s2_device;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
	pteval_t	pte_s2;
};

#ifdef CONFIG_ARM_LPAE
#define s2_policy(policy)	policy
#else
#define s2_policy(policy)	0
#endif

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy = "uncached",
		.cr_mask = CR_W|CR_C,
		.pmd = PMD_SECT_UNCACHED,
		.pte = L_PTE_MT_UNCACHED,
		.pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED),
	}, {
		.policy = "buffered",
		.cr_mask = CR_C,
		.pmd = PMD_SECT_BUFFERED,
		.pte = L_PTE_MT_BUFFERABLE,
		.pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED),
	}, {
		.policy = "writethrough",
		.cr_mask = 0,
		.pmd = PMD_SECT_WT,
		.pte = L_PTE_MT_WRITETHROUGH,
		.pte_s2 = s2_policy(L_PTE_S2_MT_WRITETHROUGH),
	}, {
		.policy = "writeback",
		.cr_mask = 0,
		.pmd = PMD_SECT_WB,
		.pte = L_PTE_MT_WRITEBACK,
		.pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK),
	}, {
		.policy = "writealloc",
		.cr_mask = 0,
		.pmd = PMD_SECT_WBWA,
		.pte = L_PTE_MT_WRITEALLOC,
		.pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK),
	}
};

#ifdef CONFIG_CPU_CP15
/*
 * These are useful for identifying cache coherency
 * problems by allowing the cache or the cache and
 * writebuffer to be turned off.  (Note: the write
 * buffer should not be on and the cache off).
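 *
 * For example, "cachepolicy=uncached" clears both the C and W bits in
 * the control register (cache and write buffer off), while
 * "cachepolicy=buffered" clears only the C bit and leaves the write
 * buffer enabled, as described by the cache_policies[] table above.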
 */
static int __init early_cachepolicy(char *p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We can not
	 * change these attributes once the initial assembly has setup the
	 * page tables.
	 */
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
	return 0;
}
early_param("cachepolicy", early_cachepolicy);

static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);

#ifndef CONFIG_ARM_LPAE
static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);
#endif

static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);

#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;

	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
#endif

#else /* ifdef CONFIG_CPU_CP15 */

static int __init early_cachepolicy(char *p)
{
	pr_warning("cachepolicy kernel parameter not supported without cp15\n");
	return 0;
}
early_param("cachepolicy", early_cachepolicy);

static int __init noalign_setup(char *__unused)
{
	pr_warning("noalign kernel parameter not supported without cp15\n");
	return 1;
}
__setup("noalign", noalign_setup);

#endif /* ifdef CONFIG_CPU_CP15 / else */

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		/* Strongly ordered / ARMv6 shared device */
		.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
			    L_PTE_SHARED,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
		.domain = DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PROT_SECT_DEVICE,
		.domain = DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	/* ioremap_cached */
		.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain = DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PROT_SECT_DEVICE,
		.domain = DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte = PROT_PTE_DEVICE,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain = DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain = DOMAIN_KERNEL,
	},
#ifndef CONFIG_ARM_LPAE
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain = DOMAIN_KERNEL,
	},
#endif
	[MT_LOW_VECTORS] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_RDONLY,
		.prot_l1 = PMD_TYPE_TABLE,
		.domain = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_USER | L_PTE_RDONLY,
		.prot_l1 = PMD_TYPE_TABLE,
		.domain = DOMAIN_USER,
	},
	[MT_MEMORY_RWX] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_XN,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_NONCACHED] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_MT_BUFFERABLE,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_DTCM] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_XN,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_ITCM] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1 = PMD_TYPE_TABLE,
		.domain = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_SO] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_MT_UNCACHED | L_PTE_XN,
		.prot_l1 = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
			     PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain = DOMAIN_KERNEL,
	},
	[MT_MEMORY_DMA_READY] = {
		.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			    L_PTE_XN,
		.prot_l1 = PMD_TYPE_TABLE,
		.domain = DOMAIN_KERNEL,
	},
};

const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);

#define PTE_SET_FN(_name, pteop) \
static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \
			void *data) \
{ \
	pte_t pte = pteop(*ptep); \
\
	set_pte_ext(ptep, pte, 0); \
	return 0; \
} \

#define SET_MEMORY_FN(_name, callback) \
int set_memory_##_name(unsigned long addr, int numpages) \
{ \
	unsigned long start = addr; \
	unsigned long size = PAGE_SIZE*numpages; \
	unsigned long end = start + size; \
\
	if (start < MODULES_VADDR || start >= MODULES_END) \
		return -EINVAL; \
\
	if (end < MODULES_VADDR || end >= MODULES_END) \
		return -EINVAL; \
\
	apply_to_page_range(&init_mm, start, size, callback, NULL); \
	flush_tlb_kernel_range(start, end); \
	return 0; \
}

PTE_SET_FN(ro, pte_wrprotect)
PTE_SET_FN(rw, pte_mkwrite)
PTE_SET_FN(x, pte_mkexec)
PTE_SET_FN(nx, pte_mknexec)

SET_MEMORY_FN(ro, pte_set_ro)
SET_MEMORY_FN(rw, pte_set_rw)
SET_MEMORY_FN(x, pte_set_x)
SET_MEMORY_FN(nx, pte_set_nx)

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
	if (is_smp())
		cachepolicy = CPOLICY_WRITEALLOC;

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
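			 *
			 * (The PTE-level device types already carry
			 * L_PTE_XN via PROT_PTE_DEVICE; the assignments
			 * below add the equivalent section-level XN bit.)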
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;

			/* Also setup NX memory mapping */
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
	s2_pgprot = cp->pte_s2;
	hyp_device_pgprot = s2_device_pgprot = mem_types[MT_DEVICE].prot_pte;

	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
#ifndef CONFIG_ARM_LPAE
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
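		 *
		 * APX together with AP_WRITE encodes "privileged
		 * read-only, no user access" in the short-descriptor
		 * section format.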
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif

		if (is_smp()) {
			/*
			 * Mark memory with the "shared" attribute
			 * for SMP systems
			 */
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			s2_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

#ifdef CONFIG_ARM_LPAE
	/*
	 * Do not generate access flag faults for the kernel mappings.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		mem_types[i].prot_pte |= PTE_EXT_AF;
		if (mem_types[i].prot_sect)
			mem_types[i].prot_sect |= PMD_SECT_AF;
	}
	kern_pgprot |= PTE_EXT_AF;
	vecs_pgprot |= PTE_EXT_AF;
#endif

	for (i = 0; i < 16; i++) {
		pteval_t v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);
	pgprot_s2 = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot);
	pgprot_s2_device = __pgprot(s2_device_pgprot);
	pgprot_hyp_device = __pgprot(hyp_device_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	pr_info("Memory policy: %sData cache %s\n",
"ECC enabled, " : "", cp->policy); 617 618 for (i = 0; i < ARRAY_SIZE(mem_types); i++) { 619 struct mem_type *t = &mem_types[i]; 620 if (t->prot_l1) 621 t->prot_l1 |= PMD_DOMAIN(t->domain); 622 if (t->prot_sect) 623 t->prot_sect |= PMD_DOMAIN(t->domain); 624 } 625 } 626 627 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE 628 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 629 unsigned long size, pgprot_t vma_prot) 630 { 631 if (!pfn_valid(pfn)) 632 return pgprot_noncached(vma_prot); 633 else if (file->f_flags & O_SYNC) 634 return pgprot_writecombine(vma_prot); 635 return vma_prot; 636 } 637 EXPORT_SYMBOL(phys_mem_access_prot); 638 #endif 639 640 #define vectors_base() (vectors_high() ? 0xffff0000 : 0) 641 642 static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) 643 { 644 void *ptr = __va(memblock_alloc(sz, align)); 645 memset(ptr, 0, sz); 646 return ptr; 647 } 648 649 static void __init *early_alloc(unsigned long sz) 650 { 651 return early_alloc_aligned(sz, sz); 652 } 653 654 static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) 655 { 656 if (pmd_none(*pmd)) { 657 pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); 658 __pmd_populate(pmd, __pa(pte), prot); 659 } 660 BUG_ON(pmd_bad(*pmd)); 661 return pte_offset_kernel(pmd, addr); 662 } 663 664 static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, 665 unsigned long end, unsigned long pfn, 666 const struct mem_type *type) 667 { 668 pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1); 669 do { 670 set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); 671 pfn++; 672 } while (pte++, addr += PAGE_SIZE, addr != end); 673 } 674 675 static void __init __map_init_section(pmd_t *pmd, unsigned long addr, 676 unsigned long end, phys_addr_t phys, 677 const struct mem_type *type) 678 { 679 pmd_t *p = pmd; 680 681 #ifndef CONFIG_ARM_LPAE 682 /* 683 * In classic MMU format, puds and pmds are folded in to 684 * the pgds. pmd_offset gives the PGD entry. PGDs refer to a 685 * group of L1 entries making up one logical pointer to 686 * an L2 table (2MB), where as PMDs refer to the individual 687 * L1 entries (1MB). Hence increment to get the correct 688 * offset for odd 1MB sections. 689 * (See arch/arm/include/asm/pgtable-2level.h) 690 */ 691 if (addr & SECTION_SIZE) 692 pmd++; 693 #endif 694 do { 695 *pmd = __pmd(phys | type->prot_sect); 696 phys += SECTION_SIZE; 697 } while (pmd++, addr += SECTION_SIZE, addr != end); 698 699 flush_pmd_entry(p); 700 } 701 702 static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, 703 unsigned long end, phys_addr_t phys, 704 const struct mem_type *type) 705 { 706 pmd_t *pmd = pmd_offset(pud, addr); 707 unsigned long next; 708 709 do { 710 /* 711 * With LPAE, we must loop over to map 712 * all the pmds for the given range. 713 */ 714 next = pmd_addr_end(addr, end); 715 716 /* 717 * Try a section mapping - addr, next and phys must all be 718 * aligned to a section boundary. 
		 */
		if (type->prot_sect &&
				((addr | next | phys) & ~SECTION_MASK) == 0) {
			__map_init_section(pmd, addr, next, phys, type);
		} else {
			alloc_init_pte(pmd, addr, next,
					__phys_to_pfn(phys), type);
		}

		phys += next - addr;

	} while (pmd++, addr = next, addr != end);
}

static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
				  unsigned long end, phys_addr_t phys,
				  const struct mem_type *type)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_pmd(pud, addr, next, phys, type);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}

#ifndef CONFIG_ARM_LPAE
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B. ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to insure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
		       " at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}
#endif	/* !CONFIG_ARM_LPAE */

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
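 *
 * Mappings in the user address range (other than the vectors page)
 * are rejected, while device/ROM mappings outside the vmalloc region
 * only produce a warning.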
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
		       " at 0x%08lx in user region\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET &&
	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
		       " at 0x%08lx out of vmalloc space\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

#ifndef CONFIG_ARM_LPAE
	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}
#endif

	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       (long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_pud(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	struct map_desc *md;
	struct vm_struct *vm;
	struct static_vm *svm;

	if (!nr)
		return;

	svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm));

	for (md = io_desc; nr; md++, nr--) {
		create_mapping(md);

		vm = &svm->vm;
		vm->addr = (void *)(md->virtual & PAGE_MASK);
		vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
		vm->phys_addr = __pfn_to_phys(md->pfn);
		vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
		vm->flags |= VM_ARM_MTYPE(md->type);
		vm->caller = iotable_init;
		add_static_vm_early(svm++);
	}
}

void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
				  void *caller)
{
	struct vm_struct *vm;
	struct static_vm *svm;

	svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm));

	vm = &svm->vm;
	vm->addr = (void *)addr;
	vm->size = size;
	vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
	vm->caller = caller;
	add_static_vm_early(svm);
}

#ifndef CONFIG_ARM_LPAE

/*
 * The Linux PMD is made of two consecutive section entries covering 2MB
 * (see definition in include/asm/pgtable-2level.h).  However a call to
 * create_mapping() may optimize static mappings by using individual
 * 1MB section mappings.  This leaves the actual PMD potentially half
 * initialized if the top or bottom section entry isn't used, leaving it
 * open to problems if a subsequent ioremap() or vmalloc() tries to use
 * the virtual space left free by that unused section entry.
 *
 * Let's avoid the issue by inserting dummy vm entries covering the unused
 * PMD halves once the static mappings are in place.
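 *
 * Each dummy entry spans exactly SECTION_SIZE (one 1MB half of the
 * 2MB PMD), so the unused half is reserved and cannot be handed out
 * by a later ioremap() or vmalloc().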
 */

static void __init pmd_empty_section_gap(unsigned long addr)
{
	vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
}

static void __init fill_pmd_gaps(void)
{
	struct static_vm *svm;
	struct vm_struct *vm;
	unsigned long addr, next = 0;
	pmd_t *pmd;

	list_for_each_entry(svm, &static_vmlist, list) {
		vm = &svm->vm;
		addr = (unsigned long)vm->addr;
		if (addr < next)
			continue;

		/*
		 * Check if this vm starts on an odd section boundary.
		 * If so and the first section entry for this PMD is free
		 * then we block the corresponding virtual address.
		 */
		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
			pmd = pmd_off_k(addr);
			if (pmd_none(*pmd))
				pmd_empty_section_gap(addr & PMD_MASK);
		}

		/*
		 * Then check if this vm ends on an odd section boundary.
		 * If so and the second section entry for this PMD is empty
		 * then we block the corresponding virtual address.
		 */
		addr += vm->size;
		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
			pmd = pmd_off_k(addr) + 1;
			if (pmd_none(*pmd))
				pmd_empty_section_gap(addr);
		}

		/* no need to look at any vm entry until we hit the next PMD */
		next = (addr + PMD_SIZE - 1) & PMD_MASK;
	}
}

#else
#define fill_pmd_gaps() do { } while (0)
#endif

#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
static void __init pci_reserve_io(void)
{
	struct static_vm *svm;

	svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE);
	if (svm)
		return;

	vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
}
#else
#define pci_reserve_io() do { } while (0)
#endif

#ifdef CONFIG_DEBUG_LL
void __init debug_ll_io_init(void)
{
	struct map_desc map;

	debug_ll_addr(&map.pfn, &map.virtual);
	if (!map.pfn || !map.virtual)
		return;
	map.pfn = __phys_to_pfn(map.pfn);
	map.virtual &= PAGE_MASK;
	map.length = PAGE_SIZE;
	map.type = MT_DEVICE;
	iotable_init(&map, 1);
}
#endif

static void * __initdata vmalloc_min =
	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 240m.
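 *
 * The value is clamped in early_vmalloc() below: at least 16MB of
 * vmalloc space is kept, and at least 32MB of lowmem above
 * PAGE_OFFSET remains directly mapped.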
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
		printk(KERN_WARNING
			"vmalloc area is too big, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
	return 0;
}
early_param("vmalloc", early_vmalloc);

phys_addr_t arm_lowmem_limit __initdata = 0;

void __init sanity_check_meminfo(void)
{
	phys_addr_t memblock_limit = 0;
	int i, j, highmem = 0;
	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;

	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
		struct membank *bank = &meminfo.bank[j];
		phys_addr_t size_limit;

		*bank = meminfo.bank[i];
		size_limit = bank->size;

		if (bank->start >= vmalloc_limit)
			highmem = 1;
		else
			size_limit = vmalloc_limit - bank->start;

		bank->highmem = highmem;

#ifdef CONFIG_HIGHMEM
		/*
		 * Split those memory banks which are partially overlapping
		 * the vmalloc area greatly simplifying things later.
		 */
		if (!highmem && bank->size > size_limit) {
			if (meminfo.nr_banks >= NR_BANKS) {
				printk(KERN_CRIT "NR_BANKS too low, "
						 "ignoring high memory\n");
			} else {
				memmove(bank + 1, bank,
					(meminfo.nr_banks - i) * sizeof(*bank));
				meminfo.nr_banks++;
				i++;
				bank[1].size -= size_limit;
				bank[1].start = vmalloc_limit;
				bank[1].highmem = highmem = 1;
				j++;
			}
			bank->size = size_limit;
		}
#else
		/*
		 * Highmem banks not allowed with !CONFIG_HIGHMEM.
		 */
		if (highmem) {
			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
			       "(!CONFIG_HIGHMEM).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1);
			continue;
		}

		/*
		 * Check whether this memory bank would partially overlap
		 * the vmalloc area.
		 */
		if (bank->size > size_limit) {
			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
			       "to -%.8llx (vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1,
			       (unsigned long long)bank->start + size_limit - 1);
			bank->size = size_limit;
		}
#endif
		if (!bank->highmem) {
			phys_addr_t bank_end = bank->start + bank->size;

			if (bank_end > arm_lowmem_limit)
				arm_lowmem_limit = bank_end;

			/*
			 * Find the first non-section-aligned page, and point
			 * memblock_limit at it. This relies on rounding the
			 * limit down to be section-aligned, which happens at
			 * the end of this function.
			 *
			 * With this algorithm, the start or end of almost any
			 * bank can be non-section-aligned. The only exception
			 * is that the start of the bank 0 must be section-
			 * aligned, since otherwise memory would need to be
			 * allocated when mapping the start of bank 0, which
			 * occurs before any free memory is mapped.
			 */
			if (!memblock_limit) {
				if (!IS_ALIGNED(bank->start, SECTION_SIZE))
					memblock_limit = bank->start;
				else if (!IS_ALIGNED(bank_end, SECTION_SIZE))
					memblock_limit = bank_end;
			}
		}
		j++;
	}
#ifdef CONFIG_HIGHMEM
	if (highmem) {
		const char *reason = NULL;

		if (cache_is_vipt_aliasing()) {
			/*
			 * Interactions between kmap and other mappings
			 * make highmem support with aliasing VIPT caches
			 * rather difficult.
			 */
			reason = "with VIPT aliasing cache";
		}
		if (reason) {
			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
				reason);
			while (j > 0 && meminfo.bank[j - 1].highmem)
				j--;
		}
	}
#endif
	meminfo.nr_banks = j;
	high_memory = __va(arm_lowmem_limit - 1) + 1;

	/*
	 * Round the memblock limit down to a section size.  This
	 * helps to ensure that we will allocate memory from the
	 * last full section, which should be mapped.
	 */
	if (memblock_limit)
		memblock_limit = round_down(memblock_limit, SECTION_SIZE);
	if (!memblock_limit)
		memblock_limit = arm_lowmem_limit;

	memblock_set_current_limit(memblock_limit);
}

static inline void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= arm_lowmem_limit)
		end = arm_lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_START; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}

#ifdef CONFIG_ARM_LPAE
/* the first page is reserved for pgd */
#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
#else
#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#endif

/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}

/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_START, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function.  This means you can't use any function or debugging
 * method which may touch any device, otherwise the kernel _will_ crash.
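 *
 * In particular, low-level debug output that relies on a statically
 * mapped UART may fault here until the machine's map_io() (or
 * debug_ll_io_init()) has recreated the mapping below.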
 */
static void __init devicemaps_init(const struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;
	void *vectors;

	/*
	 * Allocate the vector page early.
	 */
	vectors = early_alloc(PAGE_SIZE * 2);

	early_trap_init(vectors);

	for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the modulearea.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
#ifdef CONFIG_KUSER_HELPERS
	map.type = MT_HIGH_VECTORS;
#else
	map.type = MT_LOW_VECTORS;
#endif
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.length = PAGE_SIZE * 2;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/* Now create a kernel read-only mapping */
	map.pfn += 1;
	map.virtual = 0xffff0000 + PAGE_SIZE;
	map.length = PAGE_SIZE;
	map.type = MT_LOW_VECTORS;
	create_mapping(&map);

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();
	else
		debug_ll_io_init();
	fill_pmd_gaps();

	/* Reserve fixed i/o space in VMALLOC region */
	pci_reserve_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif
}

static void __init map_lowmem(void)
{
	struct memblock_region *reg;
	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

	/*
	 * Map all the lowmem memory banks.
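	 *
	 * Banks that do not overlap the kernel image are mapped
	 * MT_MEMORY_RWX in one go; the bank containing the image is
	 * split so that only the section-aligned [_stext, __init_end)
	 * range is executable and the rest is MT_MEMORY_RW.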
	 */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;
		struct map_desc map;

		if (end > arm_lowmem_limit)
			end = arm_lowmem_limit;
		if (start >= end)
			break;

		if (end < kernel_x_start || start >= kernel_x_end) {
			map.pfn = __phys_to_pfn(start);
			map.virtual = __phys_to_virt(start);
			map.length = end - start;
			map.type = MT_MEMORY_RWX;

			create_mapping(&map);
		} else {
			/* This better cover the entire kernel */
			if (start < kernel_x_start) {
				map.pfn = __phys_to_pfn(start);
				map.virtual = __phys_to_virt(start);
				map.length = kernel_x_start - start;
				map.type = MT_MEMORY_RW;

				create_mapping(&map);
			}

			map.pfn = __phys_to_pfn(kernel_x_start);
			map.virtual = __phys_to_virt(kernel_x_start);
			map.length = kernel_x_end - kernel_x_start;
			map.type = MT_MEMORY_RWX;

			create_mapping(&map);

			if (kernel_x_end < end) {
				map.pfn = __phys_to_pfn(kernel_x_end);
				map.virtual = __phys_to_virt(kernel_x_end);
				map.length = end - kernel_x_end;
				map.type = MT_MEMORY_RW;

				create_mapping(&map);
			}
		}
	}
}

#ifdef CONFIG_ARM_LPAE
/*
 * early_paging_init() recreates boot time page table setup, allowing machines
 * to switch over to a high (>4G) address space on LPAE systems
 */
void __init early_paging_init(const struct machine_desc *mdesc,
			      struct proc_info_list *procinfo)
{
	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
	unsigned long map_start, map_end;
	pgd_t *pgd0, *pgdk;
	pud_t *pud0, *pudk, *pud_start;
	pmd_t *pmd0, *pmdk;
	phys_addr_t phys;
	int i;

	if (!(mdesc->init_meminfo))
		return;

	/* remap kernel code and data */
	map_start = init_mm.start_code;
	map_end = init_mm.brk;

	/* get a handle on things... */
	pgd0 = pgd_offset_k(0);
	pud_start = pud0 = pud_offset(pgd0, 0);
	pmd0 = pmd_offset(pud0, 0);

	pgdk = pgd_offset_k(map_start);
	pudk = pud_offset(pgdk, map_start);
	pmdk = pmd_offset(pudk, map_start);

	mdesc->init_meminfo();

	/* Run the patch stub to update the constants */
	fixup_pv_table(&__pv_table_begin,
		(&__pv_table_end - &__pv_table_begin) << 2);

	/*
	 * Cache cleaning operations for self-modifying code
	 * We should clean the entries by MVA but running a
	 * for loop over every pv_table entry pointer would
	 * just complicate the code.
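	 * Instead the cache is cleaned to the Level of Unification
	 * (Inner Shareable) via flush_cache_louis() below, followed by
	 * barriers so the patched constants are observed before use.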
	 */
	flush_cache_louis();
	dsb();
	isb();

	/* remap level 1 table */
	for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
		set_pud(pud0,
			__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
		pmd0 += PTRS_PER_PMD;
	}

	/* remap pmds for kernel mapping */
	phys = __pa(map_start) & PMD_MASK;
	do {
		*pmdk++ = __pmd(phys | pmdprot);
		phys += PMD_SIZE;
	} while (phys < map_end);

	flush_cache_all();
	cpu_switch_mm(pgd0, &init_mm);
	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
	local_flush_bp_all();
	local_flush_tlb_all();
}

#else

void __init early_paging_init(const struct machine_desc *mdesc,
			      struct proc_info_list *procinfo)
{
	if (mdesc->init_meminfo)
		mdesc->init_meminfo();
}

#endif

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(const struct machine_desc *mdesc)
{
	void *zero_page;

	build_mem_type_table();
	prepare_page_table();
	map_lowmem();
	dma_contiguous_remap();
	devicemaps_init(mdesc);
	kmap_init();
	tcm_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_page(NULL, empty_zero_page);
}