/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>

#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/traps.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	}
};

/*
 * These are useful for identifying cache coherency problems by allowing
 * the cache or the cache and writebuffer to be turned off.  (Note: the
 * write buffer should not be on with the cache off.)
 */
static int __init early_cachepolicy(char *p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We cannot
	 * change these attributes once the initial assembly has set up the
	 * page tables.
	 */
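	/*
	 * Worked example: booting with "cachepolicy=writethrough" on a
	 * pre-ARMv6 core selects CPOLICY_WRITETHROUGH above.  Note that
	 * the match is prefix-based (memcmp() over strlen() of the table
	 * entry), so any trailing characters in the command line value
	 * are ignored.
	 */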
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
	return 0;
}
early_param("cachepolicy", early_cachepolicy);

static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);

#ifndef CONFIG_ARM_LPAE
static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);
#endif

static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);

#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;

	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
#endif

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
#ifndef CONFIG_ARM_LPAE
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
#endif
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
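	/*
	 * The remaining entries describe normal memory rather than device
	 * or vector mappings; MT_MEMORY is the type that map_lowmem()
	 * below applies to all of lowmem RAM.
	 */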
	[MT_MEMORY] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_NONCACHED] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_BUFFERABLE,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_DTCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_ITCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_SO] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_UNCACHED,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
};

const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
	if (is_smp())
		cachepolicy = CPOLICY_WRITEALLOC;

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * On ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}
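	/*
	 * At this point mem_types[] still holds the architecture-neutral
	 * defaults; everything below specialises the entries for the CPU
	 * that is actually running.
	 */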
	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

	/*
	 * Only use write-through for non-SMP systems
	 */
	if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;

	/*
	 * Enable CPU-specific coherency if supported.
	 * (Only available on XSC3 at the moment.)
	 */
	if (arch_is_coherent() && cpu_is_xsc3()) {
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
	}
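	/*
	 * Illustrative: on an SMP ARMv7 system the writealloc policy
	 * forced earlier makes kern_pgprot equal to L_PTE_MT_WRITEALLOC
	 * here, and the SMP block below adds L_PTE_SHARED, so kernel
	 * PTEs end up Normal memory, write-back write-allocate, shared.
	 */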
	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
#ifndef CONFIG_ARM_LPAE
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif

		if (is_smp()) {
			/*
			 * Mark memory with the "shared" attribute
			 * for SMP systems
			 */
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

#ifdef CONFIG_ARM_LPAE
	/*
	 * Do not generate access flag faults for the kernel mappings.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		mem_types[i].prot_pte |= PTE_EXT_AF;
		mem_types[i].prot_sect |= PMD_SECT_AF;
	}
	kern_pgprot |= PTE_EXT_AF;
	vecs_pgprot |= PTE_EXT_AF;
#endif

	for (i = 0; i < 16; i++) {
		unsigned long v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	printk("Memory policy: ECC %sabled, Data cache %s\n",
		ecc_mask ? "en" : "dis", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}

#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
#endif
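/*
 * For reference, phys_mem_access_prot() is consulted by the generic
 * /dev/mem mmap path.  An illustrative userspace caller (not part of
 * this file) might do:
 *
 *	int fd = open("/dev/mem", O_RDWR | O_SYNC);
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, phys);
 *
 * A pfn outside RAM is mapped noncached (strongly ordered); with O_SYNC,
 * a valid RAM pfn is mapped write-combining; otherwise the caller's
 * protections are used unchanged.
 */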
#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)

static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
{
	void *ptr = __va(memblock_alloc(sz, align));
	memset(ptr, 0, sz);
	return ptr;
}

static void __init *early_alloc(unsigned long sz)
{
	return early_alloc_aligned(sz, sz);
}

static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
{
	if (pmd_none(*pmd)) {
		pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
		__pmd_populate(pmd, __pa(pte), prot);
	}
	BUG_ON(pmd_bad(*pmd));
	return pte_offset_kernel(pmd, addr);
}

static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void __init alloc_init_section(pud_t *pud, unsigned long addr,
				      unsigned long end, phys_addr_t phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pud, addr);

	/*
	 * Try a section mapping - end, addr and phys must all be aligned
	 * to a section boundary.  Note that PMDs refer to the individual
	 * L1 entries, whereas PGDs refer to a group of L1 entries making
	 * up one logical pointer to an L2 table.
	 */
	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
		pmd_t *p = pmd;

#ifndef CONFIG_ARM_LPAE
		if (addr & SECTION_SIZE)
			pmd++;
#endif

		do {
			*pmd = __pmd(phys | type->prot_sect);
			phys += SECTION_SIZE;
		} while (pmd++, addr += SECTION_SIZE, addr != end);

		flush_pmd_entry(p);
	} else {
		/*
		 * No need to loop; PTEs aren't interested in the
		 * individual L1 entries.
		 */
		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
	}
}

static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
	unsigned long end, unsigned long phys, const struct mem_type *type)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_section(pud, addr, next, phys, type);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}

#ifndef CONFIG_ARM_LPAE
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}
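	/*
	 * A supersection maps 16MB at a time: it occupies 16 consecutive
	 * L1 entries, all of which must be programmed with the same
	 * value - hence the 16-way replication loop below.
	 */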
	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
		       " at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}
#endif	/* !CONFIG_ARM_LPAE */

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
		       " at 0x%08lx in user region\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET &&
	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
		       " at 0x%08lx out of vmalloc space\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

#ifndef CONFIG_ARM_LPAE
	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}
#endif

	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       (long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_pud(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}
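/*
 * Illustrative use of iotable_init() (the FOO_* names below are
 * hypothetical, not part of this file): a machine's ->map_io()
 * callback usually hands over a static table of device windows:
 *
 *	static struct map_desc foo_io_desc[] __initdata = {
 *		{
 *			.virtual	= FOO_UART_VIRT,
 *			.pfn		= __phys_to_pfn(FOO_UART_PHYS),
 *			.length		= SZ_4K,
 *			.type		= MT_DEVICE,
 *		},
 *	};
 *
 *	iotable_init(foo_io_desc, ARRAY_SIZE(foo_io_desc));
 */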
/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	struct map_desc *md;
	struct vm_struct *vm;

	if (!nr)
		return;

	vm = early_alloc_aligned(sizeof(*vm) * nr, __alignof__(*vm));

	for (md = io_desc; nr; md++, nr--) {
		create_mapping(md);
		vm->addr = (void *)(md->virtual & PAGE_MASK);
		vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
		vm->phys_addr = __pfn_to_phys(md->pfn);
		vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
		vm->flags |= VM_ARM_MTYPE(md->type);
		vm->caller = iotable_init;
		vm_area_add_early(vm++);
	}
}

static void * __initdata vmalloc_min =
	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 240MB.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
		printk(KERN_WARNING
			"vmalloc area is too big, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
	return 0;
}
early_param("vmalloc", early_vmalloc);

static phys_addr_t lowmem_limit __initdata = 0;
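/*
 * Worked example (assuming this kernel's usual VMALLOC_END of
 * 0xff000000 and an 8MB VMALLOC_OFFSET): the default vmalloc_min above
 * is 0xff000000 - 240MB - 8MB = 0xef800000, so any RAM whose virtual
 * address would fall at or beyond that point is treated as highmem by
 * sanity_check_meminfo() below.
 */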
void __init sanity_check_meminfo(void)
{
	int i, j, highmem = 0;

	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
		struct membank *bank = &meminfo.bank[j];
		*bank = meminfo.bank[i];

		if (bank->start > ULONG_MAX)
			highmem = 1;

#ifdef CONFIG_HIGHMEM
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET)
			highmem = 1;

		bank->highmem = highmem;

		/*
		 * Split those memory banks which are partially overlapping
		 * the vmalloc area, greatly simplifying things later.
		 */
		if (!highmem && __va(bank->start) < vmalloc_min &&
		    bank->size > vmalloc_min - __va(bank->start)) {
			if (meminfo.nr_banks >= NR_BANKS) {
				printk(KERN_CRIT "NR_BANKS too low, "
						 "ignoring high memory\n");
			} else {
				memmove(bank + 1, bank,
					(meminfo.nr_banks - i) * sizeof(*bank));
				meminfo.nr_banks++;
				i++;
				bank[1].size -= vmalloc_min - __va(bank->start);
				bank[1].start = __pa(vmalloc_min - 1) + 1;
				bank[1].highmem = highmem = 1;
				j++;
			}
			bank->size = vmalloc_min - __va(bank->start);
		}
#else
		bank->highmem = highmem;

		/*
		 * Highmem banks not allowed with !CONFIG_HIGHMEM.
		 */
		if (highmem) {
			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
			       "(!CONFIG_HIGHMEM).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1);
			continue;
		}

		/*
		 * Check whether this memory bank would entirely overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET) {
			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
			       "(vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1);
			continue;
		}

		/*
		 * Check whether this memory bank would partially overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start + bank->size) > vmalloc_min ||
		    __va(bank->start + bank->size) < __va(bank->start)) {
			unsigned long newsize = vmalloc_min - __va(bank->start);
			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
			       "to -%.8llx (vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1,
			       (unsigned long long)bank->start + newsize - 1);
			bank->size = newsize;
		}
#endif
		if (!bank->highmem && bank->start + bank->size > lowmem_limit)
			lowmem_limit = bank->start + bank->size;

		j++;
	}
#ifdef CONFIG_HIGHMEM
	if (highmem) {
		const char *reason = NULL;

		if (cache_is_vipt_aliasing()) {
			/*
			 * Interactions between kmap and other mappings
			 * make highmem support with aliasing VIPT caches
			 * rather difficult.
			 */
			reason = "with VIPT aliasing cache";
		}
		if (reason) {
			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
				reason);
			while (j > 0 && meminfo.bank[j - 1].highmem)
				j--;
		}
	}
#endif
	meminfo.nr_banks = j;
	high_memory = __va(lowmem_limit - 1) + 1;
	memblock_set_current_limit(lowmem_limit);
}

static inline void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= lowmem_limit)
		end = lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_START; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}

#ifdef CONFIG_ARM_LPAE
/* the first page is reserved for pgd */
#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
#else
#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#endif

/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}
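/*
 * Worked example of the reservation size: on classic (non-LPAE) ARM,
 * PTRS_PER_PGD is 2048 and each pgd_t covers a pair of 32-bit L1
 * entries (8 bytes), so SWAPPER_PG_DIR_SIZE comes to the familiar
 * 16KB L1 translation table.
 */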
/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_START, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function.  In particular, you can't use any function or debugging
 * method which may touch any device; otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;

	/*
	 * Allocate the vector page early.
	 */
	vectors_page = early_alloc(PAGE_SIZE);

	for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the module area.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
	map.type = MT_HIGH_VECTORS;
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif
}

static void __init map_lowmem(void)
{
	struct memblock_region *reg;

	/* Map all the lowmem memory banks. */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;
		struct map_desc map;

		if (end > lowmem_limit)
			end = lowmem_limit;
		if (start >= end)
			break;

		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY;

		create_mapping(&map);
	}
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct machine_desc *mdesc)
{
	void *zero_page;

	memblock_set_current_limit(lowmem_limit);

	build_mem_type_table();
	prepare_page_table();
	map_lowmem();
	devicemaps_init(mdesc);
	kmap_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_page(NULL, empty_zero_page);
}
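/*
 * Boot-time ordering, for reference: setup_arch() runs
 * sanity_check_meminfo(), then reserves memory (including
 * arm_mm_memblock_reserve()), and finally calls paging_init(), which
 * performs the sequence above before bootmem_init() hands the
 * remaining memory over to the page allocator.
 */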