1 /* 2 * linux/arch/arm/mm/init.c 3 * 4 * Copyright (C) 1995-2005 Russell King 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 #include <linux/kernel.h> 11 #include <linux/errno.h> 12 #include <linux/ptrace.h> 13 #include <linux/swap.h> 14 #include <linux/init.h> 15 #include <linux/bootmem.h> 16 #include <linux/mman.h> 17 #include <linux/nodemask.h> 18 #include <linux/initrd.h> 19 20 #include <asm/mach-types.h> 21 #include <asm/setup.h> 22 #include <asm/sizes.h> 23 #include <asm/tlb.h> 24 25 #include <asm/mach/arch.h> 26 #include <asm/mach/map.h> 27 28 #include "mm.h" 29 30 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 31 32 extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; 33 extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; 34 extern unsigned long phys_initrd_start; 35 extern unsigned long phys_initrd_size; 36 37 /* 38 * The sole use of this is to pass memory configuration 39 * data from paging_init to mem_init. 40 */ 41 static struct meminfo meminfo __initdata = { 0, }; 42 43 /* 44 * empty_zero_page is a special page that is used for 45 * zero-initialized data and COW. 46 */ 47 struct page *empty_zero_page; 48 49 /* 50 * The pmd table for the upper-most set of pages. 51 */ 52 pmd_t *top_pmd; 53 54 void show_mem(void) 55 { 56 int free = 0, total = 0, reserved = 0; 57 int shared = 0, cached = 0, slab = 0, node; 58 59 printk("Mem-info:\n"); 60 show_free_areas(); 61 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 62 63 for_each_online_node(node) { 64 struct page *page, *end; 65 66 page = NODE_MEM_MAP(node); 67 end = page + NODE_DATA(node)->node_spanned_pages; 68 69 do { 70 total++; 71 if (PageReserved(page)) 72 reserved++; 73 else if (PageSwapCache(page)) 74 cached++; 75 else if (PageSlab(page)) 76 slab++; 77 else if (!page_count(page)) 78 free++; 79 else 80 shared += page_count(page) - 1; 81 page++; 82 } while (page < end); 83 } 84 85 printk("%d pages of RAM\n", total); 86 printk("%d free pages\n", free); 87 printk("%d reserved pages\n", reserved); 88 printk("%d slab pages\n", slab); 89 printk("%d pages shared\n", shared); 90 printk("%d pages swap cached\n", cached); 91 } 92 93 #define for_each_nodebank(iter,mi,no) \ 94 for (iter = 0; iter < mi->nr_banks; iter++) \ 95 if (mi->bank[iter].node == no) 96 97 /* 98 * FIXME: We really want to avoid allocating the bootmap bitmap 99 * over the top of the initrd. Hopefully, this is located towards 100 * the start of a bank, so if we allocate the bootmap bitmap at 101 * the end, we won't clash. 102 */ 103 static unsigned int __init 104 find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages) 105 { 106 unsigned int start_pfn, bank, bootmap_pfn; 107 108 start_pfn = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT; 109 bootmap_pfn = 0; 110 111 for_each_nodebank(bank, mi, node) { 112 unsigned int start, end; 113 114 start = mi->bank[bank].start >> PAGE_SHIFT; 115 end = (mi->bank[bank].size + 116 mi->bank[bank].start) >> PAGE_SHIFT; 117 118 if (end < start_pfn) 119 continue; 120 121 if (start < start_pfn) 122 start = start_pfn; 123 124 if (end <= start) 125 continue; 126 127 if (end - start >= bootmap_pages) { 128 bootmap_pfn = start; 129 break; 130 } 131 } 132 133 if (bootmap_pfn == 0) 134 BUG(); 135 136 return bootmap_pfn; 137 } 138 139 static int __init check_initrd(struct meminfo *mi) 140 { 141 int initrd_node = -2; 142 #ifdef CONFIG_BLK_DEV_INITRD 143 unsigned long end = phys_initrd_start + phys_initrd_size; 144 145 /* 146 * Make sure that the initrd is within a valid area of 147 * memory. 148 */ 149 if (phys_initrd_size) { 150 unsigned int i; 151 152 initrd_node = -1; 153 154 for (i = 0; i < mi->nr_banks; i++) { 155 unsigned long bank_end; 156 157 bank_end = mi->bank[i].start + mi->bank[i].size; 158 159 if (mi->bank[i].start <= phys_initrd_start && 160 end <= bank_end) 161 initrd_node = mi->bank[i].node; 162 } 163 } 164 165 if (initrd_node == -1) { 166 printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond " 167 "physical memory - disabling initrd\n", 168 phys_initrd_start, end); 169 phys_initrd_start = phys_initrd_size = 0; 170 } 171 #endif 172 173 return initrd_node; 174 } 175 176 /* 177 * Reserve the various regions of node 0 178 */ 179 static __init void reserve_node_zero(pg_data_t *pgdat) 180 { 181 unsigned long res_size = 0; 182 183 /* 184 * Register the kernel text and data with bootmem. 185 * Note that this can only be in node 0. 186 */ 187 #ifdef CONFIG_XIP_KERNEL 188 reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start); 189 #else 190 reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext); 191 #endif 192 193 /* 194 * Reserve the page tables. These are already in use, 195 * and can only be in node 0. 196 */ 197 reserve_bootmem_node(pgdat, __pa(swapper_pg_dir), 198 PTRS_PER_PGD * sizeof(pgd_t)); 199 200 /* 201 * Hmm... This should go elsewhere, but we really really need to 202 * stop things allocating the low memory; ideally we need a better 203 * implementation of GFP_DMA which does not assume that DMA-able 204 * memory starts at zero. 205 */ 206 if (machine_is_integrator() || machine_is_cintegrator()) 207 res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; 208 209 /* 210 * These should likewise go elsewhere. They pre-reserve the 211 * screen memory region at the start of main system memory. 212 */ 213 if (machine_is_edb7211()) 214 res_size = 0x00020000; 215 if (machine_is_p720t()) 216 res_size = 0x00014000; 217 218 #ifdef CONFIG_SA1111 219 /* 220 * Because of the SA1111 DMA bug, we want to preserve our 221 * precious DMA-able memory... 222 */ 223 res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; 224 #endif 225 if (res_size) 226 reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size); 227 } 228 229 static inline void prepare_page_table(struct meminfo *mi) 230 { 231 unsigned long addr; 232 233 /* 234 * Clear out all the mappings below the kernel image. 235 */ 236 for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE) 237 pmd_clear(pmd_off_k(addr)); 238 239 #ifdef CONFIG_XIP_KERNEL 240 /* The XIP kernel is mapped in the module area -- skip over it */ 241 addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK; 242 #endif 243 for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE) 244 pmd_clear(pmd_off_k(addr)); 245 246 /* 247 * Clear out all the kernel space mappings, except for the first 248 * memory bank, up to the end of the vmalloc region. 249 */ 250 for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size); 251 addr < VMALLOC_END; addr += PGDIR_SIZE) 252 pmd_clear(pmd_off_k(addr)); 253 } 254 255 static inline void map_memory_bank(struct membank *bank) 256 { 257 struct map_desc map; 258 259 map.pfn = __phys_to_pfn(bank->start); 260 map.virtual = __phys_to_virt(bank->start); 261 map.length = bank->size; 262 map.type = MT_MEMORY; 263 264 create_mapping(&map); 265 } 266 267 static unsigned long __init 268 bootmem_init_node(int node, int initrd_node, struct meminfo *mi) 269 { 270 unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; 271 unsigned long start_pfn, end_pfn, boot_pfn; 272 unsigned int boot_pages; 273 pg_data_t *pgdat; 274 int i; 275 276 start_pfn = -1UL; 277 end_pfn = 0; 278 279 /* 280 * Calculate the pfn range, and map the memory banks for this node. 281 */ 282 for_each_nodebank(i, mi, node) { 283 struct membank *bank = &mi->bank[i]; 284 unsigned long start, end; 285 286 start = bank->start >> PAGE_SHIFT; 287 end = (bank->start + bank->size) >> PAGE_SHIFT; 288 289 if (start_pfn > start) 290 start_pfn = start; 291 if (end_pfn < end) 292 end_pfn = end; 293 294 map_memory_bank(bank); 295 } 296 297 /* 298 * If there is no memory in this node, ignore it. 299 */ 300 if (end_pfn == 0) 301 return end_pfn; 302 303 /* 304 * Allocate the bootmem bitmap page. 305 */ 306 boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 307 boot_pfn = find_bootmap_pfn(node, mi, boot_pages); 308 309 /* 310 * Initialise the bootmem allocator for this node, handing the 311 * memory banks over to bootmem. 312 */ 313 node_set_online(node); 314 pgdat = NODE_DATA(node); 315 init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn); 316 317 for_each_nodebank(i, mi, node) 318 free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size); 319 320 /* 321 * Reserve the bootmem bitmap for this node. 322 */ 323 reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT, 324 boot_pages << PAGE_SHIFT); 325 326 #ifdef CONFIG_BLK_DEV_INITRD 327 /* 328 * If the initrd is in this node, reserve its memory. 329 */ 330 if (node == initrd_node) { 331 reserve_bootmem_node(pgdat, phys_initrd_start, 332 phys_initrd_size); 333 initrd_start = __phys_to_virt(phys_initrd_start); 334 initrd_end = initrd_start + phys_initrd_size; 335 } 336 #endif 337 338 /* 339 * Finally, reserve any node zero regions. 340 */ 341 if (node == 0) 342 reserve_node_zero(pgdat); 343 344 /* 345 * initialise the zones within this node. 346 */ 347 memset(zone_size, 0, sizeof(zone_size)); 348 memset(zhole_size, 0, sizeof(zhole_size)); 349 350 /* 351 * The size of this node has already been determined. If we need 352 * to do anything fancy with the allocation of this memory to the 353 * zones, now is the time to do it. 354 */ 355 zone_size[0] = end_pfn - start_pfn; 356 357 /* 358 * For each bank in this node, calculate the size of the holes. 359 * holes = node_size - sum(bank_sizes_in_node) 360 */ 361 zhole_size[0] = zone_size[0]; 362 for_each_nodebank(i, mi, node) 363 zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT; 364 365 /* 366 * Adjust the sizes according to any special requirements for 367 * this machine type. 368 */ 369 arch_adjust_zones(node, zone_size, zhole_size); 370 371 free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size); 372 373 return end_pfn; 374 } 375 376 static void __init bootmem_init(struct meminfo *mi) 377 { 378 unsigned long memend_pfn = 0; 379 int node, initrd_node, i; 380 381 /* 382 * Invalidate the node number for empty or invalid memory banks 383 */ 384 for (i = 0; i < mi->nr_banks; i++) 385 if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES) 386 mi->bank[i].node = -1; 387 388 memcpy(&meminfo, mi, sizeof(meminfo)); 389 390 prepare_page_table(mi); 391 392 /* 393 * Locate which node contains the ramdisk image, if any. 394 */ 395 initrd_node = check_initrd(mi); 396 397 /* 398 * Run through each node initialising the bootmem allocator. 399 */ 400 for_each_node(node) { 401 unsigned long end_pfn; 402 403 end_pfn = bootmem_init_node(node, initrd_node, mi); 404 405 /* 406 * Remember the highest memory PFN. 407 */ 408 if (end_pfn > memend_pfn) 409 memend_pfn = end_pfn; 410 } 411 412 high_memory = __va(memend_pfn << PAGE_SHIFT); 413 414 /* 415 * This doesn't seem to be used by the Linux memory manager any 416 * more, but is used by ll_rw_block. If we can get rid of it, we 417 * also get rid of some of the stuff above as well. 418 * 419 * Note: max_low_pfn and max_pfn reflect the number of _pages_ in 420 * the system, not the maximum PFN. 421 */ 422 max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET; 423 } 424 425 /* 426 * Set up device the mappings. Since we clear out the page tables for all 427 * mappings above VMALLOC_END, we will remove any debug device mappings. 428 * This means you have to be careful how you debug this function, or any 429 * called function. This means you can't use any function or debugging 430 * method which may touch any device, otherwise the kernel _will_ crash. 431 */ 432 static void __init devicemaps_init(struct machine_desc *mdesc) 433 { 434 struct map_desc map; 435 unsigned long addr; 436 void *vectors; 437 438 /* 439 * Allocate the vector page early. 440 */ 441 vectors = alloc_bootmem_low_pages(PAGE_SIZE); 442 BUG_ON(!vectors); 443 444 for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE) 445 pmd_clear(pmd_off_k(addr)); 446 447 /* 448 * Map the kernel if it is XIP. 449 * It is always first in the modulearea. 450 */ 451 #ifdef CONFIG_XIP_KERNEL 452 map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); 453 map.virtual = MODULE_START; 454 map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; 455 map.type = MT_ROM; 456 create_mapping(&map); 457 #endif 458 459 /* 460 * Map the cache flushing regions. 461 */ 462 #ifdef FLUSH_BASE 463 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS); 464 map.virtual = FLUSH_BASE; 465 map.length = SZ_1M; 466 map.type = MT_CACHECLEAN; 467 create_mapping(&map); 468 #endif 469 #ifdef FLUSH_BASE_MINICACHE 470 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M); 471 map.virtual = FLUSH_BASE_MINICACHE; 472 map.length = SZ_1M; 473 map.type = MT_MINICLEAN; 474 create_mapping(&map); 475 #endif 476 477 /* 478 * Create a mapping for the machine vectors at the high-vectors 479 * location (0xffff0000). If we aren't using high-vectors, also 480 * create a mapping at the low-vectors virtual address. 481 */ 482 map.pfn = __phys_to_pfn(virt_to_phys(vectors)); 483 map.virtual = 0xffff0000; 484 map.length = PAGE_SIZE; 485 map.type = MT_HIGH_VECTORS; 486 create_mapping(&map); 487 488 if (!vectors_high()) { 489 map.virtual = 0; 490 map.type = MT_LOW_VECTORS; 491 create_mapping(&map); 492 } 493 494 /* 495 * Ask the machine support to map in the statically mapped devices. 496 */ 497 if (mdesc->map_io) 498 mdesc->map_io(); 499 500 /* 501 * Finally flush the caches and tlb to ensure that we're in a 502 * consistent state wrt the writebuffer. This also ensures that 503 * any write-allocated cache lines in the vector page are written 504 * back. After this point, we can start to touch devices again. 505 */ 506 local_flush_tlb_all(); 507 flush_cache_all(); 508 } 509 510 /* 511 * paging_init() sets up the page tables, initialises the zone memory 512 * maps, and sets up the zero page, bad page and bad page tables. 513 */ 514 void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc) 515 { 516 void *zero_page; 517 518 build_mem_type_table(); 519 bootmem_init(mi); 520 devicemaps_init(mdesc); 521 522 top_pmd = pmd_off_k(0xffff0000); 523 524 /* 525 * allocate the zero page. Note that we count on this going ok. 526 */ 527 zero_page = alloc_bootmem_low_pages(PAGE_SIZE); 528 memzero(zero_page, PAGE_SIZE); 529 empty_zero_page = virt_to_page(zero_page); 530 flush_dcache_page(empty_zero_page); 531 } 532 533 static inline void free_area(unsigned long addr, unsigned long end, char *s) 534 { 535 unsigned int size = (end - addr) >> 10; 536 537 for (; addr < end; addr += PAGE_SIZE) { 538 struct page *page = virt_to_page(addr); 539 ClearPageReserved(page); 540 init_page_count(page); 541 free_page(addr); 542 totalram_pages++; 543 } 544 545 if (size && s) 546 printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); 547 } 548 549 static inline void 550 free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn) 551 { 552 struct page *start_pg, *end_pg; 553 unsigned long pg, pgend; 554 555 /* 556 * Convert start_pfn/end_pfn to a struct page pointer. 557 */ 558 start_pg = pfn_to_page(start_pfn); 559 end_pg = pfn_to_page(end_pfn); 560 561 /* 562 * Convert to physical addresses, and 563 * round start upwards and end downwards. 564 */ 565 pg = PAGE_ALIGN(__pa(start_pg)); 566 pgend = __pa(end_pg) & PAGE_MASK; 567 568 /* 569 * If there are free pages between these, 570 * free the section of the memmap array. 571 */ 572 if (pg < pgend) 573 free_bootmem_node(NODE_DATA(node), pg, pgend - pg); 574 } 575 576 /* 577 * The mem_map array can get very big. Free the unused area of the memory map. 578 */ 579 static void __init free_unused_memmap_node(int node, struct meminfo *mi) 580 { 581 unsigned long bank_start, prev_bank_end = 0; 582 unsigned int i; 583 584 /* 585 * [FIXME] This relies on each bank being in address order. This 586 * may not be the case, especially if the user has provided the 587 * information on the command line. 588 */ 589 for_each_nodebank(i, mi, node) { 590 bank_start = mi->bank[i].start >> PAGE_SHIFT; 591 if (bank_start < prev_bank_end) { 592 printk(KERN_ERR "MEM: unordered memory banks. " 593 "Not freeing memmap.\n"); 594 break; 595 } 596 597 /* 598 * If we had a previous bank, and there is a space 599 * between the current bank and the previous, free it. 600 */ 601 if (prev_bank_end && prev_bank_end != bank_start) 602 free_memmap(node, prev_bank_end, bank_start); 603 604 prev_bank_end = (mi->bank[i].start + 605 mi->bank[i].size) >> PAGE_SHIFT; 606 } 607 } 608 609 /* 610 * mem_init() marks the free areas in the mem_map and tells us how much 611 * memory is free. This is done after various parts of the system have 612 * claimed their memory after the kernel image. 613 */ 614 void __init mem_init(void) 615 { 616 unsigned int codepages, datapages, initpages; 617 int i, node; 618 619 codepages = &_etext - &_text; 620 datapages = &_end - &__data_start; 621 initpages = &__init_end - &__init_begin; 622 623 #ifndef CONFIG_DISCONTIGMEM 624 max_mapnr = virt_to_page(high_memory) - mem_map; 625 #endif 626 627 /* this will put all unused low memory onto the freelists */ 628 for_each_online_node(node) { 629 pg_data_t *pgdat = NODE_DATA(node); 630 631 free_unused_memmap_node(node, &meminfo); 632 633 if (pgdat->node_spanned_pages != 0) 634 totalram_pages += free_all_bootmem_node(pgdat); 635 } 636 637 #ifdef CONFIG_SA1111 638 /* now that our DMA memory is actually so designated, we can free it */ 639 free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL); 640 #endif 641 642 /* 643 * Since our memory may not be contiguous, calculate the 644 * real number of pages we have in this system 645 */ 646 printk(KERN_INFO "Memory:"); 647 648 num_physpages = 0; 649 for (i = 0; i < meminfo.nr_banks; i++) { 650 num_physpages += meminfo.bank[i].size >> PAGE_SHIFT; 651 printk(" %ldMB", meminfo.bank[i].size >> 20); 652 } 653 654 printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); 655 printk(KERN_NOTICE "Memory: %luKB available (%dK code, " 656 "%dK data, %dK init)\n", 657 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 658 codepages >> 10, datapages >> 10, initpages >> 10); 659 660 if (PAGE_SIZE >= 16384 && num_physpages <= 128) { 661 extern int sysctl_overcommit_memory; 662 /* 663 * On a machine this small we won't get 664 * anywhere without overcommit, so turn 665 * it on by default. 666 */ 667 sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; 668 } 669 } 670 671 void free_initmem(void) 672 { 673 if (!machine_is_integrator() && !machine_is_cintegrator()) { 674 free_area((unsigned long)(&__init_begin), 675 (unsigned long)(&__init_end), 676 "init"); 677 } 678 } 679 680 #ifdef CONFIG_BLK_DEV_INITRD 681 682 static int keep_initrd; 683 684 void free_initrd_mem(unsigned long start, unsigned long end) 685 { 686 if (!keep_initrd) 687 free_area(start, end, "initrd"); 688 } 689 690 static int __init keepinitrd_setup(char *__unused) 691 { 692 keep_initrd = 1; 693 return 1; 694 } 695 696 __setup("keepinitrd", keepinitrd_setup); 697 #endif 698