/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 * Copyright (c) 2004 Silicon Graphics, Inc
 *      Russ Anderson <rja@sgi.com>
 *      Jesse Barnes <jbarnes@sgi.com>
 *      Jack Steiner <steiner@sgi.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to setup the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
        struct ia64_node_data *node_data;
        unsigned long pernode_addr;
        unsigned long pernode_size;
        unsigned long num_physpages;
#ifdef CONFIG_ZONE_DMA
        unsigned long num_dma_physpages;
#endif
        unsigned long min_pfn;
        unsigned long max_pfn;
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;

pg_data_t *pgdat_list[MAX_NUMNODES];

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define MAX_NODE_ALIGN_OFFSET   (32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)                                      \
        ((((addr) + 1024*1024-1) & ~(1024*1024-1)) +                    \
            (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))

/**
 * build_node_maps - callback to setup bootmem structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * We allocate a struct bootmem_data for each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.  We also update min_low_pfn and max_low_pfn here as we receive
 * memory ranges from the caller.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
                                  int node)
{
        unsigned long spfn, epfn, end = start + len;
        struct bootmem_data *bdp = &bootmem_node_data[node];

        epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
        spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;

        if (!bdp->node_low_pfn) {
                bdp->node_min_pfn = spfn;
                bdp->node_low_pfn = epfn;
        } else {
                bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
                bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
        }

        return 0;
}
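
/*
 * Illustrative only, not used by the code: assuming PERCPU_PAGE_SIZE is
 * 64KB, NODEDATA_ALIGN() above first rounds the candidate address up to a
 * 1MB boundary and then skews it by the node number so per-node structures
 * on different nodes do not land on the same cache aliases:
 *
 *      NODEDATA_ALIGN(0x4123456, 0) == 0x4200000
 *      NODEDATA_ALIGN(0x4123456, 2) == 0x4200000 + 2 * 64KB == 0x4220000
 *
 * The per-node skew wraps at MAX_NODE_ALIGN_OFFSET (32MB), so the padding
 * added in front of a node's structures stays bounded.
 */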

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
 */
static int __meminit early_nr_cpus_node(int node)
{
        int cpu, n = 0;

        for_each_possible_early_cpu(cpu)
                if (node == node_cpuid[cpu].nid)
                        n++;

        return n;
}

/**
 * compute_pernodesize - compute size of pernode data
 * @node: the node id.
 */
static unsigned long __meminit compute_pernodesize(int node)
{
        unsigned long pernodesize = 0, cpus;

        cpus = early_nr_cpus_node(node);
        pernodesize += PERCPU_PAGE_SIZE * cpus;
        pernodesize += node * L1_CACHE_BYTES;
        pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
        pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
        pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
        pernodesize = PAGE_ALIGN(pernodesize);
        return pernodesize;
}

/**
 * per_cpu_node_setup - setup per-cpu areas on each node
 * @cpu_data: per-cpu area on this node
 * @node: node to setup
 *
 * Copy the static per-cpu data into the region we just set aside and then
 * setup __per_cpu_offset for each CPU on this node.  Return a pointer to
 * the end of the area.
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
        int cpu;

        for_each_possible_early_cpu(cpu) {
                void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;

                if (node != node_cpuid[cpu].nid)
                        continue;

                memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
                __per_cpu_offset[cpu] = (char *)__va(cpu_data) -
                        __per_cpu_start;

                /*
                 * The percpu area for cpu0 is moved from the __init area,
                 * which is set up by head.S and used until this point.
                 * Update ar.k3.  This move ensures that the percpu area for
                 * cpu0 is on the correct node and that its virtual address
                 * isn't insanely far from the other percpu areas, which is
                 * important for the congruent percpu allocator.
                 */
                if (cpu == 0)
                        ia64_set_kr(IA64_KR_PER_CPU_DATA,
                                    (unsigned long)cpu_data -
                                    (unsigned long)__per_cpu_start);

                cpu_data += PERCPU_PAGE_SIZE;
        }
#endif
        return cpu_data;
}
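
/*
 * A sketch of the effect of per_cpu_node_setup() (illustrative addresses):
 * once the static percpu image for cpu N has been copied to a node-local
 * virtual address V,
 *
 *      __per_cpu_offset[N] == V - (unsigned long)__per_cpu_start
 *
 * so a later per_cpu(var, N) reference resolves to &var plus that offset,
 * i.e. to the copy living in cpu N's node-local pernode space rather than
 * to the copy in the original kernel image.
 */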

#ifdef CONFIG_SMP
/**
 * setup_per_cpu_areas - setup percpu areas
 *
 * Arch code has already allocated and initialized percpu areas.  All
 * this function has to do is to teach the determined layout to the
 * dynamic percpu allocator, which happens to be more complex than
 * creating whole new ones using helpers.
 */
void __init setup_per_cpu_areas(void)
{
        struct pcpu_alloc_info *ai;
        struct pcpu_group_info *uninitialized_var(gi);
        unsigned int *cpu_map;
        void *base;
        unsigned long base_offset;
        unsigned int cpu;
        ssize_t static_size, reserved_size, dyn_size;
        int node, prev_node, unit, nr_units, rc;

        ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
        if (!ai)
                panic("failed to allocate pcpu_alloc_info");
        cpu_map = ai->groups[0].cpu_map;

        /* determine base */
        base = (void *)ULONG_MAX;
        for_each_possible_cpu(cpu)
                base = min(base,
                           (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
        base_offset = (void *)__per_cpu_start - base;

        /* build cpu_map, units are grouped by node */
        unit = 0;
        for_each_node(node)
                for_each_possible_cpu(cpu)
                        if (node == node_cpuid[cpu].nid)
                                cpu_map[unit++] = cpu;
        nr_units = unit;

        /* set basic parameters */
        static_size = __per_cpu_end - __per_cpu_start;
        reserved_size = PERCPU_MODULE_RESERVE;
        dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
        if (dyn_size < 0)
                panic("percpu area overflow static=%zd reserved=%zd\n",
                      static_size, reserved_size);

        ai->static_size = static_size;
        ai->reserved_size = reserved_size;
        ai->dyn_size = dyn_size;
        ai->unit_size = PERCPU_PAGE_SIZE;
        ai->atom_size = PAGE_SIZE;
        ai->alloc_size = PERCPU_PAGE_SIZE;

        /*
         * CPUs are put into groups according to node.  Walk cpu_map
         * and create new groups at node boundaries.
         */
        prev_node = -1;
        ai->nr_groups = 0;
        for (unit = 0; unit < nr_units; unit++) {
                cpu = cpu_map[unit];
                node = node_cpuid[cpu].nid;

                if (node == prev_node) {
                        gi->nr_units++;
                        continue;
                }
                prev_node = node;

                gi = &ai->groups[ai->nr_groups++];
                gi->nr_units = 1;
                gi->base_offset = __per_cpu_offset[cpu] + base_offset;
                gi->cpu_map = &cpu_map[unit];
        }

        rc = pcpu_setup_first_chunk(ai, base);
        if (rc)
                panic("failed to setup percpu area (err=%d)", rc);

        pcpu_free_alloc_info(ai);
}
#endif
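
/*
 * Example of the grouping done above (hypothetical 2-node machine with
 * cpus 0-1 on node 0 and cpus 2-3 on node 1): cpu_map becomes {0, 1, 2, 3}
 * and nr_groups ends up as 2, with
 *
 *      groups[0]: cpu_map = &cpu_map[0], nr_units = 2,
 *                 base_offset derived from __per_cpu_offset[0]
 *      groups[1]: cpu_map = &cpu_map[2], nr_units = 2,
 *                 base_offset derived from __per_cpu_offset[2]
 *
 * telling the percpu allocator that units within a group are contiguous
 * while the groups themselves (one per node) may be far apart.
 */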

/**
 * fill_pernode - initialize pernode data.
 * @node: the node id.
 * @pernode: physical address of pernode data
 * @pernodesize: size of the pernode data
 */
static void __init fill_pernode(int node, unsigned long pernode,
                                unsigned long pernodesize)
{
        void *cpu_data;
        int cpus = early_nr_cpus_node(node);
        struct bootmem_data *bdp = &bootmem_node_data[node];

        mem_data[node].pernode_addr = pernode;
        mem_data[node].pernode_size = pernodesize;
        memset(__va(pernode), 0, pernodesize);

        cpu_data = (void *)pernode;
        pernode += PERCPU_PAGE_SIZE * cpus;
        pernode += node * L1_CACHE_BYTES;

        pgdat_list[node] = __va(pernode);
        pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

        mem_data[node].node_data = __va(pernode);
        pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));

        pgdat_list[node]->bdata = bdp;
        pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

        cpu_data = per_cpu_node_setup(cpu_data, node);

        return;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something
 * like the following in the first chunk of addr. space large enough to hold
 * it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
                                     int node)
{
        unsigned long spfn, epfn;
        unsigned long pernodesize = 0, pernode, pages, mapsize;
        struct bootmem_data *bdp = &bootmem_node_data[node];

        spfn = start >> PAGE_SHIFT;
        epfn = (start + len) >> PAGE_SHIFT;

        pages = bdp->node_low_pfn - bdp->node_min_pfn;
        mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;

        /*
         * Make sure this memory falls within this node's usable memory
         * since we may have thrown some away in build_maps().
         */
        if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
                return 0;

        /* Don't setup this node's local space twice... */
        if (mem_data[node].pernode_addr)
                return 0;

        /*
         * Calculate total size needed, incl. what's necessary
         * for good alignment and alias prevention.
         */
        pernodesize = compute_pernodesize(node);
        pernode = NODEDATA_ALIGN(start, node);

        /* Is this range big enough for what we want to store here? */
        if (start + len > (pernode + pernodesize + mapsize))
                fill_pernode(node, pernode, pernodesize);

        return 0;
}
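
/*
 * Worked example of the fit check above (illustrative numbers): for node 0,
 * a usable range starting at 64MB with len 16MB gives pernode ==
 * NODEDATA_ALIGN(64MB, 0) == 64MB.  With a 1MB pernodesize and a 128KB
 * bootmem map, 64MB + 16MB is comfortably beyond 64MB + 1MB + 128KB, so
 * fill_pernode() is called.  Later ranges on the same node are ignored
 * because pernode_addr is already set.
 */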

/**
 * free_node_bootmem - free bootmem allocator memory for use
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Simply calls the bootmem allocator to free the specified range from
 * the given pg_data_t's bdata struct.  After this function has been called
 * for all the entries in the EFI memory map, the bootmem allocator will
 * be ready to service allocation requests.
 */
static int __init free_node_bootmem(unsigned long start, unsigned long len,
                                    int node)
{
        free_bootmem_node(pgdat_list[node], start, len);

        return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
        unsigned long base, size, pages;
        struct bootmem_data *bdp;
        int node;

        for_each_online_node(node) {
                pg_data_t *pdp = pgdat_list[node];

                if (node_isset(node, memory_less_mask))
                        continue;

                bdp = pdp->bdata;

                /* First the bootmem_map itself */
                pages = bdp->node_low_pfn - bdp->node_min_pfn;
                size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
                base = __pa(bdp->node_bootmem_map);
                reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);

                /* Now the per-node space */
                size = mem_data[node].pernode_size;
                base = __pa(mem_data[node].pernode_addr);
                reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
        }
}

static void __meminit scatter_node_data(void)
{
        pg_data_t **dst;
        int node;

        /*
         * for_each_online_node() can't be used here.  node_online_map is
         * not yet set for hot-added nodes at this point, because we are
         * halfway through initialization of the new node's structures.
         * If for_each_online_node() were used, a new node's pg_data_ptrs
         * would not be initialized.  Instead, pgdat_list[] is checked.
         */
        for_each_node(node) {
                if (pgdat_list[node]) {
                        dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
                        memcpy(dst, pgdat_list, sizeof(pgdat_list));
                }
        }
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.  The active_cpus field of the per-node
 * structure gets setup by the platform_cpu_init() function later.
 */
static void __init initialize_pernode_data(void)
{
        int cpu, node;

        scatter_node_data();

#ifdef CONFIG_SMP
        /* Set the node_data pointer for each per-cpu struct */
        for_each_possible_early_cpu(cpu) {
                node = node_cpuid[cpu].nid;
                per_cpu(ia64_cpu_info, cpu).node_data =
                        mem_data[node].node_data;
        }
#else
        {
                struct cpuinfo_ia64 *cpu0_cpu_info;
                cpu = 0;
                node = node_cpuid[cpu].nid;
                cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
                        ((char *)&ia64_cpu_info - __per_cpu_start));
                cpu0_cpu_info->node_data = mem_data[node].node_data;
        }
#endif /* CONFIG_SMP */
}

/**
 * memory_less_node_alloc - attempt to allocate memory on the best NUMA SLIT
 * node, but fall back to any other node when __alloc_bootmem_node() fails
 * for the best node.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
 */
static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
{
        void *ptr = NULL;
        u8 best = 0xff;
        int bestnode = -1, node, anynode = 0;

        for_each_online_node(node) {
                if (node_isset(node, memory_less_mask))
                        continue;
                else if (node_distance(nid, node) < best) {
                        best = node_distance(nid, node);
                        bestnode = node;
                }
                anynode = node;
        }

        if (bestnode == -1)
                bestnode = anynode;

        ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
                                   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));

        return ptr;
}
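
/*
 * Illustration with hypothetical SLIT values: for a memory-less node 2 with
 * node_distance(2, 0) == 21 and node_distance(2, 1) == 17, the loop above
 * selects bestnode == 1, so node 2's pernode area is carved out of node 1's
 * bootmem.  If no distance beats the initial 0xff (e.g. the SLIT is
 * missing), anynode - the last node seen that has memory - is used instead.
 */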

/**
 * memory_less_nodes - allocate and initialize pernode information for
 * CPU-only (memory-less) nodes.
 */
static void __init memory_less_nodes(void)
{
        unsigned long pernodesize;
        void *pernode;
        int node;

        for_each_node_mask(node, memory_less_mask) {
                pernodesize = compute_pernodesize(node);
                pernode = memory_less_node_alloc(node, pernodesize);
                fill_pernode(node, __pa(pernode), pernodesize);
        }

        return;
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
        int node;

        reserve_memory();

        if (num_online_nodes() == 0) {
                printk(KERN_ERR "node info missing!\n");
                node_set_online(0);
        }

        nodes_or(memory_less_mask, memory_less_mask, node_online_map);
        min_low_pfn = -1;
        max_low_pfn = 0;

        /* These actually end up getting called by call_pernode_memory() */
        efi_memmap_walk(filter_rsvd_memory, build_node_maps);
        efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
        efi_memmap_walk(find_max_min_low_pfn, NULL);

        for_each_online_node(node)
                if (bootmem_node_data[node].node_low_pfn) {
                        node_clear(node, memory_less_mask);
                        mem_data[node].min_pfn = ~0UL;
                }

        efi_memmap_walk(filter_memory, register_active_ranges);

        /*
         * Initialize the boot memory maps in reverse order since that's
         * what the bootmem allocator expects
         */
        for (node = MAX_NUMNODES - 1; node >= 0; node--) {
                unsigned long pernode, pernodesize, map;
                struct bootmem_data *bdp;

                if (!node_online(node))
                        continue;
                else if (node_isset(node, memory_less_mask))
                        continue;

                bdp = &bootmem_node_data[node];
                pernode = mem_data[node].pernode_addr;
                pernodesize = mem_data[node].pernode_size;
                map = pernode + pernodesize;

                init_bootmem_node(pgdat_list[node],
                                  map >> PAGE_SHIFT,
                                  bdp->node_min_pfn,
                                  bdp->node_low_pfn);
        }

        efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);

        reserve_pernode_space();
        memory_less_nodes();
        initialize_pernode_data();

        max_pfn = max_low_pfn;

        find_initrd();
}

#ifdef CONFIG_SMP
/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already, we just need to set
 * local_per_cpu_offset
 */
void __cpuinit *per_cpu_init(void)
{
        int cpu;
        static int first_time = 1;

        if (first_time) {
                first_time = 0;
                for_each_possible_early_cpu(cpu)
                        per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
        }

        return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */

/**
 * show_mem - give short summary of memory stats
 *
 * Shows a simple page count of reserved and used pages in the system.
 * For discontig machines, it does this on a per-pgdat basis.
 */
void show_mem(unsigned int filter)
{
        int i, total_reserved = 0;
        int total_shared = 0, total_cached = 0;
        unsigned long total_present = 0;
        pg_data_t *pgdat;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas(filter);
        printk(KERN_INFO "Node memory in pages:\n");
        for_each_online_pgdat(pgdat) {
                unsigned long present;
                unsigned long flags;
                int shared = 0, cached = 0, reserved = 0;
                int nid = pgdat->node_id;

                if (skip_free_areas_node(filter, nid))
                        continue;
                pgdat_resize_lock(pgdat, &flags);
                present = pgdat->node_present_pages;
                for (i = 0; i < pgdat->node_spanned_pages; i++) {
                        struct page *page;
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();
                        if (pfn_valid(pgdat->node_start_pfn + i))
                                page = pfn_to_page(pgdat->node_start_pfn + i);
                        else {
                                i = vmemmap_find_next_valid_pfn(nid, i) - 1;
                                continue;
                        }
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
                pgdat_resize_unlock(pgdat, &flags);
                total_present += present;
                total_reserved += reserved;
                total_cached += cached;
                total_shared += shared;
                printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, "
                       "shrd: %10d, swpd: %10d\n", nid,
                       present, reserved, shared, cached);
        }
        printk(KERN_INFO "%ld pages of RAM\n", total_present);
        printk(KERN_INFO "%d reserved pages\n", total_reserved);
        printk(KERN_INFO "%d pages shared\n", total_shared);
        printk(KERN_INFO "%d pages swap cached\n", total_cached);
        printk(KERN_INFO "Total of %ld pages in page table cache\n",
               quicklist_total_size());
        printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
}

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes.  Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
        unsigned long rs, re, end = start + len;
        void (*func)(unsigned long, unsigned long, int);
        int i;

        start = PAGE_ALIGN(start);
        end &= PAGE_MASK;
        if (start >= end)
                return;

        func = arg;

        if (!num_node_memblks) {
                /* No SRAT table, so assume one node (node 0) */
                if (start < end)
                        (*func)(start, end - start, 0);
                return;
        }

        for (i = 0; i < num_node_memblks; i++) {
                rs = max(start, node_memblk[i].start_paddr);
                re = min(end, node_memblk[i].start_paddr +
                         node_memblk[i].size);

                if (rs < re)
                        (*func)(rs, re - rs, node_memblk[i].nid);

                if (re == end)
                        break;
        }
}
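
/*
 * Worked example (hypothetical SRAT layout): if node_memblk[] describes
 * node 0 as [0, 2GB) and node 1 as [2GB, 4GB), then a call covering the EFI
 * range [1.5GB, 2.5GB) produces two callbacks:
 *
 *      (*func)(1.5GB, 0.5GB, 0);       portion clipped to node 0
 *      (*func)(2.0GB, 0.5GB, 1);       portion clipped to node 1
 */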

/**
 * count_node_pages - callback to build per-node memory info structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Each node has its own number of physical pages, DMAable pages, start, and
 * end page frame number.  This routine will be called by call_pernode_memory()
 * for each piece of usable memory and will setup these values for each node.
 * Very similar to build_maps().
 */
static __init int count_node_pages(unsigned long start, unsigned long len, int node)
{
        unsigned long end = start + len;

        mem_data[node].num_physpages += len >> PAGE_SHIFT;
#ifdef CONFIG_ZONE_DMA
        if (start <= __pa(MAX_DMA_ADDRESS))
                mem_data[node].num_dma_physpages +=
                        (min(end, __pa(MAX_DMA_ADDRESS)) - start) >> PAGE_SHIFT;
#endif
        start = GRANULEROUNDDOWN(start);
        end = GRANULEROUNDUP(end);
        mem_data[node].max_pfn = max(mem_data[node].max_pfn,
                                     end >> PAGE_SHIFT);
        mem_data[node].min_pfn = min(mem_data[node].min_pfn,
                                     start >> PAGE_SHIFT);

        return 0;
}

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void __init paging_init(void)
{
        unsigned long max_dma;
        unsigned long pfn_offset = 0;
        unsigned long max_pfn = 0;
        int node;
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

        efi_memmap_walk(filter_rsvd_memory, count_node_pages);

        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();

#ifdef CONFIG_VIRTUAL_MEM_MAP
        VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
                sizeof(struct page));
        vmem_map = (struct page *) VMALLOC_END;
        efi_memmap_walk(create_mem_map_page_table, NULL);
        printk("Virtual mem_map starts at 0x%p\n", vmem_map);
#endif

        for_each_online_node(node) {
                num_physpages += mem_data[node].num_physpages;
                pfn_offset = mem_data[node].min_pfn;

#ifdef CONFIG_VIRTUAL_MEM_MAP
                NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
#endif
                if (mem_data[node].max_pfn > max_pfn)
                        max_pfn = mem_data[node].max_pfn;
        }

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA
        max_zone_pfns[ZONE_DMA] = max_dma;
#endif
        max_zone_pfns[ZONE_NORMAL] = max_pfn;
        free_area_init_nodes(max_zone_pfns);

        zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

#ifdef CONFIG_MEMORY_HOTPLUG
pg_data_t *arch_alloc_nodedata(int nid)
{
        unsigned long size = compute_pernodesize(nid);

        return kzalloc(size, GFP_KERNEL);
}

void arch_free_nodedata(pg_data_t *pgdat)
{
        kfree(pgdat);
}

void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
        pgdat_list[update_node] = update_pgdat;
        scatter_node_data();
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(struct page *start_page,
                               unsigned long size, int node)
{
        return vmemmap_populate_basepages(start_page, size, node);
}
#endif