/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/system.h>
#include <asm/smp.h>

static int numa_enabled = 1;

static char *cmdline __initdata;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;

static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
                                               unsigned int *nid)
{
        unsigned long long mem;
        char *p = cmdline;
        static unsigned int fake_nid;
        static unsigned long long curr_boundary;

        /*
         * Modify node id, iff we started creating NUMA nodes.
         * We want to continue from where we left off last time.
         */
        if (fake_nid)
                *nid = fake_nid;
        /*
         * In case there are no more arguments to parse, the
         * node_id should be the same as the last fake node id
         * (we've handled this above).
         */
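        /*
         * Each remaining token in the "fake=" list is an upper memory
         * boundary in memparse() syntax (e.g. 512M, 1G) and the tokens
         * are expected in ascending order.  Once the memory being
         * registered crosses the current boundary, a new fake node id
         * is handed out and we advance to the next token.
         */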
        if (!p)
                return 0;

        mem = memparse(p, &p);
        if (!mem)
                return 0;

        if (mem < curr_boundary)
                return 0;

        curr_boundary = mem;

        if ((end_pfn << PAGE_SHIFT) > mem) {
                /*
                 * Skip commas and spaces
                 */
                while (*p == ',' || *p == ' ' || *p == '\t')
                        p++;

                cmdline = p;
                fake_nid++;
                *nid = fake_nid;
                dbg("created new fake_node with id %d\n", fake_nid);
                return 1;
        }
        return 0;
}

static void __cpuinit map_cpu_to_node(int cpu, int node)
{
        numa_cpu_lookup_table[cpu] = node;

        dbg("adding cpu %d to node %d\n", cpu, node);

        if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
                cpu_set(cpu, numa_cpumask_lookup_table[node]);
}

#ifdef CONFIG_HOTPLUG_CPU
static void unmap_cpu_from_node(unsigned long cpu)
{
        int node = numa_cpu_lookup_table[cpu];

        dbg("removing cpu %lu from node %d\n", cpu, node);

        if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
                cpu_clear(cpu, numa_cpumask_lookup_table[node]);
        } else {
                printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
                       cpu, node);
        }
}
#endif /* CONFIG_HOTPLUG_CPU */

static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
        unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
        struct device_node *cpu_node = NULL;
        const unsigned int *interrupt_server, *reg;
        int len;

        while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
                /* Try interrupt server first */
                interrupt_server = of_get_property(cpu_node,
                                        "ibm,ppc-interrupt-server#s", &len);

                len = len / sizeof(u32);

                if (interrupt_server && (len > 0)) {
                        while (len--) {
                                if (interrupt_server[len] == hw_cpuid)
                                        return cpu_node;
                        }
                } else {
                        reg = of_get_property(cpu_node, "reg", &len);
                        if (reg && (len > 0) && (reg[0] == hw_cpuid))
                                return cpu_node;
                }
        }

        return NULL;
}

/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
        return of_get_property(dev, "ibm,associativity", NULL);
}

/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
        int nid = -1;
        const unsigned int *tmp;

        if (min_common_depth == -1)
                goto out;

        tmp = of_get_associativity(device);
        if (!tmp)
                goto out;

        if (tmp[0] >= min_common_depth)
                nid = tmp[min_common_depth];

        /* POWER4 LPAR uses 0xffff as invalid node */
        if (nid == 0xffff || nid >= MAX_NUMNODES)
                nid = -1;
out:
        return nid;
}
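/*
 * Illustrative example (made-up values): with min_common_depth == 4 and
 * an "ibm,associativity" property of <5 0 0 0 1 3>, the first cell (5) is
 * the number of entries that follow, so tmp[0] >= min_common_depth holds
 * and the node id is tmp[4] == 1.
 */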
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
        struct device_node *tmp;
        int nid = -1;

        of_node_get(device);
        while (device) {
                nid = of_node_to_nid_single(device);
                if (nid != -1)
                        break;

                tmp = device;
                device = of_get_parent(tmp);
                of_node_put(tmp);
        }
        of_node_put(device);

        return nid;
}
EXPORT_SYMBOL_GPL(of_node_to_nid);

/*
 * In theory, the "ibm,associativity" property may contain multiple
 * associativity lists because a resource may be multiply connected
 * into the machine.  This resource then has different associativity
 * characteristics relative to its multiple connections.  We ignore
 * this for now.  We also assume that all cpu and memory sets have
 * their distances represented at a common level.  This won't be
 * true for hierarchical NUMA.
 *
 * In any case the ibm,associativity-reference-points should give
 * the correct depth for a normal NUMA system.
 *
 * - Dave Hansen <haveblue@us.ibm.com>
 */
static int __init find_min_common_depth(void)
{
        int depth;
        const unsigned int *ref_points;
        struct device_node *rtas_root;
        unsigned int len;

        rtas_root = of_find_node_by_path("/rtas");

        if (!rtas_root)
                return -1;

        /*
         * This property is two 32-bit integers, each representing a level of
         * depth in the associativity nodes.  The first is for an SMP
         * configuration (should be all 0's) and the second is for a normal
         * NUMA configuration.
         */
        ref_points = of_get_property(rtas_root,
                        "ibm,associativity-reference-points", &len);

        if (ref_points && (len >= 2 * sizeof(unsigned int))) {
                depth = ref_points[1];
        } else {
                dbg("NUMA: ibm,associativity-reference-points not found.\n");
                depth = -1;
        }
        of_node_put(rtas_root);

        return depth;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
        struct device_node *memory = NULL;

        memory = of_find_node_by_type(memory, "memory");
        if (!memory)
                panic("numa.c: No memory nodes found!");

        *n_addr_cells = of_n_addr_cells(memory);
        *n_size_cells = of_n_size_cells(memory);
        of_node_put(memory);
}

static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
{
        unsigned long result = 0;

        while (n--) {
                result = (result << 32) | **buf;
                (*buf)++;
        }
        return result;
}

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
        int nid = 0;
        struct device_node *cpu = find_cpu_node(lcpu);

        if (!cpu) {
                WARN_ON(1);
                goto out;
        }

        nid = of_node_to_nid_single(cpu);

        if (nid < 0 || !node_online(nid))
                nid = any_online_node(NODE_MASK_ALL);
out:
        map_cpu_to_node(lcpu, nid);

        of_node_put(cpu);

        return nid;
}

static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
                                       unsigned long action,
                                       void *hcpu)
{
        unsigned long lcpu = (unsigned long)hcpu;
        int ret = NOTIFY_DONE;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                numa_setup_cpu(lcpu);
                ret = NOTIFY_OK;
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                unmap_cpu_from_node(lcpu);
                ret = NOTIFY_OK;
                break;
#endif
        }
        return ret;
}
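/*
 * cpu_numa_callback() is registered from do_init_bootmem(), which also
 * invokes it directly for the boot cpu (no CPU_UP_PREPARE notification is
 * generated for a cpu that is already running).
 */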
/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
                                                      unsigned long size)
{
        /*
         * We use lmb_end_of_DRAM() in here instead of memory_limit because
         * we've already adjusted it for the limit and it takes care of
         * having memory holes below the limit.
         */

        if (!memory_limit)
                return size;

        if (start + size <= lmb_end_of_DRAM())
                return size;

        if (start >= lmb_end_of_DRAM())
                return 0;

        return lmb_end_of_DRAM() - start;
}

/*
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node. This assumes n_mem_{addr,size}_cells have been set.
 */
static void __init parse_drconf_memory(struct device_node *memory)
{
        const unsigned int *lm, *dm, *aa;
        unsigned int ls, ld, la;
        unsigned int n, aam, aalen;
        unsigned long lmb_size, size, start;
        int nid, default_nid = 0;
        unsigned int ai, flags;

        lm = of_get_property(memory, "ibm,lmb-size", &ls);
        dm = of_get_property(memory, "ibm,dynamic-memory", &ld);
        aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la);
        if (!lm || !dm || !aa ||
            ls < sizeof(unsigned int) || ld < sizeof(unsigned int) ||
            la < 2 * sizeof(unsigned int))
                return;

        lmb_size = read_n_cells(n_mem_size_cells, &lm);
        n = *dm++;              /* number of LMBs */
        aam = *aa++;            /* number of associativity lists */
        aalen = *aa++;          /* length of each associativity list */
        if (ld < (n * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int) ||
            la < (aam * aalen + 2) * sizeof(unsigned int))
                return;

        for (; n != 0; --n) {
                start = read_n_cells(n_mem_addr_cells, &dm);
                ai = dm[2];
                flags = dm[3];
                dm += 4;
                /* 0x80 == reserved, 0x8 == assigned to us */
                if ((flags & 0x80) || !(flags & 0x8))
                        continue;
                nid = default_nid;
                /* flags & 0x40 means associativity index is invalid */
                if (min_common_depth > 0 && min_common_depth <= aalen &&
                    (flags & 0x40) == 0 && ai < aam) {
                        /* this is like of_node_to_nid_single */
                        nid = aa[ai * aalen + min_common_depth - 1];
                        if (nid == 0xffff || nid >= MAX_NUMNODES)
                                nid = default_nid;
                }

                fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT),
                                          &nid);
                node_set_online(nid);

                size = numa_enforce_memory_limit(start, lmb_size);
                if (!size)
                        continue;

                add_active_range(nid, start >> PAGE_SHIFT,
                                 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
        }
}
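/*
 * Top-level boot-time NUMA scan: derive node ids for present cpus and for
 * every memory node in the device tree, and mark those nodes online.
 * Returns 0 on success, or a negative value when NUMA is disabled or no
 * usable associativity information was found (the caller then falls back
 * to setup_nonnuma()).
 */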
static int __init parse_numa_properties(void)
{
        struct device_node *cpu = NULL;
        struct device_node *memory = NULL;
        int default_nid = 0;
        unsigned long i;

        if (numa_enabled == 0) {
                printk(KERN_WARNING "NUMA disabled by user\n");
                return -1;
        }

        min_common_depth = find_min_common_depth();

        if (min_common_depth < 0)
                return min_common_depth;

        dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

        /*
         * Even though we connect cpus to numa domains later in SMP
         * init, we need to know the node ids now. This is because
         * each node to be onlined must have NODE_DATA etc backing it.
         */
        for_each_present_cpu(i) {
                int nid;

                cpu = find_cpu_node(i);
                BUG_ON(!cpu);
                nid = of_node_to_nid_single(cpu);
                of_node_put(cpu);

                /*
                 * Don't fall back to default_nid yet -- we will plug
                 * cpus into nodes once the memory scan has discovered
                 * the topology.
                 */
                if (nid < 0)
                        continue;
                node_set_online(nid);
        }

        get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
        memory = NULL;
        while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
                unsigned long start;
                unsigned long size;
                int nid;
                int ranges;
                const unsigned int *memcell_buf;
                unsigned int len;

                memcell_buf = of_get_property(memory,
                        "linux,usable-memory", &len);
                if (!memcell_buf || len <= 0)
                        memcell_buf = of_get_property(memory, "reg", &len);
                if (!memcell_buf || len <= 0)
                        continue;

                /* ranges in cell */
                ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
                /* these are order-sensitive, and modify the buffer pointer */
                start = read_n_cells(n_mem_addr_cells, &memcell_buf);
                size = read_n_cells(n_mem_size_cells, &memcell_buf);

                /*
                 * Assumption: either all memory nodes or none will
                 * have associativity properties.  If none, then
                 * everything goes to default_nid.
                 */
                nid = of_node_to_nid_single(memory);
                if (nid < 0)
                        nid = default_nid;

                fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
                node_set_online(nid);

                if (!(size = numa_enforce_memory_limit(start, size))) {
                        if (--ranges)
                                goto new_range;
                        else
                                continue;
                }

                add_active_range(nid, start >> PAGE_SHIFT,
                                 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));

                if (--ranges)
                        goto new_range;
        }

        /*
         * Now do the same thing for each LMB listed in the ibm,dynamic-memory
         * property in the ibm,dynamic-reconfiguration-memory node.
         */
        memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
        if (memory)
                parse_drconf_memory(memory);

        return 0;
}

static void __init setup_nonnuma(void)
{
        unsigned long top_of_ram = lmb_end_of_DRAM();
        unsigned long total_ram = lmb_phys_mem_size();
        unsigned long start_pfn, end_pfn;
        unsigned int i, nid = 0;

        printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
               top_of_ram, total_ram);
        printk(KERN_DEBUG "Memory hole size: %ldMB\n",
               (top_of_ram - total_ram) >> 20);

        for (i = 0; i < lmb.memory.cnt; ++i) {
                start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
                end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);

                fake_numa_create_new_node(end_pfn, &nid);
                add_active_range(nid, start_pfn, end_pfn);
                node_set_online(nid);
        }
}

void __init dump_numa_cpu_topology(void)
{
        unsigned int node;
        unsigned int cpu, count;

        if (min_common_depth == -1 || !numa_enabled)
                return;

        for_each_online_node(node) {
                printk(KERN_DEBUG "Node %d CPUs:", node);

                count = 0;
                /*
                 * If we used a CPU iterator here we would miss printing
                 * the holes in the cpumap.
                 */
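                /*
                 * Runs of present cpus are collapsed into ranges, e.g.
                 * (illustrative) "Node 0 CPUs: 0-3 6 8-11".
                 */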
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
                                if (count == 0)
                                        printk(" %u", cpu);
                                ++count;
                        } else {
                                if (count > 1)
                                        printk("-%u", cpu - 1);
                                count = 0;
                        }
                }

                if (count > 1)
                        printk("-%u", NR_CPUS - 1);
                printk("\n");
        }
}

static void __init dump_numa_memory_topology(void)
{
        unsigned int node;
        unsigned int count;

        if (min_common_depth == -1 || !numa_enabled)
                return;

        for_each_online_node(node) {
                unsigned long i;

                printk(KERN_DEBUG "Node %d Memory:", node);

                count = 0;

                for (i = 0; i < lmb_end_of_DRAM();
                     i += (1 << SECTION_SIZE_BITS)) {
                        if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
                                if (count == 0)
                                        printk(" 0x%lx", i);
                                ++count;
                        } else {
                                if (count > 0)
                                        printk("-0x%lx", i);
                                count = 0;
                        }
                }

                if (count > 0)
                        printk("-0x%lx", i);
                printk("\n");
        }
}

/*
 * Allocate some memory, using the lmb allocator and falling back to the
 * bootmem allocator where required.  nid is the preferred node and end_pfn
 * is the upper limit (as a page frame number) for the allocation.
 *
 * Returns the physical address of the memory.
 */
static void __init *careful_allocation(int nid, unsigned long size,
                                       unsigned long align,
                                       unsigned long end_pfn)
{
        int new_nid;
        unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

        /* retry over all memory */
        if (!ret)
                ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

        if (!ret)
                panic("numa.c: cannot allocate %lu bytes on node %d",
                      size, nid);

        /*
         * If the memory came from a previously allocated node, we must
         * retry with the bootmem allocator.
         */
        new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
        if (new_nid < nid) {
                ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
                                size, align, 0);

                if (!ret)
                        panic("numa.c: cannot allocate %lu bytes on node %d",
                              size, new_nid);

                ret = __pa(ret);

                dbg("alloc_bootmem %lx %lx\n", ret, size);
        }

        return (void *)ret;
}

static struct notifier_block __cpuinitdata ppc64_numa_nb = {
        .notifier_call = cpu_numa_callback,
        .priority = 1 /* Must run before sched domains notifier. */
};
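/*
 * Boot-time bootmem setup: run the device-tree NUMA scan (falling back to
 * a single node via setup_nonnuma()), map the boot cpu, and for each online
 * node allocate its pglist_data and bootmem bitmap (node-local when
 * possible), register its active ranges and reserve the lmb-reserved
 * regions that intersect it.
 */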
void __init do_init_bootmem(void)
{
        int nid;
        unsigned int i;

        min_low_pfn = 0;
        max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
        max_pfn = max_low_pfn;

        if (parse_numa_properties())
                setup_nonnuma();
        else
                dump_numa_memory_topology();

        register_cpu_notifier(&ppc64_numa_nb);
        cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
                          (void *)(unsigned long)boot_cpuid);

        for_each_online_node(nid) {
                unsigned long start_pfn, end_pfn;
                unsigned long bootmem_paddr;
                unsigned long bootmap_pages;

                get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);

                /* Allocate the node structure node local if possible */
                NODE_DATA(nid) = careful_allocation(nid,
                                        sizeof(struct pglist_data),
                                        SMP_CACHE_BYTES, end_pfn);
                NODE_DATA(nid) = __va(NODE_DATA(nid));
                memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

                dbg("node %d\n", nid);
                dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

                NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
                NODE_DATA(nid)->node_start_pfn = start_pfn;
                NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

                if (NODE_DATA(nid)->node_spanned_pages == 0)
                        continue;

                dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
                dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

                bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
                bootmem_paddr = (unsigned long)careful_allocation(nid,
                                        bootmap_pages << PAGE_SHIFT,
                                        PAGE_SIZE, end_pfn);
                memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);

                dbg("bootmap_paddr = %lx\n", bootmem_paddr);

                init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
                                  start_pfn, end_pfn);

                free_bootmem_with_active_regions(nid, end_pfn);

                /* Mark reserved regions on this node */
                for (i = 0; i < lmb.reserved.cnt; i++) {
                        unsigned long physbase = lmb.reserved.region[i].base;
                        unsigned long size = lmb.reserved.region[i].size;
                        unsigned long start_paddr = start_pfn << PAGE_SHIFT;
                        unsigned long end_paddr = end_pfn << PAGE_SHIFT;

                        if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
                            early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
                                continue;

                        if (physbase < end_paddr &&
                            (physbase+size) > start_paddr) {
                                /* overlaps */
                                if (physbase < start_paddr) {
                                        size -= start_paddr - physbase;
                                        physbase = start_paddr;
                                }

                                if (size > end_paddr - physbase)
                                        size = end_paddr - physbase;

                                dbg("reserve_bootmem %lx %lx\n", physbase,
                                    size);
                                reserve_bootmem_node(NODE_DATA(nid), physbase,
                                                     size, BOOTMEM_DEFAULT);
                        }
                }

                sparse_memory_present_with_active_regions(nid);
        }
}

void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT;
        free_area_init_nodes(max_zone_pfns);
}

static int __init early_numa(char *p)
{
        if (!p)
                return 0;

        if (strstr(p, "off"))
                numa_enabled = 0;

        if (strstr(p, "debug"))
                numa_debug = 1;

        p = strstr(p, "fake=");
        if (p)
                cmdline = p + strlen("fake=");

        return 0;
}
early_param("numa", early_numa);
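/*
 * Example (illustrative): booting with "numa=debug,fake=1G,2G,4G" enables
 * the dbg() output and creates fake node boundaries at 1G, 2G and 4G;
 * "numa=off" disables NUMA handling entirely.
 */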
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
 * sections are fully contained within a single LMB.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
        struct device_node *memory = NULL;
        nodemask_t nodes;
        int default_nid = any_online_node(NODE_MASK_ALL);
        int nid;

        if (!numa_enabled || (min_common_depth < 0))
                return default_nid;

        while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
                unsigned long start, size;
                int ranges;
                const unsigned int *memcell_buf;
                unsigned int len;

                memcell_buf = of_get_property(memory, "reg", &len);
                if (!memcell_buf || len <= 0)
                        continue;

                /* ranges in cell */
                ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
ha_new_range:
                start = read_n_cells(n_mem_addr_cells, &memcell_buf);
                size = read_n_cells(n_mem_size_cells, &memcell_buf);
                nid = of_node_to_nid_single(memory);

                /* Domains not present at boot default to 0 */
                if (nid < 0 || !node_online(nid))
                        nid = default_nid;

                if ((scn_addr >= start) && (scn_addr < (start + size))) {
                        of_node_put(memory);
                        goto got_nid;
                }

                if (--ranges)           /* process all ranges in cell */
                        goto ha_new_range;
        }
        BUG();  /* section address should be found above */
        return 0;

        /* Temporary code to ensure that returned node is not empty */
got_nid:
        nodes_setall(nodes);
        while (NODE_DATA(nid)->node_spanned_pages == 0) {
                node_clear(nid, nodes);
                nid = any_online_node(nodes);
        }
        return nid;
}
#endif /* CONFIG_MEMORY_HOTPLUG */