/*
 * Generic VM initialization for x86-64 NUMA setups.
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>

#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/numa.h>
#include <asm/acpi.h>
#include <asm/k8.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);

struct memnode memnode;

s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};

int numa_off __initdata;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;

DEFINE_PER_CPU(int, node_number) = 0;
EXPORT_PER_CPU_SYMBOL(node_number);

/*
 * Map cpu index to node index
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/*
 * Given a shift value, try to populate memnodemap[]
 * Returns :
 * 1 if OK
 * 0 if memnodemap[] too small (or shift too small)
 * -1 if node overlap or lost ram (shift too big)
 */
static int __init populate_memnodemap(const struct bootnode *nodes,
				      int numnodes, int shift, int *nodeids)
{
	unsigned long addr, end;
	int i, res = -1;

	memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
	for (i = 0; i < numnodes; i++) {
		addr = nodes[i].start;
		end = nodes[i].end;
		if (addr >= end)
			continue;
		if ((end >> shift) >= memnodemapsize)
			return 0;
		do {
			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
				return -1;

			if (!nodeids)
				memnodemap[addr >> shift] = i;
			else
				memnodemap[addr >> shift] = nodeids[i];

			addr += (1UL << shift);
		} while (addr < end);
		res = 1;
	}
	return res;
}

static int __init allocate_cachealigned_memnodemap(void)
{
	unsigned long addr;

	memnodemap = memnode.embedded_map;
	if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
		return 0;

	addr = 0x8000;
	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
	nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
				      nodemap_size, L1_CACHE_BYTES);
	if (nodemap_addr == -1UL) {
		printk(KERN_ERR
		       "NUMA: Unable to allocate Memory to Node hash map\n");
		nodemap_addr = nodemap_size = 0;
		return -1;
	}
	memnodemap = phys_to_virt(nodemap_addr);
	reserve_early(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");

	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
	       nodemap_addr, nodemap_addr + nodemap_size);
	return 0;
}

/*
 * The LSB of all start and end addresses in the node map is the value of the
 * maximum possible shift.
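 *
 * For example (illustrative case), with one node starting at 0 and a second
 * node starting at 4GB (0x100000000), the lowest set bit over all node
 * boundaries is bit 32, so each memnodemap[] entry ends up covering 4GB of
 * address space.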
 */
static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
					 int numnodes)
{
	int i, nodes_used = 0;
	unsigned long start, end;
	unsigned long bitfield = 0, memtop = 0;

	for (i = 0; i < numnodes; i++) {
		start = nodes[i].start;
		end = nodes[i].end;
		if (start >= end)
			continue;
		bitfield |= start;
		nodes_used++;
		if (end > memtop)
			memtop = end;
	}
	if (nodes_used <= 1)
		i = 63;
	else
		i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
	memnodemapsize = (memtop >> i)+1;
	return i;
}

int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
			      int *nodeids)
{
	int shift;

	shift = extract_lsb_from_nodes(nodes, numnodes);
	if (allocate_cachealigned_memnodemap())
		return -1;
	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
		shift);

	if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
		printk(KERN_INFO "Your memory is not aligned you need to "
		       "rebuild your kernel with a bigger NODEMAPSIZE "
		       "shift=%d\n", shift);
		return -1;
	}
	return shift;
}

int __meminit __early_pfn_to_nid(unsigned long pfn)
{
	return phys_to_nid(pfn << PAGE_SHIFT);
}

static void * __init early_node_mem(int nodeid, unsigned long start,
				    unsigned long end, unsigned long size,
				    unsigned long align)
{
	unsigned long mem = find_e820_area(start, end, size, align);
	void *ptr;

	if (mem != -1L)
		return __va(mem);

	ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
	if (ptr == NULL) {
		printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
		       size, nodeid);
		return NULL;
	}
	return ptr;
}

/* Initialize bootmem allocator for a node */
void __init setup_node_bootmem(int nodeid, unsigned long start,
			       unsigned long end)
{
	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
	unsigned long bootmap_start, nodedata_phys;
	void *bootmap;
	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
	int nid;

	start = roundup(start, ZONE_ALIGN);

	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
	       start, end);

	start_pfn = start >> PAGE_SHIFT;
	last_pfn = end >> PAGE_SHIFT;

	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
					   SMP_CACHE_BYTES);
	if (node_data[nodeid] == NULL)
		return;
	nodedata_phys = __pa(node_data[nodeid]);
	printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
		nodedata_phys + pgdat_size - 1);

	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
	NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;

	/*
	 * Find a place for the bootmem map.
	 * nodedata_phys could be on another node if it came from alloc_bootmem,
	 * so we need to make sure bootmap_start is not too small; otherwise
	 * early_node_mem would grab it with find_e820_area instead of
	 * alloc_bootmem, which could clash with a reserved range.
	 */
	bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
	nid = phys_to_nid(nodedata_phys);
	if (nid == nodeid)
		bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
	else
		bootmap_start = roundup(start, PAGE_SIZE);
	/*
	 * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes
	 * to use that to align to PAGE_SIZE.
	 */
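	/*
	 * Note that early_node_mem() falls back to __alloc_bootmem_nopanic()
	 * when the e820 search fails, so the bootmap may still land on another
	 * node; that case is detected and handled below.
	 */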
	bootmap = early_node_mem(nodeid, bootmap_start, end,
				 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
	if (bootmap == NULL) {
		if (nodedata_phys < start || nodedata_phys >= end)
			free_bootmem(nodedata_phys, pgdat_size);
		node_data[nodeid] = NULL;
		return;
	}
	bootmap_start = __pa(bootmap);

	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
					 bootmap_start >> PAGE_SHIFT,
					 start_pfn, last_pfn);

	printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
		 bootmap_start, bootmap_start + bootmap_size - 1,
		 bootmap_pages);

	free_bootmem_with_active_regions(nodeid, end);

	/*
	 * Convert early reservations to bootmem reservations now; otherwise
	 * early_node_mem could later hand out memory that is still only
	 * early-reserved on a previous node.
	 */
	early_res_to_bootmem(start, end);

	/*
	 * In some cases early_node_mem could use alloc_bootmem
	 * to get a range on another node; don't reserve that again.
	 */
	if (nid != nodeid)
		printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
	else
		reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
					pgdat_size, BOOTMEM_DEFAULT);
	nid = phys_to_nid(bootmap_start);
	if (nid != nodeid)
		printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
	else
		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);

#ifdef CONFIG_ACPI_NUMA
	srat_reserve_add_area(nodeid);
#endif
	node_set_online(nodeid);
}

/*
 * There are unfortunately some poorly designed mainboards around that
 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
 * mapping. To avoid this, fill in the mapping for all possible CPUs,
 * as the number of CPUs is not known yet. We round robin the existing
 * nodes.
 */
void __init numa_init_array(void)
{
	int rr, i;

	rr = first_node(node_online_map);
	for (i = 0; i < nr_cpu_ids; i++) {
		if (early_cpu_to_node(i) != NUMA_NO_NODE)
			continue;
		numa_set_node(i, rr);
		rr = next_node(rr, node_online_map);
		if (rr == MAX_NUMNODES)
			rr = first_node(node_online_map);
	}
}

#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static char *cmdline __initdata;

/*
 * Sets up nid to range from addr to addr + size. If the end
 * boundary is greater than max_addr, then max_addr is used instead.
 * The return value is 0 if there is additional memory left for
 * allocation past addr and -1 otherwise. addr is adjusted to be at
 * the end of the node.
 */
static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
				   u64 size, u64 max_addr)
{
	int ret = 0;

	nodes[nid].start = *addr;
	*addr += size;
	if (*addr >= max_addr) {
		*addr = max_addr;
		ret = -1;
	}
	nodes[nid].end = *addr;
	node_set(nid, node_possible_map);
	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
	       nodes[nid].start, nodes[nid].end,
	       (nodes[nid].end - nodes[nid].start) >> 20);
	return ret;
}

/*
 * Splits num_nodes nodes up equally starting at node_start. The return value
 * is the number of nodes split up and addr is adjusted to be at the end of the
 * last node allocated.
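 *
 * For example (illustrative case), numa=fake=4 on an 8GB machine arrives here
 * with num_nodes=4 and yields four nodes of roughly 2GB each, modulo e820
 * holes and rounding down to FAKE_NODE_MIN_SIZE.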
 */
static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
				      u64 max_addr, int node_start,
				      int num_nodes)
{
	unsigned int big;
	u64 size;
	int i;

	if (num_nodes <= 0)
		return -1;
	if (num_nodes > MAX_NUMNODES)
		num_nodes = MAX_NUMNODES;
	size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
	       num_nodes;
	/*
	 * Calculate the number of big nodes that can be allocated as a result
	 * of consolidating the leftovers.
	 */
	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
	      FAKE_NODE_MIN_SIZE;

	/* Round down to nearest FAKE_NODE_MIN_SIZE. */
	size &= FAKE_NODE_MIN_HASH_MASK;
	if (!size) {
		printk(KERN_ERR "Not enough memory for each node. "
		       "NUMA emulation disabled.\n");
		return -1;
	}

	for (i = node_start; i < num_nodes + node_start; i++) {
		u64 end = *addr + size;

		if (i < big)
			end += FAKE_NODE_MIN_SIZE;
		/*
		 * The final node can have the remaining system RAM. Other
		 * nodes receive roughly the same amount of available pages.
		 */
		if (i == num_nodes + node_start - 1)
			end = max_addr;
		else
			while (end - *addr - e820_hole_size(*addr, end) <
			       size) {
				end += FAKE_NODE_MIN_SIZE;
				if (end > max_addr) {
					end = max_addr;
					break;
				}
			}
		if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
			break;
	}
	return i - node_start + 1;
}

/*
 * Splits the remaining system RAM into chunks of size. The remaining memory is
 * always assigned to a final node and can be asymmetric. Returns the number of
 * nodes split.
 */
static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
				      u64 max_addr, int node_start, u64 size)
{
	int i = node_start;
	size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
	while (!setup_node_range(i++, nodes, addr, size, max_addr))
		;
	return i - node_start;
}

/*
 * Sets up the system RAM area from start_pfn to last_pfn according to the
 * numa=fake command-line option.
 */
static struct bootnode nodes[MAX_NUMNODES] __initdata;

static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
{
	u64 size, addr = start_pfn << PAGE_SHIFT;
	u64 max_addr = last_pfn << PAGE_SHIFT;
	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;

	memset(&nodes, 0, sizeof(nodes));
	/*
	 * If the numa=fake command-line is just a single number N, split the
	 * system RAM into N fake nodes.
	 */
	if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
		long n = simple_strtol(cmdline, NULL, 0);

		num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
		if (num_nodes < 0)
			return num_nodes;
		goto out;
	}

	/* Parse the command line. */
	for (coeff_flag = 0; ; cmdline++) {
		if (*cmdline && isdigit(*cmdline)) {
			num = num * 10 + *cmdline - '0';
			continue;
		}
		if (*cmdline == '*') {
			if (num > 0)
				coeff = num;
			coeff_flag = 1;
		}
		if (!*cmdline || *cmdline == ',') {
			if (!coeff_flag)
				coeff = 1;
			/*
			 * Round down to the nearest FAKE_NODE_MIN_SIZE.
			 * Command-line coefficients are in megabytes.
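			 *
			 * For example (illustrative case), numa=fake=2*512,1024
			 * requests two 512MB nodes and one 1GB node; any RAM
			 * left over is then given to one final node below.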
			 */
			size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
			if (size)
				for (i = 0; i < coeff; i++, num_nodes++)
					if (setup_node_range(num_nodes, nodes,
						&addr, size, max_addr) < 0)
						goto done;
			if (!*cmdline)
				break;
			coeff_flag = 0;
			coeff = -1;
		}
		num = 0;
	}
done:
	if (!num_nodes)
		return -1;
	/* Fill remainder of system RAM, if appropriate. */
	if (addr < max_addr) {
		if (coeff_flag && coeff < 0) {
			/* Split remaining nodes into num-sized chunks */
			num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
							 num_nodes, num);
			goto out;
		}
		switch (*(cmdline - 1)) {
		case '*':
			/* Split remaining nodes into coeff chunks */
			if (coeff <= 0)
				break;
			num_nodes += split_nodes_equally(nodes, &addr, max_addr,
							 num_nodes, coeff);
			break;
		case ',':
			/* Do not allocate remaining system RAM */
			break;
		default:
			/* Give one final node */
			setup_node_range(num_nodes, nodes, &addr,
					 max_addr - addr, max_addr);
			num_nodes++;
		}
	}
out:
	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
	if (memnode_shift < 0) {
		memnode_shift = 0;
		printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
		       "disabled.\n");
		return -1;
	}

	/*
	 * We need to vacate all active ranges that may have been registered by
	 * SRAT and set acpi_numa to -1 so that srat_disabled() always returns
	 * true. NUMA emulation has succeeded so we will not scan ACPI nodes.
	 */
	remove_all_active_ranges();
#ifdef CONFIG_ACPI_NUMA
	acpi_numa = -1;
#endif
	for_each_node_mask(i, node_possible_map) {
		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
						nodes[i].end >> PAGE_SHIFT);
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
	}
	acpi_fake_nodes(nodes, num_nodes);
	numa_init_array();
	return 0;
}
#endif /* CONFIG_NUMA_EMU */

void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
{
	int i;

	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);

#ifdef CONFIG_NUMA_EMU
	if (cmdline && !numa_emulation(start_pfn, last_pfn))
		return;
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
#endif

#ifdef CONFIG_ACPI_NUMA
	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
					  last_pfn << PAGE_SHIFT))
		return;
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
#endif

#ifdef CONFIG_K8_NUMA
	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
					last_pfn<<PAGE_SHIFT))
		return;
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
#endif
	printk(KERN_INFO "%s\n",
	       numa_off ? "NUMA turned off" : "No NUMA configuration found");

	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
	       start_pfn << PAGE_SHIFT,
	       last_pfn << PAGE_SHIFT);
	/* setup dummy node covering all memory */
	memnode_shift = 63;
	memnodemap = memnode.embedded_map;
	memnodemap[0] = 0;
	node_set_online(0);
	node_set(0, node_possible_map);
	for (i = 0; i < nr_cpu_ids; i++)
		numa_set_node(i, 0);
	e820_register_active_regions(0, start_pfn, last_pfn);
	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
}

unsigned long __init numa_free_all_bootmem(void)
{
	unsigned long pages = 0;
	int i;

	for_each_online_node(i)
		pages += free_all_bootmem_node(NODE_DATA(i));

	return pages;
}

void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = max_pfn;

	sparse_memory_present_with_active_regions(MAX_NUMNODES);
	sparse_init();

	free_area_init_nodes(max_zone_pfns);
}

static __init int numa_setup(char *opt)
{
	if (!opt)
		return -EINVAL;
	if (!strncmp(opt, "off", 3))
		numa_off = 1;
#ifdef CONFIG_NUMA_EMU
	if (!strncmp(opt, "fake=", 5))
		cmdline = opt + 5;
#endif
#ifdef CONFIG_ACPI_NUMA
	if (!strncmp(opt, "noacpi", 6))
		acpi_numa = -1;
	if (!strncmp(opt, "hotadd=", 7))
		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
#endif
	return 0;
}
early_param("numa", numa_setup);

#ifdef CONFIG_NUMA
/*
 * Setup early cpu_to_node.
 *
 * Populate cpu_to_node[] only if x86_cpu_to_apicid[]
 * and apicid_to_node[] tables have valid entries for a CPU.
 * This means we skip cpu_to_node[] initialisation for NUMA
 * emulation and the fake-node case (when running a kernel compiled
 * for NUMA on a non-NUMA box), which is OK because cpu_to_node[]
 * is already initialized in a round-robin manner by numa_init_array(),
 * prior to this call, and that initialization is good enough
 * for the fake NUMA cases.
 *
 * Called before the per_cpu areas are setup.
 */
void __init init_cpu_to_node(void)
{
	int cpu;
	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

	BUG_ON(cpu_to_apicid == NULL);

	for_each_possible_cpu(cpu) {
		int node;
		u16 apicid = cpu_to_apicid[cpu];

		if (apicid == BAD_APICID)
			continue;
		node = apicid_to_node[apicid];
		if (node == NUMA_NO_NODE)
			continue;
		if (!node_online(node))
			continue;
		numa_set_node(cpu, node);
	}
}
#endif


void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	/* early setting, no percpu area yet */
	if (cpu_to_node_map) {
		cpu_to_node_map[cpu] = node;
		return;
	}

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
		dump_stack();
		return;
	}
#endif
	per_cpu(x86_cpu_to_node_map, cpu) = node;

	if (node != NUMA_NO_NODE)
		per_cpu(node_number, cpu) = node;
}

void __cpuinit numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

void __cpuinit numa_add_cpu(int cpu)
{
	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

#else /* CONFIG_DEBUG_PER_CPU_MAPS */

/*
 * --------- debug versions of the numa functions ---------
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = early_cpu_to_node(cpu);
	struct cpumask *mask;
	char buf[64];

	mask = node_to_cpumask_map[node];
	if (mask == NULL) {
		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
		dump_stack();
		return;
	}

	if (enable)
		cpumask_set_cpu(cpu, mask);
	else
		cpumask_clear_cpu(cpu, mask);

	cpulist_scnprintf(buf, sizeof(buf), mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}

void __cpuinit numa_add_cpu(int cpu)
{
	numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	numa_set_cpumask(cpu, 0);
}

int cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
		printk(KERN_WARNING
			"cpu_to_node(%d): usage too early!\n", cpu);
		dump_stack();
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are setup.
 */
int early_cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	if (!cpu_possible(cpu)) {
		printk(KERN_WARNING
			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
		dump_stack();
		return NUMA_NO_NODE;
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}

/*
 * --------- end of debug versions of the numa functions ---------
 */

#endif /* CONFIG_DEBUG_PER_CPU_MAPS */