1 /* 2 * Generic VM initialization for x86-64 NUMA setups. 3 * Copyright 2002,2003 Andi Kleen, SuSE Labs. 4 */ 5 #include <linux/kernel.h> 6 #include <linux/mm.h> 7 #include <linux/string.h> 8 #include <linux/init.h> 9 #include <linux/bootmem.h> 10 #include <linux/mmzone.h> 11 #include <linux/ctype.h> 12 #include <linux/module.h> 13 #include <linux/nodemask.h> 14 #include <linux/sched.h> 15 16 #include <asm/e820.h> 17 #include <asm/proto.h> 18 #include <asm/dma.h> 19 #include <asm/numa.h> 20 #include <asm/acpi.h> 21 #include <asm/k8.h> 22 23 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 24 EXPORT_SYMBOL(node_data); 25 26 struct memnode memnode; 27 28 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 29 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 30 }; 31 32 int numa_off __initdata; 33 static unsigned long __initdata nodemap_addr; 34 static unsigned long __initdata nodemap_size; 35 36 DEFINE_PER_CPU(int, node_number) = 0; 37 EXPORT_PER_CPU_SYMBOL(node_number); 38 39 /* 40 * Map cpu index to node index 41 */ 42 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 43 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 44 45 /* 46 * Given a shift value, try to populate memnodemap[] 47 * Returns : 48 * 1 if OK 49 * 0 if memnodmap[] too small (of shift too small) 50 * -1 if node overlap or lost ram (shift too big) 51 */ 52 static int __init populate_memnodemap(const struct bootnode *nodes, 53 int numnodes, int shift, int *nodeids) 54 { 55 unsigned long addr, end; 56 int i, res = -1; 57 58 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); 59 for (i = 0; i < numnodes; i++) { 60 addr = nodes[i].start; 61 end = nodes[i].end; 62 if (addr >= end) 63 continue; 64 if ((end >> shift) >= memnodemapsize) 65 return 0; 66 do { 67 if (memnodemap[addr >> shift] != NUMA_NO_NODE) 68 return -1; 69 70 if (!nodeids) 71 memnodemap[addr >> shift] = i; 72 else 73 memnodemap[addr >> shift] = nodeids[i]; 74 75 addr += (1UL << shift); 76 } while (addr < end); 77 res = 1; 78 } 79 return res; 80 } 81 82 static int __init allocate_cachealigned_memnodemap(void) 83 { 84 unsigned long addr; 85 86 memnodemap = memnode.embedded_map; 87 if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map)) 88 return 0; 89 90 addr = 0x8000; 91 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); 92 nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, 93 nodemap_size, L1_CACHE_BYTES); 94 if (nodemap_addr == -1UL) { 95 printk(KERN_ERR 96 "NUMA: Unable to allocate Memory to Node hash map\n"); 97 nodemap_addr = nodemap_size = 0; 98 return -1; 99 } 100 memnodemap = phys_to_virt(nodemap_addr); 101 reserve_early(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); 102 103 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", 104 nodemap_addr, nodemap_addr + nodemap_size); 105 return 0; 106 } 107 108 /* 109 * The LSB of all start and end addresses in the node map is the value of the 110 * maximum possible shift. 111 */ 112 static int __init extract_lsb_from_nodes(const struct bootnode *nodes, 113 int numnodes) 114 { 115 int i, nodes_used = 0; 116 unsigned long start, end; 117 unsigned long bitfield = 0, memtop = 0; 118 119 for (i = 0; i < numnodes; i++) { 120 start = nodes[i].start; 121 end = nodes[i].end; 122 if (start >= end) 123 continue; 124 bitfield |= start; 125 nodes_used++; 126 if (end > memtop) 127 memtop = end; 128 } 129 if (nodes_used <= 1) 130 i = 63; 131 else 132 i = find_first_bit(&bitfield, sizeof(unsigned long)*8); 133 memnodemapsize = (memtop >> i)+1; 134 return i; 135 } 136 137 int __init compute_hash_shift(struct bootnode *nodes, int numnodes, 138 int *nodeids) 139 { 140 int shift; 141 142 shift = extract_lsb_from_nodes(nodes, numnodes); 143 if (allocate_cachealigned_memnodemap()) 144 return -1; 145 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 146 shift); 147 148 if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { 149 printk(KERN_INFO "Your memory is not aligned you need to " 150 "rebuild your kernel with a bigger NODEMAPSIZE " 151 "shift=%d\n", shift); 152 return -1; 153 } 154 return shift; 155 } 156 157 int __meminit __early_pfn_to_nid(unsigned long pfn) 158 { 159 return phys_to_nid(pfn << PAGE_SHIFT); 160 } 161 162 static void * __init early_node_mem(int nodeid, unsigned long start, 163 unsigned long end, unsigned long size, 164 unsigned long align) 165 { 166 unsigned long mem = find_e820_area(start, end, size, align); 167 void *ptr; 168 169 if (mem != -1L) 170 return __va(mem); 171 172 ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); 173 if (ptr == NULL) { 174 printk(KERN_ERR "Cannot find %lu bytes in node %d\n", 175 size, nodeid); 176 return NULL; 177 } 178 return ptr; 179 } 180 181 /* Initialize bootmem allocator for a node */ 182 void __init setup_node_bootmem(int nodeid, unsigned long start, 183 unsigned long end) 184 { 185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 186 unsigned long bootmap_start, nodedata_phys; 187 void *bootmap; 188 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 189 int nid; 190 191 if (!end) 192 return; 193 194 start = roundup(start, ZONE_ALIGN); 195 196 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 197 start, end); 198 199 start_pfn = start >> PAGE_SHIFT; 200 last_pfn = end >> PAGE_SHIFT; 201 202 node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, 203 SMP_CACHE_BYTES); 204 if (node_data[nodeid] == NULL) 205 return; 206 nodedata_phys = __pa(node_data[nodeid]); 207 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, 208 nodedata_phys + pgdat_size - 1); 209 210 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 211 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; 212 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 213 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 214 215 /* 216 * Find a place for the bootmem map 217 * nodedata_phys could be on other nodes by alloc_bootmem, 218 * so need to sure bootmap_start not to be small, otherwise 219 * early_node_mem will get that with find_e820_area instead 220 * of alloc_bootmem, that could clash with reserved range 221 */ 222 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); 223 nid = phys_to_nid(nodedata_phys); 224 if (nid == nodeid) 225 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); 226 else 227 bootmap_start = roundup(start, PAGE_SIZE); 228 /* 229 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like 230 * to use that to align to PAGE_SIZE 231 */ 232 bootmap = early_node_mem(nodeid, bootmap_start, end, 233 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); 234 if (bootmap == NULL) { 235 if (nodedata_phys < start || nodedata_phys >= end) 236 free_bootmem(nodedata_phys, pgdat_size); 237 node_data[nodeid] = NULL; 238 return; 239 } 240 bootmap_start = __pa(bootmap); 241 242 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 243 bootmap_start >> PAGE_SHIFT, 244 start_pfn, last_pfn); 245 246 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", 247 bootmap_start, bootmap_start + bootmap_size - 1, 248 bootmap_pages); 249 250 free_bootmem_with_active_regions(nodeid, end); 251 252 /* 253 * convert early reserve to bootmem reserve earlier 254 * otherwise early_node_mem could use early reserved mem 255 * on previous node 256 */ 257 early_res_to_bootmem(start, end); 258 259 /* 260 * in some case early_node_mem could use alloc_bootmem 261 * to get range on other node, don't reserve that again 262 */ 263 if (nid != nodeid) 264 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); 265 else 266 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, 267 pgdat_size, BOOTMEM_DEFAULT); 268 nid = phys_to_nid(bootmap_start); 269 if (nid != nodeid) 270 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); 271 else 272 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 273 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 274 275 #ifdef CONFIG_ACPI_NUMA 276 srat_reserve_add_area(nodeid); 277 #endif 278 node_set_online(nodeid); 279 } 280 281 /* 282 * There are unfortunately some poorly designed mainboards around that 283 * only connect memory to a single CPU. This breaks the 1:1 cpu->node 284 * mapping. To avoid this fill in the mapping for all possible CPUs, 285 * as the number of CPUs is not known yet. We round robin the existing 286 * nodes. 287 */ 288 void __init numa_init_array(void) 289 { 290 int rr, i; 291 292 rr = first_node(node_online_map); 293 for (i = 0; i < nr_cpu_ids; i++) { 294 if (early_cpu_to_node(i) != NUMA_NO_NODE) 295 continue; 296 numa_set_node(i, rr); 297 rr = next_node(rr, node_online_map); 298 if (rr == MAX_NUMNODES) 299 rr = first_node(node_online_map); 300 } 301 } 302 303 #ifdef CONFIG_NUMA_EMU 304 /* Numa emulation */ 305 static char *cmdline __initdata; 306 307 /* 308 * Setups up nid to range from addr to addr + size. If the end 309 * boundary is greater than max_addr, then max_addr is used instead. 310 * The return value is 0 if there is additional memory left for 311 * allocation past addr and -1 otherwise. addr is adjusted to be at 312 * the end of the node. 313 */ 314 static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, 315 u64 size, u64 max_addr) 316 { 317 int ret = 0; 318 319 nodes[nid].start = *addr; 320 *addr += size; 321 if (*addr >= max_addr) { 322 *addr = max_addr; 323 ret = -1; 324 } 325 nodes[nid].end = *addr; 326 node_set(nid, node_possible_map); 327 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, 328 nodes[nid].start, nodes[nid].end, 329 (nodes[nid].end - nodes[nid].start) >> 20); 330 return ret; 331 } 332 333 /* 334 * Splits num_nodes nodes up equally starting at node_start. The return value 335 * is the number of nodes split up and addr is adjusted to be at the end of the 336 * last node allocated. 337 */ 338 static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, 339 u64 max_addr, int node_start, 340 int num_nodes) 341 { 342 unsigned int big; 343 u64 size; 344 int i; 345 346 if (num_nodes <= 0) 347 return -1; 348 if (num_nodes > MAX_NUMNODES) 349 num_nodes = MAX_NUMNODES; 350 size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) / 351 num_nodes; 352 /* 353 * Calculate the number of big nodes that can be allocated as a result 354 * of consolidating the leftovers. 355 */ 356 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / 357 FAKE_NODE_MIN_SIZE; 358 359 /* Round down to nearest FAKE_NODE_MIN_SIZE. */ 360 size &= FAKE_NODE_MIN_HASH_MASK; 361 if (!size) { 362 printk(KERN_ERR "Not enough memory for each node. " 363 "NUMA emulation disabled.\n"); 364 return -1; 365 } 366 367 for (i = node_start; i < num_nodes + node_start; i++) { 368 u64 end = *addr + size; 369 370 if (i < big) 371 end += FAKE_NODE_MIN_SIZE; 372 /* 373 * The final node can have the remaining system RAM. Other 374 * nodes receive roughly the same amount of available pages. 375 */ 376 if (i == num_nodes + node_start - 1) 377 end = max_addr; 378 else 379 while (end - *addr - e820_hole_size(*addr, end) < 380 size) { 381 end += FAKE_NODE_MIN_SIZE; 382 if (end > max_addr) { 383 end = max_addr; 384 break; 385 } 386 } 387 if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) 388 break; 389 } 390 return i - node_start + 1; 391 } 392 393 /* 394 * Splits the remaining system RAM into chunks of size. The remaining memory is 395 * always assigned to a final node and can be asymmetric. Returns the number of 396 * nodes split. 397 */ 398 static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, 399 u64 max_addr, int node_start, u64 size) 400 { 401 int i = node_start; 402 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; 403 while (!setup_node_range(i++, nodes, addr, size, max_addr)) 404 ; 405 return i - node_start; 406 } 407 408 /* 409 * Sets up the system RAM area from start_pfn to last_pfn according to the 410 * numa=fake command-line option. 411 */ 412 static struct bootnode nodes[MAX_NUMNODES] __initdata; 413 414 static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) 415 { 416 u64 size, addr = start_pfn << PAGE_SHIFT; 417 u64 max_addr = last_pfn << PAGE_SHIFT; 418 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; 419 420 memset(&nodes, 0, sizeof(nodes)); 421 /* 422 * If the numa=fake command-line is just a single number N, split the 423 * system RAM into N fake nodes. 424 */ 425 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { 426 long n = simple_strtol(cmdline, NULL, 0); 427 428 num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); 429 if (num_nodes < 0) 430 return num_nodes; 431 goto out; 432 } 433 434 /* Parse the command line. */ 435 for (coeff_flag = 0; ; cmdline++) { 436 if (*cmdline && isdigit(*cmdline)) { 437 num = num * 10 + *cmdline - '0'; 438 continue; 439 } 440 if (*cmdline == '*') { 441 if (num > 0) 442 coeff = num; 443 coeff_flag = 1; 444 } 445 if (!*cmdline || *cmdline == ',') { 446 if (!coeff_flag) 447 coeff = 1; 448 /* 449 * Round down to the nearest FAKE_NODE_MIN_SIZE. 450 * Command-line coefficients are in megabytes. 451 */ 452 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; 453 if (size) 454 for (i = 0; i < coeff; i++, num_nodes++) 455 if (setup_node_range(num_nodes, nodes, 456 &addr, size, max_addr) < 0) 457 goto done; 458 if (!*cmdline) 459 break; 460 coeff_flag = 0; 461 coeff = -1; 462 } 463 num = 0; 464 } 465 done: 466 if (!num_nodes) 467 return -1; 468 /* Fill remainder of system RAM, if appropriate. */ 469 if (addr < max_addr) { 470 if (coeff_flag && coeff < 0) { 471 /* Split remaining nodes into num-sized chunks */ 472 num_nodes += split_nodes_by_size(nodes, &addr, max_addr, 473 num_nodes, num); 474 goto out; 475 } 476 switch (*(cmdline - 1)) { 477 case '*': 478 /* Split remaining nodes into coeff chunks */ 479 if (coeff <= 0) 480 break; 481 num_nodes += split_nodes_equally(nodes, &addr, max_addr, 482 num_nodes, coeff); 483 break; 484 case ',': 485 /* Do not allocate remaining system RAM */ 486 break; 487 default: 488 /* Give one final node */ 489 setup_node_range(num_nodes, nodes, &addr, 490 max_addr - addr, max_addr); 491 num_nodes++; 492 } 493 } 494 out: 495 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 496 if (memnode_shift < 0) { 497 memnode_shift = 0; 498 printk(KERN_ERR "No NUMA hash function found. NUMA emulation " 499 "disabled.\n"); 500 return -1; 501 } 502 503 /* 504 * We need to vacate all active ranges that may have been registered by 505 * SRAT and set acpi_numa to -1 so that srat_disabled() always returns 506 * true. NUMA emulation has succeeded so we will not scan ACPI nodes. 507 */ 508 remove_all_active_ranges(); 509 #ifdef CONFIG_ACPI_NUMA 510 acpi_numa = -1; 511 #endif 512 for_each_node_mask(i, node_possible_map) { 513 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 514 nodes[i].end >> PAGE_SHIFT); 515 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 516 } 517 acpi_fake_nodes(nodes, num_nodes); 518 numa_init_array(); 519 return 0; 520 } 521 #endif /* CONFIG_NUMA_EMU */ 522 523 void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) 524 { 525 int i; 526 527 nodes_clear(node_possible_map); 528 nodes_clear(node_online_map); 529 530 #ifdef CONFIG_NUMA_EMU 531 if (cmdline && !numa_emulation(start_pfn, last_pfn)) 532 return; 533 nodes_clear(node_possible_map); 534 nodes_clear(node_online_map); 535 #endif 536 537 #ifdef CONFIG_ACPI_NUMA 538 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 539 last_pfn << PAGE_SHIFT)) 540 return; 541 nodes_clear(node_possible_map); 542 nodes_clear(node_online_map); 543 #endif 544 545 #ifdef CONFIG_K8_NUMA 546 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, 547 last_pfn<<PAGE_SHIFT)) 548 return; 549 nodes_clear(node_possible_map); 550 nodes_clear(node_online_map); 551 #endif 552 printk(KERN_INFO "%s\n", 553 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 554 555 printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 556 start_pfn << PAGE_SHIFT, 557 last_pfn << PAGE_SHIFT); 558 /* setup dummy node covering all memory */ 559 memnode_shift = 63; 560 memnodemap = memnode.embedded_map; 561 memnodemap[0] = 0; 562 node_set_online(0); 563 node_set(0, node_possible_map); 564 for (i = 0; i < nr_cpu_ids; i++) 565 numa_set_node(i, 0); 566 e820_register_active_regions(0, start_pfn, last_pfn); 567 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); 568 } 569 570 unsigned long __init numa_free_all_bootmem(void) 571 { 572 unsigned long pages = 0; 573 int i; 574 575 for_each_online_node(i) 576 pages += free_all_bootmem_node(NODE_DATA(i)); 577 578 return pages; 579 } 580 581 void __init paging_init(void) 582 { 583 unsigned long max_zone_pfns[MAX_NR_ZONES]; 584 585 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 586 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; 587 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; 588 max_zone_pfns[ZONE_NORMAL] = max_pfn; 589 590 sparse_memory_present_with_active_regions(MAX_NUMNODES); 591 sparse_init(); 592 593 free_area_init_nodes(max_zone_pfns); 594 } 595 596 static __init int numa_setup(char *opt) 597 { 598 if (!opt) 599 return -EINVAL; 600 if (!strncmp(opt, "off", 3)) 601 numa_off = 1; 602 #ifdef CONFIG_NUMA_EMU 603 if (!strncmp(opt, "fake=", 5)) 604 cmdline = opt + 5; 605 #endif 606 #ifdef CONFIG_ACPI_NUMA 607 if (!strncmp(opt, "noacpi", 6)) 608 acpi_numa = -1; 609 if (!strncmp(opt, "hotadd=", 7)) 610 hotadd_percent = simple_strtoul(opt+7, NULL, 10); 611 #endif 612 return 0; 613 } 614 early_param("numa", numa_setup); 615 616 #ifdef CONFIG_NUMA 617 /* 618 * Setup early cpu_to_node. 619 * 620 * Populate cpu_to_node[] only if x86_cpu_to_apicid[], 621 * and apicid_to_node[] tables have valid entries for a CPU. 622 * This means we skip cpu_to_node[] initialisation for NUMA 623 * emulation and faking node case (when running a kernel compiled 624 * for NUMA on a non NUMA box), which is OK as cpu_to_node[] 625 * is already initialized in a round robin manner at numa_init_array, 626 * prior to this call, and this initialization is good enough 627 * for the fake NUMA cases. 628 * 629 * Called before the per_cpu areas are setup. 630 */ 631 void __init init_cpu_to_node(void) 632 { 633 int cpu; 634 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 635 636 BUG_ON(cpu_to_apicid == NULL); 637 638 for_each_possible_cpu(cpu) { 639 int node; 640 u16 apicid = cpu_to_apicid[cpu]; 641 642 if (apicid == BAD_APICID) 643 continue; 644 node = apicid_to_node[apicid]; 645 if (node == NUMA_NO_NODE) 646 continue; 647 if (!node_online(node)) 648 continue; 649 numa_set_node(cpu, node); 650 } 651 } 652 #endif 653 654 655 void __cpuinit numa_set_node(int cpu, int node) 656 { 657 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 658 659 /* early setting, no percpu area yet */ 660 if (cpu_to_node_map) { 661 cpu_to_node_map[cpu] = node; 662 return; 663 } 664 665 #ifdef CONFIG_DEBUG_PER_CPU_MAPS 666 if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { 667 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); 668 dump_stack(); 669 return; 670 } 671 #endif 672 per_cpu(x86_cpu_to_node_map, cpu) = node; 673 674 if (node != NUMA_NO_NODE) 675 per_cpu(node_number, cpu) = node; 676 } 677 678 void __cpuinit numa_clear_node(int cpu) 679 { 680 numa_set_node(cpu, NUMA_NO_NODE); 681 } 682 683 #ifndef CONFIG_DEBUG_PER_CPU_MAPS 684 685 void __cpuinit numa_add_cpu(int cpu) 686 { 687 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 688 } 689 690 void __cpuinit numa_remove_cpu(int cpu) 691 { 692 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 693 } 694 695 #else /* CONFIG_DEBUG_PER_CPU_MAPS */ 696 697 /* 698 * --------- debug versions of the numa functions --------- 699 */ 700 static void __cpuinit numa_set_cpumask(int cpu, int enable) 701 { 702 int node = early_cpu_to_node(cpu); 703 struct cpumask *mask; 704 char buf[64]; 705 706 mask = node_to_cpumask_map[node]; 707 if (mask == NULL) { 708 printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node); 709 dump_stack(); 710 return; 711 } 712 713 if (enable) 714 cpumask_set_cpu(cpu, mask); 715 else 716 cpumask_clear_cpu(cpu, mask); 717 718 cpulist_scnprintf(buf, sizeof(buf), mask); 719 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 720 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); 721 } 722 723 void __cpuinit numa_add_cpu(int cpu) 724 { 725 numa_set_cpumask(cpu, 1); 726 } 727 728 void __cpuinit numa_remove_cpu(int cpu) 729 { 730 numa_set_cpumask(cpu, 0); 731 } 732 733 int cpu_to_node(int cpu) 734 { 735 if (early_per_cpu_ptr(x86_cpu_to_node_map)) { 736 printk(KERN_WARNING 737 "cpu_to_node(%d): usage too early!\n", cpu); 738 dump_stack(); 739 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 740 } 741 return per_cpu(x86_cpu_to_node_map, cpu); 742 } 743 EXPORT_SYMBOL(cpu_to_node); 744 745 /* 746 * Same function as cpu_to_node() but used if called before the 747 * per_cpu areas are setup. 748 */ 749 int early_cpu_to_node(int cpu) 750 { 751 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 752 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 753 754 if (!cpu_possible(cpu)) { 755 printk(KERN_WARNING 756 "early_cpu_to_node(%d): no per_cpu area!\n", cpu); 757 dump_stack(); 758 return NUMA_NO_NODE; 759 } 760 return per_cpu(x86_cpu_to_node_map, cpu); 761 } 762 763 /* 764 * --------- end of debug versions of the numa functions --------- 765 */ 766 767 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ 768