1 #include <linux/kernel.h> 2 #include <linux/module.h> 3 #include <linux/init.h> 4 #include <linux/bootmem.h> 5 #include <linux/percpu.h> 6 #include <linux/kexec.h> 7 #include <linux/crash_dump.h> 8 #include <asm/smp.h> 9 #include <asm/percpu.h> 10 #include <asm/sections.h> 11 #include <asm/processor.h> 12 #include <asm/setup.h> 13 #include <asm/topology.h> 14 #include <asm/mpspec.h> 15 #include <asm/apicdef.h> 16 #include <asm/highmem.h> 17 18 #ifdef CONFIG_X86_LOCAL_APIC 19 unsigned int num_processors; 20 unsigned disabled_cpus __cpuinitdata; 21 /* Processor that is doing the boot up */ 22 unsigned int boot_cpu_physical_apicid = -1U; 23 unsigned int max_physical_apicid; 24 EXPORT_SYMBOL(boot_cpu_physical_apicid); 25 26 /* Bitmask of physically existing CPUs */ 27 physid_mask_t phys_cpu_present_map; 28 #endif 29 30 /* map cpu index to physical APIC ID */ 31 DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 32 DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 33 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 34 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 35 36 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 37 #define X86_64_NUMA 1 38 39 /* map cpu index to node index */ 40 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 41 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 42 43 /* which logical CPUs are on which nodes */ 44 cpumask_t *node_to_cpumask_map; 45 EXPORT_SYMBOL(node_to_cpumask_map); 46 47 /* setup node_to_cpumask_map */ 48 static void __init setup_node_to_cpumask_map(void); 49 50 #else 51 static inline void setup_node_to_cpumask_map(void) { } 52 #endif 53 54 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 55 /* 56 * Copy data used in early init routines from the initial arrays to the 57 * per cpu data areas. These arrays then become expendable and the 58 * *_early_ptr's are zeroed indicating that the static arrays are gone. 59 */ 60 static void __init setup_per_cpu_maps(void) 61 { 62 int cpu; 63 64 for_each_possible_cpu(cpu) { 65 per_cpu(x86_cpu_to_apicid, cpu) = 66 early_per_cpu_map(x86_cpu_to_apicid, cpu); 67 per_cpu(x86_bios_cpu_apicid, cpu) = 68 early_per_cpu_map(x86_bios_cpu_apicid, cpu); 69 #ifdef X86_64_NUMA 70 per_cpu(x86_cpu_to_node_map, cpu) = 71 early_per_cpu_map(x86_cpu_to_node_map, cpu); 72 #endif 73 } 74 75 /* indicate the early static arrays will soon be gone */ 76 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 77 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; 78 #ifdef X86_64_NUMA 79 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 80 #endif 81 } 82 83 #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP 84 cpumask_t *cpumask_of_cpu_map __read_mostly; 85 EXPORT_SYMBOL(cpumask_of_cpu_map); 86 87 /* requires nr_cpu_ids to be initialized */ 88 static void __init setup_cpumask_of_cpu(void) 89 { 90 int i; 91 92 /* alloc_bootmem zeroes memory */ 93 cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); 94 for (i = 0; i < nr_cpu_ids; i++) 95 cpu_set(i, cpumask_of_cpu_map[i]); 96 } 97 #else 98 static inline void setup_cpumask_of_cpu(void) { } 99 #endif 100 101 #ifdef CONFIG_X86_32 102 /* 103 * Great future not-so-futuristic plan: make i386 and x86_64 do it 104 * the same way 105 */ 106 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 107 EXPORT_SYMBOL(__per_cpu_offset); 108 static inline void setup_cpu_pda_map(void) { } 109 110 #elif !defined(CONFIG_SMP) 111 static inline void setup_cpu_pda_map(void) { } 112 113 #else /* CONFIG_SMP && CONFIG_X86_64 */ 114 115 /* 116 * Allocate cpu_pda pointer table and array via alloc_bootmem. 117 */ 118 static void __init setup_cpu_pda_map(void) 119 { 120 char *pda; 121 struct x8664_pda **new_cpu_pda; 122 unsigned long size; 123 int cpu; 124 125 size = roundup(sizeof(struct x8664_pda), cache_line_size()); 126 127 /* allocate cpu_pda array and pointer table */ 128 { 129 unsigned long tsize = nr_cpu_ids * sizeof(void *); 130 unsigned long asize = size * (nr_cpu_ids - 1); 131 132 tsize = roundup(tsize, cache_line_size()); 133 new_cpu_pda = alloc_bootmem(tsize + asize); 134 pda = (char *)new_cpu_pda + tsize; 135 } 136 137 /* initialize pointer table to static pda's */ 138 for_each_possible_cpu(cpu) { 139 if (cpu == 0) { 140 /* leave boot cpu pda in place */ 141 new_cpu_pda[0] = cpu_pda(0); 142 continue; 143 } 144 new_cpu_pda[cpu] = (struct x8664_pda *)pda; 145 new_cpu_pda[cpu]->in_bootmem = 1; 146 pda += size; 147 } 148 149 /* point to new pointer table */ 150 _cpu_pda = new_cpu_pda; 151 } 152 #endif 153 154 /* 155 * Great future plan: 156 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 157 * Always point %gs to its beginning 158 */ 159 void __init setup_per_cpu_areas(void) 160 { 161 ssize_t size = PERCPU_ENOUGH_ROOM; 162 char *ptr; 163 int cpu; 164 165 /* Setup cpu_pda map */ 166 setup_cpu_pda_map(); 167 168 /* Copy section for each CPU (we discard the original) */ 169 size = PERCPU_ENOUGH_ROOM; 170 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 171 size); 172 173 for_each_possible_cpu(cpu) { 174 #ifndef CONFIG_NEED_MULTIPLE_NODES 175 ptr = alloc_bootmem_pages(size); 176 #else 177 int node = early_cpu_to_node(cpu); 178 if (!node_online(node) || !NODE_DATA(node)) { 179 ptr = alloc_bootmem_pages(size); 180 printk(KERN_INFO 181 "cpu %d has no node %d or node-local memory\n", 182 cpu, node); 183 } 184 else 185 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 186 #endif 187 per_cpu_offset(cpu) = ptr - __per_cpu_start; 188 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 189 190 } 191 192 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", 193 NR_CPUS, nr_cpu_ids, nr_node_ids); 194 195 /* Setup percpu data maps */ 196 setup_per_cpu_maps(); 197 198 /* Setup node to cpumask map */ 199 setup_node_to_cpumask_map(); 200 201 /* Setup cpumask_of_cpu map */ 202 setup_cpumask_of_cpu(); 203 } 204 205 #endif 206 207 #ifdef X86_64_NUMA 208 209 /* 210 * Allocate node_to_cpumask_map based on number of available nodes 211 * Requires node_possible_map to be valid. 212 * 213 * Note: node_to_cpumask() is not valid until after this is done. 214 */ 215 static void __init setup_node_to_cpumask_map(void) 216 { 217 unsigned int node, num = 0; 218 cpumask_t *map; 219 220 /* setup nr_node_ids if not done yet */ 221 if (nr_node_ids == MAX_NUMNODES) { 222 for_each_node_mask(node, node_possible_map) 223 num = node; 224 nr_node_ids = num + 1; 225 } 226 227 /* allocate the map */ 228 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 229 230 pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", 231 map, nr_node_ids); 232 233 /* node_to_cpumask() will now work */ 234 node_to_cpumask_map = map; 235 } 236 237 void __cpuinit numa_set_node(int cpu, int node) 238 { 239 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 240 241 if (cpu_pda(cpu) && node != NUMA_NO_NODE) 242 cpu_pda(cpu)->nodenumber = node; 243 244 if (cpu_to_node_map) 245 cpu_to_node_map[cpu] = node; 246 247 else if (per_cpu_offset(cpu)) 248 per_cpu(x86_cpu_to_node_map, cpu) = node; 249 250 else 251 pr_debug("Setting node for non-present cpu %d\n", cpu); 252 } 253 254 void __cpuinit numa_clear_node(int cpu) 255 { 256 numa_set_node(cpu, NUMA_NO_NODE); 257 } 258 259 #ifndef CONFIG_DEBUG_PER_CPU_MAPS 260 261 void __cpuinit numa_add_cpu(int cpu) 262 { 263 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 264 } 265 266 void __cpuinit numa_remove_cpu(int cpu) 267 { 268 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); 269 } 270 271 #else /* CONFIG_DEBUG_PER_CPU_MAPS */ 272 273 /* 274 * --------- debug versions of the numa functions --------- 275 */ 276 static void __cpuinit numa_set_cpumask(int cpu, int enable) 277 { 278 int node = cpu_to_node(cpu); 279 cpumask_t *mask; 280 char buf[64]; 281 282 if (node_to_cpumask_map == NULL) { 283 printk(KERN_ERR "node_to_cpumask_map NULL\n"); 284 dump_stack(); 285 return; 286 } 287 288 mask = &node_to_cpumask_map[node]; 289 if (enable) 290 cpu_set(cpu, *mask); 291 else 292 cpu_clear(cpu, *mask); 293 294 cpulist_scnprintf(buf, sizeof(buf), *mask); 295 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 296 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 297 } 298 299 void __cpuinit numa_add_cpu(int cpu) 300 { 301 numa_set_cpumask(cpu, 1); 302 } 303 304 void __cpuinit numa_remove_cpu(int cpu) 305 { 306 numa_set_cpumask(cpu, 0); 307 } 308 309 int cpu_to_node(int cpu) 310 { 311 if (early_per_cpu_ptr(x86_cpu_to_node_map)) { 312 printk(KERN_WARNING 313 "cpu_to_node(%d): usage too early!\n", cpu); 314 dump_stack(); 315 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 316 } 317 return per_cpu(x86_cpu_to_node_map, cpu); 318 } 319 EXPORT_SYMBOL(cpu_to_node); 320 321 /* 322 * Same function as cpu_to_node() but used if called before the 323 * per_cpu areas are setup. 324 */ 325 int early_cpu_to_node(int cpu) 326 { 327 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 328 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 329 330 if (!per_cpu_offset(cpu)) { 331 printk(KERN_WARNING 332 "early_cpu_to_node(%d): no per_cpu area!\n", cpu); 333 dump_stack(); 334 return NUMA_NO_NODE; 335 } 336 return per_cpu(x86_cpu_to_node_map, cpu); 337 } 338 339 340 /* empty cpumask */ 341 static const cpumask_t cpu_mask_none; 342 343 /* 344 * Returns a pointer to the bitmask of CPUs on Node 'node'. 345 */ 346 const cpumask_t *_node_to_cpumask_ptr(int node) 347 { 348 if (node_to_cpumask_map == NULL) { 349 printk(KERN_WARNING 350 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", 351 node); 352 dump_stack(); 353 return (const cpumask_t *)&cpu_online_map; 354 } 355 if (node >= nr_node_ids) { 356 printk(KERN_WARNING 357 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", 358 node, nr_node_ids); 359 dump_stack(); 360 return &cpu_mask_none; 361 } 362 return &node_to_cpumask_map[node]; 363 } 364 EXPORT_SYMBOL(_node_to_cpumask_ptr); 365 366 /* 367 * Returns a bitmask of CPUs on Node 'node'. 368 * 369 * Side note: this function creates the returned cpumask on the stack 370 * so with a high NR_CPUS count, excessive stack space is used. The 371 * node_to_cpumask_ptr function should be used whenever possible. 372 */ 373 cpumask_t node_to_cpumask(int node) 374 { 375 if (node_to_cpumask_map == NULL) { 376 printk(KERN_WARNING 377 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); 378 dump_stack(); 379 return cpu_online_map; 380 } 381 if (node >= nr_node_ids) { 382 printk(KERN_WARNING 383 "node_to_cpumask(%d): node > nr_node_ids(%d)\n", 384 node, nr_node_ids); 385 dump_stack(); 386 return cpu_mask_none; 387 } 388 return node_to_cpumask_map[node]; 389 } 390 EXPORT_SYMBOL(node_to_cpumask); 391 392 /* 393 * --------- end of debug versions of the numa functions --------- 394 */ 395 396 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ 397 398 #endif /* X86_64_NUMA */ 399 400