#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <asm/smp.h>
#include <asm/percpu.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/topology.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>

#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
unsigned int max_physical_apicid;
EXPORT_SYMBOL(boot_cpu_physical_apicid);

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif

/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA	1

/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);

#else
static inline void setup_node_to_cpumask_map(void) { }
#endif

#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
 * Copy data used in early init routines from the initial arrays to the
 * per cpu data areas.  These arrays then become expendable and the
 * *_early_ptr's are zeroed indicating that the static arrays are gone.
 */
static void __init setup_per_cpu_maps(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(x86_cpu_to_apicid, cpu) =
				early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
				early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
	}

	/* indicate the early static arrays will soon be gone */
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}

#ifdef CONFIG_X86_32
/*
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way.
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }

#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }

#else /* CONFIG_SMP && CONFIG_X86_64 */

/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
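/*
 * Illustrative layout of the single bootmem block carved up below,
 * assuming nr_cpu_ids == 4 (exact sizes depend on cache_line_size()):
 *
 *	new_cpu_pda -> [ptr0][ptr1][ptr2][ptr3] ...pad...	(tsize)
 *	pda ---------> [pda cpu1][pda cpu2][pda cpu3]		(asize)
 *
 * The boot cpu keeps its statically allocated pda, so only
 * nr_cpu_ids - 1 pda entries are reserved in the array part.
 */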
static void __init setup_cpu_pda_map(void)
{
	char *pda;
	struct x8664_pda **new_cpu_pda;
	unsigned long size;
	int cpu;

	size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* allocate cpu_pda array and pointer table */
	{
		unsigned long tsize = nr_cpu_ids * sizeof(void *);
		unsigned long asize = size * (nr_cpu_ids - 1);

		tsize = roundup(tsize, cache_line_size());
		new_cpu_pda = alloc_bootmem(tsize + asize);
		pda = (char *)new_cpu_pda + tsize;
	}

	/* initialize pointer table to static pda's */
	for_each_possible_cpu(cpu) {
		if (cpu == 0) {
			/* leave boot cpu pda in place */
			new_cpu_pda[0] = cpu_pda(0);
			continue;
		}
		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
		new_cpu_pda[cpu]->in_bootmem = 1;
		pda += size;
	}

	/* point to new pointer table */
	_cpu_pda = new_cpu_pda;
}
#endif

/*
 * Great future plan:
 * Declare PDA itself and support (irqstack, tss, pgd) as per cpu data.
 * Always point %gs to its beginning.
 */
void __init setup_per_cpu_areas(void)
{
	ssize_t size = PERCPU_ENOUGH_ROOM;
	char *ptr;
	int cpu;

	/* Setup cpu_pda map */
	setup_cpu_pda_map();

	/* Copy section for each CPU (we discard the original) */
	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
			  size);

	for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
		ptr = alloc_bootmem_pages(size);
#else
		int node = early_cpu_to_node(cpu);
		if (!node_online(node) || !NODE_DATA(node)) {
			ptr = alloc_bootmem_pages(size);
			printk(KERN_INFO
			       "cpu %d has no node %d or node-local memory\n",
			       cpu, node);
			if (ptr)
				printk(KERN_DEBUG
				       "per cpu data for cpu%d at %016lx\n",
				       cpu, __pa(ptr));
		} else {
			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
			if (ptr)
				printk(KERN_DEBUG
				       "per cpu data for cpu%d on node%d at %016lx\n",
				       cpu, node, __pa(ptr));
		}
#endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}

	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids: %d\n",
		NR_CPUS, nr_cpu_ids, nr_node_ids);

	/* Setup percpu data maps */
	setup_per_cpu_maps();

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();
}

#endif

#ifdef X86_64_NUMA

/*
 * Allocate node_to_cpumask_map based on number of available nodes.
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
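/*
 * Illustrative example: with a sparse node_possible_map of { 0, 2 },
 * the loop below leaves num == 2 and nr_node_ids becomes 3, i.e. the
 * map is sized to index the highest possible node number, not merely
 * the count of possible nodes.
 */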
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;
	cpumask_t *map;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));

	pr_debug("Node to cpumask map at %p for %d nodes\n",
		 map, nr_node_ids);

	/* node_to_cpumask() will now work */
	node_to_cpumask_map = map;
}

void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (cpu_to_node_map)
		cpu_to_node_map[cpu] = node;
	else if (per_cpu_offset(cpu))
		per_cpu(x86_cpu_to_node_map, cpu) = node;
	else
		pr_debug("Setting node for non-present cpu %d\n", cpu);
}

void __cpuinit numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

void __cpuinit numa_add_cpu(int cpu)
{
	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}

#else /* CONFIG_DEBUG_PER_CPU_MAPS */

/*
 * --------- debug versions of the numa functions ---------
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = cpu_to_node(cpu);
	cpumask_t *mask;
	char buf[64];

	if (node_to_cpumask_map == NULL) {
		printk(KERN_ERR "node_to_cpumask_map NULL\n");
		dump_stack();
		return;
	}

	mask = &node_to_cpumask_map[node];
	if (enable)
		cpu_set(cpu, *mask);
	else
		cpu_clear(cpu, *mask);

	cpulist_scnprintf(buf, sizeof(buf), *mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
	       enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}

void __cpuinit numa_add_cpu(int cpu)
{
	numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	numa_set_cpumask(cpu, 0);
}

int cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
		printk(KERN_WARNING
		       "cpu_to_node(%d): usage too early!\n", cpu);
		dump_stack();
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are set up.
 */
int early_cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	if (!per_cpu_offset(cpu)) {
		printk(KERN_WARNING
		       "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
		dump_stack();
		return NUMA_NO_NODE;
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}

/* empty cpumask */
static const cpumask_t cpu_mask_none;

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
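/*
 * Hypothetical caller sketch: because bad input falls back to a safe
 * mask (cpu_online_map or the empty cpu_mask_none) rather than oopsing,
 * loops such as
 *
 *	const cpumask_t *mask = _node_to_cpumask_ptr(node);
 *	for_each_cpu_mask(cpu, *mask)
 *		do_something(cpu);
 *
 * degrade gracefully when node_to_cpumask_map is not yet allocated or
 * 'node' is out of range.
 */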
const cpumask_t *_node_to_cpumask_ptr(int node)
{
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
		       "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
		       node);
		dump_stack();
		return (const cpumask_t *)&cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
		       "_node_to_cpumask_ptr(%d): node >= nr_node_ids(%d)\n",
		       node, nr_node_ids);
		dump_stack();
		return &cpu_mask_none;
	}
	return &node_to_cpumask_map[node];
}
EXPORT_SYMBOL(_node_to_cpumask_ptr);

/*
 * Returns a bitmask of CPUs on Node 'node'.
 *
 * Side note: this function creates the returned cpumask on the stack,
 * so with a high NR_CPUS count, excessive stack space is used.  The
 * node_to_cpumask_ptr function should be used whenever possible.
 */
cpumask_t node_to_cpumask(int node)
{
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
		       "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
		dump_stack();
		return cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
		       "node_to_cpumask(%d): node >= nr_node_ids(%d)\n",
		       node, nr_node_ids);
		dump_stack();
		return cpu_mask_none;
	}
	return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(node_to_cpumask);

/*
 * --------- end of debug versions of the numa functions ---------
 */

#endif /* CONFIG_DEBUG_PER_CPU_MAPS */

#endif /* X86_64_NUMA */