/*
 * SMP Support
 *
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * Lots of stuff stolen from arch/alpha/kernel/smp.c
 *
 * 01/05/16 Rohit Seth <rohit.seth@intel.com>	IA64-SMP functions. Reorganized
 *		the existing code (on the lines of x86 port).
 * 00/09/11 David Mosberger <davidm@hpl.hp.com>	Do loops_per_jiffy
 *		calibration on each CPU.
 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
 * 00/03/31 Rohit Seth <rohit.seth@intel.com>	Fixes for Bootstrap Processor
 *		& cpu_online_map now gets done here (instead of setup.c)
 * 99/10/05 davidm	Update to bring it in sync with new command-line processing
 *		scheme.
 * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
 *		smp_call_function_single to resend IPI on timeouts
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
#include <linux/kexec.h>

#include <asm/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>

/*
 * Note: alignment of 4 entries/cacheline was empirically determined
 * to be a good tradeoff between hot cachelines & spreading the array
 * across too many cachelines.
 */
static struct local_tlb_flush_counts {
	unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;

/*
 * Structure and data for smp_call_function(). This is designed to minimise static memory
 * requirements. It also looks cleaner.
 */
static __cacheline_aligned DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	long wait;
	atomic_t started;
	atomic_t finished;
};

static volatile struct call_data_struct *call_data;

#define IPI_CALL_FUNC		0
#define IPI_CPU_STOP		1
#define IPI_KDUMP_CPU_STOP	3
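/*
 * Illustrative sketch (comment only, not part of the control flow): how one
 * of the op codes above travels from sender to receiver through the per-CPU
 * "ipi_operation" bitmask declared below.  The sender sets the op bit and
 * raises the IPI vector; handle_IPI() atomically collects and clears all
 * pending bits and dispatches each one.  Roughly:
 *
 *	sender:					receiver (handle_IPI):
 *	set_bit(op, &per_cpu(ipi_operation,	ops = xchg(pending_ipis, 0);
 *		dest_cpu));			do {
 *	platform_send_ipi(dest_cpu,			which = ffz(~ops);
 *		IA64_IPI_VECTOR,			ops &= ~(1 << which);
 *		IA64_IPI_DM_INT, 0);			... dispatch <which> ...
 *						} while (ops);
 */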
/* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;

extern void cpu_halt (void);

void
lock_ipi_calllock(void)
{
	spin_lock_irq(&call_lock);
}

void
unlock_ipi_calllock(void)
{
	spin_unlock_irq(&call_lock);
}

static void
stop_this_cpu (void)
{
	/*
	 * Remove this CPU:
	 */
	cpu_clear(smp_processor_id(), cpu_online_map);
	max_xtp();
	local_irq_disable();
	cpu_halt();
}

void
cpu_die(void)
{
	max_xtp();
	local_irq_disable();
	cpu_halt();
	/* Should never be here */
	BUG();
	for (;;);
}

irqreturn_t
handle_IPI (int irq, void *dev_id)
{
	int this_cpu = get_cpu();
	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
	unsigned long ops;

	mb();	/* Order interrupt and bit testing. */
	while ((ops = xchg(pending_ipis, 0)) != 0) {
		mb();	/* Order bit clearing and data access. */
		do {
			unsigned long which;

			which = ffz(~ops);
			ops &= ~(1 << which);

			switch (which) {
			case IPI_CALL_FUNC:
			{
				struct call_data_struct *data;
				void (*func)(void *info);
				void *info;
				int wait;

				/* release the 'pointer lock' */
				data = (struct call_data_struct *) call_data;
				func = data->func;
				info = data->info;
				wait = data->wait;

				mb();
				atomic_inc(&data->started);
				/*
				 * At this point the structure may be gone unless
				 * wait is true.
				 */
				(*func)(info);

				/* Notify the sending CPU that the task is done. */
				mb();
				if (wait)
					atomic_inc(&data->finished);
			}
			break;

			case IPI_CPU_STOP:
				stop_this_cpu();
				break;
#ifdef CONFIG_KEXEC
			case IPI_KDUMP_CPU_STOP:
				unw_init_running(kdump_cpu_freeze, NULL);
				break;
#endif
			default:
				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
				break;
			}
		} while (ops);
		mb();	/* Order data access and bit testing. */
	}
	put_cpu();
	return IRQ_HANDLED;
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_single (int dest_cpu, int op)
{
	set_bit(op, &per_cpu(ipi_operation, dest_cpu));
	platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_allbutself (int op)
{
	unsigned int i;

	for_each_online_cpu(i) {
		if (i != smp_processor_id())
			send_IPI_single(i, op);
	}
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_all (int op)
{
	int i;

	for_each_online_cpu(i) {
		send_IPI_single(i, op);
	}
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_self (int op)
{
	send_IPI_single(smp_processor_id(), op);
}
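/*
 * Usage sketch for the helpers above (comment only; both calls appear later
 * in this file): the higher-level entry points simply pick an op code and a
 * set of target CPUs, e.g.
 *
 *	send_IPI_allbutself(IPI_CPU_STOP);	<- smp_send_stop()
 *	send_IPI_single(cpuid, IPI_CALL_FUNC);	<- smp_call_function_single()
 *
 * Preemption must already be disabled so that smp_processor_id() and the set
 * of online CPUs stay stable while the IPIs are raised.
 */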
#ifdef CONFIG_KEXEC
void
kdump_smp_send_stop(void)
{
	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
}

void
kdump_smp_send_init(void)
{
	unsigned int cpu, self_cpu;
	self_cpu = smp_processor_id();
	for_each_online_cpu(cpu) {
		if (cpu != self_cpu) {
			if (kdump_status[cpu] == 0)
				platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
		}
	}
}
#endif

/*
 * Called with preemption disabled.
 */
void
smp_send_reschedule (int cpu)
{
	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static void
smp_send_local_flush_tlb (int cpu)
{
	platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
}

void
smp_local_flush_tlb(void)
{
	/*
	 * Use atomic ops. Otherwise, the load/increment/store sequence from
	 * a "++" operation can have the line stolen between the load & store.
	 * The overhead of the atomic op is negligible in this case & offers
	 * significant benefit for the brief periods where lots of cpus
	 * are simultaneously flushing TLBs.
	 */
	ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
	local_flush_tlb_all();
}

#define FLUSH_DELAY	5	/* Usec backoff to eliminate excessive cacheline bouncing */

void
smp_flush_tlb_cpumask(cpumask_t xcpumask)
{
	unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts);
	cpumask_t cpumask = xcpumask;
	int mycpu, cpu, flush_mycpu = 0;

	preempt_disable();
	mycpu = smp_processor_id();

	for_each_cpu_mask(cpu, cpumask)
		counts[cpu] = local_tlb_flush_counts[cpu].count;

	mb();
	for_each_cpu_mask(cpu, cpumask) {
		if (cpu == mycpu)
			flush_mycpu = 1;
		else
			smp_send_local_flush_tlb(cpu);
	}

	if (flush_mycpu)
		smp_local_flush_tlb();

	for_each_cpu_mask(cpu, cpumask)
		while (counts[cpu] == local_tlb_flush_counts[cpu].count)
			udelay(FLUSH_DELAY);

	preempt_enable();
}
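/*
 * Sketch of the handshake implemented by smp_flush_tlb_cpumask() above
 * (comment only): the initiator snapshots each target's flush count, kicks
 * the targets, then backs off until every count has moved past its snapshot,
 * which proves each target ran smp_local_flush_tlb() at least once:
 *
 *	counts[cpu] = local_tlb_flush_counts[cpu].count;	// snapshot
 *	smp_send_local_flush_tlb(cpu);				// kick target
 *	while (counts[cpu] == local_tlb_flush_counts[cpu].count)
 *		udelay(FLUSH_DELAY);				// wait for proof
 */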
void
smp_flush_tlb_all (void)
{
	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}

void
smp_flush_tlb_mm (struct mm_struct *mm)
{
	preempt_disable();
	/* this happens for the common case of a single-threaded fork(): */
	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
	{
		local_finish_flush_tlb_mm(mm);
		preempt_enable();
		return;
	}

	preempt_enable();
	/*
	 * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
	 * have been running in the address space. It's not clear that this is worth the
	 * trouble though: to avoid races, we have to raise the IPI on the target CPU
	 * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
	 * rather trivial.
	 */
	on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}

/*
 * Run a function on another CPU
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	Currently unused.
 *  <wait>	If true, wait until function has completed on other CPUs.
 *  [RETURNS]	0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>
 * or has already executed it.
 */

int
smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
			  int wait)
{
	struct call_data_struct data;
	int cpus = 1;
	int me = get_cpu(); /* prevent preemption and reschedule on another processor */

	if (cpuid == me) {
		printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
		put_cpu();
		return -EBUSY;
	}

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	spin_lock_bh(&call_lock);

	call_data = &data;
	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
	send_IPI_single(cpuid, IPI_CALL_FUNC);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			cpu_relax();
	call_data = NULL;

	spin_unlock_bh(&call_lock);
	put_cpu();
	return 0;
}
EXPORT_SYMBOL(smp_call_function_single);

/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */

/*
 * [SUMMARY]	Run a function on all other CPUs.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	Currently unused.
 *  <wait>	If true, wait (atomically) until function has completed on other CPUs.
 *  [RETURNS]	0 on success, else a negative status code.
 *
 * Does not return until remote CPUs are nearly ready to execute <func> or have
 * already executed it.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int
smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
{
	struct call_data_struct data;
	int cpus;

	spin_lock(&call_lock);
	cpus = num_online_cpus() - 1;
	if (!cpus) {
		spin_unlock(&call_lock);
		return 0;
	}

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
	send_IPI_allbutself(IPI_CALL_FUNC);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			cpu_relax();
	call_data = NULL;

	spin_unlock(&call_lock);
	return 0;
}
EXPORT_SYMBOL(smp_call_function);

/*
 * this function calls the 'stop' function on all other CPUs in the system.
 */
void
smp_send_stop (void)
{
	send_IPI_allbutself(IPI_CPU_STOP);
}

int __init
setup_profiling_timer (unsigned int multiplier)
{
	return -EINVAL;
}
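/*
 * Usage sketch (illustrative only; "drain_local_state" is a hypothetical
 * callback, not part of this file): a caller that wants to run a fast,
 * non-blocking function on every other online CPU and wait for completion
 * would do roughly:
 *
 *	static void drain_local_state(void *unused)
 *	{
 *		// fast, non-blocking; runs from handle_IPI() on each target CPU
 *	}
 *
 *	smp_call_function(drain_local_state, NULL, 0, 1);   // nonatomic unused, wait
 *
 * As documented above, this must not be done with interrupts disabled or from
 * a hardware interrupt or bottom half handler, or the wait for the targets'
 * responses can deadlock.
 */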