1 /* sun4d_smp.c: Sparc SS1000/SC2000 SMP support. 2 * 3 * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 4 * 5 * Based on sun4m's smp.c, which is: 6 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 7 */ 8 9 #include <asm/head.h> 10 11 #include <linux/kernel.h> 12 #include <linux/sched.h> 13 #include <linux/threads.h> 14 #include <linux/smp.h> 15 #include <linux/interrupt.h> 16 #include <linux/kernel_stat.h> 17 #include <linux/init.h> 18 #include <linux/spinlock.h> 19 #include <linux/mm.h> 20 #include <linux/swap.h> 21 #include <linux/profile.h> 22 #include <linux/delay.h> 23 24 #include <asm/ptrace.h> 25 #include <asm/atomic.h> 26 #include <asm/irq_regs.h> 27 28 #include <asm/irq.h> 29 #include <asm/page.h> 30 #include <asm/pgalloc.h> 31 #include <asm/pgtable.h> 32 #include <asm/oplib.h> 33 #include <asm/sbus.h> 34 #include <asm/sbi.h> 35 #include <asm/tlbflush.h> 36 #include <asm/cacheflush.h> 37 #include <asm/cpudata.h> 38 39 #include "irq.h" 40 #define IRQ_CROSS_CALL 15 41 42 extern ctxd_t *srmmu_ctx_table_phys; 43 44 static volatile int smp_processors_ready = 0; 45 static int smp_highest_cpu; 46 extern volatile unsigned long cpu_callin_map[NR_CPUS]; 47 extern cpuinfo_sparc cpu_data[NR_CPUS]; 48 extern unsigned char boot_cpu_id; 49 extern volatile int smp_process_available; 50 51 extern cpumask_t smp_commenced_mask; 52 53 extern int __smp4d_processor_id(void); 54 55 /* #define SMP_DEBUG */ 56 57 #ifdef SMP_DEBUG 58 #define SMP_PRINTK(x) printk x 59 #else 60 #define SMP_PRINTK(x) 61 #endif 62 63 static inline unsigned long swap(volatile unsigned long *ptr, unsigned long val) 64 { 65 __asm__ __volatile__("swap [%1], %0\n\t" : 66 "=&r" (val), "=&r" (ptr) : 67 "0" (val), "1" (ptr)); 68 return val; 69 } 70 71 static void smp_setup_percpu_timer(void); 72 extern void cpu_probe(void); 73 extern void sun4d_distribute_irqs(void); 74 75 void __init smp4d_callin(void) 76 { 77 int cpuid = hard_smp4d_processor_id(); 78 extern spinlock_t 
sun4d_imsk_lock; 79 unsigned long flags; 80 81 /* Show we are alive */ 82 cpu_leds[cpuid] = 0x6; 83 show_leds(cpuid); 84 85 /* Enable level15 interrupt, disable level14 interrupt for now */ 86 cc_set_imsk((cc_get_imsk() & ~0x8000) | 0x4000); 87 88 local_flush_cache_all(); 89 local_flush_tlb_all(); 90 91 /* 92 * Unblock the master CPU _only_ when the scheduler state 93 * of all secondary CPUs will be up-to-date, so after 94 * the SMP initialization the master will be just allowed 95 * to call the scheduler code. 96 */ 97 /* Get our local ticker going. */ 98 smp_setup_percpu_timer(); 99 100 calibrate_delay(); 101 smp_store_cpu_info(cpuid); 102 local_flush_cache_all(); 103 local_flush_tlb_all(); 104 105 /* Allow master to continue. */ 106 swap((unsigned long *)&cpu_callin_map[cpuid], 1); 107 local_flush_cache_all(); 108 local_flush_tlb_all(); 109 110 cpu_probe(); 111 112 while((unsigned long)current_set[cpuid] < PAGE_OFFSET) 113 barrier(); 114 115 while(current_set[cpuid]->cpu != cpuid) 116 barrier(); 117 118 /* Fix idle thread fields. */ 119 __asm__ __volatile__("ld [%0], %%g6\n\t" 120 : : "r" (¤t_set[cpuid]) 121 : "memory" /* paranoid */); 122 123 cpu_leds[cpuid] = 0x9; 124 show_leds(cpuid); 125 126 /* Attach to the address space of init_task. */ 127 atomic_inc(&init_mm.mm_count); 128 current->active_mm = &init_mm; 129 130 local_flush_cache_all(); 131 local_flush_tlb_all(); 132 133 local_irq_enable(); /* We don't allow PIL 14 yet */ 134 135 while (!cpu_isset(cpuid, smp_commenced_mask)) 136 barrier(); 137 138 spin_lock_irqsave(&sun4d_imsk_lock, flags); 139 cc_set_imsk(cc_get_imsk() & ~0x4000); /* Allow PIL 14 as well */ 140 spin_unlock_irqrestore(&sun4d_imsk_lock, flags); 141 cpu_set(cpuid, cpu_online_map); 142 143 } 144 145 extern void init_IRQ(void); 146 extern void cpu_panic(void); 147 148 /* 149 * Cycle through the processors asking the PROM to start each one. 
150 */ 151 152 extern struct linux_prom_registers smp_penguin_ctable; 153 extern unsigned long trapbase_cpu1[]; 154 extern unsigned long trapbase_cpu2[]; 155 extern unsigned long trapbase_cpu3[]; 156 157 void __init smp4d_boot_cpus(void) 158 { 159 if (boot_cpu_id) 160 current_set[0] = NULL; 161 smp_setup_percpu_timer(); 162 local_flush_cache_all(); 163 } 164 165 int __cpuinit smp4d_boot_one_cpu(int i) 166 { 167 extern unsigned long sun4d_cpu_startup; 168 unsigned long *entry = &sun4d_cpu_startup; 169 struct task_struct *p; 170 int timeout; 171 int cpu_node; 172 173 cpu_find_by_instance(i, &cpu_node,NULL); 174 /* Cook up an idler for this guy. */ 175 p = fork_idle(i); 176 current_set[i] = task_thread_info(p); 177 178 /* 179 * Initialize the contexts table 180 * Since the call to prom_startcpu() trashes the structure, 181 * we need to re-initialize it for each cpu 182 */ 183 smp_penguin_ctable.which_io = 0; 184 smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys; 185 smp_penguin_ctable.reg_size = 0; 186 187 /* whirrr, whirrr, whirrrrrrrrr... */ 188 SMP_PRINTK(("Starting CPU %d at %p \n", i, entry)); 189 local_flush_cache_all(); 190 prom_startcpu(cpu_node, 191 &smp_penguin_ctable, 0, (char *)entry); 192 193 SMP_PRINTK(("prom_startcpu returned :)\n")); 194 195 /* wheee... it's going... 
*/ 196 for(timeout = 0; timeout < 10000; timeout++) { 197 if(cpu_callin_map[i]) 198 break; 199 udelay(200); 200 } 201 202 if (!(cpu_callin_map[i])) { 203 printk("Processor %d is stuck.\n", i); 204 return -ENODEV; 205 206 } 207 local_flush_cache_all(); 208 return 0; 209 } 210 211 void __init smp4d_smp_done(void) 212 { 213 int i, first; 214 int *prev; 215 216 /* setup cpu list for irq rotation */ 217 first = 0; 218 prev = &first; 219 for (i = 0; i < NR_CPUS; i++) 220 if (cpu_online(i)) { 221 *prev = i; 222 prev = &cpu_data(i).next; 223 } 224 *prev = first; 225 local_flush_cache_all(); 226 227 /* Free unneeded trap tables */ 228 ClearPageReserved(virt_to_page(trapbase_cpu1)); 229 init_page_count(virt_to_page(trapbase_cpu1)); 230 free_page((unsigned long)trapbase_cpu1); 231 totalram_pages++; 232 num_physpages++; 233 234 ClearPageReserved(virt_to_page(trapbase_cpu2)); 235 init_page_count(virt_to_page(trapbase_cpu2)); 236 free_page((unsigned long)trapbase_cpu2); 237 totalram_pages++; 238 num_physpages++; 239 240 ClearPageReserved(virt_to_page(trapbase_cpu3)); 241 init_page_count(virt_to_page(trapbase_cpu3)); 242 free_page((unsigned long)trapbase_cpu3); 243 totalram_pages++; 244 num_physpages++; 245 246 /* Ok, they are spinning and ready to go. */ 247 smp_processors_ready = 1; 248 sun4d_distribute_irqs(); 249 } 250 251 static struct smp_funcall { 252 smpfunc_t func; 253 unsigned long arg1; 254 unsigned long arg2; 255 unsigned long arg3; 256 unsigned long arg4; 257 unsigned long arg5; 258 unsigned char processors_in[NR_CPUS]; /* Set when ipi entered. */ 259 unsigned char processors_out[NR_CPUS]; /* Set when ipi exited. */ 260 } ccall_info __attribute__((aligned(8))); 261 262 static DEFINE_SPINLOCK(cross_call_lock); 263 264 /* Cross calls must be serialized, at least currently. 
*/
/*
 * Run @func(arg1..arg5) on every other online CPU.
 *
 * Does nothing until smp_processors_ready is set.  Otherwise, under
 * cross_call_lock with interrupts disabled, the function and arguments
 * are written into ccall_info, an IRQ_CROSS_CALL IPI is sent to each
 * online CPU other than the caller in the range 0..smp_highest_cpu,
 * and the caller spins until every slot in that range reports both
 * entry (processors_in) and exit (processors_out).  Slots of CPUs that
 * were not sent an IPI are not cleared here, so they keep the non-zero
 * value they already held.
 */
void smp4d_cross_call(smpfunc_t func, unsigned long arg1, unsigned long arg2,
		    unsigned long arg3, unsigned long arg4, unsigned long arg5)
{
	if(smp_processors_ready) {
		register int high = smp_highest_cpu;
		unsigned long flags;

		spin_lock_irqsave(&cross_call_lock, flags);

		{
			/* If you make changes here, make sure gcc generates proper code... */
			register smpfunc_t f asm("i0") = func;
			register unsigned long a1 asm("i1") = arg1;
			register unsigned long a2 asm("i2") = arg2;
			register unsigned long a3 asm("i3") = arg3;
			register unsigned long a4 asm("i4") = arg4;
			register unsigned long a5 asm("i5") = arg5;

			/*
			 * Three doubleword stores copy the register pairs
			 * (f,a1), (a2,a3), (a4,a5) to ccall_info.func..arg5.
			 * The asm writes memory that is not an output
			 * operand, hence the "memory" clobber.
			 */
			__asm__ __volatile__(
				"std %0, [%6]\n\t"
				"std %2, [%6 + 8]\n\t"
				"std %4, [%6 + 16]\n\t" : :
				"r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
				"r" (&ccall_info.func) : "memory");
		}

		/* Init receive/complete mapping, plus fire the IPI's off. */
		{
			cpumask_t mask;
			register int i;

			/* mask = online CPUs except the one we are running on */
			mask = cpumask_of_cpu(hard_smp4d_processor_id());
			cpus_andnot(mask, cpu_online_map, mask);
			for(i = 0; i <= high; i++) {
				if (cpu_isset(i, mask)) {
					ccall_info.processors_in[i] = 0;
					ccall_info.processors_out[i] = 0;
					sun4d_send_ipi(i, IRQ_CROSS_CALL);
				}
			}
		}

		{
			register int i;

			/* First wait for every target to enter the handler... */
			i = 0;
			do {
				while(!ccall_info.processors_in[i])
					barrier();
			} while(++i <= high);

			/* ...then for every target to finish running func. */
			i = 0;
			do {
				while(!ccall_info.processors_out[i])
					barrier();
			} while(++i <= high);
		}

		spin_unlock_irqrestore(&cross_call_lock, flags);
	}
}

/* Running cross calls.
*/ 328 void smp4d_cross_call_irq(void) 329 { 330 int i = hard_smp4d_processor_id(); 331 332 ccall_info.processors_in[i] = 1; 333 ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, 334 ccall_info.arg4, ccall_info.arg5); 335 ccall_info.processors_out[i] = 1; 336 } 337 338 static int smp4d_stop_cpu_sender; 339 340 static void smp4d_stop_cpu(void) 341 { 342 int me = hard_smp4d_processor_id(); 343 344 if (me != smp4d_stop_cpu_sender) 345 while(1) barrier(); 346 } 347 348 /* Cross calls, in order to work efficiently and atomically do all 349 * the message passing work themselves, only stopcpu and reschedule 350 * messages come through here. 351 */ 352 void smp4d_message_pass(int target, int msg, unsigned long data, int wait) 353 { 354 int me = hard_smp4d_processor_id(); 355 356 SMP_PRINTK(("smp4d_message_pass %d %d %08lx %d\n", target, msg, data, wait)); 357 if (msg == MSG_STOP_CPU && target == MSG_ALL_BUT_SELF) { 358 unsigned long flags; 359 static DEFINE_SPINLOCK(stop_cpu_lock); 360 spin_lock_irqsave(&stop_cpu_lock, flags); 361 smp4d_stop_cpu_sender = me; 362 smp4d_cross_call((smpfunc_t)smp4d_stop_cpu, 0, 0, 0, 0, 0); 363 spin_unlock_irqrestore(&stop_cpu_lock, flags); 364 } 365 printk("Yeeee, trying to send SMP msg(%d) to %d on cpu %d\n", msg, target, me); 366 panic("Bogon SMP message pass."); 367 } 368 369 void smp4d_percpu_timer_interrupt(struct pt_regs *regs) 370 { 371 struct pt_regs *old_regs; 372 int cpu = hard_smp4d_processor_id(); 373 static int cpu_tick[NR_CPUS]; 374 static char led_mask[] = { 0xe, 0xd, 0xb, 0x7, 0xb, 0xd }; 375 376 old_regs = set_irq_regs(regs); 377 bw_get_prof_limit(cpu); 378 bw_clear_intr_mask(0, 1); /* INTR_TABLE[0] & 1 is Profile IRQ */ 379 380 cpu_tick[cpu]++; 381 if (!(cpu_tick[cpu] & 15)) { 382 if (cpu_tick[cpu] == 0x60) 383 cpu_tick[cpu] = 0; 384 cpu_leds[cpu] = led_mask[cpu_tick[cpu] >> 4]; 385 show_leds(cpu); 386 } 387 388 profile_tick(CPU_PROFILING); 389 390 if(!--prof_counter(cpu)) { 391 int user = user_mode(regs); 
392 393 irq_enter(); 394 update_process_times(user); 395 irq_exit(); 396 397 prof_counter(cpu) = prof_multiplier(cpu); 398 } 399 set_irq_regs(old_regs); 400 } 401 402 extern unsigned int lvl14_resolution; 403 404 static void __init smp_setup_percpu_timer(void) 405 { 406 int cpu = hard_smp4d_processor_id(); 407 408 prof_counter(cpu) = prof_multiplier(cpu) = 1; 409 load_profile_irq(cpu, lvl14_resolution); 410 } 411 412 void __init smp4d_blackbox_id(unsigned *addr) 413 { 414 int rd = *addr & 0x3e000000; 415 416 addr[0] = 0xc0800800 | rd; /* lda [%g0] ASI_M_VIKING_TMP1, reg */ 417 addr[1] = 0x01000000; /* nop */ 418 addr[2] = 0x01000000; /* nop */ 419 } 420 421 void __init smp4d_blackbox_current(unsigned *addr) 422 { 423 int rd = *addr & 0x3e000000; 424 425 addr[0] = 0xc0800800 | rd; /* lda [%g0] ASI_M_VIKING_TMP1, reg */ 426 addr[2] = 0x81282002 | rd | (rd >> 11); /* sll reg, 2, reg */ 427 addr[4] = 0x01000000; /* nop */ 428 } 429 430 void __init sun4d_init_smp(void) 431 { 432 int i; 433 extern unsigned int t_nmi[], linux_trap_ipi15_sun4d[], linux_trap_ipi15_sun4m[]; 434 435 /* Patch ipi15 trap table */ 436 t_nmi[1] = t_nmi[1] + (linux_trap_ipi15_sun4d - linux_trap_ipi15_sun4m); 437 438 /* And set btfixup... */ 439 BTFIXUPSET_BLACKBOX(hard_smp_processor_id, smp4d_blackbox_id); 440 BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current); 441 BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM); 442 BTFIXUPSET_CALL(smp_message_pass, smp4d_message_pass, BTFIXUPCALL_NORM); 443 BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM); 444 445 for (i = 0; i < NR_CPUS; i++) { 446 ccall_info.processors_in[i] = 1; 447 ccall_info.processors_out[i] = 1; 448 } 449 } 450